updated video choice logic

main
oscar 1 month ago
parent 834ca1b272
commit a69468f555

@ -211,9 +211,10 @@ def favorites_page():
@web.route("/users")
def users():
# ---- filters ----
import math
# ---- filters ----
q = (request.args.get("q") or "").lower().strip()
sort = request.args.get("sort", "total_size") # user|site|total_size|video_count
sort = request.args.get("sort", "total_size") # user|site|total_size|video_count
dir_ = request.args.get("dir", "desc")
reverse = (dir_ == "desc")
timeframe = request.args.get("timeframe", "all")
@ -237,19 +238,19 @@ def users():
params["end"] = end
where_sql = " AND ".join(where)
# ---- ORDER BY ----
# ---- ORDER BY (use computed GB alias) ----
sort_map = {
"user": "username",
"site": "site",
"total_size": "total_bytes",
"total_size": "total_gb", # <- sort by GB, not raw MB sum
"video_count": "video_count",
}
order_col = sort_map.get(sort, "total_bytes")
order_col = sort_map.get(sort, "total_gb")
order_dir = "DESC" if reverse else "ASC"
# ---- pagination ----
page = max(1, int(request.args.get("page", 1)))
per_page = 100 # or your DASHBOARD_PER_PAGE
per_page = 100
offset = (page - 1) * per_page
# ---- count distinct (username, site) for pager ----
@ -262,16 +263,18 @@ def users():
) t;
"""
# ---- aggregate page ----
# size is in **MB**, convert to GB:
# 1 GiB = 1024 MB → divide by 1024.0
# (If you really want decimal GB, change 1024.0 to 1000.0)
agg_sql = f"""
SELECT
username,
site,
COUNT(*) AS video_count,
SUM(size) AS total_bytes,
AVG(size) AS avg_bytes,
SUM(size)::numeric / 1000000000.0 AS total_gb,
AVG(size)::numeric / 1000000000.0 AS avg_gb
COUNT(*) AS video_count,
SUM(size) AS total_mb,
AVG(size) AS avg_mb,
(SUM(size)::numeric / 1024.0) AS total_gb,
(AVG(size)::numeric / 1024.0) AS avg_gb
FROM videos
WHERE {where_sql}
GROUP BY username, site
@ -290,9 +293,9 @@ def users():
cur.execute(agg_sql, params)
rows = cur.fetchall()
# rows: (username, site, video_count, total_bytes, avg_bytes)
# rows: (username, site, video_count, total_mb, avg_mb, total_gb, avg_gb)
# ---- get recording sets (for status dots) ----
# ---- online/recording status sets (optional) ----
online_usernames: set[str] = set()
recording_offline_usernames: set[str] = set()
if show_online_first:
@ -309,64 +312,55 @@ def users():
except Exception:
pass
# ---- thumbnail subquery (only for current page) ----
# ---- thumbnail candidates per (user, site) ----
tcur = conn.cursor()
thumb_sql = """
SELECT thumbnail
FROM videos
WHERE username = %(u)s
AND site = %(s)s
AND thumbnail IS NOT NULL
AND thumbnail <> ''
AND site = %(s)s
AND thumbnail IS NOT NULL
AND thumbnail <> ''
ORDER BY created_at DESC
LIMIT 3;
"""
tcur = conn.cursor()
def to_gb(n): return (n or 0) / 1_000_000_000.0
cards = []
for (username, site, video_count, total_bytes, avg_bytes, total_gb, avg_gb) in rows:
# fetch up to 3 recent thumbnails (unchanged logic you already added)
for (username, site, video_count, total_mb, avg_mb, total_gb, avg_gb) in rows:
# fetch up to 3 recent thumbnails
thumb_urls = []
try:
tcur.execute(
"""
SELECT thumbnail
FROM videos
WHERE username = %(u)s AND site = %(s)s
AND thumbnail IS NOT NULL AND thumbnail <> ''
ORDER BY created_at DESC
LIMIT 3;
""",
{"u": username, "s": site},
)
tcur.execute(thumb_sql, {"u": username, "s": site})
thumb_urls = [r[0] for r in tcur.fetchall() if r and r[0]]
except Exception:
pass
# ---- PRE-FORMAT display strings here (avoid Jinja float filter entirely) ----
total_gb_val = float(total_gb or 0)
avg_gb_val = float(avg_gb or 0)
total_gb_val = float(total_gb or 0.0)
avg_gb_val = float(avg_gb or 0.0)
uname_low = (username or "").lower()
cards.append({
"user": username,
"site": site,
"total_size": total_gb_val, # keep the raw number if you need it
"avg_size": avg_gb_val, # keep raw
"total_size_display": f"{total_gb_val:.2f}", # <— use this in HTML
"avg_size_display": f"{avg_gb_val:.2f}", # <— use this in HTML
"video_count": int(video_count),
# numeric
"total_size": total_gb_val,
"avg_size": avg_gb_val,
# preformatted strings for display
"total_size_display": f"{total_gb_val:.2f}",
"avg_size_display": f"{avg_gb_val:.2f}",
"thumb_urls": thumb_urls,
"is_online": uname_low in online_usernames,
"is_recording_offline": (uname_low in recording_offline_usernames) and (uname_low not in online_usernames),
})
# ---- optional: reorder with online-first grouping ----
if show_online_first:
online_cards = [c for c in cards if c["is_online"]]
rec_off_cards = [c for c in cards if c["is_recording_offline"] and not c["is_online"]]
the_rest = [c for c in cards if (c not in online_cards) and (c not in rec_off_cards)]
online_cards = [c for c in cards if c["is_online"]]
rec_off_cards = [c for c in cards if c["is_recording_offline"] and not c["is_online"]]
the_rest = [c for c in cards if (c not in online_cards) and (c not in rec_off_cards)]
key_map = {
"user": lambda c: c["user"].lower(),
@ -380,7 +374,6 @@ def users():
the_rest.sort(key=k, reverse=reverse)
cards = online_cards + rec_off_cards + the_rest
# ---- render users.html ----
return render_template(
"users.html",
cards=cards,
@ -393,4 +386,4 @@ def users():
start_date=start_str,
end_date=end_str,
online="1" if show_online_first else "0",
)
)

@ -127,60 +127,145 @@ def update_codec_db(video_id, codec):
conn.commit()
conn.close()
def smart_choice(cursor, small_mb=250, windows=(7, 30, 90)):
    """
    Return the candidate videos to encode, in processing order.

    Candidates are rows of ``videos`` with ``codec IS NULL``, a status other
    than ``'missing'`` and a non-NULL ``filepath``. They are ordered by:
      1) time window priority: each entry of ``windows`` (days) is tried in
         order, then a fallback with no time filter (any time)
      2) streamer priority: total MB per (username, site) among the
         candidates, DESC
      3) small (< small_mb MB) first, then big
      4) inside each group: size DESC, then created_at DESC

    NOTE: 'size' is stored in MB.

    Args:
        cursor: DB-API style cursor using ``%s`` placeholders. The SQL calls
            ``make_interval``, so a PostgreSQL connection is presumably
            expected -- confirm against the caller.
        small_mb: size threshold in MB below which a video counts as "small".
        windows: look-back windows in days, tried in order; the first window
            that yields any rows wins.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the first non-empty
        window, or for the unfiltered fallback query (which may be empty).
    """
    # Single source of truth for the selection/ordering logic; only the
    # optional time filter differs between the windowed and fallback queries.
    query_template = """
        WITH candidates AS (
            SELECT v.*
            FROM videos v
            WHERE v.codec IS NULL
              AND v.status <> 'missing'
              AND v.filepath IS NOT NULL
              {window_filter}
        ),
        by_streamer AS (
            SELECT username, site, SUM(size) AS total_mb
            FROM candidates
            GROUP BY username, site
        ),
        ordered AS (
            SELECT c.*,
                   bs.total_mb,
                   CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
            FROM candidates c
            JOIN by_streamer bs
              ON bs.username = c.username
             AND bs.site = c.site
        )
        SELECT *
        FROM ordered
        ORDER BY
            total_mb DESC,      -- top streamers first
            small_first ASC,    -- small (< small_mb) first
            size DESC,          -- then bigger files first inside each group
            created_at DESC;    -- then newest
    """

    def pick(days=None):
        """Run the prioritized query, limited to the last ``days`` days if given."""
        if days is None:
            window_filter = ""
            params = (small_mb,)
        else:
            # The window placeholder precedes the size placeholder in the SQL
            # text, so ``days`` must come first in the parameter tuple.
            window_filter = "AND v.created_at >= NOW() - make_interval(days => %s)"
            params = (days, small_mb)
        # Only a static SQL fragment is interpolated; values stay bound parameters.
        cursor.execute(query_template.format(window_filter=window_filter), params)
        return cursor.fetchall()

    # Try each window in order (default 7d -> 30d -> 90d)
    for days in windows:
        rows = pick(days)
        if rows:
            return rows

    # Fallback: any time, same ordering logic
    return pick()
def reencode_videos_av1():
# Walk the un-encoded videos from the local DB, try to re-encode each one,
# and mark it as 'av1' in the DB once the encoded file has replaced the original.
# NOTE(review): this listing appears to contain two versions of the loop body
# back to back -- a single pass over a fixed `ORDER BY size DESC` query, then a
# `while True` loop driven by smart_choice(). Confirm which one is live.
# get videos
conn, cursor = config.get_local_db_connection()
cursor.execute("SELECT * FROM videos WHERE codec IS NULL AND status != 'missing' AND filepath IS NOT NULL ORDER BY size DESC;")
videos = cursor.fetchall()
# for video in tqdm(videos, desc="Processing videos", unit="file"):
with tqdm(videos, desc="Processing videos", unit="file") as pbar:
for video in videos:
# Progress is advanced by hand because iteration is over `videos`, not `pbar`.
pbar.update(1)
input_path = video['filepath']
if not os.path.exists(input_path):
print(f"🚫 File not found: {input_path}")
continue
file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
print(f"\nProcessing {os.path.basename(input_path)} ({file_size_in_mb:.2f} MB)...")
# Files under 1 MB are deleted from disk; no DB update is visible here.
if file_size_in_mb < 1:
print("Video is too small. Skipping.")
os.remove(input_path)
continue
# 2) Get current bitrate & resolution
current_bitrate, (width, height) = get_video_info(input_path)
if not current_bitrate:
print("Video's bitrate is not available. Skipping")
continue
target_bitrate = get_target_bitrate(width, height)
# If current bitrate <= target, it's not worth it to re-encode
# NOTE(review): despite the comment above, the video is not skipped here;
# the target is capped at the current bitrate and encoding still proceeds.
if current_bitrate <= target_bitrate:
target_bitrate = current_bitrate
if is_av1(input_path):
print("Video is already encoded in AV1. Skipping")
update_codec_db(video['id'], 'av1')
continue
# 3) Re-encode
# Output goes to a '.temp' directory under the current working directory.
output_path = os.path.join('.temp', os.path.basename(input_path))
os.makedirs(os.path.dirname(output_path), exist_ok=True)
encoded = encode_video(input_path, output_path, target_bitrate)
if not encoded:
print("Encoding failed. Skipping.")
continue
# 4) Compare file sizes and replace if smaller
if check_and_replace_if_smaller(input_path, output_path):
update_codec_db(video['id'], 'av1')
# cursor.execute("SELECT * FROM videos WHERE codec IS NULL AND status != 'missing' AND filepath IS NOT NULL AND filepath NOT LIKE 'U:%' ORDER BY size ASC;")
# videos = cursor.fetchall()
# NOTE(review): no break/return is visible in this loop. If smart_choice()
# returns an empty list, or every candidate is skipped via `continue` without
# its codec being set (missing file, no bitrate, failed encode, not smaller),
# the same rows are selected again and the loop appears to repeat
# indefinitely -- confirm and add an exit condition if so.
while True:
videos = smart_choice(cursor)
with tqdm(videos, desc="Processing videos", unit="file") as pbar:
for video in videos:
# Progress is advanced by hand because iteration is over `videos`, not `pbar`.
pbar.update(1)
input_path = video['filepath']
if not os.path.exists(input_path):
print(f"🚫 File not found: {input_path}")
continue
file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
print(f"\nProcessing {os.path.basename(input_path)} ({file_size_in_mb:.2f} MB)...")
# Files under 1 MB are deleted from disk; no DB update is visible here.
if file_size_in_mb < 1:
print("Video is too small. Skipping.")
os.remove(input_path)
continue
# 2) Get current bitrate & resolution
current_bitrate, (width, height) = get_video_info(input_path)
if not current_bitrate:
print("Video's bitrate is not available. Skipping")
continue
target_bitrate = get_target_bitrate(width, height)
# If current bitrate <= target, it's not worth it to re-encode
# NOTE(review): despite the comment above, the video is not skipped here;
# the target is capped at the current bitrate and encoding still proceeds.
if current_bitrate <= target_bitrate:
target_bitrate = current_bitrate
if is_av1(input_path):
print("Video is already encoded in AV1. Skipping")
update_codec_db(video['id'], 'av1')
continue
# 3) Re-encode
# Output goes to a '.temp' directory under the current working directory.
output_path = os.path.join('.temp', os.path.basename(input_path))
os.makedirs(os.path.dirname(output_path), exist_ok=True)
encoded = encode_video(input_path, output_path, target_bitrate)
if not encoded:
print("Encoding failed. Skipping.")
continue
# 4) Compare file sizes and replace if smaller
if check_and_replace_if_smaller(input_path, output_path):
update_codec_db(video['id'], 'av1')
# Script entry point: run the re-encoding pass only when executed directly.
if __name__ == "__main__":
reencode_videos_av1()

@ -28,7 +28,7 @@
.thumb img, .thumb .fallback {
position:absolute; inset:0;
width:100%; height:100%;
object-fit:cover; display:block;
object-fit:cover; object-position:center center; display:block;
}
.thumb .fallback { display:none; align-items:center; justify-content:center; font-size:28px; }
@ -84,13 +84,13 @@
<!-- Sort -->
{% set next_user_dir = 'asc' if sort != 'user' or dir == 'desc' else 'desc' %}
{% set next_site_dir = 'asc' if sort != 'site' or dir == 'desc' else 'desc' %}
{% set next_total_dir = 'asc' if sort != 'total_size_display' or dir == 'desc' else 'desc' %}
{% set next_total_dir = 'asc' if sort != 'total_size' or dir == 'desc' else 'desc' %}
{% set next_count_dir = 'asc' if sort != 'video_count' or dir == 'desc' else 'desc' %}
<div class="toolbar">
<a href="{{ url_for('web.users', q=query, page=1, sort='user', dir=next_user_dir, online=online, timeframe=timeframe, start=start_date, end=end_date) }}">Sort: User{% if sort=='user' %} {{ '▲' if dir=='asc' else '▼' }}{% endif %}</a>
<a href="{{ url_for('web.users', q=query, page=1, sort='site', dir=next_site_dir, online=online, timeframe=timeframe, start=start_date, end=end_date) }}">Sort: Site{% if sort=='site' %} {{ '▲' if dir=='asc' else '▼' }}{% endif %}</a>
<a href="{{ url_for('web.users', q=query, page=1, sort='total_size_display', dir=next_total_dir, online=online, timeframe=timeframe, start=start_date, end=end_date) }}">Sort: Total Size{% if sort=='total_size_display' %} {{ '▲' if dir=='asc' else '▼' }}{% endif %}</a>
<a href="{{ url_for('web.users', q=query, page=1, sort='total_size', dir=next_total_dir, online=online, timeframe=timeframe, start=start_date, end=end_date) }}">Sort: Total Size{% if sort=='total_size' %} {{ '▲' if dir=='asc' else '▼' }}{% endif %}</a>
<a href="{{ url_for('web.users', q=query, page=1, sort='video_count', dir=next_count_dir, online=online, timeframe=timeframe, start=start_date, end=end_date) }}">Sort: Videos{% if sort=='video_count' %} {{ '▲' if dir=='asc' else '▼' }}{% endif %}</a>
<!-- Online-first toggle -->
@ -119,40 +119,43 @@
</form>
</div>
<div class="grid">
{% for c in cards %}
<div class="card">
<div class="thumb">
{% if c.thumb_urls and c.thumb_urls|length %}
{# render all candidates; show first, hide the rest; each tries the next on error #}
{% for url in c.thumb_urls %}
<img src="{{ url }}" loading="lazy" decoding="async" alt="{{ c.user }}" {% if not loop.first %}style="display:none"{% endif %} onerror="tryNext(this)">
{% endfor %}
{% endif %}
<span class="fallback">🎞️</span>
</div>
<div class="meta">
<h3>
<a href="{{ url_for('web.user_page', username=c.user) }}">{{ c.user }}</a>
{% if c.is_online %}
<span class="status-dot dot-online" title="Online"></span>
{% elif c.is_recording_offline %}
<span class="status-dot dot-record" title="Recording (offline)"></span>
<div class="grid">
{% for c in cards %}
<div class="card">
<div class="thumb">
{% if c.thumb_urls and c.thumb_urls|length %}
{# render all candidates; show first, hide the rest; each tries the next on error #}
{% for url in c.thumb_urls %}
<img src="{{ url }}" loading="lazy" decoding="async" alt="{{ c.user }}" {% if not loop.first %}style="display:none"{% endif %} onerror="tryNext(this)">
{% endfor %}
<span class="fallback">🎞️</span>
{% else %}
<span class="status-dot dot-offline" title="Offline"></span>
{# no thumbnails at all → show fallback by default #}
<span class="fallback" style="display:flex">🎞️</span>
{% endif %}
</h3>
<div class="row">
<span class="muted">Site:</span>
<a href="https://{{ c.site }}.com/{{ c.user }}" target="_blank" rel="noopener">{{ c.site }}</a>
</div>
<div class="row"><span class="muted">Total size:</span> {{ c.total_size_display }} GB</div>
<div class="row"><span class="muted">Videos:</span> {{ c.video_count }}</div>
<div class="meta">
<h3>
<a href="{{ url_for('web.user_page', username=c.user) }}">{{ c.user }}</a>
{% if c.is_online %}
<span class="status-dot dot-online" title="Online"></span>
{% elif c.is_recording_offline %}
<span class="status-dot dot-record" title="Recording (offline)"></span>
{% else %}
<span class="status-dot dot-offline" title="Offline"></span>
{% endif %}
</h3>
<div class="row">
<span class="muted">Site:</span>
<a href="https://{{ c.site }}.com/{{ c.user }}" target="_blank" rel="noopener">{{ c.site }}</a>
</div>
<div class="row"><span class="muted">Total size:</span> {{ c.total_size_display }} GB</div>
<div class="row"><span class="muted">Videos:</span> {{ c.video_count }}</div>
</div>
</div>
{% endfor %}
</div>
{% endfor %}
</div>
{% if total_pages > 1 %}
<div class="pagination">

Loading…
Cancel
Save