database update

main
oscar 3 months ago
parent a7bf98a23b
commit 61e6079372

app.py

@@ -1,40 +1,51 @@
# app.py Optimised AF
from flask import Flask, render_template, request, redirect, url_for, jsonify, send_from_directory, send_file
from funcs import process_videos, group_videos, match_data_to_video_fast, get_all_videos, get_all_data
from config import connect_redis
# app.py DB-powered version
import os, time, json, zlib, math, hashlib, subprocess
from concurrent.futures import ThreadPoolExecutor
import hashlib, json, math, os, subprocess, time, zlib
from datetime import datetime
from flask import (
Flask, render_template, request, jsonify, send_file
)
from config import connect_redis, get_local_db_connection
# ───────── CONFIG ───────── #
app = Flask(__name__)
app = Flask(__name__)
redis = connect_redis()
CACHE_KEY = "video_cache_v2" # bump key so we dont fight old data
META_HASH = "video_meta_v2" # per-file meta cache
THUMB_DIR = "static/thumbnails"
VIDEOS_PER_PAGE = 40
DASHBOARD_PER_PAGE = 100 # for the dashboard
THUMB_WIDTH = 640 # px
FF_QUALITY = "80" # 0-100 for WebP
SCAN_DIRS = [
r"E:/streamaster/downloaded/",
r"U:/encoded",
r"U:/count_sorted"
]
DATA_DIRS = [
r"E:/streamaster/data",
r"E:/streamaster/downloaded",
]
CACHE_KEY = "video_cache_v3" # bumped because source changed
META_HASH = "video_meta_v3"
THUMB_DIR = "static/thumbnails"
VIDEOS_PER_PAGE = 40
DASHBOARD_PER_PAGE = 100
THUMB_WIDTH = 640
FF_QUALITY = "80"
os.makedirs(THUMB_DIR, exist_ok=True)
# ───────── UTILS ───────── #
def _hashed_thumb_path(video_id: str) -> str:
"""
Static/thumbnails/ab/cd/<video_id>.webp
keeps any subdir under ~256 files.
"""
# ───────── DB HELPER ───────── #
def db_get_videos():
"""Return list of dicts exactly like the old parser produced."""
conn, cur = get_local_db_connection()
cur.execute("""
SELECT
video_id, username, site AS platform,
filepath, size_mb AS size,
duration,
gender,
created_at,
updated_at
FROM videos
""")
rows = cur.fetchall()
# Convert psycopg rows → list[dict]
cols = [desc[0] for desc in cur.description]
videos = [dict(zip(cols, row)) for row in rows]
cur.close(); conn.close()
return videos
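# Note on the pattern above: cur.description carries one (name, ...) tuple per
# selected column, so zip(cols, row) rebuilds each row as a dict keyed by the
# SELECT aliases ("platform", "size", ...). "size" is still in MB here; the
# cache builder divides by 1024 for GB. get_local_db_connection() is assumed
# to hand back an open (connection, cursor) pair, matching the unpacking above.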
# ───────── THUMB UTILS ───────── #
def _hashed_thumb_path(video_id: str):
h = hashlib.md5(video_id.encode()).hexdigest()
sub1, sub2 = h[:2], h[2:4]
path = os.path.join(THUMB_DIR, sub1, sub2)
@@ -52,53 +63,49 @@ def _gen_thumb_cmd(src: str, dest: str):
]
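# _gen_thumb_cmd's body sits outside this hunk; given THUMB_WIDTH and
# FF_QUALITY above it presumably assembles an ffmpeg call that grabs one
# frame, scales it to 640 px wide and encodes WebP, e.g. roughly:
#   ffmpeg -y -i <src> -vframes 1 -vf scale=640:-2 -q:v 80 <dest>
# (illustrative sketch only -- the real flags are defined earlier in the file)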
def generate_thumbnail(task):
"""Run in threadpool. task = (video_path, dest_path)"""
src, dest = task
if os.path.exists(dest):
return
subprocess.run(_gen_thumb_cmd(src, dest), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def load_video_lists():
videos, data = [], []
for d in SCAN_DIRS: videos += get_all_videos(d)
for d in DATA_DIRS: data += get_all_data(d)
parsed, _ = match_data_to_video_fast(videos, data)
return process_videos(parsed)
if not os.path.exists(dest):
subprocess.run(_gen_thumb_cmd(src, dest),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
# ───────── CACHE BUILDER ───────── #
def build_cache():
parsed_videos = load_video_lists()
grouped = group_videos(parsed_videos, sort_by="count", order="desc")
videos = db_get_videos()
storage_usage = {}
avg_sizes = {}
video_map = {}
# group by (username, platform)
grouped = {}
for v in videos:
key = (v["username"], v["platform"])
grouped.setdefault(key, []).append(v)
# Threaded thumb generation queue
thumb_tasks = []
storage_usage, avg_sizes, video_map = {}, {}, {}
thumb_tasks = []
for (username, platform), vids in grouped.items():
key = f"{username}::{platform}"
total_gb = sum(v["size"] for v in vids) / 1024
storage_usage[key] = {"total_size": total_gb, "video_count": len(vids)}
avg_sizes[key] = total_gb / len(vids) if vids else 0
total_gb = sum(v["size"] for v in vids) / 1024
storage_usage[key] = {
"total_size": total_gb,
"video_count": len(vids)
}
avg_sizes[key] = total_gb / len(vids) if vids else 0
for v in vids:
video_id = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
video_id = v["video_id"]
thumb_path = _hashed_thumb_path(video_id)
# Meta-cache (skip thumb regen if unchanged)
mtime = os.path.getmtime(v["filepath"])
meta = redis.hget(META_HASH, v["filepath"])
if not meta or json.loads(meta)["mtime"] != mtime:
# Meta-cache: use DB updated_at as mtime surrogate
meta = redis.hget(META_HASH, video_id)
if not meta or json.loads(meta)["updated_at"] != str(v["updated_at"]):
thumb_tasks.append((v["filepath"], thumb_path))
redis.hset(META_HASH, v["filepath"],
json.dumps({"mtime": mtime, "thumb": thumb_path}))
redis.hset(META_HASH, video_id,
json.dumps({"updated_at": str(v["updated_at"]),
"thumb" : thumb_path}))
v["thumbnail"] = thumb_path
video_map[key] = vids
# Smash thumbnails in parallel
if thumb_tasks:
with ThreadPoolExecutor(max_workers=os.cpu_count()*2) as exe:
list(exe.map(generate_thumbnail, thumb_tasks))
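# list() drains the lazy exe.map iterator, so every thumbnail task finishes
# (and any worker exception is re-raised here) before the cache is assembled.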
@@ -109,52 +116,49 @@ def build_cache():
"storage_usage" : storage_usage,
"avg_sizes" : avg_sizes
}
# Compress JSON → binary before Redis
redis.set(CACHE_KEY, zlib.compress(json.dumps(cache).encode()))
# also drop to disk in case Redis is wiped
blob = zlib.compress(json.dumps(cache).encode())
redis.set(CACHE_KEY, blob)
with open("video_cache.json.gz", "wb") as f:
f.write(zlib.compress(json.dumps(cache).encode()))
f.write(blob)
return cache
def get_cached_data():
# try Redis first
blob = redis.get(CACHE_KEY)
if blob:
return json.loads(zlib.decompress(blob).decode())
# fallback to disk
if os.path.exists("video_cache.json.gz"):
with open("video_cache.json.gz", "rb") as f:
return json.loads(zlib.decompress(f.read()).decode())
# last resort full rebuild
return build_cache()
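# Note: zlib.compress() emits a raw zlib stream, not a gzip container, so the
# .gz filename is cosmetic; it round-trips fine because both the Redis blob
# and the on-disk fallback are read back with zlib.decompress().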
# ───────── ROUTES ───────── #
# ───────── ROUTES (unchanged logic) ───────── #
@app.route("/")
def dashboard():
cache = get_cached_data()
# --- SEARCH ---
query = request.args.get("q", "").lower().strip()
sorted_usage = sorted(cache["storage_usage"].items(), key=lambda x: x[1]["total_size"], reverse=True)
sorted_usage = sorted(
cache["storage_usage"].items(),
key=lambda x: x[1]["total_size"],
reverse=True
)
if query:
sorted_usage = [entry for entry in sorted_usage if query in entry[0].lower()]
sorted_usage = [e for e in sorted_usage if query in e[0].lower()]
# --- PAGINATION ---
page = max(1, int(request.args.get("page", 1)))
page = max(1, int(request.args.get("page", 1)))
total_pages = max(1, math.ceil(len(sorted_usage) / DASHBOARD_PER_PAGE))
start = (page - 1) * DASHBOARD_PER_PAGE
paginated_usage = sorted_usage[start:start + DASHBOARD_PER_PAGE]
start = (page - 1) * DASHBOARD_PER_PAGE
paginated = sorted_usage[start:start + DASHBOARD_PER_PAGE]
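# e.g. with DASHBOARD_PER_PAGE = 100: page=2 -> start=100, entries 100-199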
return render_template(
"analytics.html",
storage_usage=paginated_usage,
avg_sizes=cache["avg_sizes"],
page=page,
total_pages=total_pages,
query=query
storage_usage = paginated,
avg_sizes = cache["avg_sizes"],
page = page,
total_pages = total_pages,
query = query
)
@app.route("/refresh")
@@ -170,28 +174,24 @@ def refresh():
def user_page(username):
cache = get_cached_data()
videos = [v | {"platform": key.split("::")[1]}
for key, vids in cache["videos"].items()
if key.split("::")[0] == username
for v in vids]
# generate video_id for sorting
for v in videos:
v["video_id"] = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
videos = [
v | {"platform": key.split("::")[1]}
for key, vids in cache["videos"].items()
if key.split("::")[0] == username
for v in vids
]
# Pagination
page = max(1, int(request.args.get("page", 1)))
per_page = VIDEOS_PER_PAGE
total_pages = max(1, math.ceil(len(videos) / per_page))
start = (page - 1) * per_page
paginated_videos = videos[start:start + per_page]
page = max(1, int(request.args.get("page", 1)))
total_pages = max(1, math.ceil(len(videos) / VIDEOS_PER_PAGE))
start = (page - 1) * VIDEOS_PER_PAGE
paginated = videos[start:start + VIDEOS_PER_PAGE]
return render_template(
"user_page.html",
username=username,
videos=paginated_videos,
page=page,
total_pages=total_pages
username = username,
videos = paginated,
page = page,
total_pages = total_pages
)
@app.route("/video/stream/<video_id>")
@@ -199,8 +199,7 @@ def stream_video(video_id):
cache = get_cached_data()
for vids in cache["videos"].values():
for v in vids:
vid_id = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
if vid_id == video_id:
if v["video_id"] == video_id:
return send_file(v["filepath"], mimetype="video/mp4")
return "Video not found", 404
@@ -209,11 +208,9 @@ def view_video(video_id):
cache = get_cached_data()
for vids in cache["videos"].values():
for v in vids:
vid_id = os.path.splitext(os.path.basename(v["filepath"]))[0]
if vid_id == video_id:
v["video_id"] = vid_id # ✅ precompute safe ID
if v["video_id"] == video_id:
return render_template("video_view.html", video=v)
return "Video not found", 404
if __name__ == "__main__":
app.run(debug=True)
app.run(debug=True)
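The new code path assumes config.get_local_db_connection() returns an open
(connection, cursor) pair against a local database; the "psycopg rows" comment
suggests Postgres via psycopg. A minimal sketch of such a helper, using
psycopg2 and a hypothetical DSN (the real values live in config.py and are not
part of this commit):

import psycopg2

def get_local_db_connection():
    # Hypothetical connection details -- placeholders, not the project's real config.
    conn = psycopg2.connect(
        host="localhost",
        dbname="streamaster",    # assumed database name
        user="streamaster",      # assumed credentials
        password="change-me",
    )
    return conn, conn.cursor()

Callers are expected to close both handles, as db_get_videos() does with
cur.close(); conn.close().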
