database update

main
oscar 3 months ago
parent a7bf98a23b
commit 61e6079372

app.py

@@ -1,40 +1,51 @@
# app.py Optimised AF
from flask import Flask, render_template, request, redirect, url_for, jsonify, send_from_directory, send_file
from funcs import process_videos, group_videos, match_data_to_video_fast, get_all_videos, get_all_data
from config import connect_redis
# app.py DB-powered version
import os, time, json, zlib, math, hashlib, subprocess
from concurrent.futures import ThreadPoolExecutor
import hashlib, json, math, os, subprocess, time, zlib
from datetime import datetime
from flask import (
Flask, render_template, request, jsonify, send_file
)
from config import connect_redis, get_local_db_connection
# ───────── CONFIG ───────── #
app = Flask(__name__)
app = Flask(__name__)
redis = connect_redis()
CACHE_KEY = "video_cache_v2" # bump key so we dont fight old data
META_HASH = "video_meta_v2" # per-file meta cache
THUMB_DIR = "static/thumbnails"
VIDEOS_PER_PAGE = 40
DASHBOARD_PER_PAGE = 100 # for the dashboard
THUMB_WIDTH = 640 # px
FF_QUALITY = "80" # 0-100 for WebP
SCAN_DIRS = [
r"E:/streamaster/downloaded/",
r"U:/encoded",
r"U:/count_sorted"
]
DATA_DIRS = [
r"E:/streamaster/data",
r"E:/streamaster/downloaded",
]
CACHE_KEY = "video_cache_v3" # bumped because source changed
META_HASH = "video_meta_v3"
THUMB_DIR = "static/thumbnails"
VIDEOS_PER_PAGE = 40
DASHBOARD_PER_PAGE = 100
THUMB_WIDTH = 640
FF_QUALITY = "80"
os.makedirs(THUMB_DIR, exist_ok=True)
# ───────── UTILS ───────── #
def _hashed_thumb_path(video_id: str) -> str:
"""
Static/thumbnails/ab/cd/<video_id>.webp
keeps any subdir under ~256 files.
"""
# ───────── DB HELPER ───────── #
def db_get_videos():
"""Return list of dicts exactly like the old parser produced."""
conn, cur = get_local_db_connection()
cur.execute("""
SELECT
video_id, username, site AS platform,
filepath, size_mb AS size,
duration,
gender,
created_at,
updated_at
FROM videos
""")
rows = cur.fetchall()
# Convert psycopg rows → list[dict]
cols = [desc[0] for desc in cur.description]
videos = [dict(zip(cols, row)) for row in rows]
cur.close(); conn.close()
return videos
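# Note on the pattern above: cur.description carries one (name, ...) tuple per
# selected column, so zip(cols, row) rebuilds each row as a dict keyed by the
# SELECT aliases ("platform", "size", ...). "size" is still in MB here; the
# cache builder divides by 1024 for GB. get_local_db_connection() is assumed
# to hand back an open (connection, cursor) pair, matching the unpacking above.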
# ───────── THUMB UTILS ───────── #
def _hashed_thumb_path(video_id: str):
h = hashlib.md5(video_id.encode()).hexdigest()
sub1, sub2 = h[:2], h[2:4]
path = os.path.join(THUMB_DIR, sub1, sub2)
@@ -52,53 +63,49 @@ def _gen_thumb_cmd(src: str, dest: str):
]
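# _gen_thumb_cmd's body sits outside this hunk; given THUMB_WIDTH and
# FF_QUALITY above it presumably assembles an ffmpeg call that grabs one
# frame, scales it to 640 px wide and encodes WebP, e.g. roughly:
#   ffmpeg -y -i <src> -vframes 1 -vf scale=640:-2 -q:v 80 <dest>
# (illustrative sketch only -- the real flags are defined earlier in the file)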
def generate_thumbnail(task):
"""Run in threadpool. task = (video_path, dest_path)"""
src, dest = task
if os.path.exists(dest):
return
subprocess.run(_gen_thumb_cmd(src, dest), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def load_video_lists():
videos, data = [], []
for d in SCAN_DIRS: videos += get_all_videos(d)
for d in DATA_DIRS: data += get_all_data(d)
parsed, _ = match_data_to_video_fast(videos, data)
return process_videos(parsed)
if not os.path.exists(dest):
subprocess.run(_gen_thumb_cmd(src, dest),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
# ───────── CACHE BUILDER ───────── #
def build_cache():
parsed_videos = load_video_lists()
grouped = group_videos(parsed_videos, sort_by="count", order="desc")
videos = db_get_videos()
storage_usage = {}
avg_sizes = {}
video_map = {}
# group by (username, platform)
grouped = {}
for v in videos:
key = (v["username"], v["platform"])
grouped.setdefault(key, []).append(v)
# Threaded thumb generation queue
thumb_tasks = []
storage_usage, avg_sizes, video_map = {}, {}, {}
thumb_tasks = []
for (username, platform), vids in grouped.items():
key = f"{username}::{platform}"
total_gb = sum(v["size"] for v in vids) / 1024
storage_usage[key] = {"total_size": total_gb, "video_count": len(vids)}
avg_sizes[key] = total_gb / len(vids) if vids else 0
total_gb = sum(v["size"] for v in vids) / 1024
storage_usage[key] = {
"total_size": total_gb,
"video_count": len(vids)
}
avg_sizes[key] = total_gb / len(vids) if vids else 0
for v in vids:
video_id = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
video_id = v["video_id"]
thumb_path = _hashed_thumb_path(video_id)
# Meta-cache (skip thumb regen if unchanged)
mtime = os.path.getmtime(v["filepath"])
meta = redis.hget(META_HASH, v["filepath"])
if not meta or json.loads(meta)["mtime"] != mtime:
# Meta-cache: use DB updated_at as mtime surrogate
meta = redis.hget(META_HASH, video_id)
if not meta or json.loads(meta)["updated_at"] != str(v["updated_at"]):
thumb_tasks.append((v["filepath"], thumb_path))
redis.hset(META_HASH, v["filepath"],
json.dumps({"mtime": mtime, "thumb": thumb_path}))
redis.hset(META_HASH, video_id,
json.dumps({"updated_at": str(v["updated_at"]),
"thumb" : thumb_path}))
v["thumbnail"] = thumb_path
video_map[key] = vids
# Smash thumbnails in parallel
if thumb_tasks:
with ThreadPoolExecutor(max_workers=os.cpu_count()*2) as exe:
list(exe.map(generate_thumbnail, thumb_tasks))
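# list() drains the lazy exe.map iterator, so every thumbnail task finishes
# (and any worker exception is re-raised here) before the cache is assembled.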
@@ -109,52 +116,49 @@ def build_cache():
"storage_usage" : storage_usage,
"avg_sizes" : avg_sizes
}
# Compress JSON → binary before Redis
redis.set(CACHE_KEY, zlib.compress(json.dumps(cache).encode()))
# also drop to disk in case Redis is wiped
blob = zlib.compress(json.dumps(cache).encode())
redis.set(CACHE_KEY, blob)
with open("video_cache.json.gz", "wb") as f:
f.write(zlib.compress(json.dumps(cache).encode()))
f.write(blob)
return cache
def get_cached_data():
# try Redis first
blob = redis.get(CACHE_KEY)
if blob:
return json.loads(zlib.decompress(blob).decode())
# fallback to disk
if os.path.exists("video_cache.json.gz"):
with open("video_cache.json.gz", "rb") as f:
return json.loads(zlib.decompress(f.read()).decode())
# last resort full rebuild
return build_cache()
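# Note: zlib.compress() emits a raw zlib stream, not a gzip container, so the
# .gz filename is cosmetic; it round-trips fine because both the Redis blob
# and the on-disk fallback are read back with zlib.decompress().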
# ───────── ROUTES ───────── #
# ───────── ROUTES (unchanged logic) ───────── #
@app.route("/")
def dashboard():
cache = get_cached_data()
# --- SEARCH ---
query = request.args.get("q", "").lower().strip()
sorted_usage = sorted(cache["storage_usage"].items(), key=lambda x: x[1]["total_size"], reverse=True)
sorted_usage = sorted(
cache["storage_usage"].items(),
key=lambda x: x[1]["total_size"],
reverse=True
)
if query:
sorted_usage = [entry for entry in sorted_usage if query in entry[0].lower()]
sorted_usage = [e for e in sorted_usage if query in e[0].lower()]
# --- PAGINATION ---
page = max(1, int(request.args.get("page", 1)))
page = max(1, int(request.args.get("page", 1)))
total_pages = max(1, math.ceil(len(sorted_usage) / DASHBOARD_PER_PAGE))
start = (page - 1) * DASHBOARD_PER_PAGE
paginated_usage = sorted_usage[start:start + DASHBOARD_PER_PAGE]
start = (page - 1) * DASHBOARD_PER_PAGE
paginated = sorted_usage[start:start + DASHBOARD_PER_PAGE]
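# e.g. with DASHBOARD_PER_PAGE = 100: page=2 -> start=100, entries 100-199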
return render_template(
"analytics.html",
storage_usage=paginated_usage,
avg_sizes=cache["avg_sizes"],
page=page,
total_pages=total_pages,
query=query
storage_usage = paginated,
avg_sizes = cache["avg_sizes"],
page = page,
total_pages = total_pages,
query = query
)
@app.route("/refresh")
@@ -170,28 +174,24 @@ def refresh():
def user_page(username):
cache = get_cached_data()
videos = [v | {"platform": key.split("::")[1]}
for key, vids in cache["videos"].items()
if key.split("::")[0] == username
for v in vids]
# generate video_id for sorting
for v in videos:
v["video_id"] = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
videos = [
v | {"platform": key.split("::")[1]}
for key, vids in cache["videos"].items()
if key.split("::")[0] == username
for v in vids
]
# Pagination
page = max(1, int(request.args.get("page", 1)))
per_page = VIDEOS_PER_PAGE
total_pages = max(1, math.ceil(len(videos) / per_page))
start = (page - 1) * per_page
paginated_videos = videos[start:start + per_page]
page = max(1, int(request.args.get("page", 1)))
total_pages = max(1, math.ceil(len(videos) / VIDEOS_PER_PAGE))
start = (page - 1) * VIDEOS_PER_PAGE
paginated = videos[start:start + VIDEOS_PER_PAGE]
return render_template(
"user_page.html",
username=username,
videos=paginated_videos,
page=page,
total_pages=total_pages
username = username,
videos = paginated,
page = page,
total_pages = total_pages
)
@app.route("/video/stream/<video_id>")
@@ -199,8 +199,7 @@ def stream_video(video_id):
cache = get_cached_data()
for vids in cache["videos"].values():
for v in vids:
vid_id = os.path.basename(v["filepath"]).rsplit(".", 1)[0]
if vid_id == video_id:
if v["video_id"] == video_id:
return send_file(v["filepath"], mimetype="video/mp4")
return "Video not found", 404
@@ -209,11 +208,9 @@ def view_video(video_id):
cache = get_cached_data()
for vids in cache["videos"].values():
for v in vids:
vid_id = os.path.splitext(os.path.basename(v["filepath"]))[0]
if vid_id == video_id:
v["video_id"] = vid_id # ✅ precompute safe ID
if v["video_id"] == video_id:
return render_template("video_view.html", video=v)
return "Video not found", 404
if __name__ == "__main__":
app.run(debug=True)
app.run(debug=True)
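The new code path assumes config.get_local_db_connection() returns an open
(connection, cursor) pair against a local database; the "psycopg rows" comment
suggests Postgres via psycopg. A minimal sketch of such a helper, using
psycopg2 and a hypothetical DSN (the real values live in config.py and are not
part of this commit):

import psycopg2

def get_local_db_connection():
    # Hypothetical connection details -- placeholders, not the project's real config.
    conn = psycopg2.connect(
        host="localhost",
        dbname="streamaster",    # assumed database name
        user="streamaster",      # assumed credentials
        password="change-me",
    )
    return conn, conn.cursor()

Callers are expected to close both handles, as db_get_videos() does with
cur.close(); conn.close().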
