working version of web app

3 months ago · 40f8ce8e2c
parent 29938c3cbd
commit 40f8ce8e2c
4 changed files with 177 additions and 123 deletions
--- a/app.py
+++ b/app.py
@ -1,131 +1,178 @@
-from flask import Flask, render_template, request, redirect, url_for
-from funcs import process_videos, group_videos, match_data_to_video_fast, get_all_videos, get_all_data
+# app.py  –  Optimised AF
+from flask import Flask, render_template, request, redirect, url_for, jsonify
+from funcs import (
+    process_videos, group_videos, match_data_to_video_fast,
+    get_all_videos, get_all_data
+)
 from config import connect_redis
-import json, os, time, math, subprocess
-from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
+import hashlib, json, math, os, subprocess, time, zlib

-# -------------------- CONFIG -------------------- #
+# ───────── CONFIG ───────── #
 app = Flask(__name__)
 redis = connect_redis()

-CACHE_KEY = "video_cache"
+CACHE_KEY        = "video_cache_v2"        # bump key so we don’t fight old data
+META_HASH        = "video_meta_v2"         # per-file meta cache
 THUMB_DIR        = "static/thumbnails"
 VIDEOS_PER_PAGE  = 20
+THUMB_WIDTH      = 320                     # px
+FF_QUALITY       = "80"                    # 0-100 for WebP

 SCAN_DIRS = [
-    "E:/streamaster/downloaded/",
-    "U:/encoded",
-    "U:/count_sorted"
+    r"E:/streamaster/downloaded/",
+    r"U:/encoded",
+    r"U:/count_sorted"
 ]
-
 DATA_DIRS = [
-    "E:/streamaster/data",
-    "E:/streamaster/downloaded",
+    r"E:/streamaster/data",
+    r"E:/streamaster/downloaded",
 ]

 os.makedirs(THUMB_DIR, exist_ok=True)

-# -------------------- UTILS -------------------- # 
-def generate_thumbnail(video_path, thumb_path):
-    if os.path.exists(thumb_path):
-        return
-    cmd = [
-        "ffmpeg", "-y", "-i", video_path, "-ss", "00:00:05.000",
-        "-vframes", "1", thumb_path
+# ───────── UTILS ───────── #
+def _hashed_thumb_path(video_id: str) -> str:
+    """
+    Return the sharded thumbnail path for *video_id*.
+
+    Layout: THUMB_DIR/<aa>/<bb>/<video_id>.webp, where <aa> and <bb> are the
+    first and second pair of hex characters of the MD5 digest of the id.
+    The shard directory is created as a side effect when it is missing.
+    """
+    digest    = hashlib.md5(video_id.encode()).hexdigest()
+    shard_dir = os.path.join(THUMB_DIR, digest[0:2], digest[2:4])
+    os.makedirs(shard_dir, exist_ok=True)
+    return os.path.join(shard_dir, video_id + ".webp")
+
+def _gen_thumb_cmd(src: str, dest: str):
+    """Return the ffmpeg argument list that writes one scaled frame of *src* to *dest*."""
+    # Frame selection and resize; the output width comes from THUMB_WIDTH.
+    frame_filter = f"thumbnail,scale={THUMB_WIDTH}:-1"
+    return [
+        "ffmpeg",
+        "-y",
+        "-loglevel", "error",
+        "-ss", "0",
+        "-i", src,
+        "-vframes", "1",
+        "-vf", frame_filter,
+        "-q:v", FF_QUALITY,
+        dest
    ]
-    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

-def load_video_data():
-    videos = []
-    for d in SCAN_DIRS:
-        videos += get_all_videos(d)
+def generate_thumbnail(task):
+    """
+    Create one thumbnail with ffmpeg; meant to be run in the thread pool.
+
+    task is a (video_path, dest_path) tuple. Nothing happens when dest_path
+    already exists. Returns None.
+    """
+    src, dest = task
+    if os.path.exists(dest):
+        return
+    result = subprocess.run(
+        _gen_thumb_cmd(src, dest),
+        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    )
+    # An existing dest short-circuits the check above, so a partial file left
+    # by a failed ffmpeg run would never be retried. Remove it (best effort).
+    if result.returncode != 0:
+        try:
+            os.remove(dest)
+        except OSError:
+            pass

-    data = []
-    for d in DATA_DIRS:
-        data += get_all_data(d)
+def load_video_lists():
+    """
+    Collect videos from SCAN_DIRS and data entries from DATA_DIRS, match them
+    with match_data_to_video_fast() and return process_videos() of the matches.
+    The second value returned by the matcher is discarded.
+    """
+    videos = []
+    for scan_dir in SCAN_DIRS:
+        videos.extend(get_all_videos(scan_dir))
+
+    data = []
+    for data_dir in DATA_DIRS:
+        data.extend(get_all_data(data_dir))
+
+    matched, _rest = match_data_to_video_fast(videos, data)
+    return process_videos(matched)

-    parsed_videos, unmatched = match_data_to_video_fast(videos, data)
-    parsed_videos = process_videos(parsed_videos)
-    video_data = group_videos(parsed_videos, sort_by="count", order="desc")
-    return video_data
+def build_cache():
+    """
+    Rescan the video library, refresh thumbnails and store the resulting cache.
+
+    The cache dict ("timestamp", "videos", "storage_usage", "avg_sizes") is
+    written zlib-compressed to Redis under CACHE_KEY and to
+    "video_cache.json.gz" on disk, then returned.
+    """
+    parsed_videos = load_video_lists()
+    grouped       = group_videos(parsed_videos, sort_by="count", order="desc")

-def compute_analytics(video_data):
    storage_usage = {}
    avg_sizes     = {}
    video_map     = {}

-    for (username, platform), vids in video_data.items():
-        total_size_gb = sum(v['size'] for v in vids) / 1024
-        avg_size_gb = (total_size_gb / len(vids)) if vids else 0
+    # Thumb generation queue, keyed by destination so two videos that map to
+    # the same thumbnail never start two ffmpeg writers for one file
+    thumb_tasks   = {}
+
+    for (username, platform), vids in grouped.items():
        key = f"{username}::{platform}"
-        storage_usage[key] = {
-            "total_size": total_size_gb,
-            "video_count": len(vids)
-        }
-        avg_sizes[key] = avg_size_gb

-        with tqdm(vids, desc=f"Generating thumbnails for {username} ({platform})") as pbar:
+        total_gb   = sum(v["size"] for v in vids) / 1024
+        storage_usage[key] = {"total_size": total_gb, "video_count": len(vids)}
+        avg_sizes[key]     = total_gb / len(vids) if vids else 0
+
        for v in vids:
-                pbar.update(1)
-                video_id = os.path.basename(v['filepath']).split('.')[0]
-                thumb_path = os.path.join(THUMB_DIR, f"{video_id}.jpg")
-                generate_thumbnail(v['filepath'], thumb_path)
-                v['thumbnail'] = thumb_path
+            filepath   = v["filepath"]
+            video_id   = os.path.basename(filepath).rsplit(".", 1)[0]
+            thumb_path = _hashed_thumb_path(video_id)
+
+            # Meta-cache: regenerate only when the source file changed or the
+            # thumbnail file is missing
+            mtime   = os.path.getmtime(filepath)
+            meta    = redis.hget(META_HASH, filepath)
+            changed = bool(meta) and json.loads(meta)["mtime"] != mtime
+            if changed and os.path.exists(thumb_path):
+                # generate_thumbnail() skips existing files, so the stale
+                # thumbnail must go for the regen to happen (best effort)
+                try:
+                    os.remove(thumb_path)
+                except OSError:
+                    pass
+            if not meta or changed or not os.path.exists(thumb_path):
+                thumb_tasks.setdefault(thumb_path, filepath)
+                redis.hset(META_HASH, filepath,
+                           json.dumps({"mtime": mtime, "thumb": thumb_path}))
+            v["thumbnail"] = thumb_path
+
        video_map[key] = vids
-    return storage_usage, avg_sizes, video_map

-def refresh_data():
-    video_data = load_video_data()
-    storage_usage, avg_sizes, video_map = compute_analytics(video_data)
+    # Smash thumbnails in parallel
+    if thumb_tasks:
+        # os.cpu_count() may return None; fall back to a single core
+        workers = (os.cpu_count() or 1) * 2
+        with ThreadPoolExecutor(max_workers=workers) as exe:
+            list(exe.map(generate_thumbnail,
+                         [(src, dest) for dest, src in thumb_tasks.items()]))
+
    cache = {
-        "timestamp": time.time(),
-        "videos": video_map,
-        "storage_usage": storage_usage,
-        "avg_sizes": avg_sizes
+        "timestamp"     : time.time(),
+        "videos"        : video_map,
+        "storage_usage" : storage_usage,
+        "avg_sizes"     : avg_sizes
    }
-    redis.set(CACHE_KEY, json.dumps(cache))
+    # Compress JSON → binary once; the same blob goes to Redis and to disk
+    blob = zlib.compress(json.dumps(cache).encode())
+    redis.set(CACHE_KEY, blob)
+    # also drop to disk in case Redis is wiped
+    # NOTE: the payload is a raw zlib stream despite the ".gz" file name
+    with open("video_cache.json.gz", "wb") as f:
+        f.write(blob)
    return cache

 def get_cached_data():
-    try:
-        cached = redis.get(CACHE_KEY)
-        return json.loads(cached)  # ✅ Use cache if it exists
-    except Exception as e:
-        return refresh_data()      # ✅ Generate and store fresh data if empty
+    """
+    Return the cache dict from the first source that yields a readable copy:
+    Redis (CACHE_KEY), then "video_cache.json.gz" on disk, then build_cache().
+    An unreadable copy is skipped instead of raising.
+    """
+    # try Redis first
+    blob = redis.get(CACHE_KEY)
+    if blob:
+        try:
+            return json.loads(zlib.decompress(blob).decode())
+        except (zlib.error, ValueError):
+            pass  # corrupt or foreign entry → try the next source
+
+    # fallback to disk (open directly; a missing file is just another miss)
+    try:
+        with open("video_cache.json.gz", "rb") as f:
+            return json.loads(zlib.decompress(f.read()).decode())
+    except (OSError, zlib.error, ValueError):
+        pass  # missing or unreadable file → rebuild below

-# -------------------- ROUTES -------------------- #
+    # last resort full rebuild
+    return build_cache()
+
+# ───────── ROUTES ───────── #
@app.route("/")
 def dashboard():
    cache = get_cached_data()
-    sorted_usage = sorted(cache["storage_usage"].items(), key=lambda x: x[1]["total_size"], reverse=True)
-    return render_template("analytics.html", storage_usage=sorted_usage, avg_sizes=cache["avg_sizes"])
+    # Render analytics.html with (key, stats) pairs ordered by total_size,
+    # largest first, plus the avg_sizes mapping from the cache.
+    def _total_size(item):
+        _key, stats = item
+        return stats["total_size"]
+
+    usage_rows = list(cache["storage_usage"].items())
+    usage_rows.sort(key=_total_size, reverse=True)
+    return render_template(
+        "analytics.html",
+        storage_usage=usage_rows,
+        avg_sizes=cache["avg_sizes"]
+    )

@app.route("/refresh")
 def refresh():
-    refresh_data()
-    return redirect(url_for("dashboard"))
+    # Rebuild the cache and report a JSON summary of the result.
+    rebuilt     = build_cache()
+    video_total = 0
+    for stats in rebuilt["storage_usage"].values():
+        video_total += stats["video_count"]
+    summary = {
+        "status"  : "ok",
+        "videos"  : video_total,
+        "updated" : time.ctime(rebuilt["timestamp"])
+    }
+    return jsonify(summary)

@app.route("/user/<username>")
 def user_page(username):
    cache  = get_cached_data()
-    videos = []
-    for key, vid_list in cache["videos"].items():
-        user, platform = key.split("::")
-        if user == username:
-            for v in vid_list:
-                v['platform'] = platform
-            videos.extend(vid_list)
-
-    page = int(request.args.get("page", 1))
-    total_pages = math.ceil(len(videos) / VIDEOS_PER_PAGE)
-    start = (page - 1) * VIDEOS_PER_PAGE
-    paginated = videos[start:start + VIDEOS_PER_PAGE]
+    # Cache keys are "<username>::<platform>"; split each key once and copy
+    # every matching video with its platform attached.
+    keyed  = ((key.partition("::"), vids) for key, vids in cache["videos"].items())
+    videos = [v | {"platform": platform}
+              for (user, _, platform), vids in keyed
+              if user == username
+              for v in vids]

-    return render_template("user_page.html",
+    # type=int falls back to the default on a non-numeric ?page= value
+    # instead of raising ValueError (which surfaced as an HTTP 500)
+    requested   = request.args.get("page", 1, type=int)
+    total_pages = max(1, math.ceil(len(videos) / VIDEOS_PER_PAGE))
+    # clamp into [1, total_pages] so an out-of-range page shows the last page
+    page        = min(max(1, requested), total_pages)
+    start       = (page - 1) * VIDEOS_PER_PAGE
+    return render_template(
+        "user_page.html",
        username=username,
-                           videos=paginated,
-                           page=page,
-                           total_pages=total_pages)
+        videos=videos[start:start + VIDEOS_PER_PAGE],
+        page=page, total_pages=total_pages
+    )

 if __name__ == "__main__":
    app.run(debug=True)
--- a/config.py
+++ b/config.py
@ -1,24 +1,10 @@
 from redis import Redis
-import json

 redisCred = {"host": "192.168.0.27", "port": 30036, "password": "bignigga123"}

-
-def redis_gen_connection():
-    return Redis(host=redisCred["host"], port=redisCred["port"], password=redisCred["password"])
-
 def connect_redis():
-    REDIS_HOST = "192.168.0.27"
-    REDIS_PORT = 30036
-    REDIS_PASSWORD = "bignigga123"
-
    try:
-        client = Redis(
-            host=REDIS_HOST,
-            port=REDIS_PORT,
-            password=REDIS_PASSWORD,
-            decode_responses=True
-        )
+        client = Redis(host=redisCred["host"], port=redisCred["port"], password=redisCred["password"])

        response = client.ping()
        if response:
@ -30,18 +16,3 @@ def connect_redis():
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
-
-def get_streamer_data(username):
-    try:
-        redis_client = redis_gen_connection()
-        streamer_data = redis_client.hget("streamers", username)
-
-        if streamer_data is None:
-            return None
-
-        streamer_data = json.loads(streamer_data)
-        return streamer_data
-    except Exception as e:
-        print(f"Unexpected error: {e}")
-
-    return None
--- a/organize_thumbnails.py
+++ b/organize_thumbnails.py
@ -0,0 +1,36 @@
+# organize_thumbnails.py (fixed)
+import os
+import hashlib
+import shutil
+
+OLD_THUMB_DIR = "static/thumbnails"
+HASHED_DIR = "static/thumbnails_hashed"
+
+def hashed_path(video_id: str) -> str:
+    """
+    Map a video ID (no extension) to HASHED_DIR/<aa>/<bb>/<video_id>.webp,
+    where <aa>/<bb> are the first two hex-character pairs of its MD5 digest.
+    """
+    digest = hashlib.md5(video_id.encode()).hexdigest()
+    return os.path.join(HASHED_DIR, digest[:2], digest[2:4], video_id + ".webp")
+
+def organize_thumbnails():
+    """
+    Move every file found under OLD_THUMB_DIR to its hashed_path() location.
+
+    Files whose destination already exists are left in place and reported.
+    Prints a summary with the number of files moved.
+    """
+    # NOTE(review): the destination name always ends in ".webp" and the file
+    # is moved, not converted — confirm the sources are already WebP images.
+    os.makedirs(HASHED_DIR, exist_ok=True)
+    moved_count = 0
+
+    for folder, _subdirs, names in os.walk(OLD_THUMB_DIR):
+        for name in names:
+            stem, _ext = os.path.splitext(name)
+            target = hashed_path(stem)
+            os.makedirs(os.path.dirname(target), exist_ok=True)
+
+            if os.path.exists(target):
+                print(f"[SKIP] Exists: {target}")
+                continue
+
+            shutil.move(os.path.join(folder, name), target)
+            moved_count += 1
+
+    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")
+
+if __name__ == "__main__":
+    organize_thumbnails()
--- a/video_cache.json.gz
+++ b/video_cache.json.gz