major cleanup and fixes

1 month ago · 00aa9a9bf1
parent fd62cb5a30
commit 00aa9a9bf1
7 changed files with 456 additions and 261 deletions
--- a/.gitignore
+++ b/.gitignore
@ -183,3 +183,5 @@ cython_debug/
 /static/thumbnails
 /.temp
 .last_checked
+/concated
+/edited
--- a/checkfailedmoved.py
+++ b/checkfailedmoved.py
@ -0,0 +1,43 @@
+import os
+from config import get_local_db_connection
+from tqdm import tqdm
+import shutil
+
+RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
+ARCHIVE_DIR = 'U:/streamaster/streams/'
+
+
+def get_all_video_files():
+    files = {}
+    for root, _, filenames in os.walk(RECORDER_DIR):
+        for filename in filenames:
+            if filename.endswith(".mp4"):
+                video_id = filename.split(".")[0]
+                files[video_id] = os.path.join(root, filename)
+    return files
+
+if __name__ == '__main__':
+    conn, cursor = get_local_db_connection()
+    
+    downloaded_videos = get_all_video_files()
+    
+    # For every video in downloaded_videos, rebuild the path it would have under the archive directory.
+    # If a file already exists there, the intent was to check whether it is corrupted (a move in the other script
+    # failed midway) and replace it; NOTE: no corruption check is performed below, the file is simply moved over it.
+    
+    
+    for video_id in tqdm(downloaded_videos.keys(), desc="Checking for failed videos..."):
+        video_path = downloaded_videos[video_id]
+
+        try:
+            cursor.execute("SELECT username FROM videos WHERE video_id = %s", (video_id,))
+            username = cursor.fetchone()['username']
+        except:
+            print(f"Video {video_id} does not exist in the database")
+            continue
+        
+        archive_path = os.path.join(ARCHIVE_DIR, username, video_path.replace(RECORDER_DIR, ''))
+        
+        if os.path.exists(archive_path):
+            print(f"Video {video_id} already exists in the archive directory")
+            shutil.move(video_path, archive_path)
--- a/cleanup.py
+++ b/cleanup.py
@ -4,7 +4,6 @@ from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
 from tqdm import tqdm

 import os, hashlib, subprocess, shutil
-from config import get_local_db_connection
 from concurrent.futures import ThreadPoolExecutor

 EDITED_DIR = "edited/"
@ -12,9 +11,13 @@ THUMB_DIR          = "static/thumbnails"
 THUMB_WIDTH        = 640
 FF_QUALITY         = "80"

+RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
+ARCHIVE_DIR = 'U:/streamaster/streams/'
+CONCATED_DIR = 'concated/'
+
 VIDEO_DIRS = [
-    "U:/streamaster/",
-    "E:/streamaster/streamaster/downloaded"
+    RECORDER_DIR,
+    ARCHIVE_DIR
 ]

 def get_all_video_files():
@ -23,11 +26,12 @@ def get_all_video_files():
        for root, _, filenames in os.walk(base):
            for filename in filenames:
                if filename.endswith(".mp4"):
-                    files[filename] = os.path.join(root, filename)
+                    video_id = filename.split(".")[0]
+                    files[video_id] = os.path.join(root, filename)
    return files

-def find_video_path(filename: str):
-    return all_videos[filename] if filename in all_videos else None
+def find_video_path(video_id: str):
+    return all_videos[video_id] if video_id in all_videos else None

 def mark_missing_videos(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
@ -35,57 +39,47 @@ def mark_missing_videos(cursor, conn):

    with tqdm(videos, desc="Scanning for missing videos...") as pbar:
        for vid in videos:
-            pbar.update(1)
-            video_id, filepath = vid.values()
-            if not filepath:
-                filename = f'{video_id}.mp4'
-            else:
-                filename = os.path.basename(filepath)
-            if not find_video_path(filename):
-                print(f"🚫 Missing: {filename}")
+            video_id, filepath = vid['video_id'], vid['filepath']
+            if not find_video_path(video_id):
+                print(f"🚫 Missing: {video_id}")
                cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
                conn.commit()
+            pbar.update(1)

-def find_missing_videos(cursor, conn):
+def update_video_paths(cursor, conn):
    cursor.execute("SELECT id, filepath, status, video_id FROM videos")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating filepaths...") as pbar:
        for vid in videos:
-            pbar.update(1)
-
            filepath = vid['filepath']
-            if not filepath:
-                filename = f'{vid["video_id"]}.mp4'
-            else:
-                filename = os.path.basename(filepath)
-    
            status = vid['status']
            
-            path = find_video_path(filename)
+            path = find_video_path(vid['video_id'])
            
            if not path:
                continue
            
            path = path.replace("\\", "/")
-            if path == filepath and status != 'missing':
+            if path == filepath and status != 'missing': # change this
                continue
            
            cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (path, vid['id']))
            conn.commit()
-
+            pbar.update(1)
+            
 def fill_missing_hashes(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status != 'missing'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating hashes...") as pbar:
        for vid in videos:
-            pbar.update(1)
            video_id, filepath = vid.values()
            if filepath and os.path.exists(filepath):
                h = calculate_file_hash(filepath)
                cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (h, video_id))
                conn.commit()
+            pbar.update(1)

 def fill_missing_sizes(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE size = 0 AND status != 'missing'")
@ -93,12 +87,12 @@ def fill_missing_sizes(cursor, conn):

    with tqdm(videos, desc="Updating sizes...") as pbar:
        for vid in videos:
-            pbar.update(1)
-            video_id, filepath = vid.values()
+            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                size = get_file_size_in_mb(filepath)
                cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (size, video_id))
                conn.commit()
+            pbar.update(1)

 def fill_missing_durations(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing' ORDER BY size ASC")
@ -106,7 +100,6 @@ def fill_missing_durations(cursor, conn):

    with tqdm(videos, desc="Updating durations...") as pbar:
        for vid in videos:
-            pbar.update(1)
            video_id, filepath = vid.values()
            if filepath and os.path.exists(filepath):
                duration = get_duration(filepath)
@ -116,48 +109,63 @@ def fill_missing_durations(cursor, conn):
                    continue
                cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (duration, video_id))
                conn.commit()
+            pbar.update(1)

-def fill_missing_gender(cursor, conn):
+def map_gender(gender):
+    genders = {
+        'woman': 'Female',
+        'couple': 'Couple',
+        'trans': 'Trans',
+        'a man': 'Male'
+    }
+
+    for g in genders:
+        if g in gender:
+            return genders[g]
+    print(f"🚫 Failed to map gender: {gender}")
+    return None
+
+def fill_missing_gender(cursor, conn):   
    def get_data(username):
        import requests
        url = f"https://chaturbate.com/api/biocontext/{username}"
-        data = requests.get(url)
-        data = data.json()
+        try:
+            data = requests.get(url)
+            data = data.json()
+            if 'status' in data:
+                if data['status'] == 401:
+                    return False
+        except:
+            return False
        return data
    
    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
    videos = cursor.fetchall()
    
+    api_fetches = 10
    with tqdm(videos, desc="Updating genders...") as pbar:
        for vid in videos:
-            pbar.update(1)
            username, site = vid.values()
-            # try to fetch an item from videos table with the same username and site but with a non-null gender
            cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
            gender = cursor.fetchone()
-            if not gender:
+            if gender:
+                gender_str = gender['gender']
+            else:
+                if api_fetches <= 0:
+                    continue
                data = get_data(username)
+                api_fetches -= 1
                if not data:
                    continue
-                if 'status' in data:
-                    if data['status'] == 401:
-                        continue
-                gender = data['sex']
-                if 'woman' in gender:
-                    gender_str = 'Female'
-                elif 'couple' in gender:
-                    gender_str = 'Couple'
-                elif 'trans' in gender:
-                    gender_str = 'Trans'
-                else:
-                    print(f"fuck?: {gender}")
+                gender = map_gender(data['sex'])
+                if not gender:
                    continue
-            else:
-                gender_str = gender['gender']
-                
+                gender_str = gender
+
            cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
            conn.commit()
            print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
+            pbar.update(1)

 def generate_thumbnails_for_videos(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing' AND thumbnail IS NULL")
@ -166,7 +174,6 @@ def generate_thumbnails_for_videos(cursor, conn):
    tasks = []
    with tqdm(videos, desc="Generating thumbnails...") as pbar:
        for v in videos:
-            pbar.update(1)
            video_id   = v.get("video_id")
            filepath = v.get("filepath")

@ -181,6 +188,7 @@ def generate_thumbnails_for_videos(cursor, conn):
                tasks.append((filepath, thumb_path))

            v["thumbnail"] = thumb_path
+            pbar.update(1)

    if tasks:
        with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as exe:
@ -217,40 +225,68 @@ def _gen_thumb_cmd(src: str, dest: str):
 def move_edited_videos(cursor, conn):
    edited_videos = [f for f in os.listdir(EDITED_DIR) if os.path.isfile(os.path.join(EDITED_DIR, f)) and f.endswith(".mp4")]
    
-    for filename in edited_videos:
-        edited_path = os.path.join(EDITED_DIR, filename)
-        video_id = filename.split(".")[0]
-        
-        cursor.execute("SELECT filepath FROM videos WHERE video_id = %s", (video_id,))
-        video = cursor.fetchone()
-        
-        if not video:
-            continue
-        
-        video_path = video['filepath']
-        if not os.path.exists(video_path):
-            continue
-
-        shutil.move(edited_path, video_path)
-        print(f"✅ Moved edited video {video_id} to {video_path}")
+    with tqdm(edited_videos, desc="Moving edited videos...") as pbar:
+        for filename in edited_videos:
+            edited_path = os.path.join(EDITED_DIR, filename)
+            video_id = filename.split(".")[0]
+            
+            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
+            video = cursor.fetchone()
+            
+            if not video:
+                continue
+            
+            video_path = video['filepath']
+            if not os.path.exists(video_path):
+                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
+                
+            file_size = get_file_size_in_mb(edited_path)
+            cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (file_size, video_id))
+            conn.commit()
+
+            shutil.move(edited_path, video_path)
+            pbar.update(1)
+
+def move_concated_videos(cursor, conn):
+    concated_videos = [f for f in os.listdir(CONCATED_DIR) if os.path.isfile(os.path.join(CONCATED_DIR, f)) and f.endswith(".mp4")]
+    concated_videos = sorted(concated_videos, key=lambda f: os.path.getsize(os.path.join(CONCATED_DIR, f)))
+    
+    with tqdm(concated_videos, desc="Moving concated videos...") as pbar:
+        for filename in concated_videos:
+            edited_path = os.path.join(CONCATED_DIR, filename)
+            video_id = filename.split(".")[0]
+            
+            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
+            video = cursor.fetchone()
+            
+            if not video:
+                continue
+            
+            video_path = video['filepath']
+            if not os.path.exists(video_path):
+                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
+                
+            file_size = get_file_size_in_mb(edited_path)
+            cursor.execute("UPDATE videos SET size = %s, status = 'concated' WHERE video_id = %s", (file_size, video_id))
+            conn.commit()
+
+            shutil.move(edited_path, video_path)
+            pbar.update(1)

 if __name__ == '__main__':
    conn, cursor = get_local_db_connection()

-    print("🔍 Scanning for missing data...")
-
+    all_videos = get_all_video_files()
+    update_video_paths(cursor, conn)
+    mark_missing_videos(cursor, conn)
+    
    move_edited_videos(cursor, conn)
-
-    if True:
-        all_videos = get_all_video_files()
-        find_missing_videos(cursor, conn)
-        mark_missing_videos(cursor, conn)
-        
+    move_concated_videos(cursor, conn)
+    
    generate_thumbnails_for_videos(cursor, conn)
    fill_missing_sizes(cursor, conn)
    fill_missing_durations(cursor, conn)
-
-    # fill_missing_gender(cursor, conn)
+    fill_missing_gender(cursor, conn)
    # fill_missing_hashes(cursor, conn)

    cursor.close()
--- a/concat_helper.py
+++ b/concat_helper.py
@ -2,16 +2,14 @@ import subprocess
 import json
 import os
 import tempfile
+import shutil

-# --- helpers --------------------------------------------------------------- #
+from video_funcs import get_video_info, get_target_bitrate, get_target_resolution, get_fps

-def ffprobe_json(fp: str) -> dict:
-    """Return the full ffprobe-JSON for a media file."""
-    cmd = [
-        "ffprobe", "-v", "quiet", "-print_format", "json",
-        "-show_streams", "-show_format", fp
-    ]
-    return json.loads(subprocess.check_output(cmd, text=True))
+TEMP_DIR = ".temp"
+CONCATED_DIR = "concated"
+
+# --- helpers --------------------------------------------------------------- #

 def get_signature(fp: str) -> tuple:
    """
@ -19,6 +17,14 @@ def get_signature(fp: str) -> tuple:
        – video: codec, width, height, fps (as a float), pix_fmt, color_range
        – audio: codec, sample_rate, channels, channel_layout
    """
+    def ffprobe_json(fp: str) -> dict:
+        """Return the full ffprobe-JSON for a media file."""
+        cmd = [
+            "ffprobe", "-v", "quiet", "-print_format", "json",
+            "-show_streams", "-show_format", fp
+        ]
+        return json.loads(subprocess.check_output(cmd, text=True))
+    
    info = ffprobe_json(fp)
    v_stream = next(s for s in info["streams"] if s["codec_type"] == "video")
    a_stream = next((s for s in info["streams"] if s["codec_type"] == "audio"), None)
@ -45,6 +51,9 @@ def all_signatures_equal(videos):
    ref = get_signature(videos[0]["filepath"])
    return all(get_signature(v["filepath"]) == ref for v in videos[1:])

+
+# --- concat functions --------------------------------------------------------------- #
+
 def concat_copy(videos, out_path):
    """Lossless concat with the *concat demuxer* (-c copy)."""
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
@ -77,6 +86,8 @@ def concat_copy(videos, out_path):

    if result.returncode != 0 or any(err in result.stderr for err in ffmpeg_errors):
        print("❌ FFmpeg concat failed or produced corrupted output.")
+        print("FFmpeg stderr:")
+        print(result.stderr)
        # Remove broken file if it exists
        if os.path.exists(out_path):
            os.remove(out_path)
@ -86,7 +97,29 @@ def concat_copy(videos, out_path):
    print("✅ FFmpeg concat completed successfully.")
    return True

+def concatenate_videos(videos_list, reencode_concate = False):
+    """
+    Concatenate pre-grouped videos. A lossless stream-copy concat is tried first; only if
+    that fails and reencode_concate is True are the inputs re-encoded with AV1 (NVENC) at a
+    unified resolution and frame rate in one ffmpeg command. Returns False if neither applies.
+    """
+    
+    if len(videos_list) <= 1:
+        return False
+
+    copy_concat = copy_concatenate_videos(videos_list)
+    
+    if copy_concat:
+        return copy_concat
+
+    if not reencode_concate:
+        return False
+    
+    return encode_concatenate_videos(videos_list)
+
 def copy_concatenate_videos(videos_list):
+    from concat_helper import all_signatures_equal, concat_copy
+    
    if not (len(videos_list) > 1 and all_signatures_equal(videos_list)):
        print("Streams are not compatible for lossless concat.")
        return False
@ -95,9 +128,9 @@ def copy_concatenate_videos(videos_list):
    print("All streams are compatible – attempting lossless concat …")
    main_video = videos_list[0]
    video_path = main_video["filepath"]
-    output_path = os.path.join("temp", os.path.basename(video_path))
+    output_path = os.path.join(TEMP_DIR, os.path.basename(video_path))

-    os.makedirs("concated", exist_ok=True)
+    os.makedirs(CONCATED_DIR, exist_ok=True)
    success = concat_copy(videos_list, output_path)

    if not success:
@ -109,6 +142,93 @@ def copy_concatenate_videos(videos_list):
        os.remove(v["filepath"])

    # move temp to concated folder
-    os.rename(output_path, os.path.join("concated", os.path.basename(video_path)))
+    os.rename(output_path, os.path.join(CONCATED_DIR, os.path.basename(video_path)))

    return main_video
+
+def encode_concatenate_videos(videos_list):
+    """Re-encode all input videos to AV1 (av1_nvenc) and concatenate them in a single ffmpeg command."""
+    main_video = videos_list[0]
+    video_path = main_video["filepath"]
+
+    os.makedirs(TEMP_DIR, exist_ok=True)
+    os.makedirs(CONCATED_DIR, exist_ok=True)
+    
+    temp_path = os.path.join(TEMP_DIR, os.path.basename(video_path))
+    output_path = os.path.join(CONCATED_DIR, os.path.basename(video_path))
+
+    video_info = get_video_info(videos_list[0]['filepath'])
+    current_bitrate = int(video_info.get('bitrate') or 0)
+
+    target_width, target_height = get_target_resolution(videos_list)
+    target_bitrate_kbps = get_target_bitrate(target_width, target_height)
+
+    # Clamp target bitrate to not exceed source
+    if current_bitrate > 0:
+        target_bitrate_kbps = min(target_bitrate_kbps, current_bitrate)
+
+    # Max bitrate shouldn't exceed source either
+    if current_bitrate > 0:
+        max_bitrate_kbps = min(int(1.5 * target_bitrate_kbps), current_bitrate)
+    else:
+        max_bitrate_kbps = int(1.5 * target_bitrate_kbps)
+
+
+    fps_float = get_fps(video_path) or video_info.get('fps') or 30.0
+    if fps_float <= 0:
+        fps_float = 30.0
+    keyframe_interval = int(fps_float)
+
+    print(f"Concatenating {len(videos_list)} videos into {temp_path}")
+    print(f"  Mode Resolution: {target_width}x{target_height}")
+    print(f"  Target Bitrate: {target_bitrate_kbps}k (max ~{max_bitrate_kbps}k)")
+    print(f"  Keyframe Interval: {keyframe_interval}")
+
+    cmd = ["ffmpeg", "-y"]  # Overwrite output if exists
+    for v in videos_list:
+        cmd.extend(["-i", v["filepath"]])
+
+    filter_statements = []
+    concat_streams = []
+    n = len(videos_list)
+
+    unified_fps = 30
+
+    for i in range(n):
+        filter_statements.append(
+            f"[{i}:v]fps={unified_fps},scale={target_width}:{target_height}[v{i}]"
+        )
+        concat_streams.append(f"[v{i}][{i}:a]")
+
+    # Example final: [v0][0:a][v1][1:a]concat=n=2:v=1:a=1[outv][outa]
+    concat_line = "".join(concat_streams) + f"concat=n={n}:v=1:a=1[outv][outa]"
+    filter_statements.append(concat_line)
+
+    filter_complex = ";".join(filter_statements)
+
+    cmd.extend([
+        "-filter_complex", filter_complex,
+        "-map", "[outv]",
+        "-map", "[outa]",
+        "-c:v", "av1_nvenc",
+        "-b:v", f"{target_bitrate_kbps}k",
+        "-maxrate", f"{max_bitrate_kbps}k",
+        "-bufsize", f"{max_bitrate_kbps}k",
+        "-preset", "p5",
+        "-g", str(keyframe_interval),
+        "-c:a", "aac",
+        "-b:a", "192k",
+        temp_path
+    ])
+
+    try:
+        subprocess.run(cmd, check=True)
+    except:
+        return False
+
+    for video in videos_list:
+        os.remove(video["filepath"])
+
+    shutil.move(temp_path, output_path)
+  
+    return main_video
--- a/concater.py
+++ b/concater.py
@ -1,17 +1,16 @@
-print("Importing modules...")
-
 from funcs import group_videos, group_for_concatenation_simple
-from video_funcs import concatenate_videos
+from concat_helper import concatenate_videos
 import os, config, shutil

 MOVE_FUCKED = False
-sort_type = {"size": lambda x: sum([video['size'] for video in x]),"count": lambda x: len(x)}    
+sort_type = {"size": lambda x: sum([video['size'] for video in x]),"count": lambda x: len(x)}

 def get_videos(cursor, username=None):
    if username:
-        cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing' ORDER BY created_at DESC", (username,))
-    else:
-        cursor.execute("SELECT * FROM videos WHERE status != 'missing' ORDER BY created_at DESC")
+        cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing';", (username,))
+        return cursor.fetchall()
+
+    cursor.execute("SELECT * FROM videos WHERE status != 'missing';")
    return cursor.fetchall()

 def organize_videos():
@ -28,7 +27,7 @@ def organize_videos():
    for user, videos in video_data.items():
        grouped_videos.extend(group_for_concatenation_simple(videos))
        
-    sorted_processed_videos = sorted(grouped_videos, key=sort_type["size"], reverse=True)
+    sorted_processed_videos = sorted(grouped_videos, key=sort_type["count"], reverse=True)
    
    # group the videos for concatenation
    for video_list in sorted_processed_videos:
@ -42,7 +41,7 @@ def organize_videos():
        print("\n"*2)
        print(100*"=")
        
-        main_video = concatenate_videos(video_list)
+        main_video = concatenate_videos(video_list, reencode_concate=True)
        
        if main_video:
            print(f"Processed {len(video_list)} input videos into {main_video["filepath"]} output video.")
--- a/superencoderav.py
+++ b/superencoderav.py
@ -90,8 +90,7 @@ def encode_video(filepath, output_path, target_bitrate):
            )
        )
        
-        print(f"  Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps "
-              f"(maxrate={max_bitrate} kbps).")
+        print(f"  Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps (maxrate={max_bitrate} kbps).")

        return True
    except ffmpeg.Error as e:
@ -119,7 +118,7 @@ def check_and_replace_if_smaller(original_path, temp_output_path):
        print(f"  Original: {size_original_mb:.2f} MB \n  Re-encoded: {size_processed_mb:.2f} MB.")
        print(100*"=")
        shutil.move(temp_output_path, original_path)
-        return True
+        return size_processed_mb

 def update_codec_db(video_id, codec):
    conn, cursor = config.get_local_db_connection()
@ -127,6 +126,12 @@ def update_codec_db(video_id, codec):
    conn.commit()
    conn.close()

+def update_file_size_db(video_id, size):
+    conn, cursor = config.get_local_db_connection()
+    cursor.execute("UPDATE videos SET size = %s WHERE id = %s", (size, video_id))
+    conn.commit()
+    conn.close()
+
 def smart_choice(cursor, small_mb=250):
    """
    Returns a list of candidate videos to encode, ordered by:
@ -167,7 +172,7 @@ def smart_choice(cursor, small_mb=250):
            ORDER BY
                total_mb DESC,          -- top streamers first
                small_first ASC,        -- small (< small_mb) first
-                size DESC,              -- then bigger files first inside each group
+                size ASC,              -- then bigger files first inside each group
                created_at DESC;        -- then newest
        """, (days, small_mb))
        return cursor.fetchall()
@ -211,26 +216,108 @@ def smart_choice(cursor, small_mb=250):
    """, (small_mb,))
    return cursor.fetchall()

+def smart_choice_by_count(cursor, small_mb=250):
+    """
+    Returns candidate videos ordered by:
+      1) time window: 7d, then 30d, then 90d, then fallback (any time)
+      2) streamer priority: COUNT(*) per (username, site) DESC within the window
+      3) small (< small_mb MB) first, then big
+      4) inside each group: size DESC, then created_at DESC
+    NOTE: 'size' is stored in MB.
+    """
+    def pick(days: int):
+        cursor.execute("""
+            WITH candidates AS (
+                SELECT v.*
+                FROM videos v
+                WHERE v.codec IS NULL
+                  AND v.status <> 'missing'
+                  AND v.filepath IS NOT NULL
+                  AND v.created_at >= NOW() - make_interval(days => %s)
+            ),
+            by_streamer AS (
+                SELECT username, site, COUNT(*) AS total_vids
+                FROM candidates
+                GROUP BY username, site
+            ),
+            ordered AS (
+                SELECT c.*,
+                       bs.total_vids,
+                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
+                FROM candidates c
+                JOIN by_streamer bs
+                  ON bs.username = c.username
+                 AND bs.site     = c.site
+            )
+            SELECT *
+            FROM ordered
+            ORDER BY
+                total_vids DESC,        -- most videos first
+                small_first ASC,        -- small (< small_mb) first
+                size DESC,              -- then larger files first within group
+                created_at DESC;        -- then newest
+        """, (days, small_mb))
+        return cursor.fetchall()
+
+    # Try windows: 7 → 30 → 90 days
+    for d in (7, 30, 90):
+        rows = pick(d)
+        if rows:
+            return rows
+
+    # Fallback: any time, same ordering
+    cursor.execute("""
+        WITH candidates AS (
+            SELECT v.*
+            FROM videos v
+            WHERE v.codec IS NULL
+              AND v.status <> 'missing'
+              AND v.filepath IS NOT NULL
+        ),
+        by_streamer AS (
+            SELECT username, site, COUNT(*) AS total_vids
+            FROM candidates
+            GROUP BY username, site
+        ),
+        ordered AS (
+            SELECT c.*,
+                   bs.total_vids,
+                   CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
+            FROM candidates c
+            JOIN by_streamer bs
+              ON bs.username = c.username
+             AND bs.site     = c.site
+        )
+        SELECT *
+        FROM ordered
+        ORDER BY
+            total_vids DESC,
+            small_first ASC,
+            size DESC,
+            created_at DESC;
+    """, (small_mb,))
+    return cursor.fetchall()
+
 def select_user_videos(username, cursor):
-    cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing' AND codec IS NULL ORDER BY size ASC", (username,))
+    if username == "all":
+        cursor.execute("SELECT * FROM videos WHERE status != 'missing' AND codec IS NULL ORDER BY size ASC")
+    else:
+        cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing' AND codec IS NULL ORDER BY size ASC", (username,))
    return cursor.fetchall()
    
 def reencode_videos_av1():
-    # get videos
    conn, cursor = config.get_local_db_connection()
-    # cursor.execute("SELECT * FROM videos WHERE codec IS NULL AND status != 'missing' AND filepath IS NOT NULL AND filepath NOT LIKE 'U:%' ORDER BY size ASC;")
-    # videos = cursor.fetchall()

    while True:
        username = input("Enter username: ")
        if username:
            videos = select_user_videos(username, cursor)
        else:
-            videos = smart_choice(cursor)
+            # videos = smart_choice(cursor)
+            videos = smart_choice_by_count(cursor)
+            
        with tqdm(videos, desc="Processing videos", unit="file") as pbar:
            for video in videos:
-                pbar.update(1)
-
                input_path = video['filepath']

                if not os.path.exists(input_path):
@ -243,6 +330,7 @@ def reencode_videos_av1():
                if file_size_in_mb < 1:
                    print("Video is too small. Skipping.")
                    os.remove(input_path)
+                    cursor.execute("UPDATE videos SET status = 'deleted' WHERE id = %s", (video['id'],))
                    continue
                
                # 2) Get current bitrate & resolution
@ -272,8 +360,13 @@ def reencode_videos_av1():
                    continue

                # 4) Compare file sizes and replace if smaller
-                if check_and_replace_if_smaller(input_path, output_path):
+                new_size = check_and_replace_if_smaller(input_path, output_path)
+                if new_size:
                    update_codec_db(video['id'], 'av1')
+                    update_file_size_db(video['id'], new_size)
+
+                pbar.update(1)
+

 if __name__ == "__main__":
    reencode_videos_av1()
--- a/video_funcs.py
+++ b/video_funcs.py
@ -1,9 +1,8 @@
 import ffmpeg
 import subprocess
 import json
-from collections import Counter
-import shutil
 import os
+from collections import Counter

 def is_av1(filepath):
    """Check if a video file is already AV1-encoded."""
@ -17,27 +16,6 @@ def is_av1(filepath):
        print(f"Error probing {filepath}: {e}")
    return False

-def get_video_info(filepath):
-    """
-    Returns (bitrate_in_kbps, (width, height)) for the specified video file.
-    If probing fails, returns (None, (None, None)).
-    """
-    try:
-        probe = ffmpeg.probe(filepath)
-        format_info = probe['format']
-        video_stream = next(
-            (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
-            None
-        )
-        if video_stream:
-            # Convert from bits/sec to kbps
-            bitrate_kbps = int(format_info['bit_rate']) // 1000
-            width = video_stream['width']
-            height = video_stream['height']
-            return bitrate_kbps, (width, height)
-    except ffmpeg.Error as e:
-        print(f"Error getting video info for {filepath}: {e}")
-    return None, (None, None)

 def get_fps(filepath):
    """Get the frames per second (FPS) of the input video using ffmpeg.probe."""
@ -52,44 +30,69 @@ def get_fps(filepath):
        print(f"Error getting FPS for {filepath}: {e}")
    return None

-def get_video_metadata(video_path):
-    """Minimal example to get width/height from FFprobe directly via subprocess."""
+def get_video_info(filepath):
+    """
+    Probe the first video stream of `filepath` with ffprobe.
+
+    Returns dict:
+      { 'width': int, 'height': int, 'bitrate': int, 'fps': float }
+    - bitrate is Kbps (rounded down): the first positive value among
+      stream bit_rate, format bit_rate, file size * 8 / duration
+    - every field is 0 if ffprobe fails or its output is unusable
+    """
     cmd = [
-        "ffprobe", "-v", "error", "-select_streams", "v:0",
-        "-show_entries", "stream=width,height,bit_rate",
-        "-of", "json", video_path
+        "ffprobe","-v","error",
+        "-select_streams","v:0",
+        "-show_entries","stream=width,height,bit_rate,r_frame_rate",
+        "-show_entries","format=bit_rate,duration",
+        "-of","json", filepath
     ]
-    result = subprocess.run(cmd, capture_output=True, text=True)
-    if result.returncode != 0:
-        return {"width": 0, "height": 0, "bit_rate": 0}
+    r = subprocess.run(cmd, capture_output=True, text=True)
+    if r.returncode:
+        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}

     try:
-        data = json.loads(result.stdout)
-        streams = data.get("streams", [])
-        if not streams:
-            return {"width": 0, "height": 0, "bit_rate": 0}
-        stream = streams[0]
-        width = int(stream.get("width", 0))
-        height = int(stream.get("height", 0))
-        br = int(stream.get("bit_rate", 0))  # in bits per second
-        return {"width": width, "height": height, "bit_rate": br}
-    except json.JSONDecodeError:
-        return {"width": 0, "height": 0, "bit_rate": 0}
+        d = json.loads(r.stdout or "{}")
+        s = (d.get("streams") or [{}])[0]
+        f = d.get("format") or {}
+        width = int(s.get("width") or 0)
+        height = int(s.get("height") or 0)
+
+        # fps (r_frame_rate like "30000/1001"); stays 0.0 if unparseable
+        fps = 0.0
+        try:
+            num, den = map(float, (s.get("r_frame_rate") or "0/0").split("/"))
+            fps = (num / den) if den else 0.0
+        except (AttributeError, TypeError, ValueError):
+            pass
+
+        # bitrate in bps → prefer stream, fallback to format, else compute;
+        # non-integer values are skipped so they cannot zero the whole result
+        candidates = (s.get("bit_rate"), f.get("bit_rate"))
+        rates = [int(v) for v in candidates if str(v).isdecimal()]
+        br_bps = next((v for v in rates if v > 0), 0)
+        if not br_bps:
+            try:
+                dur = float(f.get("duration") or 0)
+                if dur > 0:
+                    br_bps = int(os.path.getsize(filepath) * 8 / dur)
+            except (OSError, TypeError, ValueError, OverflowError):
+                br_bps = 0
+        return {"width": width, "height": height, "bitrate": br_bps // 1000, "fps": fps}
+    except Exception:
+        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}
    
 def get_target_resolution(group):
     """Collect the most common resolution from the group's videos."""
     resolutions = []
     for v in group:
-        meta = get_video_metadata(v["filepath"])
-        width, height = meta["width"], meta["height"]
-        if width > 0 and height > 0:
-            resolutions.append((width, height))
-
+        probed = get_video_info(v["filepath"])
+        size = (probed["width"], probed["height"])
+        if all(size):  # a zero width or height means the probe failed
+            resolutions.append(size)
     if not resolutions:
         return (1280, 720)
-
-    counter = Counter(resolutions)
-    return counter.most_common(1)[0][0]  # (width, height)
+    return Counter(resolutions).most_common(1)[0][0]  # (width, height)

 def get_target_bitrate(width, height):
    """Your existing function to choose a bitrate based on resolution."""
@ -97,105 +100,4 @@ def get_target_bitrate(width, height):
    for res, bitrate in resolutions.items():
        if width <= res[0] and height <= res[1]:
            return bitrate
-    return 2500
-
-def concatenate_videos(videos_list, reencode_concate = False):
-    """
-    Concatenate pre-grouped videos, then re-encode them using AV1 (NVENC)
-    while forcing a unified resolution and frame rate on each input
-    before final concatenation in one ffmpeg command.
-    """
-    
-    if len(videos_list) <= 1:
-        return False
-
-    from concat_helper import copy_concatenate_videos
-    copy_concat = copy_concatenate_videos(videos_list)
-    
-    if copy_concat:
-        return copy_concat
-
-    if not reencode_concate:
-        return False
-
-    main_video = videos_list[0]
-    video_path = main_video["filepath"]
-
-    os.makedirs("temp", exist_ok=True)
-    os.makedirs("concated", exist_ok=True)
-    
-    temp_path = os.path.join("temp", os.path.basename(video_path))
-    output_path = os.path.join("concated", os.path.basename(video_path))
-
-    current_bitrate, (width, height) = get_video_info(videos_list[0]['filepath'])
-
-    target_width, target_height = get_target_resolution(videos_list)
-    target_bitrate_kbps = get_target_bitrate(target_width, target_height)
-
-    if target_bitrate_kbps > current_bitrate:
-        target_bitrate_kbps = current_bitrate
-
-    max_bitrate_kbps = int(1.5 * target_bitrate_kbps)
-
-    fps_float = get_fps(video_path)
-    if fps_float is None or fps_float <= 0:
-        print(f"Could not determine FPS for {video_path}. Using default keyframe interval of 30.")
-        fps_float = 30.0
-    keyframe_interval = int(fps_float)
-
-    print(f"Concatenating {len(videos_list)} videos into {temp_path}")
-    print(f"  Mode Resolution: {target_width}x{target_height}")
-    print(f"  Target Bitrate: {target_bitrate_kbps}k (max ~{max_bitrate_kbps}k)")
-    print(f"  Keyframe Interval: {keyframe_interval}")
-
-    cmd = ["ffmpeg", "-y"]  # Overwrite output if exists
-    for v in videos_list:
-        cmd.extend(["-i", v["filepath"]])
-
-    filter_statements = []
-    concat_streams = []
-    n = len(videos_list)
-
-    unified_fps = 30
-
-    for i in range(n):
-        filter_statements.append(
-            f"[{i}:v]fps={unified_fps},scale={target_width}:{target_height}[v{i}]"
-        )
-        concat_streams.append(f"[v{i}][{i}:a]")
-
-    # Example final: [v0][0:a][v1][1:a]concat=n=2:v=1:a=1[outv][outa]
-    concat_line = "".join(concat_streams) + f"concat=n={n}:v=1:a=1[outv][outa]"
-    filter_statements.append(concat_line)
-
-    filter_complex = ";".join(filter_statements)
-
-    cmd.extend([
-        "-filter_complex", filter_complex,
-        "-map", "[outv]",
-        "-map", "[outa]",
-        "-c:v", "av1_nvenc",
-        "-b:v", f"{target_bitrate_kbps}k",
-        "-maxrate", f"{max_bitrate_kbps}k",
-        "-bufsize", f"{max_bitrate_kbps}k",
-        "-preset", "p5",
-        "-g", str(keyframe_interval),
-        "-c:a", "aac",
-        "-b:a", "192k",
-        temp_path
-    ])
-
-    try:
-        subprocess.run(cmd, check=True)
-    except:
-        return False
-
-    for video in videos_list:
-        os.remove(video["filepath"])
-
-    shutil.move(temp_path, output_path)
-  
-    return main_video
-
-def get_file_size_in_mb(file_path):
-    return os.path.getsize(file_path) / (1024 * 1024)
+    return 2500