cleanup and organized

main
oscar 1 month ago
parent 2a7cd821c1
commit fd62cb5a30

@@ -38,14 +38,15 @@ def mark_missing_videos(cursor, conn):
             pbar.update(1)
             video_id, filepath = vid.values()
             if not filepath:
-                continue
-            filename = os.path.basename(filepath)
+                filename = f'{video_id}.mp4'
+            else:
+                filename = os.path.basename(filepath)
             if not find_video_path(filename):
                 print(f"🚫 Missing: {filename}")
                 cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
                 conn.commit()

-def fill_missing_filepaths(cursor, conn):
+def find_missing_videos(cursor, conn):
     cursor.execute("SELECT id, filepath, status, video_id FROM videos")
     videos = cursor.fetchall()
@@ -242,7 +243,7 @@ if __name__ == '__main__':
     if True:
         all_videos = get_all_video_files()
-        fill_missing_filepaths(cursor, conn)
+        find_missing_videos(cursor, conn)
         mark_missing_videos(cursor, conn)
         generate_thumbnails_for_videos(cursor, conn)

@@ -0,0 +1,114 @@
+import subprocess
+import json
+import os
+import tempfile
+
+# --- helpers --------------------------------------------------------------- #
+
+def ffprobe_json(fp: str) -> dict:
+    """Return the full ffprobe-JSON for a media file."""
+    cmd = [
+        "ffprobe", "-v", "quiet", "-print_format", "json",
+        "-show_streams", "-show_format", fp
+    ]
+    return json.loads(subprocess.check_output(cmd, text=True))
+
+def get_signature(fp: str) -> tuple:
+    """
+    A signature is everything that has to match for a bit-perfect concat:
+      video: codec, width, height, fps (as a float), pix_fmt, color_range
+      audio: codec, sample_rate, channels, channel_layout
+    """
+    info = ffprobe_json(fp)
+    v_stream = next(s for s in info["streams"] if s["codec_type"] == "video")
+    a_stream = next((s for s in info["streams"] if s["codec_type"] == "audio"), None)
+
+    def fps(stream):
+        fr = stream.get("r_frame_rate", "0/0")
+        num, den = map(int, fr.split("/"))
+        return round(num / den, 3) if den else 0.0
+
+    sig = (
+        v_stream["codec_name"],
+        int(v_stream["width"]), int(v_stream["height"]),
+        fps(v_stream),
+        v_stream.get("pix_fmt"),
+        v_stream.get("color_range"),
+        a_stream["codec_name"] if a_stream else None,
+        int(a_stream["sample_rate"]) if a_stream else None,
+        a_stream.get("channels") if a_stream else None,
+        a_stream.get("channel_layout") if a_stream else None,
+    )
+    return sig
+
+def all_signatures_equal(videos):
+    ref = get_signature(videos[0]["filepath"])
+    return all(get_signature(v["filepath"]) == ref for v in videos[1:])
+
+def concat_copy(videos, out_path):
+    """Lossless concat with the *concat demuxer* (-c copy)."""
+    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
+        for v in videos:
+            # The concat demuxer expects embedded single quotes escaped as '\''
+            path = os.path.abspath(v["filepath"]).replace("'", "'\\''")
+            f.write(f"file '{path}'\n")
+        list_file = f.name
+    cmd = [
+        "ffmpeg", "-y",
+        "-f", "concat", "-safe", "0",
+        "-i", list_file,
+        "-c", "copy",
+        out_path,
+    ]
+    print("Running FFmpeg concat...")
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    os.unlink(list_file)
+    # Look for specific error patterns in FFmpeg's stderr
+    ffmpeg_errors = [
+        "corrupt input packet",
+        "Invalid OBU",
+        "Failed to parse temporal unit",
+        "Packet corrupt",
+        "partial file",
+        "Non-monotonic DTS",
+    ]
+    if result.returncode != 0 or any(err in result.stderr for err in ffmpeg_errors):
+        print("❌ FFmpeg concat failed or produced corrupted output.")
+        # Remove the broken file if it exists
+        if os.path.exists(out_path):
+            os.remove(out_path)
+            print(f"🗑️ Removed corrupt output: {out_path}")
+        return False
+    print("✅ FFmpeg concat completed successfully.")
+    return True
+
+def copy_concatenate_videos(videos_list):
+    if not (len(videos_list) > 1 and all_signatures_equal(videos_list)):
+        print("Streams are not compatible for lossless concat.")
+        return False
+    print("All streams are compatible, attempting lossless concat …")
+    main_video = videos_list[0]
+    video_path = main_video["filepath"]
+    output_path = os.path.join("temp", os.path.basename(video_path))
+    os.makedirs("temp", exist_ok=True)
+    os.makedirs("concated", exist_ok=True)
+    success = concat_copy(videos_list, output_path)
+    if not success:
+        print("Falling back to re-encoding due to concat failure.")
+        return False
+    # Remove the originals
+    for v in videos_list:
+        os.remove(v["filepath"])
+    # Move the temp output into the concated folder
+    os.rename(output_path, os.path.join("concated", os.path.basename(video_path)))
+    return main_video
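For context, a minimal usage sketch of the new module (file names here are hypothetical; copy_concatenate_videos returns the first video's dict on success and False otherwise):

    parts = [
        {"filepath": "videos/abc123_part1.mp4"},
        {"filepath": "videos/abc123_part2.mp4"},
    ]
    merged = copy_concatenate_videos(parts)
    if merged:
        print("Merged into", os.path.join("concated", os.path.basename(merged["filepath"])))
    else:
        print("Not concat-compatible; re-encode this group instead.")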

@@ -32,9 +32,7 @@ def organize_videos():
     # group the videos for concatenation
     for video_list in sorted_processed_videos:
-        first_video = video_list[0]
-        video_id = os.path.splitext(os.path.basename(first_video['filepath']))[0]
+        video_id = video_list[0]['video_id']
         videos_sum_size = sum([video['size'] for video in video_list])
@@ -54,7 +52,7 @@ def organize_videos():
         print(f"Videos are fucked.")
         main_video = video_list[0]
-        video_name = main_video['videoID']
+        video_name = main_video['video_id']
         fucked_dir = os.path.join("concate_fucked", video_name)
         os.makedirs(fucked_dir, exist_ok=True)

@@ -1,5 +1,5 @@
 from datetime import datetime, timedelta, timezone
-from VideoManager import get_duration
+from video_manager import get_duration
 import os, json, subprocess, shutil
@@ -34,14 +34,13 @@ def update_video_data(dataPath, data):
     if existing_data == data:
         return  # No update needed if data hasn't changed.
-    data["updatedAt"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    data["updated_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     with open(dataPath, "w") as f:
         json.dump(data, f)  # Write to file if new or if data has changed.

-def is_recent(updated_at_str, minutes=30):
-    updated_at = format_datetime(updated_at_str)
+def is_recent(updated_at, minutes=30):
     updated_at = updated_at.replace(tzinfo=timezone.utc)
     now = datetime.now(timezone.utc)
     return now - updated_at < timedelta(minutes=minutes)
@@ -104,7 +103,7 @@ def group_videos(video_list, sort_by="count", order="desc"):
     # Ensure videos for each user and site are sorted by creation date
     for key in video_data:
-        video_data[key].sort(key=lambda x: format_datetime(x["createdAt"]))
+        video_data[key].sort(key=lambda x: x["created_at"])

     # Further sort groups if required based on size or count
     if sort_by == "size":
@@ -124,8 +123,8 @@ def process_videos(video_data):
         video_path = video["filepath"]
         data_path = video["jsonpath"]
-        if 'size' not in video:
-            filesize = get_file_size_in_mb(video_path)
+        filesize = get_file_size_in_mb(video_path)
+        if 'size' not in video or video['size'] != filesize:
             video['size'] = filesize
             is_updated = True
@@ -135,8 +134,8 @@ def process_videos(video_data):
         # Move corrupted videos to the failed folder
         if video['duration'] == 0:
-            print(f"{video['videoID']} is corrupted, moving to failed folder")
-            failed_video_path = os.path.join(failed_directory, video["videoID"] + ".mp4")
+            print(f"{video['video_id']} is corrupted, moving to failed folder")
+            failed_video_path = os.path.join(failed_directory, video["video_id"] + ".mp4")
             failed_data_path = failed_video_path.replace(".mp4", ".json")
             shutil.move(video_path, failed_video_path)
@@ -166,7 +165,7 @@ def group_for_concatenation(videos, time_limit=30):
     reference_params = None  # We'll store the 'ffprobe' params for the first video in each group
     for video in videos:
-        video_start = format_datetime(video['createdAt'])
+        video_start = video['created_at']
         video_end = video_start + timedelta(seconds=video['duration'])

         # Probe the video to get parameters
@@ -218,7 +217,7 @@ def group_for_concatenation(videos, time_limit=30):
     if concatenated_video_groups:
         last_group = concatenated_video_groups[-1]
         last_video = last_group[-1]
-        last_updated_at = datetime.strptime(last_video['createdAt'], "%Y-%m-%d %H:%M:%S")
+        last_updated_at = datetime.strptime(last_video['created_at'], "%Y-%m-%d %H:%M:%S")
         if datetime.now() - last_updated_at <= timedelta(minutes=time_limit):
             print(f"Last group is not ready for upload. Removing from final groups.")
             concatenated_video_groups.pop()
@@ -287,10 +286,10 @@ def get_video_params(video_path):
 def generate_list_file(videos):
     directory = os.path.dirname(videos[0]["filepath"])
-    list_filename = os.path.join(directory, f"{videos[0]['videoID']}.txt")
+    list_filename = os.path.join(directory, f"{videos[0]['video_id']}.txt")
     with open(list_filename, "w") as list_file:
         for video in videos:
-            list_file.write(f"file '{video['videoID']}.mp4'\n")
+            list_file.write(f"file '{video['video_id']}.mp4'\n")
     return list_filename
@@ -398,4 +397,54 @@ def calculate_file_hash(file_path):
     import hashlib
     with open(file_path, 'rb') as f:
         data = f.read()
     return hashlib.sha256(data).hexdigest()
+
+def group_for_concatenation_simple(videos, time_limit=60):
+    """
+    Groups videos into lists where:
+      - total group size <= 9 GB (9216 MB),
+      - time gap between consecutive videos <= time_limit minutes.
+    Videos are assumed to already share the same resolution/fps/codecs for
+    a no-reencode concat; that compatibility is not re-checked here.
+    """
+    concatenated_video_groups = []
+    current_group = []
+    current_size_mb = 0
+    last_video_end = None
+    for video in videos:
+        video_start = video['created_at']
+        video_end = video_start + timedelta(seconds=video['duration'])
+        if current_group:
+            # Check whether adding this video breaks the size or time-gap limits
+            time_difference = (video_start - last_video_end).total_seconds() / 60
+            size_exceeded = (current_size_mb + video['size'] > 9216)
+            time_exceeded = (time_difference > time_limit)
+            # If we exceed the size cap or the time gap => start a new group
+            if size_exceeded or time_exceeded:
+                concatenated_video_groups.append(current_group)
+                current_group = []
+                current_size_mb = 0
+        # Add the current video to the group
+        current_group.append(video)
+        current_size_mb += video['size']
+        last_video_end = video_end
+    # Add the last group if not empty
+    if current_group:
+        concatenated_video_groups.append(current_group)
+    # Optional: ensure the last group is "ready" for upload based on time difference
+    if concatenated_video_groups:
+        last_group = concatenated_video_groups[-1]
+        last_video = last_group[-1]
+        last_updated_at = last_video['created_at']
+        if datetime.now() - last_updated_at <= timedelta(minutes=time_limit):
+            print(f"Last group is not ready for upload. Removing from final groups.")
+            concatenated_video_groups.pop()
+    # Single videos can't be concatenated; drop groups of one
+    concatenated_video_groups = [group for group in concatenated_video_groups if len(group) > 1]
+    return concatenated_video_groups
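To make the thresholds concrete, a walk-through with made-up numbers (sizes in MB, time_limit=60, durations negligible, and the last video well over an hour old so the readiness check passes):

    # A: created 10:00, 5000 MB -> starts group 1
    # B: created 10:30, 5000 MB -> 5000 + 5000 > 9216, so B starts group 2
    # C: created 12:00,  100 MB -> gap from B is ~90 min > 60, so C starts group 3
    # Every group holds a single video, so the final len(group) > 1 filter drops
    # them all and the function returns [] — only runs of back-to-back clips
    # that fit under the 9 GB cap survive as concat candidates.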

@@ -210,6 +210,10 @@ def smart_choice(cursor, small_mb=250):
         created_at DESC;
     """, (small_mb,))
     return cursor.fetchall()
+
+def select_user_videos(username, cursor):
+    cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing' AND codec IS NULL ORDER BY size ASC", (username,))
+    return cursor.fetchall()

 def reencode_videos_av1():
     # get videos
@@ -218,7 +222,11 @@ def reencode_videos_av1():
     # videos = cursor.fetchall()
     while True:
-        videos = smart_choice(cursor)
+        username = input("Enter username: ")
+        if username:
+            videos = select_user_videos(username, cursor)
+        else:
+            videos = smart_choice(cursor)
         with tqdm(videos, desc="Processing videos", unit="file") as pbar:
             for video in videos:
                 pbar.update(1)
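For reference, the new per-user path can also be exercised directly; the %s placeholder means the username is passed as a bound parameter (psycopg2-style), not interpolated into the SQL string. The connection setup below is a hypothetical sketch:

    import psycopg2
    conn = psycopg2.connect("dbname=videos")  # hypothetical DSN
    cursor = conn.cursor()
    videos = select_user_videos("alice", cursor)  # smallest files first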
