cleanup

parent 4b1b7d08e3
commit 1a3ee2e430
@@ -1,100 +0,0 @@
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os, cv2


def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    clip_main = VideoFileClip(input_video)

    # Match the intro to the main clip's resolution and frame rate
    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)

    # Concatenation breaks if one clip has audio and the other does not,
    # so pad a silent intro with a zero-filled track
    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        import numpy as np
        # AudioArrayClip expects an array shaped (n_samples, n_channels)
        n_samples = int(clip_intro.duration * clip_main.audio.fps)
        silent_audio = AudioArrayClip(np.zeros((n_samples, 1)), fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)

    final_clip = concatenate_videoclips([clip_intro, clip_main])

    final_clip.write_videofile(output_video, codec='libx264')


def get_duration(input_file):
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0

    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        duration = frames / fps if fps else 0  # guard against fps == 0
        video.release()

        return int(duration)
    except Exception as e:
        print(e)
        return 0


def generate_thumbnails(input_file, filename):
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')

    clip = VideoFileClip(input_file)
    duration = clip.duration

    # 11 intervals leave headroom so the last 1-second subclip still fits
    interval = duration / 11.0

    # Poster frame from the start of the first interval
    start_time_first_clip = 0 * interval
    try:
        clip.save_frame(posterPath, t=start_time_first_clip)
    except Exception:
        pass

    # Stitch ten 1-second samples into a scrub preview
    clips = []
    for i in range(10):
        start_time = i * interval
        end_time = start_time + 1
        clips.append(clip.subclip(start_time, end_time))

    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")

    for subclip in clips:
        subclip.close()

    clip.close()
    final_clip.close()

    return posterPath, previewPath


def split_video(file_path, segment_size_gb=8):
    import subprocess

    # Convert GB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024

    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)

    # Calculate the number of segments needed
    num_segments = total_size_bytes // segment_size_bytes + 1

    # Get the duration of the video file
    duration = get_duration(file_path)

    # Calculate the duration of each segment
    segment_duration = duration / num_segments

    # Generate output file pattern
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"

    # Run FFmpeg command to split the video (stream copy, no re-encode)
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command)
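A minimal driver sketch showing how these helpers chain together; the file names below are placeholders, not paths from this repository:

```python
# Hypothetical usage of the helpers above; all paths are examples only.
if __name__ == "__main__":
    src = "uploads/example.mp4"
    add_intro_to_video(src, output_video="branded.mp4")
    print(f"duration: {get_duration('branded.mp4')} s")
    poster, preview = generate_thumbnails("branded.mp4", "example")
    print(f"poster: {poster}, preview: {preview}")
    split_video("branded.mp4")  # files under 8 GB yield a single segment
```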
@@ -1,138 +0,0 @@
from archiveConfig import get_local_db_connection
from psycopg2.extras import execute_values
from datetime import datetime
import uuid, shutil, json, os
from tqdm import tqdm

DATA_DIR = 'data'
DOWNLOAD_DIR = 'downloaded'

conn, cursor = get_local_db_connection()


def is_valid_uuid(val: str, version=None) -> bool:
    try:
        u = uuid.UUID(val, version=version) if version else uuid.UUID(val)
        return str(u) == val.lower()  # Match exact input (handles casing)
    except (ValueError, AttributeError, TypeError):
        return False


def parse_json_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Fall back from "createdAt" to "date"; either key may be absent
    date = data.get("createdAt") or data.get("date")

    if date:
        created_at = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    else:
        created_at = None
        print(f"⚠️ No createdAt or date found in {filepath}")

    if "updatedAt" in data:
        updated_at = datetime.strptime(data.get("updatedAt"), "%Y-%m-%d %H:%M:%S")
    else:
        updated_at = created_at

    video_id = os.path.splitext(os.path.basename(filepath))[0]
    if not is_valid_uuid(video_id):
        print(f"⚠️ Invalid video_id: {video_id}")
        return None

    parsed_data = {
        'video_id': video_id,
        'username': data.get("username"),
        'site': data.get("site"),
        'gender': data.get("gender"),
        'size': data.get("size") or 0,
        'duration': data.get("duration") or 0,
        'filepath': data.get("filepath"),
        'jsonpath': data.get("jsonpath"),
        'hash': None,  # You can add hash calculation here if needed
        'created_at': created_at,
        'updated_at': updated_at
    }

    return parsed_data


def insert_data(all_data):
    query = """
        INSERT INTO videos (
            video_id, username, site, gender, size, duration,
            filepath, hash, created_at, updated_at
        )
        VALUES %s
        ON CONFLICT (video_id) DO NOTHING;
    """
    values = [
        (
            d['video_id'], d['username'], d['site'], d['gender'],
            d['size'], d['duration'], d['filepath'],
            d['hash'], d['created_at'], d['updated_at']
        )
        for d in all_data
    ]
    execute_values(cursor, query, values)
    conn.commit()
    print(f"✅ Inserted {cursor.rowcount} new records.")


def get_files(dir):
    files = []
    for root, _, filenames in os.walk(dir):
        for filename in filenames:
            if filename.endswith('.json'):
                files.append(os.path.join(root, filename))
    return files


def main():
    all_records = []

    data_files = get_files(DOWNLOAD_DIR)  # already filtered to .json

    with tqdm(data_files, desc="Processing files", unit="file") as t:
        for filepath in t:
            try:
                record = parse_json_file(filepath)
                if record:  # parse_json_file returns None for invalid IDs
                    all_records.append(record)
            except Exception as e:
                print(f"❌ Failed to process {filepath}: {e}")

    if all_records:
        insert_data(all_records)
    else:
        print("⚠️ No new records to insert.")


def check_and_move():
    db_ids = get_video_ids_from_db()
    moved = 0

    for path in get_json_files(DOWNLOAD_DIR):
        video_id = os.path.splitext(os.path.basename(path))[0]
        if video_id in db_ids:
            output_path = os.path.join(DATA_DIR, os.path.basename(path))
            if os.path.exists(output_path):
                print(f"⚠️ Skipping {path} because it already exists in {DATA_DIR}/")
                continue
            shutil.move(path, output_path)
            moved += 1

    print(f"✅ Moved {moved} files to {DATA_DIR}/")


# Get all existing video IDs
def get_video_ids_from_db():
    cursor.execute("SELECT video_id FROM videos;")
    return {row['video_id'] for row in cursor.fetchall()}


# Iterate files
def get_json_files(dir):
    for root, _, files in os.walk(dir):
        for file in files:
            if file.endswith('.json'):
                yield os.path.join(root, file)


if __name__ == '__main__':
    main()
    check_and_move()
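The script indexes result rows by column name (`row['video_id']`), which implies `get_local_db_connection` hands back a dict-style cursor. A sketch of what `archiveConfig` might provide, with placeholder connection parameters rather than the real config:

```python
# archiveConfig.py — hypothetical sketch; DSN values are placeholders.
import psycopg2
from psycopg2.extras import RealDictCursor

def get_local_db_connection():
    conn = psycopg2.connect(
        host="localhost", dbname="archive",  # assumed database name
        user="archive", password="secret"    # placeholder credentials
    )
    # RealDictCursor lets callers write row['video_id'] instead of row[0]
    cursor = conn.cursor(cursor_factory=RealDictCursor)
    return conn, cursor
```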
@@ -1,36 +0,0 @@
# organize_thumbnails.py (fixed)
import os
import hashlib
import shutil

OLD_THUMB_DIR = "static/thumbnails"
HASHED_DIR = "static/thumbnails_hashed"


def hashed_path(video_id: str) -> str:
    """Return hashed path based on video ID (no extension)."""
    h = hashlib.md5(video_id.encode()).hexdigest()
    sub1, sub2 = h[:2], h[2:4]
    return os.path.join(HASHED_DIR, sub1, sub2, f"{video_id}.webp")


def organize_thumbnails():
    os.makedirs(HASHED_DIR, exist_ok=True)
    moved_count = 0

    for root, _, files in os.walk(OLD_THUMB_DIR):
        for file in files:
            video_id = os.path.splitext(file)[0]  # strip extension
            src_path = os.path.join(root, file)
            dest_path = hashed_path(video_id)

            os.makedirs(os.path.dirname(dest_path), exist_ok=True)

            if not os.path.exists(dest_path):
                shutil.move(src_path, dest_path)
                moved_count += 1
            else:
                print(f"[SKIP] Exists: {dest_path}")

    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")


if __name__ == "__main__":
    organize_thumbnails()
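The two-level fan-out (`h[:2]`, `h[2:4]`) caps every directory at 256 subdirectories, which keeps listings fast once the collection reaches hundreds of thousands of thumbnails. A sketch of the reverse lookup a server would do against this layout; `serve_thumbnail` is illustrative and not part of the original script:

```python
# Illustrative lookup built on hashed_path() above.
from typing import Optional

def serve_thumbnail(video_id: str) -> Optional[str]:
    """Resolve a video ID to its hashed thumbnail path, if it exists."""
    path = hashed_path(video_id)  # same md5 fan-out: HASHED_DIR/ab/cd/<id>.webp
    return path if os.path.exists(path) else None
```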
@@ -1,69 +0,0 @@
import os, shutil, config
import ffmpeg
from tqdm import tqdm


def is_av1(filepath):
    """Return True if the file has an AV1 video stream, False if not,
    or the string "Fucked" if the file cannot be probed at all."""
    try:
        probe = ffmpeg.probe(filepath)
        for stream in probe['streams']:
            if stream['codec_type'] == 'video' and 'codec_name' in stream:
                if stream['codec_name'] == 'av1':
                    return True
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return "Fucked"
    return False


def save_last_checked(filepath):
    with open(".last_checked", "w") as f:
        f.write(filepath)


def get_last_checked():
    if os.path.exists(".last_checked"):
        with open(".last_checked", "r") as f:
            return f.read().strip()
    return None


def init_list(videos):
    # Resume after the last checkpointed file, if a checkpoint exists
    last_checked = get_last_checked()
    if last_checked:
        for i, video in enumerate(videos):
            if os.path.basename(video['filepath']) == last_checked:
                return videos[i + 1:]
    return videos


def reencode_videos_av1():
    conn, cursor = config.get_local_db_connection()
    cursor.execute("SELECT filepath, id, codec FROM videos WHERE status != 'missing' AND filepath IS NOT NULL ORDER BY size ASC;")
    videos = cursor.fetchall()

    os.makedirs("fucked", exist_ok=True)

    videos = init_list(videos)

    with tqdm(videos, desc="Checking videos", unit="file") as pbar:
        for video in pbar:
            # Checkpoint every 100 files so an interrupted run can resume
            if pbar.n > 0 and pbar.n % 100 == 0:
                save_last_checked(os.path.basename(video['filepath']))

            if video['codec'] == 'av1':
                continue

            input_path = video['filepath']
            isav1 = is_av1(input_path)

            if isav1 == "Fucked":
                print(f"🚫 Error probing {input_path}")
                shutil.move(input_path, "fucked/" + os.path.basename(input_path))
                continue

            if isav1 is False:
                continue

            # Probe found AV1 but the DB row disagrees: record the codec
            cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", ('av1', video['id']))
            conn.commit()


if __name__ == "__main__":
    reencode_videos_av1()
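For environments without the ffmpeg-python wrapper, the same probe can be done against the ffprobe CLI directly. A minimal sketch, assuming ffprobe is on PATH; the function name is illustrative:

```python
# Alternative codec probe using the ffprobe CLI; not part of the original script.
import json, subprocess

def probe_video_codec(filepath: str):
    """Return the first video stream's codec name, or None on failure."""
    cmd = [
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_streams", "-select_streams", "v:0", filepath,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return None
    streams = json.loads(result.stdout).get("streams", [])
    return streams[0].get("codec_name") if streams else None
```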