cleanup
parent
4b1b7d08e3
commit
1a3ee2e430
@ -1,100 +0,0 @@
|
|||||||
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
|
||||||
import os, cv2
|
|
||||||
|
|
||||||
def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    """Prepend an intro clip to a video and write the combined result.

    The intro is resized and retimed to match the main clip.  If the main
    clip has audio but the intro does not, a silent track is attached to
    the intro so the audio stays in sync after concatenation.

    Args:
        input_video: Path of the main video file.
        intro_video: Path of the intro clip (default 'intro.mp4').
        output_video: Path of the H.264 file to write (default 'output.mp4').
    """
    clip_main = VideoFileClip(input_video)
    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)

    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        import numpy as np

        n_samples = int(clip_intro.duration * clip_main.audio.fps)
        # AudioArrayClip expects an array of shape (n_samples, n_channels).
        # The previous [[0] * n_samples] literal produced a single 1-sample
        # "frame" with n_samples channels, i.e. a broken silent track.
        silence = np.zeros((n_samples, clip_main.audio.nchannels))
        silent_audio = AudioArrayClip(silence, fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)

    final_clip = concatenate_videoclips([clip_intro, clip_main])
    final_clip.write_videofile(output_video, codec='libx264')
|
|
||||||
|
|
||||||
def get_duration(input_file):
    """Return the duration of *input_file* in whole seconds.

    Derives the duration from OpenCV's frame-count and fps metadata.
    Returns 0 (after printing a diagnostic) when the file is missing or
    cannot be probed.
    """
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0

    video = None
    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Files with missing/zero fps metadata previously raised
            # ZeroDivisionError; treat them as unreadable instead.
            print('Could not determine fps')
            return 0
        return int(frames / fps)
    except Exception as e:
        print(e)
        return 0
    finally:
        # Release the capture handle on every path; the original leaked it
        # when an exception fired before video.release().
        if video is not None:
            video.release()
|
|
||||||
|
|
||||||
def generate_thumbnails(input_file, filename):
    """Generate a poster image and a short preview reel for a video.

    Writes a single poster frame to temp/<filename>.jpg and a muted,
    384x216 preview made of ten 1-second samples to temp/<filename>.mp4.

    Args:
        input_file: Path of the source video.
        filename: Stem used to name both output files.

    Returns:
        Tuple (posterPath, previewPath) of the written file paths.

    Raises:
        ValueError: If input_file does not exist.
    """
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Use the caller-supplied stem; the previous version ignored the
    # `filename` parameter and wrote a hard-coded name instead.
    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')

    clip = VideoFileClip(input_file)
    duration = clip.duration

    # Eleven intervals: sample starts at 0/11 .. 9/11 of the runtime so the
    # reel never starts a window at the very end of the file.
    interval = duration / 11.0

    start_time_first_clip = 0 * interval
    try:
        clip.save_frame(posterPath, t=start_time_first_clip)
    except Exception:
        # Poster extraction is best-effort; a failed poster must not abort
        # preview generation.
        pass

    clips = []
    for i in range(10):
        start_time = i * interval
        # Clamp so the final 1-second window cannot run past the end of a
        # short video (subclip would raise otherwise).
        end_time = min(start_time + 1, duration)
        clips.append(clip.subclip(start_time, end_time))

    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")

    for subclip in clips:
        subclip.close()

    clip.close()
    final_clip.close()

    return posterPath, previewPath
|
|
||||||
|
|
||||||
def split_video(file_path, segment_size_gb=8):
    """Split a video into roughly equal segments of at most ~segment_size_gb.

    Computes the number of segments from the file size, derives a
    per-segment duration (assuming roughly constant bitrate), and invokes
    FFmpeg's stream-copy segment muxer (no re-encode).  Output files are
    named <stem>_segment_000<ext>, <stem>_segment_001<ext>, ...

    Args:
        file_path: Path of the video to split.
        segment_size_gb: Target maximum size of each segment in GiB.

    Raises:
        ValueError: If the video duration cannot be determined.
    """
    import subprocess

    # Convert GiB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024

    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)

    # Ceiling division; the old `total // size + 1` produced one extra
    # (empty) segment whenever the size divided evenly.
    num_segments = max(1, -(-total_size_bytes // segment_size_bytes))

    # Get the duration of the video file
    duration = get_duration(file_path)
    if duration <= 0:
        # get_duration returns 0 on failure; bail out instead of dividing
        # by zero below.
        raise ValueError(f'Could not determine duration of {file_path}')

    # Calculate the duration of each segment
    segment_duration = duration / num_segments

    # Generate output file pattern, e.g. foo_segment_%03d.mp4
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"

    # Run FFmpeg command to split the video (stream copy, all streams mapped)
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command)
|
|
||||||
@ -1,138 +0,0 @@
|
|||||||
from archiveConfig import get_local_db_connection
|
|
||||||
from psycopg2.extras import execute_values
|
|
||||||
from datetime import datetime
|
|
||||||
import uuid, shutil, json, os
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# Destination for JSON files whose video_id is already recorded in the DB.
DATA_DIR = 'data'
# Where freshly downloaded JSON metadata lands before ingestion.
DOWNLOAD_DIR = 'downloaded'

# Module-level DB handle: opened once at import time and shared by every
# function below.  NOTE(review): rows are read by key (row['video_id']),
# so this presumably returns a dict-style cursor — confirm in archiveConfig.
conn, cursor = get_local_db_connection()
|
|
||||||
|
|
||||||
def is_valid_uuid(val: str, version=None) -> bool:
    """Check whether *val* is a canonically formatted UUID string.

    Parses the value with uuid.UUID (optionally pinning the UUID version)
    and then requires the canonical form to round-trip against the
    lower-cased input, so only dashed 8-4-4-4-12 strings pass.
    """
    try:
        if version:
            parsed = uuid.UUID(val, version=version)
        else:
            parsed = uuid.UUID(val)
    except (ValueError, AttributeError, TypeError):
        # Not parseable at all (wrong format, wrong type, None, ...).
        return False
    # Reject inputs that uuid.UUID would merely normalize
    # (un-dashed hex, braces, urn: prefixes) — casing is tolerated.
    return str(parsed) == val.lower()
|
|
||||||
|
|
||||||
def parse_json_file(filepath):
    """Parse one per-video metadata JSON file into a DB-ready dict.

    The file's basename (minus extension) is used as the video_id and must
    be a valid UUID.  The created timestamp is read from "createdAt" (or
    the legacy "date" key); "updatedAt" falls back to the created time.
    All timestamps use the "%Y-%m-%d %H:%M:%S" format.

    Returns:
        dict with keys video_id, username, site, gender, size, duration,
        filepath, jsonpath, hash, created_at, updated_at — or None when
        the video_id is not a valid UUID.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Fix: `date` was previously unbound (NameError) when neither key
    # existed; default to None so the warning branch below is reachable.
    date = None
    if "createdAt" in data:
        date = data.get("createdAt")
    elif "date" in data:
        date = data.get("date")

    if date:
        created_at = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    else:
        created_at = None
        print(f"⚠️ No createdAt or date found in {filepath}")

    if "updatedAt" in data:
        updated_at = datetime.strptime(data.get("updatedAt"), "%Y-%m-%d %H:%M:%S")
    else:
        updated_at = created_at

    video_id = os.path.splitext(os.path.basename(filepath))[0]
    if not is_valid_uuid(video_id):
        print(f"⚠️ Invalid video_id: {video_id}")
        return None

    parsed_data = {
        'video_id': video_id,
        'username': data.get("username"),
        'site': data.get("site"),
        'gender': data.get("gender"),
        'size': data.get("size") if data.get("size") else 0,
        'duration': data.get("duration") if data.get("duration") else 0,
        'filepath': data.get("filepath"),
        'jsonpath': data.get("jsonpath"),
        'hash': None,  # You can add hash calculation here if needed
        'created_at': created_at,
        'updated_at': updated_at
    }

    return parsed_data
|
|
||||||
|
|
||||||
def insert_data(all_data):
    """Bulk-insert parsed video records, silently skipping duplicate ids.

    Uses psycopg2's execute_values for a single multi-row INSERT against
    the module-level connection; conflicts on video_id are ignored.
    """
    query = """
    INSERT INTO videos (
        video_id, username, site, gender, size, duration,
        filepath, hash, created_at, updated_at
    )
    VALUES %s
    ON CONFLICT (video_id) DO NOTHING;
    """
    # Column order must mirror the INSERT column list above.
    columns = ('video_id', 'username', 'site', 'gender',
               'size', 'duration', 'filepath',
               'hash', 'created_at', 'updated_at')
    values = [tuple(record[col] for col in columns) for record in all_data]

    execute_values(cursor, query, values)
    conn.commit()
    print(f"✅ Inserted {cursor.rowcount} new records.")
|
|
||||||
|
|
||||||
def get_files(dir):
    """Recursively collect the path of every .json file below *dir*."""
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(dir)
        for name in names
        if name.endswith('.json')
    ]
|
|
||||||
|
|
||||||
def main():
    """Parse every downloaded JSON metadata file and insert the results.

    Walks DOWNLOAD_DIR, parses each .json file, and bulk-inserts the valid
    records.  Per-file failures are logged and skipped.
    """
    all_records = []

    data_files = [f for f in get_files(DOWNLOAD_DIR) if f.endswith('.json')]

    with tqdm(data_files, desc="Processing files", unit="file") as t:
        for filepath in data_files:
            t.update(1)
            try:
                record = parse_json_file(filepath)
                # parse_json_file returns None for invalid video_ids; the
                # old code appended those Nones and insert_data then
                # crashed on record['video_id'].
                if record is not None:
                    all_records.append(record)
            except Exception as e:
                print(f"❌ Failed to process {filepath}: {e}")

    if all_records:
        insert_data(all_records)
    else:
        print("⚠️ No new records to insert.")
|
|
||||||
|
|
||||||
def check_and_move():
    """Move JSON files whose video_id already exists in the DB into DATA_DIR.

    Files still in DOWNLOAD_DIR that are recorded in the videos table are
    archived into DATA_DIR; existing destination files are left alone.
    """
    db_ids = get_video_ids_from_db()
    moved = 0

    for path in get_json_files(DOWNLOAD_DIR):
        video_id = os.path.splitext(os.path.basename(path))[0]
        if video_id in db_ids:
            output_path = os.path.join(DATA_DIR, os.path.basename(path))
            if os.path.exists(output_path):
                # Message fixed: the collision is in DATA_DIR (the
                # destination), not DOWNLOAD_DIR as previously printed.
                print(f"⚠️ Skipping {path} because it already exists in {DATA_DIR}/")
                continue
            shutil.move(path, output_path)
            moved += 1

    # Message fixed: files are moved into DATA_DIR, not DOWNLOAD_DIR.
    print(f"✅ Moved {moved} files to {DATA_DIR}/")
|
|
||||||
|
|
||||||
# Get all existing video IDs
def get_video_ids_from_db():
    """Return the set of every video_id currently stored in the DB."""
    # Uses the shared module-level cursor.  NOTE(review): row['video_id']
    # assumes dict-like rows (e.g. a RealDictCursor) — a plain tuple
    # cursor would break this lookup; confirm the cursor factory.
    cursor.execute("SELECT video_id FROM videos;")
    return {row['video_id'] for row in cursor.fetchall()}
|
|
||||||
|
|
||||||
# Iterate files
def get_json_files(dir):
    """Lazily yield the path of every .json file under *dir* (recursive)."""
    for folder, _dirs, filenames in os.walk(dir):
        yield from (
            os.path.join(folder, name)
            for name in filenames
            if name.endswith('.json')
        )
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Ingest freshly downloaded metadata into the DB, then archive the
    # JSON files that are now recorded.
    main()
    check_and_move()
|
|
||||||
@ -1,36 +0,0 @@
|
|||||||
# organize_thumbnails.py (fixed)
|
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
# Legacy flat layout: static/thumbnails/<video_id>.<ext>
OLD_THUMB_DIR = "static/thumbnails"
# Hashed fan-out layout: static/thumbnails_hashed/<h[:2]>/<h[2:4]>/<video_id>.webp
HASHED_DIR = "static/thumbnails_hashed"
|
|
||||||
|
|
||||||
def hashed_path(video_id: str) -> str:
    """Return hashed path based on video ID (no extension)."""
    digest = hashlib.md5(video_id.encode()).hexdigest()
    # Two-level fan-out on the first four hex chars of the MD5 keeps any
    # single directory from accumulating a huge number of files.
    return os.path.join(HASHED_DIR, digest[:2], digest[2:4], f"{video_id}.webp")
|
|
||||||
|
|
||||||
def organize_thumbnails():
    """Migrate every thumbnail from the flat layout into the hashed tree.

    Walks OLD_THUMB_DIR, moves each file to its hashed_path() location,
    and leaves (and reports) any destination that already exists, so the
    migration is safe to re-run.
    """
    os.makedirs(HASHED_DIR, exist_ok=True)
    moved_count = 0

    for folder, _dirs, names in os.walk(OLD_THUMB_DIR):
        for name in names:
            source = os.path.join(folder, name)
            # The hashed location is keyed on the bare video id (no ext).
            target = hashed_path(os.path.splitext(name)[0])

            os.makedirs(os.path.dirname(target), exist_ok=True)

            if os.path.exists(target):
                print(f"[SKIP] Exists: {target}")
                continue
            shutil.move(source, target)
            moved_count += 1

    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Run the one-off migration from the flat to the hashed layout.
    organize_thumbnails()
|
|
||||||
@ -1,69 +0,0 @@
|
|||||||
import os, shutil, config
|
|
||||||
import ffmpeg
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
def is_av1(filepath):
    """Probe *filepath* and report whether its video stream is AV1.

    Returns True/False on a successful probe, or the sentinel string
    "Fucked" when ffprobe itself fails (callers compare against it to
    quarantine unreadable files).
    """
    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return "Fucked"
    return any(
        stream['codec_type'] == 'video' and stream.get('codec_name') == 'av1'
        for stream in probe['streams']
    )
|
|
||||||
|
|
||||||
def save_last_checked(filepath):
    """Persist *filepath* as the resume checkpoint (.last_checked in cwd)."""
    with open(".last_checked", "w") as checkpoint:
        checkpoint.write(filepath)
|
|
||||||
|
|
||||||
def get_last_checked():
    """Return the saved resume checkpoint, or None if none was recorded."""
    if not os.path.exists(".last_checked"):
        return None
    with open(".last_checked", "r") as checkpoint:
        return checkpoint.read().strip()
|
|
||||||
|
|
||||||
def init_list(videos):
    """Drop the prefix of *videos* that was already processed.

    Reads the checkpoint written by save_last_checked and returns the
    slice of *videos* starting just after the matching entry; the full
    list is returned when there is no checkpoint or no match.
    """
    last_checked = get_last_checked()
    if last_checked:
        # enumerate instead of videos.index(video): the old lookup was
        # O(n) per iteration and mis-sliced at the first duplicate when
        # two rows shared the same dict contents.
        for idx, video in enumerate(videos):
            if os.path.basename(video['filepath']) == last_checked:
                return videos[idx + 1:]
    return videos
|
|
||||||
|
|
||||||
def reencode_videos_av1():
    """Scan the video library and tag AV1-encoded files in the database.

    NOTE(review): despite the name, this function does not re-encode
    anything — it only probes each file and updates the `codec` column.
    Files that cannot be probed at all are quarantined into fucked/.
    """
    conn, cursor = config.get_local_db_connection()
    # Smallest files first, so cheap probes come early in the run.
    cursor.execute("SELECT filepath, id, codec FROM videos WHERE status != 'missing' AND filepath IS NOT NULL ORDER BY size ASC;")
    videos = cursor.fetchall()

    # Quarantine directory for unreadable files.
    os.makedirs("fucked", exist_ok=True)

    # Resume after the last checkpointed file, if any.
    videos = init_list(videos)

    with tqdm(videos, desc="Checking videos", unit="file") as pbar:
        for video in videos:
            pbar.update(1)

            # Checkpoint every 100 files so an interrupted run can resume.
            if pbar.n % 100 == 0:
                save_last_checked(os.path.basename(video['filepath']))

            # Already tagged in the DB — skip the (slow) probe.
            if video['codec'] == 'av1':
                continue

            input_path = video['filepath']
            isav1 = is_av1(input_path)

            # is_av1 returns the string sentinel "Fucked" when ffprobe
            # itself failed; move the file aside and carry on.
            if isav1 == "Fucked":
                print(f"🚫 Error probing {input_path}")
                shutil.move(input_path, "fucked/" + os.path.basename(input_path))
                continue

            if isav1 == False:
                continue

            # Probe says AV1: record it so future runs skip the file.
            cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", ('av1', video['id']))
            conn.commit()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Probe the library and tag AV1 files in the DB (resumable).
    reencode_videos_av1()
|
|
||||||
Loading…
Reference in New Issue