You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

213 lines
7.7 KiB
Python

import os
from config import get_local_db_connection
from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
from tqdm import tqdm
import os, hashlib, subprocess, json
from config import get_local_db_connection
from concurrent.futures import ThreadPoolExecutor
# Root directory under which thumbnail files are written.
THUMB_DIR = "static/thumbnails"

# Width given to ffmpeg's scale filter; the height is passed as -1.
THUMB_WIDTH = 640

# Value for ffmpeg's -q:v option (a string because it is used as an argv item).
FF_QUALITY = "80"

# Directory trees that are walked when building the video file index.
VIDEO_DIRS = [
    "U:/streamaster",
    "E:/streamaster/downloaded",
]
def get_all_video_files():
    """Index every ``.mp4`` file found under the directories in ``VIDEO_DIRS``.

    Returns:
        A dict mapping bare file name -> full path. When the same file name
        occurs more than once, the path encountered last wins.
    """
    index = {}
    for base_dir in VIDEO_DIRS:
        for dirpath, _subdirs, names in os.walk(base_dir):
            index.update(
                (name, os.path.join(dirpath, name))
                for name in names
                if name.endswith(".mp4")
            )
    return index
def find_video_path(filename: str):
    """Return the indexed path for *filename*, or ``None`` if it is not indexed.

    The lookup reads the module-level ``all_videos`` mapping, which this file
    assigns in its ``__main__`` block; it must exist before this is called.
    """
    return all_videos.get(filename)
def mark_missing_videos(cursor, conn):
    """Set ``status = 'missing'`` on videos whose file is not in the index.

    Every row whose status is not already ``'missing'`` is checked: the base
    name of its ``filepath`` is looked up with ``find_video_path``. Rows with
    an empty filepath are skipped. The transaction is committed once after
    the scan.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Scanning for missing videos...") as progress:
        for row in rows:
            progress.update(1)
            stored_path = row["filepath"]
            if not stored_path:
                continue

            name = os.path.basename(stored_path)
            if find_video_path(name):
                continue

            print(f"🚫 Missing: {name}")
            cursor.execute(
                "UPDATE videos SET status = 'missing' WHERE video_id = %s",
                (row["video_id"],),
            )

    conn.commit()
def fill_missing_filepaths(cursor, conn):
    """Bring each video's stored ``filepath`` in line with the file index.

    For every row whose status is not ``'missing'``, the file
    ``<video_id>.mp4`` is looked up with ``find_video_path``. If it is found
    and its forward-slash form differs from the stored value, the row is
    updated. The transaction is committed once after the loop.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating filepaths...") as progress:
        for row in rows:
            progress.update(1)
            video_id = row["video_id"]

            located = find_video_path(f"{video_id}.mp4")
            if not located:
                continue

            # Store paths with forward slashes regardless of the OS separator.
            normalized = located.replace("\\", "/")
            if normalized != row["filepath"]:
                cursor.execute(
                    "UPDATE videos SET filepath = %s WHERE video_id = %s",
                    (normalized, video_id),
                )

    conn.commit()
def fill_missing_hashes(cursor, conn):
    """Store a file hash for non-missing videos whose ``hash`` is NULL or empty.

    Rows whose filepath is empty or does not exist on disk are skipped. The
    hash value comes from ``calculate_file_hash``. The transaction is
    committed once after the loop.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating hashes...") as progress:
        for row in rows:
            progress.update(1)
            source = row["filepath"]
            if not source or not os.path.exists(source):
                continue

            digest = calculate_file_hash(source)
            cursor.execute(
                "UPDATE videos SET hash = %s WHERE video_id = %s",
                (digest, row["video_id"]),
            )

    conn.commit()
def fill_missing_sizes(cursor, conn):
    """Store a size for non-missing videos whose ``size`` column is 0.

    Rows whose filepath is empty or does not exist on disk are skipped. The
    stored value is whatever ``get_file_size_in_mb`` returns for the file.
    The transaction is committed once after the loop.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE size = 0 AND status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating sizes...") as progress:
        for row in rows:
            progress.update(1)
            source = row["filepath"]
            if not source or not os.path.exists(source):
                continue

            measured = get_file_size_in_mb(source)
            cursor.execute(
                "UPDATE videos SET size = %s WHERE video_id = %s",
                (measured, row["video_id"]),
            )

    conn.commit()
def fill_missing_durations(cursor, conn):
    """Store a duration for non-missing videos whose ``duration`` column is 0.

    Rows whose filepath is empty or does not exist on disk are skipped. The
    stored value is whatever ``get_duration`` returns for the file. The
    transaction is committed once after the loop.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating durations...") as progress:
        for row in rows:
            progress.update(1)
            source = row["filepath"]
            if not source or not os.path.exists(source):
                continue

            length = get_duration(source)
            cursor.execute(
                "UPDATE videos SET duration = %s WHERE video_id = %s",
                (length, row["video_id"]),
            )

    conn.commit()
def fill_missing_gender(cursor, conn):
    """Fill the ``gender`` column for (username, site) pairs that lack one.

    For each distinct pair with a NULL gender (and status not ``'missing'``):

    1. Reuse a gender already stored on another row of the same pair.
    2. Otherwise call ``chaturbate.get_data(username)`` and map its ``sex``
       field: contains ``'woman'`` -> ``'Female'``, ``'couple'`` ->
       ``'Couple'``, ``'trans'`` -> ``'Trans'``. Any other value is printed
       and the pair is skipped.

    A pair is also skipped when the lookup returns nothing, reports status
    401, or has no ``sex`` key (previously the last case raised ``KeyError``
    and aborted the whole run). Each successful update is committed
    immediately.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commits.
    """
    import chaturbate

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
    pairs = cursor.fetchall()

    with tqdm(pairs, desc="Updating genders...") as pbar:
        for pair in pairs:
            pbar.update(1)
            username, site = pair["username"], pair["site"]

            # Prefer a value that is already known for this user on this site.
            cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
            known = cursor.fetchone()

            if known:
                gender_str = known['gender']
            else:
                profile = chaturbate.get_data(username)
                if not profile:
                    continue
                if 'status' in profile and profile['status'] == 401:
                    continue
                if 'sex' not in profile:
                    # e.g. an error payload other than 401: nothing to map.
                    continue

                sex = profile['sex']
                if 'woman' in sex:
                    gender_str = 'Female'
                elif 'couple' in sex:
                    gender_str = 'Couple'
                elif 'trans' in sex:
                    gender_str = 'Trans'
                else:
                    print(f"fuck?: {sex}")
                    continue

            cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
            conn.commit()
            print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
def generate_thumbnails_for_videos(cursor, conn):
    """Generate missing thumbnails with ffmpeg and record their paths.

    Selects non-missing videos whose ``thumbnail`` column is NULL. Rows with
    an empty filepath or a file that does not exist on disk are skipped.
    For the rest, a thumbnail is rendered unless the target file already
    exists, and the forward-slash thumbnail path is written to the row. The
    transaction is committed once at the end.

    A path is only written when the thumbnail file actually exists after the
    ffmpeg pass, so a failed render leaves the column NULL and the video is
    picked up again on the next run.

    Args:
        cursor: DB cursor returning dict-like rows.
        conn: DB connection used for the commit.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing' AND thumbnail IS NULL")
    videos = cursor.fetchall()

    candidates = []  # (video_id, thumb_path) for every row eligible for an update
    tasks = []       # (src, dest) pairs that still have to be rendered
    for v in videos:
        video_id = v.get("video_id")
        filepath = v.get("filepath")
        if not filepath or not os.path.exists(filepath):
            continue

        # Resolved only for usable rows: _hashed_thumb_path creates directories.
        thumb_path = _hashed_thumb_path(video_id)
        if not os.path.exists(thumb_path):
            tasks.append((filepath, thumb_path))
        candidates.append((video_id, thumb_path))

    def _render(task):
        src, dest = task
        return subprocess.run(
            _gen_thumb_cmd(src, dest),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

    if tasks:
        # os.cpu_count() may return None; fall back to a single CPU.
        workers = (os.cpu_count() or 1) * 2
        with ThreadPoolExecutor(max_workers=workers) as exe:
            list(exe.map(_render, tasks))

    for video_id, thumb_path in candidates:
        if not os.path.exists(thumb_path):
            print(f"⚠️ Thumbnail not generated for {video_id}")
            continue
        cursor.execute(
            "UPDATE videos SET thumbnail = %s WHERE video_id = %s",
            (thumb_path.replace("\\", "/"), video_id),
        )

    conn.commit()
def _hashed_thumb_path(video_id: str):
    """Return the ``.webp`` thumbnail path for *video_id*, creating its folder.

    The file lives two levels below ``THUMB_DIR``; the folder names are the
    first and second pair of hex characters of the MD5 digest of the id.
    The folder is created if it does not exist yet.
    """
    digest = hashlib.md5(video_id.encode()).hexdigest()
    shard_dir = os.path.join(THUMB_DIR, digest[:2], digest[2:4])
    os.makedirs(shard_dir, exist_ok=True)
    return os.path.join(shard_dir, f"{video_id}.webp")
def _gen_thumb_cmd(src: str, dest: str):
    """Build the ffmpeg argument list that writes one frame of *src* to *dest*.

    The frame goes through the ``thumbnail`` filter and is scaled to
    ``THUMB_WIDTH``; ``FF_QUALITY`` is passed as ``-q:v``. Returns a list
    suitable for ``subprocess.run``.
    """
    video_filter = f"thumbnail,scale={THUMB_WIDTH}:-1"

    cmd = ["ffmpeg", "-y", "-loglevel", "error"]
    cmd += ["-ss", "0", "-i", src]
    cmd += ["-vframes", "1"]
    cmd += ["-vf", video_filter]
    cmd += ["-q:v", FF_QUALITY]
    cmd.append(dest)
    return cmd
if __name__ == '__main__':
    # Script entry point: open the DB, build the file index, run the enabled
    # maintenance tasks, and always release the DB handles afterwards.
    conn, cursor = get_local_db_connection()
    try:
        # find_video_path() reads this module-level index, so it has to be
        # assigned here (at module scope) before any task runs.
        all_videos = get_all_video_files()

        print("🔍 Scanning for missing data...")

        # Tasks are toggled by (un)commenting the calls below.
        fill_missing_filepaths(cursor, conn)
        # fill_missing_durations(cursor, conn)
        # fill_missing_sizes(cursor, conn)
        # generate_thumbnails_for_videos(cursor, conn)
        # fill_missing_hashes(cursor, conn)
        # mark_missing_videos(cursor, conn)
        # fill_missing_gender(cursor, conn)
    finally:
        # Close the handles even when a task raises.
        cursor.close()
        conn.close()

    print("✅ All cleanup tasks completed.")