optimization
parent
ea7d8a4635
commit
ecfa2f8745
@ -0,0 +1,8 @@
|
||||
import requests
|
||||
|
||||
|
||||
def get_data(username, timeout=10):
    """Fetch the Chaturbate ``biocontext`` JSON document for *username*.

    Args:
        username: Account name interpolated into the API URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection, which would hang the whole cleanup run.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        ValueError: If the response body is not valid JSON (the JSON
            decode error raised by ``Response.json()``).
    """
    url = f"https://chaturbate.com/api/biocontext/{username}"
    response = requests.get(url, timeout=timeout)
    return response.json()
|
||||
@ -0,0 +1,211 @@
|
||||
import os
|
||||
from config import get_local_db_connection
|
||||
from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
|
||||
from tqdm import tqdm
|
||||
|
||||
import os, hashlib, subprocess, json
|
||||
from config import get_local_db_connection
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Root directory for generated thumbnails; _hashed_thumb_path() shards files
# into sub-directories below it.
THUMB_DIR = "static/thumbnails"
# Width handed to ffmpeg's scale filter (the height is given as -1).
THUMB_WIDTH = 640
# Value for ffmpeg's -q:v option; a string because it is placed directly in
# the argv list built by _gen_thumb_cmd().
FF_QUALITY = "80"

# Directory trees that get_all_video_files() walks looking for .mp4 files.
VIDEO_DIRS = ["U:/streamaster", "E:/streamaster/downloaded"]
|
||||
|
||||
def get_all_video_files(video_dirs=None):
    """Index every ``.mp4`` file found under the given directory trees.

    Args:
        video_dirs: Iterable of root directories to walk recursively.
            Defaults to the module-level ``VIDEO_DIRS``.

    Returns:
        A dict mapping bare filename -> joined path. When the same filename
        occurs in more than one place, the path encountered last wins.
        Directories that do not exist contribute nothing (``os.walk``
        ignores listing errors by default).
    """
    if video_dirs is None:
        video_dirs = VIDEO_DIRS

    files = {}
    for base in video_dirs:
        for root, _, filenames in os.walk(base):
            for filename in filenames:
                if filename.endswith(".mp4"):
                    files[filename] = os.path.join(root, filename)
    return files
|
||||
|
||||
def find_video_path(filename: str):
    """Look up *filename* in the module-level ``all_videos`` index.

    ``all_videos`` is assigned in the script's ``__main__`` section from
    get_all_video_files().

    Returns:
        The path stored for *filename*, or ``None`` when it is not indexed.
    """
    return all_videos.get(filename)
|
||||
|
||||
def mark_missing_videos(cursor, conn):
    """Flag videos whose file is absent from the scanned directories.

    Every row whose status is not already ``'missing'`` is checked: the
    basename of its stored ``filepath`` is looked up with find_video_path(),
    and rows with no match get ``status = 'missing'``. Rows without a
    filepath are skipped.

    Args:
        cursor: DB cursor; rows are read by column name, the same way
            generate_thumbnails_for_videos() reads them.
        conn: Connection used to commit each status change.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
    videos = cursor.fetchall()

    # Iterating the bar directly advances it once per row.
    for vid in tqdm(videos, desc="Scanning for missing videos..."):
        # Access by key so the code does not depend on column order.
        video_id = vid["video_id"]
        filepath = vid["filepath"]
        if not filepath:
            continue

        filename = os.path.basename(filepath)
        if find_video_path(filename):
            continue

        # tqdm.write prints without corrupting the active progress bar.
        tqdm.write(f"🚫 Missing: {filename}")
        cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
        # Commit per row so an interrupted run keeps the work done so far.
        conn.commit()
|
||||
|
||||
def fill_missing_filepaths(cursor, conn):
    """Point each video row at the file currently found on disk.

    For every row whose status is not ``'missing'``, ``<video_id>.mp4`` is
    looked up with find_video_path(). When a path is found and its
    forward-slash form differs from the stored ``filepath``, the row is
    updated and committed.

    Args:
        cursor: DB cursor whose rows unpack (via ``.values()``) into
            ``video_id, filepath``.
        conn: Connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating filepaths...") as progress:
        for row in rows:
            progress.update(1)
            video_id, stored_path = row.values()

            located = find_video_path(f"{video_id}.mp4")
            if not located:
                continue

            # Store paths with forward slashes regardless of the OS separator.
            normalized = located.replace("\\", "/")
            if normalized == stored_path:
                continue

            cursor.execute("UPDATE videos SET filepath = %s WHERE video_id = %s", (normalized, video_id))
            conn.commit()
|
||||
|
||||
def fill_missing_hashes(cursor, conn):
    """Compute and store the hash of every video that has none.

    Selects non-missing rows whose ``hash`` is NULL or empty; for each one
    whose file exists on disk, stores the result of calculate_file_hash().

    Args:
        cursor: DB cursor whose rows unpack (via ``.values()``) into
            ``video_id, filepath``.
        conn: Connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status != 'missing'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating hashes...") as progress:
        for row in rows:
            progress.update(1)
            video_id, filepath = row.values()

            # Nothing to hash when the path is unset or the file is gone.
            if not filepath or not os.path.exists(filepath):
                continue

            digest = calculate_file_hash(filepath)
            cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (digest, video_id))
            conn.commit()
|
||||
|
||||
def fill_missing_sizes(cursor, conn):
    """Store the file size of every video whose ``size`` is still 0.

    Only non-missing rows are considered, and only those whose file exists
    on disk are updated, using get_file_size_in_mb().

    Args:
        cursor: DB cursor whose rows unpack (via ``.values()``) into
            ``video_id, filepath``.
        conn: Connection used to commit each update.
    """
    select_sql = "SELECT video_id, filepath FROM videos WHERE size = 0 AND status != 'missing'"
    update_sql = "UPDATE videos SET size = %s WHERE video_id = %s"

    cursor.execute(select_sql)
    pending = cursor.fetchall()

    with tqdm(pending, desc="Updating sizes...") as bar:
        for entry in pending:
            bar.update(1)
            video_id, filepath = entry.values()
            if filepath and os.path.exists(filepath):
                cursor.execute(update_sql, (get_file_size_in_mb(filepath), video_id))
                conn.commit()
|
||||
|
||||
def fill_missing_durations(cursor, conn):
    """Store the duration of every video whose ``duration`` is still 0.

    Only non-missing rows are considered, and only those whose file exists
    on disk are updated, using get_duration().

    Args:
        cursor: DB cursor whose rows unpack (via ``.values()``) into
            ``video_id, filepath``.
        conn: Connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing'")
    todo = cursor.fetchall()

    with tqdm(todo, desc="Updating durations...") as meter:
        for record in todo:
            meter.update(1)
            video_id, filepath = record.values()

            file_present = bool(filepath) and os.path.exists(filepath)
            if not file_present:
                continue

            seconds = get_duration(filepath)
            cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (seconds, video_id))
            conn.commit()
|
||||
|
||||
def _gender_label_from_sex(sex):
    """Map the API's ``sex`` text to the label stored in the database.

    Keywords are tested in a fixed order; the first one contained in *sex*
    wins. Returns ``None`` when no keyword matches.
    """
    for keyword, label in (("woman", "Female"), ("couple", "Couple"), ("trans", "Trans")):
        if keyword in sex:
            return label
    return None


def fill_missing_gender(cursor, conn):
    """Fill in ``gender`` for every (username, site) pair that lacks one.

    For each pair, a gender already recorded on another row of the same
    pair is reused. Otherwise the value is derived from the ``sex`` field
    returned by ``chaturbate.get_data``. Pairs for which no gender can be
    determined are skipped and left NULL.

    Args:
        cursor: DB cursor; rows are read by column name.
        conn: Connection used to commit after each pair is updated.
    """
    import chaturbate

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
    videos = cursor.fetchall()

    for vid in tqdm(videos, desc="Updating genders..."):
        username = vid["username"]
        site = vid["site"]

        # Prefer a gender already stored for the same username and site.
        cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
        known = cursor.fetchone()

        if known:
            gender_str = known["gender"]
        else:
            # NOTE(review): the API is queried regardless of `site` —
            # confirm whether non-Chaturbate sites should be skipped.
            data = chaturbate.get_data(username)
            if not data:
                continue
            if data.get("status") == 401:
                continue

            # A payload without 'sex' (e.g. another error response) used
            # to raise KeyError and abort the run; skip it instead.
            sex = data.get("sex")
            if not sex:
                continue

            gender_str = _gender_label_from_sex(sex)
            if gender_str is None:
                # tqdm.write prints without corrupting the progress bar.
                tqdm.write(f"fuck?: {sex}")
                continue

        cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
        conn.commit()
        tqdm.write(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
|
||||
|
||||
def _run_thumb_job(job):
    """Run ffmpeg for one ``(source, destination)`` pair.

    Returns ``True`` when ffmpeg exits with status 0.
    """
    src, dest = job
    result = subprocess.run(
        _gen_thumb_cmd(src, dest),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return result.returncode == 0


def generate_thumbnails_for_videos(cursor, conn):
    """Create missing thumbnails and record their paths in the database.

    Considers non-missing videos whose ``thumbnail`` column is NULL and
    whose source file exists. Thumbnails not yet on disk are generated in
    parallel with ffmpeg. A row is updated only when its thumbnail file
    actually exists afterwards, so a failed generation stays NULL and is
    retried on the next run.

    Args:
        cursor: DB cursor; rows are read by column name.
        conn: Connection; committed once after all rows are updated.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing' AND thumbnail IS NULL")
    videos = cursor.fetchall()

    candidates = []  # (video_id, thumb_path) for every usable video
    jobs = []        # (filepath, thumb_path) still to be generated
    for v in videos:
        video_id = v.get("video_id")
        filepath = v.get("filepath")

        if not filepath or not os.path.exists(filepath):
            continue

        # Resolved only for usable videos: the helper creates directories.
        thumb_path = _hashed_thumb_path(video_id)
        candidates.append((video_id, thumb_path))
        if not os.path.exists(thumb_path):
            jobs.append((filepath, thumb_path))

    failed = set()
    if jobs:
        # os.cpu_count() may return None; fall back to a single core.
        workers = (os.cpu_count() or 1) * 2
        with ThreadPoolExecutor(max_workers=workers) as exe:
            for (_, dest), ok in zip(jobs, exe.map(_run_thumb_job, jobs)):
                if not ok:
                    failed.add(dest)

    for video_id, thumb_path in candidates:
        # Skip failures: a non-zero exit may still leave a partial file.
        if thumb_path in failed or not os.path.exists(thumb_path):
            continue
        stored = thumb_path.replace("\\", "/")
        cursor.execute("UPDATE videos SET thumbnail = %s WHERE video_id = %s", (stored, video_id))

    conn.commit()
|
||||
|
||||
def _hashed_thumb_path(video_id: str):
    """Return the thumbnail path for *video_id*, creating its directory.

    Thumbnails are sharded two levels deep under ``THUMB_DIR`` using the
    first four hex characters of the MD5 of the video id (two per level).
    The directory is created if needed; the ``.webp`` file itself is not.
    """
    digest = hashlib.md5(video_id.encode()).hexdigest()
    shard_dir = os.path.join(THUMB_DIR, digest[:2], digest[2:4])
    os.makedirs(shard_dir, exist_ok=True)
    return os.path.join(shard_dir, f"{video_id}.webp")
|
||||
|
||||
def _gen_thumb_cmd(src: str, dest: str):
    """Build the ffmpeg argv list that writes one thumbnail of *src* to *dest*.

    The command seeks to position 0, emits a single frame through the
    ``thumbnail`` filter scaled to ``THUMB_WIDTH``, and uses ``FF_QUALITY``
    for ``-q:v``. ``-y`` lets ffmpeg overwrite an existing *dest*.
    """
    video_filter = f"thumbnail,scale={THUMB_WIDTH}:-1"

    cmd = ["ffmpeg", "-y", "-loglevel", "error"]
    cmd += ["-ss", "0", "-i", src]
    cmd += ["-vframes", "1"]
    cmd += ["-vf", video_filter]
    cmd += ["-q:v", FF_QUALITY]
    cmd.append(dest)
    return cmd
|
||||
|
||||
if __name__ == '__main__':
    conn, cursor = get_local_db_connection()
    try:
        # Module-level index read by find_video_path().
        all_videos = get_all_video_files()

        print("🔍 Scanning for missing data...")

        # Commented-out steps are currently disabled; re-enable as needed.
        fill_missing_filepaths(cursor, conn)
        # mark_missing_videos(cursor, conn)
        # fill_missing_hashes(cursor, conn)
        fill_missing_sizes(cursor, conn)
        fill_missing_durations(cursor, conn)
        # fill_missing_gender(cursor, conn)
        generate_thumbnails_for_videos(cursor, conn)
    finally:
        # Release the DB resources even when a task raises.
        cursor.close()
        conn.close()

    print("✅ All cleanup tasks completed.")
|
||||
Loading…
Reference in New Issue