You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
291 lines
10 KiB
Python
291 lines
10 KiB
Python
import os
|
|
from config import get_local_db_connection
|
|
from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
|
|
from tqdm import tqdm
|
|
|
|
import os, hashlib, subprocess, shutil
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
# Source folder that move_edited_videos() drains of ".mp4" files.
EDITED_DIR = "edited/"

# Thumbnail settings: output root, width handed to ffmpeg's scale filter,
# and the value passed to ffmpeg's "-q:v" option.
THUMB_DIR = "static/thumbnails"
THUMB_WIDTH = 640
FF_QUALITY = "80"

# Locations that hold video files. ARCHIVE_DIR is also the fallback
# destination when a catalogued file path no longer exists.
RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'

# Source folder that move_concated_videos() drains of ".mp4" files.
CONCATED_DIR = 'concated/'

# Directories walked (in this order) when building the video index.
VIDEO_DIRS = [RECORDER_DIR, ARCHIVE_DIR]
|
|
|
|
def get_all_video_files():
    """Index every ``.mp4`` file found under the directories in ``VIDEO_DIRS``.

    Each directory is walked recursively. The text before the first dot of a
    file name is used as the video id and mapped to the file's joined path.
    When the same id occurs more than once, the path seen last wins.

    Returns:
        dict: video id -> file path.
    """
    index = {}
    for video_dir in VIDEO_DIRS:
        for dirpath, _subdirs, names in os.walk(video_dir):
            index.update({
                name.split(".")[0]: os.path.join(dirpath, name)
                for name in names
                if name.endswith(".mp4")
            })
    return index
|
|
|
|
def find_video_path(video_id: str):
    """Return the indexed file path for ``video_id``, or ``None`` if unknown.

    Reads the module-level ``all_videos`` mapping. In the visible code that
    name is only assigned inside the ``__main__`` block, so calling this
    before it exists raises ``NameError``.
    """
    return all_videos.get(video_id)
|
|
|
|
def mark_missing_videos(cursor, conn):
    """Switch active videos to ``status = 'missing'`` when no file is indexed.

    Every row with ``status = 'active'`` is checked through
    ``find_video_path``. Rows without a match are reported on stdout and
    updated, with a commit after each change.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status = 'active'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Scanning for missing videos...") as progress:
        for row in rows:
            video_id = row['video_id']
            located = find_video_path(video_id)
            if not located:
                print(f"🚫 Missing: {video_id}")
                cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
                conn.commit()
            progress.update(1)
|
|
|
|
def update_video_paths(cursor, conn):
    """Sync each row's ``filepath`` with the path found in the video index.

    For every row in ``videos`` the indexed path is looked up via
    ``find_video_path`` and normalised to forward slashes. Rows with no
    indexed file, or whose stored path already matches, are left untouched.
    Any other row gets the new path and ``status = 'active'``, committed
    immediately.

    Note: the progress bar only advances for rows that are rewritten.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT id, filepath, status, video_id FROM videos")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating filepaths...") as progress:
        for row in rows:
            located = find_video_path(row['video_id'])
            normalized = located.replace("\\", "/") if located else None

            # Nothing to do when the file is unknown or the path is current.
            if normalized is None or normalized == row['filepath']:
                continue

            cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (normalized, row['id']))
            conn.commit()
            progress.update(1)
|
|
|
|
def fill_missing_hashes(cursor, conn):
    """Compute and store the file hash for active videos that have none.

    Selects active rows whose ``hash`` is NULL or empty. For each one whose
    ``filepath`` exists on disk, ``calculate_file_hash`` is run and the
    result is stored with a commit per row. Rows without a usable file are
    skipped.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status = 'active'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating hashes...") as pbar:
        for vid in videos:
            # Read columns by name instead of unpacking vid.values(), which
            # silently depends on the column order of the row mapping.
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                h = calculate_file_hash(filepath)
                cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (h, video_id))
                conn.commit()
            pbar.update(1)
|
|
|
|
def fill_missing_sizes(cursor, conn):
    """Store the file size for active videos whose ``size`` is still 0.

    For each selected row whose ``filepath`` exists on disk, the value
    returned by ``get_file_size_in_mb`` is written to ``size`` and committed.
    Rows without a usable file are skipped.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE size = 0 AND status = 'active'")
    rows = cursor.fetchall()

    with tqdm(rows, desc="Updating sizes...") as progress:
        for row in rows:
            video_id = row['video_id']
            path = row['filepath']
            file_present = bool(path) and os.path.exists(path)
            if file_present:
                measured = get_file_size_in_mb(path)
                cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (measured, video_id))
                conn.commit()
            progress.update(1)
|
|
|
|
def fill_missing_durations(cursor, conn):
    """Store the duration for active videos whose ``duration`` is still 0.

    Rows are processed smallest file first (``ORDER BY size ASC``). For each
    row whose ``filepath`` exists, ``get_duration`` is called:

    * a positive result is written to ``duration`` and committed;
    * a result <= 0 is reported and the file is deleted from disk.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status = 'active' ORDER BY size ASC")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating durations...") as pbar:
        for vid in videos:
            # Read columns by name instead of unpacking vid.values(), which
            # silently depends on the column order of the row mapping.
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                duration = get_duration(filepath)
                if duration <= 0:
                    print(f"🚫 Failed to get duration for {filepath}")
                    # NOTE(review): destructive. A file whose duration cannot
                    # be read is removed permanently; confirm get_duration
                    # never reports <= 0 for a transient failure.
                    os.remove(filepath)
                else:
                    cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (duration, video_id))
                    conn.commit()
            # Advance for every row, including failed ones, so the bar can
            # reach its total.
            pbar.update(1)
|
|
|
|
def map_gender(gender):
    """Translate a free-text gender description into a canonical label.

    The input is searched for known substrings in a fixed order; the first
    hit decides the result. ``'woman'`` is tested before ``'a man'`` so that
    text describing a woman is never classified as male.

    Args:
        gender: description text, e.g. ``"a woman"``. ``None`` and other
            non-string values are treated as unmappable instead of raising
            ``TypeError``.

    Returns:
        ``'Female'``, ``'Couple'``, ``'Trans'`` or ``'Male'``; ``None`` (after
        printing a diagnostic) when nothing matches.
    """
    genders = {
        'woman': 'Female',
        'couple': 'Couple',
        'trans': 'Trans',
        'a man': 'Male',
    }

    # Substring search only makes sense on text; anything else falls
    # through to the "failed to map" path below.
    if isinstance(gender, str):
        for needle, label in genders.items():
            if needle in gender:
                return label

    print(f"🚫 Failed to map gender: {gender}")
    return None
|
|
|
|
def fill_missing_gender(cursor, conn):
    """Fill in ``gender`` for active videos where it is NULL.

    For each distinct ``(username, site)`` pair lacking a gender:

    1. reuse a gender already stored on another row of the same pair;
    2. otherwise ask the remote bio API (at most 10 requests per run) and
       translate its ``sex`` field with ``map_gender``.

    When a gender is found, all rows of that pair are updated and committed.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
    """
    def get_data(username):
        """Fetch the bio JSON for ``username``; return ``False`` on failure."""
        import requests

        url = f"https://chaturbate.com/api/biocontext/{username}"
        try:
            # A timeout keeps one stalled request from hanging the whole run.
            response = requests.get(url, timeout=10)
            data = response.json()
        except (requests.RequestException, ValueError):
            # Network errors or a non-JSON body. Deliberately narrower than a
            # bare ``except`` so Ctrl-C and SystemExit still propagate.
            return False

        if not isinstance(data, dict) or data.get('status') == 401:
            return False
        return data

    api_fetches = 10

    def resolve_gender(username, site):
        """Return the gender label for the pair, or ``None`` if unavailable."""
        nonlocal api_fetches

        cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
        known = cursor.fetchone()
        if known:
            return known['gender']

        # Remote lookups are budgeted per run.
        if api_fetches <= 0:
            return None
        data = get_data(username)
        api_fetches -= 1
        if not data:
            return None

        # The response may lack the field; treat that as "unknown".
        sex = data.get('sex')
        if not sex:
            return None
        return map_gender(sex)

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status = 'active'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating genders...") as pbar:
        for vid in videos:
            # Read columns by name instead of unpacking vid.values(), which
            # silently depends on the column order of the row mapping.
            username, site = vid['username'], vid['site']

            gender_str = resolve_gender(username, site)
            if gender_str is not None:
                cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
                conn.commit()
                print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
            # Advance for skipped pairs too so the bar can reach its total.
            pbar.update(1)
|
|
|
|
def generate_thumbnails_for_videos(cursor, conn):
    """Create missing thumbnails with ffmpeg and record their paths.

    Active rows with ``thumbnail IS NULL`` whose ``filepath`` exists are
    assigned a thumbnail path via ``_hashed_thumb_path``. Thumbnails not yet
    on disk are rendered in parallel by running the ``_gen_thumb_cmd``
    command. A row's ``thumbnail`` column is only set when the thumbnail
    file really exists afterwards, so a failed ffmpeg run is retried on the
    next invocation instead of leaving a dangling path in the database.

    Args:
        cursor: database cursor whose rows support ``.get`` by column name.
        conn: connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status = 'active' AND thumbnail IS NULL")
    videos = cursor.fetchall()

    tasks = []       # (source video, thumbnail path) pairs still to render
    candidates = []  # (video_id, thumbnail path) pairs to record afterwards
    with tqdm(videos, desc="Generating thumbnails...") as pbar:
        for v in videos:
            video_id = v.get("video_id")
            filepath = v.get("filepath")

            if filepath and os.path.exists(filepath):
                thumb_path = _hashed_thumb_path(video_id)
                if not os.path.exists(thumb_path):
                    tasks.append((filepath, thumb_path))
                candidates.append((video_id, thumb_path))
            pbar.update(1)

    if tasks:
        def run_ffmpeg(task):
            return subprocess.run(
                _gen_thumb_cmd(*task),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

        # os.cpu_count() may return None; fall back to a single core.
        workers = (os.cpu_count() or 1) * 2
        with ThreadPoolExecutor(max_workers=workers) as exe:
            # list() drains the iterator so worker exceptions surface here.
            list(exe.map(run_ffmpeg, tasks))

    for video_id, thumb_path in candidates:
        if not os.path.exists(thumb_path):
            # ffmpeg output is discarded above, so the missing file is the
            # only failure signal available.
            print(f"🚫 Failed to generate thumbnail for {video_id}")
            continue
        cursor.execute("UPDATE videos SET thumbnail = %s WHERE video_id = %s", (thumb_path.replace("\\", "/"), video_id))
        conn.commit()
|
|
|
|
def _hashed_thumb_path(video_id: str):
    """Return the ``.webp`` thumbnail path for ``video_id``, creating its folder.

    Thumbnails are spread over two directory levels under ``THUMB_DIR``,
    named after the first and second pair of hex digits of the MD5 digest of
    the id. The directory is created if it does not exist yet.
    """
    digest = hashlib.md5(video_id.encode()).hexdigest()
    bucket = os.path.join(THUMB_DIR, digest[:2], digest[2:4])
    os.makedirs(bucket, exist_ok=True)
    return os.path.join(bucket, f"{video_id}.webp")
|
|
|
|
def _gen_thumb_cmd(src: str, dest: str):
    """Build the ffmpeg argument list that renders one thumbnail.

    The command reads ``src`` from position 0, applies the ``thumbnail``
    filter followed by a scale to ``THUMB_WIDTH`` (height ``-1``), emits a
    single frame with ``-q:v`` set to ``FF_QUALITY`` and writes it to
    ``dest``, overwriting without prompting (``-y``).

    Returns:
        list[str]: argv suitable for ``subprocess.run``.
    """
    scale_filter = f"thumbnail,scale={THUMB_WIDTH}:-1"

    cmd = ["ffmpeg", "-y", "-loglevel", "error"]
    cmd += ["-ss", "0", "-i", src]
    cmd += ["-vframes", "1"]
    cmd += ["-vf", scale_filter]
    cmd += ["-q:v", FF_QUALITY]
    cmd.append(dest)
    return cmd
|
|
|
|
def _move_processed_videos(cursor, conn, source_dir, desc, update_sql, sort_by_size=False):
    """Move each ``.mp4`` in ``source_dir`` over its catalogued video file.

    The file name up to its first dot is the video id. Files whose id is not
    in the ``videos`` table are left in place. The destination is the row's
    ``filepath``; if that is empty or no longer exists, the file goes to
    ``ARCHIVE_DIR/<username>/<filename>`` (the folder is created on demand).

    The database row is updated and committed *before* the move, so a failed
    move leaves the file in ``source_dir`` to be retried on the next run.

    Args:
        cursor: database cursor whose rows support access by column name.
        conn: connection used to commit each update.
        source_dir: directory to drain.
        desc: progress-bar caption.
        update_sql: UPDATE statement taking ``(size, video_id)`` parameters.
        sort_by_size: process smallest files first when true.
    """
    filenames = [
        f for f in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, f)) and f.endswith(".mp4")
    ]
    if sort_by_size:
        filenames.sort(key=lambda f: os.path.getsize(os.path.join(source_dir, f)))

    with tqdm(filenames, desc=desc) as pbar:
        for filename in filenames:
            source_path = os.path.join(source_dir, filename)
            video_id = filename.split(".")[0]

            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
            video = cursor.fetchone()

            if video:
                video_path = video['filepath']
                # A NULL/empty filepath must not reach os.path.exists().
                if not video_path or not os.path.exists(video_path):
                    video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
                    # shutil.move does not create missing parent folders.
                    os.makedirs(os.path.dirname(video_path), exist_ok=True)

                file_size = get_file_size_in_mb(source_path)
                cursor.execute(update_sql, (file_size, video_id))
                conn.commit()

                shutil.move(source_path, video_path)
            pbar.update(1)


def move_edited_videos(cursor, conn):
    """Move files from ``EDITED_DIR`` into place and refresh their ``size``."""
    _move_processed_videos(
        cursor,
        conn,
        EDITED_DIR,
        "Moving edited videos...",
        "UPDATE videos SET size = %s WHERE video_id = %s",
    )


def move_concated_videos(cursor, conn):
    """Move files from ``CONCATED_DIR`` into place, smallest first.

    Besides refreshing ``size``, each moved video's row is set to
    ``status = 'concated'``.
    """
    _move_processed_videos(
        cursor,
        conn,
        CONCATED_DIR,
        "Moving concated videos...",
        "UPDATE videos SET size = %s, status = 'concated' WHERE video_id = %s",
        sort_by_size=True,
    )
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run every cleanup task once, in order.
    conn, cursor = get_local_db_connection()

    try:
        # Module-level index read by find_video_path(); it must be assigned
        # before any task that resolves video ids to files.
        all_videos = get_all_video_files()
        update_video_paths(cursor, conn)
        mark_missing_videos(cursor, conn)

        generate_thumbnails_for_videos(cursor, conn)
        fill_missing_sizes(cursor, conn)
        fill_missing_durations(cursor, conn)
        fill_missing_gender(cursor, conn)
        # fill_missing_hashes(cursor, conn)

        move_edited_videos(cursor, conn)
        move_concated_videos(cursor, conn)
    finally:
        # Release the database handles even when a task raises.
        cursor.close()
        conn.close()

    print("✅ All cleanup tasks completed.")