@ -4,7 +4,6 @@ from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
from tqdm import tqdm
import os , hashlib , subprocess , shutil
from config import get_local_db_connection
from concurrent . futures import ThreadPoolExecutor
EDITED_DIR = " edited/ "
@ -12,9 +11,13 @@ THUMB_DIR = "static/thumbnails"
# Thumbnail settings (presumably consumed by the thumbnail command builder
# defined elsewhere in this file -- TODO confirm units/meaning there).
THUMB_WIDTH = 640
FF_QUALITY = "80"

# Locations of video files on disk.
RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'
CONCATED_DIR = 'concated/'

# Root directories that hold video files. Built from the constants above so
# the paths are defined in exactly one place (the previous literal entries
# duplicated them and were missing a separating comma).
VIDEO_DIRS = [
    RECORDER_DIR,
    ARCHIVE_DIR,
]
def get_all_video_files ( ) :
@ -23,11 +26,12 @@ def get_all_video_files():
for root , _ , filenames in os . walk ( base ) :
for filename in filenames :
if filename . endswith ( " .mp4 " ) :
files [ filename ] = os . path . join ( root , filename )
video_id = filename . split ( " . " ) [ 0 ]
files [ video_id ] = os . path . join ( root , filename )
return files
def find_video_path(video_id: str):
    """Return the path stored for *video_id* in ``all_videos``, or None.

    ``all_videos`` is a module-level mapping that the script entry point
    assigns from ``get_all_video_files()`` before any lookup is made; this
    function only reads it.
    """
    # Single lookup instead of a membership test followed by indexing.
    return all_videos.get(video_id)
def mark_missing_videos ( cursor , conn ) :
cursor . execute ( " SELECT video_id, filepath FROM videos WHERE status != ' missing ' " )
@ -35,57 +39,47 @@ def mark_missing_videos(cursor, conn):
with tqdm ( videos , desc = " Scanning for missing videos... " ) as pbar :
for vid in videos :
pbar . update ( 1 )
video_id , filepath = vid . values ( )
if not filepath :
filename = f ' { video_id } .mp4 '
else :
filename = os . path . basename ( filepath )
if not find_video_path ( filename ) :
print ( f " 🚫 Missing: { filename } " )
video_id , filepath = vid [ ' video_id ' ] , vid [ ' filepath ' ]
if not find_video_path ( video_id ) :
print ( f " 🚫 Missing: { video_id } " )
cursor . execute ( " UPDATE videos SET status = ' missing ' WHERE video_id = %s " , ( video_id , ) )
conn . commit ( )
pbar . update ( 1 )
def update_video_paths(cursor, conn):
    """Point each ``videos`` row at the file's current location on disk.

    Every row is looked up by ``video_id`` through ``find_video_path``:

    * no file found -> the row is left untouched;
    * file found at the stored path and status is not 'missing' -> nothing
      to do;
    * otherwise the row gets the found path (with forward slashes) and its
      status is set to 'active'.

    Args:
        cursor: DB cursor whose rows support access by column name.
        conn: Connection used to commit each update.
    """
    cursor.execute("SELECT id, filepath, status, video_id FROM videos")
    videos = cursor.fetchall()

    # Iterating through tqdm advances the bar exactly once per row, including
    # rows that are skipped with `continue`.
    for vid in tqdm(videos, desc="Updating filepaths..."):
        path = find_video_path(vid['video_id'])
        if not path:
            continue

        # Store paths with forward slashes regardless of what os.walk returned.
        path = path.replace("\\", "/")

        # TODO(review): the original carried a "change this" note on this
        # condition. As written, any row whose path changed is forced to
        # 'active', whatever its previous status was -- confirm that is wanted.
        if path == vid['filepath'] and vid['status'] != 'missing':
            continue

        cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (path, vid['id']))
        conn.commit()


# Name this function had before it was renamed; kept so existing callers work.
find_missing_videos = update_video_paths
def fill_missing_hashes(cursor, conn):
    """Compute and store a hash for every non-missing video that has none.

    Rows whose ``filepath`` is empty or does not exist on disk are skipped.

    Args:
        cursor: DB cursor whose rows support access by column name.
        conn: Connection used to commit each update.
    """
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status != 'missing'")
    videos = cursor.fetchall()

    for vid in tqdm(videos, desc="Updating hashes..."):
        # Read columns by name rather than relying on the order of .values().
        video_id, filepath = vid['video_id'], vid['filepath']
        if not filepath or not os.path.exists(filepath):
            continue

        file_hash = calculate_file_hash(filepath)
        cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (file_hash, video_id))
        conn.commit()
def fill_missing_sizes ( cursor , conn ) :
cursor . execute ( " SELECT video_id, filepath FROM videos WHERE size = 0 AND status != ' missing ' " )
@ -93,12 +87,12 @@ def fill_missing_sizes(cursor, conn):
with tqdm ( videos , desc = " Updating sizes... " ) as pbar :
for vid in videos :
pbar . update ( 1 )
video_id , filepath = vid . values ( )
video_id , filepath = vid [ ' video_id ' ] , vid [ ' filepath ' ]
if filepath and os . path . exists ( filepath ) :
size = get_file_size_in_mb ( filepath )
cursor . execute ( " UPDATE videos SET size = %s WHERE video_id = %s " , ( size , video_id ) )
conn . commit ( )
pbar . update ( 1 )
def fill_missing_durations ( cursor , conn ) :
cursor . execute ( " SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != ' missing ' ORDER BY size ASC " )
@ -106,7 +100,6 @@ def fill_missing_durations(cursor, conn):
with tqdm ( videos , desc = " Updating durations... " ) as pbar :
for vid in videos :
pbar . update ( 1 )
video_id , filepath = vid . values ( )
if filepath and os . path . exists ( filepath ) :
duration = get_duration ( filepath )
@ -116,48 +109,63 @@ def fill_missing_durations(cursor, conn):
continue
cursor . execute ( " UPDATE videos SET duration = %s WHERE video_id = %s " , ( duration , video_id ) )
conn . commit ( )
pbar . update ( 1 )
def map_gender(gender):
    """Map a free-text sex/gender description to a canonical label.

    The text is searched (case-insensitively) for one of the known keywords,
    in the order listed below, and the label of the first match is returned.

    Args:
        gender: Description string, e.g. ``"a woman"``. ``None`` and
            non-string values are treated as unmappable.

    Returns:
        ``'Female'``, ``'Couple'``, ``'Trans'`` or ``'Male'``; ``None`` when
        no keyword matches (a message is printed in that case).
    """
    # Order matters: 'woman' contains 'man', so the male keyword is the more
    # specific 'a man' and is checked last.
    genders = {
        'woman': 'Female',
        'couple': 'Couple',
        'trans': 'Trans',
        'a man': 'Male',
    }

    if isinstance(gender, str):
        text = gender.lower()
        for keyword, label in genders.items():
            if keyword in text:
                return label

    print(f"🚫 Failed to map gender: {gender}")
    return None
def fill_missing_gender(cursor, conn):
    """Fill in ``gender`` for every (username, site) pair that lacks one.

    For each pair the value is taken, in order of preference, from:

    1. another ``videos`` row with the same username and site that already
       has a gender;
    2. the remote bio endpoint, mapped through ``map_gender``. At most 10
       remote requests are made per call; pairs beyond that budget are
       skipped until a later run.

    Pairs for which no gender can be determined are left unchanged.

    Args:
        cursor: DB cursor whose rows support access by column name.
        conn: Connection used to commit each update.
    """
    # Function-scope imports: requests was already imported lazily here, so
    # the module keeps loading without it when this step is not run.
    import requests
    from urllib.parse import quote

    def get_data(username):
        """Return the bio JSON object for *username*, or None if unavailable."""
        url = f"https://chaturbate.com/api/biocontext/{quote(str(username), safe='')}"
        try:
            # A timeout keeps one stalled request from hanging the whole run.
            response = requests.get(url, timeout=10)
            data = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or a body that is not valid JSON.
            return None
        if not isinstance(data, dict) or data.get('status') == 401:
            return None
        return data

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
    videos = cursor.fetchall()

    api_fetches = 10  # remaining remote lookups allowed in this run

    for vid in tqdm(videos, desc="Updating genders..."):
        # Read columns by name rather than relying on the order of .values().
        username, site = vid['username'], vid['site']

        # Prefer a gender already stored for the same username and site.
        cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
        known = cursor.fetchone()

        if known:
            gender_str = known['gender']
        else:
            if api_fetches <= 0:
                continue
            api_fetches -= 1

            data = get_data(username)
            if not data:
                continue

            # A response without a 'sex' field is treated as unmappable.
            gender_str = map_gender(data.get('sex') or '')
            if not gender_str:
                continue

        cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
        conn.commit()
        print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
def generate_thumbnails_for_videos ( cursor , conn ) :
cursor . execute ( " SELECT video_id, filepath FROM videos WHERE status != ' missing ' AND thumbnail IS NULL " )
@ -166,7 +174,6 @@ def generate_thumbnails_for_videos(cursor, conn):
tasks = [ ]
with tqdm ( videos , desc = " Generating thumbnails... " ) as pbar :
for v in videos :
pbar . update ( 1 )
video_id = v . get ( " video_id " )
filepath = v . get ( " filepath " )
@ -181,6 +188,7 @@ def generate_thumbnails_for_videos(cursor, conn):
tasks . append ( ( filepath , thumb_path ) )
v [ " thumbnail " ] = thumb_path
pbar . update ( 1 )
if tasks :
with ThreadPoolExecutor ( max_workers = os . cpu_count ( ) * 2 ) as exe :
@ -217,40 +225,68 @@ def _gen_thumb_cmd(src: str, dest: str):
def _move_processed_videos(cursor, conn, source_dir, desc, status=None, smallest_first=False):
    """Move every ``.mp4`` in *source_dir* over its library copy and update the DB.

    The file name up to the first dot is used as the ``video_id``. Files with
    no matching row are left in place. The destination is the row's stored
    ``filepath`` when that file exists; otherwise
    ``ARCHIVE_DIR/<username>/<filename>``.

    Args:
        cursor: DB cursor whose rows support access by column name.
        conn: Connection used to commit each update.
        source_dir: Directory holding the processed files.
        desc: Progress-bar label.
        status: When given, also written to the row's ``status`` column.
        smallest_first: Process files in ascending size order.
    """
    if not os.path.isdir(source_dir):
        # Nothing has been produced yet; avoid os.listdir raising.
        return

    filenames = [
        f for f in os.listdir(source_dir)
        if f.endswith(".mp4") and os.path.isfile(os.path.join(source_dir, f))
    ]
    if smallest_first:
        filenames.sort(key=lambda f: os.path.getsize(os.path.join(source_dir, f)))

    for filename in tqdm(filenames, desc=desc):
        source_path = os.path.join(source_dir, filename)
        video_id = filename.split(".")[0]

        cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
        video = cursor.fetchone()
        if not video:
            continue

        video_path = video['filepath']
        # filepath may be NULL/empty in the DB; os.path.exists(None) would raise.
        if not video_path or not os.path.exists(video_path):
            video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
            # The per-user archive folder may not exist yet.
            os.makedirs(os.path.dirname(video_path), exist_ok=True)

        # Measure before moving, record after: if the move fails the row is
        # not updated to describe a file that never arrived.
        file_size = get_file_size_in_mb(source_path)
        shutil.move(source_path, video_path)

        if status is None:
            cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (file_size, video_id))
        else:
            cursor.execute("UPDATE videos SET size = %s, status = %s WHERE video_id = %s", (file_size, status, video_id))
        conn.commit()


def move_edited_videos(cursor, conn):
    """Move files from ``EDITED_DIR`` into the library and refresh their size."""
    _move_processed_videos(cursor, conn, EDITED_DIR, "Moving edited videos...")


def move_concated_videos(cursor, conn):
    """Move files from ``CONCATED_DIR`` into the library, smallest first.

    Besides refreshing the size, each moved video's status is set to
    'concated'.
    """
    _move_processed_videos(
        cursor,
        conn,
        CONCATED_DIR,
        "Moving concated videos...",
        status='concated',
        smallest_first=True,
    )
if __name__ == '__main__':
    conn, cursor = get_local_db_connection()
    try:
        print("🔍 Scanning for missing data...")

        # Index the files on disk once; find_video_path() reads this
        # module-level mapping, so it must be assigned before the steps below.
        all_videos = get_all_video_files()

        update_video_paths(cursor, conn)
        mark_missing_videos(cursor, conn)

        move_edited_videos(cursor, conn)
        move_concated_videos(cursor, conn)

        generate_thumbnails_for_videos(cursor, conn)
        fill_missing_sizes(cursor, conn)
        fill_missing_durations(cursor, conn)
        fill_missing_gender(cursor, conn)
        # fill_missing_hashes(cursor, conn)
    finally:
        # Release the cursor even when one of the steps raises.
        cursor.close()