major cleanup and fixes

main
oscar 1 month ago
parent fd62cb5a30
commit 00aa9a9bf1

2
.gitignore vendored

@ -183,3 +183,5 @@ cython_debug/
/static/thumbnails
/.temp
.last_checked
/concated
/edited

@ -0,0 +1,43 @@
import os
from config import get_local_db_connection
from tqdm import tqdm
import shutil
RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'


def get_all_video_files():
    """Index every .mp4 below RECORDER_DIR as {video_id: absolute path}.

    The video id is taken as the filename up to the first dot.
    """
    index = {}
    for dirpath, _subdirs, names in os.walk(RECORDER_DIR):
        for name in names:
            if not name.endswith(".mp4"):
                continue
            index[name.split(".")[0]] = os.path.join(dirpath, name)
    return index
if __name__ == '__main__':
    conn, cursor = get_local_db_connection()
    downloaded_videos = get_all_video_files()

    # For every video still sitting in the recorder dir, compute the path it
    # should occupy in the archive.  If a file already exists there it is
    # assumed to be a corrupt leftover from an interrupted move in the other
    # script, so the local copy is moved over it (overwriting the broken one).
    for video_id in tqdm(downloaded_videos.keys(), desc="Checking for failed videos..."):
        video_path = downloaded_videos[video_id]
        cursor.execute("SELECT username FROM videos WHERE video_id = %s", (video_id,))
        row = cursor.fetchone()
        # Bug fix: the original bare `except:` around the lookup also hid real
        # DB errors; an explicit None check is what was actually intended.
        if not row:
            print(f"Video {video_id} does not exist in the database")
            continue
        username = row['username']
        archive_path = os.path.join(ARCHIVE_DIR, username, video_path.replace(RECORDER_DIR, ''))
        if os.path.exists(archive_path):
            print(f"Video {video_id} already exists in the archive directory")
            shutil.move(video_path, archive_path)

@ -4,7 +4,6 @@ from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
from tqdm import tqdm
import os, hashlib, subprocess, shutil
from config import get_local_db_connection
from concurrent.futures import ThreadPoolExecutor
EDITED_DIR = "edited/"
@ -12,9 +11,13 @@ THUMB_DIR = "static/thumbnails"
THUMB_WIDTH = 640
FF_QUALITY = "80"
RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'
CONCATED_DIR = 'concated/'
VIDEO_DIRS = [
"U:/streamaster/",
"E:/streamaster/streamaster/downloaded"
RECORDER_DIR,
ARCHIVE_DIR
]
def get_all_video_files():
@ -23,11 +26,12 @@ def get_all_video_files():
for root, _, filenames in os.walk(base):
for filename in filenames:
if filename.endswith(".mp4"):
files[filename] = os.path.join(root, filename)
video_id = filename.split(".")[0]
files[video_id] = os.path.join(root, filename)
return files
def find_video_path(video_id: str):
    """Return the on-disk path indexed under *video_id*, or None.

    Relies on the module-level `all_videos` index built in __main__.
    """
    # dict.get does one lookup instead of the `in` + subscript pair.
    return all_videos.get(video_id)
def mark_missing_videos(cursor, conn):
    """Flag DB rows whose video file can no longer be found on disk."""
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
    videos = cursor.fetchall()
    with tqdm(videos, desc="Scanning for missing videos...") as pbar:
        for vid in videos:
            # Explicit keys instead of vid.values(): dict ordering from the
            # driver is not a contract worth relying on.
            video_id = vid['video_id']
            if not find_video_path(video_id):
                print(f"🚫 Missing: {video_id}")
                cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
                conn.commit()
            pbar.update(1)
def update_video_paths(cursor, conn):
    """Refresh stored filepaths from the on-disk index and re-activate rows.

    Any row whose file is found gets its filepath normalised to forward
    slashes and its status reset to 'active'.
    """
    cursor.execute("SELECT id, filepath, status, video_id FROM videos")
    videos = cursor.fetchall()
    with tqdm(videos, desc="Updating filepaths...") as pbar:
        for vid in videos:
            filepath = vid['filepath']
            status = vid['status']
            path = find_video_path(vid['video_id'])
            if not path:
                pbar.update(1)  # fix: bar previously stalled on continue
                continue
            path = path.replace("\\", "/")
            # Skip rows that are already correct and not flagged missing.
            if path == filepath and status != 'missing':
                pbar.update(1)  # fix: bar previously stalled on continue
                continue
            cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (path, vid['id']))
            conn.commit()
            pbar.update(1)
def fill_missing_hashes(cursor, conn):
    """Compute and store a file hash for rows that have none yet."""
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status != 'missing'")
    videos = cursor.fetchall()
    with tqdm(videos, desc="Updating hashes...") as pbar:
        for vid in videos:
            # Explicit keys for consistency with the other fill_* helpers.
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                h = calculate_file_hash(filepath)
                cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (h, video_id))
                conn.commit()
            pbar.update(1)
def fill_missing_sizes(cursor, conn):
    """Store the on-disk size (MB) for rows still recorded as size = 0."""
    cursor.execute("SELECT video_id, filepath FROM videos WHERE size = 0 AND status != 'missing'")
    videos = cursor.fetchall()
    with tqdm(videos, desc="Updating sizes...") as pbar:
        for vid in videos:
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                size = get_file_size_in_mb(filepath)
                cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (size, video_id))
                conn.commit()
            pbar.update(1)
def fill_missing_durations(cursor, conn):
cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing' ORDER BY size ASC")
@ -106,7 +100,6 @@ def fill_missing_durations(cursor, conn):
with tqdm(videos, desc="Updating durations...") as pbar:
for vid in videos:
pbar.update(1)
video_id, filepath = vid.values()
if filepath and os.path.exists(filepath):
duration = get_duration(filepath)
@ -116,48 +109,63 @@ def fill_missing_durations(cursor, conn):
continue
cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (duration, video_id))
conn.commit()
pbar.update(1)
def fill_missing_gender(cursor, conn):
def map_gender(gender):
    """Translate a biocontext 'sex' string into our canonical gender label.

    Matches by substring; returns None (after logging) when nothing matches.
    """
    mapping = {
        'woman': 'Female',
        'couple': 'Couple',
        'trans': 'Trans',
        'a man': 'Male',
    }
    for needle, label in mapping.items():
        if needle in gender:
            return label
    print(f"🚫 Failed to map gender: {gender}")
    return None
def fill_missing_gender(cursor, conn):
    """Backfill the gender column, reusing known values before hitting the API.

    For each (username, site) lacking a gender, first try another row of the
    same streamer; otherwise query the Chaturbate biocontext API (capped at
    10 requests per run) and map the result via map_gender().
    """
    def get_data(username):
        # Local import: requests is only needed when the API fallback fires.
        import requests
        url = f"https://chaturbate.com/api/biocontext/{username}"
        try:
            data = requests.get(url)
            data = data.json()
            # A 401 payload means the profile is inaccessible.
            if 'status' in data:
                if data['status'] == 401:
                    return False
        except Exception:
            # Bug fix: bare `except:` also trapped KeyboardInterrupt.
            return False
        return data

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
    videos = cursor.fetchall()
    api_fetches = 10  # hard cap on external requests per run
    with tqdm(videos, desc="Updating genders...") as pbar:
        for vid in videos:
            username, site = vid['username'], vid['site']
            # Reuse a gender already stored for this streamer, if any.
            cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
            gender = cursor.fetchone()
            if gender:
                gender_str = gender['gender']
            else:
                if api_fetches <= 0:
                    pbar.update(1)
                    continue
                data = get_data(username)
                api_fetches -= 1
                if not data:
                    pbar.update(1)
                    continue
                gender = map_gender(data['sex'])
                if not gender:
                    pbar.update(1)
                    continue
                gender_str = gender
            cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
            conn.commit()
            print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
            pbar.update(1)
def generate_thumbnails_for_videos(cursor, conn):
cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing' AND thumbnail IS NULL")
@ -166,7 +174,6 @@ def generate_thumbnails_for_videos(cursor, conn):
tasks = []
with tqdm(videos, desc="Generating thumbnails...") as pbar:
for v in videos:
pbar.update(1)
video_id = v.get("video_id")
filepath = v.get("filepath")
@ -181,6 +188,7 @@ def generate_thumbnails_for_videos(cursor, conn):
tasks.append((filepath, thumb_path))
v["thumbnail"] = thumb_path
pbar.update(1)
if tasks:
with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as exe:
@ -217,40 +225,68 @@ def _gen_thumb_cmd(src: str, dest: str):
def move_edited_videos(cursor, conn):
    """Move re-edited files from EDITED_DIR back over their stored originals.

    Falls back to ARCHIVE_DIR/<username>/<file> when the recorded filepath is
    NULL or gone, and refreshes the stored size (MB) before moving.
    """
    edited_videos = [f for f in os.listdir(EDITED_DIR)
                     if os.path.isfile(os.path.join(EDITED_DIR, f)) and f.endswith(".mp4")]
    with tqdm(edited_videos, desc="Moving edited videos...") as pbar:
        for filename in edited_videos:
            edited_path = os.path.join(EDITED_DIR, filename)
            video_id = filename.split(".")[0]
            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
            video = cursor.fetchone()
            if not video:
                pbar.update(1)  # fix: bar previously stalled on continue
                continue
            video_path = video['filepath']
            # Bug fix: filepath can be NULL in the DB; os.path.exists(None)
            # raised TypeError in the original.
            if not video_path or not os.path.exists(video_path):
                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
            # Bug fix: shutil.move fails when the target folder is missing.
            os.makedirs(os.path.dirname(video_path), exist_ok=True)
            file_size = get_file_size_in_mb(edited_path)
            cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (file_size, video_id))
            conn.commit()
            shutil.move(edited_path, video_path)
            pbar.update(1)
def move_concated_videos(cursor, conn):
    """Move finished concat outputs from CONCATED_DIR to their archive spot.

    Processes smallest files first, updates size and marks the row
    'concated'.  Falls back to ARCHIVE_DIR/<username>/<file> when the stored
    filepath is NULL or gone.
    """
    concated_videos = [f for f in os.listdir(CONCATED_DIR)
                       if os.path.isfile(os.path.join(CONCATED_DIR, f)) and f.endswith(".mp4")]
    # Smallest first so progress is visible quickly.
    concated_videos = sorted(concated_videos, key=lambda f: os.path.getsize(os.path.join(CONCATED_DIR, f)))
    with tqdm(concated_videos, desc="Moving concated videos...") as pbar:
        for filename in concated_videos:
            src_path = os.path.join(CONCATED_DIR, filename)
            video_id = filename.split(".")[0]
            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
            video = cursor.fetchone()
            if not video:
                pbar.update(1)  # fix: bar previously stalled on continue
                continue
            video_path = video['filepath']
            # Bug fix: filepath can be NULL; os.path.exists(None) raised.
            if not video_path or not os.path.exists(video_path):
                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)
            # Bug fix: shutil.move fails when the target folder is missing.
            os.makedirs(os.path.dirname(video_path), exist_ok=True)
            file_size = get_file_size_in_mb(src_path)
            cursor.execute("UPDATE videos SET size = %s, status = 'concated' WHERE video_id = %s", (file_size, video_id))
            conn.commit()
            shutil.move(src_path, video_path)
            pbar.update(1)
if __name__ == '__main__':
    conn, cursor = get_local_db_connection()
    print("🔍 Scanning for missing data...")
    # Build the on-disk index once; helpers read the `all_videos` global.
    all_videos = get_all_video_files()
    update_video_paths(cursor, conn)
    mark_missing_videos(cursor, conn)
    move_edited_videos(cursor, conn)
    move_concated_videos(cursor, conn)
    generate_thumbnails_for_videos(cursor, conn)
    fill_missing_sizes(cursor, conn)
    fill_missing_durations(cursor, conn)
    fill_missing_gender(cursor, conn)
    # fill_missing_hashes(cursor, conn)  # intentionally disabled: hashing is slow
    cursor.close()

@ -2,16 +2,14 @@ import subprocess
import json
import os
import tempfile
import shutil
# --- helpers --------------------------------------------------------------- #
from video_funcs import get_video_info, get_target_bitrate, get_target_resolution, get_fps
def ffprobe_json(fp: str) -> dict:
"""Return the full ffprobe-JSON for a media file."""
cmd = [
"ffprobe", "-v", "quiet", "-print_format", "json",
"-show_streams", "-show_format", fp
]
return json.loads(subprocess.check_output(cmd, text=True))
TEMP_DIR = ".temp"
CONCATED_DIR = "concated"
# --- helpers --------------------------------------------------------------- #
def get_signature(fp: str) -> tuple:
"""
@ -19,6 +17,14 @@ def get_signature(fp: str) -> tuple:
video: codec, width, height, fps (as a float), pix_fmt, color_range
audio: codec, sample_rate, channels, channel_layout
"""
def ffprobe_json(fp: str) -> dict:
"""Return the full ffprobe-JSON for a media file."""
cmd = [
"ffprobe", "-v", "quiet", "-print_format", "json",
"-show_streams", "-show_format", fp
]
return json.loads(subprocess.check_output(cmd, text=True))
info = ffprobe_json(fp)
v_stream = next(s for s in info["streams"] if s["codec_type"] == "video")
a_stream = next((s for s in info["streams"] if s["codec_type"] == "audio"), None)
@ -45,6 +51,9 @@ def all_signatures_equal(videos):
ref = get_signature(videos[0]["filepath"])
return all(get_signature(v["filepath"]) == ref for v in videos[1:])
# --- concat functions --------------------------------------------------------------- #
def concat_copy(videos, out_path):
"""Lossless concat with the *concat demuxer* (-c copy)."""
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
@ -77,6 +86,8 @@ def concat_copy(videos, out_path):
if result.returncode != 0 or any(err in result.stderr for err in ffmpeg_errors):
print("❌ FFmpeg concat failed or produced corrupted output.")
print("FFmpeg stderr:")
print(result.stderr)
# Remove broken file if it exists
if os.path.exists(out_path):
os.remove(out_path)
@ -86,7 +97,29 @@ def concat_copy(videos, out_path):
print("✅ FFmpeg concat completed successfully.")
return True
def concatenate_videos(videos_list, reencode_concate=False):
    """Concatenate a pre-grouped list of videos.

    A lossless stream-copy concat is attempted first; when the streams are
    incompatible and *reencode_concate* is set, falls back to a full AV1
    (NVENC) re-encode with unified resolution and frame rate.  Returns the
    primary video dict on success, otherwise False.
    """
    if len(videos_list) <= 1:
        return False

    lossless_result = copy_concatenate_videos(videos_list)
    if lossless_result:
        return lossless_result

    return encode_concatenate_videos(videos_list) if reencode_concate else False
def copy_concatenate_videos(videos_list):
from concat_helper import all_signatures_equal, concat_copy
if not (len(videos_list) > 1 and all_signatures_equal(videos_list)):
print("Streams are not compatible for lossless concat.")
return False
@ -95,9 +128,9 @@ def copy_concatenate_videos(videos_list):
print("All streams are compatible attempting lossless concat …")
main_video = videos_list[0]
video_path = main_video["filepath"]
output_path = os.path.join("temp", os.path.basename(video_path))
output_path = os.path.join(TEMP_DIR, os.path.basename(video_path))
os.makedirs("concated", exist_ok=True)
os.makedirs(CONCATED_DIR, exist_ok=True)
success = concat_copy(videos_list, output_path)
if not success:
@ -109,6 +142,93 @@ def copy_concatenate_videos(videos_list):
os.remove(v["filepath"])
# move temp to concated folder
os.rename(output_path, os.path.join("concated", os.path.basename(video_path)))
os.rename(output_path, os.path.join(CONCATED_DIR, os.path.basename(video_path)))
return main_video
def encode_concatenate_videos(videos_list):
    """Re-encode and concatenate *videos_list* into one AV1 (NVENC) file.

    Each input is fps-normalised and scaled to the group's most common
    resolution inside a single ffmpeg filter graph.  On success the source
    files are deleted, the result lands in CONCATED_DIR, and the primary
    video dict is returned; returns False on encode failure.
    """
    main_video = videos_list[0]
    video_path = main_video["filepath"]

    os.makedirs(TEMP_DIR, exist_ok=True)
    os.makedirs(CONCATED_DIR, exist_ok=True)
    temp_path = os.path.join(TEMP_DIR, os.path.basename(video_path))
    output_path = os.path.join(CONCATED_DIR, os.path.basename(video_path))

    video_info = get_video_info(videos_list[0]['filepath'])
    current_bitrate = int(video_info.get('bitrate') or 0)

    target_width, target_height = get_target_resolution(videos_list)
    target_bitrate_kbps = get_target_bitrate(target_width, target_height)

    # Clamp target and max bitrate so we never exceed the source bitrate.
    if current_bitrate > 0:
        target_bitrate_kbps = min(target_bitrate_kbps, current_bitrate)
        max_bitrate_kbps = min(int(1.5 * target_bitrate_kbps), current_bitrate)
    else:
        max_bitrate_kbps = int(1.5 * target_bitrate_kbps)

    # Keyframe interval of ~1 second, derived from the primary video's fps.
    fps_float = get_fps(video_path) or video_info.get('fps') or 30.0
    if fps_float <= 0:
        fps_float = 30.0
    keyframe_interval = int(fps_float)

    print(f"Concatenating {len(videos_list)} videos into {temp_path}")
    print(f" Mode Resolution: {target_width}x{target_height}")
    print(f" Target Bitrate: {target_bitrate_kbps}k (max ~{max_bitrate_kbps}k)")
    print(f" Keyframe Interval: {keyframe_interval}")

    cmd = ["ffmpeg", "-y"]  # Overwrite output if exists
    for v in videos_list:
        cmd.extend(["-i", v["filepath"]])

    # Normalise every input, then concat all pairs in one filter graph.
    filter_statements = []
    concat_streams = []
    n = len(videos_list)
    unified_fps = 30
    for i in range(n):
        filter_statements.append(
            f"[{i}:v]fps={unified_fps},scale={target_width}:{target_height}[v{i}]"
        )
        concat_streams.append(f"[v{i}][{i}:a]")
    # Example final: [v0][0:a][v1][1:a]concat=n=2:v=1:a=1[outv][outa]
    concat_line = "".join(concat_streams) + f"concat=n={n}:v=1:a=1[outv][outa]"
    filter_statements.append(concat_line)
    filter_complex = ";".join(filter_statements)

    cmd.extend([
        "-filter_complex", filter_complex,
        "-map", "[outv]",
        "-map", "[outa]",
        "-c:v", "av1_nvenc",
        "-b:v", f"{target_bitrate_kbps}k",
        "-maxrate", f"{max_bitrate_kbps}k",
        "-bufsize", f"{max_bitrate_kbps}k",
        "-preset", "p5",
        "-g", str(keyframe_interval),
        "-c:a", "aac",
        "-b:a", "192k",
        temp_path
    ])

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError):
        # Bug fix: the bare `except:` here also swallowed KeyboardInterrupt.
        return False

    # Encode succeeded: drop the sources and publish the result.
    for video in videos_list:
        os.remove(video["filepath"])
    shutil.move(temp_path, output_path)
    return main_video

@ -1,17 +1,16 @@
print("Importing modules...")
from funcs import group_videos, group_for_concatenation_simple
from video_funcs import concatenate_videos
from concat_helper import concatenate_videos
import os, config, shutil
MOVE_FUCKED = False
sort_type = {"size": lambda x: sum([video['size'] for video in x]),"count": lambda x: len(x)}
sort_type = {"size": lambda x: sum([video['size'] for video in x]),"count": lambda x: len(x)}
def get_videos(cursor, username=None):
    """Fetch all non-missing videos, optionally restricted to *username*."""
    if username:
        cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing';", (username,))
        return cursor.fetchall()
    cursor.execute("SELECT * FROM videos WHERE status != 'missing';")
    return cursor.fetchall()
def organize_videos():
@ -28,7 +27,7 @@ def organize_videos():
for user, videos in video_data.items():
grouped_videos.extend(group_for_concatenation_simple(videos))
sorted_processed_videos = sorted(grouped_videos, key=sort_type["size"], reverse=True)
sorted_processed_videos = sorted(grouped_videos, key=sort_type["count"], reverse=True)
# group the videos for concatenation
for video_list in sorted_processed_videos:
@ -42,7 +41,7 @@ def organize_videos():
print("\n"*2)
print(100*"=")
main_video = concatenate_videos(video_list)
main_video = concatenate_videos(video_list, reencode_concate=True)
if main_video:
print(f"Processed {len(video_list)} input videos into {main_video["filepath"]} output video.")

@ -90,8 +90,7 @@ def encode_video(filepath, output_path, target_bitrate):
)
)
print(f" Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps "
f"(maxrate={max_bitrate} kbps).")
print(f" Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps (maxrate={max_bitrate} kbps).")
return True
except ffmpeg.Error as e:
@ -119,7 +118,7 @@ def check_and_replace_if_smaller(original_path, temp_output_path):
print(f" Original: {size_original_mb:.2f} MB \n Re-encoded: {size_processed_mb:.2f} MB.")
print(100*"=")
shutil.move(temp_output_path, original_path)
return True
return size_processed_mb
def update_codec_db(video_id, codec):
conn, cursor = config.get_local_db_connection()
@ -127,6 +126,12 @@ def update_codec_db(video_id, codec):
conn.commit()
conn.close()
def update_file_size_db(video_id, size):
    """Persist a new size (MB) for the videos row with primary key *video_id*."""
    db_conn, db_cursor = config.get_local_db_connection()
    db_cursor.execute("UPDATE videos SET size = %s WHERE id = %s", (size, video_id))
    db_conn.commit()
    db_conn.close()
def smart_choice(cursor, small_mb=250):
"""
Returns a list of candidate videos to encode, ordered by:
@ -167,7 +172,7 @@ def smart_choice(cursor, small_mb=250):
ORDER BY
total_mb DESC, -- top streamers first
small_first ASC, -- small (< small_mb) first
size DESC, -- then bigger files first inside each group
size ASC, -- then bigger files first inside each group
created_at DESC; -- then newest
""", (days, small_mb))
return cursor.fetchall()
@ -211,26 +216,108 @@ def smart_choice(cursor, small_mb=250):
""", (small_mb,))
return cursor.fetchall()
def smart_choice_by_count(cursor, small_mb=250):
    """
    Returns candidate videos ordered by:
      1) time window: 7d, then 30d, then 90d, then fallback (any time)
      2) streamer priority: COUNT(*) per (username, site) DESC within the window
      3) small (< small_mb MB) first, then big
      4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.
    """
    def pick(days: int):
        # Windowed variant: only rows created within the last `days` days.
        # NOTE(review): make_interval is PostgreSQL-specific — confirm backend.
        cursor.execute("""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status <> 'missing'
                  AND v.filepath IS NOT NULL
                  AND v.created_at >= NOW() - make_interval(days => %s)
            ),
            by_streamer AS (
                SELECT username, site, COUNT(*) AS total_vids
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_vids,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_vids DESC,   -- most videos first
                small_first ASC,   -- small (< small_mb) first
                size DESC,         -- then larger files first within group
                created_at DESC;   -- then newest
        """, (days, small_mb))
        return cursor.fetchall()

    # Try windows: 7 → 30 → 90 days; first non-empty window wins.
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering (no created_at filter).
    cursor.execute("""
        WITH candidates AS (
            SELECT v.*
            FROM videos v
            WHERE v.codec IS NULL
              AND v.status <> 'missing'
              AND v.filepath IS NOT NULL
        ),
        by_streamer AS (
            SELECT username, site, COUNT(*) AS total_vids
            FROM candidates
            GROUP BY username, site
        ),
        ordered AS (
            SELECT c.*,
                   bs.total_vids,
                   CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
            FROM candidates c
            JOIN by_streamer bs
              ON bs.username = c.username
             AND bs.site = c.site
        )
        SELECT *
        FROM ordered
        ORDER BY
            total_vids DESC,
            small_first ASC,
            size DESC,
            created_at DESC;
    """, (small_mb,))
    return cursor.fetchall()
def select_user_videos(username, cursor):
    """Fetch un-encoded, non-missing videos for *username* ('all' = everyone),
    smallest first."""
    if username == "all":
        cursor.execute("SELECT * FROM videos WHERE status != 'missing' AND codec IS NULL ORDER BY size ASC")
    else:
        cursor.execute("SELECT * FROM videos WHERE username = %s AND status != 'missing' AND codec IS NULL ORDER BY size ASC", (username,))
    return cursor.fetchall()
def reencode_videos_av1():
# get videos
conn, cursor = config.get_local_db_connection()
# cursor.execute("SELECT * FROM videos WHERE codec IS NULL AND status != 'missing' AND filepath IS NOT NULL AND filepath NOT LIKE 'U:%' ORDER BY size ASC;")
# videos = cursor.fetchall()
while True:
username = input("Enter username: ")
if username:
videos = select_user_videos(username, cursor)
else:
videos = smart_choice(cursor)
# videos = smart_choice(cursor)
videos = smart_choice_by_count(cursor)
with tqdm(videos, desc="Processing videos", unit="file") as pbar:
for video in videos:
pbar.update(1)
input_path = video['filepath']
if not os.path.exists(input_path):
@ -243,6 +330,7 @@ def reencode_videos_av1():
if file_size_in_mb < 1:
print("Video is too small. Skipping.")
os.remove(input_path)
cursor.execute("UPDATE videos SET status = 'deleted' WHERE id = %s", (video['id'],))
continue
# 2) Get current bitrate & resolution
@ -272,8 +360,13 @@ def reencode_videos_av1():
continue
# 4) Compare file sizes and replace if smaller
if check_and_replace_if_smaller(input_path, output_path):
new_size = check_and_replace_if_smaller(input_path, output_path)
if new_size:
update_codec_db(video['id'], 'av1')
update_file_size_db(video['id'], new_size)
pbar.update(1)
if __name__ == "__main__":
reencode_videos_av1()

@ -1,9 +1,8 @@
import ffmpeg
import subprocess
import json
from collections import Counter
import shutil
import os
from collections import Counter
def is_av1(filepath):
"""Check if a video file is already AV1-encoded."""
@ -17,27 +16,6 @@ def is_av1(filepath):
print(f"Error probing {filepath}: {e}")
return False
def get_video_info(filepath):
"""
Returns (bitrate_in_kbps, (width, height)) for the specified video file.
If probing fails, returns (None, (None, None)).
"""
try:
probe = ffmpeg.probe(filepath)
format_info = probe['format']
video_stream = next(
(stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
None
)
if video_stream:
# Convert from bits/sec to kbps
bitrate_kbps = int(format_info['bit_rate']) // 1000
width = video_stream['width']
height = video_stream['height']
return bitrate_kbps, (width, height)
except ffmpeg.Error as e:
print(f"Error getting video info for {filepath}: {e}")
return None, (None, None)
def get_fps(filepath):
"""Get the frames per second (FPS) of the input video using ffmpeg.probe."""
@ -52,44 +30,69 @@ def get_fps(filepath):
print(f"Error getting FPS for {filepath}: {e}")
return None
def get_video_info(filepath):
    """
    Probe *filepath* with ffprobe and return a dict:
        { 'width': int, 'height': int, 'bitrate': int, 'fps': float }
    - bitrate is Kbps (rounded down)
    - uses the stream bit_rate, else the format bit_rate, else computes it
      from file size / duration
    Returns all-zero values when probing or parsing fails.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,bit_rate,r_frame_rate",
        "-show_entries", "format=bit_rate,duration",
        "-of", "json", filepath
    ]
    r = subprocess.run(cmd, capture_output=True, text=True)
    if r.returncode:
        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}
    try:
        d = json.loads(r.stdout or "{}")
        s = (d.get("streams") or [{}])[0]
        f = d.get("format") or {}
        width = int(s.get("width") or 0)
        height = int(s.get("height") or 0)

        # fps: r_frame_rate arrives as a fraction like "30000/1001"
        fps = 0.0
        rfr = s.get("r_frame_rate")
        if rfr and rfr != "0/0":
            try:
                num, den = rfr.split("/")
                num = float(num)
                den = float(den)
                fps = (num / den) if den else 0.0
            except Exception:
                pass

        # bitrate in bps: prefer stream, fall back to format, else compute
        br_bps = s.get("bit_rate") or f.get("bit_rate")
        if not br_bps:
            try:
                dur = float(f.get("duration") or 0)
                if dur > 0:
                    br_bps = int(os.path.getsize(filepath) * 8 / dur)
            except Exception:
                br_bps = 0
        br_kbps = int(int(br_bps or 0) / 1000)
        return {"width": width, "height": height, "bitrate": br_kbps, "fps": fps}
    except Exception:
        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}
def get_target_resolution(group):
    """Return the most common (width, height) across the group's videos.

    Falls back to (1280, 720) when no video could be probed.
    """
    resolutions = []
    for v in group:
        info = get_video_info(v["filepath"])
        w, h = info["width"], info["height"]
        if w and h:  # skip failed probes (zeroed dimensions)
            resolutions.append((w, h))
    if not resolutions:
        return (1280, 720)
    return Counter(resolutions).most_common(1)[0][0]
def get_target_bitrate(width, height):
"""Your existing function to choose a bitrate based on resolution."""
@ -97,105 +100,4 @@ def get_target_bitrate(width, height):
for res, bitrate in resolutions.items():
if width <= res[0] and height <= res[1]:
return bitrate
return 2500
def concatenate_videos(videos_list, reencode_concate = False):
"""
Concatenate pre-grouped videos, then re-encode them using AV1 (NVENC)
while forcing a unified resolution and frame rate on each input
before final concatenation in one ffmpeg command.
"""
if len(videos_list) <= 1:
return False
from concat_helper import copy_concatenate_videos
copy_concat = copy_concatenate_videos(videos_list)
if copy_concat:
return copy_concat
if not reencode_concate:
return False
main_video = videos_list[0]
video_path = main_video["filepath"]
os.makedirs("temp", exist_ok=True)
os.makedirs("concated", exist_ok=True)
temp_path = os.path.join("temp", os.path.basename(video_path))
output_path = os.path.join("concated", os.path.basename(video_path))
current_bitrate, (width, height) = get_video_info(videos_list[0]['filepath'])
target_width, target_height = get_target_resolution(videos_list)
target_bitrate_kbps = get_target_bitrate(target_width, target_height)
if target_bitrate_kbps > current_bitrate:
target_bitrate_kbps = current_bitrate
max_bitrate_kbps = int(1.5 * target_bitrate_kbps)
fps_float = get_fps(video_path)
if fps_float is None or fps_float <= 0:
print(f"Could not determine FPS for {video_path}. Using default keyframe interval of 30.")
fps_float = 30.0
keyframe_interval = int(fps_float)
print(f"Concatenating {len(videos_list)} videos into {temp_path}")
print(f" Mode Resolution: {target_width}x{target_height}")
print(f" Target Bitrate: {target_bitrate_kbps}k (max ~{max_bitrate_kbps}k)")
print(f" Keyframe Interval: {keyframe_interval}")
cmd = ["ffmpeg", "-y"] # Overwrite output if exists
for v in videos_list:
cmd.extend(["-i", v["filepath"]])
filter_statements = []
concat_streams = []
n = len(videos_list)
unified_fps = 30
for i in range(n):
filter_statements.append(
f"[{i}:v]fps={unified_fps},scale={target_width}:{target_height}[v{i}]"
)
concat_streams.append(f"[v{i}][{i}:a]")
# Example final: [v0][0:a][v1][1:a]concat=n=2:v=1:a=1[outv][outa]
concat_line = "".join(concat_streams) + f"concat=n={n}:v=1:a=1[outv][outa]"
filter_statements.append(concat_line)
filter_complex = ";".join(filter_statements)
cmd.extend([
"-filter_complex", filter_complex,
"-map", "[outv]",
"-map", "[outa]",
"-c:v", "av1_nvenc",
"-b:v", f"{target_bitrate_kbps}k",
"-maxrate", f"{max_bitrate_kbps}k",
"-bufsize", f"{max_bitrate_kbps}k",
"-preset", "p5",
"-g", str(keyframe_interval),
"-c:a", "aac",
"-b:a", "192k",
temp_path
])
try:
subprocess.run(cmd, check=True)
except:
return False
for video in videos_list:
os.remove(video["filepath"])
shutil.move(temp_path, output_path)
return main_video
def get_file_size_in_mb(file_path):
    """Return the size of *file_path* in mebibytes (MiB)."""
    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes / (1024 * 1024)
return 2500
Loading…
Cancel
Save