small update

main
oscar 2 months ago
parent 9ac5555876
commit c79ec8e179

@ -1,8 +0,0 @@
import requests
def get_data(username, timeout=10):
    """
    Fetch the bio-context JSON for `username` from the Chaturbate API.

    Args:
        username: Account name inserted into the API URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` can block forever on a stalled connection.

    Returns:
        The decoded JSON payload, or None when the request fails or the body
        is not valid JSON. HTTP error statuses are not raised here; whatever
        JSON the server sent is returned as-is, so callers should treat a
        falsy result as "no data".
    """
    url = f"https://chaturbate.com/api/biocontext/{username}"
    try:
        response = requests.get(url, timeout=timeout)
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"Error fetching data for {username}: {e}")
        return None

@ -3,7 +3,7 @@ from config import get_local_db_connection
from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
from tqdm import tqdm
import os, hashlib, subprocess, json
import os, hashlib, subprocess
from config import get_local_db_connection
from concurrent.futures import ThreadPoolExecutor
@ -12,8 +12,8 @@ THUMB_WIDTH = 640
FF_QUALITY = "80"
VIDEO_DIRS = [
"U:/streamaster",
"E:/streamaster/downloaded"
"U:/streamaster/",
"E:/streamaster/streamaster/downloaded"
]
def get_all_video_files():
@ -45,20 +45,29 @@ def mark_missing_videos(cursor, conn):
conn.commit()
def fill_missing_filepaths(cursor, conn):
cursor.execute("SELECT video_id, filepath FROM videos WHERE status != 'missing'")
cursor.execute("SELECT id, filepath, status FROM videos")
videos = cursor.fetchall()
with tqdm(videos, desc="Updating filepaths...") as pbar:
for vid in videos:
pbar.update(1)
video_id, filepath = vid.values()
filename = f"{video_id}.mp4"
filepath = vid['filepath']
if not filepath:
continue
filename = os.path.basename(filepath)
status = vid['status']
path = find_video_path(filename)
if path:
if not path:
continue
path = path.replace("\\", "/")
if path == filepath:
if path == filepath and status != 'missing':
continue
cursor.execute("UPDATE videos SET filepath = %s WHERE video_id = %s", (path, video_id))
cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (path, vid['id']))
conn.commit()
def fill_missing_hashes(cursor, conn):
@ -88,7 +97,7 @@ def fill_missing_sizes(cursor, conn):
conn.commit()
def fill_missing_durations(cursor, conn):
cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing'")
cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status != 'missing' ORDER BY size ASC")
videos = cursor.fetchall()
with tqdm(videos, desc="Updating durations...") as pbar:
@ -97,11 +106,21 @@ def fill_missing_durations(cursor, conn):
video_id, filepath = vid.values()
if filepath and os.path.exists(filepath):
duration = get_duration(filepath)
if duration <= 0:
print(f"🚫 Failed to get duration for {filepath}")
os.remove(filepath)
continue
cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (duration, video_id))
conn.commit()
def fill_missing_gender(cursor, conn):
import chaturbate
def get_data(username):
import requests
url = f"https://chaturbate.com/api/biocontext/{username}"
data = requests.get(url)
data = data.json()
return data
cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status != 'missing'")
videos = cursor.fetchall()
@ -113,13 +132,13 @@ def fill_missing_gender(cursor, conn):
cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
gender = cursor.fetchone()
if not gender:
gender = chaturbate.get_data(username)
if not gender:
data = get_data(username)
if not data:
continue
if 'status' in gender:
if gender['status'] == 401:
if 'status' in data:
if data['status'] == 401:
continue
gender = gender['sex']
gender = data['sex']
if 'woman' in gender:
gender_str = 'Female'
elif 'couple' in gender:
@ -141,19 +160,19 @@ def generate_thumbnails_for_videos(cursor, conn):
videos = cursor.fetchall()
tasks = []
with tqdm(videos, desc="Generating thumbnails...") as pbar:
for v in videos:
pbar.update(1)
video_id = v.get("video_id")
filepath = v.get("filepath")
thumb_path = _hashed_thumb_path(video_id)
if not filepath:
# print(f"⚠️ Skipping {video_id}: missing filepath")
continue
if not os.path.exists(filepath):
# print(f"⚠️ Skipping {video_id}: file not found → {filepath}")
continue
thumb_path = _hashed_thumb_path(video_id)
if not os.path.exists(thumb_path):
tasks.append((filepath, thumb_path))
@ -193,18 +212,20 @@ def _gen_thumb_cmd(src: str, dest: str):
if __name__ == '__main__':
conn, cursor = get_local_db_connection()
all_videos = get_all_video_files()
print("🔍 Scanning for missing data...")
if True:
all_videos = get_all_video_files()
fill_missing_filepaths(cursor, conn)
mark_missing_videos(cursor, conn)
# fill_missing_durations(cursor, conn)
# fill_missing_sizes(cursor, conn)
# generate_thumbnails_for_videos(cursor, conn)
# fill_missing_hashes(cursor, conn)
generate_thumbnails_for_videos(cursor, conn)
fill_missing_sizes(cursor, conn)
fill_missing_durations(cursor, conn)
# fill_missing_gender(cursor, conn)
# fill_missing_hashes(cursor, conn)
cursor.close()

@ -0,0 +1,228 @@
import os, shutil
import ffmpeg
from tqdm import tqdm
def is_av1(filepath):
    """
    Report whether `filepath` has a video stream whose codec is AV1.

    Returns False when no video stream is AV1, and also when probing raises
    `ffmpeg.Error` (the error is printed).
    """
    try:
        streams = ffmpeg.probe(filepath)['streams']
        return any(
            s['codec_type'] == 'video' and s.get('codec_name') == 'av1'
            for s in streams
        )
    except ffmpeg.Error as err:
        print(f"Error probing {filepath}: {err}")
    return False
def get_video_info(filepath):
    """
    Return (bitrate_in_kbps, (width, height)) for the given video file.

    Returns (None, (None, None)) when probing fails, when the file has no
    video stream, or when the probe result lacks a usable `bit_rate`,
    `width` or `height`, so that one odd file cannot abort a whole batch.
    """
    unknown = (None, (None, None))

    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting video info for {filepath}: {e}")
        return unknown

    video_stream = next(
        (
            stream for stream in probe.get('streams', [])
            if stream.get('codec_type') == 'video'
        ),
        None,
    )
    if not video_stream:
        return unknown

    try:
        # Container-level bitrate, converted from bits/sec to kbps.
        bitrate_kbps = int(probe['format']['bit_rate']) // 1000
        width = video_stream['width']
        height = video_stream['height']
    except (KeyError, TypeError, ValueError) as e:
        # The probe output omits fields it cannot determine.
        print(f"Incomplete video info for {filepath}: {e!r}")
        return unknown

    return bitrate_kbps, (width, height)
def get_files(folder):
    """
    Recursively gather video files (.mp4, .mkv, .avi, .mov) under `folder`.

    Sub-directories whose name contains "encoded" are skipped entirely.
    Only directory names *below* `folder` are checked, so an input folder
    whose own path happens to contain "encoded" is still scanned.

    Returns:
        List of file paths sorted by file size, smallest first.
    """
    video_extensions = ('.mp4', '.mkv', '.avi', '.mov')
    found = []
    for root, dirnames, filenames in os.walk(folder):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [d for d in dirnames if "encoded" not in d]
        found.extend(
            os.path.join(root, filename)
            for filename in filenames
            if filename.lower().endswith(video_extensions)
        )
    return sorted(found, key=os.path.getsize)
def parse_text_for_print(text):
    """
    Shorten `text` for console output.

    Strings of up to 100 characters are returned unchanged; longer ones are
    cut to their first 100 characters with '...' appended.
    """
    if len(text) <= 100:
        return text
    return text[:100] + '...'
def get_target_bitrate(width, height):
    """
    Choose a target bitrate (kbps) from the video resolution.

    The frame is matched against the tiers below from smallest to largest
    and the first tier it fits into wins. Orientation is ignored: the longer
    side is compared with the tier's long side and the shorter side with the
    short side, so a portrait 1080x1920 video gets the same bitrate as a
    landscape 1920x1080 one instead of falling through to the 4K tier.

    Returns 2500 when the frame is larger than every tier.
    """
    tiers = (
        ((854, 480), 1000),
        ((1280, 720), 1500),
        ((1920, 1080), 3000),
        ((2560, 1440), 5000),
        ((3840, 2160), 12000),
    )
    long_side = max(width, height)
    short_side = min(width, height)
    for (max_long, max_short), bitrate in tiers:
        if long_side <= max_long and short_side <= max_short:
            return bitrate
    # NOTE(review): the fallback for larger-than-4K input is lower than the
    # 4K tier; kept as in the original - confirm this is intended.
    return 2500
def get_fps(filepath):
    """
    Get the frames per second (FPS) of the first video stream in `filepath`.

    The value is read from the stream's `r_frame_rate` field, a
    "numerator/denominator" string.

    Returns:
        The frame rate as a float, or None when probing fails, there is no
        video stream, or the rate is missing, malformed, or not positive
        (for example "0/0").
    """
    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting FPS for {filepath}: {e}")
        return None

    video_stream = next(
        (
            stream for stream in probe.get('streams', [])
            if stream.get('codec_type') == 'video'
        ),
        None,
    )
    if not video_stream or 'r_frame_rate' not in video_stream:
        return None

    fps_str = video_stream['r_frame_rate']
    try:
        num, den = map(int, fps_str.split('/'))
        fps = num / den
    except (ValueError, ZeroDivisionError):
        print(f"Error getting FPS for {filepath}: invalid frame rate {fps_str!r}")
        return None

    # A zero or negative rate is as useless to callers as a missing one.
    return fps if fps > 0 else None
def encode_video(filepath, output_path, target_bitrate):
    """
    Encode `filepath` to AV1 at `output_path` with a target bitrate (in kbps).

    Uses the NVIDIA hardware AV1 encoder (`av1_nvenc`, preset `p5`) in an MP4
    container. The maximum bitrate and buffer size are 1.5x the target, and a
    keyframe is placed roughly once per second of video.

    Returns:
        True when ffmpeg finished successfully, False when it raised
        `ffmpeg.Error`. On failure any partially written output file is
        removed so that it can never be mistaken for a finished encode.
    """
    fps = get_fps(filepath)
    if fps is None:
        print(f"Could not determine FPS for {filepath}, using default keyframe interval.")
        fps = 30  # Default fallback if FPS can't be determined
    # One keyframe per second; never let it drop to 0 for sub-1-fps input.
    keyframe_interval = max(1, int(fps))
    # Calculate 1.5x for max bitrate
    max_bitrate = int(1.5 * target_bitrate)

    # ffmpeg does not create missing output directories itself.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    filepath_print = parse_text_for_print(filepath)
    print(f" Encoding {filepath} to AV1 at {target_bitrate} kbps...")
    try:
        (
            ffmpeg
            .input(filepath)
            .output(
                output_path,
                vcodec='av1_nvenc',
                format='mp4',
                b=f"{target_bitrate}k",
                maxrate=f"{max_bitrate}k",
                bufsize=f"{max_bitrate}k",
                preset='p5',
                g=keyframe_interval
            )
            .run(
                overwrite_output=True,
            )
        )
    except ffmpeg.Error as e:
        print(f" Error encoding {filepath_print} to AV1: {e}")
        # Drop the truncated output so it cannot replace the original later.
        if os.path.exists(output_path):
            os.remove(output_path)
        return False

    print(f" Finished encoding {filepath_print} to AV1 at {target_bitrate} kbps "
          f"(maxrate={max_bitrate} kbps).")
    return True
def check_and_replace_if_smaller(original_path, temp_output_path):
    """
    Replace the original with the re-encoded temp file when that is worthwhile.

    Rules, in order:
      * Temp file missing or empty (failed encode): nothing is replaced, an
        empty temp file is deleted, and None is returned.
      * Original smaller than 25 MB: always replaced by the temp file.
      * Temp file not smaller, or saving less than 1 MB: temp file deleted.
      * Otherwise: original replaced by the temp file.

    Returns:
        True if the original was replaced, False if the temp file was
        discarded, None if there was no usable temp file.
    """
    if not os.path.exists(temp_output_path):
        print(f"[ERROR] Temp file {temp_output_path} not found. Skipping replacement...")
        return None

    original_size = os.path.getsize(original_path)
    processed_size = os.path.getsize(temp_output_path)

    if processed_size == 0:
        # A zero-byte output is a failed encode; never let it overwrite the source.
        print(f"[ERROR] Temp file {temp_output_path} is empty. Skipping replacement...")
        os.remove(temp_output_path)
        return None

    bytes_per_mb = 1024 * 1024
    size_original_mb = original_size / bytes_per_mb
    size_processed_mb = processed_size / bytes_per_mb
    size_diff_mb = size_original_mb - size_processed_mb

    if size_original_mb < 25:
        shutil.move(temp_output_path, original_path)
        return True

    if processed_size >= original_size or size_diff_mb < 1:
        os.remove(temp_output_path)
        return False

    # Only reached when processed_size < original_size, so original_size > 0.
    size_diff_perc = (1 - processed_size / original_size) * 100
    print(100*"=")
    print(f" Re-encoded is smaller by {size_diff_perc:.2f}% ({size_diff_mb:.2f} MB). Replacing original.")
    print(f" Original: {size_original_mb:.2f} MB \n Re-encoded: {size_processed_mb:.2f} MB.")
    print(100*"=")
    shutil.move(temp_output_path, original_path)
    return True
def _move_to_encoded(input_folder, path):
    """Move `path` into `<input_folder>/encoded`, creating that folder if needed."""
    # NOTE(review): files are placed flat by basename, so equally named files
    # from different sub-folders end up at the same destination - confirm.
    destination = os.path.join(input_folder, "encoded", os.path.basename(path))
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    shutil.move(path, destination)


def reencode_videos_av1(input_folder):
    """
    Re-encode every video under `input_folder` to AV1.

    For each file returned by `get_files`:
      1. Read its bitrate and resolution; skip it if the bitrate is unknown.
      2. Pick the target bitrate: the resolution preset, capped at the
         file's current bitrate so the encode never aims higher than the source.
      3. If the file is already AV1, move it to `<input_folder>/encoded`.
      4. Otherwise encode it into the `temp` folder (relative to the current
         working directory), replace the original when that is worthwhile,
         and move the result to `<input_folder>/encoded`.

    A file whose encode produced no usable output is left where it is, so a
    later run picks it up again instead of treating it as done.
    """
    files = get_files(input_folder)
    for input_path in tqdm(files, desc="Processing videos", unit="file"):
        short_name = parse_text_for_print(input_path)
        file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
        print(f"\nProcessing {short_name} ({file_size_in_mb:.2f} MB)...")

        # Current bitrate & resolution
        current_bitrate, (width, height) = get_video_info(input_path)
        if not current_bitrate:
            print("Video's bitrate is not available. Skipping")
            continue

        # Never target more than the source already has.
        target_bitrate = min(get_target_bitrate(width, height), current_bitrate)

        if is_av1(input_path):
            print("Video is already encoded in AV1. Skipping")
            _move_to_encoded(input_folder, input_path)
            continue

        # Re-encode into the scratch folder; make sure it exists first.
        os.makedirs('temp', exist_ok=True)
        output_path = os.path.join('temp', os.path.basename(input_path))
        encode_video(input_path, output_path, target_bitrate)

        # Compare file sizes and replace if smaller. None means the encode
        # left no usable output.
        replaced = check_and_replace_if_smaller(input_path, output_path)
        if replaced is None:
            print("Re-encoding produced no output. Leaving the original in place.")
            continue

        _move_to_encoded(input_folder, input_path)
# ---------------------- Main Script Entry ---------------------- #
if __name__ == "__main__":
    import sys

    # The folder comes from the first CLI argument; without one, prompt for it.
    input_folder = (
        sys.argv[1] if len(sys.argv) > 1
        else input("Enter the input folder path: ")
    )

    # Bail out with a non-zero exit code when the folder is not a directory.
    if not os.path.isdir(input_folder):
        print(f"Input folder '{input_folder}' does not exist.")
        sys.exit(1)

    print("Re-encoding videos to AV1 (only if bitrate is above our resolution-based presets)...")
    reencode_videos_av1(input_folder)
    print("All done!")
Loading…
Cancel
Save