You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

372 lines
13 KiB
Python

import os, shutil, config
import ffmpeg
from tqdm import tqdm
def is_av1(filepath):
    """Report whether *filepath* contains a video stream encoded as AV1.

    The file is inspected with ``ffmpeg.probe``. The result is ``True`` when
    any stream has ``codec_type == 'video'`` and ``codec_name == 'av1'``,
    otherwise ``False``. A probe failure (``ffmpeg.Error``) is printed and
    also reported as ``False``.
    """
    try:
        streams = ffmpeg.probe(filepath)['streams']
        found = any(
            s['codec_type'] == 'video' and s.get('codec_name') == 'av1'
            for s in streams
        )
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return False
    return found
def get_video_info(filepath):
    """Return the container bitrate and the video resolution of *filepath*.

    Args:
        filepath: Path of the media file to probe with ``ffmpeg.probe``.

    Returns:
        ``(bitrate_kbps, (width, height))`` for the first video stream, or
        ``(None, (None, None))`` when the probe fails, the file has no video
        stream, or the bitrate/resolution fields are missing or malformed.
    """
    unavailable = (None, (None, None))

    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting video info for {filepath}: {e}")
        return unavailable

    video_stream = next(
        (
            stream
            for stream in probe.get('streams', [])
            if stream.get('codec_type') == 'video'
        ),
        None,
    )
    if not video_stream:
        return unavailable

    try:
        # Convert from bits/sec to kbps.
        bitrate_kbps = int(probe['format']['bit_rate']) // 1000
        width = video_stream['width']
        height = video_stream['height']
    except (KeyError, TypeError, ValueError):
        # The probe result may lack these fields or hold a non-numeric
        # bitrate; report "unavailable" instead of crashing so the caller's
        # existing skip path is used.
        return unavailable

    return bitrate_kbps, (width, height)
def get_target_bitrate(width, height):
    """Pick the target video bitrate (kbps) for a given frame size.

    The frame is matched against resolution tiers from smallest to largest and
    the bitrate of the first tier that fully contains it is returned. The
    comparison is orientation-independent: a portrait 1080x1920 clip is treated
    like landscape 1920x1080 instead of spilling into the 4K tier.

    Args:
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        The tier bitrate in kbps, or 2500 when the frame exceeds every tier.
    """
    # ((max long side, max short side), target kbps), smallest tier first.
    tiers = (
        ((854, 480), 1000),
        ((1280, 720), 1500),
        ((1920, 1080), 3000),
        ((2560, 1440), 5000),
        ((3840, 2160), 12000),
    )

    long_side = max(width, height)
    short_side = min(width, height)

    for (max_long, max_short), bitrate in tiers:
        if long_side <= max_long and short_side <= max_short:
            return bitrate

    # NOTE(review): frames larger than the top tier get the historical 2500 kbps
    # fallback, which is lower than the 1080p tier -- confirm this is intended.
    return 2500
def get_fps(filepath):
    """Return the frame rate of the first video stream in *filepath*.

    The stream's ``r_frame_rate`` field is read as a ``"num/den"`` fraction
    and converted to a float.

    Args:
        filepath: Path of the media file to probe with ``ffmpeg.probe``.

    Returns:
        The frame rate as a positive float, or ``None`` when the probe fails,
        there is no video stream, the field is absent, or the value is
        malformed / non-positive (e.g. ``"0/0"``).
    """
    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting FPS for {filepath}: {e}")
        return None

    video_stream = next(
        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
        None,
    )
    if not video_stream or 'r_frame_rate' not in video_stream:
        return None

    try:
        num, den = map(int, video_stream['r_frame_rate'].split('/'))
        fps = num / den
    except (ValueError, ZeroDivisionError):
        # Not a "num/den" pair, or a zero denominator such as "0/0".
        return None

    # A zero or negative rate is unusable for deriving a keyframe interval.
    return fps if fps > 0 else None
def encode_video(filepath, output_path, target_bitrate):
    """Re-encode *filepath* to AV1 (``av1_nvenc``) in an MP4 container.

    Args:
        filepath: Source video path.
        output_path: Destination path; overwritten if it already exists.
        target_bitrate: Target average video bitrate in kbps. The max rate and
            buffer size are both set to 1.5x this value.

    Returns:
        ``True`` when ffmpeg finished successfully, ``False`` when the source
        frame rate could not be determined or ffmpeg reported an error.
    """
    fps = get_fps(filepath)
    if fps is None:
        print(f"Could not determine FPS for {filepath}.")
        return False

    # One keyframe per second of video. Clamp to 1 so a source below 1 fps
    # does not produce a GOP size of 0.
    keyframe_interval = max(1, int(fps))
    # Calculate 1.5x for max bitrate.
    max_bitrate = int(1.5 * target_bitrate)

    print(f" Encoding {filepath} to AV1 at {target_bitrate} kbps...")
    try:
        (
            ffmpeg
            .input(filepath)
            .output(
                output_path,
                vcodec='av1_nvenc',
                format='mp4',
                b=f"{target_bitrate}k",
                maxrate=f"{max_bitrate}k",
                bufsize=f"{max_bitrate}k",
                preset='p5',
                g=keyframe_interval,
            )
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print(f" Error encoding {os.path.basename(filepath)} to AV1: {e}")
        # Do not leave a truncated output behind for later steps to pick up.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f" Could not remove partial output {output_path}: {cleanup_error}")
        # Explicit False (the original fell through and returned None).
        return False

    print(f" Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps (maxrate={max_bitrate} kbps).")
    return True
def check_and_replace_if_smaller(original_path, temp_output_path):
    """Replace the original file with the re-encode when that saves at least 1 MB.

    Args:
        original_path: Path of the source video; overwritten on success.
        temp_output_path: Path of the re-encoded file. It is always consumed:
            moved over the original on success, deleted otherwise.

    Returns:
        The size of the re-encoded file in MB (a truthy float) when the
        original was replaced, otherwise ``False`` -- including when the temp
        file is missing, empty, not smaller, or smaller by less than 1 MB.
    """
    if not os.path.exists(temp_output_path):
        print(f"[ERROR] Temp file {temp_output_path} not found. Skipping replacement...")
        return False

    bytes_per_mb = 1024 * 1024
    original_size = os.path.getsize(original_path)
    processed_size = os.path.getsize(temp_output_path)
    size_original_mb = original_size / bytes_per_mb
    size_processed_mb = processed_size / bytes_per_mb
    size_diff_mb = size_original_mb - size_processed_mb

    # An empty output is a failed encode, never an improvement; without this
    # guard it would count as "smaller" and overwrite the original.
    if processed_size == 0 or processed_size >= original_size or size_diff_mb < 1:
        os.remove(temp_output_path)
        return False

    # Safe division: reaching this point implies original_size > processed_size > 0.
    size_diff_perc = (1 - processed_size / original_size) * 100

    print(100 * "=")
    print(f" Re-encoded is smaller by {size_diff_perc:.2f}% ({size_diff_mb:.2f} MB). Replacing original.")
    print(f" Original: {size_original_mb:.2f} MB \n Re-encoded: {size_processed_mb:.2f} MB.")
    print(100 * "=")
    shutil.move(temp_output_path, original_path)
    return size_processed_mb
def update_codec_db(video_id, codec):
    """Set the ``codec`` column of one row in the ``videos`` table.

    Opens its own connection via ``config.get_local_db_connection()``, commits
    the update, and always closes the connection -- even when the statement or
    the commit raises.

    Args:
        video_id: Value matched against ``videos.id``.
        codec: New value for ``videos.codec``.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", (codec, video_id))
        conn.commit()
    finally:
        # Release the connection on failure too, so errors do not leak it.
        conn.close()
def update_file_size_db(video_id, size):
    """Set the ``size`` column of one row in the ``videos`` table.

    Opens its own connection via ``config.get_local_db_connection()``, commits
    the update, and always closes the connection -- even when the statement or
    the commit raises.

    Args:
        video_id: Value matched against ``videos.id``.
        size: New value for ``videos.size``.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        cursor.execute("UPDATE videos SET size = %s WHERE id = %s", (size, video_id))
        conn.commit()
    finally:
        # Release the connection on failure too, so errors do not leak it.
        conn.close()
def smart_choice(cursor, small_mb=250):
    """
    Returns a list of candidate videos to encode, ordered by:
    1) time window priority: 7d, then 30d, then 90d, then fallback (any time)
    2) streamer priority: total MB per (username, site) DESC within the window
    3) small (< small_mb MB) first, then big
    4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.

    Candidates are rows of ``videos`` with ``codec IS NULL``,
    ``status = 'active'`` and a non-NULL ``filepath``. The first window that
    yields any rows wins; its rows are returned as given by
    ``cursor.fetchall()``.

    Args:
        cursor: Open DB cursor used to run the query.
        small_mb: Size threshold (MB) separating "small" from "big" videos.
    """

    def pick(days=None):
        # One query serves every window so the ordering cannot drift between
        # the windowed and the fallback variants. `days=None` means "any time".
        if days is None:
            window_filter = ""
            params = (small_mb,)
        else:
            # Constant SQL fragment; the day count itself stays a bound parameter.
            window_filter = "AND v.created_at >= NOW() - make_interval(days => %s)"
            params = (days, small_mb)

        cursor.execute(f"""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status = 'active'
                  AND v.filepath IS NOT NULL
                  {window_filter}
            ),
            by_streamer AS (
                SELECT username, site, SUM(size) AS total_mb
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_mb,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_mb DESC,      -- top streamers first
                small_first ASC,    -- small (< small_mb) first
                size DESC,          -- then bigger files first inside each group
                created_at DESC;    -- then newest
        """, params)
        return cursor.fetchall()

    # Try 7d -> 30d -> 90d.
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering logic.
    return pick()
def smart_choice_by_count(cursor, small_mb=250):
    """
    Returns candidate videos ordered by:
    1) time window: 7d, then 30d, then 90d, then fallback (any time)
    2) streamer priority: COUNT(*) per (username, site) DESC within the window
    3) small (< small_mb MB) first, then big
    4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.

    Candidates are rows of ``videos`` with ``codec IS NULL``,
    ``status = 'active'`` and a non-NULL ``filepath``. The first window that
    yields any rows wins; its rows are returned as given by
    ``cursor.fetchall()``.

    Args:
        cursor: Open DB cursor used to run the query.
        small_mb: Size threshold (MB) separating "small" from "big" videos.
    """

    def pick(days=None):
        # One query serves every window so the windowed and fallback variants
        # always share the same ordering. `days=None` means "any time".
        if days is None:
            window_filter = ""
            params = (small_mb,)
        else:
            # Constant SQL fragment; the day count itself stays a bound parameter.
            window_filter = "AND v.created_at >= NOW() - make_interval(days => %s)"
            params = (days, small_mb)

        cursor.execute(f"""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status = 'active'
                  AND v.filepath IS NOT NULL
                  {window_filter}
            ),
            by_streamer AS (
                SELECT username, site, COUNT(*) AS total_vids
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_vids,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_vids DESC,    -- most videos first
                small_first ASC,    -- small (< small_mb) first
                size DESC,          -- then larger files first within group
                created_at DESC;    -- then newest
        """, params)
        return cursor.fetchall()

    # Try windows: 7 -> 30 -> 90 days.
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering.
    return pick()
def select_user_videos(username, cursor):
    """Fetch the not-yet-encoded active videos of one user, smallest first.

    Selects rows of ``videos`` with ``status = 'active'``, ``codec IS NULL``
    and a non-NULL ``filepath`` (the same candidate filter the smart selectors
    use, so a row without a path never reaches the file-system checks),
    ordered by ``size`` ascending.

    Args:
        username: Value matched against ``videos.username``; the literal
            ``"all"`` disables the username filter.
        cursor: Open DB cursor used to run the query.

    Returns:
        The rows as given by ``cursor.fetchall()``.
    """
    if username == "all":
        cursor.execute(
            "SELECT * FROM videos "
            "WHERE status = 'active' AND codec IS NULL AND filepath IS NOT NULL "
            "ORDER BY size ASC"
        )
    else:
        cursor.execute(
            "SELECT * FROM videos "
            "WHERE username = %s AND status = 'active' AND codec IS NULL "
            "AND filepath IS NOT NULL "
            "ORDER BY size ASC",
            (username,),
        )
    return cursor.fetchall()
def _reencode_single_video(video, conn, cursor):
    """Validate, re-encode and record the result for one ``videos`` row.

    Args:
        video: Row mapping; reads ``video['filepath']`` and ``video['id']``.
        conn: DB connection, used to commit the "deleted" status update.
        cursor: Cursor belonging to *conn*.
    """
    input_path = video['filepath']
    # A NULL filepath would make os.path.exists raise TypeError; treat it like
    # a missing file.
    if not input_path or not os.path.exists(input_path):
        print(f"🚫 File not found: {input_path}")
        return

    file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
    print(f"\nProcessing {os.path.basename(input_path)} ({file_size_in_mb:.2f} MB)...")

    # 1) Files under 1 MB are removed from disk and flagged as deleted.
    if file_size_in_mb < 1:
        print("Video is too small. Skipping.")
        os.remove(input_path)
        cursor.execute("UPDATE videos SET status = 'deleted' WHERE id = %s", (video['id'],))
        # Persist the status change; the file is already gone from disk.
        conn.commit()
        return

    # 2) Get current bitrate & resolution
    current_bitrate, (width, height) = get_video_info(input_path)
    if not current_bitrate:
        print("Video's bitrate is not available. Skipping")
        return

    # Never encode above the source bitrate: cap the tier target at the
    # current bitrate.
    target_bitrate = min(get_target_bitrate(width, height), current_bitrate)

    if is_av1(input_path):
        print("Video is already encoded in AV1. Skipping")
        update_codec_db(video['id'], 'av1')
        return

    # 3) Re-encode
    output_path = os.path.join('.temp', os.path.basename(input_path))
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    encoded = encode_video(input_path, output_path, target_bitrate)
    if not encoded:
        print("Encoding failed. Skipping.")
        return

    # 4) Compare file sizes and replace if smaller
    new_size = check_and_replace_if_smaller(input_path, output_path)
    if new_size:
        update_codec_db(video['id'], 'av1')
        update_file_size_db(video['id'], new_size)
    # NOTE(review): when the re-encode is not smaller the row keeps
    # codec = NULL, so it will be selected again on the next round -- confirm
    # whether such rows should be marked to avoid repeated encodes.


def reencode_videos_av1():
    """Interactive loop that re-encodes pending videos to AV1.

    Each round prompts for a username. A non-empty answer processes that
    user's videos via ``select_user_videos`` (``"all"`` selects every user);
    an empty answer processes the list from ``smart_choice_by_count``
    (``smart_choice`` is the size-weighted alternative). Every selected video
    advances the progress bar, whether it was encoded or skipped.

    The loop has no exit condition of its own; it ends when an exception
    (e.g. ``KeyboardInterrupt``) propagates, at which point the DB connection
    is closed.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        while True:
            username = input("Enter username: ")
            if username:
                videos = select_user_videos(username, cursor)
            else:
                videos = smart_choice_by_count(cursor)

            # Iterating the tqdm object advances the bar once per video,
            # including videos that are skipped early.
            with tqdm(videos, desc="Processing videos", unit="file") as pbar:
                for video in pbar:
                    _reencode_single_video(video, conn, cursor)
    finally:
        conn.close()
# Script entry point: start the interactive re-encode loop only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    reencode_videos_av1()