You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
13 KiB
Python
372 lines
13 KiB
Python
import os, shutil, config
|
|
import ffmpeg
|
|
from tqdm import tqdm
|
|
|
|
def is_av1(filepath):
    """Return True if any video stream in *filepath* uses the AV1 codec.

    The file is inspected with ``ffmpeg.probe``. If probing fails with
    ``ffmpeg.Error`` the error is printed and False is returned.
    """
    try:
        streams = ffmpeg.probe(filepath)['streams']
        return any(
            entry['codec_type'] == 'video' and entry.get('codec_name') == 'av1'
            for entry in streams
        )
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return False
|
|
|
|
def get_video_info(filepath):
    """Probe *filepath* and return its overall bitrate and frame size.

    Returns:
        ``(bitrate_kbps, (width, height))`` for the first video stream, or
        ``(None, (None, None))`` when the file cannot be probed, has no video
        stream, or the probe output lacks a usable bitrate/resolution.
    """
    unavailable = (None, (None, None))

    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting video info for {filepath}: {e}")
        return unavailable

    video_stream = next(
        (stream for stream in probe.get('streams', [])
         if stream.get('codec_type') == 'video'),
        None,
    )
    if not video_stream:
        return unavailable

    try:
        # Convert from bits/sec to kbps
        bitrate_kbps = int(probe['format']['bit_rate']) // 1000
        width = video_stream['width']
        height = video_stream['height']
    except (KeyError, TypeError, ValueError):
        # The probe result does not always carry these fields; report the
        # file as "unknown" instead of crashing the whole batch.
        print(f"Bitrate or resolution not reported for {filepath}")
        return unavailable

    return bitrate_kbps, (width, height)
|
|
|
|
def get_target_bitrate(width, height):
    """Return the target video bitrate (kbps) for a frame of *width* x *height*.

    The frame is matched against a resolution ladder and the bitrate of the
    first rung it fits into is returned. The comparison is orientation
    independent: the longer side is checked against the rung's long side and
    the shorter side against its short side, so a portrait 1080x1920 video
    gets the same target as a landscape 1920x1080 one.

    Frames larger than the biggest rung fall back to 2500 kbps.
    """
    # (long side, short side) -> kbps, smallest rung first.
    ladder = (
        ((854, 480), 1000),
        ((1280, 720), 1500),
        ((1920, 1080), 3000),
        ((2560, 1440), 5000),
        ((3840, 2160), 12000),
    )

    long_side = max(width, height)
    short_side = min(width, height)

    for (max_long, max_short), bitrate in ladder:
        if long_side <= max_long and short_side <= max_short:
            return bitrate

    # NOTE(review): anything above 4K gets this low fallback; confirm that
    # is intended rather than the top rung's bitrate.
    return 2500
|
|
|
|
def get_fps(filepath):
    """Return the frame rate of the first video stream in *filepath*.

    The value is taken from the stream's ``r_frame_rate`` field, a
    ``"num/den"`` string in the probe output.

    Returns:
        The frame rate as a float, or None when the file cannot be probed,
        has no video stream, or reports a missing, malformed or non-positive
        frame rate (for example ``"0/0"``).
    """
    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error getting FPS for {filepath}: {e}")
        return None

    video_stream = next(
        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
        None,
    )
    if not video_stream or 'r_frame_rate' not in video_stream:
        return None

    fps_str = video_stream['r_frame_rate']
    try:
        num, _, den = fps_str.partition('/')
        # A bare "30" (no denominator) is read as 30/1.
        fps = int(num) / int(den or 1)
    except (ValueError, ZeroDivisionError):
        print(f"Error getting FPS for {filepath}: invalid frame rate {fps_str!r}")
        return None

    # A zero or negative rate cannot drive a keyframe interval; treat as unknown.
    return fps if fps > 0 else None
|
|
|
|
def encode_video(filepath, output_path, target_bitrate):
    """Re-encode *filepath* to AV1 (``av1_nvenc``) in an MP4 container.

    Args:
        filepath: Source video path.
        output_path: Destination path; overwritten if it exists.
        target_bitrate: Target bitrate in kbps. The max rate and buffer size
            are both set to 1.5x this value.

    Returns:
        True when ffmpeg finished successfully, False when the frame rate
        could not be determined or ffmpeg reported an error. On an ffmpeg
        error any partially written output file is removed.
    """
    fps = get_fps(filepath)
    if fps is None:
        print(f"Could not determine FPS for {filepath}.")
        return False

    # One keyframe per second. Round (29.97 -> 30) and never go below 1 so a
    # sub-1 fps source cannot produce a zero GOP size.
    keyframe_interval = max(1, round(fps))

    # Calculate 1.5x for max bitrate
    max_bitrate = int(1.5 * target_bitrate)

    print(f" Encoding {filepath} to AV1 at {target_bitrate} kbps...")
    try:
        (
            ffmpeg
            .input(filepath)
            .output(
                output_path,
                vcodec='av1_nvenc',
                format='mp4',
                b=f"{target_bitrate}k",
                maxrate=f"{max_bitrate}k",
                bufsize=f"{max_bitrate}k",
                preset='p5',
                g=keyframe_interval,
            )
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print(f" Error encoding {os.path.basename(filepath)} to AV1: {e}")
        # quiet=True captures ffmpeg's stderr on the exception; show its tail
        # because the exception message itself carries no detail.
        stderr = getattr(e, 'stderr', None)
        if stderr:
            tail = stderr.decode('utf-8', errors='replace').strip().splitlines()[-5:]
            print("\n".join(tail))
        # Do not leave a truncated file behind for later steps to pick up.
        try:
            if os.path.exists(output_path):
                os.remove(output_path)
        except OSError:
            pass
        return False

    print(f" Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps (maxrate={max_bitrate} kbps).")
    return True
|
|
|
|
def check_and_replace_if_smaller(original_path, temp_output_path):
    """Replace the original file with the re-encoded one if that saves space.

    The re-encoded file replaces the original only when it is non-empty,
    strictly smaller, and saves at least 1 MB. Otherwise the temp file is
    deleted and the original is left untouched.

    Args:
        original_path: Path of the source video.
        temp_output_path: Path of the freshly encoded candidate.

    Returns:
        The new file size in MB (float) when the original was replaced,
        otherwise False (including when the temp file does not exist).
    """
    if not os.path.exists(temp_output_path):
        print(f"[ERROR] Temp file {temp_output_path} not found. Skipping replacement...")
        return False

    bytes_per_mb = 1024 * 1024
    original_size = os.path.getsize(original_path)
    processed_size = os.path.getsize(temp_output_path)

    size_original_mb = original_size / bytes_per_mb
    size_processed_mb = processed_size / bytes_per_mb
    size_diff_mb = size_original_mb - size_processed_mb

    # An empty output is a failed encode, never a "smaller" result: moving it
    # over the original would destroy the video.
    if processed_size == 0 or processed_size >= original_size or size_diff_mb < 1:
        os.remove(temp_output_path)
        return False

    # Safe to divide here: original_size > processed_size > 0.
    size_diff_perc = (1 - processed_size / original_size) * 100

    print(100*"=")
    print(f" Re-encoded is smaller by {size_diff_perc:.2f}% ({size_diff_mb:.2f} MB). Replacing original.")
    print(f" Original: {size_original_mb:.2f} MB \n Re-encoded: {size_processed_mb:.2f} MB.")
    print(100*"=")
    shutil.move(temp_output_path, original_path)
    return size_processed_mb
|
|
|
|
def update_codec_db(video_id, codec):
    """Set the ``codec`` column of the ``videos`` row with id *video_id*.

    Opens its own connection via ``config.get_local_db_connection()``,
    commits the update, and always closes the connection, even when the
    statement fails.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", (codec, video_id))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
def update_file_size_db(video_id, size):
    """Set the ``size`` column of the ``videos`` row with id *video_id*.

    Opens its own connection via ``config.get_local_db_connection()``,
    commits the update, and always closes the connection, even when the
    statement fails.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        cursor.execute("UPDATE videos SET size = %s WHERE id = %s", (size, video_id))
        conn.commit()
    finally:
        conn.close()
|
|
|
|
def smart_choice(cursor, small_mb=250):
    """
    Returns a list of candidate videos to encode, ordered by:
      1) time window priority: 7d, then 30d, then 90d, then fallback (any time)
      2) streamer priority: total MB per (username, site) DESC within the window
      3) small (< small_mb MB) first, then big
      4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.

    Candidates are rows of ``videos`` with no codec recorded, a status other
    than 'missing' and a non-NULL filepath. The first window that yields any
    rows wins; the result of ``cursor.fetchall()`` for it is returned.
    """

    def pick(days=None):
        # Build the prioritized list for a given window (None = no window).
        # Only this fixed clause is interpolated into the SQL; every value is
        # still passed as a bound parameter.
        if days is None:
            window_clause = ""
            params = (small_mb,)
        else:
            window_clause = "AND v.created_at >= NOW() - make_interval(days => %s)"
            params = (days, small_mb)

        cursor.execute(f"""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status <> 'missing'
                  AND v.filepath IS NOT NULL
                  {window_clause}
            ),
            by_streamer AS (
                SELECT username, site, SUM(size) AS total_mb
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_mb,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_mb DESC,       -- top streamers first
                small_first ASC,     -- small (< small_mb) first
                size DESC,           -- then bigger files first inside each group
                created_at DESC;     -- then newest
        """, params)
        return cursor.fetchall()

    # Try 7d → 30d → 90d
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering logic
    return pick(None)
|
|
|
|
def smart_choice_by_count(cursor, small_mb=250):
    """
    Returns candidate videos ordered by:
      1) time window: 7d, then 30d, then 90d, then fallback (any time)
      2) streamer priority: COUNT(*) per (username, site) DESC within the window
      3) small (< small_mb MB) first, then big
      4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.

    Candidates are rows of ``videos`` with no codec recorded, a status other
    than 'missing' and a non-NULL filepath. The first window that yields any
    rows wins; the result of ``cursor.fetchall()`` for it is returned.
    """

    def pick(days=None):
        # One query serves both the windowed and the any-time case so the two
        # orderings cannot drift apart. Only this fixed clause is interpolated;
        # every value is still passed as a bound parameter.
        if days is None:
            window_clause = ""
            params = (small_mb,)
        else:
            window_clause = "AND v.created_at >= NOW() - make_interval(days => %s)"
            params = (days, small_mb)

        cursor.execute(f"""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status <> 'missing'
                  AND v.filepath IS NOT NULL
                  {window_clause}
            ),
            by_streamer AS (
                SELECT username, site, COUNT(*) AS total_vids
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_vids,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_vids DESC,     -- most videos first
                small_first ASC,     -- small (< small_mb) first
                size DESC,           -- then larger files first within group
                created_at DESC;     -- then newest
        """, params)
        return cursor.fetchall()

    # Try windows: 7 → 30 → 90 days
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering
    return pick(None)
|
|
|
|
def select_user_videos(username, cursor):
    """Return the not-yet-encoded videos of *username*, smallest first.

    Rows are selected from ``videos`` where the status is not 'missing', no
    codec is recorded and a filepath is present. The literal username
    ``"all"`` selects across every user.

    Returns:
        The result of ``cursor.fetchall()``.
    """
    # The filepath filter matches the other selectors in this file; a row
    # without a path cannot be encoded.
    if username == "all":
        cursor.execute(
            "SELECT * FROM videos WHERE status != 'missing' AND codec IS NULL "
            "AND filepath IS NOT NULL ORDER BY size ASC"
        )
    else:
        cursor.execute(
            "SELECT * FROM videos WHERE username = %s AND status != 'missing' "
            "AND codec IS NULL AND filepath IS NOT NULL ORDER BY size ASC",
            (username,),
        )
    return cursor.fetchall()
|
|
|
|
def _process_video(video, conn, cursor):
    """Run the probe / encode / replace pipeline for a single ``videos`` row."""
    input_path = video['filepath']

    # A row may carry no path at all; treat it like a missing file.
    if not input_path or not os.path.exists(input_path):
        print(f"🚫 File not found: {input_path}")
        return

    file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
    print(f"\nProcessing {os.path.basename(input_path)} ({file_size_in_mb:.2f} MB)...")

    if file_size_in_mb < 1:
        # Files under 1 MB are deleted from disk and flagged in the DB.
        print("Video is too small. Skipping.")
        os.remove(input_path)
        cursor.execute("UPDATE videos SET status = 'deleted' WHERE id = %s", (video['id'],))
        # Commit right away so the status change is not lost with the session.
        conn.commit()
        return

    # 2) Get current bitrate & resolution
    current_bitrate, (width, height) = get_video_info(input_path)
    if not current_bitrate:
        print("Video's bitrate is not available. Skipping")
        return

    # Never encode above the source bitrate: cap the target at the current one.
    target_bitrate = min(get_target_bitrate(width, height), current_bitrate)

    if is_av1(input_path):
        print("Video is already encoded in AV1. Skipping")
        update_codec_db(video['id'], 'av1')
        return

    # 3) Re-encode
    output_path = os.path.join('.temp', os.path.basename(input_path))
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    if not encode_video(input_path, output_path, target_bitrate):
        print("Encoding failed. Skipping.")
        return

    # 4) Compare file sizes and replace if smaller
    new_size = check_and_replace_if_smaller(input_path, output_path)
    if new_size:
        update_codec_db(video['id'], 'av1')
        update_file_size_db(video['id'], new_size)


def reencode_videos_av1():
    """Interactively re-encode stored videos to AV1.

    Repeatedly prompts for a username. A non-empty answer processes that
    user's pending videos (see ``select_user_videos``); an empty answer
    processes the list chosen by ``smart_choice_by_count``
    (``smart_choice(cursor)`` is the size-weighted alternative). Each video is
    probed, re-encoded into ``.temp/`` and swapped in if the result is smaller.

    The loop ends on Ctrl-C or end-of-input at the prompt; the database
    connection is closed on the way out, including after an error.
    """
    conn, cursor = config.get_local_db_connection()
    try:
        while True:
            try:
                username = input("Enter username: ")
            except (EOFError, KeyboardInterrupt):
                print()
                break

            if username:
                videos = select_user_videos(username, cursor)
            else:
                videos = smart_choice_by_count(cursor)

            # Iterating the bar itself advances it once per video, including
            # the ones that are skipped early.
            with tqdm(videos, desc="Processing videos", unit="file") as pbar:
                for video in pbar:
                    _process_video(video, conn, cursor)
    finally:
        conn.close()
|
|
|
|
|
|
# Script entry point: start the interactive re-encode loop only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    reencode_videos_av1()