cleanup to only be the flask app
parent 9933768c38
commit 4c6bc25398
@@ -1,43 +0,0 @@
import os
from config import get_local_db_connection
from tqdm import tqdm
import shutil

RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'


def get_all_video_files():
    files = {}
    for root, _, filenames in os.walk(RECORDER_DIR):
        for filename in filenames:
            if filename.endswith(".mp4"):
                video_id = filename.split(".")[0]
                files[video_id] = os.path.join(root, filename)
    return files


if __name__ == '__main__':
    conn, cursor = get_local_db_connection()

    downloaded_videos = get_all_video_files()

    # For every video in downloaded_videos, re-create the folder it would have been in, in the archive directory.
    # If it already exists there, check whether it is corrupted; if so, move the downloaded copy to the archive dir,
    # because the other script failed while moving it.
    # (Note: the corruption check itself is not implemented here; an existing archive copy is simply overwritten.)

    for video_id in tqdm(downloaded_videos.keys(), desc="Checking for failed videos..."):
        video_path = downloaded_videos[video_id]

        try:
            cursor.execute("SELECT username FROM videos WHERE video_id = %s", (video_id,))
            username = cursor.fetchone()['username']
        except Exception:
            print(f"Video {video_id} does not exist in the database")
            continue

        archive_path = os.path.join(ARCHIVE_DIR, username, video_path.replace(RECORDER_DIR, ''))

        if os.path.exists(archive_path):
            print(f"Video {video_id} already exists in the archive directory")
            shutil.move(video_path, archive_path)
@@ -1,291 +0,0 @@
import os
from config import get_local_db_connection
from funcs import get_duration, get_file_size_in_mb, calculate_file_hash
from tqdm import tqdm

import hashlib, subprocess, shutil
from concurrent.futures import ThreadPoolExecutor

EDITED_DIR = "edited/"
THUMB_DIR = "static/thumbnails"
THUMB_WIDTH = 640
FF_QUALITY = "80"

RECORDER_DIR = 'E:/streamaster/streamaster/downloaded/'
ARCHIVE_DIR = 'U:/streamaster/streams/'
CONCATED_DIR = 'concated/'

VIDEO_DIRS = [
    RECORDER_DIR,
    ARCHIVE_DIR
]


def get_all_video_files():
    files = {}
    for base in VIDEO_DIRS:
        for root, _, filenames in os.walk(base):
            for filename in filenames:
                if filename.endswith(".mp4"):
                    video_id = filename.split(".")[0]
                    files[video_id] = os.path.join(root, filename)
    return files


def find_video_path(video_id: str):
    # all_videos is populated in __main__ below
    return all_videos[video_id] if video_id in all_videos else None


def mark_missing_videos(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status = 'active'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Scanning for missing videos...") as pbar:
        for vid in videos:
            video_id, filepath = vid['video_id'], vid['filepath']
            if not find_video_path(video_id):
                print(f"🚫 Missing: {video_id}")
                cursor.execute("UPDATE videos SET status = 'missing' WHERE video_id = %s", (video_id,))
                conn.commit()
            pbar.update(1)


def update_video_paths(cursor, conn):
    cursor.execute("SELECT id, filepath, status, video_id FROM videos")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating filepaths...") as pbar:
        for vid in videos:
            path = find_video_path(vid['video_id'])

            if not path:
                continue

            path = path.replace("\\", "/")
            if path == vid['filepath']:
                continue

            cursor.execute("UPDATE videos SET filepath = %s, status = 'active' WHERE id = %s", (path, vid['id']))
            conn.commit()
            pbar.update(1)


def fill_missing_hashes(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE (hash IS NULL OR hash = '') AND status = 'active'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating hashes...") as pbar:
        for vid in videos:
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                h = calculate_file_hash(filepath)
                cursor.execute("UPDATE videos SET hash = %s WHERE video_id = %s", (h, video_id))
                conn.commit()
            pbar.update(1)


def fill_missing_sizes(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE size = 0 AND status = 'active'")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating sizes...") as pbar:
        for vid in videos:
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                size = get_file_size_in_mb(filepath)
                cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (size, video_id))
                conn.commit()
            pbar.update(1)


def fill_missing_durations(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE duration = 0 AND status = 'active' ORDER BY size ASC")
    videos = cursor.fetchall()

    with tqdm(videos, desc="Updating durations...") as pbar:
        for vid in videos:
            video_id, filepath = vid['video_id'], vid['filepath']
            if filepath and os.path.exists(filepath):
                duration = get_duration(filepath)
                if duration <= 0:
                    print(f"🚫 Failed to get duration for {filepath}")
                    os.remove(filepath)
                    continue
                cursor.execute("UPDATE videos SET duration = %s WHERE video_id = %s", (duration, video_id))
                conn.commit()
            pbar.update(1)


def map_gender(gender):
    genders = {
        'woman': 'Female',
        'couple': 'Couple',
        'trans': 'Trans',
        'a man': 'Male'
    }

    for g in genders:
        if g in gender:
            return genders[g]
    print(f"🚫 Failed to map gender: {gender}")
    return None


def fill_missing_gender(cursor, conn):
    def get_data(username):
        import requests
        url = f"https://chaturbate.com/api/biocontext/{username}"
        try:
            data = requests.get(url)
            data = data.json()
            if 'status' in data:
                if data['status'] == 401:
                    return False
        except Exception:
            return False
        return data

    cursor.execute("SELECT DISTINCT username, site FROM videos WHERE gender IS NULL AND status = 'active'")
    videos = cursor.fetchall()

    api_fetches = 10
    with tqdm(videos, desc="Updating genders...") as pbar:
        for vid in videos:
            username, site = vid['username'], vid['site']
            cursor.execute("SELECT gender FROM videos WHERE username = %s AND site = %s AND gender IS NOT NULL LIMIT 1", (username, site))
            gender = cursor.fetchone()
            if gender:
                gender_str = gender['gender']
            else:
                if api_fetches <= 0:
                    continue
                data = get_data(username)
                api_fetches -= 1
                if not data:
                    continue
                gender = map_gender(data['sex'])
                if not gender:
                    continue
                gender_str = gender

            cursor.execute("UPDATE videos SET gender = %s WHERE username = %s AND site = %s", (gender_str, username, site))
            conn.commit()
            print(f"[{cursor.rowcount}] ✅ Updated gender for {username} on {site}")
            pbar.update(1)


def generate_thumbnails_for_videos(cursor, conn):
    cursor.execute("SELECT video_id, filepath FROM videos WHERE status = 'active' AND thumbnail IS NULL")
    videos = cursor.fetchall()

    tasks = []
    with tqdm(videos, desc="Generating thumbnails...") as pbar:
        for v in videos:
            video_id = v.get("video_id")
            filepath = v.get("filepath")

            if not filepath:
                continue

            if not os.path.exists(filepath):
                continue

            thumb_path = _hashed_thumb_path(video_id)
            if not os.path.exists(thumb_path):
                tasks.append((filepath, thumb_path))

            v["thumbnail"] = thumb_path
            pbar.update(1)

    if tasks:
        with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as exe:
            list(exe.map(lambda t: subprocess.run(
                _gen_thumb_cmd(*t),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            ), tasks))

    for v in videos:
        if 'thumbnail' not in v:
            continue
        v['thumbnail'] = v['thumbnail'].replace("\\", "/")
        cursor.execute("UPDATE videos SET thumbnail = %s WHERE video_id = %s", (v['thumbnail'], v['video_id']))
        conn.commit()


def _hashed_thumb_path(video_id: str):
    h = hashlib.md5(video_id.encode()).hexdigest()
    sub1, sub2 = h[:2], h[2:4]
    path = os.path.join(THUMB_DIR, sub1, sub2)
    os.makedirs(path, exist_ok=True)
    return os.path.join(path, f"{video_id}.webp")


def _gen_thumb_cmd(src: str, dest: str):
    return [
        "ffmpeg", "-y", "-loglevel", "error",
        "-ss", "0", "-i", src,
        "-vframes", "1",
        "-vf", f"thumbnail,scale={THUMB_WIDTH}:-1",
        "-q:v", FF_QUALITY,
        dest
    ]


def move_edited_videos(cursor, conn):
    edited_videos = [f for f in os.listdir(EDITED_DIR) if os.path.isfile(os.path.join(EDITED_DIR, f)) and f.endswith(".mp4")]

    with tqdm(edited_videos, desc="Moving edited videos...") as pbar:
        for filename in edited_videos:
            edited_path = os.path.join(EDITED_DIR, filename)
            video_id = filename.split(".")[0]

            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
            video = cursor.fetchone()

            if not video:
                continue

            video_path = video['filepath']
            if not os.path.exists(video_path):
                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)

            file_size = get_file_size_in_mb(edited_path)
            cursor.execute("UPDATE videos SET size = %s WHERE video_id = %s", (file_size, video_id))
            conn.commit()

            shutil.move(edited_path, video_path)
            pbar.update(1)


def move_concated_videos(cursor, conn):
    concated_videos = [f for f in os.listdir(CONCATED_DIR) if os.path.isfile(os.path.join(CONCATED_DIR, f)) and f.endswith(".mp4")]
    concated_videos = sorted(concated_videos, key=lambda f: os.path.getsize(os.path.join(CONCATED_DIR, f)))

    with tqdm(concated_videos, desc="Moving concated videos...") as pbar:
        for filename in concated_videos:
            edited_path = os.path.join(CONCATED_DIR, filename)
            video_id = filename.split(".")[0]

            cursor.execute("SELECT filepath, username FROM videos WHERE video_id = %s", (video_id,))
            video = cursor.fetchone()

            if not video:
                continue

            video_path = video['filepath']
            if not os.path.exists(video_path):
                video_path = os.path.join(ARCHIVE_DIR, video['username'], filename)

            file_size = get_file_size_in_mb(edited_path)
            cursor.execute("UPDATE videos SET size = %s, status = 'concated' WHERE video_id = %s", (file_size, video_id))
            conn.commit()

            shutil.move(edited_path, video_path)
            pbar.update(1)


if __name__ == '__main__':
    conn, cursor = get_local_db_connection()

    all_videos = get_all_video_files()
    update_video_paths(cursor, conn)
    mark_missing_videos(cursor, conn)

    generate_thumbnails_for_videos(cursor, conn)
    fill_missing_sizes(cursor, conn)
    fill_missing_durations(cursor, conn)
    fill_missing_gender(cursor, conn)
    # fill_missing_hashes(cursor, conn)

    move_edited_videos(cursor, conn)
    move_concated_videos(cursor, conn)

    cursor.close()
    conn.close()
    print("✅ All cleanup tasks completed.")
@@ -1,53 +0,0 @@
from funcs import group_videos, group_for_concatenation_simple
from concat_helper import concatenate_videos
import os, config, shutil

MOVE_FUCKED = False
sort_type = {"size": lambda x: sum([video['size'] for video in x]), "count": lambda x: len(x)}


def get_videos(cursor, username=None):
    if username:
        cursor.execute("SELECT * FROM videos WHERE username = %s AND status = 'active';", (username,))
        return cursor.fetchall()

    cursor.execute("SELECT * FROM videos WHERE status = 'active';")
    return cursor.fetchall()


def organize_videos():
    username = input("Enter username: ")

    conn, cursor = config.get_local_db_connection()
    videos = get_videos(cursor, username)

    # process the videos
    video_data = group_videos(videos, sort_by="size", order="asc")

    print("Grouping videos for concatenation...")
    # group all videos for concatenation first
    grouped_videos = []
    for user, videos in video_data.items():
        grouped_videos.extend(group_for_concatenation_simple(videos))

    sorted_processed_videos = sorted(grouped_videos, key=sort_type["count"], reverse=True)

    # concatenate each group in turn
    for video_list in sorted_processed_videos:
        print(100 * "=")
        print("\n" * 2)

        video_id = video_list[0]['video_id']
        videos_sum_size = sum([video['size'] for video in video_list])

        print(f"Group {video_id} has {len(video_list)} videos and total size of {videos_sum_size} MB")
        main_video = concatenate_videos(video_list, reencode_concate=True)

        if main_video:
            print(f"Processed {len(video_list)} input videos into {main_video['filepath']} output video.")
            continue

        print(f"Failed to process {len(video_list)} input videos into output video.")


if __name__ == "__main__":
    organize_videos()
@@ -1,451 +0,0 @@
from datetime import datetime, timedelta, timezone
from video_manager import get_duration
import os, json, subprocess, shutil


def is_file_empty(filepath):
    return os.stat(filepath).st_size == 0


def format_datetime(datetime_str):
    """Parse the datetime string into a datetime object."""
    return datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S")


def get_file_size_in_mb(file_path):
    return os.path.getsize(file_path) / (1024 ** 2)


def get_file_size_gb(file_path):
    return os.path.getsize(file_path) / 1024 / 1024 / 1024


def get_data(data_path):
    try:
        with open(data_path, 'r') as file:
            data = json.load(file)
            return data
    except Exception as e:
        print(f"Error loading {data_path}: {e}")
        return None


def update_video_data(dataPath, data):
    """Update or create a JSON file for the video metadata."""
    if os.path.exists(dataPath):
        with open(dataPath, "r") as f:
            existing_data = json.load(f)

        if existing_data == data:
            return  # No update needed if data hasn't changed.

    data["updated_at"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    with open(dataPath, "w") as f:
        json.dump(data, f)  # Write to file if new or if data has changed.


def is_recent(updated_at, minutes=30):
    updated_at = updated_at.replace(tzinfo=timezone.utc)
    now = datetime.now(timezone.utc)
    return now - updated_at < timedelta(minutes=minutes)


def is_file_size_bigger_than(file_size_in_mb, max_size_gb):
    """Check if the file size is bigger than the specified max size in GB."""
    max_size_megabytes = max_size_gb * 1024  # Convert GB to MB
    return file_size_in_mb > max_size_megabytes


def cleanup_data_files(folder_path):
    videos = [video for video in os.listdir(folder_path) if video.endswith(".json")]
    for filename in videos:
        json_path = os.path.join(folder_path, filename)
        video_path = json_path.replace(".json", ".mp4")
        if not os.path.exists(video_path):
            os.remove(json_path)


def get_video_data(videoPath):
    with open(videoPath, "r") as f:
        data = json.load(f)
    return data


def get_videos(folder_path):
    """Retrieve video metadata from the JSON files in a specified folder."""
    video_list = []

    # List all .mp4 files and their corresponding .json metadata files
    videos = [f for f in os.listdir(folder_path) if f.endswith(".mp4")]

    for video_filename in videos:
        video_path = os.path.join(folder_path, video_filename)
        json_path = video_path.replace(".mp4", ".json")

        if not os.path.exists(json_path):
            continue

        data = get_video_data(json_path)
        data['size'] = get_file_size_in_mb(video_path)  # Include size in MB for further processing
        data['filepath'] = video_path

        video_list.append(data)

    return video_list


def group_videos(video_list, sort_by="count", order="desc"):
    """Group video data by username and site, and sort the groups by video creation time."""
    video_data = {}
    is_desc = order == "desc"

    for video in video_list:
        key = (video["username"], video["site"])
        if key not in video_data:
            video_data[key] = []
        video_data[key].append(video)

    # Ensure videos for each user and site are sorted by creation date
    for key in video_data:
        video_data[key].sort(key=lambda x: (x["created_at"]))

    # Further sort groups if required based on size or count
    if sort_by == "size":
        video_data = dict(sorted(video_data.items(), key=lambda x: sum(item['size'] for item in x[1]), reverse=is_desc))
    elif sort_by == "count":
        video_data = dict(sorted(video_data.items(), key=lambda x: len(x[1]), reverse=is_desc))

    return video_data


def process_videos(video_data):
    processed_videos = []
    failed_directory = "failed"

    for video in video_data:
        is_updated = False
        video_path = video["filepath"]
        data_path = video["jsonpath"]

        filesize = get_file_size_in_mb(video_path)
        if 'size' not in video or video['size'] != filesize:
            video['size'] = filesize
            is_updated = True

        if is_updated and 'duration' not in video:
            video['duration'] = get_duration(video_path)
            is_updated = True

        # Move corrupted videos to the failed folder
        if video['duration'] == 0:
            print(f"{video['video_id']} is corrupted, moving to failed folder")
            failed_video_path = os.path.join(failed_directory, video["video_id"] + ".mp4")
            failed_data_path = failed_video_path.replace(".mp4", ".json")

            shutil.move(video_path, failed_video_path)
            shutil.move(data_path, failed_data_path)

            continue  # Skip further processing for this video

        if is_updated:
            update_video_data(data_path, video)

        processed_videos.append(video)

    return processed_videos


def group_for_concatenation(videos, time_limit=30):
    """
    Groups videos into lists where:
      - total group size <= 9GB (9216 MB),
      - time gap between consecutive videos <= time_limit minutes,
      - AND all have the same resolution/fps/codecs for no-reencode concat.
    """
    concatenated_video_groups = []
    current_group = []
    current_size_mb = 0
    last_video_end = None
    reference_params = None  # We'll store the ffprobe params for the first video in each group

    for video in videos:
        video_start = video['created_at']
        video_end = video_start + timedelta(seconds=video['duration'])

        # Probe the video to get parameters
        video_path = video['filepath']
        params = get_video_params(video_path)
        if params is None:
            # If ffprobe fails, skip or handle the error
            print(f"Skipping {video_path}, failed to get ffprobe info.")
            continue

        if current_group:
            # Check if adding this video breaks the size or time-gap limit
            time_difference = (video_start - last_video_end).total_seconds() / 60
            size_exceeded = (current_size_mb + video['size'] > 9216)
            time_exceeded = (time_difference > time_limit)

            # Check if the video parameters match the group's reference
            param_mismatch = False
            if reference_params:
                # Compare relevant fields
                for field in ['video_codec', 'width', 'height', 'pix_fmt', 'fps',
                              'audio_codec', 'audio_sample_rate', 'audio_channels', 'audio_channel_layout']:
                    if params[field] != reference_params[field]:
                        param_mismatch = True
                        break

            # If we exceed size, exceed time gap, or mismatch in parameters => start new group
            if size_exceeded or time_exceeded or param_mismatch:
                concatenated_video_groups.append(current_group)
                current_group = []
                current_size_mb = 0
                reference_params = None  # reset for new group

        # If we're starting a new group, set reference parameters
        if not current_group:
            reference_params = params

        # Add the current video to the group
        current_group.append(video)
        current_size_mb += video['size']
        last_video_end = video_end

    # Add the last group if not empty
    if current_group:
        concatenated_video_groups.append(current_group)

    # Optional: Ensure the last group is "ready" for upload based on time difference
    # (if the last video was created less than time_limit minutes ago, drop the group)
    if concatenated_video_groups:
        last_group = concatenated_video_groups[-1]
        last_video = last_group[-1]
        last_updated_at = datetime.strptime(last_video['created_at'], "%Y-%m-%d %H:%M:%S")
        if datetime.now() - last_updated_at <= timedelta(minutes=time_limit):
            print("Last group is not ready for upload. Removing from final groups.")
            concatenated_video_groups.pop()

    concatenated_video_groups = [group for group in concatenated_video_groups if len(group) > 1]

    return concatenated_video_groups


def get_video_params(video_path):
    """
    Run ffprobe on a given video path to extract:
      - codec_name (video + audio)
      - width, height
      - pix_fmt
      - r_frame_rate (frame rate)
      - sample_rate, channel_layout (audio)
    Returns a dict with these parameters or None if there's an error.
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-print_format', 'json',
        '-show_streams',
        '-show_format',
        video_path
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        info = json.loads(result.stdout)

        # We'll parse out the first video & audio streams we find.
        video_stream = next((s for s in info['streams'] if s['codec_type'] == 'video'), None)
        audio_stream = next((s for s in info['streams'] if s['codec_type'] == 'audio'), None)

        if not video_stream:
            raise ValueError(f"No video stream found in {video_path}")

        # Frame rate can be something like "30000/1001" - convert to float
        r_frame_rate = video_stream.get('r_frame_rate', '0/0')
        try:
            num, den = r_frame_rate.split('/')
            fps = float(num) / float(den) if float(den) != 0 else 0.0
        except Exception:
            fps = 0.0

        # Gather the key parameters
        params = {
            'video_codec': video_stream.get('codec_name', 'unknown'),
            'width': video_stream.get('width', 0),
            'height': video_stream.get('height', 0),
            'pix_fmt': video_stream.get('pix_fmt', 'unknown'),
            'fps': fps,
            'audio_codec': audio_stream.get('codec_name', 'none') if audio_stream else 'none',
            'audio_sample_rate': audio_stream.get('sample_rate', '0') if audio_stream else '0',
            'audio_channels': audio_stream.get('channels', 0) if audio_stream else 0,
            'audio_channel_layout': audio_stream.get('channel_layout', 'none') if audio_stream else 'none'
        }

        return params

    except subprocess.CalledProcessError as e:
        print(f"Failed to run ffprobe on {video_path}: {e}")
        return None


def generate_list_file(videos):
    directory = os.path.dirname(videos[0]["filepath"])
    list_filename = os.path.join(directory, f"{videos[0]['video_id']}.txt")
    with open(list_filename, "w") as list_file:
        for video in videos:
            list_file.write(f"file '{video['video_id']}.mp4'\n")
    return list_filename


def concatenate_videos(grouped_videos, directory):
    """Concatenate pre-grouped videos, updating metadata and managing file operations."""
    processed_videos = []

    for group in grouped_videos:
        if len(group) == 1:
            processed_videos.append(group[0])
            continue

        # Set up paths based on the first video in the group
        first_video = group[0]
        video_path = first_video["filepath"]
        data_path = video_path.replace(".mp4", ".json")
        temp_path = video_path.replace(".mp4", "_temp.mp4")

        # Generate a list file for ffmpeg concatenation
        list_filename = generate_list_file(group)

        # Run ffmpeg to concatenate videos
        subprocess.run(["ffmpeg", "-f", "concat", "-safe", "0", "-i", list_filename, "-c", "copy", temp_path])

        # Remove individual video files and their metadata
        [os.remove(v["filepath"]) for v in group]
        [os.remove(v["filepath"].replace(".mp4", ".json")) for v in group]
        os.remove(list_filename)

        os.rename(temp_path, video_path)

        # Update the metadata for the concatenated video
        first_video["filepath"] = video_path
        first_video["size"] = get_file_size_in_mb(video_path)
        first_video["duration"] = get_duration(video_path)
        update_video_data(data_path, first_video)  # Ensure this function reflects the changes of concatenation
        processed_videos.append(first_video)

    return processed_videos


def get_all_videos(directory):
    # find all .mp4 files in the directory and its subdirectories
    videos = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".mp4"):
                videos.append(os.path.join(root, file))
    return videos


def get_all_data(directory):
    # find all .json files in the directory and its subdirectories
    data = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".json"):
                data.append(os.path.join(root, file))
    return data


def match_data_to_video_fast(videos, data):
    data_dict = {os.path.splitext(os.path.basename(d))[0]: d for d in data}
    matched, unmatched = [], []
    for v in videos:
        video_id = os.path.splitext(os.path.basename(v))[0]
        if video_id in data_dict:
            matched.append((v, data_dict[video_id]))
        else:
            unmatched.append(v)
    return parse_video_data(matched), unmatched


def parse_video_data(matched_videos):
    """Retrieve video metadata from the JSON file matched to each video."""
    import tqdm
    video_list = []

    with tqdm.tqdm(total=len(matched_videos), desc="Parsing video data") as pbar:
        for video in matched_videos:
            pbar.update(1)
            video_path, json_path = video

            data = get_video_data(json_path)
            data['filepath'] = video_path
            data['jsonpath'] = json_path

            video_list.append(data)

    return video_list


def get_videos_matched(video_dirs, data_dirs):
    # get all videos
    videos = []
    for d in video_dirs:
        videos += get_all_videos(d)

    # get all data
    data = []
    for d in data_dirs:
        data += get_all_data(d)

    # match the data to the videos
    parsed_videos, unmatched = match_data_to_video_fast(videos, data)

    return parsed_videos, unmatched


def calculate_file_hash(file_path):
    import hashlib
    with open(file_path, 'rb') as f:
        data = f.read()
    return hashlib.sha256(data).hexdigest()


def group_for_concatenation_simple(videos, time_limit=120):
    """
    Groups videos into lists where the time gap between consecutive videos
    is <= time_limit minutes. (The 9GB size limit and the codec/resolution
    checks from group_for_concatenation are disabled here.)
    """
    concatenated_video_groups = []
    current_group = []
    current_size_mb = 0
    last_video_end = None

    for video in videos:
        video_start = video['created_at']
        video_end = video_start + timedelta(seconds=video['duration'])

        if current_group:
            # Check if adding this video breaks the time-gap limit
            time_difference = (video_start - last_video_end).total_seconds() / 60
            time_exceeded = (time_difference > time_limit)

            # size_exceeded = (current_size_mb + video['size'] > 9216)

            # If we exceed the time gap => start a new group
            if time_exceeded:  # or size_exceeded:
                concatenated_video_groups.append(current_group)
                current_group = []
                current_size_mb = 0

        # Add the current video to the group
        current_group.append(video)
        current_size_mb += video['size']
        last_video_end = video_end

    # Add the last group if not empty
    if current_group:
        concatenated_video_groups.append(current_group)

    # Optional: Ensure the last group is "ready" for upload based on time difference
    if concatenated_video_groups:
        last_group = concatenated_video_groups[-1]
        last_video = last_group[-1]
        last_updated_at = last_video['created_at']
        if datetime.now() - last_updated_at <= timedelta(minutes=time_limit):
            print("Last group is not ready for upload. Removing from final groups.")
            concatenated_video_groups.pop()

    concatenated_video_groups = [group for group in concatenated_video_groups if len(group) > 1]

    return concatenated_video_groups
@@ -1,50 +0,0 @@
import os, shutil, config
from tqdm import tqdm

if __name__ == "__main__":
    output_dir = 'U:/streamaster/streams/'

    conn, cursor = config.get_local_db_connection()
    cursor.execute("SELECT * FROM videos WHERE status = 'active' AND filepath NOT LIKE %s ORDER BY size ASC;", ("%" + output_dir + "%",))
    videos = cursor.fetchall()

    # process the videos
    os.makedirs(output_dir, exist_ok=True)

    total_size = int(sum([video['size'] for video in videos]))
    total_moved = 0

    with tqdm(total=total_size, desc=f"Moved [{total_moved}/{len(videos)}] videos", unit="MB") as pbar:
        for video in videos:
            file_size_mb = int(video["size"]) if video["size"] >= 1 else 1
            pbar.update(file_size_mb)

            username = video["username"]
            video_path = video["filepath"]

            if not video_path:
                continue

            user_folder = os.path.join(output_dir, username)
            video_name = os.path.basename(video_path)
            new_video_path = os.path.join(user_folder, video_name)

            if os.path.exists(new_video_path):
                cursor.execute("UPDATE videos SET filepath = %s WHERE id = %s;", (new_video_path, video["id"],))
                conn.commit()
                continue

            if not os.path.exists(video_path):
                continue

            os.makedirs(user_folder, exist_ok=True)

            # move the file to the new location
            shutil.move(video_path, new_video_path)

            cursor.execute("UPDATE videos SET filepath = %s WHERE id = %s;", (new_video_path, video["id"],))
            conn.commit()

            total_moved += 1
            pbar.desc = f"Moved [{total_moved}/{len(videos)}] videos"
@@ -1,372 +0,0 @@
import os, shutil, config
import ffmpeg
from tqdm import tqdm


def is_av1(filepath):
    try:
        probe = ffmpeg.probe(filepath)
        for stream in probe['streams']:
            if stream['codec_type'] == 'video' and 'codec_name' in stream:
                if stream['codec_name'] == 'av1':
                    return True
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
    return False


def get_video_info(filepath):
    try:
        probe = ffmpeg.probe(filepath)
        format_info = probe['format']
        video_stream = next(
            (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
            None
        )
        if video_stream:
            # Convert from bits/sec to kbps
            bitrate_kbps = int(format_info['bit_rate']) // 1000
            width = video_stream['width']
            height = video_stream['height']
            return bitrate_kbps, (width, height)
    except ffmpeg.Error as e:
        print(f"Error getting video info for {filepath}: {e}")
    return None, (None, None)


def get_target_bitrate(width, height):
    resolutions = {
        (854, 480): 1000,
        (1280, 720): 1500,
        (1920, 1080): 3000,
        (2560, 1440): 5000,
        (3840, 2160): 12000
    }

    for res, bitrate in resolutions.items():
        if width <= res[0] and height <= res[1]:
            return bitrate

    return 2500


def get_fps(filepath):
    try:
        probe = ffmpeg.probe(filepath)
        video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
        if video_stream and 'r_frame_rate' in video_stream:
            fps_str = video_stream['r_frame_rate']
            num, den = map(int, fps_str.split('/'))
            fps = num / den
            return fps
    except ffmpeg.Error as e:
        print(f"Error getting FPS for {filepath}: {e}")
    return None


def encode_video(filepath, output_path, target_bitrate):
    try:
        fps = get_fps(filepath)
        if fps is None:
            print(f"Could not determine FPS for {filepath}.")
            return False

        keyframe_interval = int(fps)  # Set the keyframe interval to match 1 second

        # Calculate 1.5x for max bitrate
        max_bitrate = int(1.5 * target_bitrate)

        print(f" Encoding {filepath} to AV1 at {target_bitrate} kbps...")
        (
            ffmpeg
            .input(filepath)
            .output(
                output_path,
                vcodec='av1_nvenc',
                format='mp4',
                b=f"{target_bitrate}k",
                maxrate=f"{max_bitrate}k",
                bufsize=f"{max_bitrate}k",
                preset='p5',
                g=keyframe_interval
            )
            .run(
                overwrite_output=True, quiet=True
            )
        )

        print(f" Finished encoding {os.path.basename(filepath)} to AV1 at {target_bitrate} kbps (maxrate={max_bitrate} kbps).")

        return True
    except ffmpeg.Error as e:
        print(f" Error encoding {os.path.basename(filepath)} to AV1: {e}")
        return False


def check_and_replace_if_smaller(original_path, temp_output_path):
    if not os.path.exists(temp_output_path):
        print(f"[ERROR] Temp file {temp_output_path} not found. Skipping replacement...")
        return False

    original_size = os.path.getsize(original_path)
    processed_size = os.path.getsize(temp_output_path)

    size_original_mb = original_size / (1024 * 1024)
    size_processed_mb = processed_size / (1024 * 1024)
    size_diff_perc = (1 - processed_size / original_size) * 100
    size_diff_mb = size_original_mb - size_processed_mb

    if processed_size >= original_size or size_diff_mb < 1:
        os.remove(temp_output_path)
        return False
    else:
        print(100 * "=")
        print(f" Re-encoded is smaller by {size_diff_perc:.2f}% ({size_diff_mb:.2f} MB). Replacing original.")
        print(f" Original: {size_original_mb:.2f} MB \n Re-encoded: {size_processed_mb:.2f} MB.")
        print(100 * "=")
        shutil.move(temp_output_path, original_path)
        return size_processed_mb


def update_codec_db(video_id, codec):
    conn, cursor = config.get_local_db_connection()
    cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", (codec, video_id))
    conn.commit()
    conn.close()


def update_file_size_db(video_id, size):
    conn, cursor = config.get_local_db_connection()
    cursor.execute("UPDATE videos SET size = %s WHERE id = %s", (size, video_id))
    conn.commit()
    conn.close()


def smart_choice(cursor, small_mb=250):
    """
    Returns a list of candidate videos to encode, ordered by:
      1) time window priority: 7d, then 30d, then 90d, then fallback (any time)
      2) streamer priority: total MB per (username, site) DESC within the window
      3) small (< small_mb MB) first, then big
      4) inside each group: size (ASC in the windowed query, DESC in the fallback), then created_at DESC
    NOTE: 'size' is stored in MB.
    """

    def pick(days: int):
        # Build the prioritized list for a given window
        cursor.execute("""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status = 'active'
                  AND v.filepath IS NOT NULL
                  AND v.created_at >= NOW() - make_interval(days => %s)
            ),
            by_streamer AS (
                SELECT username, site, SUM(size) AS total_mb
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_mb,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_mb DESC,     -- top streamers first
                small_first ASC,   -- small (< small_mb) first
                size ASC,          -- then smaller files first inside each group
                created_at DESC;   -- then newest
        """, (days, small_mb))
        return cursor.fetchall()

    # Try 7d → 30d → 90d
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering logic
    cursor.execute("""
        WITH candidates AS (
            SELECT v.*
            FROM videos v
            WHERE v.codec IS NULL
              AND v.status = 'active'
              AND v.filepath IS NOT NULL
        ),
        by_streamer AS (
            SELECT username, site, SUM(size) AS total_mb
            FROM candidates
            GROUP BY username, site
        ),
        ordered AS (
            SELECT c.*,
                   bs.total_mb,
                   CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
            FROM candidates c
            JOIN by_streamer bs
              ON bs.username = c.username
             AND bs.site = c.site
        )
        SELECT *
        FROM ordered
        ORDER BY
            total_mb DESC,
            small_first ASC,
            size DESC,
            created_at DESC;
    """, (small_mb,))
    return cursor.fetchall()


def smart_choice_by_count(cursor, small_mb=250):
    """
    Returns candidate videos ordered by:
      1) time window: 7d, then 30d, then 90d, then fallback (any time)
      2) streamer priority: COUNT(*) per (username, site) DESC within the window
      3) small (< small_mb MB) first, then big
      4) inside each group: size DESC, then created_at DESC
    NOTE: 'size' is stored in MB.
    """
    def pick(days: int):
        cursor.execute("""
            WITH candidates AS (
                SELECT v.*
                FROM videos v
                WHERE v.codec IS NULL
                  AND v.status = 'active'
                  AND v.filepath IS NOT NULL
                  AND v.created_at >= NOW() - make_interval(days => %s)
            ),
            by_streamer AS (
                SELECT username, site, COUNT(*) AS total_vids
                FROM candidates
                GROUP BY username, site
            ),
            ordered AS (
                SELECT c.*,
                       bs.total_vids,
                       CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
                FROM candidates c
                JOIN by_streamer bs
                  ON bs.username = c.username
                 AND bs.site = c.site
            )
            SELECT *
            FROM ordered
            ORDER BY
                total_vids DESC,   -- most videos first
                small_first ASC,   -- small (< small_mb) first
                size DESC,         -- then larger files first within group
                created_at DESC;   -- then newest
        """, (days, small_mb))
        return cursor.fetchall()

    # Try windows: 7 → 30 → 90 days
    for d in (7, 30, 90):
        rows = pick(d)
        if rows:
            return rows

    # Fallback: any time, same ordering
    cursor.execute("""
        WITH candidates AS (
            SELECT v.*
            FROM videos v
            WHERE v.codec IS NULL
              AND v.status = 'active'
              AND v.filepath IS NOT NULL
        ),
        by_streamer AS (
            SELECT username, site, COUNT(*) AS total_vids
            FROM candidates
            GROUP BY username, site
        ),
        ordered AS (
            SELECT c.*,
                   bs.total_vids,
                   CASE WHEN c.size < %s THEN 0 ELSE 1 END AS small_first
            FROM candidates c
            JOIN by_streamer bs
              ON bs.username = c.username
             AND bs.site = c.site
        )
        SELECT *
        FROM ordered
        ORDER BY
            total_vids DESC,
            small_first ASC,
            size DESC,
            created_at DESC;
    """, (small_mb,))
    return cursor.fetchall()


def select_user_videos(username, cursor):
    if username == "all":
        cursor.execute("SELECT * FROM videos WHERE status = 'active' AND codec IS NULL ORDER BY size ASC")
    else:
        cursor.execute("SELECT * FROM videos WHERE username = %s AND status = 'active' AND codec IS NULL ORDER BY size ASC", (username,))
    return cursor.fetchall()


def reencode_videos_av1():
    conn, cursor = config.get_local_db_connection()

    while True:
        username = input("Enter username: ")
        if username:
            videos = select_user_videos(username, cursor)
        else:
            # videos = smart_choice(cursor)
            videos = smart_choice_by_count(cursor)

        with tqdm(videos, desc="Processing videos", unit="file") as pbar:
            for video in videos:
                input_path = video['filepath']

                if not os.path.exists(input_path):
                    print(f"🚫 File not found: {input_path}")
                    continue

                file_size_in_mb = os.path.getsize(input_path) / (1024 * 1024)
                print(f"\nProcessing {os.path.basename(input_path)} ({file_size_in_mb:.2f} MB)...")

                if file_size_in_mb < 1:
                    print("Video is too small. Skipping.")
                    os.remove(input_path)
                    cursor.execute("UPDATE videos SET status = 'deleted' WHERE id = %s", (video['id'],))
                    conn.commit()
                    continue

                # 2) Get current bitrate & resolution
                current_bitrate, (width, height) = get_video_info(input_path)
                if not current_bitrate:
                    print("Video's bitrate is not available. Skipping")
                    continue

                target_bitrate = get_target_bitrate(width, height)

                # If current bitrate <= target, don't encode above the source bitrate
                if current_bitrate <= target_bitrate:
                    target_bitrate = current_bitrate

                if is_av1(input_path):
                    print("Video is already encoded in AV1. Skipping")
                    update_codec_db(video['id'], 'av1')
                    continue

                # 3) Re-encode
                output_path = os.path.join('.temp', os.path.basename(input_path))
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                encoded = encode_video(input_path, output_path, target_bitrate)

                if not encoded:
                    print("Encoding failed. Skipping.")
                    continue

                # 4) Compare file sizes and replace if smaller
                new_size = check_and_replace_if_smaller(input_path, output_path)
                if new_size:
                    update_codec_db(video['id'], 'av1')
                    update_file_size_db(video['id'], new_size)

                pbar.update(1)


if __name__ == "__main__":
    reencode_videos_av1()
@@ -1,141 +0,0 @@
import ffmpeg
import subprocess
import json
import os
from collections import Counter


def is_av1(filepath):
    """Check if a video file is already AV1-encoded."""
    try:
        probe = ffmpeg.probe(filepath)
        for stream in probe['streams']:
            if stream['codec_type'] == 'video' and 'codec_name' in stream:
                if stream['codec_name'] == 'av1':
                    return True
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
    return False


def get_fps(filepath):
    """Get the frames per second (FPS) of the input video using ffmpeg.probe."""
    try:
        probe = ffmpeg.probe(filepath)
        video_stream = next((s for s in probe['streams'] if s['codec_type'] == 'video'), None)
        if video_stream and 'r_frame_rate' in video_stream:
            fps_str = video_stream['r_frame_rate']  # e.g. "30/1", "25/1"
            num, den = map(int, fps_str.split('/'))
            return num / den
    except ffmpeg.Error as e:
        print(f"Error getting FPS for {filepath}: {e}")
    return None


def get_video_info(filepath):
    """
    Returns dict:
      { 'width': int, 'height': int, 'bitrate': int, 'fps': float }
    - bitrate is Kbps (rounded down)
    - uses stream bit_rate, else format bit_rate, else computed from file size / duration
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,bit_rate,r_frame_rate",
        "-show_entries", "format=bit_rate,duration",
        "-of", "json", filepath
    ]
    r = subprocess.run(cmd, capture_output=True, text=True)
    if r.returncode:
        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}

    try:
        d = json.loads(r.stdout or "{}")
        s = (d.get("streams") or [{}])[0]
        f = d.get("format") or {}

        width = int(s.get("width") or 0)
        height = int(s.get("height") or 0)

        # fps (r_frame_rate like "30000/1001")
        fps = 0.0
        rfr = s.get("r_frame_rate")
        if rfr and rfr != "0/0":
            try:
                num, den = rfr.split("/")
                num = float(num); den = float(den)
                fps = (num / den) if den else 0.0
            except Exception:
                pass

        # bitrate in bps → prefer stream, fallback to format, else compute
        br_bps = s.get("bit_rate") or f.get("bit_rate")
        if not br_bps:
            try:
                dur = float(f.get("duration") or 0)
                if dur > 0:
                    br_bps = int(os.path.getsize(filepath) * 8 / dur)
            except Exception:
                br_bps = 0
        br_kbps = int(int(br_bps or 0) / 1000)

        return {"width": width, "height": height, "bitrate": br_kbps, "fps": fps}
    except Exception:
        return {"width": 0, "height": 0, "bitrate": 0, "fps": 0.0}


def get_common_resolution(group):
    """Most common (w,h) across the group's videos. Fallback 1280x720."""
    resolutions = []
    for v in group:
        info = get_video_info(v["filepath"])
        w, h = info.get("width"), info.get("height")
        if w and h:
            resolutions.append((w, h))
    if not resolutions:
        return (1280, 720)
    return Counter(resolutions).most_common(1)[0][0]


def get_target_resolution(group):
    """
    Choose (w,h) whose videos have the highest *total duration*.
    Tie-breakers: higher count, then larger area. Fallback 1280x720.
    """
    totals = {}  # (w,h) -> total duration
    counts = {}  # (w,h) -> number of files

    for v in group:
        info = get_video_info(v["filepath"])
        w, h = info.get("width"), info.get("height")
        if not (w and h):
            continue

        # Prefer DB duration if present, else probe info['duration'], else 0
        dur = v.get("duration", info.get("duration", 0))
        try:
            dur = float(dur)
        except (TypeError, ValueError):
            dur = 0.0

        key = (w, h)
        totals[key] = totals.get(key, 0.0) + dur
        counts[key] = counts.get(key, 0) + 1

    if not totals:
        return (1280, 720)

    def sort_key(item):
        (w, h), total = item
        cnt = counts[(w, h)]
        area = (w or 0) * (h or 0)
        return (total, cnt, area)

    best_resolution = max(totals.items(), key=sort_key)[0]
    return best_resolution


def get_target_bitrate(width, height):
    """Choose a target bitrate (kbps) based on resolution."""
    resolutions = {(854, 480): 1000, (1280, 720): 1500, (1920, 1080): 3000, (2560, 1440): 5000, (3840, 2160): 12000}
    for res, bitrate in resolutions.items():
        if width <= res[0] and height <= res[1]:
            return bitrate
    return 2500
@@ -1,100 +0,0 @@
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os, cv2


def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    clip_main = VideoFileClip(input_video)

    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)

    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        silent_audio = AudioArrayClip([[0] * int(clip_intro.duration * clip_main.audio.fps)], fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)

    final_clip = concatenate_videoclips([clip_intro, clip_main])

    final_clip.write_videofile(output_video, codec='libx264')


def get_duration(input_file):
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0

    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        duration = frames / fps
        video.release()

        return int(duration)
    except Exception as e:
        print(e)
        return 0


def generate_thumbnails(input_file, filename):
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')

    clip = VideoFileClip(input_file)
    duration = clip.duration

    interval = duration / 11.0

    start_time_first_clip = 0 * interval
    try:
        clip.save_frame(posterPath, t=start_time_first_clip)
    except Exception:
        pass

    clips = []
    for i in range(10):
        start_time = i * interval
        end_time = start_time + 1
        clips.append(clip.subclip(start_time, end_time))

    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")

    for subclip in clips:
        subclip.close()

    clip.close()
    final_clip.close()

    return posterPath, previewPath


def split_video(file_path, segment_size_gb=8):
    import subprocess

    # Convert GB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024

    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)

    # Calculate the number of segments needed
    num_segments = total_size_bytes // segment_size_bytes + 1

    # Get the duration of the video file
    duration = get_duration(file_path)

    # Calculate the duration of each segment
    segment_duration = duration / num_segments

    # Generate output file pattern
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"

    # Run FFmpeg command to split the video
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command)