cleanup

parent 4b1b7d08e3
commit 1a3ee2e430
@@ -1,100 +0,0 @@
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os, cv2


def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    clip_main = VideoFileClip(input_video)

    # Match the intro to the main clip's resolution and frame rate
    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)

    # Concatenation breaks if one clip has audio and the other does not,
    # so pad a silent intro with a zero-filled track
    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        import numpy as np
        # AudioArrayClip expects an array shaped (n_samples, n_channels)
        n_samples = int(clip_intro.duration * clip_main.audio.fps)
        silent_audio = AudioArrayClip(np.zeros((n_samples, 1)), fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)

    final_clip = concatenate_videoclips([clip_intro, clip_main])

    final_clip.write_videofile(output_video, codec='libx264')


def get_duration(input_file):
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0

    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        duration = frames / fps if fps else 0  # guard against fps == 0
        video.release()

        return int(duration)
    except Exception as e:
        print(e)
        return 0


def generate_thumbnails(input_file, filename):
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')

    clip = VideoFileClip(input_file)
    duration = clip.duration

    # 11 intervals leave headroom so the last 1-second subclip still fits
    interval = duration / 11.0

    # Poster frame from the start of the first interval
    start_time_first_clip = 0 * interval
    try:
        clip.save_frame(posterPath, t=start_time_first_clip)
    except Exception:
        pass

    # Stitch ten 1-second samples into a scrub preview
    clips = []
    for i in range(10):
        start_time = i * interval
        end_time = start_time + 1
        clips.append(clip.subclip(start_time, end_time))

    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")

    for subclip in clips:
        subclip.close()

    clip.close()
    final_clip.close()

    return posterPath, previewPath


def split_video(file_path, segment_size_gb=8):
    import subprocess

    # Convert GB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024

    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)

    # Calculate the number of segments needed
    num_segments = total_size_bytes // segment_size_bytes + 1

    # Get the duration of the video file
    duration = get_duration(file_path)

    # Calculate the duration of each segment
    segment_duration = duration / num_segments

    # Generate output file pattern
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"

    # Run FFmpeg command to split the video (stream copy, no re-encode)
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command)
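A minimal driver sketch showing how these helpers chain together; the file names below are placeholders, not paths from this repository:

```python
# Hypothetical usage of the helpers above; all paths are examples only.
if __name__ == "__main__":
    src = "uploads/example.mp4"
    add_intro_to_video(src, output_video="branded.mp4")
    print(f"duration: {get_duration('branded.mp4')} s")
    poster, preview = generate_thumbnails("branded.mp4", "example")
    print(f"poster: {poster}, preview: {preview}")
    split_video("branded.mp4")  # files under 8 GB yield a single segment
```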
@@ -1,138 +0,0 @@
from archiveConfig import get_local_db_connection
from psycopg2.extras import execute_values
from datetime import datetime
import uuid, shutil, json, os
from tqdm import tqdm

DATA_DIR = 'data'
DOWNLOAD_DIR = 'downloaded'

conn, cursor = get_local_db_connection()


def is_valid_uuid(val: str, version=None) -> bool:
    try:
        u = uuid.UUID(val, version=version) if version else uuid.UUID(val)
        return str(u) == val.lower()  # Match exact input (handles casing)
    except (ValueError, AttributeError, TypeError):
        return False


def parse_json_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Fall back from "createdAt" to "date"; either key may be absent
    date = data.get("createdAt") or data.get("date")

    if date:
        created_at = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    else:
        created_at = None
        print(f"⚠️ No createdAt or date found in {filepath}")

    if "updatedAt" in data:
        updated_at = datetime.strptime(data.get("updatedAt"), "%Y-%m-%d %H:%M:%S")
    else:
        updated_at = created_at

    video_id = os.path.splitext(os.path.basename(filepath))[0]
    if not is_valid_uuid(video_id):
        print(f"⚠️ Invalid video_id: {video_id}")
        return None

    parsed_data = {
        'video_id': video_id,
        'username': data.get("username"),
        'site': data.get("site"),
        'gender': data.get("gender"),
        'size': data.get("size") or 0,
        'duration': data.get("duration") or 0,
        'filepath': data.get("filepath"),
        'jsonpath': data.get("jsonpath"),
        'hash': None,  # You can add hash calculation here if needed
        'created_at': created_at,
        'updated_at': updated_at
    }

    return parsed_data


def insert_data(all_data):
    query = """
        INSERT INTO videos (
            video_id, username, site, gender, size, duration,
            filepath, hash, created_at, updated_at
        )
        VALUES %s
        ON CONFLICT (video_id) DO NOTHING;
    """
    values = [
        (
            d['video_id'], d['username'], d['site'], d['gender'],
            d['size'], d['duration'], d['filepath'],
            d['hash'], d['created_at'], d['updated_at']
        )
        for d in all_data
    ]
    execute_values(cursor, query, values)
    conn.commit()
    print(f"✅ Inserted {cursor.rowcount} new records.")


def get_files(dir):
    files = []
    for root, _, filenames in os.walk(dir):
        for filename in filenames:
            if filename.endswith('.json'):
                files.append(os.path.join(root, filename))
    return files


def main():
    all_records = []

    data_files = get_files(DOWNLOAD_DIR)  # already filtered to .json

    with tqdm(data_files, desc="Processing files", unit="file") as t:
        for filepath in t:
            try:
                record = parse_json_file(filepath)
                if record:  # parse_json_file returns None for invalid IDs
                    all_records.append(record)
            except Exception as e:
                print(f"❌ Failed to process {filepath}: {e}")

    if all_records:
        insert_data(all_records)
    else:
        print("⚠️ No new records to insert.")


def check_and_move():
    db_ids = get_video_ids_from_db()
    moved = 0

    for path in get_json_files(DOWNLOAD_DIR):
        video_id = os.path.splitext(os.path.basename(path))[0]
        if video_id in db_ids:
            output_path = os.path.join(DATA_DIR, os.path.basename(path))
            if os.path.exists(output_path):
                print(f"⚠️ Skipping {path} because it already exists in {DATA_DIR}/")
                continue
            shutil.move(path, output_path)
            moved += 1

    print(f"✅ Moved {moved} files to {DATA_DIR}/")


# Get all existing video IDs
def get_video_ids_from_db():
    cursor.execute("SELECT video_id FROM videos;")
    return {row['video_id'] for row in cursor.fetchall()}


# Iterate files
def get_json_files(dir):
    for root, _, files in os.walk(dir):
        for file in files:
            if file.endswith('.json'):
                yield os.path.join(root, file)


if __name__ == '__main__':
    main()
    check_and_move()
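The script indexes result rows by column name (`row['video_id']`), which implies `get_local_db_connection` hands back a dict-style cursor. A sketch of what `archiveConfig` might provide, with placeholder connection parameters rather than the real config:

```python
# archiveConfig.py — hypothetical sketch; DSN values are placeholders.
import psycopg2
from psycopg2.extras import RealDictCursor

def get_local_db_connection():
    conn = psycopg2.connect(
        host="localhost", dbname="archive",  # assumed database name
        user="archive", password="secret"    # placeholder credentials
    )
    # RealDictCursor lets callers write row['video_id'] instead of row[0]
    cursor = conn.cursor(cursor_factory=RealDictCursor)
    return conn, cursor
```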
@@ -1,36 +0,0 @@
# organize_thumbnails.py (fixed)
import os
import hashlib
import shutil

OLD_THUMB_DIR = "static/thumbnails"
HASHED_DIR = "static/thumbnails_hashed"


def hashed_path(video_id: str) -> str:
    """Return hashed path based on video ID (no extension)."""
    h = hashlib.md5(video_id.encode()).hexdigest()
    sub1, sub2 = h[:2], h[2:4]
    return os.path.join(HASHED_DIR, sub1, sub2, f"{video_id}.webp")


def organize_thumbnails():
    os.makedirs(HASHED_DIR, exist_ok=True)
    moved_count = 0

    for root, _, files in os.walk(OLD_THUMB_DIR):
        for file in files:
            video_id = os.path.splitext(file)[0]  # strip extension
            src_path = os.path.join(root, file)
            dest_path = hashed_path(video_id)

            os.makedirs(os.path.dirname(dest_path), exist_ok=True)

            if not os.path.exists(dest_path):
                shutil.move(src_path, dest_path)
                moved_count += 1
            else:
                print(f"[SKIP] Exists: {dest_path}")

    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")


if __name__ == "__main__":
    organize_thumbnails()
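The two-level fan-out (`h[:2]`, `h[2:4]`) caps every directory at 256 subdirectories, which keeps listings fast once the collection reaches hundreds of thousands of thumbnails. A sketch of the reverse lookup a server would do against this layout; `serve_thumbnail` is illustrative and not part of the original script:

```python
# Illustrative lookup built on hashed_path() above.
from typing import Optional

def serve_thumbnail(video_id: str) -> Optional[str]:
    """Resolve a video ID to its hashed thumbnail path, if it exists."""
    path = hashed_path(video_id)  # same md5 fan-out: HASHED_DIR/ab/cd/<id>.webp
    return path if os.path.exists(path) else None
```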
@@ -1,69 +0,0 @@
import os, shutil, config
import ffmpeg
from tqdm import tqdm


def is_av1(filepath):
    """Return True if the file has an AV1 video stream, False if not,
    or the string "Fucked" if the file cannot be probed at all."""
    try:
        probe = ffmpeg.probe(filepath)
        for stream in probe['streams']:
            if stream['codec_type'] == 'video' and 'codec_name' in stream:
                if stream['codec_name'] == 'av1':
                    return True
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return "Fucked"
    return False


def save_last_checked(filepath):
    with open(".last_checked", "w") as f:
        f.write(filepath)


def get_last_checked():
    if os.path.exists(".last_checked"):
        with open(".last_checked", "r") as f:
            return f.read().strip()
    return None


def init_list(videos):
    # Resume after the last checkpointed file, if a checkpoint exists
    last_checked = get_last_checked()
    if last_checked:
        for i, video in enumerate(videos):
            if os.path.basename(video['filepath']) == last_checked:
                return videos[i + 1:]
    return videos


def reencode_videos_av1():
    conn, cursor = config.get_local_db_connection()
    cursor.execute("SELECT filepath, id, codec FROM videos WHERE status != 'missing' AND filepath IS NOT NULL ORDER BY size ASC;")
    videos = cursor.fetchall()

    os.makedirs("fucked", exist_ok=True)

    videos = init_list(videos)

    with tqdm(videos, desc="Checking videos", unit="file") as pbar:
        for video in pbar:
            # Checkpoint every 100 files so an interrupted run can resume
            if pbar.n > 0 and pbar.n % 100 == 0:
                save_last_checked(os.path.basename(video['filepath']))

            if video['codec'] == 'av1':
                continue

            input_path = video['filepath']
            isav1 = is_av1(input_path)

            if isav1 == "Fucked":
                print(f"🚫 Error probing {input_path}")
                shutil.move(input_path, "fucked/" + os.path.basename(input_path))
                continue

            if isav1 is False:
                continue

            # Probe found AV1 but the DB row disagrees: record the codec
            cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", ('av1', video['id']))
            conn.commit()


if __name__ == "__main__":
    reencode_videos_av1()
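For environments without the ffmpeg-python wrapper, the same probe can be done against the ffprobe CLI directly. A minimal sketch, assuming ffprobe is on PATH; the function name is illustrative:

```python
# Alternative codec probe using the ffprobe CLI; not part of the original script.
import json, subprocess

def probe_video_codec(filepath: str):
    """Return the first video stream's codec name, or None on failure."""
    cmd = [
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_streams", "-select_streams", "v:0", filepath,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return None
    streams = json.loads(result.stdout).get("streams", [])
    return streams[0].get("codec_name") if streams else None
```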