commit 1a3ee2e430 (parent 4b1b7d08e3) · oscar · 1 month ago · branch main

@@ -1,100 +0,0 @@
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os, cv2, subprocess
import numpy as np

def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    clip_main = VideoFileClip(input_video)
    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)
    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        # Give the intro a silent track so concatenation keeps the main clip's audio aligned.
        # AudioArrayClip expects an (n_samples, n_channels) array.
        n_samples = int(clip_intro.duration * clip_main.audio.fps)
        silent_audio = AudioArrayClip(np.zeros((n_samples, clip_main.audio.nchannels)), fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)
    final_clip = concatenate_videoclips([clip_intro, clip_main])
    final_clip.write_videofile(output_video, codec='libx264')

def get_duration(input_file):
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0
    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        duration = frames / fps  # ZeroDivisionError on unreadable fps is caught below
        video.release()
        return int(duration)
    except Exception as e:
        print(e)
        return 0

def generate_thumbnails(input_file, filename):
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')
    clip = VideoFileClip(input_file)
    duration = clip.duration
    interval = duration / 11.0
    # Poster frame from the very start of the video.
    try:
        clip.save_frame(posterPath, t=0)
    except Exception:
        pass
    # Ten one-second samples, evenly spaced, stitched into a short preview.
    clips = []
    for i in range(10):
        start_time = i * interval
        end_time = min(start_time + 1, duration)  # clamp so short videos don't overrun
        clips.append(clip.subclip(start_time, end_time))
    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")
    for subclip in clips:
        subclip.close()
    clip.close()
    final_clip.close()
    return posterPath, previewPath

def split_video(file_path, segment_size_gb=8):
    # Convert GB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024
    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)
    # Calculate the number of segments needed (ceiling division)
    num_segments = -(-total_size_bytes // segment_size_bytes)
    # Get the duration of the video file
    duration = get_duration(file_path)
    if duration <= 0:
        raise ValueError(f"Could not determine duration of {file_path}")
    # Calculate the duration of each segment
    segment_duration = duration / num_segments
    # Generate the output file pattern
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"
    # Run FFmpeg to split the video without re-encoding
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command, check=True)
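
For reference, a minimal sketch of how these helpers might be chained; the file names below are placeholders, not paths from this repo:

# Hypothetical usage of the helpers above; all paths are placeholders.
if __name__ == "__main__":
    add_intro_to_video("clip.mp4", output_video="with_intro.mp4")
    poster, preview = generate_thumbnails("with_intro.mp4", "clip-thumb")
    print(poster, preview, get_duration("with_intro.mp4"))
    split_video("with_intro.mp4", segment_size_gb=4)  # splits into ~4 GB stream-copied segments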

@@ -1,138 +0,0 @@
from archiveConfig import get_local_db_connection
from psycopg2.extras import execute_values
from datetime import datetime
import uuid, shutil, json, os
from tqdm import tqdm

DATA_DIR = 'data'
DOWNLOAD_DIR = 'downloaded'
conn, cursor = get_local_db_connection()

def is_valid_uuid(val: str, version=None) -> bool:
    try:
        u = uuid.UUID(val, version=version) if version else uuid.UUID(val)
        return str(u) == val.lower()  # match exact input (handles casing)
    except (ValueError, AttributeError, TypeError):
        return False

def parse_json_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Prefer "createdAt", fall back to "date"; both may be absent.
    date = data.get("createdAt") or data.get("date")
    if date:
        created_at = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    else:
        created_at = None
        print(f"⚠️ No createdAt or date found in {filepath}")
    if "updatedAt" in data:
        updated_at = datetime.strptime(data.get("updatedAt"), "%Y-%m-%d %H:%M:%S")
    else:
        updated_at = created_at
    video_id = os.path.splitext(os.path.basename(filepath))[0]
    if not is_valid_uuid(video_id):
        print(f"⚠️ Invalid video_id: {video_id}")
        return None
    return {
        'video_id': video_id,
        'username': data.get("username"),
        'site': data.get("site"),
        'gender': data.get("gender"),
        'size': data.get("size") or 0,
        'duration': data.get("duration") or 0,
        'filepath': data.get("filepath"),
        'jsonpath': data.get("jsonpath"),
        'hash': None,  # hash calculation could be added here if needed
        'created_at': created_at,
        'updated_at': updated_at
    }

def insert_data(all_data):
    query = """
        INSERT INTO videos (
            video_id, username, site, gender, size, duration,
            filepath, hash, created_at, updated_at
        )
        VALUES %s
        ON CONFLICT (video_id) DO NOTHING;
    """
    values = [
        (
            d['video_id'], d['username'], d['site'], d['gender'],
            d['size'], d['duration'], d['filepath'],
            d['hash'], d['created_at'], d['updated_at']
        )
        for d in all_data
    ]
    execute_values(cursor, query, values)
    conn.commit()
    print(f"✅ Inserted {cursor.rowcount} new records.")

def get_files(directory):
    files = []
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            if filename.endswith('.json'):
                files.append(os.path.join(root, filename))
    return files

def main():
    all_records = []
    data_files = get_files(DOWNLOAD_DIR)  # already filtered to .json
    for filepath in tqdm(data_files, desc="Processing files", unit="file"):
        try:
            record = parse_json_file(filepath)
            if record:  # skip files with invalid video IDs
                all_records.append(record)
        except Exception as e:
            print(f"❌ Failed to process {filepath}: {e}")
    if all_records:
        insert_data(all_records)
    else:
        print("⚠️ No new records to insert.")

def check_and_move():
    db_ids = get_video_ids_from_db()
    moved = 0
    for path in get_json_files(DOWNLOAD_DIR):
        video_id = os.path.splitext(os.path.basename(path))[0]
        if video_id in db_ids:
            output_path = os.path.join(DATA_DIR, os.path.basename(path))
            if os.path.exists(output_path):
                print(f"⚠️ Skipping {path} because it already exists in {DATA_DIR}/")
                continue
            shutil.move(path, output_path)
            moved += 1
    print(f"✅ Moved {moved} files to {DATA_DIR}/")

# Get all existing video IDs
def get_video_ids_from_db():
    cursor.execute("SELECT video_id FROM videos;")
    return {row['video_id'] for row in cursor.fetchall()}

# Iterate over .json files lazily
def get_json_files(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.json'):
                yield os.path.join(root, file)

if __name__ == '__main__':
    main()
    check_and_move()
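
archiveConfig.get_local_db_connection is not part of this commit; for the dict-style row access above (row['video_id']) to work, it would need to return a psycopg2 connection together with a RealDictCursor. A minimal sketch, with placeholder connection parameters:

# Hypothetical sketch of archiveConfig.get_local_db_connection; DSN values are placeholders.
import psycopg2
from psycopg2.extras import RealDictCursor

def get_local_db_connection():
    conn = psycopg2.connect(host="localhost", dbname="archive", user="archive", password="...")
    cursor = conn.cursor(cursor_factory=RealDictCursor)  # rows behave like dicts: row['video_id']
    return conn, cursor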

@@ -1,36 +0,0 @@
# organize_thumbnails.py (fixed)
import os
import hashlib
import shutil

OLD_THUMB_DIR = "static/thumbnails"
HASHED_DIR = "static/thumbnails_hashed"

def hashed_path(video_id: str) -> str:
    """Return the hashed path for a video ID (the extension is not part of the hash)."""
    h = hashlib.md5(video_id.encode()).hexdigest()
    sub1, sub2 = h[:2], h[2:4]
    return os.path.join(HASHED_DIR, sub1, sub2, f"{video_id}.webp")

def organize_thumbnails():
    os.makedirs(HASHED_DIR, exist_ok=True)
    moved_count = 0
    for root, _, files in os.walk(OLD_THUMB_DIR):
        for file in files:
            video_id = os.path.splitext(file)[0]  # strip extension
            src_path = os.path.join(root, file)
            dest_path = hashed_path(video_id)  # note: destination is always a .webp name
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
            if not os.path.exists(dest_path):
                shutil.move(src_path, dest_path)
                moved_count += 1
            else:
                print(f"[SKIP] Exists: {dest_path}")
    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")

if __name__ == "__main__":
    organize_thumbnails()
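
For illustration, the two-level fan-out works like this: the first four hex characters of the ID's MD5 pick the two subfolders, keeping any single directory small. The video ID below is made up:

# Worked example of the fan-out above; the video ID is a made-up placeholder.
import hashlib, os
vid = "3f2504e0-4f89-11d3-9a0c-0305e82c3301"
h = hashlib.md5(vid.encode()).hexdigest()
print(os.path.join("static/thumbnails_hashed", h[:2], h[2:4], f"{vid}.webp"))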

@@ -1,69 +0,0 @@
import os, shutil, config
import ffmpeg
from tqdm import tqdm

def is_av1(filepath):
    """Return True/False, or the sentinel "Fucked" if the file can't even be probed."""
    try:
        probe = ffmpeg.probe(filepath)
        for stream in probe['streams']:
            if stream['codec_type'] == 'video' and stream.get('codec_name') == 'av1':
                return True
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return "Fucked"
    return False

def save_last_checked(filepath):
    with open(".last_checked", "w") as f:
        f.write(filepath)

def get_last_checked():
    if os.path.exists(".last_checked"):
        with open(".last_checked", "r") as f:
            return f.read().strip()
    return None

def init_list(videos):
    # Resume after the last checkpointed file, if there is one.
    last_checked = get_last_checked()
    if last_checked:
        for i, video in enumerate(videos):
            if os.path.basename(video['filepath']) == last_checked:
                return videos[i + 1:]
    return videos

def reencode_videos_av1():
    # Despite the name, this pass only records in the DB which files are already AV1.
    conn, cursor = config.get_local_db_connection()
    cursor.execute("SELECT filepath, id, codec FROM videos WHERE status != 'missing' AND filepath IS NOT NULL ORDER BY size ASC;")
    videos = cursor.fetchall()
    os.makedirs("fucked", exist_ok=True)
    videos = init_list(videos)
    with tqdm(videos, desc="Checking videos", unit="file") as pbar:
        for video in videos:
            pbar.update(1)
            # Checkpoint every 100 files so a crash can resume via init_list().
            if pbar.n % 100 == 0:
                save_last_checked(os.path.basename(video['filepath']))
            if video['codec'] == 'av1':
                continue
            input_path = video['filepath']
            isav1 = is_av1(input_path)
            if isav1 == "Fucked":
                # Unreadable file: quarantine it and move on.
                print(f"🚫 Error probing {input_path}")
                shutil.move(input_path, "fucked/" + os.path.basename(input_path))
                continue
            if isav1 is False:
                continue
            cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", ('av1', video['id']))
            conn.commit()

if __name__ == "__main__":
    reencode_videos_av1()
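
The re-encode step the filename promises is not in this commit; if it lived here, a minimal ffmpeg-python sketch might look like the following. The codec and CRF are illustrative choices, not the author's settings:

# Hypothetical re-encode step (not in this commit); codec settings are illustrative.
import ffmpeg

def reencode_to_av1(input_path, output_path):
    (
        ffmpeg
        .input(input_path)
        .output(output_path, vcodec="libsvtav1", crf=32, acodec="copy")  # copy audio, re-encode video
        .run(overwrite_output=True)
    )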