cleanup
parent
4b1b7d08e3
commit
1a3ee2e430
@ -1,100 +0,0 @@
|
|||||||
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
|
||||||
import os, cv2
|
|
||||||
|
|
||||||
def add_intro_to_video(input_video, intro_video='intro.mp4', output_video='output.mp4'):
    """Prepend an intro clip to a video and write the combined result.

    The intro is resized and retimed to match the main clip.  If the main
    clip has audio but the intro does not, a silent track is attached to
    the intro so the audio stays in sync after concatenation.

    Args:
        input_video: Path of the main video file.
        intro_video: Path of the intro clip (default 'intro.mp4').
        output_video: Path of the H.264 file to write (default 'output.mp4').
    """
    clip_main = VideoFileClip(input_video)
    clip_intro = VideoFileClip(intro_video).resize(clip_main.size).set_fps(clip_main.fps)

    if clip_main.audio is not None and clip_intro.audio is None:
        from moviepy.editor import AudioArrayClip
        import numpy as np

        n_samples = int(clip_intro.duration * clip_main.audio.fps)
        # AudioArrayClip expects an array of shape (n_samples, n_channels).
        # The previous [[0] * n_samples] literal produced a single 1-sample
        # "frame" with n_samples channels, i.e. a broken silent track.
        silence = np.zeros((n_samples, clip_main.audio.nchannels))
        silent_audio = AudioArrayClip(silence, fps=clip_main.audio.fps)
        clip_intro = clip_intro.set_audio(silent_audio)

    final_clip = concatenate_videoclips([clip_intro, clip_main])
    final_clip.write_videofile(output_video, codec='libx264')
|
|
||||||
|
|
||||||
def get_duration(input_file):
    """Return the duration of *input_file* in whole seconds.

    Derives the duration from OpenCV's frame-count and fps metadata.
    Returns 0 (after printing a diagnostic) when the file is missing or
    cannot be probed.
    """
    if not os.path.isfile(input_file):
        print('Input file does not exist')
        return 0

    video = None
    try:
        video = cv2.VideoCapture(input_file)
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Files with missing/zero fps metadata previously raised
            # ZeroDivisionError; treat them as unreadable instead.
            print('Could not determine fps')
            return 0
        return int(frames / fps)
    except Exception as e:
        print(e)
        return 0
    finally:
        # Release the capture handle on every path; the original leaked it
        # when an exception fired before video.release().
        if video is not None:
            video.release()
|
|
||||||
|
|
||||||
def generate_thumbnails(input_file, filename):
    """Generate a poster image and a short preview reel for a video.

    Writes a single poster frame to temp/<filename>.jpg and a muted,
    384x216 preview made of ten 1-second samples to temp/<filename>.mp4.

    Args:
        input_file: Path of the source video.
        filename: Stem used to name both output files.

    Returns:
        Tuple (posterPath, previewPath) of the written file paths.

    Raises:
        ValueError: If input_file does not exist.
    """
    output_folder = 'temp/'
    if not os.path.isfile(input_file):
        raise ValueError('Input file does not exist')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Use the caller-supplied stem; the previous version ignored the
    # `filename` parameter and wrote a hard-coded name instead.
    posterPath = os.path.join(output_folder, f'{filename}.jpg')
    previewPath = os.path.join(output_folder, f'{filename}.mp4')

    clip = VideoFileClip(input_file)
    duration = clip.duration

    # Eleven intervals: sample starts at 0/11 .. 9/11 of the runtime so the
    # reel never starts a window at the very end of the file.
    interval = duration / 11.0

    start_time_first_clip = 0 * interval
    try:
        clip.save_frame(posterPath, t=start_time_first_clip)
    except Exception:
        # Poster extraction is best-effort; a failed poster must not abort
        # preview generation.
        pass

    clips = []
    for i in range(10):
        start_time = i * interval
        # Clamp so the final 1-second window cannot run past the end of a
        # short video (subclip would raise otherwise).
        end_time = min(start_time + 1, duration)
        clips.append(clip.subclip(start_time, end_time))

    final_clip = concatenate_videoclips(clips).resize(newsize=(384, 216)).without_audio()
    final_clip.write_videofile(previewPath, fps=24, codec="libx264")

    for subclip in clips:
        subclip.close()

    clip.close()
    final_clip.close()

    return posterPath, previewPath
|
|
||||||
|
|
||||||
def split_video(file_path, segment_size_gb=8):
    """Split a video into roughly equal segments of at most ~segment_size_gb.

    Computes the number of segments from the file size, derives a
    per-segment duration (assuming roughly constant bitrate), and invokes
    FFmpeg's stream-copy segment muxer (no re-encode).  Output files are
    named <stem>_segment_000<ext>, <stem>_segment_001<ext>, ...

    Args:
        file_path: Path of the video to split.
        segment_size_gb: Target maximum size of each segment in GiB.

    Raises:
        ValueError: If the video duration cannot be determined.
    """
    import subprocess

    # Convert GiB to bytes
    segment_size_bytes = segment_size_gb * 1024 * 1024 * 1024

    # Get the total size of the video file
    total_size_bytes = os.path.getsize(file_path)

    # Ceiling division; the old `total // size + 1` produced one extra
    # (empty) segment whenever the size divided evenly.
    num_segments = max(1, -(-total_size_bytes // segment_size_bytes))

    # Get the duration of the video file
    duration = get_duration(file_path)
    if duration <= 0:
        # get_duration returns 0 on failure; bail out instead of dividing
        # by zero below.
        raise ValueError(f'Could not determine duration of {file_path}')

    # Calculate the duration of each segment
    segment_duration = duration / num_segments

    # Generate output file pattern, e.g. foo_segment_%03d.mp4
    file_name, file_extension = os.path.splitext(file_path)
    output_pattern = f"{file_name}_segment_%03d{file_extension}"

    # Run FFmpeg command to split the video (stream copy, all streams mapped)
    command = [
        "ffmpeg", "-i", file_path, "-c", "copy", "-map", "0",
        "-segment_time", str(segment_duration), "-f", "segment", output_pattern
    ]
    subprocess.run(command)
|
|
||||||
@ -1,138 +0,0 @@
|
|||||||
from archiveConfig import get_local_db_connection
|
|
||||||
from psycopg2.extras import execute_values
|
|
||||||
from datetime import datetime
|
|
||||||
import uuid, shutil, json, os
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# Destination for JSON files whose video_id is already recorded in the DB.
DATA_DIR = 'data'
# Where freshly downloaded JSON metadata lands before ingestion.
DOWNLOAD_DIR = 'downloaded'

# Module-level DB handle: opened once at import time and shared by every
# function below.  NOTE(review): rows are read by key (row['video_id']),
# so this presumably returns a dict-style cursor — confirm in archiveConfig.
conn, cursor = get_local_db_connection()
|
|
||||||
|
|
||||||
def is_valid_uuid(val: str, version=None) -> bool:
    """Check whether *val* is a canonically formatted UUID string.

    Parses the value with uuid.UUID (optionally pinning the UUID version)
    and then requires the canonical form to round-trip against the
    lower-cased input, so only dashed 8-4-4-4-12 strings pass.
    """
    try:
        if version:
            parsed = uuid.UUID(val, version=version)
        else:
            parsed = uuid.UUID(val)
    except (ValueError, AttributeError, TypeError):
        # Not parseable at all (wrong format, wrong type, None, ...).
        return False
    # Reject inputs that uuid.UUID would merely normalize
    # (un-dashed hex, braces, urn: prefixes) — casing is tolerated.
    return str(parsed) == val.lower()
|
|
||||||
|
|
||||||
def parse_json_file(filepath):
    """Parse one per-video metadata JSON file into a DB-ready dict.

    The file's basename (minus extension) is used as the video_id and must
    be a valid UUID.  The created timestamp is read from "createdAt" (or
    the legacy "date" key); "updatedAt" falls back to the created time.
    All timestamps use the "%Y-%m-%d %H:%M:%S" format.

    Returns:
        dict with keys video_id, username, site, gender, size, duration,
        filepath, jsonpath, hash, created_at, updated_at — or None when
        the video_id is not a valid UUID.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Fix: `date` was previously unbound (NameError) when neither key
    # existed; default to None so the warning branch below is reachable.
    date = None
    if "createdAt" in data:
        date = data.get("createdAt")
    elif "date" in data:
        date = data.get("date")

    if date:
        created_at = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    else:
        created_at = None
        print(f"⚠️ No createdAt or date found in {filepath}")

    if "updatedAt" in data:
        updated_at = datetime.strptime(data.get("updatedAt"), "%Y-%m-%d %H:%M:%S")
    else:
        updated_at = created_at

    video_id = os.path.splitext(os.path.basename(filepath))[0]
    if not is_valid_uuid(video_id):
        print(f"⚠️ Invalid video_id: {video_id}")
        return None

    parsed_data = {
        'video_id': video_id,
        'username': data.get("username"),
        'site': data.get("site"),
        'gender': data.get("gender"),
        'size': data.get("size") if data.get("size") else 0,
        'duration': data.get("duration") if data.get("duration") else 0,
        'filepath': data.get("filepath"),
        'jsonpath': data.get("jsonpath"),
        'hash': None,  # You can add hash calculation here if needed
        'created_at': created_at,
        'updated_at': updated_at
    }

    return parsed_data
|
|
||||||
|
|
||||||
def insert_data(all_data):
    """Bulk-insert parsed video records, silently skipping duplicate ids.

    Uses psycopg2's execute_values for a single multi-row INSERT against
    the module-level connection; conflicts on video_id are ignored.
    """
    query = """
    INSERT INTO videos (
        video_id, username, site, gender, size, duration,
        filepath, hash, created_at, updated_at
    )
    VALUES %s
    ON CONFLICT (video_id) DO NOTHING;
    """
    # Column order must mirror the INSERT column list above.
    columns = ('video_id', 'username', 'site', 'gender',
               'size', 'duration', 'filepath',
               'hash', 'created_at', 'updated_at')
    values = [tuple(record[col] for col in columns) for record in all_data]

    execute_values(cursor, query, values)
    conn.commit()
    print(f"✅ Inserted {cursor.rowcount} new records.")
|
|
||||||
|
|
||||||
def get_files(dir):
    """Recursively collect the path of every .json file below *dir*."""
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(dir)
        for name in names
        if name.endswith('.json')
    ]
|
|
||||||
|
|
||||||
def main():
    """Parse every downloaded JSON metadata file and insert the results.

    Walks DOWNLOAD_DIR, parses each .json file, and bulk-inserts the valid
    records.  Per-file failures are logged and skipped.
    """
    all_records = []

    data_files = [f for f in get_files(DOWNLOAD_DIR) if f.endswith('.json')]

    with tqdm(data_files, desc="Processing files", unit="file") as t:
        for filepath in data_files:
            t.update(1)
            try:
                record = parse_json_file(filepath)
                # parse_json_file returns None for invalid video_ids; the
                # old code appended those Nones and insert_data then
                # crashed on record['video_id'].
                if record is not None:
                    all_records.append(record)
            except Exception as e:
                print(f"❌ Failed to process {filepath}: {e}")

    if all_records:
        insert_data(all_records)
    else:
        print("⚠️ No new records to insert.")
|
|
||||||
|
|
||||||
def check_and_move():
    """Move JSON files whose video_id already exists in the DB into DATA_DIR.

    Files still in DOWNLOAD_DIR that are recorded in the videos table are
    archived into DATA_DIR; existing destination files are left alone.
    """
    db_ids = get_video_ids_from_db()
    moved = 0

    for path in get_json_files(DOWNLOAD_DIR):
        video_id = os.path.splitext(os.path.basename(path))[0]
        if video_id in db_ids:
            output_path = os.path.join(DATA_DIR, os.path.basename(path))
            if os.path.exists(output_path):
                # Message fixed: the collision is in DATA_DIR (the
                # destination), not DOWNLOAD_DIR as previously printed.
                print(f"⚠️ Skipping {path} because it already exists in {DATA_DIR}/")
                continue
            shutil.move(path, output_path)
            moved += 1

    # Message fixed: files are moved into DATA_DIR, not DOWNLOAD_DIR.
    print(f"✅ Moved {moved} files to {DATA_DIR}/")
|
|
||||||
|
|
||||||
# Get all existing video IDs
def get_video_ids_from_db():
    """Return the set of every video_id currently stored in the DB."""
    # Uses the shared module-level cursor.  NOTE(review): row['video_id']
    # assumes dict-like rows (e.g. a RealDictCursor) — a plain tuple
    # cursor would break this lookup; confirm the cursor factory.
    cursor.execute("SELECT video_id FROM videos;")
    return {row['video_id'] for row in cursor.fetchall()}
|
|
||||||
|
|
||||||
# Iterate files
def get_json_files(dir):
    """Lazily yield the path of every .json file under *dir* (recursive)."""
    for folder, _dirs, filenames in os.walk(dir):
        yield from (
            os.path.join(folder, name)
            for name in filenames
            if name.endswith('.json')
        )
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Ingest freshly downloaded metadata into the DB, then archive the
    # JSON files that are now recorded.
    main()
    check_and_move()
|
|
||||||
@ -1,36 +0,0 @@
|
|||||||
# organize_thumbnails.py (fixed)
|
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
# Legacy flat layout: static/thumbnails/<video_id>.<ext>
OLD_THUMB_DIR = "static/thumbnails"
# Hashed fan-out layout: static/thumbnails_hashed/<h[:2]>/<h[2:4]>/<video_id>.webp
HASHED_DIR = "static/thumbnails_hashed"
|
|
||||||
|
|
||||||
def hashed_path(video_id: str) -> str:
    """Return hashed path based on video ID (no extension)."""
    digest = hashlib.md5(video_id.encode()).hexdigest()
    # Two-level fan-out on the first four hex chars of the MD5 keeps any
    # single directory from accumulating a huge number of files.
    return os.path.join(HASHED_DIR, digest[:2], digest[2:4], f"{video_id}.webp")
|
|
||||||
|
|
||||||
def organize_thumbnails():
    """Migrate every thumbnail from the flat layout into the hashed tree.

    Walks OLD_THUMB_DIR, moves each file to its hashed_path() location,
    and leaves (and reports) any destination that already exists, so the
    migration is safe to re-run.
    """
    os.makedirs(HASHED_DIR, exist_ok=True)
    moved_count = 0

    for folder, _dirs, names in os.walk(OLD_THUMB_DIR):
        for name in names:
            source = os.path.join(folder, name)
            # The hashed location is keyed on the bare video id (no ext).
            target = hashed_path(os.path.splitext(name)[0])

            os.makedirs(os.path.dirname(target), exist_ok=True)

            if os.path.exists(target):
                print(f"[SKIP] Exists: {target}")
                continue
            shutil.move(source, target)
            moved_count += 1

    print(f"\n✅ Done! Organized {moved_count} thumbnails into hashed structure.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Run the one-off migration from the flat to the hashed layout.
    organize_thumbnails()
|
|
||||||
@ -1,69 +0,0 @@
|
|||||||
import os, shutil, config
|
|
||||||
import ffmpeg
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
def is_av1(filepath):
    """Probe *filepath* and report whether its video stream is AV1.

    Returns True/False on a successful probe, or the sentinel string
    "Fucked" when ffprobe itself fails (callers compare against it to
    quarantine unreadable files).
    """
    try:
        probe = ffmpeg.probe(filepath)
    except ffmpeg.Error as e:
        print(f"Error probing {filepath}: {e}")
        return "Fucked"
    return any(
        stream['codec_type'] == 'video' and stream.get('codec_name') == 'av1'
        for stream in probe['streams']
    )
|
|
||||||
|
|
||||||
def save_last_checked(filepath):
    """Persist *filepath* as the resume checkpoint (.last_checked in cwd)."""
    with open(".last_checked", "w") as checkpoint:
        checkpoint.write(filepath)
|
|
||||||
|
|
||||||
def get_last_checked():
    """Return the saved resume checkpoint, or None if none was recorded."""
    if not os.path.exists(".last_checked"):
        return None
    with open(".last_checked", "r") as checkpoint:
        return checkpoint.read().strip()
|
|
||||||
|
|
||||||
def init_list(videos):
    """Drop the prefix of *videos* that was already processed.

    Reads the checkpoint written by save_last_checked and returns the
    slice of *videos* starting just after the matching entry; the full
    list is returned when there is no checkpoint or no match.
    """
    last_checked = get_last_checked()
    if last_checked:
        # enumerate instead of videos.index(video): the old lookup was
        # O(n) per iteration and mis-sliced at the first duplicate when
        # two rows shared the same dict contents.
        for idx, video in enumerate(videos):
            if os.path.basename(video['filepath']) == last_checked:
                return videos[idx + 1:]
    return videos
|
|
||||||
|
|
||||||
def reencode_videos_av1():
    """Scan the video library and tag AV1-encoded files in the database.

    NOTE(review): despite the name, this function does not re-encode
    anything — it only probes each file and updates the `codec` column.
    Files that cannot be probed at all are quarantined into fucked/.
    """
    conn, cursor = config.get_local_db_connection()
    # Smallest files first, so cheap probes come early in the run.
    cursor.execute("SELECT filepath, id, codec FROM videos WHERE status != 'missing' AND filepath IS NOT NULL ORDER BY size ASC;")
    videos = cursor.fetchall()

    # Quarantine directory for unreadable files.
    os.makedirs("fucked", exist_ok=True)

    # Resume after the last checkpointed file, if any.
    videos = init_list(videos)

    with tqdm(videos, desc="Checking videos", unit="file") as pbar:
        for video in videos:
            pbar.update(1)

            # Checkpoint every 100 files so an interrupted run can resume.
            if pbar.n % 100 == 0:
                save_last_checked(os.path.basename(video['filepath']))

            # Already tagged in the DB — skip the (slow) probe.
            if video['codec'] == 'av1':
                continue

            input_path = video['filepath']
            isav1 = is_av1(input_path)

            # is_av1 returns the string sentinel "Fucked" when ffprobe
            # itself failed; move the file aside and carry on.
            if isav1 == "Fucked":
                print(f"🚫 Error probing {input_path}")
                shutil.move(input_path, "fucked/" + os.path.basename(input_path))
                continue

            if isav1 == False:
                continue

            # Probe says AV1: record it so future runs skip the file.
            cursor.execute("UPDATE videos SET codec = %s WHERE id = %s", ('av1', video['id']))
            conn.commit()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Probe the library and tag AV1 files in the DB (resumable).
    reencode_videos_av1()
|
|
||||||
Loading…
Reference in New Issue