MASS CLEANUP

Branch: main
oscar committed 8 months ago
parent e6ad418ecd
commit 373f3ab661

@ -1,35 +0,0 @@
import os
import json
import gzip
data_dir = 'data'
data_compressed_dir = 'data_compressed'
os.makedirs(data_compressed_dir, exist_ok=True)
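# compress_file reads a JSON file from disk and hands the parsed data to compress_data,
# which writes it back out as gzip-compressed UTF-8 JSON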
def compress_file(filepath, output_file):
with open(filepath, 'r') as f:
data = json.load(f)
compress_data(data, output_file)
return output_file
def compress_data(data, output_file):
with gzip.open(output_file, 'wb') as f:
f.write(json.dumps(data).encode('utf-8'))
return output_file
data_files = os.listdir(data_dir)
for file in data_files:
if not file.endswith('.json'):
continue
filepath = f'{data_dir}/{file}'
output_file = f'{data_compressed_dir}/{file}.gz'
output_file = compress_file(filepath, output_file)
if output_file:
print(f'Compressed {file} to {output_file}')
os.remove(filepath)
else:
print(f'Failed to compress {file}')
print('Data compression completed')

@ -1,137 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def calculate_file_hash(file_path, hash_func='sha256'):
h = hashlib.new(hash_func)
with open(file_path, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
return h.hexdigest()
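# StorySave filenames are expected to look like "<username>~<timestamp>~<media_id>_<user_id>.<ext>";
# the two extractors below pull those pieces apart and return None values for anything they cannot parse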
def extract_file_info(filename):
try:
username = filename.split("~")[0]
timestamp = filename.split("~")[1]
user_id = filename.split("~")[2]
media_id, some2 = user_id.split("_")
user_id = some2.split(".")[0]
return username, media_id, user_id, timestamp
except (IndexError, ValueError):
return None, None, None, None
def extract_file_info2(filename):
try:
username = filename.split("~")[0]
elements = filename.split("~")[1].split("_")
media_id, user_id = elements[0], elements[1].split(".")[0]
return username, media_id, user_id
except (IndexError, ValueError):
return None, None, None
def upload_file(filepath, username, media_id = None, media_type='image', post_type = 'story', user_id = None, date = None):
filename = os.path.basename(filepath)
file_extension = filename.split('.')[-1]
dirtype = 'stories' if post_type == 'story' else 'posts'
server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
fileHash = calculate_file_hash(filepath)
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = get_video_dimensions(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, user_id, fileHash, date)
newCursor.execute(query, values)
newDB.commit()
existing_files.append(media_id)
if newCursor.rowcount == 0:
print('Insert affected 0 rows - what just happened?')
obj_storage.PutFile(filepath, server_path)
os.remove(filepath)
print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
def get_media_type(filename):
if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):
return 'image'
if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):
return 'video'
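# walk the StorySave folder tree: each subfolder is treated as a username, and every file in it
# is parsed, de-duplicated against media_ids already in the database, then uploaded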
def dump_instagram(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
username = folder
folder_path = os.path.join(root, folder)
for filename in os.listdir(folder_path):
if "~" not in filename:
continue
username, media_id, user_id, timestamp = extract_file_info(filename)
if None in [username, media_id, user_id, timestamp]:
username, media_id, user_id = extract_file_info2(filename)
if None in [username, media_id, user_id]:
print(f"Failed to extract info from {filename}")
continue
media_id = int(media_id) if media_id else None
if media_id in existing_files:
print(f'Duplicate, {filename}')
os.remove(os.path.join(folder_path, filename))
continue
filepath = os.path.join(folder_path, filename)
mediatype = get_media_type(filename)
upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id = user_id,)
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_instagram('StorySave/')
print("Processing completed.")

@ -1,4 +1,4 @@
from funcs import get_files
from funcs import get_files, get_media_type
from PIL import Image
import imagehash
import config
@ -25,10 +25,14 @@ def are_phashes_duplicates(phash1, phash2, threshold=5):
print(f"Error comparing pHashes: {e}")
return False
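# a match is any stored pHash within the Hamming-distance threshold; passing username=None
# compares against every account's media instead of a single user's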
def find_duplicate_phash(phash, existing_medias, threshold=5):
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
existing_username = media[2]
if username:
if username != existing_username:
continue
# Convert stored pHash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
@ -38,46 +42,39 @@ def find_duplicate_phash(phash, existing_medias, threshold=5):
return media
return None
def get_media_by_hash(hash, existing_medias):
for media in existing_medias:
existing_hash = media[1]
if hash == existing_hash:
return media
return None
def get_media_by_id(media_id, existing_medias):
for media in existing_medias:
existing_media_id = media[1]
if media_id == existing_media_id:
return media
return None
def get_data_by_filename(filename, data):
for item in data:
if filename in item['filepath']:
return item
return None
def get_image_files(directory):
return [file for file in get_files(directory) if get_media_type(file) == 'image']
def get_images_with_username(directory):
files = {}
for username in os.listdir(directory):
user_files = get_image_files(os.path.join(directory, username))
files[username] = user_files
return files
# Database connection
db, cursor = config.gen_connection()
# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL AND media_id IS NULL;", ['image'])
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
existing_medias = cursor.fetchall()
directory = 'check_if_exists/' # Directory containing user images
files = [file for file in get_files(directory) if file.endswith(('.jpg', '.jpeg', '.png'))]
# Directory containing user images
directory = 'media/check_if_exists'
usernames = os.listdir(directory)
files = get_image_files(directory)
username = None
for filepath in files:
image_filename = os.path.basename(filepath)
print(f'Processing {image_filename}...')
# Generate pHash for the image
phash = generate_image_phash(filepath, hash_size=8)
if phash is None:
continue
continue # Skip this image if there's an issue
# Check if the image is a duplicate of any in the database
duplicate_media = find_duplicate_phash(phash, existing_medias)
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate image path: {filepath}')

@ -1,11 +1,11 @@
from funcs import get_files
from funcs import get_files, get_media_type
from PIL import Image
import imagehash
import config
import cv2
import os
def get_video_phash(filepath, hash_size=8):
def get_video_phash(filepath, hash_size=8): # Set hash_size to 8
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cap.release()
@ -37,43 +37,54 @@ def are_phashes_duplicates(phash1, phash2, threshold=5):
return distance <= threshold
def get_media_by_phash(phash, existing_medias, threshold=5):
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
existing_username = media[2]
if username:
if existing_username != username:
continue
# Convert stored phash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
return media
return None
def get_video_files(directory):
return [file for file in get_files(directory) if get_media_type(file) == 'video']
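# build a {username: [video files]} map, treating each top-level folder as an account name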
def get_videos_with_username(directory):
videos = {}
for username in os.listdir(directory):
user_videos = get_video_files(os.path.join(directory, username))
videos[username] = user_videos
return videos
# Database connection
db, cursor = config.gen_connection()
# Directory containing user videos
directory = 'check_if_exists/' # Directory containing user images
directory = 'check_if_exists'
# Fetch existing videos with pHashes
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL;", ['video'])
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
existing_medias = cursor.fetchall()
# make a list of all video files
files = [file for file in get_files(directory) if file.endswith(('.mp4', '.avi', '.mov'))]
for filepath in files:
video_filename = os.path.basename(filepath)
videos = get_video_files(directory)
username = None
for filepath in videos:
phash = get_video_phash(filepath, hash_size=8) # Use hash_size=8
if phash is None:
continue
duplicate_media = get_media_by_phash(phash, existing_medias, threshold=5)
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate video path: {filepath}')
newpath = os.path.join('duplicates', duplicate_media[2], video_filename)
newpath = filepath.replace(directory, 'duplicates')
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f'Moved {filepath} to duplicates/')

@ -1,96 +0,0 @@
from funcs import get_files
from PIL import Image
import imagehash
import config
import os
def generate_image_phash(filepath, hash_size=8):
try:
# Open the image using PIL
pil_image = Image.open(filepath)
# Compute pHash using the imagehash library
phash = imagehash.phash(pil_image, hash_size=hash_size)
return phash
except Exception as e:
print(f"Error processing image {filepath}: {e}")
return None
def are_phashes_duplicates(phash1, phash2, threshold=5):
try:
# Compute the Hamming distance between the pHashes
distance = phash1 - phash2
return distance <= threshold
except TypeError as e:
print(f"Error comparing pHashes: {e}")
return False
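# Two images count as duplicates when the Hamming distance between their 64-bit pHashes
# (hash_size=8) is at most `threshold`; e.g. the hypothetical hashes '8f373714acfcf4d0' and
# '8f373714acfcf4d1' differ by a single bit and would match.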
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
existing_username = media[2]
# if username != existing_username:
# continue
# Convert stored pHash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
# Check if the current pHash is a duplicate
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
return media
return None
def get_media_by_hash(hash, existing_medias):
for media in existing_medias:
existing_hash = media[1]
if hash == existing_hash:
return media
return None
def get_media_by_id(media_id, existing_medias):
for media in existing_medias:
existing_media_id = media[1]
if media_id == existing_media_id:
return media
return None
def get_data_by_filename(filename, data):
for item in data:
if filename in item['filepath']:
return item
return None
directory = 'media/check_if_exists' # Directory containing user images
# Database connection
db, cursor = config.gen_connection()
# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
existing_medias = cursor.fetchall()
usernames = os.listdir(directory)
for username in usernames:
files = get_files(os.path.join(directory, username))
for filepath in files:
image_filename = os.path.basename(filepath)
print(f'Processing {image_filename}...')
# Generate pHash for the image
phash = generate_image_phash(filepath, hash_size=8)
if phash is None:
continue # Skip this image if there's an issue
phash_str = str(phash)
# Check if the image is a duplicate of any in the database
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate image path: {filepath}')
newpath = os.path.join('duplicates', duplicate_media[2], image_filename)
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f'Moved {image_filename} to duplicates/')

@ -1,68 +0,0 @@
from funcs import generate_phash # Assuming this function computes the pHash and returns a string
import imagehash
import os
def get_files(directory):
# Recursively get all files in the directory
file_list = []
for root, dirs, files in os.walk(directory):
for filename in files:
file_list.append(os.path.join(root, filename))
return file_list
# Function to compute pHashes for all images in a directory
def compute_phashes(image_paths):
phash_dict = {}
for image_path in image_paths:
try:
# Compute pHash and get it as a string
phash_str = generate_phash(image_path)
# Convert the hash string to an ImageHash object
phash = imagehash.hex_to_hash(phash_str)
phash_dict[image_path] = phash
except Exception as e:
print(f"Error processing {image_path}: {e}")
return phash_dict
# Get all image files from 'ready_to_upload' and 'sorted' directories
ready_images = get_files('ready_to_upload')
ready_images = [image for image in ready_images if not image.lower().endswith('.mp4')]
sorted_images = get_files('sorted')
sorted_images = [image for image in sorted_images if not image.lower().endswith('.mp4')]
# Compute pHashes for images in 'ready_to_upload'
print("Computing pHashes for 'ready_to_upload' images...")
ready_image_phashes = compute_phashes(ready_images)
# Compute pHashes for images in 'sorted'
print("Computing pHashes for 'sorted' images...")
sorted_image_phashes = compute_phashes(sorted_images)
# Prepare the 'already_processed' directory
os.makedirs('already_processed', exist_ok=True)
# Set a Hamming distance threshold for considering images as duplicates
threshold = 5 # Adjust this value as needed
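# every 'sorted' image is compared against every 'ready_to_upload' image, so the cost is
# O(len(sorted) * len(ready)) hash comparisons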
# Find and move duplicates
for sorted_image, sorted_phash in sorted_image_phashes.items():
duplicate_found = False
for ready_image, ready_phash in ready_image_phashes.items():
# Compute Hamming distance between the two pHashes
try:
distance = sorted_phash - ready_phash
except TypeError as e:
print(f"Error comparing hashes for {sorted_image} and {ready_image}: {e}")
continue
if distance <= threshold:
# Duplicate found
newpath = sorted_image.replace('sorted', 'already_processed')
os.makedirs(os.path.dirname(newpath), exist_ok=True)
print(f"Moving {sorted_image} (duplicate of {ready_image}) to 'already_processed'")
os.rename(sorted_image, newpath)
duplicate_found = True
break # Exit the loop since a duplicate is found
if not duplicate_found:
print(f"No duplicate found for {sorted_image}")

@ -1,59 +0,0 @@
import config
# Function to find the closest perceptual hash (phash) match
def find_almost_identical_phash(phash, usernames, max_distance=1):
"""
Find a username whose phash is nearly identical to the given phash.
:param phash: The phash to compare (e.g., from the 'unknown' image).
:param usernames: List of tuples containing (username, phash).
:param max_distance: Maximum Hamming distance to consider as "identical".
:return: The matching username and phash, or None if no match is found.
"""
for candidate in usernames:
dist = hamming_distance(phash, candidate[1])
if dist <= max_distance:
return candidate
return None
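# hamming_distance below compares the stored pHashes character-by-character as strings, so the
# max_distance used above counts differing hex characters (at most 16 for a 64-bit hash), not bits.
# Example with hypothetical hashes: hamming_distance('c3d2e1f0a9b8c7d6', 'c3d2e1f0a9b8c7d7') == 1.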
def hamming_distance(phash1, phash2):
"""
Calculate the Hamming distance between two binary strings.
"""
if len(phash1) != len(phash2):
raise ValueError("Hashes must be of the same length")
return sum(c1 != c2 for c1, c2 in zip(phash1, phash2))
# Establish database connection
db, cursor = config.gen_connection()
# Fetch all images with an 'unknown' username
cursor.execute("SELECT id, username, phash FROM media WHERE username = 'unknown'")
rows = cursor.fetchall()
# Fetch all non-unknown usernames and their associated phash
cursor.execute("SELECT username, phash FROM media WHERE username != 'unknown' AND phash IS NOT NULL AND status = 'public'")
usernames = cursor.fetchall()
# Ensure there are valid usernames to compare against
if not usernames:
print("No known usernames found in the database.")
exit()
# Match each 'unknown' image against the known pHashes
for row in rows:
id = row[0]
phash = row[2]
# Find a nearly identical phash match
closest = find_almost_identical_phash(phash, usernames, max_distance=2)
if closest:
print(f"Found match for image {id}: {closest[0]} with phash {closest[1]}")
cursor.execute(
"UPDATE media SET username = %s WHERE id = %s",
(closest[0], id),
)
db.commit()
else:
print(f"No nearly identical match found for image {id}.")

@ -1,90 +0,0 @@
from funcs import get_files # Assuming this is defined elsewhere
from PIL import Image
import imagehash
import config
import os
def generate_image_phash(filepath, hash_size=8):
try:
# Open the image using PIL
pil_image = Image.open(filepath)
# Compute pHash using the imagehash library
phash = imagehash.phash(pil_image, hash_size=hash_size)
return phash
except Exception as e:
print(f"Error processing image {filepath}: {e}")
return None
def are_phashes_duplicates(phash1, phash2, threshold=5):
try:
# Compute the Hamming distance between the pHashes
distance = phash1 - phash2
return distance <= threshold
except TypeError as e:
print(f"Error comparing pHashes: {e}")
return False
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
# existing_username = media[2]
# if existing_username != username:
# continue # Only compare with the same user's media
# Convert stored pHash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
# Check if the current pHash is a duplicate
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
return media
return None
# Database connection
db, cursor = config.gen_connection()
directory = 'check_if_exists' # Directory containing user images
# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
existing_medias = cursor.fetchall()
existing_phashes = [media[1] for media in existing_medias]
# Go through the directory folder where each subfolder is a username
users = os.listdir(directory)
for username in users:
user_images_path = os.path.join(directory, username)
if not os.path.isdir(user_images_path):
continue # Skip non-directory files
# Get all images for the current user
images = get_files(user_images_path) # Assuming this gets all image files
for filepath in images:
image_filename = os.path.basename(filepath)
print(f'Processing {image_filename}...')
# Generate pHash for the image
phash = generate_image_phash(filepath, hash_size=8)
if phash is None:
continue # Skip this image if there's an issue
phash_str = str(phash)
if phash_str not in existing_phashes:
print(f'No duplicate found for {image_filename}')
continue
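# NOTE: this exact-match pre-filter means only identical pHash strings ever reach the fuzzy
# check below, so the Hamming threshold of 5 is effectively unused in this script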
# Check if the image is a duplicate of any in the database
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
found_username = duplicate_media[2]
print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate image path: {filepath}')
newpath = os.path.join('duplicates', found_username, image_filename)
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f'Moved {image_filename} to duplicates/')

@ -1,87 +0,0 @@
from PIL import Image
import imagehash
import config
import cv2
import os
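# the pHash of a video is taken from its first frame only, resized to 320x240 to normalise
# resolution, so clips that share an identical opening frame will collide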
def generate_thumbnail_phash(filepath, hash_size=8): # Set hash_size to 8
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cap.release()
if not ret:
print(f"Error reading frame from {filepath}")
return None
# Resize frame to a standard size
standard_size = (320, 240)
resized_frame = cv2.resize(frame, standard_size, interpolation=cv2.INTER_AREA)
# Convert OpenCV image (BGR) to PIL Image (RGB)
image_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image_rgb)
# Compute pHash
phash = imagehash.phash(pil_image, hash_size=hash_size)
return phash
def are_phashes_duplicates(phash1, phash2, threshold=5):
# Compute Hamming distance between the pHashes
try:
distance = phash1 - phash2
except TypeError as e:
print(f"Error comparing pHashes: {e}")
return False
return distance <= threshold
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
existing_username = media[2]
if existing_username != username:
continue
# Convert stored phash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
return media
return None
# Database connection
db, cursor = config.gen_connection()
# Directory containing user videos
directory = 'check_if_exists'
# Fetch existing videos with pHashes
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
existing_medias = cursor.fetchall()
users = os.listdir(directory) # Assuming 'check_if_exists' contains user videos
for username in users:
user_videos_path = os.path.join(directory, username)
if not os.path.isdir(user_videos_path):
continue
videos = [video for video in os.listdir(user_videos_path) if video.endswith(('.mp4', '.avi', '.mov'))]
for video in videos:
print(f'Processing {video}...')
filepath = os.path.join(user_videos_path, video)
phash = generate_thumbnail_phash(filepath, hash_size=8) # Use hash_size=8
if phash is None:
continue
phash_str = str(phash)
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate video path: {filepath}')
newpath = filepath.replace(directory, 'duplicates')
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f'Moved {video} to duplicates/')

@ -1,58 +0,0 @@
from funcs import generate_phash
import os
def find_duplicates(source_dir, target_dir, extensions, max_distance):
"""Remove duplicates in target_dir that are found in source_dir based on Hamming distance."""
source_files = {}
target_files = {}
# Helper function to filter files by extension
def filter_files(files):
return [f for f in files if os.path.splitext(f)[1].lower() in extensions]
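# pass 1 hashes everything in source_dir; pass 2 hashes target_dir and moves any file whose
# pHash is within max_distance of a source hash into duplicates/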
# Build hash map of source directory
for dirpath, _, filenames in os.walk(source_dir):
for filename in filter_files(filenames):
filepath = os.path.join(dirpath, filename)
filehash = generate_phash(filepath, str=False)
if filehash:
source_files[filehash] = filepath
# Build hash map of target directory and compare
for dirpath, _, filenames in os.walk(target_dir):
for filename in filter_files(filenames):
filepath = os.path.join(dirpath, filename)
filehash = generate_phash(filepath, str=False)
if not filehash:
continue
# Check if this file is similar to any of the source files
is_duplicate = False
for source_hash in source_files.keys():
distance = filehash - source_hash # Hamming distance
if distance <= max_distance:
is_duplicate = True
break # Found a duplicate
if is_duplicate:
newpath = os.path.join('duplicates', filename)
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f"Moved duplicate: {filepath} to duplicates/ (distance: {distance})")
else:
target_files[filehash] = filepath
if __name__ == '__main__':
# Paths to the directories
source_dir = 'D:/Crawlers/media/Coomer/sadierayxo'
target_dir = 'sorted/sadierayxo'
# List of accepted extensions
extensions = {'.png', '.jpg', '.jpeg', '.webp', '.gif'}
# Maximum Hamming distance to consider as duplicates
MAX_DISTANCE = 5 # Adjust this threshold as needed
find_duplicates(source_dir, target_dir, extensions, MAX_DISTANCE)
print("Duplicate removal process completed.")

@ -1,110 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
def scan_dupes(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
folder_path = os.path.join(root, folder)
for filename in os.listdir(folder_path):
media_id = filename.replace('.mp4', '').replace('.jpg', '')
filepath = os.path.join(folder_path, filename)
if media_id:
try:
if int(media_id) in existing_files:
print(f'Duplicate')
os.remove(filepath)
except:
print(f'Error: {filepath}')
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def upload_file(filepath, username, media_id = None, media_type='image', post_type = 'story'):
filename = os.path.basename(filepath)
file_extension = filename.split('.')[-1]
try:
if int(media_id) in existing_files:
print(f'Duplicate')
os.remove(filepath)
return True
except (TypeError, ValueError): media_id = uuid.uuid4().hex
dirtype = 'stories' if post_type == 'story' else 'posts'
server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = get_video_dimensions(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type)
newCursor.execute(query, values)
newDB.commit()
os.remove(filepath)
print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
def get_media_type(filename):
if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):
return 'image'
if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):
return 'video'
def dump_instagram(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
username = folder
folder_path = os.path.join(root, folder)
post_type = 'story' if 'stories' in folder_path.replace('\\', '/').split('/') else 'post'
for filename in os.listdir(folder_path):
media_id = filename.replace('.mp4', '').replace('.jpg', '')
filepath = os.path.join(folder_path, filename)
mediatype = get_media_type(filename)
upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, post_type=post_type)
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_instagram('media/posts')
dump_instagram('media/stories')
scan_dupes('media/posts')
scan_dupes('media/stories')
clean_empty_folders('media/posts')
clean_empty_folders('media/stories')
print("Processing completed.")

@ -1,110 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs, cv2
from datetime import datetime
from PIL import Image
def dump_facebook(folder_path):
for filename in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, filename)):
continue
username = filename.split("'")[0]
filepath = os.path.join(folder_path, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
for folder in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, folder)):
username = folder
for filename in os.listdir(os.path.join(folder_path, folder)):
filepath = os.path.join(folder_path, folder, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
file_hash = funcs.calculate_file_hash(filepath)
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return False
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
if "FB_IMG" in filename: media_id = filename.split("_")[2].split(".")[0]
else: media_id = uuid.uuid4().hex
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = f'media/{dirtype}/{username}/{media_id}{file_extension}' # forward slashes so the CDN path stays valid on Windows
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = funcs.get_video_dimensions(filepath)
thumbnail_url = None
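# for videos, grab the first frame as a thumbnail: write it to temp/<media_id>.jpg (the temp/
# folder must already exist), upload it under thumbnails/, and keep the CDN URL for the row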
if media_type == 'video':
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
if post_type == 'stories':
post_type = 'story'
else:
post_type = 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration, thumbnail_url)
try:
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
except Exception as e:
print(f"Database error: {e}")
return False
try:
if newCursor.rowcount > 0:
os.remove(filepath)
except Exception as e:
print(f"Failed to remove local file {filepath}: {e}")
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_facebook('facebook/')
print("Processing completed.")

@ -1,82 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image
def dump_instagram(folder_path):
for filename in os.listdir(folder_path):
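# filenames follow "<username>_<timestamp>_<user_id>.<ext>"; the username itself may contain
# underscores, so everything except the last two underscore-separated parts is the username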
parts = filename.split('_')
try:
username = '_'.join(parts[:-2]) # Join all except last two
timestamp = int(parts[-2]) # Second last is timestamp
user_id = int(parts[-1].split('.')[0]) # Last part before extension is user_id
except Exception as e:
print(f"Invalid filename: {filename}. Error: {e}")
continue
filepath = os.path.join(folder_path, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
UploadMedia(username=username, media_type=mediatype, filepath=filepath, post_type=post_type, timestamp=timestamp, user_id=user_id)
def UploadMedia(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
if 'tero' in username:
pass
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
file_hash = funcs.calculate_file_hash(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = f'media/{dirtype}/{username}/{file_hash}{file_extension}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
obj_storage.PutFile(filepath, server_path)
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = funcs.get_video_dimensions(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_instagram('storysaver/missingdata/')
print("Processing completed.")

@ -1,67 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image
def dump_tiktok(folder_path):
for folder in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, folder)):
username = folder
for filename in os.listdir(os.path.join(folder_path, folder)):
filepath = os.path.join(folder_path, folder, filename)
upload_file(username=username, filepath=filepath)
def upload_file(filepath, username):
filename = os.path.basename(filepath)
media_id = filename.split('.')[0]
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
# check for duplicates before uploading, so a file we are about to discard is never pushed to storage
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return False
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = f'media/{dirtype}/{username}/{media_id}{file_extension}' # forward slashes so the CDN path stays valid on Windows
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
if newCursor.rowcount > 0:
os.remove(filepath)
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_tiktok('tiktok/')
print("Processing completed.")

@ -1,32 +0,0 @@
import os, funcs
from funcs import generate_phash
def get_username(image, ready_images):
for ready_image in ready_images:
if os.path.basename(image) in ready_image:
ready_image = ready_image.replace('\\', '/')
return ready_image.split('/')[1]
return None
ready_images = funcs.get_files('ready_to_upload')
ready_images = [image for image in ready_images if not image.endswith('.mp4')]
sorted_images = funcs.get_files('sorted')
sorted_images = [image for image in sorted_images if not image.endswith('.mp4')]
os.makedirs('already_processed', exist_ok=True)
for image in sorted_images:
image = image.replace('\\', '/')
username = image.split('/')[1]
filename = os.path.basename(image)
for ready_image in ready_images:
if filename in ready_image:
username = get_username(image, ready_images)
newpath = ready_image.replace('ready_to_upload', 'already_processed')
os.makedirs(os.path.dirname(newpath), exist_ok=True)
print(f'Moving {image} which is a match for {ready_image} to already_processed')
os.rename(image, newpath)
print(f'Moved {ready_image} to already_processed')
break

@ -1,11 +1,9 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor
# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
@ -30,7 +28,7 @@ def DownloadFile(serverPath, cacheDir):
def ImportMedias():
with ThreadPoolExecutor(max_workers=10) as executor:
for video in results:
serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
serverPath = video[2].replace("https://cdn.altpins.com/", '').replace('//', '/').replace('\\', '/')
executor.submit(DownloadFile, serverPath, cacheDir)
@ -41,7 +39,7 @@ for result in results:
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
serverPath = result[2].replace("https://cdn.altpins.com/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
@ -52,7 +50,7 @@ for result in results:
cv2.imwrite('thumbnail.jpg', frame)
cap.release()
thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
thumbnailURL = f"https://cdn.altpins.com/thumbnails/{itemID}.jpg"
obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')

@ -1,56 +0,0 @@
from BunnyCDN.Storage import Storage
import os, config, requests
from moviepy.editor import VideoFileClip
def get_media_type(filename):
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
video_extensions = {".mp4", ".mov"}
extension = os.path.splitext(filename.lower())[1]
if extension in image_extensions:
return 'image'
elif extension in video_extensions:
return 'video'
else:
return 'unknown'
def determine_post_type(media_type):
# Assuming the post type is directly based on media type.
return media_type
def get_video_dimensions(filepath):
with VideoFileClip(filepath) as clip:
width, height = clip.size
return width, height
def download_file(url):
local_filename = url.split('/')[-1]
# Note: Stream=True to avoid loading the whole file into memory
with requests.get(url, stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
return local_filename
if __name__ == '__main__':
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
posts = open('fucked', 'r')
for item in posts:
username, url = item.strip().split('~')
media_id = url.split('/')[-1].split('.')[0]
media_type = get_media_type(url)
query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
values = (username, media_type, 'facebook', url)
try:
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated.{url}')
except Exception as e:
print(f"Database error: {e}")
posts.close()

@ -1,40 +0,0 @@
import config, os, json
from PIL import Image
import imagehash
def find_file(filename, directory):
filename = filename.lower().split('.')[0]
for root, dirs, files in os.walk(directory):
for file in files:
if filename in file.lower():
return os.path.join(root, file)
return None
def generate_phash(image_path):
image = Image.open(image_path)
return str(imagehash.phash(image))
count = 0
cacheDir = 'sorted'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
count += 1
filepath = item['filepath']
if os.path.exists(filepath):
continue
newfilepath = find_file(os.path.basename(filepath), cacheDir)
if newfilepath:
print(f"Found file {newfilepath} for {filepath}")
item['filepath'] = newfilepath
with open(dataPath, 'w') as f:
json.dump(medias, f)

@ -1,28 +0,0 @@
import os, json
from funcs import generate_phash
count = 0
cacheDir = '_sort'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
count += 1
if item['type'] == 'image':
filepath = item['filepath']
if 'phash' in item:
print(f"Skipping {count}/{len(medias)}: already processed.")
continue
if not os.path.exists(filepath):
print(f"File {filepath} does not exist, skipping.")
continue
phash = generate_phash(filepath)
item['phash'] = phash
print(f"Processed {count}/{len(medias)}: with pHash {phash}")
with open(dataPath, 'w') as f:
json.dump(medias, f)

@ -1,19 +0,0 @@
import config, storysave_api
db, cursor = config.gen_connection()
usernames = []
with open('usernames.txt', 'r') as f:
for line in f:
usernames.append(line.strip())
for username in usernames:
print(f"Username: {username}")
user_id = storysave_api.get_user_id(username)
# Update the user_id in the database
cursor.execute("UPDATE media SET user_id = %s WHERE username = %s AND user_id IS NULL;", [user_id, username])
db.commit()
print(f"[{cursor.rowcount}] Updated user_id for {username}")

@ -1,94 +0,0 @@
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os
def file_hash_from_url(url, hash_algo='sha256'):
h = hashlib.new(hash_algo)
response = requests.get(url, stream=True)
if response.status_code == 200:
for chunk in response.iter_content(8192):
h.update(chunk)
return h.hexdigest()
else:
raise Exception(f"Failed to download file: Status code {response.status_code}")
def get_video_duration(file_path):
"""
Returns the duration of the video file in seconds.
:param file_path: Path to the video file
:return: Duration in seconds
"""
try:
with VideoFileClip(file_path) as video:
return video.duration
except:
return 0
def file_hash(filename, hash_algo='sha256'):
"""
Compute the hash of a file.
:param filename: Path to the file.
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
:return: Hexadecimal hash string.
"""
# Create a hash object
h = hashlib.new(hash_algo)
# Open the file in binary mode and read in chunks
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
# Return the hexadecimal digest of the hash
return h.hexdigest()
# the hashes of the images are different because of the optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
for result in results:
count += 1
videoID = result[0]
mediaID = result[1]
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
else:
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
duration = get_video_duration(localFilePath)
if duration == 0:
print(f"Failed to get duration for {localFilePath}")
continue
if duration < 1:
duration = 1
cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
db.commit()
print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config
import hashlib
import os
def file_hash(filename, hash_algo='sha256'):
"""
Compute the hash of a file.
:param filename: Path to the file.
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
:return: Hexadecimal hash string.
"""
h = hashlib.new(hash_algo)
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
return h.hexdigest()
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
for result in results:
count += 1
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
if not os.path.exists(localFilePath):
obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))
filehash = file_hash(localFilePath)
cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
db.commit()
print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image
# the hashes of the images are different because of the optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
for result in results:
count += 1
videoID = result[0]
mediaID = result[1]
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
else:
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
mediaType = funcs.get_media_type(localFilePath)
if mediaType == 'image':
with Image.open(localFilePath) as img:
width, height = img.size
elif mediaType == 'video':
width, height = funcs.get_video_dimensions(localFilePath)
cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
db.commit()
print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")

@ -1,32 +0,0 @@
import config
import os
temp_directory = "cache"
os.makedirs(temp_directory, exist_ok=True)
obj_storage = config.get_storage()
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_url FROM media WHERE file_size = 0;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
for result in results:
count += 1
id, media_url = result
serverPath = media_url.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(os.getcwd(), temp_directory, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
continue
file_size = os.path.getsize(localFilePath)
cursor.execute("UPDATE media SET file_size = %s WHERE id = %s;", (file_size, id))
db.commit()
print(f"[{count}/{len(results)}] {media_url}: {file_size}, {cursor.rowcount}")

@ -1,36 +0,0 @@
import config
from funcs import generate_phash
count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()
generate_for = 'media_url'
media_type = 'image'
cursor.execute(f"SELECT id, {generate_for} FROM media WHERE media_type = %s AND phash IS NULL;", [media_type])
medias = cursor.fetchall()
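# backfill each missing pHash: download the original from the CDN into temp/, hash it locally,
# then write the result back to the row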
for item in medias:
count += 1
itemID = item[0]
media_url = item[1]
server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
filepath = storage.DownloadFile(server_path, 'temp')
if not filepath:
print(f"Error downloading {server_path}")
continue
phash = generate_phash(filepath)
if not phash:
print(f"Error generating pHash for {filepath}")
continue
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
db.commit()
print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@ -1,39 +0,0 @@
import config, os
from funcs import generate_phash
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash = 0;")
results = cursor.fetchall()
count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")
for result in results:
count += 1
itemID = result[0]
mediaID = result[1]
if not mediaID:
print(f"Media ID is null, skipping.")
continue
mediaURL = result[2]
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
print(f"File {localFilePath} does not exist, skipping.")
continue
phash = generate_phash(localFilePath)
if not phash:
print(f"Error generating pHash for {localFilePath}, skipping.")
continue
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
db.commit()
print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")

@ -1,74 +0,0 @@
import config, os, threading, queue
from funcs import generate_phash
# Initialize database connection
db, cursor = config.gen_connection()
# Query the media table for unprocessed images
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash = '0';")
results = cursor.fetchall()
# Setup cache directory
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")
# Thread-safe queue for processed media
processed_media_queue = queue.Queue()
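# the main loop below only generates pHashes and enqueues (id, phash) pairs; a single
# background thread drains the queue and performs the database UPDATEs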
def process_media():
"""Thread function to update database with processed pHash values."""
while True:
try:
item = processed_media_queue.get(timeout=10) # Timeout prevents infinite blocking
if item is None: # Sentinel value to exit the loop
break
itemID, phash = item
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
db.commit()
print(f"Updated database for ID {itemID} with pHash {phash}.")
except queue.Empty:
continue
# Start the database update thread
update_thread = threading.Thread(target=process_media, daemon=True)
update_thread.start()
# Main processing loop for generating pHash
count = 0
for result in results:
count += 1
itemID = result[0]
mediaID = result[1]
if not mediaID:
print(f"Media ID is null, skipping.")
continue
mediaURL = result[2]
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
print(f"File {localFilePath} does not exist, skipping.")
continue
phash = generate_phash(localFilePath)
if not phash:
print(f"Error generating pHash for {localFilePath}, skipping.")
continue
# Add the processed media to the queue
processed_media_queue.put((itemID, phash))
print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")
# Signal the update thread to stop
processed_media_queue.put(None)
# Wait for the update thread to finish
update_thread.join()
print("Processing completed.")

@ -1,43 +0,0 @@
import os
import json
import config
# Establish database connection
db, cursor = config.gen_connection()
# Fetch rows with file_size = 0
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;")
results = cursor.fetchall()
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")
update_data = []
for result in results:
itemID = result[0]
media_id = result[1]
if not media_id:
print(f"Media ID is null for ID {itemID}, skipping.")
continue
mediaURL = result[2]
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
print(f"File {localFilePath} does not exist for ID {itemID}, skipping.")
continue
file_size = os.path.getsize(localFilePath)
update_data.append({"id": itemID, "file_size": file_size})
# Save the results to a JSON file
output_file = "update_data.json"
with open(output_file, 'w') as f:
json.dump(update_data, f, indent=4)
print(f"Saved {len(update_data)} updates to {output_file}.")
cursor.close()
db.close()

File diff suppressed because it is too large

@ -1,29 +0,0 @@
import json
import config
# Establish database connection
db, cursor = config.gen_connection()
# Load update data from the JSON file
input_file = "update_data.json"
with open(input_file, 'r') as f:
update_data = json.load(f)
print(f"Loaded {len(update_data)} records to update.")
# Process each record one by one
for count, item in enumerate(update_data, start=1):
item_id = item["id"]
file_size = item["file_size"]
try:
cursor.execute("UPDATE media SET file_size = %s WHERE id = %s", (file_size, item_id))
db.commit()
print(f"Processed {count}/{len(update_data)}: ID {item_id} updated with file size {file_size}.")
except Exception as e:
print(f"Error updating ID {item_id}: {e}")
db.rollback()
print("All updates completed.")
cursor.close()
db.close()

@ -1,31 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os
db, cursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;")
results = cursor.fetchall()
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
for result in results:
itemID = result[0]
mediaURL = result[2]
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
continue
file_size = os.path.getsize(localFilePath)
cursor.execute("UPDATE media SET file_size = %s WHERE id = %s;", (file_size, itemID))
db.commit()
print(f"Processed ID {itemID}: updated with file size {file_size}.")
cursor.close()
db.close()

@ -1,112 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
def scan_dupes(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
folder_path = os.path.join(root, folder)
for filename in os.listdir(folder_path):
media_id = filename.replace('.mp4', '').replace('.jpg', '')
filepath = os.path.join(folder_path, filename)
if media_id:
try:
if int(media_id) in existing_files:
print(f'Duplicate')
os.remove(filepath)
except:
pass
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def upload_file(filepath, username, media_id = None, media_type='image', post_type = 'story'):
filename = os.path.basename(filepath)
file_extension = filename.split('.')[-1]
try:
if int(media_id) in existing_files:
print(f'Duplicate')
os.remove(filepath)
return True
except (TypeError, ValueError): media_id = uuid.uuid4().hex
dirtype = 'stories' if post_type == 'story' else 'posts'
server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = get_video_dimensions(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type)
newCursor.execute(query, values)
newDB.commit()
os.remove(filepath)
print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
def get_media_type(filename):
if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):
return 'image'
if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):
return 'video'
def dump_instagram(folder_path):
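# Each subfolder is treated as a username; the top-level folder name decides whether files are stored as stories or posts.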
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
username = folder
folder_path = os.path.join(root, folder)
post_type = 'story' if folder_path.split('\\')[0] == 'stories' else 'post'
for filename in os.listdir(folder_path):
media_id = filename.replace('.mp4', '').replace('.jpg', '')
filepath = os.path.join(folder_path, filename)
mediatype = get_media_type(filename)
upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, post_type=post_type)
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media")
existing_files = [image[0] for image in newCursor.fetchall()]
scan_dupes('media/posts')
scan_dupes('media/stories')
scan_dupes('StorySave/')
dump_instagram('media/posts')
dump_instagram('media/stories')
dump_instagram('StorySave/')
clean_empty_folders('media/posts')
clean_empty_folders('media/stories')
clean_empty_folders('StorySave/')
print("Processing completed.")

@ -1,33 +0,0 @@
import bunny, json
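# Compare the Bunny Stream library against videos.json and write any library videos not referenced there to missing_videos.json.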
medias = json.load(open('videos.json', 'r'))
videoIDS = [media['url'].split('/')[-1] for media in medias]
videos = bunny.list_videos()
with open('allVideos.json', 'w') as f:
json.dump(videos, f, indent=4)
missingVideos = []
for video in videos:
if video['guid'] in videoIDS:
continue
missingVideos.append(video)
datas = []
for video in missingVideos:
data = {
'guid': video['guid'],
'title': video['title'],
'length': video['length'],
'width': video['width'],
'height': video['height'],
'availableResolutions': video['availableResolutions'],
'storageSize': video['storageSize'],
'hasMP4Fallback': video['hasMP4Fallback'],
'category': video['category'],
}
datas.append(data)
with open('missing_videos.json', 'w') as f:
json.dump(datas, f, indent=4)

@ -1,27 +0,0 @@
from BunnyCDN.Storage import Storage
import os, json
altpins_obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
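# Download every pin listed in db_pins.json from the altpins storage zone into a local cache, skipping files already downloaded.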
medias = json.load(open('db_pins.json', 'r'))
count = 0
print(f"Found {len(medias)} files to process.")
cacheDir = 'old_altpins_cache'
for media in medias:
count += 1
username = media['title']
mediaID = media['photo_id']
mediaURL = media['url']
extension = mediaURL.split('.')[-1]
serverPath = mediaURL.replace("https://altpins.b-cdn.net/", '').replace('//', '/').replace('\\', '/').replace('https://altpins.b-cdn.net/', '')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
continue
altpins_obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
print(f"Downloaded {count}/{len(medias)}: {localFilePath}")

@ -1,16 +0,0 @@
import json, bunny, os
from concurrent.futures import ThreadPoolExecutor
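# Download the missing Stream videos in parallel, skipping any already saved locally as zip archives.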
medias = json.load(open('missing_videos.json', 'r'))
#videoIDS = [media['url'].split('/')[-1] for media in medias]
videoIDS = [media['guid'] for media in medias]
with ThreadPoolExecutor(max_workers=10) as executor:
for id in videoIDS:
filePath = f"MISSING_STREAM_VIDEOS/{id}.zip"
if os.path.exists(filePath):
print(f'Video already exists as {filePath}. Skipping...')
continue
executor.submit(bunny.download_video, id)

@ -1,29 +0,0 @@
import os, json, config
# Load the data
pins = json.load(open('db_pins.json', 'r'))
files = os.listdir('STORAGE_IMPORTED/')
db, cursor = config.gen_connection()
cursor.execute('SELECT hash FROM media WHERE hash IS NOT NULL;')
existing_hashes = [row[0] for row in cursor.fetchall()]
# Filtering into a new list avoids skipping items, which happens when removing from a list while iterating over it.
remaining = []
for pin in pins:
if pin['hash'] in existing_hashes:
print(f"Found {pin['hash']} already in the database.")
else:
remaining.append(pin)
pins = remaining
alreadyImported = []
for pin in list(pins):
filename = os.path.basename(pin['filepath'])
if filename in files:
print(f"Found {filename} in the imported folder.")
alreadyImported.append(pin)
pins.remove(pin)
# Save to the file
json.dump(pins, open('db_pins.json', 'w'))
json.dump(alreadyImported, open('db_pins_imported.json', 'w'))

@ -1,14 +0,0 @@
import os, json, bunny
medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))
count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")
for media in mp4Medias:
count += 1
filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@ -1,36 +0,0 @@
import os, json, bunny, config
db, cursor = config.gen_connection()
cursor.execute('SELECT media_id FROM media WHERE media_id IS NOT NULL;')
mediaIDS = cursor.fetchall()
pins = json.load(open('pins.json', 'r'))
videos = json.load(open('db_videos.json', 'r'))
pins = json.load(open('db_pins.json', 'r'))
ids = [video['id'] for video in videos]
# Filter instead of removing items while iterating, which would skip entries.
pins = [pin for pin in pins if pin['id'] not in ids]
# save to the file
json.dump(pins, open('db_pins.json', 'w'))
medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))
count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")
for media in mp4Medias:
count += 1
filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@ -1,53 +0,0 @@
import os, json, funcs
STORAGE_IMPORTED = 'STORAGE_IMPORTED'
pins = json.load(open('db_pins.json', 'r'))
for pin in pins:
filename = pin['url'].split('/')[-1]
filepath = os.path.join(STORAGE_IMPORTED, filename)
pin['filename'] = filename
if not pin['hash']:
pin['hash'] = funcs.calculate_file_hash(filepath)
json.dump(pins, open('db_pins.json', 'w'), indent=4)
files = os.listdir(STORAGE_IMPORTED)
for file in files:
filepath = os.path.join(STORAGE_IMPORTED, file)
fileHash = funcs.calculate_file_hash(filepath)
if fileHash not in file:
# Keep the extension so the move step below can locate '{hash}.{extension}'.
newName = fileHash + os.path.splitext(file)[1]
print(f'Renaming {file} to {newName}')
os.rename(filepath, os.path.join(STORAGE_IMPORTED, newName))
pins_by_username = {}
for pin in pins:
username = pin['title']
if username not in pins_by_username:
pins_by_username[username] = []
pins_by_username[username].append(pin)
for username, username_pins in pins_by_username.items():
username_folder = os.path.join(STORAGE_IMPORTED, username)
os.makedirs(username_folder, exist_ok=True)
for pin in username_pins:
photo_id = pin['photo_id']
photo_url = pin['url']
fileHash = pin['hash']
if not fileHash:
continue
extension = photo_url.split('.')[-1]
filename = f'{fileHash}.{extension}'
filePath = os.path.join(STORAGE_IMPORTED, filename)
outputPath = os.path.join(STORAGE_IMPORTED, username, filename)
if os.path.exists(outputPath):
print(f'File {outputPath} already exists. Skipping...')
continue
print(f'Moving {photo_url} to {outputPath}')
os.rename(filePath, outputPath)

@ -1,57 +0,0 @@
import os
import hashlib
# Directories
fucked_dir = 'tiktoks/fucked/aleksandra'
source_dir = 'tiktoks/waiting_for_process/aleksandraverse'
def hash_file(filepath):
"""Generate MD5 hash of a file."""
hash_md5 = hashlib.md5()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def get_file_hashes(directory):
"""Generate a dictionary of file hashes for all files in a directory."""
file_hashes = {}
for root, _, files in os.walk(directory):
for file in files:
file_path = os.path.join(root, file)
file_hashes[file_path] = hash_file(file_path)
return file_hashes
def files_are_identical(file1, file2):
"""Compare two files byte-by-byte."""
with open(file1, "rb") as f1, open(file2, "rb") as f2:
while True:
chunk1 = f1.read(4096)
chunk2 = f2.read(4096)
if chunk1 != chunk2:
return False
if not chunk1: # End of file
return True
def remove_duplicates(fucked_dir, source_files):
"""Remove files in 'fucked' that are identical to those in 'source_files'."""
for root, _, files in os.walk(fucked_dir):
for file in files:
file_path = os.path.join(root, file)
for source_file in source_files:
if files_are_identical(file_path, source_file):
print(f"Duplicate found. Removing: {file_path}")
os.remove(file_path)
break
def main():
print("Scanning source directory for hashes...")
source_hashes = get_file_hashes(source_dir)
print("Scanning 'fucked' directory for duplicates...")
remove_duplicates(fucked_dir, source_hashes)
print("Cleanup complete.")
if __name__ == "__main__":
main()

@ -1,49 +0,0 @@
import json, os
from videohash import VideoHash
from moviepy.editor import VideoFileClip
def is_valid_video(file_path):
try:
with VideoFileClip(file_path) as video:
return True
except Exception as e:
print(f"Invalid video {file_path}: {str(e)}")
return False
def load_hashes(file_path):
try:
with open(file_path, 'r') as file:
return json.load(file)
except FileNotFoundError:
return {}
def save_hashes(hashes, file_path):
with open(file_path, 'w') as file:
json.dump(hashes, file, indent=4)
hashes = load_hashes('video_hashes.json')
video_directory = 'STORAGE'
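# Walk STORAGE/<username>/<video>, compute a perceptual hash for each new valid video, and persist the results to video_hashes.json.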
for username in os.listdir(video_directory):
user_dir = os.path.join(video_directory, username)
if not os.path.isdir(user_dir):
continue
for video_file in os.listdir(user_dir):
video_path = os.path.join(user_dir, video_file)
if not video_file.endswith(('.mp4', '.mkv', '.avi')) or not is_valid_video(video_path):
continue
if username in hashes and any(v[0] == video_file for v in hashes[username]):
continue
try:
video_hash = VideoHash(path=video_path)
if username in hashes:
hashes[username].append((video_file, video_hash.hash))
else:
hashes[username] = [(video_file, video_hash.hash)]
except Exception as e:
print(f"Error processing {video_file}: {str(e)}")
save_hashes(hashes, 'video_hashes.json')

@ -1,17 +0,0 @@
import os, config, funcs
db, cursor = config.gen_connection()
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL")
phashes = set([x[0] for x in cursor.fetchall()])
files = funcs.get_files("check_if_exists")
for file in files:
image_phash = funcs.generate_phash(file)
if image_phash in phashes:
print(f"File {file} exists in the database")
os.remove(file)
funcs.cleanEmptyFolders("check_if_exists")

@ -1,159 +0,0 @@
from snapchat import get_data, get_stories, get_highlight_stories
from datetime import datetime
import requests
import config
import json
import os
"""
media_url_filename = url.split('/')[-1].split('?')[0]
etag = response.headers.get('ETag', '').replace('"', '')
filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
filepath = os.path.join(directory, 'highlights', filename)
"""
directory = "snapchat"
data_directory = "data"
def get_existing_snap_ids(directory):
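# Stored filenames follow username~timestamp~snap_id.ext, so the snap id is the third '~'-separated field.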
existing_snap_ids = set()
for root, _, files in os.walk(directory):
for file in files:
if '~' not in file:
continue
filename, _ = os.path.splitext(file)
snap_id = filename.split('~')[2]
existing_snap_ids.add(snap_id)
return existing_snap_ids
def find_duplicate_snap(existing_snaps, snap_id, username):
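# existing_snaps rows are (id, filename, username); a snap is a duplicate if its id already appears in a stored filename for the same username.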
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
print(f"Archived data for {username} at {data_filepath}")
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def extract_file_type(url):
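# Snapchat media URLs encode the content type as a numeric code in the last path segment; map the known codes to extensions.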
file_types = {
'400': '.jpg',
'1322': '.mp4',
'1325': '.mp4',
'1034': '.mp4',
'1023': '.jpg'
}
base_url = url.split("?")[0] # Remove query string
snap_data = base_url.split('/')[-1]
# Extract the file type number
data_parts = snap_data.split('.')
if len(data_parts) > 1:
file_type_number = data_parts[1]
if file_type_number in file_types:
return file_types[file_type_number]
else:
print(f"Unexpected URL format: {base_url}")
return None
def download_media(url, filepath):
if os.path.exists(filepath):
print(f"File {filepath} already exists. Skipping download.")
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def main():
if not os.path.exists(directory):
os.makedirs(directory)
db, cursor = config.gen_connection()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
existing_medias = cursor.fetchall()
existing_snap_ids = get_existing_snap_ids(directory)
for username in usernames:
print(f"Getting stories for {username}...")
data = get_data(username)
if not data:
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Check if media already exists
if snap_id in existing_snap_ids:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Determine file extension using HEAD request.
# TODO: find a better way to determine file extension without downloading the file.
extension = extract_file_type(url)
if not extension:
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
# Check if file already exists
if os.path.exists(filepath):
print(f"File {filename} already exists. Skipping download.")
continue
# Download the media
filepath = download_media(url, filepath)
print(f"Downloaded {filename} at {timestamp}")
if __name__ == "__main__":
main()

@ -1,154 +0,0 @@
from datetime import datetime
import config
import funcs
import cv2
import os
directory = 'media/instagram/'
def UploadMedia(media):
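# Upload one local file to the CDN, generating a thumbnail and pHash for videos, then record it in the media table and delete the local copy.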
media_id = media['media_id']
username = media['username']
post_date = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
highlight_id = media['highlight_id']
post_type = media['post_type']
thumbnail_url = None
phash = None
if media_id and int(media_id) in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
except Exception as e:
print(f'Error generating thumbnail for {filepath}: {e}. Skipping...')
return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
if media_id:
newFilename = f'{media_id}{file_extension}'
else:
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
if highlight_id:
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram')
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media():
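# Scan media/instagram/<post_type>/<username>/<file>; timestamps are parsed from 'com.instagram.android__' filenames when present, otherwise the current time is used.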
medias = []
post_types = {
'posts': 'post',
'stories': 'story',
'profile': 'profile',
}
for post_type in os.listdir(directory):
users_dir = os.path.join(directory, post_type)
if not os.path.isdir(users_dir):
continue
users = os.listdir(users_dir)
for username in users:
user_path = os.path.join(directory, post_type, username)
if not os.path.isdir(user_path):
continue
for filename in os.listdir(user_path):
if filename.startswith('.'):
continue
data = {}
filepath = os.path.join(user_path, filename)
if 'com.instagram.android__' in filename:
timestamp_str = filename.split('__')[-1].split('.')[0]
data['timestamp'] = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S%f')
else:
data['timestamp'] = datetime.now()
data['post_type'] = post_types[post_type]
data['username'] = username
data['filepath'] = filepath
data['media_id'] = None
data['user_id'] = get_user_id(data['username'])
data['highlight_id'] = None
medias.append(data)
return medias
def dump_instagram():
medias = get_media()
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = config.get_storage()
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1] for user in newCursor.fetchall()}
dump_instagram()
print("Processing completed.")

@ -1,34 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Image Gallery</title>
<style>
.gallery {
display: flex;
flex-wrap: wrap;
}
.gallery img {
margin: 10px;
max-width: 200px;
height: auto;
}
.gallery div {
text-align: center;
margin: 10px;
}
</style>
</head>
<body>
<h1>Image Gallery</h1>
<div class="gallery">
{% for image in images %}
<div>
<h3>{{ image['username'] }}</h3>
<img src="{{ image['media_url'] }}" alt="Image for {{ image['username'] }}">
</div>
{% endfor %}
</div>
</body>
</html>

@ -1,84 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Media Gallery</title>
<style>
body {
display: flex;
justify-content: center;
}
.container {
max-width: 1600px;
width: 100%;
padding: 20px;
}
.media-container {
column-count: 4;
column-gap: 10px;
}
.media-item {
break-inside: avoid;
margin-bottom: 10px;
}
img, video {
width: 100%;
height: auto;
display: block;
}
</style>
</head>
<body>
<div class="container">
<h1>Media Gallery</h1>
<div class="media-container" id="media-container"></div>
</div>
<script>
let page = 0;
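// Fetch the next page of file paths from /load-more and append an <img> or <video> element for each one.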
async function loadMore() {
const response = await fetch(`/load-more?page=${page}`);
const mediaFiles = await response.json();
const container = document.getElementById('media-container');
mediaFiles.forEach(file => {
const mediaItem = document.createElement('div');
mediaItem.className = 'media-item';
if (file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.gif')) {
const img = document.createElement('img');
img.src = `/media/${file}`;
img.alt = file;
mediaItem.appendChild(img);
} else if (file.endsWith('.mp4') || file.endsWith('.mkv') || file.endsWith('.mov')) {
const video = document.createElement('video');
video.controls = false;
video.autoplay = true;
video.muted = true;
video.loop = true;
const source = document.createElement('source');
source.src = `/media/${file}`;
source.type = 'video/mp4';
video.appendChild(source);
mediaItem.appendChild(video);
}
container.appendChild(mediaItem);
});
page += 1;
}
window.addEventListener('scroll', () => {
if (window.innerHeight + window.scrollY >= document.body.offsetHeight) {
loadMore();
}
});
// Initial load
loadMore();
</script>
</body>
</html>

@ -1,32 +0,0 @@
from flask import Flask, render_template, send_from_directory, jsonify, request
import os
app = Flask(__name__)
media_dir = 'storysaver'
MEDIA_PER_PAGE = 20
def get_media_files(start, count):
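# Walk media_dir for image/video files and return one page of relative paths; the full walk runs on every request.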
media_files = []
for root, dirs, files in os.walk(media_dir):
for filename in files:
if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.mp4', '.mkv', '.mov')):
file_path = os.path.relpath(os.path.join(root, filename), media_dir)
media_files.append(file_path)
return media_files[start:start + count]
@app.route('/')
def index():
return render_template('index.html')
@app.route('/media/<path:filename>')
def media(filename):
return send_from_directory(media_dir, filename)
@app.route('/load-more')
def load_more():
page = int(request.args.get('page', 0))
media_files = get_media_files(page * MEDIA_PER_PAGE, MEDIA_PER_PAGE)
return jsonify(media_files)
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)

@ -1,133 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib
from moviepy.editor import VideoFileClip
def scan_dupes(folder_path):
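# Remove local files whose SHA-256 hash already exists in the media table.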
newCursor.execute("SELECT hash FROM media")
existing_files = [image[0] for image in newCursor.fetchall()]
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
folder_path = os.path.join(root, folder)
for filename in os.listdir(folder_path):
media_id = filename.replace('.mp4', '').replace('.jpg', '')
filepath = os.path.join(folder_path, filename)
if media_id:
fileHash = calculate_file_hash(filepath)
if fileHash in existing_files:
print(f'Duplicate: {filepath}')
os.remove(filepath)
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def upload_file(filepath, username, media_type='image', post_type = 'story'):
filename = os.path.basename(filepath)
file_extension = filename.split('.')[-1]
dirtype = 'stories' if post_type == 'story' else 'posts'
#dirtype = 'profile'
fileHash = calculate_file_hash(filepath)
# media_id is not defined in this function (and existing_files is not defined in this script),
# so the original duplicate check could never run; just generate a fresh id.
media_id = uuid.uuid4().hex
server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
duration = 0
if media_type == 'image':
try:
with Image.open(filepath) as img:
width, height = img.size
except:
os.remove(filepath)
return
else:
width, height = get_video_dimensions(filepath)
duration = get_video_duration(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, hash, filename, media_id, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, fileHash, filename, media_id, duration)
newCursor.execute(query, values)
newDB.commit()
os.remove(filepath)
print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
def get_video_duration(file_path):
"""
Returns the duration of the video file in seconds.
:param file_path: Path to the video file
:return: Duration in seconds
"""
with VideoFileClip(file_path) as video:
return video.duration
def get_media_type(filename):
if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):
return 'image'
if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):
return 'video'
def dump_instagram(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
username = folder
folder_path = os.path.join(root, folder)
post_type = 'post' if 'post' in folder_path.lower() else 'story'
for filename in os.listdir(folder_path):
filepath = os.path.join(folder_path, filename)
mediatype = get_media_type(filename)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
def calculate_file_hash(file_path, hash_func='sha256'):
h = hashlib.new(hash_func)
with open(file_path, 'rb') as file:
for chunk in iter(lambda: file.read(8192), b''):
h.update(chunk)
return h.hexdigest()
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
storiesPath = 'StorySave/'
dump_instagram(storiesPath)
print("Processing completed.")

@ -0,0 +1,116 @@
import os
from funcs import calculate_file_hash, get_media_dimensions, get_media_type, generate_phash
import config
# --- Configuration & Constants ---
BASE_URL = "https://cdn.altpins.com/"
TEMP_DIR = os.path.join(os.getcwd(), 'temp')
CACHE_DIR = os.path.join(os.getcwd(), 'cache')
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)
def normalize_server_path(media_url, replace_all=True):
"""
Remove the BASE_URL from media_url and normalize slashes.
If replace_all is True, replace double slashes and backslashes.
"""
path = media_url.replace(BASE_URL, '')
if replace_all:
path = path.replace('//', '/').replace('\\', '/')
else:
path = path.replace('\\', '/')
return path
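# Illustrative example: normalize_server_path('https://cdn.altpins.com/media\\2021//a.jpg') -> 'media/2021/a.jpg'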
def update_hashes(cursor, db, obj_storage):
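# Backfill the hash column: download each file missing a hash into TEMP_DIR (unless cached) and store its checksum.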
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()
total = len(results)
print(f"Found {total} files to process for hash updating.")
for idx, (record_id, media_id, media_url) in enumerate(results, start=1):
server_path = normalize_server_path(media_url)
local_file = os.path.join(TEMP_DIR, os.path.basename(server_path))
if not os.path.exists(local_file):
obj_storage.DownloadFile(storage_path=server_path, download_path=TEMP_DIR)
filehash = calculate_file_hash(local_file)
cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, record_id))
db.commit()
print(f"[{idx}/{total}] {media_id}: {filehash}, Rows affected: {cursor.rowcount}")
def update_dimensions(cursor, db, obj_storage):
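# Backfill width/height for rows recorded with zero dimensions.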
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0 OR height = 0;")
results = cursor.fetchall()
total = len(results)
print(f"Found {total} files to process for dimensions updating.")
for idx, (record_id, media_id, media_url) in enumerate(results, start=1):
server_path = normalize_server_path(media_url)
local_file = os.path.join(CACHE_DIR, os.path.basename(server_path))
if not os.path.exists(local_file):
obj_storage.DownloadFile(storage_path=server_path, download_path=CACHE_DIR)
# Optionally, you could get the media type if needed:
media_type = get_media_type(local_file)
width, height = get_media_dimensions(local_file)
cursor.execute("UPDATE media SET width = %s, height = %s WHERE id = %s;", (width, height, record_id))
db.commit()
print(f"[{idx}/{total}] {media_id}: width: {width}, height: {height}, Rows affected: {cursor.rowcount}")
def update_file_size(cursor, db, obj_storage):
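# Backfill file_size for non-deleted rows recorded with a zero size.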
cursor.execute("SELECT id, media_url FROM media WHERE file_size = 0 AND status != 'deleted';")
results = cursor.fetchall()
total = len(results)
print(f"Found {total} files to process for file size updating.")
for idx, (record_id, media_url) in enumerate(results, start=1):
server_path = normalize_server_path(media_url)
local_file = os.path.join(TEMP_DIR, os.path.basename(server_path))
if not os.path.exists(local_file):
obj_storage.DownloadFile(storage_path=server_path, download_path=TEMP_DIR)
file_size = os.path.getsize(local_file)
cursor.execute("UPDATE media SET file_size = %s WHERE id = %s;", (file_size, record_id))
db.commit()
print(f"[{idx}/{total}] {media_url}: {file_size} bytes, Rows affected: {cursor.rowcount}")
def update_phash(cursor, db, obj_storage):
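# Backfill perceptual hashes for image rows that do not have one yet.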
generate_for = 'media_url'
media_type = 'image'
cursor.execute(f"SELECT id, {generate_for} FROM media WHERE media_type = %s AND phash IS NULL AND status != 'deleted';", [media_type])
medias = cursor.fetchall()
total = len(medias)
print(f"Found {total} files to process for pHash updating.")
for idx, (record_id, media_url) in enumerate(medias, start=1):
server_path = normalize_server_path(media_url, replace_all=False)
local_file = os.path.join(TEMP_DIR, os.path.basename(server_path))
if not os.path.exists(local_file):
obj_storage.DownloadFile(storage_path=server_path, download_path=TEMP_DIR)
phash = generate_phash(local_file)
if not phash:
print(f"Error generating pHash for {local_file}")
continue
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, record_id])
db.commit()
print(f"[{idx}/{total}] Processed record {record_id} with pHash: {phash}")
def main():
obj_storage = config.get_storage()
db, cursor = config.gen_connection()
update_hashes(cursor, db, obj_storage)
update_dimensions(cursor, db, obj_storage)
update_file_size(cursor, db, obj_storage)
update_phash(cursor, db, obj_storage)
if __name__ == '__main__':
main()