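# Uploads a local StorySave/ dump of Instagram stories/posts to the BunnyCDN
# "storysave" storage zone and records each file (URL, dimensions, hash) in the
# `media` table, skipping media_ids that are already in the database.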
import hashlib
import os
import uuid

import cv2
from PIL import Image
from BunnyCDN.Storage import Storage

import config
def clean_empty_folders(directory):
    """Remove empty subfolders under `directory`, walking bottom-up."""
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")
def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of a file, hashed in 8 KiB chunks."""
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()
def extract_file_info(filename):
    """Parse "username~timestamp~<media_id>_<user_id>.<ext>" filenames."""
    try:
        parts = filename.split("~")
        username, timestamp = parts[0], parts[1]
        media_id, user_id = parts[2].split("_")
        user_id = user_id.split(".")[0]
        return username, media_id, user_id, timestamp
    except (IndexError, ValueError):
        return None, None, None, None
def extract_file_info2(filename):
    """Parse "username~<media_id>_<user_id>.<ext>" filenames (no timestamp)."""
    try:
        username = filename.split("~")[0]
        elements = filename.split("~")[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
        return username, media_id, user_id
    except (IndexError, ValueError):
        return None, None, None
def upload_file(filepath, username, media_id=None, media_type='image', post_type='story', user_id=None, date=None):
    """Record a media file in the `media` table, push it to BunnyCDN storage, and delete the local copy."""
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    file_hash = calculate_file_hash(filepath)

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, file_hash, date)
    newCursor.execute(query, values)
    newDB.commit()

    existing_files.append(media_id)

    if newCursor.rowcount == 0:
        print(f'Insert ignored (already in media table?): {filename}')

    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)

    print(f'[{newCursor.rowcount}] {filename} {file_url}')
def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height
def get_media_type(filename):
    lowered = filename.lower()
    if lowered.endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
        return 'image'
    if lowered.endswith(('.mp4', '.mov')):
        return 'video'
    return None
def dump_instagram(folder_path):
    """Walk a dump folder of per-user subfolders and upload every parseable file."""
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            user_folder = os.path.join(root, folder)
            for filename in os.listdir(user_folder):
                if "~" not in filename:
                    continue

                username, media_id, user_id, timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, timestamp]:
                    username, media_id, user_id = extract_file_info2(filename)
                if None in [username, media_id, user_id]:
                    print(f"Failed to extract info from {filename}")
                    continue

                media_id = int(media_id) if media_id else None
                if media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(os.path.join(user_folder, filename))
                    continue

                filepath = os.path.join(user_folder, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)
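# Entry point: open the database connection, load the media_ids already stored,
# then walk the local StorySave/ dump.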
if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('StorySave/')

    print("Processing completed.")