"""Upload media files saved under ``StorySave/`` to BunnyCDN storage.

Each file is parsed for its username / media id / user id, registered in the
``media`` database table, pushed to the storage zone and then deleted locally.

The worker functions rely on four module-level globals that are created in
the ``__main__`` section at the bottom of this file: ``newDB``, ``newCursor``,
``obj_storage`` and ``existing_files``.
"""

import hashlib
import os
import uuid

import cv2
from BunnyCDN.Storage import Storage
from PIL import Image

import config

# Extensions recognised by get_media_type (compared case-insensitively).
_IMAGE_EXTENSIONS = (".jpg", ".webp", ".jpeg", ".png", ".gif")
_VIDEO_EXTENSIONS = (".mp4", ".mov")


def clean_empty_folders(directory):
    """Remove every empty sub-folder found below *directory*.

    The tree is walked bottom-up so that a folder whose only content was an
    empty folder is itself seen as empty by the time its parent is visited.
    *directory* itself is never removed.
    """
    for foldername, subfolders, _filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")


def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of the file at *file_path*.

    Args:
        file_path: Path of the file to hash.
        hash_func: Any algorithm name accepted by ``hashlib.new``.

    The file is read in 8 KiB chunks so large videos are not loaded into
    memory at once.
    """
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        # Two-argument iter() keeps calling read() until it returns b'' (EOF).
        for chunk in iter(lambda: file.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()


def extract_file_info(filename):
    """Parse a ``username~timestamp~mediaid_userid.ext`` file name.

    Returns:
        ``(username, media_id, user_id, timestamp)`` as strings, or four
        ``None`` values when the name does not have that shape (fewer than
        three ``~``-separated parts, or the third part does not contain
        exactly one ``_``).
    """
    try:
        parts = filename.split("~")
        username, timestamp = parts[0], parts[1]
        # Exactly one "_" is required here; anything else raises ValueError.
        media_id, remainder = parts[2].split("_")
        user_id = remainder.split(".")[0]
    except (IndexError, ValueError):
        return None, None, None, None
    return username, media_id, user_id, timestamp


def extract_file_info2(filename):
    """Parse a ``username~mediaid_userid.ext`` file name (no timestamp).

    Returns:
        ``(username, media_id, user_id)`` as strings, or three ``None``
        values when the name does not have that shape.
    """
    try:
        parts = filename.split("~")
        username = parts[0]
        elements = parts[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
    except (IndexError, ValueError):
        return None, None, None
    return username, media_id, user_id


def upload_file(filepath, username, media_id = None, media_type='image', post_type = 'story', user_id = None, date = None):
    """Register one media file in the database, upload it, and delete it locally.

    Args:
        filepath: Local path of the file to upload.
        username: Account name; becomes part of the remote path.
        media_id: Identifier used as the remote file name. When falsy a random
            UUID hex string is used for the remote name instead.
        media_type: ``'image'`` reads the dimensions with PIL; any other value
            reads them with OpenCV.
        post_type: ``'story'`` stores under ``stories``; anything else under
            ``posts``.
        user_id: Stored as-is in the ``user_id`` column.
        date: Stored as-is in the ``date`` column.

    Uses the module globals ``newCursor``, ``newDB``, ``obj_storage`` and
    ``existing_files``.
    """
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]

    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    fileHash = calculate_file_hash(filepath)

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, fileHash, date)

    # NOTE(review): the row is committed before the upload happens, so a failed
    # upload leaves a database row pointing at a missing file -- confirm
    # whether that ordering is intended.
    newCursor.execute(query, values)
    newDB.commit()

    # Only real ids are remembered. Recording None would make every later
    # file that lacks a media id look like a duplicate of this one.
    if media_id is not None:
        existing_files.add(media_id)

    # rowcount == 0 means INSERT IGNORE did not add a row (presumably a
    # unique-key collision); the upload below is still performed.
    if newCursor.rowcount == 0:
        print('What the fuck just happend?')

    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')


def get_video_dimensions(video_path):
    """Return ``(width, height)`` of the video at *video_path* via OpenCV.

    NOTE(review): OpenCV does not raise for an unreadable file; the property
    getters presumably return 0 in that case -- confirm before relying on it.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always free the capture handle, even if a getter fails.
        cap.release()
    return width, height


def get_media_type(filename):
    """Classify *filename* by extension.

    Returns:
        ``'image'``, ``'video'``, or ``None`` when the extension is not
        recognised.
    """
    lowered = filename.lower()
    if lowered.endswith(_IMAGE_EXTENSIONS):
        return 'image'
    if lowered.endswith(_VIDEO_EXTENSIONS):
        return 'video'
    return None


def dump_instagram(folder_path):
    """Upload every parsable media file found in the sub-folders of *folder_path*.

    Every directory below *folder_path* (at any depth) is listed; files that
    sit directly in *folder_path* are not processed. Files without ``~`` in
    their name are ignored. Files whose media id is already in
    ``existing_files`` are deleted locally without being uploaded.
    """
    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            user_folder = os.path.join(root, folder)

            for filename in os.listdir(user_folder):
                if "~" not in filename:
                    continue

                filepath = os.path.join(user_folder, filename)

                # Try the three-part name first, then the two-part fallback.
                # NOTE(review): the parsed timestamp is not forwarded to
                # upload_file(date=...) -- confirm whether it should be.
                username, media_id, user_id, _timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, _timestamp]:
                    username, media_id, user_id = extract_file_info2(filename)
                    if None in [username, media_id, user_id]:
                        print(f"Failed to extract info from {filename}")
                        continue

                try:
                    media_id = int(media_id) if media_id else None
                except ValueError:
                    # A non-numeric id must not abort the whole run.
                    print(f"Failed to extract info from {filename}")
                    continue

                # A missing id can never be a duplicate; without this guard an
                # id-less file could be deleted instead of uploaded.
                if media_id is not None and media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(filepath)
                    continue

                mediatype = get_media_type(filename)
                if mediatype is None:
                    # Leave unknown file types on disk rather than uploading
                    # them with media_type NULL and OpenCV-probed dimensions.
                    print(f"Unsupported media type, skipping {filename}")
                    continue

                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage API key is hard-coded in source control;
    # consider loading it from configuration or the environment and rotating it.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media")
    # A set gives O(1) duplicate checks; NULL ids are left out so they can
    # never match a file that has no media id.
    existing_files = {row[0] for row in newCursor.fetchall() if row[0] is not None}

    dump_instagram('StorySave/')

    print("Processing completed.")