"""Upload locally saved Snapchat story media to BunnyCDN storage and record it in the database.

When run as a script, every file under ``snapchat/`` whose name follows the
``<username>~<timestamp>~...`` pattern is uploaded, a row is inserted into
the ``media`` table, and the local file is deleted.
"""
import os
import sys
from datetime import datetime

import cv2
from BunnyCDN.Storage import Storage
from PIL import Image

import config
import funcs

# Public base URL under which uploaded objects are served.
CDN_BASE_URL = "https://storysave.b-cdn.net"
# Local scratch directory for thumbnails before they are uploaded.
TEMP_DIR = "temp"


def _make_video_thumbnail(filepath, file_hash):
    """Write the first frame of a video as a JPEG, upload it, and hash it.

    Relies on the module-level ``obj_storage`` set up by the script entry
    point.

    Args:
        filepath: Path of the local video file.
        file_hash: Hash of the video file; used to name the thumbnail.

    Returns:
        A ``(thumbnail_url, phash)`` tuple.

    Raises:
        ValueError: If no frame could be read from the video.
        OSError: If the thumbnail image could not be written.
        Exception: Whatever the storage upload or ``funcs.generate_phash``
            raises is propagated unchanged.
    """
    os.makedirs(TEMP_DIR, exist_ok=True)
    thumb_path = f'{TEMP_DIR}/{file_hash}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if reading blew up.
        cap.release()
    if not ret:
        raise ValueError(f'could not read a frame from {filepath}')

    try:
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(thumb_path, frame):
            raise OSError(f'could not write thumbnail to {thumb_path}')
        obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')  # network upload
        phash = funcs.generate_phash(thumb_path)
    finally:
        # Never leave the temporary thumbnail behind, success or not.
        if os.path.exists(thumb_path):
            os.remove(thumb_path)

    return f"{CDN_BASE_URL}/thumbnails/{file_hash}.jpg", phash


def UploadMedia(media):
    """Upload one media file to storage and insert its row into ``media``.

    Uses the module-level ``existing_files``, ``obj_storage``, ``newCursor``
    and ``newDB`` that the script entry point creates.

    Args:
        media: Dict with ``'username'``, ``'timestamp'`` and ``'filepath'``
            keys, as produced by ``get_media_data``.

    Returns:
        True when the file was uploaded, or was a known duplicate and was
        removed; False when thumbnail generation failed for a video (the
        local file is kept in that case).
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part of the timestamp before the first '-' is used.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if media_type == 'video':
        width, height = funcs.get_video_dimensions(filepath)
        duration = funcs.get_video_duration(filepath)
    else:
        # Close the image promptly so the file can be deleted afterwards.
        with Image.open(filepath) as img:
            width, height = img.size
        duration = 0

    thumbnail_url = None
    phash = None
    if media_type == 'video':
        try:
            thumbnail_url, phash = _make_video_thumbnail(filepath, file_hash)
        except Exception as exc:
            print(f'Error generating thumbnail. Skipping... ({exc})')
            return False
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/snaps/{username}/{newFilename}'
    file_url = f"{CDN_BASE_URL}/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # network upload; the slowest step

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True


def get_media_data(filepath):
    """Parse username and timestamp out of a ``~``-separated filename.

    Args:
        filepath: Path of a media file.

    Returns:
        A dict with ``'username'``, ``'timestamp'`` and ``'filepath'`` keys,
        or False when the filename has fewer than three ``~``-separated
        parts.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) < 3:
        return False

    return {'username': parts[0], 'timestamp': parts[1], 'filepath': filepath}


def get_media(folder_path):
    """Collect media dicts for every parsable file under ``folder_path``.

    Args:
        folder_path: Directory to walk recursively.

    Returns:
        List of dicts as returned by ``get_media_data``; files whose names
        do not match the expected pattern are skipped.
    """
    medias = []

    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            data = get_media_data(os.path.join(root, filename))
            if data:
                medias.append(data)

    return medias


def dump(folder_path):
    """Upload every parsable media file found under ``folder_path``."""
    for media in get_media(folder_path):
        UploadMedia(media)


if __name__ == '__main__':
    print('Starting processing...')

    directory = 'snapchat/'

    # Treat a missing input directory the same as an empty one.
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        sys.exit()

    newDB, newCursor = config.gen_connection()
    try:
        # NOTE(review): credential is hard-coded in source; it should be
        # moved to configuration/environment and rotated.
        obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

        newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
        # A set gives O(1) duplicate checks in UploadMedia.
        existing_files = {row[0] for row in newCursor.fetchall()}

        dump(directory)
    finally:
        # Release database resources even if an upload fails midway.
        newCursor.close()
        newDB.close()

    print("Processing completed.")