diff --git a/dump_ig_scanner.py b/dump_ig_scanner.py
new file mode 100644
index 0000000..e19a154
--- /dev/null
+++ b/dump_ig_scanner.py
@@ -0,0 +1,116 @@
+from BunnyCDN.Storage import Storage
+from datetime import datetime
+import os, config, funcs
+from PIL import Image
+import time
+
+def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
+    """Upload one media file, record it in the media table, then delete the local copy.
+
+    Uses the module-level existing_files, obj_storage, newCursor and newDB
+    objects created in the __main__ block.
+
+    filepath: path of the local file to upload.
+    username: used in the storage path and stored with the row.
+    media_id: id of the media; a file whose id is already in existing_files
+        is deleted without being uploaded.
+    timestamp: optional epoch seconds for the post date; defaults to now.
+    user_id: optional user id stored with the row.
+
+    Returns True after an upload and after removing a duplicate.
+    """
+    filename = os.path.basename(filepath)
+    file_extension = os.path.splitext(filename)[1].lower()
+
+    media_type = funcs.get_media_type(filename)
+
+    post_type = funcs.determine_post_type(filepath, media_type)
+
+    file_hash = funcs.calculate_file_hash(filepath)
+
+    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
+
+    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
+
+    if media_type == 'video':
+        width, height = funcs.get_video_dimensions(filepath)
+    else:
+        # Close the image file deterministically instead of leaving it to garbage collection.
+        with Image.open(filepath) as image:
+            width, height = image.size
+
+    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
+
+    file_url = f"https://storysave.b-cdn.net/{server_path}"
+
+    # Convert once; a missing id skips duplicate tracking instead of raising in int().
+    media_key = int(media_id) if media_id else None
+
+    if media_key is not None and media_key in existing_files:
+        print('Duplicate file detected. Removing...')
+        os.remove(filepath)
+        return True
+
+    obj_storage.PutFile(filepath, server_path)
+
+    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
+    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration)
+
+    newCursor.execute(query, values)
+    newDB.commit()
+    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
+
+    # Remember the id only once the upload and insert have succeeded, so a failed
+    # attempt never marks the file as a duplicate (which would delete it unsent).
+    if media_key is not None:
+        existing_files.add(media_key)
+
+    os.remove(filepath)
+
+    return True
+
+
+def dump_instagram(folder_path):
+    """Upload every file in folder_path whose name is username~timestamp~media_id~rest.
+
+    The user id is the text after the last '_' and before the first '.' in rest.
+    Names with fewer than four '~'-separated parts are ignored.
+    """
+    for filename in os.listdir(folder_path):
+        parts = filename.split('~')
+        if len(parts) < 4:
+            continue
+
+        username = parts[0]
+        timestamp = parts[1]
+        media_id = parts[2]
+        user_id = parts[3].split('_')[-1].split('.')[0]
+
+        # UploadMedia passes both values to int(); skip a misnamed file rather than
+        # letting its ValueError stop the scan loop.
+        if not media_id.isdecimal() or (timestamp and not timestamp.isdecimal()):
+            print(f'Skipping {filename}: non-numeric timestamp or media id')
+            continue
+
+        filepath = os.path.join(folder_path, filename)
+        UploadMedia(username=username, filepath=filepath, media_id=media_id, timestamp=timestamp, user_id=user_id)
+
+
+if __name__ == '__main__':
+    print('Starting processing...')
+
+    newDB, newCursor = config.gen_connection()
+
+    # NOTE(review): storage key is committed in plain text; rotate it and load it from config.
+    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
+
+    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
+    # A set keeps the duplicate check in UploadMedia O(1) per file.
+    # NOTE(review): the check compares int(media_id), so this assumes the column yields ints - confirm.
+    existing_files = {image[0] for image in newCursor.fetchall()}
+
+    while True:
+        print("Processing...")
+        dump_instagram('storysaver/')
+        print("Processing completed.")
+        time.sleep(15)
\ No newline at end of file
diff --git a/dump_instagram.py b/dump_instagram.py
index eead969..d0492d4 100644
--- a/dump_instagram.py
+++ b/dump_instagram.py
@@ -3,20 +3,6 @@ from datetime import datetime
 import os, config, funcs
 from PIL import Image
 
-def dump_instagram(folder_path):
-    for filename in os.listdir(folder_path):
-        parts = filename.split('~')
-        if len(parts) < 4:
-            continue
-
-        username = parts[0]
-        timestamp = parts[1]
-        media_id = parts[2]
-        user_id = parts[3].split('_')[-1].split('.')[0]
-
-        filepath = os.path.join(folder_path, filename)
-        UploadMedia(username=username, filepath=filepath, media_id=media_id, timestamp=timestamp, user_id=user_id)
-
 
 def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
     filename = os.path.basename(filepath)
@@ -55,6 +41,21 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
     os.remove(filepath)
 
     return True
+
+
+def dump_instagram(folder_path):
+    for filename in os.listdir(folder_path):
+        parts = filename.split('~')
+        if len(parts) < 4:
+            continue
+
+        username = parts[0]
+        timestamp = parts[1]
+        media_id = parts[2]
+        user_id = parts[3].split('_')[-1].split('.')[0]
+
+        filepath = os.path.join(folder_path, filename)
+        UploadMedia(username=username, filepath=filepath, media_id=media_id, timestamp=timestamp, user_id=user_id)
 
 
 if __name__ == '__main__':
@@ -67,9 +68,6 @@ if __name__ == '__main__':
     newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
     existing_files = [image[0] for image in newCursor.fetchall()]
 
-    filePath = 'storysaver/3385905371606651364.jpg'
-    UploadMedia(filepath=filePath, username='unknown', media_id=3385905371606651364)
-
     dump_instagram('storysaver/')
 
     print("Processing completed.")
\ No newline at end of file