From e6ad418ecdffbc941386ff9b87f717a61ba708e0 Mon Sep 17 00:00:00 2001
From: oscar
Date: Thu, 13 Feb 2025 02:38:54 +0200
Subject: [PATCH] updates

---
 .DS_Store                | Bin 18436 -> 10244 bytes
 config.py                |  21 +++-
 find_static_videos.py    |  35 +++++-
 funcs.py                 | 104 +++++++---------
 import_cache.py          |   9 +-
 requirements.txt         |   2 +-
 snapchat.py              |  67 +++++++++-
 snappy_master.py         | 263 ++++++++++++++++++---------------------
 storysave_api.py         |  90 ++++++--------
 storysave_dump.py        | 168 ++++++++++++++++++-------
 storysave_dump_custom.py |   6 +-
 storysave_scanner.py     |  86 +++++++++++--
 storysaver.py            |  15 +--
 storysaver_new.py        |  43 ++++---
 14 files changed, 557 insertions(+), 352 deletions(-)

diff --git a/.DS_Store b/.DS_Store
index dd8492d8676faccc4e9406f245ecfd15b240093d..f06dbc8c78255308d54df512b9ddff87c73ab95a 100644
GIT binary patch
(binary delta omitted; .DS_Store is macOS Finder metadata and not reviewable)

diff --git a/config.py b/config.py
index 1169eae..1562947 100644
--- a/config.py
+++ b/config.py
@@ -1,5 +1,16 @@
-from BunnyCDN.Storage import Storage
-import mysql.connector
+import os
+
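+# Base layout for downloaded media: files are stored under media/instagram
+# and media/snapchat via the helper functions below.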
+MEDIA_DIRECTORY = "media"
+SNAPCHAT_DIRECTORY = "snapchat"
+INSTAGRAM_DIRECTORY = "instagram"
+
+def get_instagram_directory():
+    return os.path.join(MEDIA_DIRECTORY, INSTAGRAM_DIRECTORY)
+
+def snapchat_output_dir():
+    return os.path.join(MEDIA_DIRECTORY, SNAPCHAT_DIRECTORY)
 
 username = "doadmin"
 password = "AVNS_2qeFJuiGRpBQXkJjlA6"
@@ -9,10 +20,16 @@ database = "storysave"
 sslmode = "REQUIRED"
 
 def gen_connection():
+    import mysql.connector
     print("Connecting to database")
     newDB = mysql.connector.connect(host=host, user=username, password=password, database=database, port=port)
     print("Connected to database")
     return newDB, newDB.cursor()
 
 def get_storage():
+    from BunnyCDN.Storage import Storage
+    return Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
+
+def get_custom_storage():
+    from bunny import Storage
     return Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
\ No newline at end of file
diff --git a/find_static_videos.py b/find_static_videos.py
index 86a2f5e..4629192 100644
--- a/find_static_videos.py
+++ b/find_static_videos.py
@@ -2,6 +2,7 @@ from funcs import get_files
 from PIL import Image
 import imagehash
 import cv2
+import os
 
 def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=16, hamming_threshold=1):
     """
@@ -32,7 +33,6 @@ def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=
         pil_image = Image.fromarray(frame_rgb)
         previous_hash = imagehash.phash(pil_image, hash_size=hash_size)
 
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     is_static = True
     current_frame_number = 1
 
@@ -66,11 +66,40 @@ def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=
 
     return is_static
 
-directory = 'videos'
+directory = input("Enter the directory path: ")
 
-files = get_files(directory)
+files = [file for file in get_files(directory) if file.endswith('.mp4')]
 
+os.makedirs('.temp', exist_ok=True)
+
+total_space_saved = 0
 for video_file in files:
-    if video_file.endswith('.mp4'):
-        if is_static_video_phash_optimized(video_file):
-            print("The video is static: " + video_file)
\ No newline at end of file
+    if not is_static_video_phash_optimized(video_file):
+        continue
+
+    screenshot_path = os.path.join('.temp', os.path.basename(video_file) + '.jpg')
+
+    if not os.path.exists(screenshot_path):
+        cap = cv2.VideoCapture(video_file)
+        ret, frame = cap.read()
+        cap.release()
+
+        if ret:
+            cv2.imwrite(screenshot_path, frame)
+
+    if not os.path.exists(screenshot_path):
+        # Frame grab failed, so there is no screenshot to compare against.
+        continue
+
+    screenshot_size = os.path.getsize(screenshot_path)
+    video_size = os.path.getsize(video_file)
+
+    if screenshot_size < video_size:
+        screenshot_size_in_mb = screenshot_size / (1024 * 1024)
+        video_size_in_mb = video_size / (1024 * 1024)
+        total_space_saved += video_size - screenshot_size
+        print(f"Screenshot size: {screenshot_size_in_mb:.2f} MB, Video size: {video_size_in_mb:.2f} MB")
+    else:
+        os.remove(screenshot_path)
+
+print(f"Total space saved: {total_space_saved / (1024 * 1024):.2f} MB")
\ No newline at end of file
diff --git a/funcs.py b/funcs.py
index 1537e6c..cad3dbe 100644
--- a/funcs.py
+++ b/funcs.py
@@ -10,6 +10,23 @@ from moviepy.editor import VideoFileClip
 
 headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
 proxies={"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/","https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"}
 
+def get_file_extension(url):
+    response = requests.head(url)
+    if response.status_code != 200:
+        print(f"Failed to access media {url}")
+        return None
+
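+    # Infer the extension from the Content-Type header; only image and video
+    # responses are expected here, so anything else is reported and skipped.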
+ content_type = response.headers.get('Content-Type', '') + if 'image' in content_type: + return '.jpg' + elif 'video' in content_type: + return '.mp4' + else: + print(f"Unknown content type for media {url}") + return None + def generate_phash(image_path): try: image = Image.open(image_path) @@ -18,10 +33,10 @@ def generate_phash(image_path): print(f"Error generating phash for {image_path}: {e}") return False -def cleanEmptyFolders(path): +def clean_empty_folders(path): for root, dirs, fs in os.walk(path): for d in dirs: - cleanEmptyFolders(os.path.join(root, d)) + clean_empty_folders(os.path.join(root, d)) if not os.listdir(root): os.rmdir(root) @@ -29,6 +44,8 @@ def get_files(directory): files = [] for root, dirs, filenames in os.walk(directory): for filename in filenames: + if filename.startswith('.'): + continue files.append(os.path.join(root, filename)) return files @@ -87,68 +104,35 @@ def compare_images(image_path1, image_path2): else: return False -def remove_empty_folders(dir_path): - import shutil - - def is_folder_empty(folder_path): - return len(os.listdir(folder_path)) == 0 - - num_folder = 0 - for root, dirs, files in os.walk(dir_path, topdown=False): - for dir_name in dirs: - dir_path = os.path.join(root, dir_name) - - if not os.path.isdir(dir_path): - continue - - if '$' in dir_name or '$' in dir_path: - print(f"Skipping system folder: {dir_path}") - continue - - if 'system volume information' in dir_name.lower() or 'system volume information' in dir_path.lower(): - print(f"Skipping system folder: {dir_path}") - continue - - if is_folder_empty(dir_path) or dir_name.lower() == '__pycache__': - shutil.rmtree(dir_path) - print(f"Moved empty folder: {dir_path}") - num_folder+=1 - def download_file(url, filePath): try: - response = requests.get(url, stream=True, headers=headers) - response.raise_for_status() + if os.path.exists(filePath): + return filePath - directory = os.path.dirname(filePath) + if not url: + print(f"Invalid URL: {url}") + return False + + response = requests.get(url, stream=True, headers=headers) - if not os.path.exists(directory): - os.makedirs(directory) + if response.status_code != 200: + print(f"Failed to download {url}. Status code: {response.status_code}") + return False + + os.makedirs(os.path.dirname(filePath), exist_ok=True) with open(filePath, "wb") as out_file: for chunk in response.iter_content(chunk_size=8192): out_file.write(chunk) - print(f"Downloaded {filePath}") - return True + return filePath except Exception as e: print(f"Failed to download {url}. 
Error: {e}") return False -def determine_post_type(filepath): - width, height = get_media_dimensions(filepath) - - if 0 in (width, height): - return False - - aspect_ratio = width / height - if aspect_ratio > 0.5 and aspect_ratio < 0.6: - return 'stories' - else: - return 'posts' - def get_media_type(filename): image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg", ".eps", ".raw", ".cr2", ".nef", ".orf", ".sr2", ".heic", ".indd", ".ai", ".psd", ".svg"} - video_extensions = {".mp4", ".mov"} + video_extensions = {".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".webm", ".vob", ".ogg", ".ts", ".flv"} filetype_dict = {"image": image_extensions, "video": video_extensions} extension = os.path.splitext(filename.lower())[1] # Get the extension and convert to lower case @@ -163,9 +147,7 @@ def get_video_duration(file_path): print(f"File not found: {file_path}") return 0 - video_types = {".mp4", ".mov", ".mkv"} - extension = os.path.splitext(file_path.lower())[1] - if extension not in video_types: + if not get_media_type(file_path) == 'video': return 0 try: @@ -178,6 +160,12 @@ def get_video_duration(file_path): print(f"Error getting duration for {file_path}: {e}") return 0 +def get_media_dimensions(media_path): + if get_media_type(media_path) == 'video': + return get_video_dimensions(media_path) + else: + return get_image_dimensions(media_path) + def get_video_dimensions(video_path): cap = cv2.VideoCapture(video_path) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -185,13 +173,13 @@ def get_video_dimensions(video_path): cap.release() return width, height -def get_media_dimensions(media_path): - if get_media_type(media_path) == 'video': - return get_video_dimensions(media_path) - else: - with Image.open(media_path) as img: +def get_image_dimensions(image_path): + try: + with Image.open(image_path) as img: return img.size - + except: + return 0, 0 + def get_video_data(video_path): data = {'duration': 0, 'width': 0, 'height': 0} try: diff --git a/import_cache.py b/import_cache.py index 6bb906e..4f622d1 100644 --- a/import_cache.py +++ b/import_cache.py @@ -1,5 +1,4 @@ from concurrent.futures import ThreadPoolExecutor -from BunnyCDN.Storage import Storage import config, os def DownloadFile(serverPath, cacheDir): @@ -9,8 +8,8 @@ def DownloadFile(serverPath, cacheDir): print(f"File already exists: {localFilePath}") return localFilePath + print(f"Downloading {serverPath} to {localFilePath}") obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir) - print(f"Downloaded {serverPath} to {localFilePath}") return localFilePath def ImportMedias(results): @@ -20,14 +19,14 @@ def ImportMedias(results): executor.submit(DownloadFile, serverPath, cacheDir) -obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave') - +obj_storage = config.get_storage() db, cursor = config.gen_connection() -cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;") +cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0 ORDER BY id DESC;") results = cursor.fetchall() cacheDir = 'cache' +os.makedirs(cacheDir, exist_ok=True) print(f"Found {len(results)} files to process.") diff --git a/requirements.txt b/requirements.txt index ce30ba0..39d8f61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,6 @@ undetected_chromedriver python-telegram-bot tqdm webdriver-manager -moviepy +moviepy==1.0.3 instagrapi ImageHash \ No newline at end of file diff --git a/snapchat.py b/snapchat.py index 487f2f7..00a3110 
100644 --- a/snapchat.py +++ b/snapchat.py @@ -5,6 +5,16 @@ import json headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"} +snap_types = { + 27 : ['spotlight', 'video'], + 256 : ['thumbnail', 'image'], + 400 : ['idk', 'image'], + 1023 : ['idk', 'image'], + 1034 : ['downscaled_video', 'video'], + 1322 : ['idk', 'video'], + 1325 : ['idk', 'video'], +} + def get_data(username): url = f"https://www.snapchat.com/add/{username}" response = requests.get(url, headers=headers) @@ -16,6 +26,24 @@ def get_data(username): data = json.loads(data_script.string) return data +def get_social_medias(data): + website_url = None + try: + website_url = data['props']['pageProps']['userProfile']['publicProfileInfo']['websiteUrl'] + except KeyError: + pass + return website_url + +def get_related_profiles(data): + related_profiles = [] + try: + related_profiles_data = data['props']['pageProps']['userProfile']['relatedProfiles'] + for profile in related_profiles_data: + related_profiles.append(profile['username']) + except KeyError: + pass + return related_profiles + def get_all_users_data(usernames): all_data = {} @@ -48,27 +76,38 @@ def parse_stories(stories): return parsed_stories def get_stories(data): + """Extract story list from the JSON data.""" try: stories = data['props']['pageProps']['story']['snapList'] - return parse_stories(stories) - except KeyError: + + if not type(stories) == list: + return [] + + stories.sort(key=lambda x: x.get('snapIndex'), reverse=True) + return stories + except: return [] def get_highlights(data): + """Extract highlights from possible highlight keys in JSON data.""" highlights = [] + page_props = data.get('props', {}).get('pageProps', {}) - # Possible keys that might contain highlights possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights'] + for key in possible_highlight_keys: highlight_data = page_props.get(key, []) if highlight_data: highlights.extend(highlight_data) + return highlights def parse_story(story): original_snap_id = story.get('snapId', {}).get('value', '') snap_url = story.get('snapUrls', {}).get('mediaUrl', '') timestamp = story.get('timestampInSec', {}).get('value', '') + media_type = story.get('snapMediaType') + media_type = 'image' if media_type == 0 else 'video' return { "original_snap_id": original_snap_id, @@ -77,10 +116,12 @@ def parse_story(story): "timestamp": timestamp, "platform": "snapchat", "type": "story", + "username": story.get('username', ''), + "media_type": media_type, } def get_snap_id(url): - return url.split('/')[-1].split('.')[0] + return url.split('?')[0].split('/')[-1].split('.')[0] def get_highlight_stories(data): stories = [] @@ -93,4 +134,20 @@ def get_highlight_stories(data): story = parse_story(snap) stories.append(story) - return stories \ No newline at end of file + return stories + +def get_spotlight_metadata(data): + """Extract spotlight metadata from JSON data.""" + try: + return data['props']['pageProps']['spotlightStoryMetadata'] + except KeyError: + return [] + +def get_username(data): + """Extract username from JSON data.""" + try: + return data['props']['pageProps']['userProfile']['publicProfileInfo']['username'] + except KeyError: + return None + + diff --git a/snappy_master.py b/snappy_master.py index fa57412..8282609 100644 --- a/snappy_master.py +++ b/snappy_master.py @@ -1,103 +1,49 @@ -from snapchat import get_stories, get_highlight_stories, get_all_users_data +from snapchat import get_stories, 
get_highlight_stories, get_all_users_data, parse_stories from datetime import datetime from uuid import uuid4 -import requests import config import funcs -import json import cv2 import os +import json -directory = "snapchat" -data_directory = "data" +UPLOAD_MODE = True -def find_duplicate_snap(existing_snaps, snap_id, username): - """ - Find a snap in the existing_snaps list on database.s - """ - for snap in existing_snaps: - if username == snap[2]: - if snap_id in snap[1]: - return snap - return False - -def archive_data(data, username): - data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json" - data_filepath = os.path.join(data_directory, data_filename) - with open(data_filepath, 'w') as f: - f.write(json.dumps(data)) - -def get_file_extension(url): - response = requests.head(url) - if response.status_code != 200: - print(f"Failed to access media {url}") - return None - - content_type = response.headers.get('Content-Type', '') - if 'image' in content_type: - return '.jpg' - elif 'video' in content_type: - return '.mp4' - else: - print(f"Unknown content type for media {url}") - return None - -def extract_file_type(url): - file_types = { - '400': '.jpg', - '1322': '.mp4', - '1325': '.mp4', - '1034': '.mp4', - '1023': '.jpg' - } +media_directory = "media" +snapchat_directory = "snapchat" +temp_directory = ".temp" +data_directory = "data" - base_url = url.split("?")[0] # Remove query string +directory = os.path.join(media_directory, snapchat_directory) - snap_data = base_url.split('/')[-1] +os.makedirs(media_directory, exist_ok=True) +os.makedirs(directory, exist_ok=True) +os.makedirs(temp_directory, exist_ok=True) +os.makedirs(data_directory, exist_ok=True) - # Extract the file type number - data_parts = snap_data.split('.') - if len(data_parts) > 1: - file_type_number = data_parts[1] - if file_type_number in file_types: - return file_types[file_type_number] - else: - print(f"Unexpected URL format: {base_url}") - return None +def find_duplicate_snap(existing_snap_ids, snap_id): + return snap_id in existing_snap_ids - -def download_media(url, filepath): - if os.path.exists(filepath): - # File already exists, skip download and return the filepath as if it was downloaded. 
- return filepath - - response = requests.get(url) - if response.status_code != 200: - print(f"Failed to download media {url}") - return None - - with open(filepath, 'wb') as f: - f.write(response.content) - return filepath - -def get_snapchat_stories(): - os.makedirs(directory, exist_ok=True) - os.makedirs(data_directory, exist_ok=True) - - cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC") - usernames = [row[0] for row in cursor.fetchall()] - - cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC") - existing_medias = cursor.fetchall() - +def archive_data(data, username): + try: + current_timestamp = int(datetime.now().timestamp()) + data_filename = f"{username}~{current_timestamp}.json" + data_filepath = os.path.join(data_directory, data_filename) + with open(data_filepath, 'w') as f: + f.write(json.dumps(data, indent=4)) + except: + print(f"Failed to archive data for {username}.") + return False + +def get_snapchat_stories(usernames): snapchat_users_data = get_all_users_data(usernames) + snapchat_users_data = dict(sorted(snapchat_users_data.items())) ready_stories = [] - for username in usernames: + for username, data in snapchat_users_data.items(): print(f"Getting stories for {username}...") - data = snapchat_users_data.get(username) if not data: print(f"Failed to get data for {username}. Skipping.") continue @@ -105,6 +51,7 @@ def get_snapchat_stories(): archive_data(data, username) stories = get_stories(data) + stories = parse_stories(stories) stories.extend(get_highlight_stories(data)) @@ -112,14 +59,10 @@ def get_snapchat_stories(): snap_id = story['snap_id'] url = story['url'] timestamp = story['timestamp'] - - duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username) - if duplicate_snap: - # Snap already exists in the database - continue - - # Determine file extension using HEAD request. - extension = extract_file_type(url) + + # Determine file extension + file_exts = {'image': '.jpg', 'video': '.mp4'} + extension = file_exts.get(story['media_type']) if not extension: print(f"Failed to determine file extension for {url}. 
Skipping.") continue @@ -127,19 +70,15 @@ def get_snapchat_stories(): filename = f"{username}~{timestamp}~{snap_id}{extension}" filepath = os.path.join(directory, filename) - media = { - 'username': username, - 'timestamp': timestamp, - 'filepath': filepath, - 'snap_id': snap_id, - 'original_snap_id': story['original_snap_id'], - 'media_url': url, - } + story['media_url'] = url + story['snap_id'] = snap_id + story['filepath'] = filepath + story['username'] = username + story['timestamp'] = timestamp + story['original_snap_id'] = story['original_snap_id'] - ready_stories.append(media) - print(f"Media {snap_id} ready for download.") + ready_stories.append(story) - # sort ready_stories by timestamp from oldest to newest ready_stories.sort(key=lambda x: x['timestamp']) return ready_stories @@ -151,45 +90,73 @@ def get_snapchat_files(): return stories def main(): - ready_stories = get_snapchat_stories() + print('Initializing snappy...') + ready_stories = [] + stories_from_files = get_snapchat_files() - ready_stories.extend(stories_from_files) + cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC") + usernames = [row[0] for row in cursor.fetchall()] - download_stories(ready_stories) + print(f"Getting stories for {len(usernames)} users...") + new_stories = get_snapchat_stories(usernames) + + cleaned_stories = [] + print("Checking for duplicates...") + for story in new_stories: + duplicate_snap = find_duplicate_snap(existing_snap_ids, story['snap_id']) + if duplicate_snap: + print(f"Snap {story['filepath']} already exists in the database. Removing...") + continue + cleaned_stories.append(story) + + cleaned_stories = download_stories(cleaned_stories) + + ready_stories.extend(cleaned_stories) + ready_stories.extend(stories_from_files) + + for story in ready_stories: + UploadMedia(story) def download_stories(stories): + downloaded_stories = [] for story in stories: - # Download the media filepath = story['filepath'] url = story['media_url'] - filename = os.path.basename(filepath) - timestamp = story['timestamp'] - filepath = download_media(url, filepath) - print(f"Downloaded {filename} at {timestamp}") + filepath = funcs.download_file(url, filepath) + print(f"Downloaded {os.path.basename(filepath)}") if not filepath: continue - story['filepath'] = filepath - - UploadMedia(story) + story['hash'] = funcs.calculate_file_hash(filepath) + story['size'] = os.path.getsize(filepath) + + downloaded_stories.append(story) + return downloaded_stories + def UploadMedia(media): - username = media['username'] - timestamp = media['timestamp'] + file_size = media['size'] + file_hash = media['hash'] filepath = media['filepath'] filename = os.path.basename(filepath) + + username = media['username'] + timestamp = media['timestamp'] + media_type = media['media_type'] snap_id = media['snap_id'] original_snap_id = media['original_snap_id'] thumbnail_url = None phash = None - - media_type = funcs.get_media_type(filename) - - file_hash = funcs.calculate_file_hash(filepath) + duplicate_snap = find_duplicate_snap(existing_snap_ids, media['snap_id']) + if duplicate_snap: + print(f"Snap {filename} already exists in the database. 
Removing...") + os.remove(filepath) + return False + post_date = datetime.fromtimestamp(int(timestamp)) width, height = funcs.get_media_dimensions(filepath) @@ -214,8 +181,8 @@ def UploadMedia(media): obj_storage.PutFile(filepath, server_path) - query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" - values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id) + query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" + values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id, file_size) cursor.execute(query, values) db.commit() @@ -226,7 +193,7 @@ def UploadMedia(media): return True def generate_thumbnail(filepath): - thumb_path = f'temp/{uuid4()}.jpg' + thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg') cap = cv2.VideoCapture(filepath) ret, frame = cap.read() cv2.imwrite(thumb_path, frame) @@ -234,36 +201,42 @@ def generate_thumbnail(filepath): return thumb_path def get_media_data(filepath): - filename = os.path.basename(filepath) - parts = filename.split('~') - if len(parts) < 3: - return False - - username = parts[0] - timestamp = parts[1] - snap_id = parts[2] - snap_id = os.path.splitext(snap_id)[0] - - data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None, 'media_url': None} - # data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': None, 'original_snap_id': snap_id, 'media_url': None} - - return data - -def process_snap_ids(filenames): - snap_ids = [] - for filename in filenames: - snap_id = filename.split('~')[2] - snap_id = os.path.splitext(snap_id)[0] - if snap_id not in snap_ids: - snap_ids.append(snap_id) - - return snap_ids + filename = os.path.basename(filepath) + parts = filename.split('~') + if len(parts) < 3: + return False + + username = parts[0] + timestamp = parts[1] + snap_id = parts[2] + snap_id = os.path.splitext(snap_id)[0] + + file_size = os.path.getsize(filepath) + file_hash = funcs.calculate_file_hash(filepath) + + data = { + "username": username, + "timestamp": timestamp, + "filepath": filepath, + "snap_id": snap_id, + "original_snap_id": None, + "media_url": None, + "size": file_size, + "hash": file_hash + } + + return data if __name__ == '__main__': print('Starting snappy...') db, cursor = config.gen_connection() obj_storage = config.get_storage() + + cursor.execute("SELECT snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC") + existing_snap_ids = cursor.fetchall() + + existing_snap_ids = {row[0] for row in existing_snap_ids} main() diff --git a/storysave_api.py b/storysave_api.py index 82c49dc..ead33e7 100644 --- a/storysave_api.py +++ b/storysave_api.py @@ -2,32 +2,21 @@ from bs4 import BeautifulSoup import requests import json -def findPost(filePath = 'test.json'): - params = {'av': '17841401225494803','__a': '1','__req': '1','__hs': '19906.HYP:instagram_web_pkg.2.1..0.1','dpr': '1','__ccg': 
'UNKNOWN','__rev': '1014609539','__s': 'guk60j:651i2v:pmhu0r','__hsi': '7386834689999716220','__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1xoswaq0yE6u0nS4oaEd86a3a1YwBgao1aU2swbOU2zxe2GewGw9a362W2K0zEnwhEe82mwww4cwJCwLyES1TwTwFwIwbS1LwTwKG1pg2Xwr86C1mwrd6goK3ibxKi2K7ErwYCz8rwHw','__csr': 'igAzIj5OgR5YBHdRtivbkyFv-zJIZE_ykzfahdAydeHCHAAAqyk4pqBgDzeV4-qlbBF29UlCxFpVokDwAyosyV9KWUmx6iu58WqdwSDCDAFwHxi3C00lWy2FG4k583NxW8yFE0bUyxd06lxO5C2a8yFm2u290ejg1JU2Gw2rQ061U','__comet_req': '7','fb_dtsg': 'NAcPDfX2XufdLkctek6zNxz3DWxPW4t-cJzz39QtOQ5KS-_Rq3erT4A:17843708194158284:1719013044','jazoest': '26262','lsd': 'D0zmaX16yIQu_GwDXKTbMc','__spin_r': '1014609539','__spin_b': 'trunk','__spin_t': '1719881474','__jssesw': '1','fb_api_caller_class': 'RelayModern','fb_api_req_friendly_name': 'PolarisProfilePageContentDirectQuery', 'variables': '{"id":"57771591453","render_surface":"PROFILE"}','server_timestamps': 'true','doc_id': '7663723823674585'} +doc_ids = [7663723823674585, 9539110062771438] + +def get_posts(): + data = { + "variables": '{"id":"57771591453","render_surface":"PROFILE"}', + "doc_id": "7663723823674585", + } - data = requests.get('https://www.instagram.com/graphql/query') + data = requests.get('https://www.instagram.com/graphql/query', params=data).json() - posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges'] + posts = data['data'] posts = [post['node'] for post in posts] return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates'])) -def getHDProfilePicture(): - url = 'https://www.save-free.com/process' - - zoom_data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'zoom'} - data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'save'} - - headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36','Referer' : 'https://www.save-free.com/profile-downloader/',} - - response = requests.post(url, data=data, headers=headers) - - response = requests.post(url, data=zoom_data, headers=headers) - - with open('image.jpg', 'wb') as f: - f.write(response.content) - - def get_username_by_user_id(user_id): url = 'https://www.instagram.com/graphql/query/' @@ -131,31 +120,10 @@ def get_user_id(username): def get_profile_data(username): url = 'https://www.instagram.com/graphql/query' + user_id = get_user_id(username) + data = { - 'av': '17841401225494803', - '__d': 'www', - '__user': 0, - '__a': 1, - '__req': 2, - '__hs': '20047.HYP:instagram_web_pkg.2.1..0.1', - 'dpr': 1, - '__ccg': 'EXCELLENT', - '__rev': 1018347086, - '__s': '8di41h:vwko3r:whjifd', - '__hsi': 7439320945163371549, - '__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux60p-0LVE4W0qa0FE2awgo1EUhwnU6a3a0EA2C0iK0D830wae4UaEW2G0AEco5G0zE5W0Y81eEdEGdwtU662O0Lo6-3u2WE15E6O1FwlE6PhA6bwg8rAwHxW1oCz8rwHwcOEym5oqw', - '__csr': 'hA5I8EAy7hnfqiIBklLZHVkmTHQmVmAh5UCchA9GQByu_yfD-nUBaVaDmSbDyUydCDgzyQAcggDK48Sm2ai8y8lxe6UTgmjwCyUC8yFXK9zooxmez9FUW684qu4awQwF9w04XAg0wi0nB03981oU082Oa0fMe3e19g512AK6Ulo5C3lw7Uy8G6Efo9k08mgiaaw25VobU2bw3KU023zw6Pw', - '__comet_req': 7, - 'fb_dtsg': 'NAcO7gvrsNlfWXA8giwQC4bVYRXXAGomAqcIRYUJUE2Hk8HmABf56Yg:17854575481098892:1732030177', - 'jazoest': 26190, - 'lsd': 'zcsn3c8we8kpMB_AVukeii', - '__spin_r': 1018347086, - '__spin_b': 'trunk', - '__spin_t': 1732101883, - 'fb_api_caller_class': 'RelayModern', - 'fb_api_req_friendly_name': 'PolarisProfilePageContentQuery', - 'variables': 
'{"id":"6687693830","render_surface":"PROFILE"}', - 'server_timestamps': 'true', + 'variables': '{"id":"' + user_id + '","render_surface":"PROFILE"}', 'doc_id': 9539110062771438 } @@ -163,21 +131,39 @@ def get_profile_data(username): 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', } - response = requests.post(url, headers=headers, data=data) json_data = response.json() return json_data +def get_hd_profile_picture(username = None, user_id = None): + api_url = 'https://www.instagram.com/graphql/query' -username_check = 'tal_ohana' + if not username and not user_id: + return None + + if not user_id: + user_id = get_user_id(username) + if not user_id: + return None + + data = { + 'variables': '{"id":"' + user_id +' ","render_surface":"PROFILE"}', + 'doc_id': 9539110062771438 + } -user_id = get_user_id(username_check) + try: + response = requests.post(api_url, data=data) -username = get_username_by_user_id(user_id) + json_data = response.json() + if 'message' in json_data: + if json_data['message'] == 'Please wait a few minutes before you try again.': + print('Rate limited. Please try again later.') + return None -if username: - print(f"Username: {username}") -else: - print("Could not retrieve username.") \ No newline at end of file + hd_profile_pic = json_data['data']['user']['hd_profile_pic_url_info']['url'] + except: + hd_profile_pic = None + + return hd_profile_pic \ No newline at end of file diff --git a/storysave_dump.py b/storysave_dump.py index 37f20d2..577fac8 100644 --- a/storysave_dump.py +++ b/storysave_dump.py @@ -4,22 +4,35 @@ import funcs import config import cv2 import os +import re -directory = 'storysaver' +temp_directory = ".temp" +directory = 'media/instagram/' + +media_types = { + 'stories' : 'story', + 'posts' : 'post', + 'profile' : 'profile' +} + +os.makedirs(temp_directory, exist_ok=True) def UploadMedia(media): - platform = 'Instagram' - media_id = media['media_id'] username = media['username'] - timestamp = media['timestamp'] user_id = media['user_id'] filepath = media['filepath'] + platform = media['platform'] + + media_id = media['media_id'] + timestamp = media['timestamp'] highlight_id = media['highlight_id'] + post_type = media['post_type'] + file_size = os.path.getsize(filepath) thumbnail_url = None phash = None - if media_id and int(media_id) in existing_files: + if media_id and media_id in existing_files: print('Duplicate file detected. Removing...') os.remove(filepath) return True @@ -32,16 +45,14 @@ def UploadMedia(media): print(f'Error determining media type for {filename}. Skipping...') return False - post_type = funcs.determine_post_type(filepath) - if not post_type: - print(f'Error determining post type for {filename}. Skipping...') - return False - file_hash = funcs.calculate_file_hash(filepath) post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now() width, height = funcs.get_media_dimensions(filepath) + if 0 in (width, height): + print(f'Error getting dimensions for {filename}. 
Skipping...') + return False duration = funcs.get_video_duration(filepath) @@ -52,38 +63,42 @@ def UploadMedia(media): thumb_path = generate_thumbnail(filepath) obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg" - phash = funcs.generate_phash(thumb_path) + phash = funcs.generate_phash(thumb_path) os.remove(thumb_path) except Exception as e: print(f'Error generating thumbnail: {e}. Skipping...') return False - newFilename = f'{media_id}{file_extension}' - server_path = f'media/{post_type}/{username}/{newFilename}' + if media_id: + newFilename = f'{media_id}{file_extension}' + else: + newFilename = f'{file_hash}{file_extension}' + server_path = f'media/{post_type}/{username}/{newFilename}' file_url = f"https://storysave.b-cdn.net/{server_path}" - obj_storage.PutFile(filepath, server_path) # slow as fuck + obj_storage.PutFile(filepath, server_path) if highlight_id: newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id)) newDB.commit() print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}') - post_type = 'story' if post_type == 'stories' else 'post' query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, file_size) - newCursor.execute(query, values) # slower + newCursor.execute(query, values) newDB.commit() - print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}') + + print(f'[{newCursor.rowcount}] records updated.\nFile: {filename}\nURL: {file_url}') + print("="*100) os.remove(filepath) return True -def generate_thumbnail(filepath): - thumb_path = f'temp/{uuid4()}.jpg' +def generate_thumbnail(filepath): + thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg') cap = cv2.VideoCapture(filepath) ret, frame = cap.read() cv2.imwrite(thumb_path, frame) @@ -94,7 +109,7 @@ def get_user_id(username): username = username.lower() if username in existing_users: return existing_users[username] - + return None def get_media_data(filepath): @@ -107,36 +122,97 @@ def get_media_data(filepath): timestamp = parts[1] media_id = parts[2] user_id = parts[3].split('_')[-1].split('.')[0] + platform = 'instagram' highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None - if highlight_id: + + if not user_id.isdigit(): user_id = get_user_id(username) - try: + if media_id.isdigit(): media_id = int(media_id) - except: - print(f'Invalid media_id for file {filename}. 
Skipping...') + else: media_id = None - data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id} + data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id, 'platform': platform} return data def get_media(folder_path): medias = [] - - for root, dirs, files in os.walk(folder_path): - for filename in files: - filepath = os.path.join(root, filename) - + + for media_type, post_type in media_types.items(): + folder_path = os.path.join(directory, media_type) + + if not os.path.exists(folder_path): + continue + + all_files = funcs.get_files(folder_path) + for filepath in all_files: data = get_media_data(filepath) - if data: - medias.append(data) + if not data: + continue + + data['post_type'] = post_type + medias.append(data) return medias +def get_custom_media(): + medias = [] + + folder_path = 'media/instagram' + platform = 'instagram' + + for media_type, post_type in media_types.items(): + folder_path = os.path.join(directory, media_type) + + user_dirs = [d for d in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, d))] + for user_dir in user_dirs: + user_folder_path = os.path.join(folder_path, user_dir) + + if not os.path.exists(user_folder_path): + continue + + username = user_dir + + files = os.listdir(user_folder_path) + + for filename in files: + filepath = os.path.join(user_folder_path, filename) + + if filename.startswith('.'): + continue + + user_id = get_user_id(username) + timestamp = int(os.path.getctime(filepath)) + media_id = os.path.splitext(filename)[0] + + if media_id.isdigit(): + media_id = int(media_id) + if media_id < 10000000: + media_id = None + else: + media_id = None + + data = { + "username": username, + "timestamp": timestamp, + "media_id": media_id, + "user_id": user_id, + "filepath": filepath, + "platform": platform, + "highlight_id": None, + "post_type": post_type + } + + medias.append(data) + + return medias + def dump_instagram(folder_path): medias = get_media(folder_path) + # medias.extend(get_custom_media()) if cleanup_dupe_stories(medias): medias = get_media(folder_path) @@ -150,28 +226,25 @@ def cleanup_dupe_stories(medias): for media in medias: media_id = media['media_id'] filepath = media['filepath'] - + if not media_id: print(f'Invalid media_id for file {filepath}. Skipping...') continue - if media_id in existing_files: - removed_count += 1 - print(f'Found duplicate file {filepath}. Removing...') - os.remove(filepath) - - if '(1)' in filepath: + # Check if media_id is in existing_files OR if filepath contains any '(number)' + if media_id in existing_files or re.search(r'\(\d+\)', filepath): removed_count += 1 print(f'Found duplicate file {filepath}. Removing...') os.remove(filepath) - + continue + print(f'Removed {removed_count} duplicate files.') return removed_count if __name__ == '__main__': print('Starting processing...') - if not os.listdir(directory): + if not funcs.get_files(directory): print('No files to process. 
Exiting...') exit() @@ -179,12 +252,19 @@ if __name__ == '__main__': obj_storage = config.get_storage() - newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL") + print('Getting existing files and users...') + newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram'") existing_files = [image[0] for image in newCursor.fetchall()] - newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL") + print('Getting existing users...') + newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'") existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()} dump_instagram(directory) - print("Processing completed.") \ No newline at end of file + print("Processing completed.") + + newDB.close() + + for mediatype, _ in media_types.items(): + funcs.clean_empty_folders(os.path.join(directory, mediatype)) \ No newline at end of file diff --git a/storysave_dump_custom.py b/storysave_dump_custom.py index b40e848..59f92b4 100644 --- a/storysave_dump_custom.py +++ b/storysave_dump_custom.py @@ -52,8 +52,8 @@ def UploadMedia(media): thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg" phash = funcs.generate_phash(thumb_path) os.remove(thumb_path) - except: - print('Error generating thumbnail. Skipping...') + except Exception as e: + print(f'Error generating thumbnail. Skipping... {e}') return False newFilename = f'{file_hash}{file_extension}' @@ -76,7 +76,7 @@ def UploadMedia(media): return True def generate_thumbnail(filepath): - thumb_path = f'temp/{uuid4()}.jpg' + thumb_path = f'.temp/{uuid4()}.jpg' cap = cv2.VideoCapture(filepath) ret, frame = cap.read() cv2.imwrite(thumb_path, frame) diff --git a/storysave_scanner.py b/storysave_scanner.py index f5ef9c5..b29dc49 100644 --- a/storysave_scanner.py +++ b/storysave_scanner.py @@ -3,17 +3,89 @@ from watchdog.observers import Observer import shutil import time import os +from funcs import get_media_dimensions + +media_dir = 'media' +output_dir = 'instagram' +stories_dir = 'stories' +posts_dir = 'posts' + +def is_story(width, height, tolerance=0.02): + """ + Determine if the given width/height are close to 9:16 (0.5625) ratio + within a certain tolerance. Default tolerance is 2%. + + Tolerance means how close the ratio must be to 9/16 for it + to be considered a story. + """ + if width == 0 or height == 0: + return False + + # Calculate the ratio in portrait orientation (ensure width < height). + # You can also just do width/height, but watch out for landscape images. + # We’ll assume portrait means stories. + ratio = width / height if width < height else height / width + + # The official story ratio is 9/16 = 0.5625 + story_ratio = 9/16 + # Check how far off we are from the official ratio + difference = abs(ratio - story_ratio) + + # If the difference is within the tolerance, we consider it a story + return difference <= (story_ratio * tolerance) + +def determine_post_type(filepath): + """ + Determines if a file is for 'posts' or 'stories' based on its aspect ratio. + - If the path includes 'posts' (as you mentioned), we automatically return 'posts'. + - Otherwise, we check if the aspect ratio matches (roughly) the 9:16 ratio. + - If it does, we say 'stories', otherwise 'posts'. 
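+
+    Example: a 1080x1920 story has ratio 1080/1920 = 0.5625, exactly 9:16,
+    so it returns 'stories'; a 1080x1350 feed post has ratio 0.8, well
+    outside the 2% tolerance, so it returns 'posts'.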
+ """ + # If "posts" is part of the filepath, consider it a post + if 'posts' in filepath.lower(): + return 'posts' + + # Get actual dimensions + try: + width, height = get_media_dimensions(filepath) + except: + # If we fail to get dimensions, return None or some fallback + return None + + # If dimensions are invalid, return None or False + if width == 0 or height == 0: + return None + + # Use our ratio check + if is_story(width, height): + return 'stories' + else: + return 'posts' class DownloadHandler(FileSystemEventHandler): def process_file(self, file_path): file = os.path.basename(file_path) - if 'crdownload' not in file and file.count('~') == 3: - print(f'Moving {file}...') - outputPath = os.path.join('storysaver', file) - try: - shutil.move(file_path, outputPath) - except Exception as e: - print(f'Failed to move file: {e}') + + if 'crdownload' in file: + return + + if file.count('~') != 3: + return + + if not os.path.exists(file_path): + return + + print(f'Moving {file}...') + + post_type = determine_post_type(file_path) + if post_type == 'posts': + media_type_dir = posts_dir + elif post_type == 'stories': + media_type_dir = stories_dir + + outputPath = os.path.join(media_dir, output_dir, media_type_dir, file) + + shutil.move(file_path, outputPath) def on_created(self, event): if not event.is_directory and 'crdownload' not in event.src_path: diff --git a/storysaver.py b/storysaver.py index 0ec9ba8..2ced2b9 100644 --- a/storysaver.py +++ b/storysaver.py @@ -84,19 +84,20 @@ def parse_media_data(media_item): mediaInfo = {'taken_at': taken_at, 'post_type' : post_type, 'media_type': mediaTypes[media_item.media_type]} + if media_item.media_type not in [1, 2]: + print(f"Unsupported media type with ID {media_item.pk}") + return None + + mediaInfo['media_id'] = int(media_item.pk) + if media_item.media_type == 1: # Image - mediaInfo['media_id'] = int(media_item.pk) mediaInfo['fileURL'] = media_item.thumbnail_url - mediaInfo['filename'] = f"{media_item.pk}.jpg" + mediaInfo['filename'] = f"{media_item.pk}.jpg" # Fix this, get the actual file extension elif media_item.media_type == 2: # Video - mediaInfo['media_id'] = int(media_item.pk) mediaInfo['fileURL'] = media_item.video_url - try:mediaInfo['duration'] = media_item.video_duration + try:mediaInfo['duration'] = media_item.video_duration # Fix this, get the actual file extension except:mediaInfo['duration'] = 0 mediaInfo['filename'] = f"{media_item.pk}.mp4" - else: - print(f"Unsupported media type with ID {media_item.pk}") - return None return mediaInfo diff --git a/storysaver_new.py b/storysaver_new.py index 20f1124..9412d9e 100644 --- a/storysaver_new.py +++ b/storysaver_new.py @@ -5,6 +5,7 @@ from uuid import uuid4 from PIL import Image import config import funcs +import json import os def insert_highlight_items(media_ids, highlight_id, title, user_id): @@ -31,23 +32,28 @@ def upload_to_storage(local_path, server_path): print(f"Failed to upload {local_path} to {server_path}. 
Error: {e}") -def login(): +def login(force=False): client = Client() - if os.path.exists("session_data.json"): - client.load_settings("session_data.json") - return client - - with open("p.enc", "rb") as encrypted_file: - encrypted_data = encrypted_file.read() - - fernet = Fernet(open("key.enc", "r").read()) - password = str(fernet.decrypt(encrypted_data), "utf-8") - username = "olivercury" - - auth = input("Enter your 2FA code (leave blank if not enabled): ") - client.login(username=username, password=password, verification_code=auth) - client.dump_settings("session_data.json") + try: + if not force: + client.load_settings("session_data.json") + else: + raise FileNotFoundError + except (FileNotFoundError, json.JSONDecodeError): + with open("p.enc", "rb") as encrypted_file: + encrypted_data = encrypted_file.read() + + fernet = Fernet(open("key.enc", "r").read()) + password = str(fernet.decrypt(encrypted_data), "utf-8") + username = "olivercury" + + auth = input("Enter your 2FA code (leave blank if not enabled): ") + if auth: + client.login(username=username, password=password, verification_code=auth) + else: + client.login(username, password) + client.dump_settings("session_data.json") print("Logged in successfully.") @@ -198,8 +204,11 @@ if __name__ == "__main__": for mediaInfo in medias: filePath = os.path.join('media', mediaInfo['post_type'], username, mediaInfo['filename']) - funcs.download_file(mediaInfo['media_url'], filePath) - + filePath = funcs.download_file(mediaInfo['media_url'], filePath) + + if not filePath: + continue + mediaInfo["hash"] = funcs.calculate_file_hash(filePath) mediaInfo["username"] = username