"""Download public Snapchat stories and highlights for a list of usernames.

For each username, the script fetches the profile page, parses the embedded
``__NEXT_DATA__`` JSON blob, collects story and highlight snap URLs, and saves
each media file into the ``snapchat/`` directory (id-less highlight snaps go
into ``snapchat/highlights/``). Filenames encode ``user~timestamp~media_id`` so
previously downloaded media can be skipped on later runs.
"""

import json
import os

import requests
from bs4 import BeautifulSoup

# Seconds before any single HTTP request is abandoned; without a timeout a
# stalled server would hang the whole run.
REQUEST_TIMEOUT = 15

# Browser-like User-Agent; Snapchat serves the data-bearing page to browsers.
HEADERS = {
    "user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/129.0.0.0 Safari/537.36")
}


def get_data(username):
    """Return the parsed ``__NEXT_DATA__`` JSON for *username*, or None.

    None is returned (with a message printed) on network failure, a non-200
    response, a missing/empty ``__NEXT_DATA__`` script tag, or malformed JSON.
    """
    url = f"https://www.snapchat.com/add/{username}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Request failed for {username}: {exc}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    # data_script.string is None for an empty tag; json.loads(None) would raise.
    if not data_script or not data_script.string:
        print(f"No data found for {username}.")
        return None
    try:
        return json.loads(data_script.string)
    except json.JSONDecodeError:
        print(f"Malformed profile data for {username}.")
        return None


def parse_stories(stories):
    """Extract id/url/timestamp dicts from raw snap entries.

    Entries missing any of the three fields are dropped.
    """
    parsed_stories = []
    for story in stories:
        snap_id = story.get('snapId', {}).get('value', '')
        snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
        timestamp = story.get('timestampInSec', {}).get('value', '')
        if snap_url and timestamp and snap_id:
            parsed_stories.append({
                "media_id": snap_id,
                "url": snap_url,
                "timestamp": timestamp
            })
    return parsed_stories


def get_stories(data):
    """Return the profile's current story snaps, or [] if absent."""
    try:
        stories = data['props']['pageProps']['story']['snapList']
    except (KeyError, TypeError):
        return []
    return parse_stories(stories)


def get_highlights(data):
    """Collect highlight containers from every known pageProps key."""
    highlights = []
    page_props = data.get('props', {}).get('pageProps', {})
    # Snapchat has used several keys for highlight payloads over time.
    possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
    for key in possible_highlight_keys:
        highlight_data = page_props.get(key, [])
        if highlight_data:
            highlights.extend(highlight_data)
    return highlights


def get_highlight_stories(data):
    """Flatten all highlight containers into a list of story dicts.

    Unlike parse_stories(), entries with missing fields are kept: highlight
    snaps frequently lack a snap id, and the downloader has a fallback
    naming scheme for them.
    """
    stories = []
    for highlight in get_highlights(data):
        for snap in highlight.get('snapList', []):
            stories.append({
                "media_id": snap.get('snapId', {}).get('value', ''),
                "url": snap.get('snapUrls', {}).get('mediaUrl', ''),
                "timestamp": snap.get('timestampInSec', {}).get('value', ''),
            })
    return stories


def get_existing_media_ids(directory):
    """Return the set of media ids already present under *directory*.

    Filenames produced by this script look like ``user~timestamp~media_id.ext``;
    the id is the third ``~``-separated field of the stem. Files that do not
    match that shape are ignored (the original indexed blindly and could raise
    IndexError on a filename with a single ``~``).
    """
    existing_media_ids = set()
    for _root, _dirs, files in os.walk(directory):
        for file in files:
            stem, _ext = os.path.splitext(file)
            parts = stem.split('~')
            if len(parts) < 3 or not parts[2]:
                continue  # not a file this script produced / empty id
            existing_media_ids.add(parts[2])
    return existing_media_ids


def _media_extension(content_type):
    """Map a Content-Type header value to a file extension, or None."""
    if 'image' in content_type:
        return '.jpg'
    if 'video' in content_type:
        return '.mp4'
    return None


def _download_story(username, story, directory, existing_media_ids):
    """Download one story item; return its media id on success, else None.

    Skips items already present (by id or by path), prints a message for every
    skip or failure, and never raises on network errors.
    """
    media_id = story['media_id']
    url = story['url']
    timestamp = story['timestamp']

    # Only a non-empty id is meaningful here; '' must not match a stray
    # empty entry in the set.
    if media_id and media_id in existing_media_ids:
        print(f"Media {media_id} already exists. Skipping download.")
        return None

    # HEAD first: determines the extension without pulling the payload.
    try:
        head = requests.head(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to access media {media_id}: {exc}")
        return None
    if head.status_code != 200:
        print(f"Failed to access media {media_id}")
        return None

    extension = _media_extension(head.headers.get('Content-Type', ''))
    if extension is None:
        print(f"Unknown content type for media {media_id}")
        return None

    if media_id:
        filename = f"{username}~{timestamp}~{media_id}{extension}"
        filepath = os.path.join(directory, filename)
    else:
        # Highlight snaps may lack a snap id; derive a stable name from the
        # URL's basename plus the ETag so reruns still deduplicate.
        media_url_filename = url.split('/')[-1].split('?')[0]
        etag = head.headers.get('ETag', '').replace('"', '')
        filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
        filepath = os.path.join(directory, 'highlights', filename)

    if os.path.exists(filepath):
        print(f"File {filepath} already exists. Skipping download.")
        return None

    # Bug fix: the 'highlights' subdirectory was never created, so writing
    # id-less highlight media used to raise FileNotFoundError.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    try:
        response = requests.get(url, stream=True, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to download media {media_id}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to download media {media_id}")
        return None

    with open(filepath, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    print(f"Downloaded {filename} at {timestamp}")
    return media_id


def main():
    """Fetch and save stories + highlights for every configured username."""
    directory = "snapchat"
    os.makedirs(directory, exist_ok=True)

    usernames = [
        'aleximarianna',
        'little.warren1',
        'neiima22',
        'awesome.nads',
        'noordabash',
        'jaynagirl',
        'sierracannon',
        'stefaniedra6',
        'ciaoxxw',
        'nadia-stone',
        'c.aitknight',
        'aimeejaiii',
        'leonanaomii',
        'ratskelet0n',
    ]

    existing_media_ids = get_existing_media_ids(directory)

    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue

        print("Getting stories...")
        stories = get_stories(data)
        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            downloaded_id = _download_story(username, story, directory,
                                            existing_media_ids)
            if downloaded_id:
                # Remember it so the same snap appearing in both the story
                # list and a highlight is not fetched twice in one run.
                existing_media_ids.add(downloaded_id)


if __name__ == "__main__":
    main()