from concurrent.futures import ThreadPoolExecutor, as_completed
import json

from bs4 import BeautifulSoup
import requests

# Desktop Chrome user-agent; Snapchat serves the __NEXT_DATA__ payload to
# regular browser UAs.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
    )
}

# Seconds before a hung HTTP request is abandoned (applies to connect + read).
REQUEST_TIMEOUT = 15


def get_data(username):
    """Fetch a Snapchat profile page and return its embedded Next.js payload.

    Args:
        username: Snapchat handle to look up.

    Returns:
        The parsed ``__NEXT_DATA__`` dict, or ``None`` when the request
        fails, the server returns an error status, or the page carries no
        data script (e.g. unknown user).
    """
    url = f"https://www.snapchat.com/add/{username}"
    try:
        # The original call had no timeout, so a stalled connection could
        # block a worker thread forever.
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Error pages (404, 5xx) don't contain profile data; bail out early
        # instead of parsing them.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed for {username}: {e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    return json.loads(data_script.string)


def get_all_users_data(usernames):
    """Fetch profile data for many usernames concurrently.

    Args:
        usernames: iterable of Snapchat handles.

    Returns:
        Dict mapping every requested username to its data dict, or ``None``
        for usernames whose fetch failed.
    """
    all_data = {}

    def fetch_data(username):
        # Pair the username with its result so completion order is irrelevant.
        return username, get_data(username)

    with ThreadPoolExecutor() as executor:
        futures = {
            executor.submit(fetch_data, username): username
            for username in usernames
        }
        for future in as_completed(futures):
            username = futures[future]
            try:
                username, data = future.result()
                all_data[username] = data
            except Exception as e:
                # One failure must not abort the batch; record the miss so
                # callers still see an entry for every requested username.
                print(f"Error fetching data for {username}: {e}")
                all_data[username] = None
    return all_data


def parse_stories(stories):
    """Parse a raw snapList into a list of normalized story dicts."""
    return [parse_story(story) for story in stories]


def get_stories(data):
    """Extract and parse the public stories from a profile data payload.

    Returns an empty list when *data* is ``None`` (failed fetch), when the
    expected keys are missing, or when an intermediate value has an
    unexpected type.
    """
    if data is None:
        return []
    try:
        stories = data['props']['pageProps']['story']['snapList']
        return parse_stories(stories)
    except (KeyError, TypeError):
        # KeyError: key absent; TypeError: an intermediate node is not a dict.
        return []


def get_highlights(data):
    """Collect highlight entries from every known highlight key.

    Returns a (possibly empty) list; safe to call with ``None`` data.
    """
    highlights = []
    if data is None:
        return highlights
    page_props = data.get('props', {}).get('pageProps', {})
    # Snapchat has shipped highlights under several different keys over time.
    possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
    for key in possible_highlight_keys:
        highlight_data = page_props.get(key, [])
        if highlight_data:
            highlights.extend(highlight_data)
    return highlights


def parse_story(story):
    """Normalize one raw snap entry into a flat story dict.

    Missing fields default to empty strings, so the returned dict always
    has the same shape.
    """
    original_snap_id = story.get('snapId', {}).get('value', '')
    snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
    timestamp = story.get('timestampInSec', {}).get('value', '')
    return {
        "original_snap_id": original_snap_id,
        "snap_id": get_snap_id(snap_url),
        "url": snap_url,
        "timestamp": timestamp,
        "platform": "snapchat",
        "type": "story",
    }


def get_snap_id(url):
    """Return the snap id: the last path segment of *url*, minus its extension."""
    return url.split('/')[-1].split('.')[0]


def get_highlight_stories(data):
    """Flatten every highlight's snapList into a single list of parsed stories."""
    stories = []
    for highlight in get_highlights(data):
        for snap in highlight.get('snapList', []):
            stories.append(parse_story(snap))
    return stories