"""Archive Snapchat stories and highlights for followed users.

When run as a script this module reads the followed usernames from the
database, fetches their story data, downloads every snap that is not already
known (in the database or on disk), uploads it to object storage and records
it in the ``media`` table.

The functions ``main`` and ``UploadMedia`` rely on the module-level globals
``db``, ``cursor`` and ``obj_storage`` that are created in the
``if __name__ == '__main__'`` block at the bottom of the file.
"""

from uuid import uuid4
from datetime import datetime
import json
import os

import cv2
import requests

import config
import funcs
from snapchat import get_stories, get_highlight_stories, get_all_users_data

directory = "snapchat"
data_directory = "data"

# Seconds to wait for a remote server before giving up on a request; without
# a timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT = 30


def get_existing_snap_ids(directory):
    """Return the set of snap ids found in file names under *directory*.

    Files are expected to be named ``username~timestamp~snap_id.ext``; the
    third ``~``-separated field of the name (extension removed) is the snap
    id.  Files whose name has fewer than three fields are ignored.
    """
    existing_snap_ids = set()
    for _root, _dirs, files in os.walk(directory):
        for file in files:
            stem, _ext = os.path.splitext(file)
            parts = stem.split('~')
            # Skip unrelated or malformed names instead of raising IndexError.
            if len(parts) < 3:
                continue
            existing_snap_ids.add(parts[2])
    return existing_snap_ids


def find_duplicate_snap(existing_snaps, snap_id, username):
    """Return the first row of *existing_snaps* matching the snap, else False.

    Each row is indexed as ``(id, filename, username)`` (the column order of
    the query in ``main``).  A row matches when its username equals
    *username* and *snap_id* occurs anywhere in its filename.
    """
    for snap in existing_snaps:
        if username == snap[2] and snap_id in snap[1]:
            return snap
    return False


def archive_data(data, username):
    """Write *data* as JSON to a timestamped file in ``data_directory``."""
    stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data_filename = f"{username}~{stamp}.json"
    data_filepath = os.path.join(data_directory, data_filename)
    with open(data_filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f)
    print(f"Archived data for {username} at {data_filepath}")


def get_file_extension(url):
    """Return ``'.jpg'`` or ``'.mp4'`` for *url* based on its Content-Type.

    A HEAD request is issued; ``None`` is returned when the request fails,
    the status is not 200, or the content type is neither image nor video.
    """
    try:
        response = requests.head(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # A network error for one item must not abort the whole run.
        print(f"Failed to access media {url} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to access media {url}")
        return None

    content_type = response.headers.get('Content-Type', '')
    if 'image' in content_type:
        return '.jpg'
    if 'video' in content_type:
        return '.mp4'

    print(f"Unknown content type for media {url}")
    return None


def download_media(url, filepath):
    """Download *url* to *filepath* and return *filepath*.

    If *filepath* already exists the download is skipped and the path is
    returned unchanged.  ``None`` is returned when the request fails or the
    response status is not 200.
    """
    if os.path.exists(filepath):
        print(f"File {filepath} already exists. Skipping download.")
        return filepath

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to download media {url} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to download media {url}")
        return None

    with open(filepath, 'wb') as f:
        f.write(response.content)
    return filepath


def main():
    """Collect new snaps for every followed user, then download and upload them.

    Uses the module-level ``cursor`` created in the ``__main__`` block.
    """
    os.makedirs(directory, exist_ok=True)
    os.makedirs(data_directory, exist_ok=True)

    cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
    usernames = [row[0] for row in cursor.fetchall()]

    cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
    existing_medias = cursor.fetchall()

    existing_snap_ids = get_existing_snap_ids(directory)

    snapchat_users_data = get_all_users_data(usernames)

    ready_stories = []

    for username in usernames:
        print(f"Getting stories for {username}...")

        data = snapchat_users_data.get(username)
        if not data:
            print(f"Failed to get data for {username}. Skipping.")
            continue

        archive_data(data, username)

        print("Getting stories...")
        stories = get_stories(data)

        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            snap_id = story['snap_id']
            url = story['url']
            timestamp = story['timestamp']

            duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
            if duplicate_snap:
                print(f"Media {snap_id} already exists. Skipping download.")
                continue

            # Check if media already exists
            if snap_id in existing_snap_ids:
                print(f"Media {snap_id} already exists. Skipping download.")
                continue

            # Determine file extension using HEAD request.
            extension = get_file_extension(url)
            if not extension:
                continue

            filename = f"{username}~{timestamp}~{snap_id}{extension}"
            filepath = os.path.join(directory, filename)

            # Check if file already exists
            if os.path.exists(filepath):
                print(f"File {filename} already exists. Skipping download.")
                continue

            media = {
                'username': username,
                'timestamp': timestamp,
                'filepath': filepath,
                'snap_id': snap_id,
                'original_snap_id': story['original_snap_id'],
                'media_url': url,
            }

            ready_stories.append(media)
            # Remember the id so the same snap appearing again in this run
            # (e.g. as both a story and a highlight) is not queued twice.
            existing_snap_ids.add(snap_id)
            print(f"Media {snap_id} ready for download.")

    for media in ready_stories:
        # Download the media.  The URL and target path must come from the
        # queued item itself, not from variables left over from the loop above.
        filepath = download_media(media['media_url'], media['filepath'])
        if not filepath:
            continue

        print(f"Downloaded {os.path.basename(filepath)} at {media['timestamp']}")
        media['filepath'] = filepath
        UploadMedia(media)


def UploadMedia(media):
    """Upload one downloaded snap to object storage and record it in the DB.

    *media* is a dict with the keys ``username``, ``timestamp``,
    ``filepath``, ``snap_id`` and ``original_snap_id``.  For videos a
    thumbnail is generated and uploaded first.  The local file is removed
    after a successful upload.

    Uses the module-level ``obj_storage``, ``cursor`` and ``db``.

    Returns True on success, False when thumbnail processing failed (in which
    case nothing is uploaded or recorded for the media itself).
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']
    filename = os.path.basename(filepath)
    snap_id = media['snap_id']
    original_snap_id = media['original_snap_id']
    thumbnail_url = None
    phash = None

    media_type = funcs.get_media_type(filename)

    file_hash = funcs.calculate_file_hash(filepath)

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_media_dimensions(filepath)

    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
            phash = funcs.generate_phash(thumb_path)
        except Exception as exc:
            # Best effort: skip this media, but say why and do not swallow
            # KeyboardInterrupt/SystemExit as a bare ``except`` would.
            print(f'Error generating thumbnail. Skipping... ({exc})')
            return False
        finally:
            # Always clean up the temporary thumbnail, even on failure.
            if thumb_path and os.path.exists(thumb_path):
                try:
                    os.remove(thumb_path)
                except OSError:
                    pass

    server_path = f'media/snaps/{username}/{filename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)

    cursor.execute(query, values)
    db.commit()
    print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True


def generate_thumbnail(filepath):
    """Write the first frame of the video at *filepath* to a JPEG in ``temp/``.

    Returns the path of the generated thumbnail.

    Raises:
        RuntimeError: if no frame could be read or the image could not be
            written.
    """
    # The temp directory is not created anywhere else in this file.
    os.makedirs('temp', exist_ok=True)
    thumb_path = f'temp/{uuid4()}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f"Could not read a frame from {filepath}")
    if not cv2.imwrite(thumb_path, frame):
        raise RuntimeError(f"Could not write thumbnail {thumb_path}")
    return thumb_path


def get_media_data(filepath):
    """Parse a ``username~timestamp~snap_id.ext`` path into a dict.

    Returns False when the file name has fewer than three ``~``-separated
    fields.  Note that the snap id is stored under the key ``'media_id'``.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) < 3:
        return False

    username = parts[0]
    timestamp = parts[1]
    snap_id = os.path.splitext(parts[2])[0]

    data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'media_id': snap_id}
    return data


def process_snap_ids(filenames):
    """Return the unique snap ids found in *filenames*, in first-seen order.

    The snap id is the third ``~``-separated field of each name with its
    extension removed.  Names with fewer than three fields are skipped.
    """
    snap_ids = []
    seen = set()
    for filename in filenames:
        parts = filename.split('~')
        if len(parts) < 3:
            continue
        snap_id = os.path.splitext(parts[2])[0]
        # Set membership keeps de-duplication linear in the number of names.
        if snap_id not in seen:
            seen.add(snap_id)
            snap_ids.append(snap_id)
    return snap_ids


if __name__ == '__main__':
    print('Starting snappy...')

    db, cursor = config.gen_connection()
    obj_storage = config.get_storage()

    main()

    print("Processing completed.")