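"""Download public Snapchat stories and highlights for a list of usernames.

The script scrapes each profile page at snapchat.com/add/<username>, reads the
embedded __NEXT_DATA__ JSON, and saves any story or highlight media it finds
into a local "snapchat" directory, skipping media that was already downloaded.

Third-party dependencies: requests and beautifulsoup4
(pip install requests beautifulsoup4).
"""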
import os
import json

import requests
from bs4 import BeautifulSoup


def get_data(username):
    """Fetch a profile page and return its parsed __NEXT_DATA__ JSON, or None."""
    url = f"https://www.snapchat.com/add/{username}"
    headers = {
        "user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/129.0.0.0 Safari/537.36")
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    data = json.loads(data_script.string)
    return data


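# The parsers below assume the profile payload roughly follows this shape
# (inferred from the key lookups in this script, not from any official schema):
#
#   props.pageProps.story.snapList -> [
#       {
#           "snapId": {"value": "..."},
#           "snapUrls": {"mediaUrl": "..."},
#           "timestampInSec": {"value": "..."}
#       },
#       ...
#   ]
#
# Highlights expose the same per-snap fields under curatedHighlights,
# savedHighlights, or highlights, each entry carrying its own snapList.
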
def parse_stories(stories):
    """Convert raw snapList entries into {media_id, url, timestamp} dicts."""
    parsed_stories = []
    for story in stories:
        snap_id = story.get('snapId', {}).get('value', '')
        snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
        timestamp = story.get('timestampInSec', {}).get('value', '')
        if snap_url and timestamp and snap_id:
            parsed_stories.append({
                "media_id": snap_id,
                "url": snap_url,
                "timestamp": timestamp
            })
    return parsed_stories


def get_stories(data):
    """Return parsed stories from the profile payload, or [] if there are none."""
    try:
        stories = data['props']['pageProps']['story']['snapList']
        return parse_stories(stories)
    except KeyError:
        return []


def get_highlights(data):
    """Collect highlight objects from the profile payload."""
    highlights = []
    page_props = data.get('props', {}).get('pageProps', {})

    # Possible keys that might contain highlights
    possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']

    for key in possible_highlight_keys:
        highlight_data = page_props.get(key, [])
        if highlight_data:
            highlights.extend(highlight_data)
    return highlights


def get_highlight_stories(data):
    """Flatten every highlight's snapList into {media_id, url, timestamp} dicts."""
    stories = []
    highlights = get_highlights(data)
    for highlight in highlights:
        snap_list = highlight.get('snapList', [])

        for snap in snap_list:
            timestamp = snap.get('timestampInSec', {}).get('value', '')
            snap_url = snap.get('snapUrls', {}).get('mediaUrl', '')
            stories.append({
                "media_id": snap.get('snapId', {}).get('value', ''),
                "url": snap_url,
                "timestamp": timestamp
            })

    return stories


def get_existing_media_ids(directory):
    """Scan the download directory and return the set of media IDs already saved."""
    # Take each file's base name (without extension), split it on '~', and use
    # the third element, which is the media ID in the filename convention below.
    existing_media_ids = set()
    for root, _, files in os.walk(directory):
        for file in files:
            if '~' not in file:
                continue

            filename, _ = os.path.splitext(file)
            media_id = filename.split('~')[2]
            existing_media_ids.add(media_id)
    return existing_media_ids


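# Filename convention (written by main() and parsed by get_existing_media_ids):
#   {username}~{timestamp}~{media_id}{extension}
# Highlight snaps that carry no snap ID fall back to
#   {username}~{timestamp}-{url basename}~{ETag}{extension}
# and are stored under snapchat/highlights/ instead.
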
def main():
    directory = "snapchat"
    if not os.path.exists(directory):
        os.makedirs(directory)

    usernames = [
        'aleximarianna', 'little.warren1', 'neiima22', 'awesome.nads', 'noordabash',
        'jaynagirl', 'sierracannon', 'stefaniedra6',
        'ciaoxxw', 'nadia-stone', 'c.aitknight', 'aimeejaiii',
        'leonanaomii', 'ratskelet0n',
    ]

    existing_media_ids = get_existing_media_ids(directory)

    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue

        print("Getting stories...")
        stories = get_stories(data)

        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            media_id = story['media_id']
            url = story['url']
            timestamp = story['timestamp']

            # Check if media already exists
            if media_id in existing_media_ids:
                print(f"Media {media_id} already exists. Skipping download.")
                continue

            # Determine file extension using HEAD request
            response = requests.head(url)
            if response.status_code != 200:
                print(f"Failed to access media {media_id}")
                continue

            content_type = response.headers.get('Content-Type', '')
            if 'image' in content_type:
                extension = '.jpg'
            elif 'video' in content_type:
                extension = '.mp4'
            else:
                print(f"Unknown content type for media {media_id}")
                continue

            if media_id:
                filename = f"{username}~{timestamp}~{media_id}{extension}"
                filepath = os.path.join(directory, filename)
            else:
                media_url_filename = url.split('/')[-1].split('?')[0]
                etag = response.headers.get('ETag', '').replace('"', '')
                filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
                filepath = os.path.join(directory, 'highlights', filename)
                # The highlights subdirectory is not created above, so make sure
                # it exists before trying to write into it.
                os.makedirs(os.path.dirname(filepath), exist_ok=True)

            # Check if file already exists
            if os.path.exists(filepath):
                print(f"File {filename} already exists. Skipping download.")
                continue

            # Download the media
            response = requests.get(url, stream=True)
            if response.status_code != 200:
                print(f"Failed to download media {media_id}")
                continue

            # Save the file
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            print(f"Downloaded {filename} at {timestamp}")


if __name__ == "__main__":
    main()
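# Usage note: there are no command-line options; edit the `usernames` list above
# and run the file directly, e.g. `python snapchat_stories.py` (the script's
# actual filename is not shown in this listing, so that name is only illustrative).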