You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

164 lines
4.9 KiB
Python

11 months ago
import os
import requests
import json
from bs4 import BeautifulSoup
def get_data(username):
    """Fetch the public Snapchat profile page for *username* and return the
    parsed ``__NEXT_DATA__`` JSON payload.

    Returns None when the request fails or the page carries no data script.
    """
    url = f"https://www.snapchat.com/add/{username}"
    headers = {
        "user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/129.0.0.0 Safari/537.36")
    }
    # Timeout prevents the scraper from hanging forever on a stalled
    # connection; the original request had none.
    response = requests.get(url, headers=headers, timeout=30)
    # An error page would still parse as HTML — bail out explicitly instead
    # of feeding it to BeautifulSoup.
    if response.status_code != 200:
        print(f"No data found for {username}.")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    data = json.loads(data_script.string)
    return data
def parse_stories(stories):
    """Convert raw snap entries into story records.

    Each record has ``media_id``, ``url`` and ``timestamp``; entries missing
    any one of the three fields are dropped.
    """
    def _fields(snap):
        # Pull the three nested values, defaulting to '' when absent.
        return (
            snap.get('snapId', {}).get('value', ''),
            snap.get('snapUrls', {}).get('mediaUrl', ''),
            snap.get('timestampInSec', {}).get('value', ''),
        )

    return [
        {"media_id": media_id, "url": media_url, "timestamp": ts}
        for media_id, media_url, ts in map(_fields, stories)
        if media_id and media_url and ts
    ]
def get_stories(data):
    """Return the profile's current stories as parsed records, or [] when
    the payload has no story section."""
    # Keep the try body to just the lookups that can raise KeyError.
    try:
        snap_list = data['props']['pageProps']['story']['snapList']
    except KeyError:
        return []
    return parse_stories(snap_list)
def get_highlights(data):
    """Gather highlight entries from every pageProps key Snapchat has used.

    The payload has carried highlights under several names over time, so all
    known keys are checked and their entries concatenated in order.
    """
    page_props = data.get('props', {}).get('pageProps', {})
    collected = []
    for key in ('curatedHighlights', 'savedHighlights', 'highlights'):
        collected.extend(page_props.get(key) or [])
    return collected
def get_highlight_stories(data):
    """Flatten every highlight's snapList into story records.

    Unlike parse_stories, records are kept even when fields are missing —
    empty strings stand in for absent values.
    """
    return [
        {
            "media_id": snap.get('snapId', {}).get('value', ''),
            "url": snap.get('snapUrls', {}).get('mediaUrl', ''),
            "timestamp": snap.get('timestampInSec', {}).get('value', ''),
        }
        for highlight in get_highlights(data)
        for snap in highlight.get('snapList', [])
    ]
def get_existing_media_ids(directory):
    """Recursively scan *directory* and return the set of media ids already
    on disk.

    Filenames are written as ``username~timestamp~media_id.ext`` (see main),
    so the media id is the third ``~``-separated field of the stem.
    """
    existing_media_ids = set()
    for _root, _dirs, files in os.walk(directory):
        for file in files:
            stem, _ext = os.path.splitext(file)
            parts = stem.split('~')
            # The original indexed parts[2] after only checking that a '~'
            # was present, so a file like "a~b.jpg" raised IndexError.
            # Require all three fields before trusting the name.
            if len(parts) < 3:
                continue
            existing_media_ids.add(parts[2])
    return existing_media_ids
def _download_story(username, story, directory, existing_media_ids):
    """Download one story record into *directory*, skipping known media.

    Media already present (matched by media id, or by an existing file at
    the target path) is not re-downloaded.
    """
    media_id = story['media_id']
    url = story['url']
    timestamp = story['timestamp']

    if media_id in existing_media_ids:
        print(f"Media {media_id} already exists. Skipping download.")
        return

    # HEAD request yields the content type without pulling the body.
    response = requests.head(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to access media {media_id}")
        return
    content_type = response.headers.get('Content-Type', '')
    if 'image' in content_type:
        extension = '.jpg'
    elif 'video' in content_type:
        extension = '.mp4'
    else:
        print(f"Unknown content type for media {media_id}")
        return

    if media_id:
        filename = f"{username}~{timestamp}~{media_id}{extension}"
        filepath = os.path.join(directory, filename)
    else:
        # Highlight snaps can lack an id; fall back to the URL's basename
        # plus the ETag so the name stays unique and re-scannable.
        media_url_filename = url.split('/')[-1].split('?')[0]
        etag = response.headers.get('ETag', '').replace('"', '')
        filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
        filepath = os.path.join(directory, 'highlights', filename)

    if os.path.exists(filepath):
        # Original message printed the literal "(unknown)" — a broken
        # format string; report the actual filename instead.
        print(f"File {filename} already exists. Skipping download.")
        return

    response = requests.get(url, stream=True, timeout=60)
    if response.status_code != 200:
        print(f"Failed to download media {media_id}")
        return
    with open(filepath, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    print(f"Downloaded {filename} at {timestamp}")


def main():
    """Fetch new stories and highlights for every configured username."""
    directory = "snapchat"
    # The highlight branch writes into snapchat/highlights/, which the
    # original never created — os.makedirs builds both levels up front.
    os.makedirs(os.path.join(directory, 'highlights'), exist_ok=True)
    usernames = [
        'aleximarianna', 'little.warren1', 'neiima22', 'awesome.nads', 'noordabash',
        'jaynagirl', 'sierracannon', 'stefaniedra6',
        'ciaoxxw', 'nadia-stone', 'c.aitknight', 'aimeejaiii',
        'leonanaomii', 'ratskelet0n',
    ]
    existing_media_ids = get_existing_media_ids(directory)
    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue
        print("Getting stories...")
        stories = get_stories(data)
        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))
        for story in stories:
            _download_story(username, story, directory, existing_media_ids)


if __name__ == "__main__":
    main()