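"""Download public Snapchat stories and highlights for a list of usernames.

The script scrapes each profile page at snapchat.com/add/<username>, reads the
embedded __NEXT_DATA__ JSON, and saves any story or highlight media it finds
into a local "snapchat" directory, skipping media that was already downloaded.

Third-party dependencies: requests and beautifulsoup4
(pip install requests beautifulsoup4).
"""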
import os
import json

import requests
from bs4 import BeautifulSoup


def get_data(username):
    """Fetch a profile page and return its parsed __NEXT_DATA__ JSON, or None."""
    url = f"https://www.snapchat.com/add/{username}"
    headers = {
        "user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/129.0.0.0 Safari/537.36")
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    data = json.loads(data_script.string)
    return data


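# The parsers below assume the profile payload roughly follows this shape
# (inferred from the key lookups in this script, not from any official schema):
#
#   props.pageProps.story.snapList -> [
#       {
#           "snapId": {"value": "..."},
#           "snapUrls": {"mediaUrl": "..."},
#           "timestampInSec": {"value": "..."}
#       },
#       ...
#   ]
#
# Highlights expose the same per-snap fields under curatedHighlights,
# savedHighlights, or highlights, each entry carrying its own snapList.
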
def parse_stories(stories):
    """Convert raw snapList entries into {media_id, url, timestamp} dicts."""
    parsed_stories = []
    for story in stories:
        snap_id = story.get('snapId', {}).get('value', '')
        snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
        timestamp = story.get('timestampInSec', {}).get('value', '')
        if snap_url and timestamp and snap_id:
            parsed_stories.append({
                "media_id": snap_id,
                "url": snap_url,
                "timestamp": timestamp
            })
    return parsed_stories


def get_stories(data):
    """Return parsed stories from the profile payload, or [] if there are none."""
    try:
        stories = data['props']['pageProps']['story']['snapList']
        return parse_stories(stories)
    except KeyError:
        return []


def get_highlights(data):
    """Collect highlight objects from the profile payload."""
    highlights = []
    page_props = data.get('props', {}).get('pageProps', {})

    # Possible keys that might contain highlights
    possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']

    for key in possible_highlight_keys:
        highlight_data = page_props.get(key, [])
        if highlight_data:
            highlights.extend(highlight_data)
    return highlights


def get_highlight_stories(data):
    """Flatten every highlight's snapList into {media_id, url, timestamp} dicts."""
    stories = []
    highlights = get_highlights(data)
    for highlight in highlights:
        snap_list = highlight.get('snapList', [])

        for snap in snap_list:
            timestamp = snap.get('timestampInSec', {}).get('value', '')
            snap_url = snap.get('snapUrls', {}).get('mediaUrl', '')
            stories.append({
                "media_id": snap.get('snapId', {}).get('value', ''),
                "url": snap_url,
                "timestamp": timestamp
            })

    return stories


def get_existing_media_ids(directory):
    """Scan the download directory and return the set of media IDs already saved."""
    # Take each file's base name (without extension), split it on '~', and use
    # the third element, which is the media ID in the filename convention below.
    existing_media_ids = set()
    for root, _, files in os.walk(directory):
        for file in files:
            if '~' not in file:
                continue

            filename, _ = os.path.splitext(file)
            media_id = filename.split('~')[2]
            existing_media_ids.add(media_id)
    return existing_media_ids


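# Filename convention (written by main() and parsed by get_existing_media_ids):
#   {username}~{timestamp}~{media_id}{extension}
# Highlight snaps that carry no snap ID fall back to
#   {username}~{timestamp}-{url basename}~{ETag}{extension}
# and are stored under snapchat/highlights/ instead.
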
def main():
    directory = "snapchat"
    if not os.path.exists(directory):
        os.makedirs(directory)

    usernames = [
        'aleximarianna', 'little.warren1', 'neiima22', 'awesome.nads', 'noordabash',
        'jaynagirl', 'sierracannon', 'stefaniedra6',
        'ciaoxxw', 'nadia-stone', 'c.aitknight', 'aimeejaiii',
        'leonanaomii', 'ratskelet0n',
    ]

    existing_media_ids = get_existing_media_ids(directory)

    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue

        print("Getting stories...")
        stories = get_stories(data)

        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            media_id = story['media_id']
            url = story['url']
            timestamp = story['timestamp']

            # Check if media already exists
            if media_id in existing_media_ids:
                print(f"Media {media_id} already exists. Skipping download.")
                continue

            # Determine file extension using HEAD request
            response = requests.head(url)
            if response.status_code != 200:
                print(f"Failed to access media {media_id}")
                continue

            content_type = response.headers.get('Content-Type', '')
            if 'image' in content_type:
                extension = '.jpg'
            elif 'video' in content_type:
                extension = '.mp4'
            else:
                print(f"Unknown content type for media {media_id}")
                continue

            if media_id:
                filename = f"{username}~{timestamp}~{media_id}{extension}"
                filepath = os.path.join(directory, filename)
            else:
                media_url_filename = url.split('/')[-1].split('?')[0]
                etag = response.headers.get('ETag', '').replace('"', '')
                filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
                filepath = os.path.join(directory, 'highlights', filename)
                # The highlights subdirectory is not created above, so make sure
                # it exists before trying to write into it.
                os.makedirs(os.path.dirname(filepath), exist_ok=True)

            # Check if file already exists
            if os.path.exists(filepath):
                print(f"File {filename} already exists. Skipping download.")
                continue

            # Download the media
            response = requests.get(url, stream=True)
            if response.status_code != 200:
                print(f"Failed to download media {media_id}")
                continue

            # Save the file
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            print(f"Downloaded {filename} at {timestamp}")


if __name__ == "__main__":
    main()
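# Usage note: there are no command-line options; edit the `usernames` list above
# and run the file directly, e.g. `python snapchat_stories.py` (the script's
# actual filename is not shown in this listing, so that name is only illustrative).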