You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
4.1 KiB
Python

import os
import json
from tqdm import tqdm
from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username
# import config as altpinsConfig
import altpinsConfig
def get_data(filepath):
    """
    Load and return the parsed JSON content of ``filepath``.

    The file is read as UTF-8. Returns ``None`` (after printing an error
    message) when the file cannot be opened, cannot be decoded, or does not
    contain valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as err:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        # Anything else (KeyboardInterrupt, SystemExit, ...) is deliberately
        # allowed to propagate instead of being silently swallowed.
        print(f"Error reading {filepath}: {err}")
        return None
def process_story(story, username, story_type, db, cursor):
    """
    Insert a single story record into ``snapchat_stories``.

    The media id is the file stem of the story's ``mediaUrl`` (query string
    removed). Stories whose media id is already in the module-level
    ``existing_media_ids`` set are skipped.

    Parameters:
        story: dict describing one snap; the keys read are ``snapUrls``,
            ``timestampInSec``, ``snapMediaType`` and ``snapId``.
        username: value stored in the ``username`` column.
        story_type: value stored in the ``storyType`` column.
        db: connection object; ``commit()`` is called after the insert.
        cursor: cursor object used to execute the insert.

    Returns:
        ``False`` when the story was skipped because its media id is already
        known, ``True`` after the insert statement has been executed.
    """
    # `or {}` / `or ''` also tolerate keys that are present but set to None.
    snap_urls = story.get('snapUrls') or {}
    raw_media_url = (snap_urls.get('mediaUrl') or '').split('?')[0]
    media_id = raw_media_url.split('/')[-1].split('.')[0]
    if media_id in existing_media_ids:
        return False

    # The preview entry is a nested mapping; defaulting to a dict (not a str)
    # keeps the chained .get() from raising when the preview is missing.
    preview_entry = snap_urls.get('mediaPreviewUrl') or {}
    raw_preview_url = (preview_entry.get('value') or '').split('?')[0]

    media_url = _cdn_url(raw_media_url)
    media_preview_url = _cdn_url(raw_preview_url)
    timestamp = (story.get('timestampInSec') or {}).get('value', '')
    media_type = story.get('snapMediaType')
    snap_id = (story.get('snapId') or {}).get('value', '')

    query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
    db.commit()
    existing_media_ids.add(media_id)

    # NOTE(review): both branches are empty strings in this source, so the
    # prefix is always empty; presumably status symbols were lost -- confirm.
    print_emoji = '' if cursor.rowcount else ''
    print(f"{print_emoji} Inserted story {media_id}")
    return True


def _cdn_url(url):
    """Return the cf-st.sc-cdn.net URL built from the last path segment of ``url``."""
    return f"https://cf-st.sc-cdn.net/d/{url.split('/')[-1]}"
def process_json(json_path, db, cursor):
    """
    Given a path to a JSON file, parse it and insert relevant data
    into the database.

    Regular stories are inserted first, then highlight snaps ordered by
    descending ``snapIndex``, then spotlight metadata rows. Spotlights whose
    ``deepLinkUrl`` is already in the module-level ``existing_spotlights`` set
    are skipped. The JSON file is deleted once it has been processed.

    If the file cannot be loaded (``get_data`` returns ``None``) nothing is
    inserted and the file is left on disk.

    Parameters:
        json_path: path of the JSON file to import.
        db: connection object; ``commit()`` is called after each insert.
        cursor: cursor object used to execute the inserts.
    """
    # Load JSON data
    data = get_data(json_path)
    if data is None:
        # get_data has already reported the problem; do not delete a file
        # that was never imported.
        return

    username = get_username(data)

    # Insert stories (regular)
    for story in get_stories(data):
        process_story(story, username, 'story', db, cursor)

    # Insert stories (highlights), highest snapIndex first
    highlight_stories = [
        story
        for highlight in get_highlights(data)
        for story in highlight.get('snapList', [])
    ]
    # A missing snapIndex sorts as 0 instead of raising on a None comparison.
    highlight_stories.sort(key=lambda s: s.get('snapIndex') or 0, reverse=True)
    for story in highlight_stories:
        process_story(story, username, 'highlight', db, cursor)

    # Insert spotlight metadata
    insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
    for spotlight in get_spotlight_metadata(data):
        try:
            content_url = spotlight['videoMetadata']['contentUrl']
            # Strip the query string first so the id is always the file stem.
            media_id = content_url.split('?')[0].split('/')[-1].split('.')[0]
            deep_link_url = spotlight['oneLinkParams']['deepLinkUrl'].split('?')[0]
        except (KeyError, TypeError, AttributeError):
            # Entry lacks the expected structure; skip it.
            continue

        if not (media_id and deep_link_url):
            continue
        if deep_link_url in existing_spotlights:
            continue

        deep_link_id = deep_link_url.split('/')[-1]
        description = spotlight.get('description', '')

        cursor.execute(insert_query, (media_id, deep_link_url, description, username, deep_link_id))
        db.commit()
        existing_spotlights.add(deep_link_url)

        # NOTE(review): both branches are empty strings in this source, so the
        # prefix is always empty; presumably status symbols were lost -- confirm.
        print_emoji = '' if cursor.rowcount else ''
        print(f"{print_emoji} Inserted spotlight {media_id}")

    os.remove(json_path)
# Script body: runs at module level, so importing this module performs the import run.
db, cursor = altpinsConfig.gen_connection()
try:
    # Media ids already stored, used by process_story to skip known stories.
    cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
    existing_media_ids = {row[0] for row in cursor.fetchall()}

    # Deep-link URLs already stored, used by process_json to skip known spotlights.
    cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
    existing_spotlights = {row[0] for row in cursor.fetchall()}

    data_dir = 'data'
    files = [f for f in get_files(data_dir) if f.endswith('.json')]

    # Wrap the file list with tqdm to show a progress bar
    for filepath in tqdm(files, desc="Processing files", unit="file"):
        process_json(filepath, db, cursor)
finally:
    # Close the connection even if a query or a file import raises.
    db.close()