# NOTE(review): stripped scraped web-page chrome (repository topic limits and
# the "126 lines / 4.1 KiB / Python" listing metadata) that was pasted in
# above the source and made the file unparseable as Python.
# Standard library
import json
import os

# Third-party
from tqdm import tqdm

# Project-local
import altpinsConfig
from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username
def get_data(filepath):
    """Load and parse a JSON file.

    Args:
        filepath: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON object, or None if the file cannot be read or
        does not contain valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit and hide programming errors.
    except (OSError, json.JSONDecodeError):
        print(f"Error reading {filepath}")
        return None
def process_story(story, username, story_type, db, cursor, seen=None):
    """Insert one Snapchat story row into snapchat_stories.

    Args:
        story: Story dict as produced by the snapchat module (keys like
            'snapUrls', 'timestampInSec', 'snapMediaType', 'snapId').
        username: Account the story belongs to.
        story_type: 'story' or 'highlight'.
        db, cursor: Open MySQL connection and cursor.
        seen: Optional set of media ids already inserted; defaults to the
            module-level existing_media_ids cache (backward compatible).

    Returns:
        False when the story is a duplicate; None after attempting the insert.
    """
    if seen is None:
        seen = existing_media_ids

    snap_urls = story.get('snapUrls', {})
    media_url = snap_urls.get('mediaUrl', '').split('?')[0]
    # media_id is the URL's final path segment without extension.
    media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1]

    if media_id in seen:
        return False

    # Normalize to the canonical CDN host.
    media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}"

    # BUG FIX: the default for 'mediaPreviewUrl' must be a dict ({}), not ''
    # — str has no .get(), so a story without a preview used to raise
    # AttributeError here.
    media_preview_url = snap_urls.get('mediaPreviewUrl', {}).get('value', '').split('?')[0]
    media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}"

    timestamp = story.get('timestampInSec', {}).get('value', '')
    media_type = story.get('snapMediaType')
    snap_id = story.get('snapId', {}).get('value', '')

    # INSERT IGNORE: the DB-side unique key is the last line of defense
    # against races; `seen` is just a fast local cache.
    query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
    db.commit()

    seen.add(media_id)

    # rowcount == 0 means INSERT IGNORE hit an existing row.
    print_emoji = '✅' if cursor.rowcount else '❌'
    print(f"{print_emoji} Inserted story {media_id}")
def process_json(json_path, db, cursor):
    """
    Given a path to a JSON file, parse it and insert relevant data
    into the database.

    Regular stories and highlight snaps go into snapchat_stories via
    process_story(); spotlight entries go into snapchat_metadata.
    The JSON file is deleted only after it has been processed.
    """
    # Load JSON data; bail out (keeping the file for inspection) instead of
    # crashing on get_username(None) when the file is unreadable.
    data = get_data(json_path)
    if data is None:
        return

    username = get_username(data)

    ready_stories = []

    # Insert stories (regular)
    for story in get_stories(data):
        story['storyType'] = 'story'
        ready_stories.append(story)

    # Insert stories (highlights), newest snapIndex first. Default the sort
    # key to 0 so a snap without 'snapIndex' cannot raise during sorting.
    highlights = get_highlights(data)
    highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])]
    highlight_stories.sort(key=lambda x: x.get('snapIndex', 0), reverse=True)
    for story in highlight_stories:
        story['storyType'] = 'highlight'
        ready_stories.append(story)

    for story in ready_stories:
        story_type = story.get('storyType')
        process_story(story, username, story_type, db, cursor)

    # Insert spotlight metadata
    spotlight_metadata = get_spotlight_metadata(data)
    for story in spotlight_metadata:
        try:
            media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1]
            deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0]
        # Narrowed from a bare `except:`; malformed entries are skipped
        # rather than aborting the whole file.
        except (KeyError, TypeError, AttributeError):
            continue

        if not all((media_id, deepLinkUrl)):
            continue

        if deepLinkUrl in existing_spotlights:
            continue

        deepLinkId = deepLinkUrl.split('/')[-1]
        # .get() so a spotlight without a description cannot KeyError
        # outside the try block above.
        description = story.get('description')

        insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId))
        db.commit()

        existing_spotlights.add(deepLinkUrl)

        print_emoji = '✅' if cursor.rowcount else '❌'
        print(f"{print_emoji} Inserted spotlight {media_id}")

    os.remove(json_path)
# ---- script entry: prime the duplicate caches, then process data/*.json ----

db, cursor = altpinsConfig.gen_connection()

# Media ids already stored, so duplicate stories are skipped without a
# DB round trip per story. (The dead `= []` pre-assignments that were
# immediately overwritten have been removed.)
cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
existing_media_ids = {row[0] for row in cursor.fetchall()}

# Spotlight deep-link URLs already stored.
cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
existing_spotlights = {row[0] for row in cursor.fetchall()}

data_dir = 'data'
files = [f for f in get_files(data_dir) if f.endswith('.json')]

# Wrap the file list with tqdm to show a progress bar
for filepath in tqdm(files, desc="Processing files", unit="file"):
    process_json(filepath, db, cursor)

db.close()