# NOTE(review): stripped scraped web-page chrome (repository topic limits and
# the "126 lines / 4.1 KiB / Python" listing metadata) that was pasted in
# above the source and made the file unparseable as Python.
# Standard library
import json
import os

# Third-party
from tqdm import tqdm

# Project-local
import altpinsConfig
from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username
def get_data(filepath):
    """Load and parse a JSON file.

    Args:
        filepath: Path to a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON object, or None if the file cannot be read or
        does not contain valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit and hide programming errors.
    except (OSError, json.JSONDecodeError):
        print(f"Error reading {filepath}")
        return None
def process_story(story, username, story_type, db, cursor, seen=None):
    """Insert one Snapchat story row into snapchat_stories.

    Args:
        story: Story dict as produced by the snapchat module (keys like
            'snapUrls', 'timestampInSec', 'snapMediaType', 'snapId').
        username: Account the story belongs to.
        story_type: 'story' or 'highlight'.
        db, cursor: Open MySQL connection and cursor.
        seen: Optional set of media ids already inserted; defaults to the
            module-level existing_media_ids cache (backward compatible).

    Returns:
        False when the story is a duplicate; None after attempting the insert.
    """
    if seen is None:
        seen = existing_media_ids

    snap_urls = story.get('snapUrls', {})
    media_url = snap_urls.get('mediaUrl', '').split('?')[0]
    # media_id is the URL's final path segment without extension.
    media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1]

    if media_id in seen:
        return False

    # Normalize to the canonical CDN host.
    media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}"

    # BUG FIX: the default for 'mediaPreviewUrl' must be a dict ({}), not ''
    # — str has no .get(), so a story without a preview used to raise
    # AttributeError here.
    media_preview_url = snap_urls.get('mediaPreviewUrl', {}).get('value', '').split('?')[0]
    media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}"

    timestamp = story.get('timestampInSec', {}).get('value', '')
    media_type = story.get('snapMediaType')
    snap_id = story.get('snapId', {}).get('value', '')

    # INSERT IGNORE: the DB-side unique key is the last line of defense
    # against races; `seen` is just a fast local cache.
    query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
    db.commit()

    seen.add(media_id)

    # rowcount == 0 means INSERT IGNORE hit an existing row.
    print_emoji = '✅' if cursor.rowcount else '❌'
    print(f"{print_emoji} Inserted story {media_id}")
def process_json(json_path, db, cursor):
    """
    Given a path to a JSON file, parse it and insert relevant data
    into the database.

    Regular stories and highlight snaps go into snapchat_stories via
    process_story(); spotlight entries go into snapchat_metadata.
    The JSON file is deleted only after it has been processed.
    """
    # Load JSON data; bail out (keeping the file for inspection) instead of
    # crashing on get_username(None) when the file is unreadable.
    data = get_data(json_path)
    if data is None:
        return

    username = get_username(data)

    ready_stories = []

    # Insert stories (regular)
    for story in get_stories(data):
        story['storyType'] = 'story'
        ready_stories.append(story)

    # Insert stories (highlights), newest snapIndex first. Default the sort
    # key to 0 so a snap without 'snapIndex' cannot raise during sorting.
    highlights = get_highlights(data)
    highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])]
    highlight_stories.sort(key=lambda x: x.get('snapIndex', 0), reverse=True)
    for story in highlight_stories:
        story['storyType'] = 'highlight'
        ready_stories.append(story)

    for story in ready_stories:
        story_type = story.get('storyType')
        process_story(story, username, story_type, db, cursor)

    # Insert spotlight metadata
    spotlight_metadata = get_spotlight_metadata(data)
    for story in spotlight_metadata:
        try:
            media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1]
            deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0]
        # Narrowed from a bare `except:`; malformed entries are skipped
        # rather than aborting the whole file.
        except (KeyError, TypeError, AttributeError):
            continue

        if not all((media_id, deepLinkUrl)):
            continue

        if deepLinkUrl in existing_spotlights:
            continue

        deepLinkId = deepLinkUrl.split('/')[-1]
        # .get() so a spotlight without a description cannot KeyError
        # outside the try block above.
        description = story.get('description')

        insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId))
        db.commit()

        existing_spotlights.add(deepLinkUrl)

        print_emoji = '✅' if cursor.rowcount else '❌'
        print(f"{print_emoji} Inserted spotlight {media_id}")

    os.remove(json_path)
# ---- script entry: prime the duplicate caches, then process data/*.json ----

db, cursor = altpinsConfig.gen_connection()

# Media ids already stored, so duplicate stories are skipped without a
# DB round trip per story. (The dead `= []` pre-assignments that were
# immediately overwritten have been removed.)
cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
existing_media_ids = {row[0] for row in cursor.fetchall()}

# Spotlight deep-link URLs already stored.
cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
existing_spotlights = {row[0] for row in cursor.fetchall()}

data_dir = 'data'
files = [f for f in get_files(data_dir) if f.endswith('.json')]

# Wrap the file list with tqdm to show a progress bar
for filepath in tqdm(files, desc="Processing files", unit="file"):
    process_json(filepath, db, cursor)

db.close()