# Altpins-Instagram/dump_instagram.py

from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image


def _upload_video_thumbnail(filepath, media_id):
    """Save the first frame of a video as a JPEG, upload it and return its URL.

    The frame is written to ``temp/<media_id>.jpg`` and then pushed to
    ``thumbnails/<media_id>.jpg`` through the module-level ``obj_storage``.

    Raises:
        ValueError: if no frame could be read or the JPEG could not be written.
        Exception: anything raised by OpenCV or the storage client.
    """
    thumb_path = f'temp/{media_id}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even when decoding blows up.
        cap.release()

    if not ret:
        raise ValueError(f'could not read a frame from {filepath}')
    if not cv2.imwrite(thumb_path, frame):
        raise ValueError(f'could not write thumbnail to {thumb_path}')

    obj_storage.PutFile(thumb_path, f'thumbnails/{media_id}.jpg')
    return f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"


def UploadMedia(media):
    """Upload one media file to storage and record it in the ``media`` table.

    Relies on the module-level ``obj_storage``, ``newCursor`` and ``newDB``
    objects that are created in the ``__main__`` section of this script.

    Args:
        media: dict with the keys ``media_id``, ``username``, ``timestamp``,
            ``user_id`` and ``filepath`` (the shape produced by getMedias).

    Returns:
        True once the file has been uploaded, recorded and deleted locally;
        False when a video thumbnail could not be produced, in which case the
        file is left on disk and nothing is written to the database.
    """
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    is_video = media_type == 'video'

    post_type = funcs.determine_post_type(filepath, media_type)

    file_hash = funcs.calculate_file_hash(filepath)

    duration = funcs.get_video_duration(filepath) if is_video else 0

    # Fall back to "now" when the file name carried no timestamp.
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if is_video:
        width, height = funcs.get_video_dimensions(filepath)
    else:
        # Close the image promptly: the file is deleted at the end of this
        # function, which an open handle can block on some platforms.
        with Image.open(filepath) as img:
            width, height = img.size

    thumbnail_url = None
    if is_video:
        try:
            thumbnail_url = _upload_video_thumbnail(filepath, media_id)
        except Exception:
            # Best effort: a video without a thumbnail is not uploaded at all.
            print('Error generating thumbnail. Skipping...')
            return False

    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if user_id and 'highlight' in user_id:
        # For highlights the "user id" slot of the file name actually carries
        # the highlight id; the real user id is looked up by username.
        highlight_id = user_id.replace('highlight', '')
        user_id = None

        try:
            newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
            user_id = newCursor.fetchall()[0][0]
        except Exception:
            # No row (IndexError) or a failed query: keep going with no user id.
            print(f'User {username} not found in database. Skipping...')
            user_id = None

        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()

        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is no longer needed once it is stored and recorded.
    os.remove(filepath)

    return True

def getMedias(folder_path):
    """Collect upload metadata for every correctly named file in a folder.

    File names are expected to look like
    ``username~timestamp~media_id~<tail>``. The user id is taken from the
    tail: the text after its last underscore, up to the first dot.

    Names with fewer than four ``~``-separated parts are skipped silently.
    Names whose media id is empty or not an integer are reported and skipped.

    Args:
        folder_path: directory to scan (not recursive).

    Returns:
        A list of dicts with the keys ``username``, ``timestamp``,
        ``media_id`` (int), ``user_id`` and ``filepath``, in directory
        listing order.
    """
    medias = []
    for filename in os.listdir(folder_path):
        parts = filename.split('~')
        if len(parts) < 4:
            continue

        username, timestamp, raw_media_id = parts[:3]
        user_id = parts[3].split('_')[-1].split('.')[0]

        # int('') raises ValueError too, so this also covers an empty id.
        try:
            media_id = int(raw_media_id)
        except ValueError:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        medias.append({
            'username': username,
            'timestamp': timestamp,
            'media_id': media_id,
            'user_id': user_id,
            'filepath': os.path.join(folder_path, filename),
        })
    return medias

def dump_instagram(folder_path):
    """Upload every new media file found in a folder and drop known ones.

    Files whose media id is already listed in the module-level
    ``existing_files`` are deleted from disk and not uploaded. Every other
    file is handed to UploadMedia.

    Args:
        folder_path: directory containing the downloaded media files.
    """
    medias = getMedias(folder_path)

    # Build the lookup once so each membership test is O(1).
    known_ids = set(existing_files)

    pending = []
    for media in medias:
        if media['media_id'] in known_ids:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])
        else:
            # Only files still on disk may be uploaded; a removed duplicate
            # would make UploadMedia run against a missing file.
            pending.append(media)

    for media in pending:
        UploadMedia(media)


if __name__ == '__main__':
    print('Starting processing...')

    # These names are module globals read by UploadMedia and dump_instagram.
    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage API key is hard-coded in source. Consider
    # loading it from configuration or the environment and rotating this key.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    # Media ids already recorded for Instagram; kept as a set because it is
    # only used for membership tests.
    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump_instagram('storysaver/')

    print("Processing completed.")