diff --git a/.DS_Store b/.DS_Store
index f06dbc8..839bb6a 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index f893aa7..44d79ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ uploadlater
 snapchat.json
 /add_to_liked
 /.profiles
+/.vscode
diff --git a/config.py b/config.py
index 1562947..ef6b21d 100644
--- a/config.py
+++ b/config.py
@@ -1,17 +1,3 @@
-import os
-
-MEDIA_DIRECTORY = "media"
-SNAPCHAT_DIRECTORY = "snapchat"
-INSTAGRAM_DIRECTORY = "instagram"
-
-@property
-def get_instagram_directory():
-    return os.path.join(MEDIA_DIRECTORY, INSTAGRAM_DIRECTORY)
-
-@property
-def snapchat_output_dir():
-    return os.path.join(MEDIA_DIRECTORY, SNAPCHAT_DIRECTORY)
-
 username = "doadmin"
 password = "AVNS_2qeFJuiGRpBQXkJjlA6"
 host = "storysave-do-user-13308724-0.c.db.ondigitalocean.com"
diff --git a/db_normalizer.py b/db_normalizer.py
index 71f9d76..acc4452 100644
--- a/db_normalizer.py
+++ b/db_normalizer.py
@@ -1,5 +1,5 @@
 import os
-from funcs import calculate_file_hash, get_media_dimensions, get_media_type, generate_phash
+from funcs import calculate_file_hash, get_media_dimensions, generate_phash
 import config
 
 # --- Configuration & Constants ---
@@ -54,8 +54,11 @@ def update_dimensions(cursor, db, obj_storage):
         obj_storage.DownloadFile(storage_path=server_path, download_path=CACHE_DIR)
 
         # Optionally, you could get the media type if needed:
-        media_type = get_media_type(local_file)
         width, height = get_media_dimensions(local_file)
+
+        if width == 0 or height == 0:
+            print(f"Error getting dimensions for {media_url}")
+            continue
 
         cursor.execute("UPDATE media SET width = %s, height = %s WHERE id = %s;", (width, height, record_id))
         db.commit()
@@ -103,6 +106,31 @@ def update_phash(cursor, db, obj_storage):
         db.commit()
         print(f"[{idx}/{total}] Processed record {record_id} with pHash: {phash}")
 
+def update_user_ids(cursor, db):
+    cursor.execute("SELECT DISTINCT username FROM media WHERE user_id IS NULL AND platform = 'instagram';")
+    usernames = [username[0] for username in cursor.fetchall()]
+    total = len(usernames)
+    print(f"Found {total} usernames to process for user_id updating.")
+
+    for idx, username in enumerate(usernames, start=1):
+        print(f"[{idx}/{total}] Username: {username}")
+
+        cursor.execute("SELECT DISTINCT user_id FROM media WHERE username = %s AND user_id IS NOT NULL;", [username])
+        possible_user_ids = [user_id for user_id, in cursor.fetchall()]
+
+        if len(possible_user_ids) == 0:
+            print(f"No user_id found for {username}")
+            continue
+
+        if len(possible_user_ids) > 1:
+            print(f"Multiple user_ids found for {username}: {possible_user_ids}")
+            continue
+
+        user_id = possible_user_ids[0]
+        cursor.execute("UPDATE media SET user_id = %s WHERE username = %s AND user_id IS NULL;", [user_id, username])
+        db.commit()
+        print(f"[{idx}/{total}] Updated user_id for {username}, Rows affected: {cursor.rowcount}")
+
 def main():
     obj_storage = config.get_storage()
     db, cursor = config.gen_connection()
@@ -111,6 +139,7 @@ def main():
     update_dimensions(cursor, db, obj_storage)
     update_file_size(cursor, db, obj_storage)
     update_phash(cursor, db, obj_storage)
+    update_user_ids(cursor, db)
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
diff --git a/funcs.py b/funcs.py
index cad3dbe..3b3c9de 100644
--- a/funcs.py
+++ b/funcs.py
@@ -107,6 +107,7 @@ def compare_images(image_path1, image_path2):
 def download_file(url, filePath):
     try:
         if os.path.exists(filePath):
+            print(f"File already exists: {filePath}")
             return filePath
 
         if not url:
@@ -198,4
+199,15 @@ def calculate_file_hash(file_path, hash_func='sha256'): while chunk: h.update(chunk) chunk = file.read(8192) - return h.hexdigest() \ No newline at end of file + return h.hexdigest() + +def files_are_identical(file1, file2): + """Compare two files byte-by-byte.""" + with open(file1, "rb") as f1, open(file2, "rb") as f2: + while True: + chunk1 = f1.read(4096) + chunk2 = f2.read(4096) + if chunk1 != chunk2: + return False + if not chunk1: # End of file + return True \ No newline at end of file diff --git a/key.enc b/key.enc deleted file mode 100644 index 4a6e584..0000000 --- a/key.enc +++ /dev/null @@ -1 +0,0 @@ -DH3ucOuYLbJ2Va3lfJPEYQq_6mk_v3R9dnrAYSQHr-Q= \ No newline at end of file diff --git a/kick_downloader.py b/kick_downloader.py deleted file mode 100644 index fffb7aa..0000000 --- a/kick_downloader.py +++ /dev/null @@ -1,229 +0,0 @@ -import requests - -user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" - -videos_cookie = "USER_LOCALE=en; _cfuvid=XPSOsPnC.GolOMHuCpDhhA9IC3dZs2VjyH6VlvHBrak-1736200174559-0.0.1.1-604800000; Zb8Yq4Gq0BDtyTgD2FKm3EBvdj5TIfT8O4T6L4w7=eyJpdiI6IjA0V3Evbi9MN0xWTm5Qc21LSURucXc9PSIsInZhbHVlIjoidGdqejd6bmQxTGhkWTJQNFFxcGxHNEVUaHJ6SUxFU0Z6RU1QdkVvZE94eWVYRS80VWhZYmNQQ3JENHVSZTFQeWV3RnJPeFZtSVRocEZoT3A0dnRMSHpqY1hYS1lkYU1vaFEvN0treTJpMmpGVXp2TEdDVGpGeW9tcHBqUTRpblh4TGhmNGY2L2dURkRvc2h2czNLK2pDbHluL3loemtlc1NRaVZGUXpreTdtK3E0Ny9xV2N2UStiY3BNNzQ5SWhPSVV2YmMyNHJhTTJwV2k1dERhbTRDKzZwZGtCTTlQelBDZ2gwTzBLRVpqL3Nwa3VOcDdyWHRnMHBQYXRZSXlhaVFuRjFOWGMwa3VhTU9ZSVM5NFJ5NnFyNEZNaVJ6SUszOTJ4cmUvZ3BZU2lqYkZrY0tXVS84dExBdjltelpGdnJ3STRCVFJnQjdsMm0yOW5XUUNmOWwwWFNDbUlEbzQwNjNaK1ZxZnJDWXorN0Q5RVlGSnk4TitvbHBJL2NHZERLclhaNnh6elRKdEhhVk1zeEg0YnVyMC81TGF1bmVQSFJrZFdMaGs2d0dPSysvaTVKdm5TQWNobnFNTU1ITGdLYSIsIm1hYyI6IjQ2MGQ5MTFkYTA5YTg4NTVhNDhmZThiYTk3OThiMzJhYWYyMDA3Y2VmMmEzZWRmOGFkN2E1ZTE1MTA5MjRmNTAiLCJ0YWciOiIifQ==; lZiPtxKwAqyahxufRO6jbpdvC9uRDrCYbgQ9Z4aW=eyJpdiI6IldhU0hiT1RoRkc1d2VWdUJqdFZPdkE9PSIsInZhbHVlIjoiaEhuNGppRnNXZXFuK1FuNlZwUUFlSE04ODZnOFBqeWxZcE9vRm1SQ0Q0WFJXS1hXZWk2WXY4TWRpTVJRVnZOSmIzS0NPTEd4Slk1clVhWkVnc1lQSXVJUVYyamNNbDE2YmNsd2psVUdtNDVhTnBjQTZHL3pnejFrS2k2dWZybkdRUVRnbU5XVnJTYXVQOFlYUjRuMjMwdy9ZUXlFRW9xRUlLNzJ0OGZBTGJXVkdMVlJMYVZybVFER0tpOTA3b2hBWVByN0EzV3gxK2RzK2RGWnJJbFdVWDBhVXkrdVZpQXlINWphbTZHaEluZW1wK200SWZFZ3o3VE5DVE15K3FQamMzdDhMWGdmZWtoTUxsbWI1WHNPUHFwNTlnYjBXNmtPOTRKZXh6Ym9XNzJvaytYZk9CVVdqd3pJaG5kWmZXMzdaSW9GZXFjbjk5RHd2Y1hWTGdNSG5GbDhXb1ZEc1NFWnJqMU1ibHBvY0hibkQwckN2WTBEaVJvNDY4aDZnU3dSVkpFQk5ZYlF6alhjZTFHTjRzQ0ttVE5BTmVFWVE3dURLNzBLWThoSG5YMDc0elkwTzhiQml2cnQ3Q2J3VkxJOCIsIm1hYyI6IjFkYTU5MjM1YjNjMzg2ZGM2ZmE5NDE3ZDcwZTRmYTY3MTNmZTkyNDkxY2IzZTExYmY0YjcxNGY2ODg2MTBjMGMiLCJ0YWciOiIifQ==; 
5cjeFdP65qgqhABa5zdSwVe98cLCVREcp88fTZqn=eyJpdiI6IjZLaUJWcVdNY0tlZlVZNVhLUVdSRWc9PSIsInZhbHVlIjoieGs3YkJSQW95VUlYaVhtSWxlc1J2N2lmV1NqWUh2UXRTZlJJT2ZsL1dXVlBMa1k2TnJCOXpEY1Q4OXd4eThLc0d4NVBHMDU0L1Z0MDAwUTdGYkNsc1NsaWc4L2h1UHR6UGdEZE5OVkdMVlczK3BQWFYzNlJGV2t5TW5Hb05YT0VUZnFwRlo3WjhrbXlLcHFvYmErNjNJYTljVXNpQzVFR2dXaDMyU01UUGF2cVRTaGwzVU1HOXVzQWZUTUNUb0N5M3RGQ2o4cFNyTEtLM0MyR0ZxZlZySjFUaHk2NG1ONXVBc2V5QWJtNFhBdXU2TmpqeUF1TitLcHhZUExPdlBPNW5PWXQ1WVgwSGFtWTVsSWRVOVNTMTUyNkc3NE45Vk1mLzFXbEVFS3U3SWdybmVaVzJPUjNDWWp6TituaFRmampZYkxpN1l0V3graEdjcVRaOFNkaVd3OStPalQ0a1NaNmVET3dKOWpkWVpXWGg5bVh3N1FQbXlydnJESkpQbDBrbHA4bUtISkMvcUtuOVVhc1A3eU5sVjhHdENJRUJPNFlUbzl3L3REcmZnQTZhTmU2b2p1aXE2MUNGdkNNalpWNlNqS3UvaUkwRDlqMk10K2hSdmk3cHc9PSIsIm1hYyI6IjNlZTJlYzkzMDhlMDU5YWY5ZDcwN2NiYjUwZDY1OGU1YzBhMzYwNGY4YTVjYzAzZjJmOGI1YzE2YmVmNjE2YTAiLCJ0YWciOiIifQ==; KP_UIDz-ssn=0RQPiLYS1lfwY0WNbRLO8JhfqkXEorOqKCFriSk7OR7XPgad3oB9Uu5jDabu78RCn3Bx91Znxj2ToL8AoPcvcnzFOEAf0T9c3tAk0Zkzl7vxJaNioXGS8DLxooBuJyOzD0eRV9Qe85h7mEDGHWbELEe2O86V0yWMnWjf7Kij2krB; KP_UIDz=0RQPiLYS1lfwY0WNbRLO8JhfqkXEorOqKCFriSk7OR7XPgad3oB9Uu5jDabu78RCn3Bx91Znxj2ToL8AoPcvcnzFOEAf0T9c3tAk0Zkzl7vxJaNioXGS8DLxooBuJyOzD0eRV9Qe85h7mEDGHWbELEe2O86V0yWMnWjf7Kij2krB; __stripe_mid=e3982e51-13f1-4f6d-ab6f-5a73283fc07fc5e5a4; showMatureContent=true; __stripe_sid=737a7032-68c1-44ed-935a-f10ea6fb9a152d2d74; volume=0; tile_player_muted=true; cf_clearance=g7tAuajHsJTtG2hC7RTtSF3DHTWBMjfaS61z6cOaRaI-1736205550-1.2.1.1-BtRpFVXob7L0PKI1tADxWd63iD6oBlyyZx7uzIjAyG6_81uPe7JX8EuXjo_XhcznBUS3fYap5JfMtU0PGk81HOIKKFxwZ728nnbtpI9jQz1ZKOlO3yYn96K1H45zEwsQJvJvJ0YElGhgTQ5fUhCAA31FQ4JxGKLQvrU3ivaTLKtdTN4aZWdjrxz8wIiKsN4Eb0QWKK0n9vqcR.gvzlJiaH2HVrSDrZikbJgjO_OV9XoXIVdQ4Vm5LEjtlsvMzayil.b8Wzz1B1OswQHtGphVlNqhUsogAocE6f3USbYm4.ZeMqOqEL9wAVPU4umivC_wDKIOI6w7RW8qGvPPzD9tMm0TpbWCJy3fyI2kIBb7ifnoP3B1lTjf5T28FbgnntDERGNAoR8Uu1VQzHhZ83ePGg; __cf_bm=wrDxfttp5jbVeo49KftpeCTyQWg11bWzReo8MJwPp8I-1736205876-1.0.1.1-F8EmzFhgGpnceV91XVFMzdkl4RYB1hGfCSpXev.palD84jswmL_HV0DslH2qsw1Z92c.4S0pcqJuiehm.otr6w; sort_options_clips=sort=view?range=all; XSRF-TOKEN=eyJpdiI6IlpDQ2o5Z0Vuemc5aFROY29LaTFhaEE9PSIsInZhbHVlIjoiS3lqUE5OUlljcGtHa2lSUHdaS1FDdk94QmVza1kzNnQ4S2dnclBXakp3UWRtdUg1NkdNTmNCL3Zzdkh6OWZhak14dEVLblZDTFFUcGZYaTVMVWhsQ2UrcEpPN085Tkh1d3Vwc2pqbnhtdVZiemJpTDB1dC9Bc29DSzhsa2JPK0oiLCJtYWMiOiIyYzU1M2ZiODIxM2U0NzU3NjIzMzk1ZWFkYjZhZjcwOGQxZjJkMzY5YmMwODExNDJiYWE5MWQ0NjdjZGUzNmVmIiwidGFnIjoiIn0=; kick_session=eyJpdiI6Ikd5OXJCMzFqYUxMcVNKRVo4UjJUSkE9PSIsInZhbHVlIjoiUGtRVUxzYlBrTmhqR1NUYi9KZTVTTW40TGh0dU9GNnZKdkJlK1BxcmtWeWhtZG9IeXdMWnB0OFovTHVoamY4cnlFVy9CRnhFcG02d1dnQlNuYWhBZ0doUUt4V2Z4ZGVLaHh6T0wxbEtidmk1OTBQeS9iUTVydVhwLy9CSEpxR1giLCJtYWMiOiJjMzkwYzljZDk2MDE0MGY5OGU3NTBiNjAwOTZjOGZkNjhjMmE1NjkwOGM2MTZlNDA0MzA1NDJlM2I0NmI2OWFiIiwidGFnIjoiIn0=; 
zEtssaQ3IfAtrsEcXvsiAHe6PsVsric6EGVK5vAl=eyJpdiI6IjU3a2w2ZFNmanQxSTJ0amZiQ2R2RVE9PSIsInZhbHVlIjoidjhTMTkwaFkrT0xONHBtWHk3Q2IvVWtkaERYOGgwNzd4aEF6L2w1M2tMUFRnOWpGa0MvSzJmbW4yc0MwMmVId2dqck5lM1dRQjZRWDVCZmM5WmVMU0ZVb3hEOFFwZC8yaEdGYm9jem5YRWxRdFFkUmpTdlRhUWhLeE1sZ0c5ZWEzOHliSWhuVS9wUnJhb21LM09zUnhCTHE2OTNublFhdWZyMmVQUnpWTVpVUEdjSlFwZDlxTVZjTUNMVVUrc0Z6N3hnNkw0WEVHa1o4SWtFL043bzlxR1UwYTIrNzY4RFhQRmhnclp3NS8zdy83WFNtMEd3SW1BZVVETmtIN1hxSXJSS1hxaXBxTUI0ZWNtbVVsd1V2c1VSUkhyVGh4VWpyNFM3Q2g0cExFS0pvamdrWDNnVG1KamVSTngxVGZQR3R3cFd6TU5wcEhabkN2dGU2T3BpR1lLUHFubDYrTWgxd3RTeVMva2NLeFhUakxxN3lsUHVzVXVjZnVNUXFXOTNxR0hIbmhWNnl5QUltWi94VlViWitnZlZKUGhpbFFhUTJPby9iekFQU2VXUU5YMDBFTTg0b0RUNEFDaExzcEJkKyIsIm1hYyI6IjJkNjAwMDI2NWE1YzFhNWRhMWU3NGYzMjUzYmU1NDZjNDQyYmRjMjVhYTE0ZWEzMjZhYTM2NjA2OWI3ZmUzYzAiLCJ0YWciOiIifQ==; _dd_s=logs=1&id=9164d60e-ee5c-42e5-9d5f-6df4b2193d31&created=1736200174377&expire=1736206971407" -streams_cookie = "USER_LOCALE=en; Zb8Yq4Gq0BDtyTgD2FKm3EBvdj5TIfT8O4T6L4w7=eyJpdiI6IjA0V3Evbi9MN0xWTm5Qc21LSURucXc9PSIsInZhbHVlIjoidGdqejd6bmQxTGhkWTJQNFFxcGxHNEVUaHJ6SUxFU0Z6RU1QdkVvZE94eWVYRS80VWhZYmNQQ3JENHVSZTFQeWV3RnJPeFZtSVRocEZoT3A0dnRMSHpqY1hYS1lkYU1vaFEvN0treTJpMmpGVXp2TEdDVGpGeW9tcHBqUTRpblh4TGhmNGY2L2dURkRvc2h2czNLK2pDbHluL3loemtlc1NRaVZGUXpreTdtK3E0Ny9xV2N2UStiY3BNNzQ5SWhPSVV2YmMyNHJhTTJwV2k1dERhbTRDKzZwZGtCTTlQelBDZ2gwTzBLRVpqL3Nwa3VOcDdyWHRnMHBQYXRZSXlhaVFuRjFOWGMwa3VhTU9ZSVM5NFJ5NnFyNEZNaVJ6SUszOTJ4cmUvZ3BZU2lqYkZrY0tXVS84dExBdjltelpGdnJ3STRCVFJnQjdsMm0yOW5XUUNmOWwwWFNDbUlEbzQwNjNaK1ZxZnJDWXorN0Q5RVlGSnk4TitvbHBJL2NHZERLclhaNnh6elRKdEhhVk1zeEg0YnVyMC81TGF1bmVQSFJrZFdMaGs2d0dPSysvaTVKdm5TQWNobnFNTU1ITGdLYSIsIm1hYyI6IjQ2MGQ5MTFkYTA5YTg4NTVhNDhmZThiYTk3OThiMzJhYWYyMDA3Y2VmMmEzZWRmOGFkN2E1ZTE1MTA5MjRmNTAiLCJ0YWciOiIifQ==; lZiPtxKwAqyahxufRO6jbpdvC9uRDrCYbgQ9Z4aW=eyJpdiI6IldhU0hiT1RoRkc1d2VWdUJqdFZPdkE9PSIsInZhbHVlIjoiaEhuNGppRnNXZXFuK1FuNlZwUUFlSE04ODZnOFBqeWxZcE9vRm1SQ0Q0WFJXS1hXZWk2WXY4TWRpTVJRVnZOSmIzS0NPTEd4Slk1clVhWkVnc1lQSXVJUVYyamNNbDE2YmNsd2psVUdtNDVhTnBjQTZHL3pnejFrS2k2dWZybkdRUVRnbU5XVnJTYXVQOFlYUjRuMjMwdy9ZUXlFRW9xRUlLNzJ0OGZBTGJXVkdMVlJMYVZybVFER0tpOTA3b2hBWVByN0EzV3gxK2RzK2RGWnJJbFdVWDBhVXkrdVZpQXlINWphbTZHaEluZW1wK200SWZFZ3o3VE5DVE15K3FQamMzdDhMWGdmZWtoTUxsbWI1WHNPUHFwNTlnYjBXNmtPOTRKZXh6Ym9XNzJvaytYZk9CVVdqd3pJaG5kWmZXMzdaSW9GZXFjbjk5RHd2Y1hWTGdNSG5GbDhXb1ZEc1NFWnJqMU1ibHBvY0hibkQwckN2WTBEaVJvNDY4aDZnU3dSVkpFQk5ZYlF6alhjZTFHTjRzQ0ttVE5BTmVFWVE3dURLNzBLWThoSG5YMDc0elkwTzhiQml2cnQ3Q2J3VkxJOCIsIm1hYyI6IjFkYTU5MjM1YjNjMzg2ZGM2ZmE5NDE3ZDcwZTRmYTY3MTNmZTkyNDkxY2IzZTExYmY0YjcxNGY2ODg2MTBjMGMiLCJ0YWciOiIifQ==; 5cjeFdP65qgqhABa5zdSwVe98cLCVREcp88fTZqn=eyJpdiI6IjZLaUJWcVdNY0tlZlVZNVhLUVdSRWc9PSIsInZhbHVlIjoieGs3YkJSQW95VUlYaVhtSWxlc1J2N2lmV1NqWUh2UXRTZlJJT2ZsL1dXVlBMa1k2TnJCOXpEY1Q4OXd4eThLc0d4NVBHMDU0L1Z0MDAwUTdGYkNsc1NsaWc4L2h1UHR6UGdEZE5OVkdMVlczK3BQWFYzNlJGV2t5TW5Hb05YT0VUZnFwRlo3WjhrbXlLcHFvYmErNjNJYTljVXNpQzVFR2dXaDMyU01UUGF2cVRTaGwzVU1HOXVzQWZUTUNUb0N5M3RGQ2o4cFNyTEtLM0MyR0ZxZlZySjFUaHk2NG1ONXVBc2V5QWJtNFhBdXU2TmpqeUF1TitLcHhZUExPdlBPNW5PWXQ1WVgwSGFtWTVsSWRVOVNTMTUyNkc3NE45Vk1mLzFXbEVFS3U3SWdybmVaVzJPUjNDWWp6TituaFRmampZYkxpN1l0V3graEdjcVRaOFNkaVd3OStPalQ0a1NaNmVET3dKOWpkWVpXWGg5bVh3N1FQbXlydnJESkpQbDBrbHA4bUtISkMvcUtuOVVhc1A3eU5sVjhHdENJRUJPNFlUbzl3L3REcmZnQTZhTmU2b2p1aXE2MUNGdkNNalpWNlNqS3UvaUkwRDlqMk10K2hSdmk3cHc9PSIsIm1hYyI6IjNlZTJlYzkzMDhlMDU5YWY5ZDcwN2NiYjUwZDY1OGU1YzBhMzYwNGY4YTVjYzAzZjJmOGI1YzE2YmVmNjE2YTAiLCJ0YWciOiIifQ==; __stripe_mid=e3982e51-13f1-4f6d-ab6f-5a73283fc07fc5e5a4; showMatureContent=true; tile_player_muted=true; sort_options_clips=sort=view?range=all; 
stream_quality_cookie=720; volume=0; KP_UIDz-ssn=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; KP_UIDz=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; _cfuvid=FXvxAU5ASgX1ma90hpSGRpsdvI40Yp7YxDZQPhe3XdU-1736338951992-0.0.1.1-604800000; __cf_bm=vJqoLZMQnChEqTCPLZEaEtqpIJSTVzMVv3mhMPrV2N4-1736340611-1.0.1.1-p3TImgDhDkBWUjy7deWagLxWt_DCTwIjHKi6fBXMA4CIpQafPKu1R.Ji9S7fJkwDyB7GSr26DHD8DAPsQitxHw; cf_clearance=DoLlos55rsOW.N_5EgzqviM6FVlKodb_fA8_ZodlMxQ-1736340648-1.2.1.1-ux_OFOYU2QkchiJg82ggjSw0mbyNIVSs8OgUiyt7dBcsmgtBX6q3Kv4dUKYOTZyfpv27Lho44REjH5BsDdA2w4O2a_.R.fC_8joXmg_zA8WopHj8OV1HnOCzpLbaHD0n2MgZV5swsLmpACqYRDTfHCDKlGbR_rz39_G3YX6Ze3pyhJS7Gg8o5ynfMs3FqF_5M3xYn7GYIJoGuQLBNdLDzqjAWklZfFfHqrhs1pQbkl87E12McHdw0Y9UeSL05.JZTkwato25lcJKIRAEyiYgiXQZu12qnTv5tmbhYyTn8Hlr1YgkCYV0NRO.6TmWnwxmpocHUISSkz3DxekgZb60ad17wojbSVuR44XjuzkU1pLBoGBR4Xpt8QNV9eePx4W4Uc6pjK0gA8uPFBV6mDkUew; XSRF-TOKEN=eyJpdiI6IlF2alhLWGZES1VwQlhxMEdIVEwvcUE9PSIsInZhbHVlIjoiVmlJVTlMV0MzelAyVDR5cW53LzJPVGdUWlVDMlFoMTBFTmJYUmc2WENXMUdYYlhOSTdKc0JaT3NrdEE2VnhDekgvZ0grUnNLSis1TEZJc0NhazZyZGtDRG1nWmJFM0pWdnFKZ1ZndUV4cnc2WWVlellabFA0TC9MMTVGbjdpczIiLCJtYWMiOiJiMjBkOGJhMjUyNjhlODJkOTFhMTBmNDE0OThlMmIxZjMzMDVkNDE0NzkyOGVkNzJmMTk5ZTEwYjMxZGY1M2QwIiwidGFnIjoiIn0=; kick_session=eyJpdiI6ImxZOW5UOFB0VnV2Wkd3d2J3L3ZaRFE9PSIsInZhbHVlIjoid20rNDRvVWlEUkloMGxvYkRXcE5zUDNtZWxFVUVpcVUwcjcrTTQrZ0Rha3RBWjVnSmU4Q0xyUlRFckRVZFRQQmZwWS9ka3RKcW5JRE80MWxnSmhYbVpkSko4RC9nQUFDd1BSdkxPbWpQSUk4ZExDSCtrajUzMzFrMHVydi8zbFYiLCJtYWMiOiJjOTIyNTljMDNmZmU4OGFhZGVjYzViYTExZDA0MjU2ZmNkZDhhZGRhMzA4NTI0NzViMjk4NTRmZGNiZTc2NjVmIiwidGFnIjoiIn0=; zEtssaQ3IfAtrsEcXvsiAHe6PsVsric6EGVK5vAl=eyJpdiI6IlJxNU1tUEs5MHVYUU1OWjhuYjNmRWc9PSIsInZhbHVlIjoiUjZ2enh4L3htL0VObThGdEIrcGFuMmYxTUlIc1l3M2hEeFRIT08wak9heUw3ZkMvM0ROZWNsZUVMOW5tU0hDWW5hTUtrdDZManpYQzUyNGtaaXBKdnZ4Q041bTlHNEY2Tkd2TjNNNGc4V2NGOEwzdjkxVlQrMGVDL2VmVnE2T1NSbE5KQlAxUGpWSHkxV0FUN3RvV3Vwck5OTmxncWZraGtDZ1QzZUtwQ2l2dWllbXlCbEwweUEzOHBKL1JzWk9OaFN2U1BuTlJwOW1samUzdHVTUnUwZk02ZEZSeFFObmQxQ0VqU09DUkYzL0w2YUdQc3BOdlZUd2xkVkJtZ29Tb0hRRkhXeVc4NHJoVXdSelVOUmJKYU1YWHEvRklkT2dySHdtQnFDL3B6K1RMb1N6Qm0yWmliRTRyYXV4K0hzQUZBcktXdGpWclN1cUI5UGlpcW9UK2paaC95SnVtR3d3ZVBqZWg0TG9tc3U1QXVtUUJRUzkzalhueEVDZWovRlAwRnRIcE9oREREWWVKRVNQdFZ0Z0RsRXhUd2c1ZzhiSVgzNUI0QmNwQjFYUT0iLCJtYWMiOiIzODVkOTU3ZDFlYjY0NjlmOTBmOTYxYTBlMWM5NTE5M2VlYjNmNDQwZDljNjhkMmUxOGNlNDc4NDU4MjNhYTMxIiwidGFnIjoiIn0=; _dd_s=logs=1&id=fc0e4d94-77f2-4baf-9721-9f07ebaa7bc7&created=1736340647984&expire=1736341553875; sortOptions=sort=viewers_low_to_high" -categories_cookie = "USER_LOCALE=en; 
Zb8Yq4Gq0BDtyTgD2FKm3EBvdj5TIfT8O4T6L4w7=eyJpdiI6IjA0V3Evbi9MN0xWTm5Qc21LSURucXc9PSIsInZhbHVlIjoidGdqejd6bmQxTGhkWTJQNFFxcGxHNEVUaHJ6SUxFU0Z6RU1QdkVvZE94eWVYRS80VWhZYmNQQ3JENHVSZTFQeWV3RnJPeFZtSVRocEZoT3A0dnRMSHpqY1hYS1lkYU1vaFEvN0treTJpMmpGVXp2TEdDVGpGeW9tcHBqUTRpblh4TGhmNGY2L2dURkRvc2h2czNLK2pDbHluL3loemtlc1NRaVZGUXpreTdtK3E0Ny9xV2N2UStiY3BNNzQ5SWhPSVV2YmMyNHJhTTJwV2k1dERhbTRDKzZwZGtCTTlQelBDZ2gwTzBLRVpqL3Nwa3VOcDdyWHRnMHBQYXRZSXlhaVFuRjFOWGMwa3VhTU9ZSVM5NFJ5NnFyNEZNaVJ6SUszOTJ4cmUvZ3BZU2lqYkZrY0tXVS84dExBdjltelpGdnJ3STRCVFJnQjdsMm0yOW5XUUNmOWwwWFNDbUlEbzQwNjNaK1ZxZnJDWXorN0Q5RVlGSnk4TitvbHBJL2NHZERLclhaNnh6elRKdEhhVk1zeEg0YnVyMC81TGF1bmVQSFJrZFdMaGs2d0dPSysvaTVKdm5TQWNobnFNTU1ITGdLYSIsIm1hYyI6IjQ2MGQ5MTFkYTA5YTg4NTVhNDhmZThiYTk3OThiMzJhYWYyMDA3Y2VmMmEzZWRmOGFkN2E1ZTE1MTA5MjRmNTAiLCJ0YWciOiIifQ==; lZiPtxKwAqyahxufRO6jbpdvC9uRDrCYbgQ9Z4aW=eyJpdiI6IldhU0hiT1RoRkc1d2VWdUJqdFZPdkE9PSIsInZhbHVlIjoiaEhuNGppRnNXZXFuK1FuNlZwUUFlSE04ODZnOFBqeWxZcE9vRm1SQ0Q0WFJXS1hXZWk2WXY4TWRpTVJRVnZOSmIzS0NPTEd4Slk1clVhWkVnc1lQSXVJUVYyamNNbDE2YmNsd2psVUdtNDVhTnBjQTZHL3pnejFrS2k2dWZybkdRUVRnbU5XVnJTYXVQOFlYUjRuMjMwdy9ZUXlFRW9xRUlLNzJ0OGZBTGJXVkdMVlJMYVZybVFER0tpOTA3b2hBWVByN0EzV3gxK2RzK2RGWnJJbFdVWDBhVXkrdVZpQXlINWphbTZHaEluZW1wK200SWZFZ3o3VE5DVE15K3FQamMzdDhMWGdmZWtoTUxsbWI1WHNPUHFwNTlnYjBXNmtPOTRKZXh6Ym9XNzJvaytYZk9CVVdqd3pJaG5kWmZXMzdaSW9GZXFjbjk5RHd2Y1hWTGdNSG5GbDhXb1ZEc1NFWnJqMU1ibHBvY0hibkQwckN2WTBEaVJvNDY4aDZnU3dSVkpFQk5ZYlF6alhjZTFHTjRzQ0ttVE5BTmVFWVE3dURLNzBLWThoSG5YMDc0elkwTzhiQml2cnQ3Q2J3VkxJOCIsIm1hYyI6IjFkYTU5MjM1YjNjMzg2ZGM2ZmE5NDE3ZDcwZTRmYTY3MTNmZTkyNDkxY2IzZTExYmY0YjcxNGY2ODg2MTBjMGMiLCJ0YWciOiIifQ==; 5cjeFdP65qgqhABa5zdSwVe98cLCVREcp88fTZqn=eyJpdiI6IjZLaUJWcVdNY0tlZlVZNVhLUVdSRWc9PSIsInZhbHVlIjoieGs3YkJSQW95VUlYaVhtSWxlc1J2N2lmV1NqWUh2UXRTZlJJT2ZsL1dXVlBMa1k2TnJCOXpEY1Q4OXd4eThLc0d4NVBHMDU0L1Z0MDAwUTdGYkNsc1NsaWc4L2h1UHR6UGdEZE5OVkdMVlczK3BQWFYzNlJGV2t5TW5Hb05YT0VUZnFwRlo3WjhrbXlLcHFvYmErNjNJYTljVXNpQzVFR2dXaDMyU01UUGF2cVRTaGwzVU1HOXVzQWZUTUNUb0N5M3RGQ2o4cFNyTEtLM0MyR0ZxZlZySjFUaHk2NG1ONXVBc2V5QWJtNFhBdXU2TmpqeUF1TitLcHhZUExPdlBPNW5PWXQ1WVgwSGFtWTVsSWRVOVNTMTUyNkc3NE45Vk1mLzFXbEVFS3U3SWdybmVaVzJPUjNDWWp6TituaFRmampZYkxpN1l0V3graEdjcVRaOFNkaVd3OStPalQ0a1NaNmVET3dKOWpkWVpXWGg5bVh3N1FQbXlydnJESkpQbDBrbHA4bUtISkMvcUtuOVVhc1A3eU5sVjhHdENJRUJPNFlUbzl3L3REcmZnQTZhTmU2b2p1aXE2MUNGdkNNalpWNlNqS3UvaUkwRDlqMk10K2hSdmk3cHc9PSIsIm1hYyI6IjNlZTJlYzkzMDhlMDU5YWY5ZDcwN2NiYjUwZDY1OGU1YzBhMzYwNGY4YTVjYzAzZjJmOGI1YzE2YmVmNjE2YTAiLCJ0YWciOiIifQ==; __stripe_mid=e3982e51-13f1-4f6d-ab6f-5a73283fc07fc5e5a4; showMatureContent=true; tile_player_muted=true; sort_options_clips=sort=view?range=all; stream_quality_cookie=720; volume=0; KP_UIDz-ssn=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; KP_UIDz=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; sortOptions=sort=viewers_high_to_low; XSRF-TOKEN=eyJpdiI6InZtV0pwOUl2WVliMzVBR1Q0Nm0zM1E9PSIsInZhbHVlIjoidVY0USt1R2Joc0FRLy9LMGlPUVdseHpYTnBOTm1wQitYV2JTSUpETk0rRHdNR1RZS0d4aWc4aEdKcVdiTkhXZk1FZFR3Qm5ERVpLazdnY2ozaVRYSHJPMjRUMnp0RFRkZS9wNFo0TnZZbEg0M1JJcUpBTTFDQ2VpTXUzY2o3OGQiLCJtYWMiOiJjNzRlOWI1MDdiMTA5NjQyZGM0MmJkNjdmMDU2MjJiNzdkZWU2NWE2YzQ1NGUwN2YwNmNhOGNjMTUzMzdiNDM5IiwidGFnIjoiIn0=; 
kick_session=eyJpdiI6IjRGWlBqVUx1aGZHQnc1OThleXVoSFE9PSIsInZhbHVlIjoiODZqREZKOG0va2VwczlOQnM2ejRSVTFVeW9rWnVVQWNRMG93bEpnaTN0SkoxNHIxUzRRbkJ1WjlTa2dVWjdEbzdIY2VOQzRaY2NyOGNpRU03aHU4bHRxRHEybGtHZ1FiMzgzR0RZaFJseHlyL1ZTKy9zdSs0K3Fmd0w4NVJqcWQiLCJtYWMiOiIxYTgwMjQ5MWY5MzY1Yzk2ZDZlYmEwN2E2MWJlODc3MWNjMWZmN2YzMDQ4OWFlMGQ5NWE3YWZlYTkzNTE0M2VkIiwidGFnIjoiIn0=; zEtssaQ3IfAtrsEcXvsiAHe6PsVsric6EGVK5vAl=eyJpdiI6Ik1WeERNc2RjR1lNRUhveTJoYkVsR1E9PSIsInZhbHVlIjoiRUdaQXRleEJMWHBGd25sN29CRE5hdjRMb3p0UTJMN21qMzd6SnErZTdyS29jSjYrU3RvT3JTK1BKMFMrNzJzUzROdGp3YWl5MnFuS3A3Z0VnbWZxN2c1MXB3MmtVS3dwUC9ibFBFZHIyNGU1L1FYWVBLdXY3QXNWeUdKajE4QmdWanFKUzd4SWpwVC9vQXl3QVJUTm85V3ZKVnFWaGhqVTJ0TDNpSWh6dE1QVVc2STBhaXA4TlUwVjd6dkFnU1FoRW84SmpvOEdXbUFOVjkwZHRhZ2lmNVZ6Qm9IaGpDUEpwcXdEbGVDOGQ4M3BiSmpoYnNpTERLVjNWTXdWNjdYc0xYTFJWSGZvM0oyUzhOV1ZEbEdwTVJDUTV4dFdiVGk0RVNSaEVKZ00venBlNC93NFY5RkNLQmpHOU5OVkozNDZDdjliUkRHR1Bib2NsM2YxKzdiU0pnTkZDNWE0ai9PTkNlR0NqeURSSXNzR21hMkdSSmRUZEpYaWJhTUxzSktxUmlSL3JKd3RuRjRKSU1udlh5VWNGLzNPaU1laWdKVVZXR1NSbzQwSlZMdFJ1bHh1TWxQdDdBOEpLUEk1UUpRYyIsIm1hYyI6IjA5NWNmODIxNmFkNzBlZWEyNTk0NmU5NDRiYWYxMDc2ZWJiMGI4YTU5MWIzMzBmMDViNTE4MTk5MTA3YjJkYzUiLCJ0YWciOiIifQ==; _cfuvid=euWxqYL1tv9LpP.fp4nbZ2GYesW_sNEkHT1aliKZHGA-1736343178829-0.0.1.1-604800000; __cf_bm=mzo9qKsXPUNMtrSYRZRyys_yxdqSEV1hHsbXK.c1VJ4-1736346077-1.0.1.1-TlLd1K_gDH6WW.DJSkTepZSOeJzCkgxyOdX7kO4zRCwW8rG7zo6YZWa3MJvTaqrQXA16TS2D9Qe5e2wdhBX5iQ; _dd_s=logs=1&id=ac536f8f-4082-4d44-82c2-53b7956beb18&created=1736346122257&expire=1736347022257; cf_clearance=6NTiHykNI1FmiXNFkllmdmNSWATVWQY5ohYmvGhhC4Q-1736346122-1.2.1.1-h287.XC8AdjLJlyJjBNrZJMUtPhgnu_xb7oZRrwoswGEAXRDHGfgZNyE43MIqNVbtlhOly9f5rqyMDDQIsnUC9B8K6p4GAkX3vCJXOqh5MggAltksLoWyPwQzAHxs85xZ.0FuV9ZXlg2AQeExhXhFr0ROx_lKVw8dgWG8JLk_D_LkiEEnYlF02Wj_gpAbcBl5wdBzIXUIEsZB.bsTh3fTvOGCgDZxdzFiI15mpA4e_TT4RqWB9mGrDmrpVZaLnl.KOzFggAmlJnCHzHZBzFvpR1thFDY2TtIs9ej5CZGrlqTfBEgh5xNh4aZAAgbj1sojkTyD5vsjbpNLtKnb_9VfltvJ2QTc2sjvFMdxDvDFv1nz8jk.smVUWC0CDjGMeFoRkq2zXGY4oEVdhvdZtXGTA" -categories_cookie = "USER_LOCALE=en; Zb8Yq4Gq0BDtyTgD2FKm3EBvdj5TIfT8O4T6L4w7=eyJpdiI6IjA0V3Evbi9MN0xWTm5Qc21LSURucXc9PSIsInZhbHVlIjoidGdqejd6bmQxTGhkWTJQNFFxcGxHNEVUaHJ6SUxFU0Z6RU1QdkVvZE94eWVYRS80VWhZYmNQQ3JENHVSZTFQeWV3RnJPeFZtSVRocEZoT3A0dnRMSHpqY1hYS1lkYU1vaFEvN0treTJpMmpGVXp2TEdDVGpGeW9tcHBqUTRpblh4TGhmNGY2L2dURkRvc2h2czNLK2pDbHluL3loemtlc1NRaVZGUXpreTdtK3E0Ny9xV2N2UStiY3BNNzQ5SWhPSVV2YmMyNHJhTTJwV2k1dERhbTRDKzZwZGtCTTlQelBDZ2gwTzBLRVpqL3Nwa3VOcDdyWHRnMHBQYXRZSXlhaVFuRjFOWGMwa3VhTU9ZSVM5NFJ5NnFyNEZNaVJ6SUszOTJ4cmUvZ3BZU2lqYkZrY0tXVS84dExBdjltelpGdnJ3STRCVFJnQjdsMm0yOW5XUUNmOWwwWFNDbUlEbzQwNjNaK1ZxZnJDWXorN0Q5RVlGSnk4TitvbHBJL2NHZERLclhaNnh6elRKdEhhVk1zeEg0YnVyMC81TGF1bmVQSFJrZFdMaGs2d0dPSysvaTVKdm5TQWNobnFNTU1ITGdLYSIsIm1hYyI6IjQ2MGQ5MTFkYTA5YTg4NTVhNDhmZThiYTk3OThiMzJhYWYyMDA3Y2VmMmEzZWRmOGFkN2E1ZTE1MTA5MjRmNTAiLCJ0YWciOiIifQ%3D%3D; 
lZiPtxKwAqyahxufRO6jbpdvC9uRDrCYbgQ9Z4aW=eyJpdiI6IldhU0hiT1RoRkc1d2VWdUJqdFZPdkE9PSIsInZhbHVlIjoiaEhuNGppRnNXZXFuK1FuNlZwUUFlSE04ODZnOFBqeWxZcE9vRm1SQ0Q0WFJXS1hXZWk2WXY4TWRpTVJRVnZOSmIzS0NPTEd4Slk1clVhWkVnc1lQSXVJUVYyamNNbDE2YmNsd2psVUdtNDVhTnBjQTZHL3pnejFrS2k2dWZybkdRUVRnbU5XVnJTYXVQOFlYUjRuMjMwdy9ZUXlFRW9xRUlLNzJ0OGZBTGJXVkdMVlJMYVZybVFER0tpOTA3b2hBWVByN0EzV3gxK2RzK2RGWnJJbFdVWDBhVXkrdVZpQXlINWphbTZHaEluZW1wK200SWZFZ3o3VE5DVE15K3FQamMzdDhMWGdmZWtoTUxsbWI1WHNPUHFwNTlnYjBXNmtPOTRKZXh6Ym9XNzJvaytYZk9CVVdqd3pJaG5kWmZXMzdaSW9GZXFjbjk5RHd2Y1hWTGdNSG5GbDhXb1ZEc1NFWnJqMU1ibHBvY0hibkQwckN2WTBEaVJvNDY4aDZnU3dSVkpFQk5ZYlF6alhjZTFHTjRzQ0ttVE5BTmVFWVE3dURLNzBLWThoSG5YMDc0elkwTzhiQml2cnQ3Q2J3VkxJOCIsIm1hYyI6IjFkYTU5MjM1YjNjMzg2ZGM2ZmE5NDE3ZDcwZTRmYTY3MTNmZTkyNDkxY2IzZTExYmY0YjcxNGY2ODg2MTBjMGMiLCJ0YWciOiIifQ%3D%3D; 5cjeFdP65qgqhABa5zdSwVe98cLCVREcp88fTZqn=eyJpdiI6IjZLaUJWcVdNY0tlZlVZNVhLUVdSRWc9PSIsInZhbHVlIjoieGs3YkJSQW95VUlYaVhtSWxlc1J2N2lmV1NqWUh2UXRTZlJJT2ZsL1dXVlBMa1k2TnJCOXpEY1Q4OXd4eThLc0d4NVBHMDU0L1Z0MDAwUTdGYkNsc1NsaWc4L2h1UHR6UGdEZE5OVkdMVlczK3BQWFYzNlJGV2t5TW5Hb05YT0VUZnFwRlo3WjhrbXlLcHFvYmErNjNJYTljVXNpQzVFR2dXaDMyU01UUGF2cVRTaGwzVU1HOXVzQWZUTUNUb0N5M3RGQ2o4cFNyTEtLM0MyR0ZxZlZySjFUaHk2NG1ONXVBc2V5QWJtNFhBdXU2TmpqeUF1TitLcHhZUExPdlBPNW5PWXQ1WVgwSGFtWTVsSWRVOVNTMTUyNkc3NE45Vk1mLzFXbEVFS3U3SWdybmVaVzJPUjNDWWp6TituaFRmampZYkxpN1l0V3graEdjcVRaOFNkaVd3OStPalQ0a1NaNmVET3dKOWpkWVpXWGg5bVh3N1FQbXlydnJESkpQbDBrbHA4bUtISkMvcUtuOVVhc1A3eU5sVjhHdENJRUJPNFlUbzl3L3REcmZnQTZhTmU2b2p1aXE2MUNGdkNNalpWNlNqS3UvaUkwRDlqMk10K2hSdmk3cHc9PSIsIm1hYyI6IjNlZTJlYzkzMDhlMDU5YWY5ZDcwN2NiYjUwZDY1OGU1YzBhMzYwNGY4YTVjYzAzZjJmOGI1YzE2YmVmNjE2YTAiLCJ0YWciOiIifQ%3D%3D; __stripe_mid=e3982e51-13f1-4f6d-ab6f-5a73283fc07fc5e5a4; showMatureContent=true; tile_player_muted=true; sort_options_clips=sort%3Dview%26range%3Dall; stream_quality_cookie=720; KP_UIDz-ssn=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; KP_UIDz=0MrL7PuuY1ecYCmYW9QcZPd00jVlO2zPfxQAOSbGSIfsn78YPjxvtSE89ZIr1cegxvsQDVg6t7uwevGlIkg6fiYVFXvnqSb1y3r97BHthO282dKXhM02IwkcVWeQ5LVc3PDaH1U6h9JRIAP4HCNcnARkQ2ZrLjL0dfKNNjvKHR7I; _cfuvid=euWxqYL1tv9LpP.fp4nbZ2GYesW_sNEkHT1aliKZHGA-1736343178829-0.0.1.1-604800000; volume=0; sortOptions=sort%3Dviewers_high_to_low; cf_clearance=gP0FPFmx9TMcKB22RlvfB7.wB9Se_b.rbhzCsrOF_24-1736366052-1.2.1.1-2w4Y8TCAQ7BT_6iH_1fYhpc8wFnI0ZsUyRRnax.00ruJ1RiBOElg1CkiVluOGrncd_miW3FEHd.oSAF7C1oRxj7wDIj7YXQ2GZYc6ayqfO4NilzWXGgHAFbz72Imqw4ThFa.TOU9JLRR4Wded7jBVN1GTESpNOnjOcNzN7iIQwBm1vXOuAR3VH96UBrygsmtwsH7BsotHsahWZUwo094SS7bB8uSFvpbmJSCMX4KrNGvJXx0VkCBARZnJu_kTcdZUCWqVx_xS1mmQ2qiMSGzrBcGsUG9aoQHSg9aRL63Y1bOFYjcxv.lqiO7ifB.CAQZC6484Hn1MDA7pGlPTWjwwe1AYRJ8K7UbWi2o61D6jEmSOjsjJarWCU9XVxXtiUxFzQ01sjWd1PrNaXGPL073CA; __stripe_sid=00cbd1f6-1244-437a-b9d8-caa9090967c151d8da; XSRF-TOKEN=eyJpdiI6ImxrV2tHck9iRS9sa1lxVm5LTUp5SVE9PSIsInZhbHVlIjoiVVVISWtrbDg4amY0WGc0eXY3L3R5YUNIeHc5RHdoSVN2TEdLNUlLc2FucUlpZ0RqQnl4TlZrbjVCdnAzcWNsWmNVVVhwSkdMSzVNT2ViOWNKeG5FSnlXMGszS3FPZFFoSWtLZjYzT1Rpdk52N1o0MzU0MjQwYk9TdTVpNFI5S1IiLCJtYWMiOiI5YTJmNzIxOTA0YjNmNDBmMzAyODQ0OWE2MTc3NjQyYjU0MjE2NjU0MWZjZTUxNGEwYjA3ZjhjOTk0NmQ4MWE1IiwidGFnIjoiIn0%3D; 
kick_session=eyJpdiI6IkticEx2L3ZudWYxUXp5a1lCM3lwM1E9PSIsInZhbHVlIjoiWFZPRlhtY2xGWHJBMzhXajBPWU54TVlNSlpHSG5EcHd0Y1BDdGNNa09Xekd6RXNmYm5id3Z6Z0R3Y1VPYUhvSDUzR0JFSFRyNm8yU1ZmemQ1TFpmVjBGMmRtQ1dzcVYwUnJYVERxNkdOaWRBZ3JNMlNwWEhJaWgzQU1qZS9KckciLCJtYWMiOiJiZWYwODY1NDY5MWI0MzVhYmRmOTcwMDU3NGQwMjQ1ZDcwZjBiNWQ4MjhiMGYzZTM0NGE1NmUwNWQwN2FiM2I0IiwidGFnIjoiIn0%3D; zEtssaQ3IfAtrsEcXvsiAHe6PsVsric6EGVK5vAl=eyJpdiI6Ik5SNVFQY1dqUHNpakswU1lGUm1KQ2c9PSIsInZhbHVlIjoiblFKQkRIU2FtZWxLK2MwZDBKRXQ2MnIxeElPdThVZjUvVDUzR1JzenFYK250OEpFbHBsa2xTeVhXVUM1cktDWGNBTEg5QVdDNTd1UWJCM1hSL1RHNytFRldkai8yOWhMYW90T1RISFNWV1F3a29maDZkODQ3Q25OcWtzcVM1RnRPM0FoeVM2cWdZZmVHc2ZjODFuNzQybkdZK3RvU3VYUTJHVWxiYkR5M1IrQVdqRTFDNDBkZ0JwYlFXQmFYV1AwWVhMbkM1SVAxMGl3Nit3L0FMaFcvVmp1VEx5cXdvRm44ZEl1dExsZVNiTXYydWJaR3k5ZnZBbXFPaXIrcUs2bWJIYXU4ZkdxQTJlWjlxcXJlQ0ZNTEdNSVNQK0RmR3U2WU5RT0hXZEpka21OWFUzaWltRlMzRkxKSEprQ0hhTDJJd0xuTm04c1Njb2l4OXVkZXRybGp6UEhnUnpYbm9TdTZpa3Z6ZTF1ZVdXWE5IbnIvSWZVT1pzYmVDNkRnc2g2OEg0eitaNjFWYTl5MXlxOWxIZkF4NmRPWitGc3lmNGxLRkZlT2FvVXIvZz0iLCJtYWMiOiIxMGI0ZGY5ZmM5YzYzYmVhNzc2M2VkYzQ5MGY0ZjQyODdjNjk0MWY4MTUyMjhjZjFiZGIxMDZmYzJlNjE1YWI0IiwidGFnIjoiIn0%3D; __cf_bm=Nzqz3pHAEzG6Todu.QBH1U0i9Tjm_GS79v86DJuPHVQ-1736366434-1.0.1.1-ciGSZWwrHS5xrR5IG9s8gZT6pMh72Brakn0bfHYxfV1aUHxnSjAVV9Pgha25iZCtF.qYcSCDFd9BVuYIXEVN1g; _dd_s=logs=1&id=b4628b04-2919-429f-a5aa-466afaf1e236&created=1736361875977&expire=1736367427697" - -permanent_cookies = [ - "USER_LOCALE=en", - "showMatureContent=true", - "tile_player_muted=true", - "stream_quality_cookie=720", - "volume=0" -] - -headers = { - 'User-Agent': user_agent, -} - -def parse_cookie(cookie): - cookie_dict = {} - for item in cookie.split('; '): - key, value = item.split('=', 1) - cookie_dict[key] = value - return cookie_dict - -def build_categories_cookie(): - categories_cookie = {} - - categories_cookie['sortOptions'] = 'sort=viewers_high_to_low' - categories_cookie['sort_options_clips'] = 'sort=view?range=all' - categories_cookie['__stripe_mid'] = 'e3982e51-13f1-4f6d-ab6f-5a73283fc07fc5e5a4' - categories_cookie['Zb8Yq4Gq0BDtyTgD2FKm3EBvdj5TIfT8O4T6L4w7'] = 'eyJpdiI6IjA0V3Evbi9MN0xWTm5Qc21LSURucXc9PSIsInZhbHVlIjoidGdqejd6bmQxTGhkWTJQNFFxcGxHNEVUaHJ6SUxFU0Z6RU1QdkVvZE94eWVYRS80VWhZYmNQQ3JENHVSZTFQeWV3RnJPeFZtSVRocEZoT3A0dnRMSHpqY1hYS1lkYU1vaFEvN0treTJpMmpGVXp2TEdDVGpGeW9tcHBqUTRpblh4TGhmNGY2L2dURkRvc2h2czNLK2pDbHluL3loemtlc1NRaVZGUXpreTdtK3E0Ny9xV2N2UStiY3BNNzQ5SWhPSVV2YmMyNHJhTTJwV2k1dERhbTRDKzZwZGtCTTlQelBDZ2gwTzBLRVpqL3Nwa3VOcDdyWHRnMHBQYXRZSXlhaVFuRjFOWGMwa3VhTU9ZSVM5NFJ5NnFyNEZNaVJ6SUszOTJ4cmUvZ3BZU2lqYkZrY0tXVS84dExBdjltelpGdnJ3STRCVFJnQjdsMm0yOW5XUUNmOWwwWFNDbUlEbzQwNjNaK1ZxZnJDWXorN0Q5RVlGSnk4TitvbHBJL2NHZERLclhaNnh6elRKdEhhVk1zeEg0YnVyMC81TGF1bmVQSFJrZFdMaGs2d0dPSysvaTVKdm5TQWNobnFNTU1ITGdLYSIsIm1hYyI6IjQ2MGQ5MTFkYTA5YTg4NTVhNDhmZThiYTk3OThiMzJhYWYyMDA3Y2VmMmEzZWRmOGFkN2E1ZTE1MTA5MjRmNTAiLCJ0YWciOiIifQ==' - - return categories_cookie - - -def build_cookie_string(cookies): - cookie_string = "" - - for cookie in cookies: - cookie_string += f"{cookie}={cookies[cookie]}; " - - return cookie_string - -def get_clips(username): - api_url = f'https://kick.com/api/v2/channels/{username}/clips' - - params = { - 'sort': 'view', - 'time': 'all' - } - - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch clips for {username}.") - return None - - data = response.json() - clips = data['clips'] - - while 'nextCursor' in data: - next_cursor = data['nextCursor'] - api_url = 
f'https://kick.com/api/v2/channels/{username}/clips' - - params = { - 'sort': 'view', - 'time': 'all', - 'cursor': next_cursor - } - - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch clips for {username}.") - return None - - data = response.json() - clips.extend(data['clips']) - - return clips - -def parse_clip_data(clips): - data = [] - - for clip in clips: - creator_data = clip['channel'] - username = creator_data['username'] - channel_id = clip['channel_id'] - - duration = clip['duration'] - clip_id = clip['id'] - title = clip['title'] - views = clip['views'] - url = clip['video']['url'] - - data.append({'title': title, 'views': views, 'url': url, 'user_id': channel_id, 'username': username}) - - return data - -def parse_stream_data(streams): - data = [] - - for stream in streams: - creator_data = stream['channel'] - username = creator_data['username'] - channel_id = stream['channel_id'] - - title = stream['title'] - views = stream['views'] - url = stream['video']['url'] - - data.append({'title': title, 'views': views, 'url': url, 'user_id': channel_id, 'username': username}) - - return data - -def get_categories(): - url = 'https://kick.com/api/v1/subcategories' - - params = { - 'limit': 32, - 'page': 1 - } - - cookies = parse_cookie(categories_cookie) - - headers['Cookie'] = build_cookie_string(cookies) - response = requests.get(url, headers=headers, params=params) - - if response.status_code != 200: - print("Failed to fetch categories.") - return None - - data = response.json() - categories = data['data'] - - while len(data['data']) > 0: - print(f"Fetching page {params['page']}...") - params['page'] += 1 - - response = requests.get(url, headers=headers, params=params) - - if response.status_code != 200: - print("Failed to fetch categories.") - return None - - data = response.json() - categories.extend(data['data']) - - return categories - -def get_streams(category = 'pools-hot-tubs-bikinis'): - api_url = f'https://kick.com/stream/livestreams/en' - - params = { - 'page': 1, - 'limit': 24, - 'subcategory': category, - 'sort': 'desc' - } - - headers['Cookie'] = streams_cookie - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch streams for {category}.") - return None - - data = response.json() - streams = data['data'] - - while len(data['data']) > 0: - params['page'] += 1 - - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch clips for {username}.") - return None - - data = response.json() - clips.extend(data['data']) - - return streams - -def get_clips(username): - api_url = f'https://kick.com/api/v2/channels/{username}/clips' - - params = { - 'sort': 'view', - 'time': 'all' - } - - headers['Cookie'] = videos_cookie - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch clips for {username}.") - return None - - data = response.json() - clips = data['clips'] - - while 'nextCursor' in data: - params['cursor'] = data['nextCursor'] - api_url = f'https://kick.com/api/v2/channels/{username}/clips' - - response = requests.get(api_url, headers=headers, params=params) - - if response.status_code != 200: - print(f"Failed to fetch clips for {username}.") - return None - - data = response.json() - clips.extend(data['clips']) - - return clips - -if __name__ == "__main__": - categories = 
get_categories()
-    streams = get_streams()
-
-    for stream in streams:
-        username = stream['username']
-        clips = get_clips(username)
-        parsed_data = parse_clip_data(clips)
\ No newline at end of file
diff --git a/p.enc b/p.enc
deleted file mode 100644
index 28c4b33..0000000
--- a/p.enc
+++ /dev/null
@@ -1 +0,0 @@
-gAAAAABmRUff7c9t9gngWj_2cwvaTBrUDJ_JUyYVUfG-p3SvDV7qOSHddJ4eHADiJeRtJNtY9UxkohSB5I1MmLahAb_hxxwIVA==
\ No newline at end of file
diff --git a/profile_pic.py b/profile_pic.py
index 8f235e2..82a781f 100644
--- a/profile_pic.py
+++ b/profile_pic.py
@@ -1,20 +1,41 @@
 from storysave_api import get_hd_profile_picture
-import config, funcs, os
+import config, funcs, os, time
+known_phashes = {'e7c51a904b69d366': 'default empty profile picture',
+                 'cb3ce46194c335dc': 'default empty profile picture',
+                 }
+
+known_hashes = {
+    '09c3cf34d4f117d99fa6285f4bfd3a0d888d7ab2cbca665b16097f6b93ca0de6' : 'default empty profile picture',
+    '2b9c0914d8f3f0aa6cf86705df70b7b21e9ca2f9013a346463788e7cebd0158f' : 'default empty profile picture',
+}
 
 db, cursor = config.gen_connection()
 
-cursor.execute(f"SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND username IN (SELECT username FROM following WHERE platform = 'instagram');")
+cursor.execute("SELECT DISTINCT username, user_id, favorite FROM following WHERE user_id IS NOT NULL AND platform = 'instagram' ORDER BY favorite DESC;")
 usernames = cursor.fetchall()
 
-for username, user_id in usernames:
+for username, user_id, favorite in usernames:
     profilepicurl = get_hd_profile_picture(user_id=user_id)
 
     if not profilepicurl:
+        print(f'Failed for {username}')
         continue
 
     filename = os.path.basename(profilepicurl).split('?')[0]
     user_dir = os.path.join('media', 'instagram', 'profile', username)
     filepath = os.path.join(user_dir, filename)
 
-    funcs.download_file(profilepicurl, filepath)
-    print(f"Downloaded profile picture for {username}.")
+    filepath = funcs.download_file(profilepicurl, filepath)
+
+    if not filepath:
+        continue
+
+    phash = funcs.generate_phash(filepath)
+    if phash in known_phashes:
+        print(f"Profile picture for {username} is the default empty profile picture.")
+        os.remove(filepath)
+        continue
+
+    print(f"Downloaded profile picture for {username}.")
+
+    time.sleep(1)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 39d8f61..5d7825e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,4 +18,5 @@ tqdm
 webdriver-manager
 moviepy==1.0.3
 instagrapi
-ImageHash
\ No newline at end of file
+ImageHash
+watchdog
\ No newline at end of file
diff --git a/snapchat.py b/snapchat.py
deleted file mode 100644
index 00a3110..0000000
--- a/snapchat.py
+++ /dev/null
@@ -1,153 +0,0 @@
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from bs4 import BeautifulSoup
-import requests
-import json
-
-headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
-
-snap_types = {
-    27 : ['spotlight', 'video'],
-    256 : ['thumbnail', 'image'],
-    400 : ['idk', 'image'],
-    1023 : ['idk', 'image'],
-    1034 : ['downscaled_video', 'video'],
-    1322 : ['idk', 'video'],
-    1325 : ['idk', 'video'],
-}
-
-def get_data(username):
-    url = f"https://www.snapchat.com/add/{username}"
-    response = requests.get(url, headers=headers)
-    soup = BeautifulSoup(response.text, "html.parser")
-    data_script = soup.find("script", id="__NEXT_DATA__")
-    if not data_script:
-        print(f"No data found for {username}.")
-        return None
-
data = json.loads(data_script.string) - return data - -def get_social_medias(data): - website_url = None - try: - website_url = data['props']['pageProps']['userProfile']['publicProfileInfo']['websiteUrl'] - except KeyError: - pass - return website_url - -def get_related_profiles(data): - related_profiles = [] - try: - related_profiles_data = data['props']['pageProps']['userProfile']['relatedProfiles'] - for profile in related_profiles_data: - related_profiles.append(profile['username']) - except KeyError: - pass - return related_profiles - -def get_all_users_data(usernames): - all_data = {} - - # Define a helper function for threading - def fetch_data(username): - return username, get_data(username) - - # Use ThreadPoolExecutor for concurrent fetching - with ThreadPoolExecutor() as executor: - futures = {executor.submit(fetch_data, username): username for username in usernames} - - for future in as_completed(futures): - username = futures[future] - try: - username, data = future.result() - all_data[username] = data - except Exception as e: - print(f"Error fetching data for {username}: {e}") - all_data[username] = None - - return all_data - -def parse_stories(stories): - parsed_stories = [] - - for story in stories: - parsed_story = parse_story(story) - parsed_stories.append(parsed_story) - - return parsed_stories - -def get_stories(data): - """Extract story list from the JSON data.""" - try: - stories = data['props']['pageProps']['story']['snapList'] - - if not type(stories) == list: - return [] - - stories.sort(key=lambda x: x.get('snapIndex'), reverse=True) - return stories - except: - return [] - -def get_highlights(data): - """Extract highlights from possible highlight keys in JSON data.""" - highlights = [] - - page_props = data.get('props', {}).get('pageProps', {}) - possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights'] - - for key in possible_highlight_keys: - highlight_data = page_props.get(key, []) - if highlight_data: - highlights.extend(highlight_data) - - return highlights - -def parse_story(story): - original_snap_id = story.get('snapId', {}).get('value', '') - snap_url = story.get('snapUrls', {}).get('mediaUrl', '') - timestamp = story.get('timestampInSec', {}).get('value', '') - media_type = story.get('snapMediaType') - media_type = 'image' if media_type == 0 else 'video' - - return { - "original_snap_id": original_snap_id, - "snap_id": get_snap_id(snap_url), - "url": snap_url, - "timestamp": timestamp, - "platform": "snapchat", - "type": "story", - "username": story.get('username', ''), - "media_type": media_type, - } - -def get_snap_id(url): - return url.split('?')[0].split('/')[-1].split('.')[0] - -def get_highlight_stories(data): - stories = [] - highlights = get_highlights(data) - - for highlight in highlights: - snap_list = highlight.get('snapList', []) - - for snap in snap_list: - story = parse_story(snap) - stories.append(story) - - return stories - -def get_spotlight_metadata(data): - """Extract spotlight metadata from JSON data.""" - try: - return data['props']['pageProps']['spotlightStoryMetadata'] - except KeyError: - return [] - -def get_username(data): - """Extract username from JSON data.""" - try: - return data['props']['pageProps']['userProfile']['publicProfileInfo']['username'] - except KeyError: - return None - - diff --git a/snapchat_backer.py b/snapchat_backer.py deleted file mode 100644 index b121f21..0000000 --- a/snapchat_backer.py +++ /dev/null @@ -1,126 +0,0 @@ -import os -import json -from tqdm import tqdm - -from funcs 
import get_files -from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username - -# import config as altpinsConfig -import altpinsConfig - -def get_data(filepath): - try: - with open(filepath, 'r', encoding='utf-8') as f: - return json.load(f) - except: - print(f"Error reading {filepath}") - return None - -def process_story(story, username, story_type, db, cursor): - snap_urls = story.get('snapUrls', {}) - media_url = snap_urls.get('mediaUrl', '').split('?')[0] - media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1] - - if media_id in existing_media_ids: - return False - - media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}" - - media_preview_url = snap_urls.get('mediaPreviewUrl', '').get('value', '').split('?')[0] - media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}" - - - timestamp = story.get('timestampInSec', {}).get('value', '') - media_type = story.get('snapMediaType') - snap_id = story.get('snapId', {}).get('value', '') - - - query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)" - cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id)) - db.commit() - - existing_media_ids.add(media_id) - - print_emoji = '✅' if cursor.rowcount else '❌' - print(f"{print_emoji} Inserted story {media_id}") - -def process_json(json_path, db, cursor): - """ - Given a path to a JSON file, parse it and insert relevant data - into the database. - """ - - # Load JSON data - data = get_data(json_path) - username = get_username(data) - - ready_stories = [] - - # Insert stories (regular) - stories = get_stories(data) - for story in stories: - story['storyType'] = 'story' - ready_stories.append(story) - - # Insert stories (highlights) - highlights = get_highlights(data) - highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])] - highlight_stories.sort(key=lambda x: x.get('snapIndex'), reverse=True) - for story in highlight_stories: - story['storyType'] = 'highlight' - ready_stories.append(story) - - - for story in ready_stories: - story_type = story.get('storyType') - process_story(story, username, story_type, db, cursor) - - - # Insert spotlight metadata - spotlight_metadata = get_spotlight_metadata(data) - for story in spotlight_metadata: - try: - media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1] - deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0] - except: - continue - - if not all((media_id, deepLinkUrl)): - continue - - if deepLinkUrl in existing_spotlights: - continue - - deepLinkId = deepLinkUrl.split('/')[-1] - description = story['description'] - - insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)" - cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId)) - db.commit() - - existing_spotlights.add(deepLinkUrl) - - print_emoji = '✅' if cursor.rowcount else '❌' - print(f"{print_emoji} Inserted spotlight {media_id}") - - os.remove(json_path) - - -db, cursor = altpinsConfig.gen_connection() - -existing_media_ids = [] -cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';") -existing_media_ids = {row[0] for row in cursor.fetchall()} - -existing_spotlights = [] -cursor.execute("SELECT deepLinkUrl 
FROM snapchat_metadata;") -existing_spotlights = {row[0] for row in cursor.fetchall()} - -data_dir = 'data' -files = [f for f in get_files(data_dir) if f.endswith('.json')] - -# Wrap the file list with tqdm to show a progress bar -for filepath in tqdm(files, desc="Processing files", unit="file"): - process_json(filepath, db, cursor) - -db.close() \ No newline at end of file diff --git a/snapchat_master_crawler.py b/snapchat_master_crawler.py deleted file mode 100644 index 9299311..0000000 --- a/snapchat_master_crawler.py +++ /dev/null @@ -1,66 +0,0 @@ -from snapchat import get_all_users_data, get_stories, get_highlight_stories, get_social_medias, get_related_profiles -import os, config - -snapchat_directory = "snapchat" -media_directory = "media" -temp_directory = ".temp" -data_directory = "data" - -directory = os.path.join(media_directory, snapchat_directory) - -def get_snapchat_stories(usernames): - usernames = usernames[:5] - snapchat_users_data = get_all_users_data(usernames) - snapchat_users_data = dict(sorted(snapchat_users_data.items())) - - ready_stories = [] - - for username, data in snapchat_users_data.items(): - print(f"Getting stories for {username}...") - - data = snapchat_users_data.get(username) - if not data: - print(f"Failed to get data for {username}. Skipping.") - continue - - website_url = get_social_medias(data) - - related_profiles = get_related_profiles(data) - - stories = get_stories(data) - - stories.extend(get_highlight_stories(data)) - - for story in stories: - snap_id = story['snap_id'] - url = story['url'] - timestamp = story['timestamp'] - - # Determine file extension - extension = '.jpg' if story['media_type'] == 'image' else '.mp4' - - filename = f"{username}~{timestamp}~{snap_id}{extension}" - filepath = os.path.join(directory, filename) - - story['media_url'] = url - story['snap_id'] = snap_id - story['filepath'] = filepath - story['username'] = username - story['timestamp'] = timestamp - story['original_snap_id'] = story['original_snap_id'] - - ready_stories.append(story) - - # sort ready_stories by timestamp from oldest to newest - ready_stories.sort(key=lambda x: x['timestamp']) - - return ready_stories - -db, cursor = config.gen_connection() - -cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC") -usernames = [row[0] for row in cursor.fetchall()] - -stories = get_snapchat_stories(usernames) - - diff --git a/snappy_master.py b/snappy_master.py deleted file mode 100644 index 8282609..0000000 --- a/snappy_master.py +++ /dev/null @@ -1,243 +0,0 @@ -from snapchat import get_stories, get_highlight_stories, get_all_users_data, parse_stories -from datetime import datetime -from uuid import uuid4 -import config -import funcs -import cv2 -import os -import json - -UPLOAD_MODE = True - -media_directory = "media" -snapchat_directory = "snapchat" -temp_directory = ".temp" -data_directory = "data" - -directory = os.path.join(media_directory, snapchat_directory) - -os.makedirs(media_directory, exist_ok=True) -os.makedirs(directory, exist_ok=True) -os.makedirs(temp_directory, exist_ok=True) -os.makedirs(data_directory, exist_ok=True) - -def find_duplicate_snap(existing_snap_ids, snap_id): - return snap_id in existing_snap_ids - -def archive_data(data, username): - try: - current_timestamp = int(datetime.now().timestamp()) - data_filename = f"{username}~{current_timestamp}.json" - data_filepath = os.path.join(data_directory, data_filename) - with open(data_filepath, 'w') as f: - f.write(json.dumps(data, indent=4)) - except: - 
print(f"Failed to archive data for {username}.") - return False - -def get_snapchat_stories(usernames): - snapchat_users_data = get_all_users_data(usernames) - snapchat_users_data = dict(sorted(snapchat_users_data.items())) - - ready_stories = [] - - for username, data in snapchat_users_data.items(): - print(f"Getting stories for {username}...") - - if not data: - print(f"Failed to get data for {username}. Skipping.") - continue - - archive_data(data, username) - - stories = get_stories(data) - stories = parse_stories(stories) - - stories.extend(get_highlight_stories(data)) - - for story in stories: - snap_id = story['snap_id'] - url = story['url'] - timestamp = story['timestamp'] - - # Determine file extension - file_exts = {'image': '.jpg', 'video': '.mp4'} - extension = file_exts.get(story['media_type']) - if not extension: - print(f"Failed to determine file extension for {url}. Skipping.") - continue - - filename = f"{username}~{timestamp}~{snap_id}{extension}" - filepath = os.path.join(directory, filename) - - story['media_url'] = url - story['snap_id'] = snap_id - story['filepath'] = filepath - story['username'] = username - story['timestamp'] = timestamp - story['original_snap_id'] = story['original_snap_id'] - - ready_stories.append(story) - - ready_stories.sort(key=lambda x: x['timestamp']) - - return ready_stories - -def get_snapchat_files(): - stories = funcs.get_files(directory) - stories = [get_media_data(filepath) for filepath in stories] - stories = [story for story in stories if story] - return stories - -def main(): - print('Initializing snappy...') - ready_stories = [] - - stories_from_files = get_snapchat_files() - - cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC") - usernames = [row[0] for row in cursor.fetchall()] - - print(f"Getting stories for {len(usernames)} users...") - new_stories = get_snapchat_stories(usernames) - - cleaned_stories = [] - print("Checking for duplicates...") - for story in new_stories: - duplicate_snap = find_duplicate_snap(existing_snap_ids, story['snap_id']) - if duplicate_snap: - print(f"Snap {story['filepath']} already exists in the database. Removing...") - continue - cleaned_stories.append(story) - - cleaned_stories = download_stories(cleaned_stories) - - ready_stories.extend(cleaned_stories) - ready_stories.extend(stories_from_files) - - for story in ready_stories: - UploadMedia(story) - -def download_stories(stories): - downloaded_stories = [] - for story in stories: - filepath = story['filepath'] - url = story['media_url'] - - filepath = funcs.download_file(url, filepath) - print(f"Downloaded {os.path.basename(filepath)}") - - if not filepath: - continue - - story['hash'] = funcs.calculate_file_hash(filepath) - story['size'] = os.path.getsize(filepath) - - downloaded_stories.append(story) - - return downloaded_stories - -def UploadMedia(media): - file_size = media['size'] - file_hash = media['hash'] - filepath = media['filepath'] - filename = os.path.basename(filepath) - - username = media['username'] - timestamp = media['timestamp'] - media_type = media['media_type'] - snap_id = media['snap_id'] - original_snap_id = media['original_snap_id'] - thumbnail_url = None - phash = None - - duplicate_snap = find_duplicate_snap(existing_snap_ids, media['snap_id']) - if duplicate_snap: - print(f"Snap {filename} already exists in the database. 
Removing...") - os.remove(filepath) - return False - - post_date = datetime.fromtimestamp(int(timestamp)) - - width, height = funcs.get_media_dimensions(filepath) - - duration = funcs.get_video_duration(filepath) - - if media_type == 'image': - phash = funcs.generate_phash(filepath) - elif media_type == 'video': - try: - thumb_path = generate_thumbnail(filepath) - obj_storage.PutFile(thumb_path, f'thumbnails/{filename}') - thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}" - phash = funcs.generate_phash(thumb_path) - os.remove(thumb_path) - except: - print('Error generating thumbnail. Skipping...') - return False - - server_path = f'media/snaps/{username}/{filename}' - file_url = f"https://storysave.b-cdn.net/{server_path}" - - obj_storage.PutFile(filepath, server_path) - - query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" - values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id, file_size) - - cursor.execute(query, values) - db.commit() - print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}') - - os.remove(filepath) - - return True - -def generate_thumbnail(filepath): - thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg') - cap = cv2.VideoCapture(filepath) - ret, frame = cap.read() - cv2.imwrite(thumb_path, frame) - cap.release() - return thumb_path - -def get_media_data(filepath): - filename = os.path.basename(filepath) - parts = filename.split('~') - if len(parts) < 3: - return False - - username = parts[0] - timestamp = parts[1] - snap_id = parts[2] - snap_id = os.path.splitext(snap_id)[0] - - file_size = os.path.getsize(filepath) - file_hash = funcs.calculate_file_hash(filepath) - - data = { - "username": username, - "timestamp": timestamp, - "filepath": filepath, - "snap_id": snap_id, - "original_snap_id": None, - "media_url": None, - "size": file_size, - "hash": file_hash - } - - return data - -if __name__ == '__main__': - print('Starting snappy...') - - db, cursor = config.gen_connection() - obj_storage = config.get_storage() - - cursor.execute("SELECT snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC") - existing_snap_ids = cursor.fetchall() - - existing_snap_ids = {row[0] for row in existing_snap_ids} - - main() - - print("Processing completed.") \ No newline at end of file diff --git a/storysave_api.py b/storysave_api.py index ead33e7..fc0dab6 100644 --- a/storysave_api.py +++ b/storysave_api.py @@ -2,20 +2,50 @@ from bs4 import BeautifulSoup import requests import json -doc_ids = [7663723823674585, 9539110062771438] +doc_ids = [7663723823674585, 9539110062771438, 8964418863643891, 9066276850131169] +active_doc_id = doc_ids[3] -def get_posts(): - data = { - "variables": '{"id":"57771591453","render_surface":"PROFILE"}', - "doc_id": "7663723823674585", +headers = { + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36', +} + +def get_posts(username): + + url = 'https://www.instagram.com/graphql/query/' + + variables = { + "data": { + "count": 12, + "include_reel_media_seen_timestamp": True, + "include_relationship_info": True, + "latest_besties_reel_media": True, + "latest_reel_media": 
True + }, + "username": username, + "__relay_internal__pv__PolarisIsLoggedInrelayprovider": True, + "__relay_internal__pv__PolarisShareSheetV3relayprovider": False + } + + params = { + 'variables': json.dumps(variables), + 'doc_id': active_doc_id } - - data = requests.get('https://www.instagram.com/graphql/query', params=data).json() - posts = data['data'] - posts = [post['node'] for post in posts] + response = requests.get(url, headers=headers, params=params) - return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates'])) + if response.status_code == 200: + try: + data = response.json() + posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges'] + end_cursor = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['page_info']['end_cursor'] + return posts + except (KeyError, TypeError) as e: + print(f"Error parsing JSON response: {e}") + return None + else: + print(f"Failed to fetch data. Status code: {response.status_code}") + return None + def get_username_by_user_id(user_id): url = 'https://www.instagram.com/graphql/query/' @@ -35,10 +65,6 @@ def get_username_by_user_id(user_id): 'variables': json.dumps(variables) } - headers = { - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", - } - response = requests.get(url, headers=headers, params=params) if response.status_code == 200: @@ -57,7 +83,7 @@ def extract_script_tags(username): url = f"https://www.instagram.com/{username}/" try: # Fetch the HTML content of the page - response = requests.get(url) + response = requests.get(url, headers=headers) response.raise_for_status() # Parse the HTML content with BeautifulSoup @@ -122,13 +148,14 @@ def get_profile_data(username): user_id = get_user_id(username) - data = { - 'variables': '{"id":"' + user_id + '","render_surface":"PROFILE"}', - 'doc_id': 9539110062771438 + variables = { + "id": user_id, + "render_surface": "PROFILE" } - - headers = { - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + + data = { + 'variables': json.dumps(variables), + 'doc_id': active_doc_id } response = requests.post(url, headers=headers, data=data) @@ -148,9 +175,45 @@ def get_hd_profile_picture(username = None, user_id = None): if not user_id: return None + variables = { + "id": user_id, + "render_surface": "PROFILE" + } + + data = { + 'variables': json.dumps(variables), + 'doc_id': '9539110062771438' + } + data = { - 'variables': '{"id":"' + user_id +' ","render_surface":"PROFILE"}', - 'doc_id': 9539110062771438 + 'av': '17841401225494803', + '__d': 'www', + '__user': 0, + '__a': 1, + '__req': 4, + '__hs': '20231.HYP%3Ainstagram_web_pkg.2.1...1', + 'dpr': 2, + '__ccg': 'GOOD', + '__rev': 1023131892, + '__s': 'g7nwhv%3Ad6c29x%3Aaag0uk', + '__hsi': 7507576467274562470, + '__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1EE2Cw8G11wBw5Zx62G3i1ywOwa90Fw4Hw9O0Lbwae4UaEW2G0AEco5G0zEnwhE3Mw51wLyES1Twoob82ZwrUdUbGwmk0KU6O1FwlE6PhA6bwg8rAwHxW1oxe6UaU3cyUrw4rxO2C', + '__csr': 'gg84YIJgSyn2Ob7oDs-h7qhmToSsDl_8uAAaBigC8yQiaKJuumUkyybh4i9qBFaiayqBAVKczV4cBjhHUbqxeq3q9Suuum9zkEjAy9Ua8ymi45DUG7EgzoeUfKm2ym6UblG00kXK0jUE3Ug3dwh24DgAi1mo0AyaDw4WwiU1Y80bCm12g2Jwww5OCkE18Wc0mmqA4pU22wCw1Ucw06TW0csw7Gw', + '__hsdp': 'l2DMCyPBdbclSEgBiHWhqWiRV5kKKyoFtoYABrqafK699onQtK1fg96qiK5EZcIk0A5bwau0xVEhwAyQElwik0qi1cwam0m20ou06L82Ew56w4-w8O1Xw75wnoc85i', + '__hblp': 
'08K19xO0V89815oaEtwUCwhoOq4opxG5o8oS4Vk4U9o9o7C0zof82Nwg8uG0jV0Hweu1OwsE13o1ZU11UlwVwko2wwfy0G89E17U11EdU2cwuU5C0Yp8660Eo5idz8vxucw', + '__comet_req': 7, + 'fb_dtsg': 'NAfvHXND-ELXKZFgyrogJIig1C4j6gRiNUaBBBomMZ1mNa-FvpKl6bw%3A17854231342124680%3A1731941013', + 'jazoest': 26187, + 'lsd': 'NFD0t4uLm10VsaniLLl9nv', + '__spin_r': 1023131892, + '__spin_b': 'trunk', + '__spin_t': 1747993861, + '__crn': 'comet.igweb.PolarisProfilePostsTabRoute', + 'fb_api_caller_class': 'RelayModern', + 'fb_api_req_friendly_name': 'PolarisProfileNoteBubbleQuery', + 'variables': '%7B%22user_id%22%3A%228309584937%22%7D', + 'server_timestamps': True, + 'doc_id': 8698637896906070 } try: @@ -166,4 +229,83 @@ def get_hd_profile_picture(username = None, user_id = None): except: hd_profile_pic = None - return hd_profile_pic \ No newline at end of file + return hd_profile_pic + + +def get_user_id_by_username(username): + url = 'https://www.instagram.com/graphql/query' + + variables = { + "data": { + "context": "blended", + "include_reel": True, + "query": username, + "rank_token": "", + "search_surface": "web_top_search" + }, + "hasQuery": True + } + + data = { + 'variables': json.dumps(variables), + 'doc_id': active_doc_id + } + + response = requests.post(url, headers=headers, data=data) + + if response.status_code == 200: + json_data = response.json() + + users = json_data['data']['xdt_api__v1__fbsearch__topsearch_connection']['users'] + + for user in users: + user_data = user['user'] + if user_data['username'] == username: + return user_data['pk'] + else: + print(f"Failed to fetch data. Status code: {response.status_code}") + return None + +def get_user_id_api(username): + url = f"https://www.instagram.com/api/v1/users/web_profile_info/?username={username}" + headers['referer'] = f"https://www.instagram.com/{username}/" + headers['x-ig-app-id'] = '936619743392459' + + response = requests.get(url, headers=headers) + + if response.status_code == 200: + try: + data = response.json() + user_id = data['data']['user']['id'] + return user_id + except (KeyError, TypeError) as e: + print(f"Error parsing JSON response: {e}") + return None + else: + print(f"Failed to fetch data. 
Status code: {response.status_code}")
+        return None
+
+def get_highest_quality_image(image_versions):
+    max_res = 0
+    max_res_url = None
+    for image in image_versions:
+        if image['width'] > max_res:
+            max_res = image['width']
+            max_res_url = image['url']
+    return max_res_url
+
+def parse_post(post):
+    medias = post['node']['carousel_media']
+    media_items = []
+    for media in medias:
+        media_item = {}
+
+        image_versions = media['image_versions2']['candidates']
+
+        media_item['image_url'] = get_highest_quality_image(image_versions)
+        media_item['pk'] = media['pk']
+        media_item['media_type'] = media['media_type']
+
+        media_items.append(media_item)
+
+    return media_items
\ No newline at end of file
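parse_post above indexes post['node']['carousel_media'] unconditionally, so a single-image or single-video post (no carousel) would raise a KeyError. A minimal defensive variant, sketched under the assumption that non-carousel nodes carry image_versions2 at the top level (not verified against this patch):

def parse_post_safe(post):
    # Hypothetical helper, not part of the patch: fall back to the node
    # itself when 'carousel_media' is absent (single-media posts).
    node = post['node']
    medias = node.get('carousel_media') or [node]
    media_items = []
    for media in medias:
        candidates = media.get('image_versions2', {}).get('candidates', [])
        media_items.append({
            'image_url': get_highest_quality_image(candidates),
            'pk': media.get('pk'),
            'media_type': media.get('media_type'),
        })
    return media_items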
diff --git a/storysave_dump.py b/storysave_dump.py
index 577fac8..9ec64d1 100644
--- a/storysave_dump.py
+++ b/storysave_dump.py
@@ -1,13 +1,15 @@
-from datetime import datetime
+from datetime import datetime, timedelta
 from uuid import uuid4
-import funcs
 import config
+import funcs
+import json
 import cv2
 import os
 import re
 
 temp_directory = ".temp"
-directory = 'media/instagram/'
+directory = 'media'
+os.makedirs(temp_directory, exist_ok=True)
 
 media_types = {
     'stories' : 'story',
@@ -15,39 +17,42 @@ media_types = {
     'profile' : 'profile'
 }
 
-os.makedirs(temp_directory, exist_ok=True)
+UPLOAD_CUSTOM = False
+CACHE_FILE = os.path.join(temp_directory, 'existing_media_ids.json')
+CACHE_TTL = timedelta(hours=48)
 
 def UploadMedia(media):
     username = media['username']
     user_id = media['user_id']
     filepath = media['filepath']
     platform = media['platform']
     media_id = media['media_id']
     timestamp = media['timestamp']
     highlight_id = media['highlight_id']
     post_type = media['post_type']
-
-    file_size = os.path.getsize(filepath)
     thumbnail_url = None
     phash = None
 
-    if media_id and media_id in existing_files:
+    if media_id and media_id in existing_media_ids:
         print('Duplicate file detected. Removing...')
         os.remove(filepath)
         return True
-
+
+    file_size = os.path.getsize(filepath)
     filename = os.path.basename(filepath)
     file_extension = os.path.splitext(filename)[1].lower()
+    file_hash = funcs.calculate_file_hash(filepath)
+
+    if not user_id:
+        user_id = get_user_id(username)
 
     media_type = funcs.get_media_type(filename)
     if not media_type:
         print(f'Error determining media type for {filename}. Skipping...')
         return False
 
-    file_hash = funcs.calculate_file_hash(filepath)
-
-    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
+    try:
+        post_date = datetime.fromtimestamp(int(timestamp))
+    except (TypeError, ValueError):
+        post_date = datetime.fromtimestamp(os.path.getctime(filepath))
 
     width, height = funcs.get_media_dimensions(filepath)
     if 0 in (width, height):
@@ -62,21 +67,19 @@ def UploadMedia(media):
         try:
             thumb_path = generate_thumbnail(filepath)
             obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
-            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
-            phash = funcs.generate_phash(thumb_path)
+            thumbnail_url = f"https://cdn.altpins.com/thumbnails/{file_hash}.jpg"
+            phash = funcs.generate_phash(thumb_path)
             os.remove(thumb_path)
         except Exception as e:
             print(f'Error generating thumbnail: {e}. Skipping...')
             return False
 
-    if media_id:
-        newFilename = f'{media_id}{file_extension}'
-    else:
-        newFilename = f'{file_hash}{file_extension}'
+    custom_filename = media_id if media_id else file_hash
+    newFilename = f'{custom_filename}{file_extension}'
 
     server_path = f'media/{post_type}/{username}/{newFilename}'
-    file_url = f"https://storysave.b-cdn.net/{server_path}"
-
+    file_url = f"https://cdn.altpins.com/{server_path}"
+
     obj_storage.PutFile(filepath, server_path)
 
     if highlight_id:
@@ -84,18 +87,25 @@
         newDB.commit()
         print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
 
+
     query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
     values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
 
     newCursor.execute(query, values)
     newDB.commit()
-    print(f'[{newCursor.rowcount}] records updated.\nFile: {filename}\nURL: {file_url}')
+    correct_emoji = '✅' if newCursor.rowcount > 0 else '❌'
+    print(f'{correct_emoji} added {filename} to database')
+    print(f'File: {filename}')
+    print(f'URL: {file_url}')
+    print(f'Pin URL: https://altpins.com/pin/{newCursor.lastrowid}')
     print("="*100)
 
     os.remove(filepath)
 
-    return True
+    existing_media_ids.add(media_id)
+
+    return newCursor.lastrowid
 
 def generate_thumbnail(filepath):
     thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg')
@@ -114,8 +124,9 @@ def get_user_id(username):
 
 def get_media_data(filepath):
     filename = os.path.basename(filepath)
+
     parts = filename.split('~')
-    if len(parts) < 4:
+    if len(parts) != 4:
         return False
 
     username = parts[0]
@@ -126,7 +137,9 @@
 
     highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None
 
-    if not user_id.isdigit():
+    if user_id.isdigit():
+        user_id = int(user_id)
+    else:
         user_id = get_user_id(username)
 
     if media_id.isdigit():
@@ -138,52 +151,46 @@
 
     return data
 
-def get_media(folder_path):
+def get_media():
     medias = []
+    failed_medias = []
 
     for media_type, post_type in media_types.items():
-        folder_path = os.path.join(directory, media_type)
+        media_folder_path = os.path.join(directory, media_type)
 
-        if not os.path.exists(folder_path):
+        if not os.path.exists(media_folder_path):
             continue
 
-        all_files = funcs.get_files(folder_path)
+        all_files = funcs.get_files(media_folder_path)
 
         for filepath in all_files:
            data = get_media_data(filepath)
           if not data:
+               failed_medias.append(filepath)
                continue
 
            data['post_type'] = post_type
            medias.append(data)
 
-    return medias
+    return medias, failed_medias
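For reference, get_media_data above accepts only filenames made of exactly four '~'-separated fields; anything else is routed into failed_medias and handled by get_custom_media below. An illustration of the contract (sample values invented):

sample = 'dualipa~12345678~3141592653589793238~1716200000.jpg'
parts = sample.split('~')
# ['dualipa', '12345678', '3141592653589793238', '1716200000.jpg']
username, user_id, media_id, _ = parts
# a 'highlight<id>' token in the user_id slot marks a highlight story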
-def get_custom_media():
+def get_custom_media(failed_medias):
     medias = []
 
-    folder_path = 'media/instagram'
-    platform = 'instagram'
-
     for media_type, post_type in media_types.items():
         folder_path = os.path.join(directory, media_type)
 
         user_dirs = [d for d in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, d))]
 
-    for user_dir in user_dirs:
-        user_folder_path = os.path.join(folder_path, user_dir)
+        for username in user_dirs:
+            user_folder_path = os.path.join(folder_path, username)
 
-        if not os.path.exists(user_folder_path):
-            continue
-
-        username = user_dir
+            for filename in os.listdir(user_folder_path):
+                if filename.startswith('.'):
+                    continue
 
-        files = os.listdir(user_folder_path)
-
-        for filename in files:
                 filepath = os.path.join(user_folder_path, filename)
-
-            if filename.startswith('.'):
+                if filepath not in failed_medias:
                     continue
-
+
                 user_id = get_user_id(username)
                 timestamp = int(os.path.getctime(filepath))
                 media_id = os.path.splitext(filename)[0]
@@ -201,7 +208,7 @@
                 "media_id": media_id,
                 "user_id": user_id,
                 "filepath": filepath,
-                "platform": platform,
+                "platform": 'instagram',
                 "highlight_id": None,
                 "post_type": post_type
             }
@@ -209,20 +216,40 @@
             medias.append(data)
 
     return medias
+
+def dump_instagram():
+    medias, failed_medias = get_media()
+    medias = clean_dupes(medias)
+    failed_medias = get_custom_media(failed_medias)
 
-def dump_instagram(folder_path):
-    medias = get_media(folder_path)
-    # medias.extend(get_custom_media())
-
-    if cleanup_dupe_stories(medias):
-        medias = get_media(folder_path)
-
+    medias.sort(key=lambda x: (x['username'].lower(), x['timestamp']))
+
+    new_user_ids = {}
+    for media in medias:
+        if media['user_id']:
+            user_id = media['user_id']
+            username = media['username']
+            if username not in existing_users:
+                existing_users[username] = user_id
+                new_user_ids[username] = user_id
+
     for media in medias:
-        UploadMedia(media)
-        existing_files.append(media['media_id'])
+        user_id = media['user_id']
+        username = media['username']
+        if user_id is None and username in new_user_ids:
+            media['user_id'] = new_user_ids[username]
 
-def cleanup_dupe_stories(medias):
+    for media in medias:
+        UploadMedia(media)
+        existing_media_ids.add(media['media_id'])
+
+    if UPLOAD_CUSTOM:
+        for media in failed_medias:
+            UploadMedia(media)
+
+def clean_dupes(medias):
     removed_count = 0
+    new_medias = []
     for media in medias:
         media_id = media['media_id']
         filepath = media['filepath']
@@ -231,16 +258,70 @@
             print(f'Invalid media_id for file {filepath}. Skipping...')
             continue
 
-        # Check if media_id is in existing_files OR if filepath contains any '(number)'
-        if media_id in existing_files or re.search(r'\(\d+\)', filepath):
+        # Check if media_id is in existing_media_ids OR if filepath contains any '(number)'
+        if media_id in existing_media_ids or re.search(r'\(\d+\)', filepath):
             removed_count += 1
             print(f'Found duplicate file {filepath}. Removing...')
             os.remove(filepath)
             continue
 
+        new_medias.append(media)
+
     print(f'Removed {removed_count} duplicate files.')
-    return removed_count
+    return new_medias
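The cache consumed below stores (id, media_id) pairs along with a timestamp, so a later run can both skip known media and resume the DB scan from the newest row id. A made-up example of the on-disk shape (values invented):

example_cache = {
    'timestamp': '2025-05-23T10:15:00',
    'existing_media_ids': [[1201, '3141592653589793238'], [1202, '2718281828459045235']],
    'existing_users': {'dualipa': '12345678'}
}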
+
+def get_cached_data():
+    if not os.path.exists(CACHE_FILE):
+        print('No cache file found. Generating new cache…')
+        return None, None
+
+    try:
+        with open(CACHE_FILE, 'r') as f:
+            cache_data = json.load(f)
+
+        timestamp = datetime.fromisoformat(cache_data.get('timestamp', ''))
+        if datetime.now() - timestamp < CACHE_TTL:
+            print('Using cached data…')
+            return set(tuple(x) for x in cache_data.get('existing_media_ids', [])), cache_data.get('existing_users', {})
+    except Exception as e:
+        print(f"Cache read error: {e}")
+
+    return None, None
+
+def save_cached_data(existing_media_ids, existing_users):
+    with open(CACHE_FILE, 'w') as f:
+        json.dump({'timestamp': datetime.now().isoformat(), 'existing_media_ids': list(existing_media_ids), 'existing_users': existing_users}, f)
+
+def get_existing_medias(newCursor):
+    existing_media_ids, existing_users = get_cached_data()
+
+    if existing_media_ids and existing_users:
+        newest_id = max(existing_media_ids, key=lambda x: x[0])[0]
+
+        existing_media_ids = {image[1] for image in existing_media_ids}
+
+        newCursor.execute("SELECT id, media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram' AND status = 'public' AND id > %s ORDER BY id DESC", (newest_id,))
+        new_media_ids = {image[1] for image in newCursor.fetchall()}
+
+        for media_id in new_media_ids:
+            existing_media_ids.add(media_id)
+
+        return existing_media_ids, existing_users
+
+    print('Getting existing files and users...')
+    newCursor.execute("SELECT id, media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram' AND status = 'public';")
+    rows = newCursor.fetchall()
+
+    print('Getting existing users...')
+    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'")
+    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
+
+    save_cached_data(rows, existing_users)
+
+    # Return bare media_ids so membership checks match the cached branch above.
+    existing_media_ids = {row[1] for row in rows}
+    return existing_media_ids, existing_users
+
 if __name__ == '__main__':
     print('Starting processing...')
@@ -252,19 +333,11 @@
 
     obj_storage = config.get_storage()
 
-    print('Getting existing files and users...')
-    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram'")
-    existing_files = [image[0] for image in newCursor.fetchall()]
-
-    print('Getting existing users...')
-    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'")
-    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
+    existing_media_ids, existing_users = get_existing_medias(newCursor)
 
-    dump_instagram(directory)
+    dump_instagram()
 
     print("Processing completed.")
 
-    newDB.close()
-
     for mediatype, _ in media_types.items():
         funcs.clean_empty_folders(os.path.join(directory, mediatype))
\ No newline at end of file
diff --git a/storysave_dump_custom.py b/storysave_dump_custom.py
deleted file mode 100644
index 59f92b4..0000000
--- a/storysave_dump_custom.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from datetime import datetime
-from uuid import uuid4
-import funcs
-import config
-import cv2
-import os
-
-
-media_directory = "media/ready_for_upload"
-platform = "instagram"
-
-working_directory = os.path.join(media_directory, platform)
-
-def UploadMedia(media):
-    username = media['username']
-    user_id = media['user_id']
-    filepath = media['filepath']
-    platform = 
media['platform'] - - media_id = media['media_id'] - - thumbnail_url = None - phash = None - - filename = os.path.basename(filepath) - file_extension = os.path.splitext(filename)[1].lower() - - media_type = funcs.get_media_type(filename) - if not media_type: - print(f'Error determining media type for {filename}. Skipping...') - return False - - post_type = funcs.determine_post_type(filepath) - if not post_type: - print(f'Error determining post type for {filename}. Skipping...') - return False - - file_hash = funcs.calculate_file_hash(filepath) - - post_date = datetime.now() - - width, height = funcs.get_media_dimensions(filepath) - - duration = funcs.get_video_duration(filepath) - - if media_type == 'image': - phash = funcs.generate_phash(filepath) - elif media_type == 'video': - try: - thumb_path = generate_thumbnail(filepath) - obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes - thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg" - phash = funcs.generate_phash(thumb_path) - os.remove(thumb_path) - except Exception as e: - print(f'Error generating thumbnail. Skipping... {e}') - return False - - newFilename = f'{file_hash}{file_extension}' - server_path = f'media/{post_type}/{username}/{newFilename}' - - file_url = f"https://storysave.b-cdn.net/{server_path}" - - obj_storage.PutFile(filepath, server_path) # slow as fuck - - post_type = 'story' if post_type == 'stories' else 'post' - query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" - values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, media_id) - - newCursor.execute(query, values) # slower - newDB.commit() - print(f'[{newCursor.rowcount}] records updated. 
File {filename} uploaded to {file_url}') - - os.remove(filepath) - - return True - -def generate_thumbnail(filepath): - thumb_path = f'.temp/{uuid4()}.jpg' - cap = cv2.VideoCapture(filepath) - ret, frame = cap.read() - cv2.imwrite(thumb_path, frame) - cap.release() - return thumb_path - -def get_user_id(username): - username = username.lower() - if username in existing_users: - return existing_users[username] - - return None - -def get_media(folder_path): - medias = [] - - user_folders = os.listdir(folder_path) - for user_folder in user_folders: - user_folder_path = os.path.join(folder_path, user_folder) - - if not os.path.isdir(user_folder_path): - continue - - files = os.listdir(user_folder_path) - for filename in files: - filepath = os.path.join(folder_path, user_folder, filename) - - # skip file if its hidden - if filename.startswith('.'): - continue - - try: - media_id = filename.split('.')[0] - media_id = int(media_id) - except: - media_id = None - - media = { - 'username': user_folder, - 'filepath': filepath, - 'user_id': get_user_id(user_folder), - 'media_id': media_id, - 'platform': platform - } - - medias.append(media) - - return medias - -def dump_instagram(folder_path): - medias = get_media(folder_path) - - for media in medias: - UploadMedia(media) - -if __name__ == '__main__': - print('Starting processing...') - - newDB, newCursor = config.gen_connection() - - obj_storage = config.get_storage() - - newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL") - existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()} - - dump_instagram(working_directory) - - print("Processing completed.") \ No newline at end of file diff --git a/storysave_scanner.py b/storysave_scanner.py index 605e3c6..10c6593 100644 --- a/storysave_scanner.py +++ b/storysave_scanner.py @@ -6,7 +6,6 @@ import os from funcs import get_media_dimensions media_dir = 'media' -output_dir = 'instagram' stories_dir = 'stories' posts_dir = 'posts' @@ -75,8 +74,6 @@ class DownloadHandler(FileSystemEventHandler): if not os.path.exists(file_path): return - print(f'Moving {file}...') - post_type = determine_post_type(file_path) if post_type == 'posts': media_type_dir = posts_dir @@ -86,9 +83,15 @@ class DownloadHandler(FileSystemEventHandler): print(f"Could not determine post type for {file}. Skipping...") return - outputPath = os.path.join(media_dir, output_dir, media_type_dir, file) + outputPath = os.path.join(media_dir, media_type_dir, file) + + if os.path.exists(outputPath): + print(f"File already exists {outputPath}. 
Removing...") + os.remove(file_path) + return shutil.move(file_path, outputPath) + print(f"Moved {file_path} to {outputPath}") def on_created(self, event): if not event.is_directory and 'crdownload' not in event.src_path: @@ -110,4 +113,4 @@ if __name__ == "__main__": time.sleep(1) # Add a 1-second sleep to reduce CPU usage except KeyboardInterrupt: observer.stop() - observer.join() + observer.join() \ No newline at end of file diff --git a/tiktok_dump.py b/tiktok_dump.py deleted file mode 100644 index 255d0c7..0000000 --- a/tiktok_dump.py +++ /dev/null @@ -1,140 +0,0 @@ -from datetime import datetime -from uuid import uuid4 -import funcs -import config -import cv2 -import os - -directory = 'processed_tiktoks' - -def UploadMedia(media): - platform = 'TikTok' - username = media['username'] - filepath = media['filepath'] - file_size = os.path.getsize(filepath) - thumbnail_url = None - phash = None - - filename = os.path.basename(filepath) - file_extension = os.path.splitext(filename)[1].lower() - - media_type = funcs.get_media_type(filename) - if not media_type: - print(f'Error determining media type for {filename}. Skipping...') - return False - - post_type = funcs.determine_post_type(filepath) - if not post_type: - print(f'Error determining post type for {filename}. Skipping...') - return False - - file_hash = funcs.calculate_file_hash(filepath) - if file_hash in existing_hashes: - print(f'File {filename} already exists. Skipping...') - return False - - post_date = datetime.now() - - width, height = funcs.get_media_dimensions(filepath) - - duration = funcs.get_video_duration(filepath) - - if media_type == 'image': - phash = funcs.generate_phash(filepath) - elif media_type == 'video': - try: - thumb_path = generate_thumbnail(filepath) - obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes - thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg" - phash = funcs.generate_phash(thumb_path) - os.remove(thumb_path) - except: - print('Error generating thumbnail. Skipping...') - return False - - newFilename = f'{file_hash}{file_extension}' - server_path = f'media/tiktoks/{username}/{newFilename}' - - file_url = f"https://storysave.b-cdn.net/{server_path}" - - obj_storage.PutFile(filepath, server_path) # slow as fuck - - post_type = 'story' if post_type == 'stories' else 'post' - query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" - values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size) - - newCursor.execute(query, values) # slower - newDB.commit() - print(f'[{newCursor.rowcount}] records updated. 
File {filename} uploaded to {file_url}') - - os.remove(filepath) - - return True - -def generate_thumbnail(filepath): - thumb_path = f'temp/{uuid4()}.jpg' - cap = cv2.VideoCapture(filepath) - ret, frame = cap.read() - cv2.imwrite(thumb_path, frame) - cap.release() - return thumb_path - -def get_media_data(filepath): - filename = os.path.basename(filepath) - parts = filename.split('~') - - if len(parts) == 3: - username, title, tiktok_id = parts - elif len(parts) == 2: - username, title = parts - tiktok_id = None - else: - return False - - data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title} - - return data - -def get_media(folder_path): - medias = [] - - users = os.listdir(folder_path) - for user in users: - user_folder = os.path.join(folder_path, user) - if not os.path.isdir(user_folder): - print(f"Skipping {user}") - continue - - files = os.listdir(user_folder) - for filename in files: - filepath = os.path.join(user_folder, filename) - - data = get_media_data(filepath) - if data: - medias.append(data) - - return medias - -def dump_instagram(folder_path): - medias = get_media(folder_path) - - for media in medias: - UploadMedia(media) - -if __name__ == '__main__': - print('Starting processing...') - - if not os.listdir(directory): - print('No files to process. Exiting...') - exit() - - newDB, newCursor = config.gen_connection() - - obj_storage = config.get_storage() - - newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'") - existing_hashes = [row[0] for row in newCursor.fetchall()] - - dump_instagram(directory) - - print("Processing completed.") \ No newline at end of file diff --git a/twitch_downloader.py b/twitch_downloader.py deleted file mode 100644 index b904d48..0000000 --- a/twitch_downloader.py +++ /dev/null @@ -1,123 +0,0 @@ -from selenium.webdriver.common.by import By -import undetected_chromedriver as uc -import requests -import base64 -import re -import os - -def format_url(url): - clean_url = re.sub(r'%[0-9A-F]{2}', '', url) - return clean_url - -def encode_offset(offset_num): - offset_base64 = str(offset_num).encode('utf-8') - offset_base64 = base64.b64encode(offset_base64).decode('utf-8') - return offset_base64 - -def get_clips(username): - url = 'https://gql.twitch.tv/gql' - - offset_num = 20 - offset_base64 = encode_offset(offset_num) - - user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' - - headers = { - 'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko', - 'Content-Type': 'text/plain;charset=UTF-8', - 'User-Agent': user_agent - } - - data = { - "operationName":"ClipsCards__User", - "variables":{"login":username,"limit":100,}, - "extensions":{"persistedQuery":{"version":1,"sha256Hash":"4eb8f85fc41a36c481d809e8e99b2a32127fdb7647c336d27743ec4a88c4ea44"}} - } - - response = requests.post(url, headers=headers, json=data) - - clips = response.json() - - clips = clips['data']['user']['clips']['edges'] - - cleaned_clips = parse_clips(clips) - - return cleaned_clips - - -def parse_clips(clips): - """ - clips is a list of dictionaries - """ - - cleaned_clips = [] - for clip in clips: - clip = clip['node'] - - clip_id = clip['id'] - clip_url = clip['url'] - clip_title = clip['title'] - clip_view_count = clip['viewCount'] - clip_duration = clip['durationSeconds'] - - cleaned_clip = { - 'id': clip_id, - 'url': clip_url, - 'title': clip_title, - 'views': clip_view_count, - 'duration': clip_duration - } - - 
cleaned_clips.append(cleaned_clip) - - return cleaned_clips - -def get_video_url(video_url, driver): - driver.get(video_url) - - # Get the video element - video = driver.find_element(By.TAG_NAME, 'video') - - # Get the video source - video_src = video.get_attribute('src') - - return video_src - -def download_video(video_url, filepath): - if os.path.exists(filepath): - return filepath - - video = requests.get(video_url) - - # Download in chunks - with open(filepath, 'wb') as f: - for chunk in video.iter_content(chunk_size=1024): - f.write(chunk) - - return filepath - - -# Set up an undetected Chrome driver in headless mode -opts = uc.ChromeOptions() -opts.add_argument("--headless") -opts.add_argument("--window-size=1920,1080") - -driver = uc.Chrome(use_subprocess=True, options=opts) - -username = 'didicandy666' -clips = get_clips(username) - -for clip in clips: - clip_url = clip['clip_url'] - - filename = f"{clip['id']}.mp4" - filepath = os.path.join('clips', filename) - - if os.path.exists(filepath): - print(f"Already downloaded {filename}") - continue - - video_url = get_video_url(clip_url, driver) - - download_video(video_url, filepath) - print(f"Downloaded {filename}") \ No newline at end of file diff --git a/webdriver_instagram_api.py b/webdriver_instagram_api.py new file mode 100644 index 0000000..f456e04 --- /dev/null +++ b/webdriver_instagram_api.py @@ -0,0 +1,143 @@ +import os +import time +import requests +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.chrome.options import Options + +# --- Configuration --- +USERNAME = "maorshabakov" # your Instagram username +PASSWORD = "PeyxCU%MD*Zq9p" # your Instagram password +TARGET_USER = "cata.leyah" # the username of the profile to scrape +DOWNLOAD_DIR = "downloads" # directory to save media +SCROLL_PAUSE_TIME = 2 # seconds to wait after each scroll + +# --- Helper functions --- +def login_instagram(driver, username, password): + driver.get("https://www.instagram.com/accounts/login/") + time.sleep(3) # wait for the login page to load + + # Accept cookies if prompted (may need to adjust for your region) + try: + accept_button = driver.find_element(By.XPATH, "//button[text()='Allow all cookies']") + accept_button.click() + time.sleep(2) + except Exception: + pass + + # check if already logged in by checking if the current url has been redirected to the home page + if driver.current_url == "https://www.instagram.com/": + print("Already logged in.") + return + + # Enter username and password + username_input = driver.find_element(By.NAME, "username") + password_input = driver.find_element(By.NAME, "password") + username_input.send_keys(username) + password_input.send_keys(password) + password_input.send_keys(Keys.RETURN) + time.sleep(5) # wait for login to complete + +def scroll_to_load_posts(driver, post_count=12): + post_links = dict() + + last_height = driver.execute_script("return document.body.scrollHeight") + while True: + driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + time.sleep(SCROLL_PAUSE_TIME) + new_height = driver.execute_script("return document.body.scrollHeight") + + new_posts = get_post_links(driver) + for link in new_posts: + if link not in post_links: + post_links[link] = True + + if len(post_links) >= post_count: + break + + if new_height == last_height: + break + last_height = new_height + +def get_post_links(driver): + # Find all post links on the profile page. 
+ # Instagram posts are links with hrefs that contain '/p/' + post_elements = driver.find_elements(By.XPATH, "//a[contains(@href, '/p/')]") + links = [elem.get_attribute("href") for elem in post_elements] + # Remove duplicates + return list(set(links)) + +def download_media(url, download_folder, filename): + response = requests.get(url, stream=True) + if response.status_code == 200: + filepath = os.path.join(download_folder, filename) + with open(filepath, 'wb') as f: + for chunk in response.iter_content(1024): + f.write(chunk) + print(f"Downloaded: {filename}") + else: + print(f"Failed to download: {url}") + +def extract_media_url(driver): + # Try to get video first + try: + video = driver.find_element(By.TAG_NAME, "video") + media_url = video.get_attribute("src") + if media_url: + return media_url, "mp4" + except Exception: + pass + + # Fallback to image extraction + try: + # Sometimes the post image is inside a div with role="button" + image = driver.find_element(By.XPATH, "//img[contains(@src, 'scontent')]") + media_url = image.get_attribute("src") + if media_url: + return media_url, "jpg" + except Exception: + pass + + return None, None + +# --- Main script --- +def main(): + os.makedirs(DOWNLOAD_DIR, exist_ok=True) + + chrome_options = Options() + chrome_options.add_argument("--user-data-dir=.profiles/thenigga") + driver = webdriver.Chrome(options=chrome_options) + driver.maximize_window() + + try: + # Log in to Instagram + login_instagram(driver, USERNAME, PASSWORD) + + # Navigate to the target user's profile + driver.get(f"https://www.instagram.com/{TARGET_USER}/") + time.sleep(5) # let the page load + + # Scroll down to load all posts + scroll_to_load_posts(driver) + + # Gather all post links from the profile page + post_links = get_post_links(driver) + print(f"Found {len(post_links)} posts.") + + # Process each post + for idx, post_link in enumerate(post_links): + driver.get(post_link) + time.sleep(3) # wait for post to load + + # click download button where div class post-download-all-button + download_button = driver.find_element(By.XPATH, "//div[@class='post-download-all-button']") + driver.execute_script("arguments[0].click();", download_button) + + time.sleep(1) + + finally: + driver.quit() + +if __name__ == "__main__": + main() \ No newline at end of file
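A note on webdriver_instagram_api.py: extract_media_url and download_media are defined but never called; main() instead clicks a post-download-all-button div, which appears to come from a separate downloader extension rather than Instagram's own markup. If that extension is absent, the per-post loop could fall back on the helpers defined above, along these lines (a sketch, not a drop-in replacement):

        for idx, post_link in enumerate(post_links):
            driver.get(post_link)
            time.sleep(3)  # wait for the post to load

            media_url, ext = extract_media_url(driver)
            if not media_url:
                print(f"No media found at {post_link}")
                continue

            download_media(media_url, DOWNLOAD_DIR, f"{TARGET_USER}_{idx}.{ext}")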