main
oscar 9 months ago
parent 445b0ad9f0
commit e6ad418ecd

BIN
.DS_Store vendored

Binary file not shown.

@ -1,5 +1,16 @@
from BunnyCDN.Storage import Storage
import mysql.connector
import os
MEDIA_DIRECTORY = "media"
SNAPCHAT_DIRECTORY = "snapchat"
INSTAGRAM_DIRECTORY = "instagram"
def get_instagram_directory():
return os.path.join(MEDIA_DIRECTORY, INSTAGRAM_DIRECTORY)
def snapchat_output_dir():
return os.path.join(MEDIA_DIRECTORY, SNAPCHAT_DIRECTORY)
username = "doadmin"
password = "AVNS_2qeFJuiGRpBQXkJjlA6"
@ -9,10 +20,16 @@ database = "storysave"
sslmode = "REQUIRED"
def gen_connection():
import mysql.connector
print("Connecting to database")
newDB = mysql.connector.connect(host=host, user=username, password=password, database=database, port=port)
print("Connected to database")
return newDB, newDB.cursor()
def get_storage():
from BunnyCDN.Storage import Storage
return Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
def get_custom_storage():
from bunny import Storage
return Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

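For context, the rest of the repo consumes these config helpers roughly as in this minimal sketch (assuming the config.py above is importable and its dependencies are installed; `media` is the table queried in later hunks):

import config

db, cursor = config.gen_connection()  # MySQL connection plus cursor
storage = config.get_storage()        # BunnyCDN Storage client
cursor.execute("SELECT COUNT(*) FROM media")
print(cursor.fetchone()[0])
db.close()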
@ -2,6 +2,7 @@ from funcs import get_files
from PIL import Image
import imagehash
import cv2
import os
def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=16, hamming_threshold=1):
"""
@ -32,7 +33,6 @@ def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=
pil_image = Image.fromarray(frame_rgb)
previous_hash = imagehash.phash(pil_image, hash_size=hash_size)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
is_static = True
current_frame_number = 1
@ -66,11 +66,34 @@ def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=
return is_static
directory = 'videos'
directory = input("Enter the directory path: ")
files = get_files(directory)
files = [file for file in get_files(directory) if file.endswith('.mp4')]
total_space_saved = 0
for video_file in files:
if video_file.endswith('.mp4'):
if is_static_video_phash_optimized(video_file):
print("The video is static: " + video_file)
if not is_static_video_phash_optimized(video_file):
continue
screenshot_path = os.path.join('.temp', os.path.basename(video_file) + '.jpg')
os.makedirs('.temp', exist_ok=True)
if not os.path.exists(screenshot_path):
cap = cv2.VideoCapture(video_file)
ret, frame = cap.read()
cap.release()
if ret:
cv2.imwrite(screenshot_path, frame)
if not os.path.exists(screenshot_path):
continue  # frame grab failed; skip instead of crashing on getsize
screenshot_size = os.path.getsize(screenshot_path)
video_size = os.path.getsize(video_file)
if screenshot_size < video_size:
screenshot_size_in_mb = screenshot_size / (1024 * 1024)
video_size_in_mb = video_size / (1024 * 1024)
total_space_saved += video_size - screenshot_size
print(f"Screenshot size: {screenshot_size_in_mb:.2f} MB, Video size: {video_size_in_mb:.2f} MB")
else:
os.remove(screenshot_path)
print(f"Total space saved: {total_space_saved / (1024 * 1024):.2f} MB")

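The detector above reduces to one primitive: perceptually hash a sampled frame and compare it to the first frame's hash. A minimal sketch of that comparison, assuming the same OpenCV/imagehash stack:

import cv2
import imagehash
from PIL import Image

def frame_phash(frame, hash_size=16):
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV frames are BGR
    return imagehash.phash(Image.fromarray(rgb), hash_size=hash_size)

# Subtracting two ImageHash objects yields their Hamming distance, so the
# video counts as static while every sampled frame stays within threshold:
# is_static = (frame_phash(a) - frame_phash(b)) <= hamming_threshold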
@ -10,6 +10,21 @@ from moviepy.editor import VideoFileClip
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies={"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/","https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"}
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def generate_phash(image_path):
try:
image = Image.open(image_path)
@ -18,10 +33,10 @@ def generate_phash(image_path):
print(f"Error generating phash for {image_path}: {e}")
return False
def cleanEmptyFolders(path):
def clean_empty_folders(path):
for root, dirs, fs in os.walk(path):
for d in dirs:
cleanEmptyFolders(os.path.join(root, d))
clean_empty_folders(os.path.join(root, d))
if not os.listdir(root):
os.rmdir(root)
@ -29,6 +44,8 @@ def get_files(directory):
files = []
for root, dirs, filenames in os.walk(directory):
for filename in filenames:
if filename.startswith('.'):
continue
files.append(os.path.join(root, filename))
return files
@ -87,68 +104,35 @@ def compare_images(image_path1, image_path2):
else:
return False
def remove_empty_folders(dir_path):
import shutil
def is_folder_empty(folder_path):
return len(os.listdir(folder_path)) == 0
num_folder = 0
for root, dirs, files in os.walk(dir_path, topdown=False):
for dir_name in dirs:
dir_path = os.path.join(root, dir_name)
if not os.path.isdir(dir_path):
continue
if '$' in dir_name or '$' in dir_path:
print(f"Skipping system folder: {dir_path}")
continue
if 'system volume information' in dir_name.lower() or 'system volume information' in dir_path.lower():
print(f"Skipping system folder: {dir_path}")
continue
if is_folder_empty(dir_path) or dir_name.lower() == '__pycache__':
shutil.rmtree(dir_path)
print(f"Removed empty folder: {dir_path}")
num_folder += 1
def download_file(url, filePath):
try:
response = requests.get(url, stream=True, headers=headers)
response.raise_for_status()
if os.path.exists(filePath):
return filePath
directory = os.path.dirname(filePath)
if not url:
print(f"Invalid URL: {url}")
return False
response = requests.get(url, stream=True, headers=headers)
if not os.path.exists(directory):
os.makedirs(directory)
if response.status_code != 200:
print(f"Failed to download {url}. Status code: {response.status_code}")
return False
os.makedirs(os.path.dirname(filePath), exist_ok=True)
with open(filePath, "wb") as out_file:
for chunk in response.iter_content(chunk_size=8192):
out_file.write(chunk)
print(f"Downloaded {filePath}")
return True
return filePath
except Exception as e:
print(f"Failed to download {url}. Error: {e}")
return False
def determine_post_type(filepath):
width, height = get_media_dimensions(filepath)
if 0 in (width, height):
return False
aspect_ratio = width / height
if aspect_ratio > 0.5 and aspect_ratio < 0.6:
return 'stories'
else:
return 'posts'
def get_media_type(filename):
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg", ".eps", ".raw", ".cr2", ".nef", ".orf", ".sr2", ".heic", ".indd", ".ai", ".psd", ".svg"}
video_extensions = {".mp4", ".mov"}
video_extensions = {".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".webm", ".vob", ".ogg", ".ts", ".flv"}
filetype_dict = {"image": image_extensions, "video": video_extensions}
extension = os.path.splitext(filename.lower())[1] # Get the extension and convert to lower case
@ -163,9 +147,7 @@ def get_video_duration(file_path):
print(f"File not found: {file_path}")
return 0
video_types = {".mp4", ".mov", ".mkv"}
extension = os.path.splitext(file_path.lower())[1]
if extension not in video_types:
if get_media_type(file_path) != 'video':
return 0
try:
@ -178,6 +160,12 @@ def get_video_duration(file_path):
print(f"Error getting duration for {file_path}: {e}")
return 0
def get_media_dimensions(media_path):
if get_media_type(media_path) == 'video':
return get_video_dimensions(media_path)
else:
return get_image_dimensions(media_path)
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@ -185,13 +173,13 @@ def get_video_dimensions(video_path):
cap.release()
return width, height
def get_media_dimensions(media_path):
if get_media_type(media_path) == 'video':
return get_video_dimensions(media_path)
else:
with Image.open(media_path) as img:
def get_image_dimensions(image_path):
try:
with Image.open(image_path) as img:
return img.size
except:
return 0, 0
def get_video_data(video_path):
data = {'duration': 0, 'width': 0, 'height': 0}
try:

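A quick usage sketch for the dimension helpers above (the file name is hypothetical):

width, height = get_media_dimensions("clip.mp4")  # dispatches to get_video_dimensions()
if 0 in (width, height):
    print("could not read dimensions")
else:
    print(width, height, get_media_type("clip.mp4"), get_video_duration("clip.mp4"))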
@ -1,5 +1,4 @@
from concurrent.futures import ThreadPoolExecutor
from BunnyCDN.Storage import Storage
import config, os
def DownloadFile(serverPath, cacheDir):
@ -9,8 +8,8 @@ def DownloadFile(serverPath, cacheDir):
print(f"File already exists: {localFilePath}")
return localFilePath
print(f"Downloading {serverPath} to {localFilePath}")
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
print(f"Downloaded {serverPath} to {localFilePath}")
return localFilePath
def ImportMedias(results):
@ -20,14 +19,14 @@ def ImportMedias(results):
executor.submit(DownloadFile, serverPath, cacheDir)
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;")
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0 ORDER BY id DESC;")
results = cursor.fetchall()
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")

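The hunk elides the body of ImportMedias; judging from the submit call shown, it presumably fans the result rows out over a thread pool along these lines (the pool size and the URL-to-path mapping are assumptions):

from concurrent.futures import ThreadPoolExecutor

def ImportMedias(results):
    with ThreadPoolExecutor(max_workers=8) as executor:  # worker count assumed
        for row_id, media_id, media_url in results:
            serverPath = media_url.split(".b-cdn.net/")[-1]  # assumed CDN-URL-to-storage-path mapping
            executor.submit(DownloadFile, serverPath, cacheDir)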
@ -16,6 +16,6 @@ undetected_chromedriver
python-telegram-bot
tqdm
webdriver-manager
moviepy
moviepy==1.0.3
instagrapi
ImageHash

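The moviepy pin matters: these scripts import from moviepy.editor, and the moviepy.editor module was removed in moviepy 2.x, so pinning to 1.0.3 keeps that import path alive.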
@ -5,6 +5,16 @@ import json
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
snap_types = {
27 : ['spotlight', 'video'],
256 : ['thumbnail', 'image'],
400 : ['idk', 'image'],
1023 : ['idk', 'image'],
1034 : ['downscaled_video', 'video'],
1322 : ['idk', 'video'],
1325 : ['idk', 'video'],
}
def get_data(username):
url = f"https://www.snapchat.com/add/{username}"
response = requests.get(url, headers=headers)
@ -16,6 +26,24 @@ def get_data(username):
data = json.loads(data_script.string)
return data
def get_social_medias(data):
website_url = None
try:
website_url = data['props']['pageProps']['userProfile']['publicProfileInfo']['websiteUrl']
except KeyError:
pass
return website_url
def get_related_profiles(data):
related_profiles = []
try:
related_profiles_data = data['props']['pageProps']['userProfile']['relatedProfiles']
for profile in related_profiles_data:
related_profiles.append(profile['username'])
except KeyError:
pass
return related_profiles
def get_all_users_data(usernames):
all_data = {}
@ -48,27 +76,38 @@ def parse_stories(stories):
return parsed_stories
def get_stories(data):
"""Extract story list from the JSON data."""
try:
stories = data['props']['pageProps']['story']['snapList']
return parse_stories(stories)
except KeyError:
if not isinstance(stories, list):
return []
stories.sort(key=lambda x: x.get('snapIndex'), reverse=True)
return stories
except:
return []
def get_highlights(data):
"""Extract highlights from possible highlight keys in JSON data."""
highlights = []
page_props = data.get('props', {}).get('pageProps', {})
# Possible keys that might contain highlights
possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
for key in possible_highlight_keys:
highlight_data = page_props.get(key, [])
if highlight_data:
highlights.extend(highlight_data)
return highlights
def parse_story(story):
original_snap_id = story.get('snapId', {}).get('value', '')
snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
timestamp = story.get('timestampInSec', {}).get('value', '')
media_type = story.get('snapMediaType')
media_type = 'image' if media_type == 0 else 'video'
return {
"original_snap_id": original_snap_id,
@ -77,10 +116,12 @@ def parse_story(story):
"timestamp": timestamp,
"platform": "snapchat",
"type": "story",
"username": story.get('username', ''),
"media_type": media_type,
}
def get_snap_id(url):
return url.split('/')[-1].split('.')[0]
return url.split('?')[0].split('/')[-1].split('.')[0]
def get_highlight_stories(data):
stories = []
@ -93,4 +134,20 @@ def get_highlight_stories(data):
story = parse_story(snap)
stories.append(story)
return stories
return stories
def get_spotlight_metadata(data):
"""Extract spotlight metadata from JSON data."""
try:
return data['props']['pageProps']['spotlightStoryMetadata']
except KeyError:
return []
def get_username(data):
"""Extract username from JSON data."""
try:
return data['props']['pageProps']['userProfile']['publicProfileInfo']['username']
except KeyError:
return None

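Taken together, the helpers above support a flow like this sketch (username hypothetical; live network access required):

data = get_data("someusername")  # profile page -> embedded JSON
print(get_username(data), get_social_medias(data))
for story in parse_stories(get_stories(data)):
    print(story["snap_id"], story["media_type"], story["url"])
print(get_related_profiles(data))  # usernames suggested alongside this profile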
@ -1,103 +1,49 @@
from snapchat import get_stories, get_highlight_stories, get_all_users_data
from snapchat import get_stories, get_highlight_stories, get_all_users_data, parse_stories
from datetime import datetime
from uuid import uuid4
import requests
import config
import funcs
import json
import cv2
import os
import json
directory = "snapchat"
data_directory = "data"
UPLOAD_MODE = True
def find_duplicate_snap(existing_snaps, snap_id, username):
"""
Find a snap in the existing_snaps list from the database.
"""
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def extract_file_type(url):
file_types = {
'400': '.jpg',
'1322': '.mp4',
'1325': '.mp4',
'1034': '.mp4',
'1023': '.jpg'
}
media_directory = "media"
snapchat_directory = "snapchat"
temp_directory = ".temp"
data_directory = "data"
base_url = url.split("?")[0] # Remove query string
directory = os.path.join(media_directory, snapchat_directory)
snap_data = base_url.split('/')[-1]
os.makedirs(media_directory, exist_ok=True)
os.makedirs(directory, exist_ok=True)
os.makedirs(temp_directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
# Extract the file type number
data_parts = snap_data.split('.')
if len(data_parts) > 1:
file_type_number = data_parts[1]
if file_type_number in file_types:
return file_types[file_type_number]
else:
print(f"Unexpected URL format: {base_url}")
return None
def find_duplicate_snap(existing_snap_ids, snap_id):
return snap_id in existing_snap_ids
def download_media(url, filepath):
if os.path.exists(filepath):
# File already exists, skip download and return the filepath as if it was downloaded.
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def get_snapchat_stories():
os.makedirs(directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
existing_medias = cursor.fetchall()
def archive_data(data, username):
try:
current_timestamp = int(datetime.now().timestamp())
data_filename = f"{username}~{current_timestamp}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data, indent=4))
except:
print(f"Failed to archive data for {username}.")
return False
def get_snapchat_stories(usernames):
snapchat_users_data = get_all_users_data(usernames)
snapchat_users_data = dict(sorted(snapchat_users_data.items()))
ready_stories = []
for username in usernames:
for username, data in snapchat_users_data.items():
print(f"Getting stories for {username}...")
data = snapchat_users_data.get(username)
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
@ -105,6 +51,7 @@ def get_snapchat_stories():
archive_data(data, username)
stories = get_stories(data)
stories = parse_stories(stories)
stories.extend(get_highlight_stories(data))
@ -112,14 +59,10 @@ def get_snapchat_stories():
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
# Snap already exists in the database
continue
# Determine file extension using HEAD request.
extension = extract_file_type(url)
# Determine file extension
file_exts = {'image': '.jpg', 'video': '.mp4'}
extension = file_exts.get(story['media_type'])
if not extension:
print(f"Failed to determine file extension for {url}. Skipping.")
continue
@ -127,19 +70,15 @@ def get_snapchat_stories():
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
media = {
'username': username,
'timestamp': timestamp,
'filepath': filepath,
'snap_id': snap_id,
'original_snap_id': story['original_snap_id'],
'media_url': url,
}
story['media_url'] = url
story['snap_id'] = snap_id
story['filepath'] = filepath
story['username'] = username
story['timestamp'] = timestamp
ready_stories.append(media)
print(f"Media {snap_id} ready for download.")
ready_stories.append(story)
# sort ready_stories by timestamp from oldest to newest
ready_stories.sort(key=lambda x: x['timestamp'])
return ready_stories
@ -151,45 +90,73 @@ def get_snapchat_files():
return stories
def main():
ready_stories = get_snapchat_stories()
print('Initializing snappy...')
ready_stories = []
stories_from_files = get_snapchat_files()
ready_stories.extend(stories_from_files)
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
download_stories(ready_stories)
print(f"Getting stories for {len(usernames)} users...")
new_stories = get_snapchat_stories(usernames)
cleaned_stories = []
print("Checking for duplicates...")
for story in new_stories:
duplicate_snap = find_duplicate_snap(existing_snap_ids, story['snap_id'])
if duplicate_snap:
print(f"Snap {story['filepath']} already exists in the database. Removing...")
continue
cleaned_stories.append(story)
cleaned_stories = download_stories(cleaned_stories)
ready_stories.extend(cleaned_stories)
ready_stories.extend(stories_from_files)
for story in ready_stories:
UploadMedia(story)
def download_stories(stories):
downloaded_stories = []
for story in stories:
# Download the media
filepath = story['filepath']
url = story['media_url']
filename = os.path.basename(filepath)
timestamp = story['timestamp']
filepath = download_media(url, filepath)
print(f"Downloaded {filename} at {timestamp}")
filepath = funcs.download_file(url, filepath)
print(f"Downloaded {os.path.basename(filepath)}")
if not filepath:
continue
story['filepath'] = filepath
UploadMedia(story)
story['hash'] = funcs.calculate_file_hash(filepath)
story['size'] = os.path.getsize(filepath)
downloaded_stories.append(story)
return downloaded_stories
def UploadMedia(media):
username = media['username']
timestamp = media['timestamp']
file_size = media['size']
file_hash = media['hash']
filepath = media['filepath']
filename = os.path.basename(filepath)
username = media['username']
timestamp = media['timestamp']
media_type = media['media_type']
snap_id = media['snap_id']
original_snap_id = media['original_snap_id']
thumbnail_url = None
phash = None
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
duplicate_snap = find_duplicate_snap(existing_snap_ids, media['snap_id'])
if duplicate_snap:
print(f"Snap {filename} already exists in the database. Removing...")
os.remove(filepath)
return False
post_date = datetime.fromtimestamp(int(timestamp))
width, height = funcs.get_media_dimensions(filepath)
@ -214,8 +181,8 @@ def UploadMedia(media):
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id, file_size)
cursor.execute(query, values)
db.commit()
@ -226,7 +193,7 @@ def UploadMedia(media):
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg')
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
@ -234,36 +201,42 @@ def generate_thumbnail(filepath):
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 3:
return False
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None, 'media_url': None}
# data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': None, 'original_snap_id': snap_id, 'media_url': None}
return data
def process_snap_ids(filenames):
snap_ids = []
for filename in filenames:
snap_id = filename.split('~')[2]
snap_id = os.path.splitext(snap_id)[0]
if snap_id not in snap_ids:
snap_ids.append(snap_id)
return snap_ids
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 3:
return False
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
file_size = os.path.getsize(filepath)
file_hash = funcs.calculate_file_hash(filepath)
data = {
"username": username,
"timestamp": timestamp,
"filepath": filepath,
"snap_id": snap_id,
"original_snap_id": None,
"media_url": None,
"size": file_size,
"hash": file_hash
}
return data
if __name__ == '__main__':
print('Starting snappy...')
db, cursor = config.gen_connection()
obj_storage = config.get_storage()
cursor.execute("SELECT snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
existing_snap_ids = cursor.fetchall()
existing_snap_ids = {row[0] for row in existing_snap_ids}
main()

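Two conventions above are worth spelling out: downloaded filenames encode username~timestamp~snap_id.ext, and duplicate detection is a set-membership test on snap_ids already in the database. A toy walkthrough:

existing_snap_ids = {"abc123"}  # normally loaded from the media table at startup
name = "jane~1719881474~abc123.mp4"  # toy filename in the username~timestamp~snap_id form
username, timestamp, rest = name.split("~")
snap_id = rest.rsplit(".", 1)[0]
print(find_duplicate_snap(existing_snap_ids, snap_id))  # True -> skip this snap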
@ -2,32 +2,21 @@ from bs4 import BeautifulSoup
import requests
import json
def findPost(filePath = 'test.json'):
params = {'av': '17841401225494803','__a': '1','__req': '1','__hs': '19906.HYP:instagram_web_pkg.2.1..0.1','dpr': '1','__ccg': 'UNKNOWN','__rev': '1014609539','__s': 'guk60j:651i2v:pmhu0r','__hsi': '7386834689999716220','__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1xoswaq0yE6u0nS4oaEd86a3a1YwBgao1aU2swbOU2zxe2GewGw9a362W2K0zEnwhEe82mwww4cwJCwLyES1TwTwFwIwbS1LwTwKG1pg2Xwr86C1mwrd6goK3ibxKi2K7ErwYCz8rwHw','__csr': 'igAzIj5OgR5YBHdRtivbkyFv-zJIZE_ykzfahdAydeHCHAAAqyk4pqBgDzeV4-qlbBF29UlCxFpVokDwAyosyV9KWUmx6iu58WqdwSDCDAFwHxi3C00lWy2FG4k583NxW8yFE0bUyxd06lxO5C2a8yFm2u290ejg1JU2Gw2rQ061U','__comet_req': '7','fb_dtsg': 'NAcPDfX2XufdLkctek6zNxz3DWxPW4t-cJzz39QtOQ5KS-_Rq3erT4A:17843708194158284:1719013044','jazoest': '26262','lsd': 'D0zmaX16yIQu_GwDXKTbMc','__spin_r': '1014609539','__spin_b': 'trunk','__spin_t': '1719881474','__jssesw': '1','fb_api_caller_class': 'RelayModern','fb_api_req_friendly_name': 'PolarisProfilePageContentDirectQuery', 'variables': '{"id":"57771591453","render_surface":"PROFILE"}','server_timestamps': 'true','doc_id': '7663723823674585'}
doc_ids = [7663723823674585, 9539110062771438]
def get_posts():
data = {
"variables": '{"id":"57771591453","render_surface":"PROFILE"}',
"doc_id": "7663723823674585",
}
data = requests.get('https://www.instagram.com/graphql/query')
data = requests.get('https://www.instagram.com/graphql/query', params=data).json()
posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
posts = data['data']
posts = [post['node'] for post in posts]
return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates']))
def getHDProfilePicture():
url = 'https://www.save-free.com/process'
zoom_data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'zoom'}
data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'save'}
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36','Referer' : 'https://www.save-free.com/profile-downloader/',}
response = requests.post(url, data=data, headers=headers)
response = requests.post(url, data=zoom_data, headers=headers)
with open('image.jpg', 'wb') as f:
f.write(response.content)
def get_username_by_user_id(user_id):
url = 'https://www.instagram.com/graphql/query/'
@ -131,31 +120,10 @@ def get_user_id(username):
def get_profile_data(username):
url = 'https://www.instagram.com/graphql/query'
user_id = get_user_id(username)
data = {
'av': '17841401225494803',
'__d': 'www',
'__user': 0,
'__a': 1,
'__req': 2,
'__hs': '20047.HYP:instagram_web_pkg.2.1..0.1',
'dpr': 1,
'__ccg': 'EXCELLENT',
'__rev': 1018347086,
'__s': '8di41h:vwko3r:whjifd',
'__hsi': 7439320945163371549,
'__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux60p-0LVE4W0qa0FE2awgo1EUhwnU6a3a0EA2C0iK0D830wae4UaEW2G0AEco5G0zE5W0Y81eEdEGdwtU662O0Lo6-3u2WE15E6O1FwlE6PhA6bwg8rAwHxW1oCz8rwHwcOEym5oqw',
'__csr': 'hA5I8EAy7hnfqiIBklLZHVkmTHQmVmAh5UCchA9GQByu_yfD-nUBaVaDmSbDyUydCDgzyQAcggDK48Sm2ai8y8lxe6UTgmjwCyUC8yFXK9zooxmez9FUW684qu4awQwF9w04XAg0wi0nB03981oU082Oa0fMe3e19g512AK6Ulo5C3lw7Uy8G6Efo9k08mgiaaw25VobU2bw3KU023zw6Pw',
'__comet_req': 7,
'fb_dtsg': 'NAcO7gvrsNlfWXA8giwQC4bVYRXXAGomAqcIRYUJUE2Hk8HmABf56Yg:17854575481098892:1732030177',
'jazoest': 26190,
'lsd': 'zcsn3c8we8kpMB_AVukeii',
'__spin_r': 1018347086,
'__spin_b': 'trunk',
'__spin_t': 1732101883,
'fb_api_caller_class': 'RelayModern',
'fb_api_req_friendly_name': 'PolarisProfilePageContentQuery',
'variables': '{"id":"6687693830","render_surface":"PROFILE"}',
'server_timestamps': 'true',
'variables': '{"id":"' + user_id + '","render_surface":"PROFILE"}',
'doc_id': 9539110062771438
}
@ -163,21 +131,39 @@ def get_profile_data(username):
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
response = requests.post(url, headers=headers, data=data)
json_data = response.json()
return json_data
def get_hd_profile_picture(username = None, user_id = None):
api_url = 'https://www.instagram.com/graphql/query'
username_check = 'tal_ohana'
if not username and not user_id:
return None
if not user_id:
user_id = get_user_id(username)
if not user_id:
return None
data = {
'variables': '{"id":"' + user_id +' ","render_surface":"PROFILE"}',
'doc_id': 9539110062771438
}
user_id = get_user_id(username_check)
try:
response = requests.post(api_url, data=data)
username = get_username_by_user_id(user_id)
json_data = response.json()
if 'message' in json_data:
if json_data['message'] == 'Please wait a few minutes before you try again.':
print('Rate limited. Please try again later.')
return None
if username:
print(f"Username: {username}")
else:
print("Could not retrieve username.")
hd_profile_pic = json_data['data']['user']['hd_profile_pic_url_info']['url']
except:
hd_profile_pic = None
return hd_profile_pic

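Usage of the lookup above is a one-liner, though Instagram rate-limits aggressively (username hypothetical):

url = get_hd_profile_picture(username="someusername")
print(url or "lookup failed or rate limited")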
@ -4,22 +4,35 @@ import funcs
import config
import cv2
import os
import re
directory = 'storysaver'
temp_directory = ".temp"
directory = 'media/instagram/'
media_types = {
'stories' : 'story',
'posts' : 'post',
'profile' : 'profile'
}
os.makedirs(temp_directory, exist_ok=True)
def UploadMedia(media):
platform = 'Instagram'
media_id = media['media_id']
username = media['username']
timestamp = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
platform = media['platform']
media_id = media['media_id']
timestamp = media['timestamp']
highlight_id = media['highlight_id']
post_type = media['post_type']
file_size = os.path.getsize(filepath)
thumbnail_url = None
phash = None
if media_id and int(media_id) in existing_files:
if media_id and media_id in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
@ -32,16 +45,14 @@ def UploadMedia(media):
print(f'Error determining media type for {filename}. Skipping...')
return False
post_type = funcs.determine_post_type(filepath)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
width, height = funcs.get_media_dimensions(filepath)
if 0 in (width, height):
print(f'Error getting dimensions for {filename}. Skipping...')
return False
duration = funcs.get_video_duration(filepath)
@ -52,38 +63,42 @@ def UploadMedia(media):
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except Exception as e:
print(f'Error generating thumbnail: {e}. Skipping...')
return False
newFilename = f'{media_id}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
if media_id:
newFilename = f'{media_id}{file_extension}'
else:
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
obj_storage.PutFile(filepath, server_path)
if highlight_id:
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
newCursor.execute(query, values) # slower
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
print(f'[{newCursor.rowcount}] records updated.\nFile: {filename}\nURL: {file_url}')
print("="*100)
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
def generate_thumbnail(filepath):
thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg')
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
@ -94,7 +109,7 @@ def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media_data(filepath):
@ -107,36 +122,97 @@ def get_media_data(filepath):
timestamp = parts[1]
media_id = parts[2]
user_id = parts[3].split('_')[-1].split('.')[0]
platform = 'instagram'
highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None
if highlight_id:
if not user_id.isdigit():
user_id = get_user_id(username)
try:
if media_id.isdigit():
media_id = int(media_id)
except:
print(f'Invalid media_id for file {filename}. Skipping...')
else:
media_id = None
data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id}
data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id, 'platform': platform}
return data
def get_media(folder_path):
medias = []
for root, dirs, files in os.walk(folder_path):
for filename in files:
filepath = os.path.join(root, filename)
for media_type, post_type in media_types.items():
folder_path = os.path.join(directory, media_type)
if not os.path.exists(folder_path):
continue
all_files = funcs.get_files(folder_path)
for filepath in all_files:
data = get_media_data(filepath)
if data:
medias.append(data)
if not data:
continue
data['post_type'] = post_type
medias.append(data)
return medias
def get_custom_media():
medias = []
folder_path = 'media/instagram'
platform = 'instagram'
for media_type, post_type in media_types.items():
folder_path = os.path.join(directory, media_type)
user_dirs = [d for d in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, d))]
for user_dir in user_dirs:
user_folder_path = os.path.join(folder_path, user_dir)
if not os.path.exists(user_folder_path):
continue
username = user_dir
files = os.listdir(user_folder_path)
for filename in files:
filepath = os.path.join(user_folder_path, filename)
if filename.startswith('.'):
continue
user_id = get_user_id(username)
timestamp = int(os.path.getctime(filepath))
media_id = os.path.splitext(filename)[0]
if media_id.isdigit():
media_id = int(media_id)
if media_id < 10000000:
media_id = None
else:
media_id = None
data = {
"username": username,
"timestamp": timestamp,
"media_id": media_id,
"user_id": user_id,
"filepath": filepath,
"platform": platform,
"highlight_id": None,
"post_type": post_type
}
medias.append(data)
return medias
def dump_instagram(folder_path):
medias = get_media(folder_path)
# medias.extend(get_custom_media())
if cleanup_dupe_stories(medias):
medias = get_media(folder_path)
@ -150,28 +226,25 @@ def cleanup_dupe_stories(medias):
for media in medias:
media_id = media['media_id']
filepath = media['filepath']
if not media_id:
print(f'Invalid media_id for file {filepath}. Skipping...')
continue
if media_id in existing_files:
removed_count += 1
print(f'Found duplicate file {filepath}. Removing...')
os.remove(filepath)
if '(1)' in filepath:
# Check if media_id is in existing_files OR if filepath contains any '(number)'
if media_id in existing_files or re.search(r'\(\d+\)', filepath):
removed_count += 1
print(f'Found duplicate file {filepath}. Removing...')
os.remove(filepath)
continue
print(f'Removed {removed_count} duplicate files.')
return removed_count
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir(directory):
if not funcs.get_files(directory):
print('No files to process. Exiting...')
exit()
@ -179,12 +252,19 @@ if __name__ == '__main__':
obj_storage = config.get_storage()
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
print('Getting existing files and users...')
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram'")
existing_files = [image[0] for image in newCursor.fetchall()]
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
print('Getting existing users...')
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
dump_instagram(directory)
print("Processing completed.")
print("Processing completed.")
newDB.close()
for mediatype, _ in media_types.items():
funcs.clean_empty_folders(os.path.join(directory, mediatype))

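The importer keys everything off a four-field filename convention, username~timestamp~media_id~user_id.ext, where highlight items carry a highlight<ID> marker in the last field. A toy walkthrough of the parsing in get_media_data:

name = "jane~1719881474~3401234567890~highlight17900001.jpg"  # toy values
username, timestamp, media_id, tail = name.split("~")
user_id = tail.split("_")[-1].split(".")[0]  # "highlight17900001"
highlight_id = user_id.replace("highlight", "") if "highlight" in user_id else None
print(username, media_id, highlight_id)  # jane 3401234567890 17900001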
@ -52,8 +52,8 @@ def UploadMedia(media):
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
except Exception as e:
print(f'Error generating thumbnail. Skipping... {e}')
return False
newFilename = f'{file_hash}{file_extension}'
@ -76,7 +76,7 @@ def UploadMedia(media):
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
thumb_path = f'.temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)

@ -3,17 +3,89 @@ from watchdog.observers import Observer
import shutil
import time
import os
from funcs import get_media_dimensions
media_dir = 'media'
output_dir = 'instagram'
stories_dir = 'stories'
posts_dir = 'posts'
def is_story(width, height, tolerance=0.02):
"""
Determine whether the given width/height are close to the 9:16 (0.5625)
story ratio. The tolerance (default 2%) is how far the ratio may deviate
from 9/16 and still be considered a story.
"""
if width == 0 or height == 0:
return False
# Normalize to the portrait ratio (smaller dimension over larger)
# so landscape inputs are measured on the same scale.
# We'll assume portrait means stories.
ratio = width / height if width < height else height / width
# The official story ratio is 9/16 = 0.5625
story_ratio = 9/16
# Check how far off we are from the official ratio
difference = abs(ratio - story_ratio)
# If the difference is within the tolerance, we consider it a story
return difference <= (story_ratio * tolerance)
def determine_post_type(filepath):
"""
Determines if a file is for 'posts' or 'stories' based on its aspect ratio.
- If the path includes 'posts', we automatically return 'posts'.
- Otherwise, we check if the aspect ratio matches (roughly) the 9:16 ratio.
- If it does, we say 'stories', otherwise 'posts'.
"""
# If "posts" is part of the filepath, consider it a post
if 'posts' in filepath.lower():
return 'posts'
# Get actual dimensions
try:
width, height = get_media_dimensions(filepath)
except:
# If we fail to get dimensions, return None or some fallback
return None
# If dimensions are invalid, return None or False
if width == 0 or height == 0:
return None
# Use our ratio check
if is_story(width, height):
return 'stories'
else:
return 'posts'
class DownloadHandler(FileSystemEventHandler):
def process_file(self, file_path):
file = os.path.basename(file_path)
if 'crdownload' not in file and file.count('~') == 3:
print(f'Moving {file}...')
outputPath = os.path.join('storysaver', file)
try:
shutil.move(file_path, outputPath)
except Exception as e:
print(f'Failed to move file: {e}')
if 'crdownload' in file:
return
if file.count('~') != 3:
return
if not os.path.exists(file_path):
return
print(f'Moving {file}...')
post_type = determine_post_type(file_path)
if post_type == 'posts':
media_type_dir = posts_dir
elif post_type == 'stories':
media_type_dir = stories_dir
else:
return  # dimensions unreadable; leave the file in place for a later pass
outputPath = os.path.join(media_dir, output_dir, media_type_dir, file)
shutil.move(file_path, outputPath)
def on_created(self, event):
if not event.is_directory and 'crdownload' not in event.src_path:

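A quick sanity check of the ratio test above; note that because the ratio is normalized to portrait, a 16:9 landscape clip passes as well:

print(is_story(1080, 1920))  # 0.5625 == 9/16 exactly -> True
print(is_story(1080, 1350))  # 4:5 feed-post ratio -> False
print(is_story(1920, 1080))  # landscape 16:9 normalizes to 0.5625 -> True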
@ -84,19 +84,20 @@ def parse_media_data(media_item):
mediaInfo = {'taken_at': taken_at, 'post_type' : post_type, 'media_type': mediaTypes[media_item.media_type]}
if media_item.media_type not in [1, 2]:
print(f"Unsupported media type with ID {media_item.pk}")
return None
mediaInfo['media_id'] = int(media_item.pk)
if media_item.media_type == 1: # Image
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['fileURL'] = media_item.thumbnail_url
mediaInfo['filename'] = f"{media_item.pk}.jpg"
mediaInfo['filename'] = f"{media_item.pk}.jpg" # Fix this, get the actual file extension
elif media_item.media_type == 2: # Video
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['fileURL'] = media_item.video_url
try:mediaInfo['duration'] = media_item.video_duration
try:mediaInfo['duration'] = media_item.video_duration # Fix this, get the actual duration instead of defaulting to 0
except:mediaInfo['duration'] = 0
mediaInfo['filename'] = f"{media_item.pk}.mp4"
else:
print(f"Unsupported media type with ID {media_item.pk}")
return None
return mediaInfo

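For context, instagrapi's media_type codes are 1 = photo, 2 = video, and 8 = album/carousel, which is why anything outside [1, 2] is rejected above; carousel posts would need their child resources unpacked before this parser could handle them.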
@ -5,6 +5,7 @@ from uuid import uuid4
from PIL import Image
import config
import funcs
import json
import os
def insert_highlight_items(media_ids, highlight_id, title, user_id):
@ -31,23 +32,28 @@ def upload_to_storage(local_path, server_path):
print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
def login():
def login(force=False):
client = Client()
if os.path.exists("session_data.json"):
client.load_settings("session_data.json")
return client
with open("p.enc", "rb") as encrypted_file:
encrypted_data = encrypted_file.read()
fernet = Fernet(open("key.enc", "r").read())
password = str(fernet.decrypt(encrypted_data), "utf-8")
username = "olivercury"
auth = input("Enter your 2FA code (leave blank if not enabled): ")
client.login(username=username, password=password, verification_code=auth)
client.dump_settings("session_data.json")
try:
if not force:
client.load_settings("session_data.json")
else:
raise FileNotFoundError
except (FileNotFoundError, json.JSONDecodeError):
with open("p.enc", "rb") as encrypted_file:
encrypted_data = encrypted_file.read()
fernet = Fernet(open("key.enc", "r").read())
password = str(fernet.decrypt(encrypted_data), "utf-8")
username = "olivercury"
auth = input("Enter your 2FA code (leave blank if not enabled): ")
if auth:
client.login(username=username, password=password, verification_code=auth)
else:
client.login(username, password)
client.dump_settings("session_data.json")
print("Logged in successfully.")
@ -198,8 +204,11 @@ if __name__ == "__main__":
for mediaInfo in medias:
filePath = os.path.join('media', mediaInfo['post_type'], username, mediaInfo['filename'])
funcs.download_file(mediaInfo['media_url'], filePath)
filePath = funcs.download_file(mediaInfo['media_url'], filePath)
if not filePath:
continue
mediaInfo["hash"] = funcs.calculate_file_hash(filePath)
mediaInfo["username"] = username
