You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
383 lines
15 KiB
Python
383 lines
15 KiB
Python
|
11 months ago
|
import cv2, os, json, config, time, hashlib, requests
|
||
|
|
|
||
|
|
from concurrent.futures import ThreadPoolExecutor
|
||
|
|
from moviepy.editor import VideoFileClip
|
||
|
|
from cryptography.fernet import Fernet
|
||
|
|
from BunnyCDN.Storage import Storage
|
||
|
|
from instagrapi import Client
|
||
|
|
from PIL import Image
|
||
|
|
|
||
|
|
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
|
||
|
|
proxies={
|
||
|
|
"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
|
||
|
|
"https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
def file_hash(filename, hash_algo='sha256'):
|
||
|
|
"""
|
||
|
|
Compute the hash of a file.
|
||
|
|
|
||
|
|
:param filename: Path to the file.
|
||
|
|
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
|
||
|
|
:return: Hexadecimal hash string.
|
||
|
|
"""
|
||
|
|
h = hashlib.new(hash_algo)
|
||
|
|
|
||
|
|
with open(filename, 'rb') as file:
|
||
|
|
while chunk := file.read(8192):
|
||
|
|
h.update(chunk)
|
||
|
|
|
||
|
|
return h.hexdigest()
|
||
|
|
|
||
|
|
|
||
|
|
def get_video_duration(file_path):
|
||
|
|
"""
|
||
|
|
Returns the duration of the video file in seconds.
|
||
|
|
|
||
|
|
:param file_path: Path to the video file
|
||
|
|
:return: Duration in seconds
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
with VideoFileClip(file_path) as video:
|
||
|
|
return video.duration
|
||
|
|
except:
|
||
|
|
return 0
|
||
|
|
|
||
|
|
|
||
|
|
def login(force=False):
|
||
|
|
client = Client()
|
||
|
|
|
||
|
|
try:
|
||
|
|
if not force:
|
||
|
|
client.load_settings("session_data.json")
|
||
|
|
else:
|
||
|
|
raise FileNotFoundError
|
||
|
|
except (FileNotFoundError, json.JSONDecodeError):
|
||
|
|
#username = input("Enter your Instagram username: ")
|
||
|
|
#password = getpass.getpass("Enter your Instagram password: ")
|
||
|
|
|
||
|
|
with open('p.enc', 'rb') as encrypted_file:
|
||
|
|
encrypted_data = encrypted_file.read()
|
||
|
|
|
||
|
|
fernet = Fernet(open('key.enc', 'r').read())
|
||
|
|
password = str(fernet.decrypt(encrypted_data), 'utf-8')
|
||
|
|
username = 'olivercury'
|
||
|
|
|
||
|
|
auth = input("Enter your 2FA code (leave blank if not enabled): ")
|
||
|
|
if auth:
|
||
|
|
client.login(username=username, password=password, verification_code=auth)
|
||
|
|
else:
|
||
|
|
client.login(username, password)
|
||
|
|
client.dump_settings("session_data.json")
|
||
|
|
|
||
|
|
print("Logged in successfully.")
|
||
|
|
|
||
|
|
return client
|
||
|
|
|
||
|
|
|
||
|
|
def parse_media_data(media_item):
|
||
|
|
mediaTypes = {1: 'image', 2: 'video', 8: 'album'}
|
||
|
|
|
||
|
|
try:taken_at = media_item.taken_at
|
||
|
|
except:taken_at = None
|
||
|
|
try:post_type = media_item.product_type
|
||
|
|
except:post_type = None
|
||
|
|
|
||
|
|
mediaInfo = {'taken_at': taken_at, 'post_type' : post_type, 'media_type': mediaTypes[media_item.media_type]}
|
||
|
|
|
||
|
|
if media_item.media_type == 1: # Image
|
||
|
|
mediaInfo['media_id'] = int(media_item.pk)
|
||
|
|
mediaInfo['fileURL'] = media_item.thumbnail_url
|
||
|
|
mediaInfo['filename'] = f"{media_item.pk}.jpg"
|
||
|
|
elif media_item.media_type == 2: # Video
|
||
|
|
mediaInfo['media_id'] = int(media_item.pk)
|
||
|
|
mediaInfo['fileURL'] = media_item.video_url
|
||
|
|
try:mediaInfo['duration'] = media_item.video_duration
|
||
|
|
except:mediaInfo['duration'] = 0
|
||
|
|
mediaInfo['filename'] = f"{media_item.pk}.mp4"
|
||
|
|
else:
|
||
|
|
print(f"Unsupported media type with ID {media_item.pk}")
|
||
|
|
return None
|
||
|
|
|
||
|
|
return mediaInfo
|
||
|
|
|
||
|
|
def download_file(url, filePath):
|
||
|
|
try:
|
||
|
|
response = requests.get(url, stream=True, headers=headers, proxies=proxies)
|
||
|
|
response.raise_for_status()
|
||
|
|
|
||
|
|
directory = os.path.dirname(filePath)
|
||
|
|
|
||
|
|
if not os.path.exists(directory):
|
||
|
|
os.makedirs(directory)
|
||
|
|
|
||
|
|
with open(filePath, 'wb') as out_file:
|
||
|
|
for chunk in response.iter_content(chunk_size=8192):
|
||
|
|
out_file.write(chunk)
|
||
|
|
print(f"Downloaded {filePath}")
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Failed to download {url}. Error: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def process_media(mediaInfo, filePath):
|
||
|
|
if mediaInfo['media_type'] == 'image':
|
||
|
|
with Image.open(filePath) as img:
|
||
|
|
mediaInfo['width'], mediaInfo['height'] = img.size
|
||
|
|
else:
|
||
|
|
mediaInfo['width'], mediaInfo['height'] = get_video_dimensions(filePath)
|
||
|
|
mediaInfo['duration'] = get_video_duration(filePath)
|
||
|
|
|
||
|
|
if 'hash' not in mediaInfo:
|
||
|
|
mediaInfo['hash'] = file_hash(filePath)
|
||
|
|
|
||
|
|
def upload_to_storage(local_path, server_path):
|
||
|
|
try:
|
||
|
|
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||
|
|
obj_storage.PutFile(local_path, server_path)
|
||
|
|
print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def add_media_to_db(mediaInfo):
|
||
|
|
media_id = mediaInfo['media_id']
|
||
|
|
user_id = mediaInfo['user_id']
|
||
|
|
username = mediaInfo['username']
|
||
|
|
date = mediaInfo['taken_at'] if 'taken_at' in mediaInfo else None
|
||
|
|
media_type = mediaInfo['media_type']
|
||
|
|
post_type = mediaInfo['post_type']
|
||
|
|
duration = mediaInfo.get('duration', 0)
|
||
|
|
media_url = mediaInfo['media_url']
|
||
|
|
width = mediaInfo['width']
|
||
|
|
height = mediaInfo['height']
|
||
|
|
filehash = mediaInfo['hash']
|
||
|
|
|
||
|
|
try:
|
||
|
|
db, cursor = config.gen_connection()
|
||
|
|
|
||
|
|
query = """
|
||
|
|
INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, hash)
|
||
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||
|
|
"""
|
||
|
|
|
||
|
|
data = (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, filehash)
|
||
|
|
|
||
|
|
cursor.execute(query, data)
|
||
|
|
db.commit()
|
||
|
|
print(f"Added media for {username} to the database.")
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Failed to add media for {username} to the database. Error: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def insert_highlight_items(media_ids, highlight_id, title, user_id):
|
||
|
|
try:
|
||
|
|
db, cursor = config.gen_connection()
|
||
|
|
|
||
|
|
query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
|
||
|
|
|
||
|
|
values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
|
||
|
|
cursor.executemany(query, values)
|
||
|
|
db.commit()
|
||
|
|
if cursor.rowcount > 0:
|
||
|
|
print(f"Added {cursor.rowcount} highlight items to the database.")
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Failed to add highlight items to the database. Error: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def get_video_dimensions(video_path):
|
||
|
|
cap = cv2.VideoCapture(video_path)
|
||
|
|
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||
|
|
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||
|
|
cap.release()
|
||
|
|
return width, height
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
|
||
|
|
client = login()
|
||
|
|
client.set_proxy(proxies['https'])
|
||
|
|
|
||
|
|
db, cursor = config.gen_connection()
|
||
|
|
|
||
|
|
cursor.execute("SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY id DESC;")
|
||
|
|
following = cursor.fetchall()
|
||
|
|
|
||
|
|
new_following = []
|
||
|
|
for user in following:
|
||
|
|
username, user_id, favorite = user
|
||
|
|
|
||
|
|
if bool(favorite):
|
||
|
|
new_following.insert(0, user)
|
||
|
|
else:
|
||
|
|
new_following.append(user)
|
||
|
|
|
||
|
|
following = new_following
|
||
|
|
|
||
|
|
cursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL;")
|
||
|
|
existing_files = [media[0] for media in cursor.fetchall()]
|
||
|
|
|
||
|
|
continueFromLast = input("Continue from the last user? (y/n): ").lower() == 'y'
|
||
|
|
|
||
|
|
if continueFromLast:
|
||
|
|
cursor.execute("SELECT username FROM media ORDER BY id DESC LIMIT 1;")
|
||
|
|
lastUser = cursor.fetchone()
|
||
|
|
|
||
|
|
if lastUser:
|
||
|
|
lastUser = lastUser[0]
|
||
|
|
while True:
|
||
|
|
if lastUser != following[0][0]:
|
||
|
|
following.pop(0)
|
||
|
|
else:
|
||
|
|
break
|
||
|
|
|
||
|
|
actionsTaken = 0
|
||
|
|
|
||
|
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||
|
|
for user in following:
|
||
|
|
while True:
|
||
|
|
try:
|
||
|
|
firstImport = False
|
||
|
|
username, user_id, isFavorite = user
|
||
|
|
|
||
|
|
if not user_id:
|
||
|
|
firstImport = True
|
||
|
|
user_id = client.user_id_from_username(username)
|
||
|
|
actionsTaken += 1
|
||
|
|
cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
|
||
|
|
db.commit()
|
||
|
|
print(f"Updated user ID for {username} to {user_id}")
|
||
|
|
|
||
|
|
#################### profile picture ####################
|
||
|
|
profilePath = os.path.join('media', 'profile', username, 'profile.jpg')
|
||
|
|
profileURL = client.user_info(user_id).profile_pic_url_hd
|
||
|
|
download_file(profileURL, profilePath)
|
||
|
|
|
||
|
|
fileHash = file_hash(profilePath)
|
||
|
|
serverPath = os.path.join(os.path.dirname(profilePath), f"{fileHash}.jpg")
|
||
|
|
|
||
|
|
upload_to_storage(profilePath, serverPath)
|
||
|
|
|
||
|
|
mediaInfo = {
|
||
|
|
'username': username,
|
||
|
|
'user_id': user_id,
|
||
|
|
'media_id': None,
|
||
|
|
'media_type': 'image',
|
||
|
|
'post_type': 'profile',
|
||
|
|
'media_url': f"https://storysave.b-cdn.net/{serverPath}",
|
||
|
|
'duration': 0,
|
||
|
|
'hash': fileHash
|
||
|
|
}
|
||
|
|
|
||
|
|
process_media(mediaInfo, profilePath)
|
||
|
|
add_media_to_db(mediaInfo)
|
||
|
|
#################### profile picture ####################
|
||
|
|
|
||
|
|
#################### stories ####################
|
||
|
|
print(f"[{username}]\nChecking: Stories")
|
||
|
|
|
||
|
|
# fetch user stories
|
||
|
|
stories = client.user_stories(user_id)
|
||
|
|
actionsTaken += 1
|
||
|
|
|
||
|
|
# fetch user's highlights and add to stories
|
||
|
|
if firstImport or isFavorite:
|
||
|
|
highlights = client.user_highlights(user_id) # API request
|
||
|
|
actionsTaken += 1
|
||
|
|
for highlight in highlights:
|
||
|
|
try:
|
||
|
|
highlight_items = client.highlight_info_v1(highlight.pk).items # API request
|
||
|
|
actionsTaken += 1
|
||
|
|
except:
|
||
|
|
print(f"Failed to get highlight items for {highlight.pk}")
|
||
|
|
time.sleep(5)
|
||
|
|
|
||
|
|
media_ids = [item.pk for item in highlight_items]
|
||
|
|
executor.submit(insert_highlight_items, media_ids, highlight.pk, highlight.title, user_id)
|
||
|
|
stories.extend(highlight_items)
|
||
|
|
|
||
|
|
# process stories and highlight stories
|
||
|
|
newStoryCount = 0
|
||
|
|
for story in stories:
|
||
|
|
try:
|
||
|
|
mediaInfo = parse_media_data(story)
|
||
|
|
|
||
|
|
# skip duplicates
|
||
|
|
if mediaInfo['media_id'] in existing_files:
|
||
|
|
continue
|
||
|
|
|
||
|
|
newStoryCount += 1
|
||
|
|
mediaInfo['user_id'] = user_id
|
||
|
|
mediaInfo['username'] = username
|
||
|
|
mediaInfo['post_type'] = 'story'
|
||
|
|
if mediaInfo['fileURL'] and mediaInfo['filename']:
|
||
|
|
filePath = os.path.join('media', 'stories', username, mediaInfo['filename'])
|
||
|
|
mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"
|
||
|
|
|
||
|
|
download_file(mediaInfo['fileURL'], filePath)
|
||
|
|
process_media(mediaInfo, filePath)
|
||
|
|
upload_to_storage(filePath, filePath)
|
||
|
|
add_media_to_db(mediaInfo)
|
||
|
|
os.remove(filePath)
|
||
|
|
|
||
|
|
existing_files.append(mediaInfo['media_id'])
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Failed to process story for {username}. Error: {e}")
|
||
|
|
#################### stories ####################
|
||
|
|
|
||
|
|
#################### posts ####################
|
||
|
|
print("Checking: Posts")
|
||
|
|
medias = client.user_medias(user_id, 36) # API request
|
||
|
|
actionsTaken += 1
|
||
|
|
|
||
|
|
posts = []
|
||
|
|
for post in medias:
|
||
|
|
if post.media_type == 8:
|
||
|
|
for item in post.resources:
|
||
|
|
posts.append(item)
|
||
|
|
continue
|
||
|
|
posts.append(post)
|
||
|
|
|
||
|
|
newPostsCount = 0
|
||
|
|
for post in posts:
|
||
|
|
mediaInfo = parse_media_data(post)
|
||
|
|
if mediaInfo['media_id'] in existing_files:
|
||
|
|
continue
|
||
|
|
|
||
|
|
newPostsCount += 1
|
||
|
|
mediaInfo['user_id'] = user_id
|
||
|
|
mediaInfo['username'] = username
|
||
|
|
mediaInfo['post_type'] = 'post'
|
||
|
|
if mediaInfo['fileURL'] and mediaInfo['filename']:
|
||
|
|
filePath = os.path.join('media', 'posts', username, mediaInfo['filename'])
|
||
|
|
mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"
|
||
|
|
|
||
|
|
download_file(mediaInfo['fileURL'], filePath)
|
||
|
|
process_media(mediaInfo, filePath)
|
||
|
|
upload_to_storage(filePath, filePath)
|
||
|
|
add_media_to_db(mediaInfo)
|
||
|
|
os.remove(filePath)
|
||
|
|
|
||
|
|
existing_files.append(mediaInfo['media_id'])
|
||
|
|
#################### posts ####################
|
||
|
|
|
||
|
|
print(f"New stories: {newStoryCount}\tNew Posts: {newPostsCount}")
|
||
|
|
print(f"Actions taken: {actionsTaken}")
|
||
|
|
print("=====================================")
|
||
|
|
break
|
||
|
|
except Exception as e:
|
||
|
|
if "login_required" in str(e):
|
||
|
|
print("Please log in to your account again.")
|
||
|
|
client = login(force=True)
|
||
|
|
elif "Please wait a few minutes before you try again." in str(e):
|
||
|
|
print("Rate limited. Waiting for 5 minutes...")
|
||
|
|
client = login(force=True)
|
||
|
|
else:
|
||
|
|
print("An unexpected error occurred:", e)
|
||
|
|
break
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
# TO DO
|
||
|
|
# ADD DATE TO POSTS / STORIES
|
||
|
|
# FETCH ONLY THE NEW STORIES
|
||
|
|
# MINIMIZE DATABASE CONNECTIONS
|