# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 422 lines
# 15 KiB
# Python
#
# 11 months ago
import cv2, os, json, config, time, hashlib, requests
from concurrent.futures import ThreadPoolExecutor
from moviepy.editor import VideoFileClip
from cryptography.fernet import Fernet
from BunnyCDN.Storage import Storage
from instagrapi import Client
from PIL import Image
# Browser-like User-Agent sent with the direct media downloads in download_file().
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
}

# Proxy endpoint used for both HTTP and HTTPS traffic. It can be overridden with
# the STORYSAVE_PROXY_URL environment variable; when that is unset the original
# hard-coded endpoint is used, so existing deployments behave as before.
# NOTE(review): the fallback embeds proxy credentials in source control - rotate
# them and drop the default once the environment variable is deployed.
_PROXY_URL = os.environ.get(
    "STORYSAVE_PROXY_URL",
    "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
)
proxies = {
    "http": _PROXY_URL,
    "https": _PROXY_URL,
}
def file_hash(filename, hash_algo="sha256"):
    """
    Return the hexadecimal digest of a file's contents.

    The file is consumed in 8 KiB blocks, so it is never held in memory whole.

    :param filename: Path to the file.
    :param hash_algo: Name of a hashlib algorithm (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    digest = hashlib.new(hash_algo)
    with open(filename, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    This is a best-effort probe: if the clip cannot be opened or read, 0 is
    returned instead of raising.

    :param file_path: Path to the video file
    :return: Duration in seconds, or 0 when it cannot be determined
    """
    try:
        with VideoFileClip(file_path) as video:
            return video.duration
    except Exception:
        # Deliberately broad (any decoding/IO failure maps to 0), but unlike a
        # bare ``except`` it lets KeyboardInterrupt/SystemExit propagate.
        return 0
def login(force=False):
    """
    Return an instagrapi client, reusing the saved session when possible.

    Unless ``force`` is true, the settings stored in ``session_data.json`` are
    loaded first. If that file is missing or is not valid JSON - or if ``force``
    is true - a fresh login is performed: the password is decrypted from
    ``p.enc`` with the Fernet key stored in ``key.enc``, the user is prompted
    for an optional 2FA code, and the resulting session is written back to
    ``session_data.json``.

    :param force: Skip the saved session and always perform a fresh login.
    :return: The configured ``Client`` instance.
    """
    client = Client()

    session_loaded = False
    if not force:
        try:
            client.load_settings("session_data.json")
            session_loaded = True
        except (FileNotFoundError, json.JSONDecodeError):
            # No usable saved session: fall through to a fresh login below.
            pass

    if not session_loaded:
        with open("p.enc", "rb") as encrypted_file:
            encrypted_data = encrypted_file.read()
        # Read the key inside a context manager so the handle is closed.
        with open("key.enc", "r") as key_file:
            fernet = Fernet(key_file.read())
        password = str(fernet.decrypt(encrypted_data), "utf-8")
        username = "olivercury"

        auth = input("Enter your 2FA code (leave blank if not enabled): ")
        if auth:
            client.login(username=username, password=password, verification_code=auth)
        else:
            client.login(username, password)
        client.dump_settings("session_data.json")

    print("Logged in successfully.")
    return client
def parse_media_data(media_item):
    """
    Convert a media object into the plain dict used by the rest of the script.

    Only images (``media_type`` 1) and videos (``media_type`` 2) are supported.
    For any other type - albums (8) as well as values not known to this script -
    a message is printed and ``None`` is returned, so callers must check for it.

    :param media_item: Object exposing ``pk`` and ``media_type``, plus
        ``thumbnail_url`` for images or ``video_url`` for videos. ``taken_at``,
        ``product_type`` and ``video_duration`` are optional.
    :return: Dict with ``taken_at``, ``post_type``, ``media_type``,
        ``media_id``, ``fileURL`` and ``filename`` (plus ``duration`` for
        videos), or ``None`` for unsupported media.
    """
    media_type = media_item.media_type
    # Reject unsupported types up front; indexing a lookup table with an
    # unexpected media_type would raise KeyError instead of reporting it.
    if media_type not in (1, 2):
        print(f"Unsupported media type with ID {media_item.pk}")
        return None

    mediaInfo = {
        # Some media objects do not carry these attributes; default to None.
        "taken_at": getattr(media_item, "taken_at", None),
        "post_type": getattr(media_item, "product_type", None),
        "media_type": "image" if media_type == 1 else "video",
        "media_id": int(media_item.pk),
    }

    if media_type == 1:  # Image
        mediaInfo["fileURL"] = media_item.thumbnail_url
        mediaInfo["filename"] = f"{media_item.pk}.jpg"
    else:  # Video
        mediaInfo["fileURL"] = media_item.video_url
        mediaInfo["duration"] = getattr(media_item, "video_duration", 0)
        mediaInfo["filename"] = f"{media_item.pk}.mp4"

    return mediaInfo
def download_file(url, filePath):
    """
    Stream ``url`` to ``filePath``, creating the parent directory if needed.

    Failures are reported on stdout rather than raised, so callers cannot tell
    from the return value whether the file was written.
    The request is made directly; the module-level ``proxies`` mapping is not
    applied here.

    :param url: Address of the file to fetch.
    :param filePath: Local destination path.
    """
    try:
        # The timeout keeps a stalled server from hanging the run; using the
        # response as a context manager releases the streamed connection.
        with requests.get(url, stream=True, headers=headers, timeout=30) as response:
            response.raise_for_status()

            directory = os.path.dirname(filePath)
            # dirname is "" for a bare filename, and os.makedirs("") raises.
            if directory:
                os.makedirs(directory, exist_ok=True)

            with open(filePath, "wb") as out_file:
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        print(f"Downloaded {filePath}")
    except Exception as e:
        print(f"Failed to download {url}. Error: {e}")
def process_media(mediaInfo, filePath):
    """
    Record the dimensions, duration and hash of a downloaded media file.

    ``mediaInfo`` is updated in place: ``width`` and ``height`` are always set,
    ``duration`` is set for anything that is not an image, and ``hash`` is
    computed only when the dict does not already contain one.

    :param mediaInfo: Media dict; its ``media_type`` selects the probing method.
    :param filePath: Path of the local file to inspect.
    """
    is_image = mediaInfo["media_type"] == "image"

    if is_image:
        with Image.open(filePath) as picture:
            dimensions = picture.size
    else:
        dimensions = get_video_dimensions(filePath)
    mediaInfo["width"], mediaInfo["height"] = dimensions

    if not is_image:
        mediaInfo["duration"] = get_video_duration(filePath)

    # Keep a hash supplied by the caller; hashing re-reads the whole file.
    if "hash" not in mediaInfo:
        mediaInfo["hash"] = file_hash(filePath)
def upload_to_storage(local_path, server_path):
    """
    Upload a local file to the ``storysave`` BunnyCDN storage zone.

    Success and failure are both reported on stdout; nothing is raised.

    :param local_path: Path of the file on disk.
    :param server_path: Destination path inside the storage zone.
    """
    try:
        zone = Storage("345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e", "storysave")
        zone.PutFile(local_path, server_path)
    except Exception as err:
        print(f"Failed to upload {local_path} to {server_path}. Error: {err}")
    else:
        print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
def add_media_to_db(mediaInfo):
    """
    Insert one media record into the ``media`` table.

    Required keys are read before the database is touched, so a missing key
    raises ``KeyError``. ``taken_at`` and ``duration`` are optional and default
    to ``None`` and ``0``. Database errors are printed, not raised.

    :param mediaInfo: Dict with ``media_id``, ``user_id``, ``username``,
        ``media_type``, ``post_type``, ``media_url``, ``width``, ``height`` and
        ``hash``.
    """
    media_id = mediaInfo["media_id"]
    # Values in the same order as the column list of the INSERT below.
    row = (
        mediaInfo["user_id"],
        mediaInfo["username"],
        mediaInfo.get("taken_at"),
        mediaInfo["media_type"],
        mediaInfo["post_type"],
        mediaInfo["media_url"],
        mediaInfo.get("duration", 0),
        mediaInfo["width"],
        mediaInfo["height"],
        media_id,
        mediaInfo["hash"],
    )
    username = row[1]

    statement = """
    INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, hash)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    try:
        connection, cur = config.gen_connection()
        cur.execute(statement, row)
        connection.commit()
        print(f"Added media for {username} to the database.")
    except Exception as err:
        print(f"Failed to add media for {username} to the database. Error: {err}")
def insert_highlight_items(media_ids, highlight_id, title, user_id):
    """
    Link a batch of media IDs to one highlight in the ``highlights`` table.

    Uses ``INSERT IGNORE`` and reports the affected row count when it is
    positive. Errors are printed, not raised.

    :param media_ids: Iterable of media IDs belonging to the highlight.
    :param highlight_id: Identifier of the highlight.
    :param title: Title of the highlight.
    :param user_id: Owner of the highlight.
    """
    statement = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
    try:
        connection, cur = config.gen_connection()

        rows = []
        for media_id in media_ids:
            rows.append((media_id, highlight_id, title, user_id))

        cur.executemany(statement, rows)
        connection.commit()

        inserted = cur.rowcount
        if inserted > 0:
            print(f"Added {inserted} highlight items to the database.")
    except Exception as err:
        print(f"Failed to add highlight items to the database. Error: {err}")
def get_video_dimensions(video_path):
    """
    Return the frame size of a video as reported by OpenCV.

    :param video_path: Path to the video file.
    :return: ``(width, height)`` as integers.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Release the capture handle even if reading a property fails.
        cap.release()
    return width, height
def _store_media(mediaInfo, folder, username):
    """
    Download one parsed media item, measure it, upload it and record it.

    The file is saved to ``media/<folder>/<username>/<filename>``, uploaded to
    the same path in storage, inserted into the database, and the local copy is
    then deleted. ``mediaInfo`` gains ``media_url`` plus whatever
    ``process_media`` adds.
    """
    filePath = os.path.join("media", folder, username, mediaInfo["filename"])
    mediaInfo["media_url"] = f"https://storysave.b-cdn.net/{filePath}"
    download_file(mediaInfo["fileURL"], filePath)
    process_media(mediaInfo, filePath)
    upload_to_storage(filePath, filePath)
    add_media_to_db(mediaInfo)
    os.remove(filePath)


# Script entry point: for every followed account, archive new stories,
# highlights (first import or favorites only) and the latest posts.
if __name__ == "__main__":
    client = login()
    client.set_proxy(proxies["https"])

    db, cursor = config.gen_connection()

    cursor.execute(
        "SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY favorite DESC, id DESC;"
    )
    following = cursor.fetchall()

    cursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL;")
    # A set keeps the per-item duplicate check O(1) instead of scanning a list.
    existing_files = {media[0] for media in cursor.fetchall()}

    continueFromLast = input("Continue from the last user? (y/N): ").lower() == "y"
    if continueFromLast:
        # Resume at the user whose media was inserted most recently.
        cursor.execute("SELECT username FROM media ORDER BY id DESC LIMIT 1;")
        lastUser = cursor.fetchone()
        if lastUser:
            lastUser = lastUser[0]
            for idx, user in enumerate(following):
                if user[0] == lastUser:
                    following = following[idx:]
                    break

    actionsTaken = 0  # running count of Instagram API requests

    with ThreadPoolExecutor(max_workers=10) as executor:
        for user in following:
            # Retry loop: repeats the same user after a re-login, otherwise
            # exits via one of the ``break`` statements below.
            while True:
                try:
                    firstImport = False
                    username, user_id, isFavorite = user

                    if not user_id:
                        firstImport = True
                        user_id = client.user_id_from_username(username)  # API request
                        actionsTaken += 1
                        cursor.execute(
                            "UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;",
                            (user_id, username),
                        )
                        db.commit()
                        print(f"Updated user ID for {username} to {user_id}")

                        #################### profile picture ####################
                        # NOTE(review): assumed to run on first import only
                        # (the original nesting was lost) - confirm.
                        profilePath = os.path.join(
                            "media", "profile", username, "profile.jpg"
                        )
                        profileURL = client.user_info(user_id).profile_pic_url_hd
                        download_file(profileURL, profilePath)

                        # The stored copy is named after the content hash.
                        fileHash = file_hash(profilePath)
                        serverPath = os.path.join(
                            os.path.dirname(profilePath), f"{fileHash}.jpg"
                        )
                        upload_to_storage(profilePath, serverPath)

                        mediaInfo = {
                            "username": username,
                            "user_id": user_id,
                            "media_id": None,
                            "media_type": "image",
                            "post_type": "profile",
                            "media_url": f"https://storysave.b-cdn.net/{serverPath}",
                            "duration": 0,
                            "hash": fileHash,
                        }
                        process_media(mediaInfo, profilePath)
                        add_media_to_db(mediaInfo)
                        #################### profile picture ####################

                    #################### stories ####################
                    print(f"[{username}]\nChecking: Stories")

                    # fetch user stories
                    stories = client.user_stories(user_id)  # API request
                    actionsTaken += 1

                    # fetch user's highlights and add to stories
                    if firstImport or isFavorite:
                        highlights = client.user_highlights(user_id)  # API request
                        actionsTaken += 1

                        for highlight in highlights:
                            try:
                                highlight_items = client.highlight_info_v1(
                                    highlight.pk
                                ).items  # API request
                                actionsTaken += 1
                            except Exception:
                                print(
                                    f"Failed to get highlight items for {highlight.pk}"
                                )
                                time.sleep(5)
                                # Skip this highlight: otherwise the code below
                                # would reuse the previous highlight's items
                                # (or hit an undefined name on the first one).
                                continue

                            media_ids = [item.pk for item in highlight_items]
                            executor.submit(
                                insert_highlight_items,
                                media_ids,
                                highlight.pk,
                                highlight.title,
                                user_id,
                            )
                            stories.extend(highlight_items)

                    # process stories and highlight stories
                    newStoryCount = 0
                    for story in stories:
                        try:
                            mediaInfo = parse_media_data(story)
                            # skip unsupported media (None) and duplicates
                            if mediaInfo is None or mediaInfo["media_id"] in existing_files:
                                continue

                            newStoryCount += 1
                            mediaInfo["user_id"] = user_id
                            mediaInfo["username"] = username
                            mediaInfo["post_type"] = "story"

                            if mediaInfo["fileURL"] and mediaInfo["filename"]:
                                _store_media(mediaInfo, "stories", username)
                                existing_files.add(mediaInfo["media_id"])
                        except Exception as e:
                            print(f"Failed to process story for {username}. Error: {e}")
                    #################### stories ####################

                    #################### posts ####################
                    print("Checking: Posts")
                    medias = client.user_medias(user_id, 36)  # API request
                    actionsTaken += 1

                    # Flatten albums (media_type 8) into their individual resources.
                    posts = []
                    for post in medias:
                        if post.media_type == 8:
                            posts.extend(post.resources)
                        else:
                            posts.append(post)

                    newPostsCount = 0
                    for post in posts:
                        mediaInfo = parse_media_data(post)
                        # skip unsupported media (None) and duplicates
                        if mediaInfo is None or mediaInfo["media_id"] in existing_files:
                            continue

                        newPostsCount += 1
                        mediaInfo["user_id"] = user_id
                        mediaInfo["username"] = username
                        mediaInfo["post_type"] = "post"

                        if mediaInfo["fileURL"] and mediaInfo["filename"]:
                            _store_media(mediaInfo, "posts", username)
                            existing_files.add(mediaInfo["media_id"])
                    #################### posts ####################

                    print(f"New stories: {newStoryCount}\tNew Posts: {newPostsCount}")
                    print(f"Actions taken: {actionsTaken}")
                    print("=====================================")
                    break
                except Exception as e:
                    message = str(e)
                    # NOTE(review): the client returned by login(force=True) does
                    # not get set_proxy() applied as the initial one does - confirm
                    # whether that is intended.
                    if "login_required" in message:
                        print("Please log in to your account again.")
                        client = login(force=True)
                    elif "Please wait a few minutes before you try again." in message:
                        print("Rate limited. Waiting for 5 minutes...")
                        # Actually wait, as the message says, before retrying.
                        time.sleep(300)
                        client = login(force=True)
                    else:
                        print("An unexpected error occurred:", e)
                        break
# TODO:
# - Add the date to posts / stories.
# - Fetch only the new stories.
# - Minimize database connections.