# NOTE: web-page header residue from the Gitea file view removed during cleanup.

from moviepy.editor import VideoFileClip
import os, cv2, hashlib, requests
from PIL import Image
import numpy as np
import imagehash
# Browser-like User-Agent so servers return normal desktop responses to our requests.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
# Rotating proxy endpoint used for outbound HTTP(S) traffic.
# SECURITY NOTE(review): proxy credentials are hard-coded here — move them to an
# environment variable or config file before sharing/committing this script.
proxies={"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/","https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"}
def generate_phash(image_path):
    """Return the perceptual hash (pHash) of the image at *image_path* as a string.

    Returns False if the image cannot be opened or hashed (the error is printed).
    """
    try:
        # FIX: use a context manager — a bare Image.open() leaks the underlying
        # file handle until garbage collection.
        with Image.open(image_path) as image:
            return str(imagehash.phash(image))
    except Exception as e:
        print(f"Error generating phash for {image_path}: {e}")
        return False
def cleanEmptyFolders(path):
    """Recursively remove every empty directory under *path*, including *path*
    itself if it ends up empty.

    FIX: the original recursed into cleanEmptyFolders() for every directory
    *while also* walking the whole tree with os.walk, re-scanning each subtree
    once per ancestor (quadratic work) and walking directories it had already
    deleted. A single bottom-up walk has the same effect: children are visited
    before their parent, so a parent emptied by deleting its children is itself
    removed when its turn comes.
    """
    for root, _dirs, _files in os.walk(path, topdown=False):
        if not os.listdir(root):
            os.rmdir(root)
def get_files(directory):
    """Return the full paths of every file found anywhere under *directory*."""
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(directory)
        for filename in filenames
    ]
import cv2
import numpy as np
def compare_images(image_path1, image_path2):
    """Return True when the two images match via SIFT feature correspondence.

    Pipeline: grayscale load -> SIFT keypoints/descriptors -> FLANN KNN match
    -> Lowe's ratio test -> homography consistency check via RANSAC.
    Returns False on load failure, too few features, or no consistent homography.
    """
    img1 = cv2.imread(image_path1, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread(image_path2, cv2.IMREAD_GRAYSCALE)
    if img1 is None or img2 is None:
        print("Error loading images!")
        return False  # Or you could raise an exception
    sift = cv2.SIFT_create()
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)
    # Images with no detectable features yield None descriptors.
    if des1 is None or des2 is None:
        return False
    # FLANN KD-tree matcher (algorithm=1 is FLANN_INDEX_KDTREE).
    index_params = dict(algorithm=1, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)
    # Lowe's ratio test (stringent 0.6 ratio).
    # FIX: knnMatch can return pairs with fewer than 2 neighbours when the
    # train set is tiny; the original `for m, n in matches` unpacking crashed.
    good = [pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.6 * pair[1].distance]
    MIN_MATCH_COUNT = 15  # Adjust this threshold as needed
    if len(good) <= MIN_MATCH_COUNT:
        return False
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    # FIX: findHomography can fail and return mask=None; the original then
    # crashed on mask.ravel(). Treat that as "no match".
    if mask is None:
        return False
    # Require enough inliers agreeing on the homography; bool() normalises
    # the numpy bool to a plain Python bool.
    return bool(np.sum(mask.ravel()) > 10)
def remove_empty_folders(dir_path):
    """Recursively delete empty folders and __pycache__ folders under *dir_path*.

    Skips Windows system folders (names containing '$' or
    'System Volume Information'). Walks bottom-up so a parent emptied by
    deleting its children is itself removed on a later iteration.
    Returns the number of folders removed (FIX: the original counted them in
    ``num_folder`` but never returned the value).
    """
    import shutil

    removed = 0
    for root, dirs, _files in os.walk(dir_path, topdown=False):
        for dir_name in dirs:
            # FIX: use a fresh name — the original rebound the *dir_path*
            # parameter here, shadowing the function argument.
            candidate = os.path.join(root, dir_name)
            if not os.path.isdir(candidate):
                continue
            # '$' marks Windows system/recycle folders ($RECYCLE.BIN etc.).
            if '$' in dir_name or '$' in candidate:
                print(f"Skipping system folder: {candidate}")
                continue
            if 'system volume information' in candidate.lower():
                print(f"Skipping system folder: {candidate}")
                continue
            if not os.listdir(candidate) or dir_name.lower() == '__pycache__':
                shutil.rmtree(candidate)
                # FIX: message said "Moved" but the folder is deleted.
                print(f"Removed empty folder: {candidate}")
                removed += 1
    return removed
def download_file(url, filePath):
    """Stream *url* to *filePath*, creating parent directories as needed.

    Uses the module-level ``headers`` for the request. Any failure is caught
    and printed (best-effort download); nothing is returned.
    """
    try:
        response = requests.get(url, stream=True, headers=headers)
        response.raise_for_status()
        directory = os.path.dirname(filePath)
        # FIX: dirname is "" for a bare filename and os.makedirs("") raises,
        # which silently failed the whole download. exist_ok also removes the
        # exists-then-create race of the original check.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(filePath, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
        print(f"Downloaded {filePath}")
    except Exception as e:
        print(f"Failed to download {url}. Error: {e}")
def determine_post_type(filepath):
    """Classify media as 'stories' (tall, roughly 9:16 aspect) or 'posts'.

    Returns False when either dimension comes back as 0 (unreadable media).
    """
    width, height = get_media_dimensions(filepath)
    if width == 0 or height == 0:
        return False
    ratio = width / height
    return 'stories' if 0.5 < ratio < 0.6 else 'posts'
def get_media_type(filename):
    """Return 'image' or 'video' based on *filename*'s extension, else None.

    The comparison is case-insensitive.
    """
    extension = os.path.splitext(filename.lower())[1]
    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg", ".eps", ".raw", ".cr2", ".nef", ".orf", ".sr2", ".heic", ".indd", ".ai", ".psd"}
    video_extensions = {".mp4", ".mov"}
    if extension in image_extensions:
        return "image"
    if extension in video_extensions:
        return "video"
    return None
def get_video_duration(file_path):
    """Return the duration of a video in seconds via moviepy.

    Returns 0 when the file is missing, has a non-video extension, or cannot
    be read; a zero-length clip is reported as 1 second.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return 0
    extension = os.path.splitext(file_path.lower())[1]
    if extension not in {".mp4", ".mov", ".mkv"}:
        print(f"File is not a video: {file_path}")
        return 0
    try:
        with VideoFileClip(file_path) as clip:
            duration = clip.duration
            return 1 if duration == 0 else duration
    except Exception as e:
        print(f"Error getting duration for {file_path}: {e}")
        return 0
def get_video_dimensions(video_path):
    """Return (width, height) of a video via OpenCV; (0, 0) if unreadable."""
    capture = cv2.VideoCapture(video_path)
    try:
        return (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    finally:
        # Always release the capture handle, even if a property read fails.
        capture.release()
def get_media_dimensions(media_path):
    """Return (width, height) of an image or video file."""
    media_kind = get_media_type(media_path)
    if media_kind == 'video':
        return get_video_dimensions(media_path)
    # Anything non-video is treated as an image and sized via Pillow.
    with Image.open(media_path) as img:
        return img.size
def get_video_data(video_path):
    """Return {'duration', 'width', 'height'} for a video via moviepy.

    On any failure the error is printed and the values remain 0.
    """
    info = {'duration': 0, 'width': 0, 'height': 0}
    try:
        with VideoFileClip(video_path) as clip:
            info['duration'] = clip.duration
            info['width'], info['height'] = clip.size
    except Exception as e:
        print(f"Error getting video data for {video_path}: {e}")
    return info
def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of *file_path* using hashlib algorithm *hash_func*.

    The file is read in 8 KiB chunks so arbitrarily large files fit in memory.
    """
    digest = hashlib.new(hash_func)
    with open(file_path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()