from moviepy.editor import VideoFileClip
import os, cv2, hashlib, requests
from PIL import Image
import numpy as np
import imagehash


headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies = {
    "http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
    "https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
}


def generate_phash(image_path):
    try:
        image = Image.open(image_path)
        return str(imagehash.phash(image))
    except Exception as e:
        print(f"Error generating phash for {image_path}: {e}")
        return False
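

# Hedged sketch: the hex strings returned by generate_phash() can be compared by
# Hamming distance to flag near-duplicate images. imagehash.hex_to_hash() rebuilds
# a hash object from the hex string, and subtracting two hash objects yields the
# bit difference. The max_distance threshold of 5 is an assumption; tune it for
# your data.
def phashes_match(phash_str1, phash_str2, max_distance=5):
    if not phash_str1 or not phash_str2:
        return False
    distance = imagehash.hex_to_hash(phash_str1) - imagehash.hex_to_hash(phash_str2)
    return distance <= max_distance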


def cleanEmptyFolders(path):
    for root, dirs, fs in os.walk(path):
        for d in dirs:
            cleanEmptyFolders(os.path.join(root, d))
        if not os.listdir(root):
            os.rmdir(root)


def get_files(directory):
    files = []
    for root, dirs, filenames in os.walk(directory):
        for filename in filenames:
            files.append(os.path.join(root, filename))
    return files


def compare_images(image_path1, image_path2):
    # Load the images in grayscale
    img1 = cv2.imread(image_path1, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread(image_path2, cv2.IMREAD_GRAYSCALE)
    if img1 is None or img2 is None:
        print("Error loading images!")
        return False  # Or raise an exception instead
    # Initialize SIFT detector
    sift = cv2.SIFT_create()
    # Find keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)
    # Check if descriptors are None
    if des1 is None or des2 is None:
        return False
    # FLANN parameters
    index_params = dict(algorithm=1, trees=5)
    search_params = dict(checks=50)
    # FLANN based matcher
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    # Matching descriptor vectors using KNN algorithm
    matches = flann.knnMatch(des1, des2, k=2)
    # Apply Lowe's ratio test (knnMatch can return fewer than 2 neighbours)
    good = []
    for pair in matches:
        if len(pair) < 2:
            continue
        m, n = pair
        if m.distance < 0.6 * n.distance:  # More stringent ratio
            good.append(m)
    # Minimum number of matches
    MIN_MATCH_COUNT = 15  # Adjust this threshold as needed
    if len(good) > MIN_MATCH_COUNT:
        # Extract location of good matches
        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        # Find homography
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if M is None or mask is None:
            return False
        matchesMask = mask.ravel().tolist()
        # Check if enough points agree on the homography
        return np.sum(matchesMask) > 10
    else:
        return False
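

# Hedged usage sketch: compare_images() can be combined with get_files() above to
# scan a directory for visually matching image pairs. This is O(n^2) over the
# number of images, so it is only practical for small folders; the extension
# filter below is an assumption, extend it as needed.
def find_matching_image_pairs(directory):
    image_paths = [f for f in get_files(directory)
                   if f.lower().endswith((".jpg", ".jpeg", ".png", ".webp"))]
    matching_pairs = []
    for i in range(len(image_paths)):
        for j in range(i + 1, len(image_paths)):
            if compare_images(image_paths[i], image_paths[j]):
                matching_pairs.append((image_paths[i], image_paths[j]))
    return matching_pairs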


def remove_empty_folders(dir_path):
    import shutil

    def is_folder_empty(folder_path):
        return len(os.listdir(folder_path)) == 0

    num_folder = 0
    for root, dirs, files in os.walk(dir_path, topdown=False):
        for dir_name in dirs:
            folder_path = os.path.join(root, dir_name)  # don't shadow the dir_path argument
            if not os.path.isdir(folder_path):
                continue
            if '$' in dir_name or '$' in folder_path:
                print(f"Skipping system folder: {folder_path}")
                continue
            if 'system volume information' in dir_name.lower() or 'system volume information' in folder_path.lower():
                print(f"Skipping system folder: {folder_path}")
                continue
            if is_folder_empty(folder_path) or dir_name.lower() == '__pycache__':
                shutil.rmtree(folder_path)
                print(f"Removed empty folder: {folder_path}")
                num_folder += 1
    return num_folder


def download_file(url, filePath):
    try:
        response = requests.get(url, stream=True, headers=headers)
        response.raise_for_status()
        directory = os.path.dirname(filePath)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(filePath, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
        print(f"Downloaded {filePath}")
        return True
    except Exception as e:
        print(f"Failed to download {url}. Error: {e}")
        return False
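

# Hedged variant: download_file() above never passes the proxies dict defined at
# the top of the module. If the rotating proxy is intended for downloads, a
# proxied version could look like the sketch below (requests.get accepts a
# proxies= keyword). The timeout value is an assumption.
def download_file_via_proxy(url, filePath, timeout=30):
    try:
        response = requests.get(url, stream=True, headers=headers,
                                proxies=proxies, timeout=timeout)
        response.raise_for_status()
        directory = os.path.dirname(filePath)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(filePath, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
        print(f"Downloaded {filePath} via proxy")
        return True
    except Exception as e:
        print(f"Failed to download {url} via proxy. Error: {e}")
        return False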


def determine_post_type(filepath):
    width, height = get_media_dimensions(filepath)

    if 0 in (width, height):
        return False

    aspect_ratio = width / height
    if 0.5 < aspect_ratio < 0.6:
        return 'stories'
    else:
        return 'posts'
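

# Worked example: a 1080x1920 vertical video has an aspect ratio of
# 1080 / 1920 = 0.5625, which falls inside the (0.5, 0.6) window above, so
# determine_post_type() returns 'stories'; a 1080x1080 square image
# (ratio 1.0) returns 'posts'.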


def get_media_type(filename):
    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".svg", ".eps", ".raw", ".cr2", ".nef", ".orf", ".sr2", ".heic", ".indd", ".ai", ".psd"}
    video_extensions = {".mp4", ".mov"}

    filetype_dict = {"image": image_extensions, "video": video_extensions}

    extension = os.path.splitext(filename.lower())[1]  # Get the extension and convert to lower case

    for filetype, extensions in filetype_dict.items():
        if extension in extensions:
            return filetype
    return None


def get_video_duration(file_path):
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return 0
    video_types = {".mp4", ".mov", ".mkv"}
    extension = os.path.splitext(file_path.lower())[1]
    if extension not in video_types:
        return 0

    try:
        with VideoFileClip(file_path) as video:
            duration = video.duration
            if duration == 0:
                duration = 1
            return duration
    except Exception as e:
        print(f"Error getting duration for {file_path}: {e}")
        return 0


def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height


def get_media_dimensions(media_path):
    if get_media_type(media_path) == 'video':
        return get_video_dimensions(media_path)
    else:
        with Image.open(media_path) as img:
            return img.size


def get_video_data(video_path):
    data = {'duration': 0, 'width': 0, 'height': 0}
    try:
        with VideoFileClip(video_path) as video:
            data['duration'] = video.duration
            data['width'] = video.size[0]
            data['height'] = video.size[1]
    except Exception as e:
        print(f"Error getting video data for {video_path}: {e}")
    return data
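

# Hedged usage sketch: get_files(), get_media_type() and get_video_data() can be
# combined to summarise every video under a folder. The directory argument is a
# placeholder supplied by the caller.
def summarize_videos(directory):
    summary = {}
    for path in get_files(directory):
        if get_media_type(path) == "video":
            summary[path] = get_video_data(path)
    return summary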


def calculate_file_hash(file_path, hash_func='sha256'):
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        chunk = file.read(8192)
        while chunk:
            h.update(chunk)
            chunk = file.read(8192)
    return h.hexdigest()
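

# Hedged usage sketch: calculate_file_hash() can be paired with get_files() to
# find byte-identical duplicates under a directory; files sharing a digest are
# grouped together. Only a sketch: it reads every file in full, so it can be
# slow on large trees.
def find_duplicate_files(directory):
    hashes = {}
    for path in get_files(directory):
        try:
            digest = calculate_file_hash(path)
        except OSError as e:
            print(f"Error hashing {path}: {e}")
            continue
        hashes.setdefault(digest, []).append(path)
    return {digest: paths for digest, paths in hashes.items() if len(paths) > 1}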