update
parent
73889be10e
commit
89c8e35e3b
@ -0,0 +1,12 @@
|
||||
import os
|
||||
|
||||
def remove_empty_folders(folder):
|
||||
for root, dirs, files in os.walk(folder):
|
||||
for dir in dirs:
|
||||
dirpath = os.path.join(root, dir)
|
||||
if not os.listdir(dirpath):
|
||||
print(f"Removing empty folder {dirpath}")
|
||||
os.rmdir(dirpath)
|
||||
|
||||
folder = 'media'
|
||||
remove_empty_folders(folder)
|
||||
@ -0,0 +1,85 @@
|
||||
import os
|
||||
import config
|
||||
import cv2
|
||||
from funcs import get_files # Assuming this is defined elsewhere
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
|
||||
def generate_thumbnail_phash(filepath, hash_size=8): # Set hash_size to 8
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
|
||||
if not ret:
|
||||
print(f"Error reading frame from {filepath}")
|
||||
return None
|
||||
|
||||
# Resize frame to a standard size
|
||||
standard_size = (320, 240)
|
||||
resized_frame = cv2.resize(frame, standard_size, interpolation=cv2.INTER_AREA)
|
||||
|
||||
# Convert OpenCV image (BGR) to PIL Image (RGB)
|
||||
image_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
|
||||
pil_image = Image.fromarray(image_rgb)
|
||||
|
||||
# Compute pHash
|
||||
phash = imagehash.phash(pil_image, hash_size=hash_size)
|
||||
|
||||
return phash
|
||||
|
||||
def are_phashes_duplicates(phash1, phash2, threshold=5):
|
||||
# Compute Hamming distance between the pHashes
|
||||
try:
|
||||
distance = phash1 - phash2
|
||||
except TypeError as e:
|
||||
print(f"Error comparing pHashes: {e}")
|
||||
return False
|
||||
|
||||
return distance <= threshold
|
||||
|
||||
def get_media_by_phash(phash, username, existing_medias, threshold=5):
|
||||
for media in existing_medias:
|
||||
existing_phash_str = media[1]
|
||||
existing_username = media[2]
|
||||
if existing_username != username:
|
||||
continue
|
||||
|
||||
# Convert stored phash string to ImageHash object
|
||||
existing_phash = imagehash.hex_to_hash(existing_phash_str)
|
||||
|
||||
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
|
||||
return media
|
||||
return None
|
||||
|
||||
# Database connection
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
# Fetch existing videos with pHashes
|
||||
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
|
||||
existing_medias = cursor.fetchall()
|
||||
|
||||
users = os.listdir('videos')
|
||||
for username in users:
|
||||
user_videos_path = os.path.join('videos', username)
|
||||
if not os.path.isdir(user_videos_path):
|
||||
continue
|
||||
|
||||
videos = os.listdir(user_videos_path)
|
||||
for video in videos:
|
||||
print(f'Processing {video}...')
|
||||
filepath = os.path.join(user_videos_path, video)
|
||||
|
||||
phash = generate_thumbnail_phash(filepath, hash_size=8) # Use hash_size=8
|
||||
if phash is None:
|
||||
continue
|
||||
|
||||
phash_str = str(phash)
|
||||
|
||||
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
|
||||
if duplicate_media:
|
||||
print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
|
||||
print(f'Duplicate video path: {filepath}')
|
||||
newpath = filepath.replace('videos', 'duplicates')
|
||||
os.makedirs(os.path.dirname(newpath), exist_ok=True)
|
||||
os.rename(filepath, newpath)
|
||||
print(f'Moved {video} to duplicates/')
|
||||
@ -0,0 +1,81 @@
|
||||
import os
|
||||
import config
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
from funcs import get_files # Assuming this is defined elsewhere
|
||||
|
||||
def generate_image_phash(filepath, hash_size=8):
|
||||
try:
|
||||
# Open the image using PIL
|
||||
pil_image = Image.open(filepath)
|
||||
|
||||
# Compute pHash using the imagehash library
|
||||
phash = imagehash.phash(pil_image, hash_size=hash_size)
|
||||
return phash
|
||||
except Exception as e:
|
||||
print(f"Error processing image {filepath}: {e}")
|
||||
return None
|
||||
|
||||
def are_phashes_duplicates(phash1, phash2, threshold=5):
|
||||
try:
|
||||
# Compute the Hamming distance between the pHashes
|
||||
distance = phash1 - phash2
|
||||
return distance <= threshold
|
||||
except TypeError as e:
|
||||
print(f"Error comparing pHashes: {e}")
|
||||
return False
|
||||
|
||||
def get_media_by_phash(phash, username, existing_medias, threshold=6):
|
||||
for media in existing_medias:
|
||||
existing_phash_str = media[1]
|
||||
existing_username = media[2]
|
||||
|
||||
if existing_username != username:
|
||||
continue # Only compare with the same user's media
|
||||
|
||||
# Convert stored pHash string to ImageHash object
|
||||
existing_phash = imagehash.hex_to_hash(existing_phash_str)
|
||||
|
||||
# Check if the current pHash is a duplicate
|
||||
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
|
||||
return media
|
||||
return None
|
||||
|
||||
# Database connection
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
|
||||
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
|
||||
existing_medias = cursor.fetchall()
|
||||
|
||||
# Go through the 'sorted' folder where each subfolder is a username
|
||||
users = os.listdir('sorted')
|
||||
|
||||
for username in users:
|
||||
user_images_path = os.path.join('sorted', username)
|
||||
if not os.path.isdir(user_images_path):
|
||||
continue # Skip non-directory files
|
||||
|
||||
# Get all images for the current user
|
||||
images = get_files(user_images_path) # Assuming this gets all image files
|
||||
|
||||
for filepath in images:
|
||||
image_filename = os.path.basename(filepath)
|
||||
print(f'Processing {image_filename}...')
|
||||
|
||||
# Generate pHash for the image
|
||||
phash = generate_image_phash(filepath, hash_size=8)
|
||||
if phash is None:
|
||||
continue # Skip this image if there's an issue
|
||||
|
||||
phash_str = str(phash)
|
||||
|
||||
# Check if the image is a duplicate of any in the database
|
||||
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
|
||||
if duplicate_media:
|
||||
print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
|
||||
print(f'Duplicate image path: {filepath}')
|
||||
newpath = filepath.replace('sorted', 'duplicates')
|
||||
os.makedirs(os.path.dirname(newpath), exist_ok=True)
|
||||
os.rename(filepath, newpath)
|
||||
print(f'Moved {image_filename} to duplicates/')
|
||||
@ -0,0 +1,76 @@
|
||||
import cv2, os
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
from funcs import get_files
|
||||
|
||||
def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=16, hamming_threshold=1):
|
||||
"""
|
||||
Determines if a video is static using perceptual hashing (pHash) by comparing consecutive frames.
|
||||
|
||||
Parameters:
|
||||
- video_path: Path to the video file.
|
||||
- frame_sample_rate: Number of frames to skip between comparisons.
|
||||
- hash_size: Size of the hash; larger values increase sensitivity.
|
||||
- hamming_threshold: Maximum Hamming distance between consecutive frames to consider the video static.
|
||||
|
||||
Returns:
|
||||
- True if the video is static, False otherwise.
|
||||
"""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print("Error: Cannot open video file.")
|
||||
return False
|
||||
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Cannot read video frames.")
|
||||
cap.release()
|
||||
return False
|
||||
|
||||
# Convert first frame to PIL Image and compute hash
|
||||
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||
pil_image = Image.fromarray(frame_rgb)
|
||||
previous_hash = imagehash.phash(pil_image, hash_size=hash_size)
|
||||
|
||||
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
is_static = True
|
||||
|
||||
current_frame_number = 1
|
||||
|
||||
while True:
|
||||
# Skip frames according to the sample rate
|
||||
cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_number)
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# Convert frame to PIL Image and compute hash
|
||||
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||
pil_image = Image.fromarray(frame_rgb)
|
||||
current_hash = imagehash.phash(pil_image, hash_size=hash_size)
|
||||
|
||||
# Compute Hamming distance between hashes
|
||||
hamming_distance = previous_hash - current_hash
|
||||
|
||||
if hamming_distance > hamming_threshold:
|
||||
is_static = False
|
||||
break
|
||||
|
||||
# Update the previous hash
|
||||
previous_hash = current_hash
|
||||
|
||||
# Move to the next frame according to the sample rate
|
||||
current_frame_number += frame_sample_rate
|
||||
|
||||
cap.release()
|
||||
return is_static
|
||||
|
||||
|
||||
directory = 'videos'
|
||||
|
||||
files = get_files(directory)
|
||||
|
||||
for video_file in files:
|
||||
if video_file.endswith('.mp4'):
|
||||
if is_static_video_phash_optimized(video_file):
|
||||
print("The video is static: " + video_file)
|
||||
@ -0,0 +1,40 @@
|
||||
import config, os, json
|
||||
from PIL import Image
|
||||
import imagehash
|
||||
|
||||
def find_file(filename, directory):
|
||||
filename = filename.lower().split('.')[0]
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for file in files:
|
||||
if filename in file:
|
||||
return os.path.join(root, file)
|
||||
return None
|
||||
|
||||
def generate_phash(image_path):
|
||||
image = Image.open(image_path)
|
||||
return str(imagehash.phash(image))
|
||||
|
||||
count = 0
|
||||
|
||||
cacheDir = 'sorted'
|
||||
dataPath = 'pins.json'
|
||||
|
||||
os.makedirs(cacheDir, exist_ok=True)
|
||||
|
||||
medias = json.load(open(dataPath))
|
||||
|
||||
for item in medias:
|
||||
count += 1
|
||||
|
||||
filepath = item['filepath']
|
||||
if os.path.exists(filepath):
|
||||
continue
|
||||
|
||||
newfilepath = find_file(os.path.basename(filepath), cacheDir)
|
||||
if newfilepath:
|
||||
print(f"Found file {newfilepath} for {filepath}")
|
||||
item['filepath'] = newfilepath
|
||||
|
||||
|
||||
with open(dataPath, 'w') as f:
|
||||
json.dump(medias, f)
|
||||
@ -0,0 +1,141 @@
|
||||
import requests, hashlib, os
|
||||
|
||||
access_key = "471cd2e1-a943-4c61-ae69ddc6c2c2-c36d-4737"
|
||||
video_library_id = 125094
|
||||
|
||||
def create_video(title):
|
||||
url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
|
||||
|
||||
payload = f"{{\"title\":\"{title}\"}}"
|
||||
headers = {
|
||||
"accept": "application/json",
|
||||
"content-type": "application/*+json",
|
||||
"AccessKey": access_key
|
||||
}
|
||||
|
||||
response = requests.post(url, data=payload, headers=headers)
|
||||
|
||||
return response
|
||||
|
||||
def generate_signature(library_id, api_key, expiration_time, video_id):
|
||||
signature = hashlib.sha256((library_id + api_key + str(expiration_time) + video_id).encode()).hexdigest()
|
||||
return signature
|
||||
|
||||
def upload_video_process(file_path, video_id):
|
||||
url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
|
||||
|
||||
headers = {"accept": "application/json","AccessKey": access_key}
|
||||
|
||||
with open(file_path, "rb") as file:
|
||||
file_data = file.read()
|
||||
|
||||
response = requests.put(url, headers=headers, data=file_data)
|
||||
|
||||
return response.status_code
|
||||
|
||||
def upload_video(file_path, title=None):
|
||||
video_item = create_video(title)
|
||||
if video_item.status_code != 200:
|
||||
return False
|
||||
|
||||
video_id = video_item.json()['guid']
|
||||
upload_video_process(file_path, video_id)
|
||||
|
||||
return {
|
||||
"embed_link": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8",
|
||||
"animated_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp",
|
||||
"default_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg",
|
||||
}
|
||||
|
||||
|
||||
def upload_video_recurbate(videoInfo):
|
||||
title = f"{videoInfo['username']} {videoInfo['platform']}"
|
||||
video_item = create_video(title)
|
||||
if video_item.status_code != 200:
|
||||
return False
|
||||
|
||||
video_id = video_item.json()['guid']
|
||||
upload_video_process(videoInfo['filename'], video_id)
|
||||
|
||||
videoInfo["embed_link"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8"
|
||||
videoInfo["animated_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp"
|
||||
videoInfo["default_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg"
|
||||
|
||||
return True
|
||||
|
||||
def delete_video(video_id):
|
||||
video_id = video_id.replace('https://vz-58ca89f1-986.b-cdn.net/', '').replace('/playlist.m3u8', '')
|
||||
|
||||
url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
|
||||
|
||||
headers = {"accept": "application/json","AccessKey": access_key}
|
||||
|
||||
response = requests.delete(url, headers=headers)
|
||||
|
||||
return response.status_code
|
||||
|
||||
def list_videos():
|
||||
url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
|
||||
|
||||
params = {
|
||||
"page": 1,
|
||||
"itemsPerPage": 1000,
|
||||
"orderBy": "date"
|
||||
}
|
||||
|
||||
headers = {"accept": "application/json","AccessKey": access_key}
|
||||
|
||||
videos = []
|
||||
while True:
|
||||
response = requests.get(url, headers=headers, params=params)
|
||||
|
||||
data = response.json()
|
||||
videos += data['items']
|
||||
|
||||
if len(videos) == data['totalItems']:
|
||||
return videos
|
||||
|
||||
params['page'] += 1
|
||||
|
||||
def get_heatmap(video_id):
|
||||
url = "https://video.bunnycdn.com/library/libraryId/videos/videoId/heatmap"
|
||||
url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
|
||||
|
||||
headers = {"accept": "application/json","AccessKey": access_key}
|
||||
|
||||
response = requests.get(url, headers=headers).json()
|
||||
|
||||
return response
|
||||
|
||||
def get_video(video_id):
|
||||
url = "https://video.bunnycdn.com/library/libraryId/videos/videoId"
|
||||
url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
|
||||
|
||||
headers = {"accept": "application/json","AccessKey": access_key}
|
||||
|
||||
response = requests.get(url, headers=headers).json()
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def download_video(video_id, directory):
|
||||
download_url = f'https://storage.bunnycdn.com/vz-dd4ea005-7c2/{video_id}/'
|
||||
|
||||
params = {'download': '','accessKey': '5b1766f7-c1ab-463f-b05cce6f1f2e-1190-4c09'}
|
||||
|
||||
video_response = requests.get(download_url, params=params)
|
||||
|
||||
if video_response.status_code == 200:
|
||||
content_disposition = video_response.headers.get('Content-Disposition')
|
||||
if content_disposition:
|
||||
filename = content_disposition.split('filename=')[1].strip('"')
|
||||
ext = filename.split('.')[-1]
|
||||
|
||||
filename = f'{video_id}.{ext}'
|
||||
filePath = os.path.join(directory, filename)
|
||||
|
||||
with open(filePath, 'wb') as video_file:
|
||||
video_file.write(video_response.content)
|
||||
print(f'Video downloaded successfully as {filePath}')
|
||||
else:
|
||||
print('Failed to download video', video_response.status_code, video_response.text)
|
||||
@ -0,0 +1,23 @@
|
||||
import json
|
||||
|
||||
with open('bunny_data/missing_videos.json', 'r') as f:
|
||||
missing_videos = json.load(f)
|
||||
|
||||
with open('bunny_data/allVideos.json', 'r') as f:
|
||||
all_videos = json.load(f)
|
||||
|
||||
all_videos_guids = {video['guid'] for video in all_videos}
|
||||
|
||||
for video in missing_videos:
|
||||
if video['guid'] in all_videos_guids:
|
||||
video['imported'] = True
|
||||
|
||||
combined_data = {
|
||||
"missing_videos": missing_videos,
|
||||
"all_videos": all_videos
|
||||
}
|
||||
|
||||
with open('bunny_data/combined_videos.json', 'w') as f:
|
||||
json.dump(combined_data, f, indent=4)
|
||||
|
||||
print("Combined data has been written to bunny_data/combined_videos.json")
|
||||
@ -0,0 +1,16 @@
|
||||
import os, json
|
||||
|
||||
|
||||
pins = open('db_pins.json', 'r')
|
||||
pins = json.load(pins)
|
||||
|
||||
importedPins = open('db_pins_imported.json', 'r')
|
||||
importedPins = json.load(importedPins)
|
||||
|
||||
allPins = pins + importedPins
|
||||
print(len(allPins))
|
||||
|
||||
finalPins = open('allPins.json', 'r')
|
||||
finalPins = json.load(finalPins)
|
||||
|
||||
print(len(finalPins))
|
||||
@ -0,0 +1,110 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import os, uuid, config, funcs, cv2
|
||||
from datetime import datetime
|
||||
from PIL import Image
|
||||
|
||||
def dump_facebook(folder_path):
|
||||
for filename in os.listdir(folder_path):
|
||||
if os.path.isdir(os.path.join(folder_path, filename)):
|
||||
continue
|
||||
|
||||
username = filename.split("'")[0]
|
||||
|
||||
filepath = os.path.join(folder_path, filename)
|
||||
|
||||
mediatype = funcs.get_media_type(filename)
|
||||
post_type = funcs.determine_post_type(filepath, mediatype)
|
||||
|
||||
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
|
||||
|
||||
for folder in os.listdir(folder_path):
|
||||
if os.path.isdir(os.path.join(folder_path, folder)):
|
||||
username = folder
|
||||
|
||||
for filename in os.listdir(os.path.join(folder_path, folder)):
|
||||
filepath = os.path.join(folder_path, folder, filename)
|
||||
|
||||
mediatype = funcs.get_media_type(filename)
|
||||
post_type = funcs.determine_post_type(filepath, mediatype)
|
||||
|
||||
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
|
||||
|
||||
def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
|
||||
filename = os.path.basename(filepath)
|
||||
file_extension = os.path.splitext(filename)[1].lower()
|
||||
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
if file_hash in existing_files:
|
||||
print('Duplicate file detected. Removing...')
|
||||
os.remove(filepath)
|
||||
return False
|
||||
|
||||
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
|
||||
|
||||
if "FB_IMG" in filename: media_id = filename.split("_")[2].split(".")[0]
|
||||
else: media_id = uuid.uuid4().hex
|
||||
|
||||
dirtype = funcs.determine_post_type(filepath, media_type)
|
||||
server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
|
||||
|
||||
obj_storage.PutFile(filepath, server_path)
|
||||
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
if media_type == 'image':
|
||||
with Image.open(filepath) as img:
|
||||
width, height = img.size
|
||||
else:
|
||||
width, height = funcs.get_video_dimensions(filepath)
|
||||
|
||||
thumbnail_url = None
|
||||
if media_type == 'video':
|
||||
thumbPath = f'temp/{media_id}.jpg'
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite(thumbPath, frame)
|
||||
cap.release()
|
||||
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
|
||||
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
|
||||
|
||||
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
|
||||
|
||||
if post_type == 'stories':
|
||||
post_type = 'story'
|
||||
else:
|
||||
post_type = 'post'
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration, thumbnail_url)
|
||||
|
||||
try:
|
||||
newCursor.execute(query, values)
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
except Exception as e:
|
||||
print(f"Database error: {e}")
|
||||
return False
|
||||
|
||||
try:
|
||||
if newCursor.rowcount > 0:
|
||||
os.remove(filepath)
|
||||
except Exception as e:
|
||||
print(f"Failed to remove local file {filepath}: {e}")
|
||||
|
||||
return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting processing...')
|
||||
|
||||
newDB, newCursor = config.gen_connection()
|
||||
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
|
||||
existing_files = [image[0] for image in newCursor.fetchall()]
|
||||
|
||||
dump_facebook('facebook/')
|
||||
|
||||
print("Processing completed.")
|
||||
@ -0,0 +1,67 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import os, uuid, config, funcs
|
||||
from datetime import datetime
|
||||
from PIL import Image
|
||||
|
||||
def dump_facebook(folder_path):
|
||||
for folder in os.listdir(folder_path):
|
||||
if os.path.isdir(os.path.join(folder_path, folder)):
|
||||
username = folder
|
||||
|
||||
for filename in os.listdir(os.path.join(folder_path, folder)):
|
||||
filepath = os.path.join(folder_path, folder, filename)
|
||||
|
||||
upload_file(username=username, filepath=filepath)
|
||||
|
||||
def upload_file(filepath, username):
|
||||
filename = os.path.basename(filepath)
|
||||
media_id = filename.split('.')[0]
|
||||
|
||||
file_extension = os.path.splitext(filename)[1].lower()
|
||||
|
||||
media_type = funcs.get_media_type(filename)
|
||||
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
|
||||
|
||||
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
|
||||
|
||||
|
||||
dirtype = funcs.determine_post_type(filepath, media_type)
|
||||
server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
|
||||
|
||||
obj_storage.PutFile(filepath, server_path)
|
||||
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
if file_hash in existing_files:
|
||||
print('Duplicate file detected. Removing...')
|
||||
os.remove(filepath)
|
||||
return False
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)
|
||||
|
||||
newCursor.execute(query, values)
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
|
||||
if newCursor.rowcount > 0:
|
||||
os.remove(filepath)
|
||||
|
||||
return True
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting processing...')
|
||||
|
||||
newDB, newCursor = config.gen_connection()
|
||||
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
|
||||
existing_files = [image[0] for image in newCursor.fetchall()]
|
||||
|
||||
dump_facebook('tiktok/')
|
||||
|
||||
print("Processing completed.")
|
||||
@ -0,0 +1,38 @@
|
||||
import os, json
|
||||
|
||||
def getMedia(filename, list):
|
||||
for item in list:
|
||||
if filename.split('.')[0] in item['filepath']:
|
||||
return item
|
||||
return None
|
||||
|
||||
|
||||
data = json.loads(open('oldpins.json').read())
|
||||
files = os.listdir('STORAGE')
|
||||
|
||||
count = 0
|
||||
for file in files:
|
||||
filepath = f'STORAGE/{file}'
|
||||
|
||||
if os.path.isdir(filepath):
|
||||
continue
|
||||
media = getMedia(file, data)
|
||||
if not media:
|
||||
continue
|
||||
|
||||
username = media['title']
|
||||
filetype = media['type']
|
||||
filetype = 'jpg' if filetype == 'image' else 'mp4'
|
||||
filename = media['filepath'].split('/')[-1] + '.' + filetype
|
||||
|
||||
output = os.path.join('STORAGE', username, filename)
|
||||
os.makedirs(os.path.dirname(output), exist_ok=True)
|
||||
if os.path.exists(output):
|
||||
os.remove(output)
|
||||
output = os.path.join('STORAGE', username, file)
|
||||
os.rename(filepath, output)
|
||||
|
||||
count += 1
|
||||
print(f'File: {file}')
|
||||
|
||||
print(f'Total: {count}')
|
||||
@ -0,0 +1,45 @@
|
||||
import funcs, json, os, config
|
||||
|
||||
db, newCursor = config.gen_connection()
|
||||
|
||||
newCursor.execute("SELECT hash FROM media")
|
||||
hashes = [hash[0] for hash in newCursor.fetchall()]
|
||||
|
||||
file = 'bunnyVideos.json'
|
||||
|
||||
data = json.loads(open(file).read())
|
||||
|
||||
for media in data:
|
||||
if media['imported'] == True:
|
||||
if os.path.exists(media['filepath']):
|
||||
print(f'File {media["filepath"]} does not exist. Skipping...')
|
||||
continue
|
||||
|
||||
|
||||
countImported = 0
|
||||
countSkipped = 0
|
||||
for media in data:
|
||||
filepath = os.path.join('STREAM_VIDEOS_IMPORTED', media['guid'] + '.mp4')
|
||||
if media['imported'] == True:
|
||||
countImported += 1
|
||||
print('File already imported. Skipping...')
|
||||
continue
|
||||
|
||||
countSkipped += 1
|
||||
|
||||
if not os.path.exists(filepath):
|
||||
print(f'File {filepath} does not exist. Skipping...')
|
||||
continue
|
||||
|
||||
hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
if '67caa15e-390c-4223-b7b9-4d7842f3b443' in filepath:
|
||||
print(f'File {filepath} does not exist. Skipping...')
|
||||
continue
|
||||
|
||||
if hash in hashes:
|
||||
print('Duplicate file detected. Removing...')
|
||||
|
||||
|
||||
print(f'Imported: {countImported}')
|
||||
print(f'Skipped: {countSkipped}')
|
||||
@ -0,0 +1,17 @@
|
||||
from funcs import get_files, generate_phash
|
||||
import os, config
|
||||
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL;")
|
||||
phashes = [x[0] for x in cursor.fetchall()]
|
||||
|
||||
files = get_files('images')
|
||||
for item in files:
|
||||
phash = generate_phash(item)
|
||||
if phash in phashes:
|
||||
print(item)
|
||||
newpath = item.replace('images', 'duplicates')
|
||||
newdir = os.path.dirname(newpath)
|
||||
os.makedirs(newdir, exist_ok=True)
|
||||
os.rename(item, newpath)
|
||||
@ -0,0 +1,56 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import os, config, requests
|
||||
from moviepy.editor import VideoFileClip
|
||||
|
||||
def get_media_type(filename):
|
||||
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
|
||||
video_extensions = {".mp4", ".mov"}
|
||||
extension = os.path.splitext(filename.lower())[1]
|
||||
if extension in image_extensions:
|
||||
return 'image'
|
||||
elif extension in video_extensions:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
def determine_post_type(media_type):
|
||||
# Assuming the post type is directly based on media type.
|
||||
return media_type
|
||||
|
||||
def get_video_dimensions(filepath):
|
||||
with VideoFileClip(filepath) as clip:
|
||||
width, height = clip.size
|
||||
return width, height
|
||||
|
||||
def download_file(url):
|
||||
local_filename = url.split('/')[-1]
|
||||
# Note: Stream=True to avoid loading the whole file into memory
|
||||
with requests.get(url, stream=True) as r:
|
||||
r.raise_for_status()
|
||||
with open(local_filename, 'wb') as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
f.write(chunk)
|
||||
return local_filename
|
||||
|
||||
if __name__ == '__main__':
|
||||
newDB, newCursor = config.gen_connection()
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
posts = open('fucked', 'r')
|
||||
|
||||
for item in posts:
|
||||
username, url = item.strip().split('~')
|
||||
media_id = url.split('/')[-1].split('.')[0]
|
||||
media_type = get_media_type(url)
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
|
||||
values = (username, media_type, 'facebook', url)
|
||||
|
||||
try:
|
||||
newCursor.execute(query, values)
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] records updated.{url}')
|
||||
except Exception as e:
|
||||
print(f"Database error: {e}")
|
||||
|
||||
posts.close()
|
||||
@ -0,0 +1,94 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
from moviepy.editor import VideoFileClip
|
||||
import config
|
||||
import hashlib
|
||||
import requests
|
||||
import os
|
||||
|
||||
def file_hash_from_url(url, hash_algo='sha256'):
|
||||
h = hashlib.new(hash_algo)
|
||||
|
||||
response = requests.get(url, stream=True)
|
||||
|
||||
if response.status_code == 200:
|
||||
for chunk in response.iter_content(8192):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
else:
|
||||
raise Exception(f"Failed to download file: Status code {response.status_code}")
|
||||
|
||||
def get_video_duration(file_path):
|
||||
"""
|
||||
Returns the duration of the video file in seconds.
|
||||
|
||||
:param file_path: Path to the video file
|
||||
:return: Duration in seconds
|
||||
"""
|
||||
try:
|
||||
with VideoFileClip(file_path) as video:
|
||||
return video.duration
|
||||
except:
|
||||
return 0
|
||||
|
||||
def file_hash(filename, hash_algo='sha256'):
|
||||
"""
|
||||
Compute the hash of a file.
|
||||
|
||||
:param filename: Path to the file.
|
||||
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
|
||||
:return: Hexadecimal hash string.
|
||||
"""
|
||||
# Create a hash object
|
||||
h = hashlib.new(hash_algo)
|
||||
|
||||
# Open the file in binary mode and read in chunks
|
||||
with open(filename, 'rb') as file:
|
||||
while chunk := file.read(8192):
|
||||
h.update(chunk)
|
||||
|
||||
# Return the hexadecimal digest of the hash
|
||||
return h.hexdigest()
|
||||
|
||||
# the hash of the images are different due to optimizer
|
||||
|
||||
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
|
||||
results = cursor.fetchall()
|
||||
|
||||
count = 0
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
cacheDir = 'cache'
|
||||
for result in results:
|
||||
count += 1
|
||||
videoID = result[0]
|
||||
mediaID = result[1]
|
||||
mediaURL = result[2]
|
||||
extension = mediaURL.split('.')[-1]
|
||||
|
||||
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if os.path.exists(localFilePath):
|
||||
print(f"File already exists: {localFilePath}")
|
||||
else:
|
||||
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
|
||||
|
||||
duration = get_video_duration(localFilePath)
|
||||
|
||||
if duration == 0:
|
||||
print(f"Failed to get duration for {localFilePath}")
|
||||
continue
|
||||
|
||||
if duration < 1:
|
||||
duration = 1
|
||||
|
||||
cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
|
||||
db.commit()
|
||||
|
||||
print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")
|
||||
@ -0,0 +1,47 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import config
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
def file_hash(filename, hash_algo='sha256'):
|
||||
"""
|
||||
Compute the hash of a file.
|
||||
|
||||
:param filename: Path to the file.
|
||||
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
|
||||
:return: Hexadecimal hash string.
|
||||
"""
|
||||
h = hashlib.new(hash_algo)
|
||||
|
||||
with open(filename, 'rb') as file:
|
||||
while chunk := file.read(8192):
|
||||
h.update(chunk)
|
||||
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
|
||||
results = cursor.fetchall()
|
||||
|
||||
count = 0
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
for result in results:
|
||||
count += 1
|
||||
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
|
||||
localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
|
||||
if not os.path.exists(localFilePath):
|
||||
obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))
|
||||
|
||||
filehash = file_hash(localFilePath)
|
||||
|
||||
cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
|
||||
db.commit()
|
||||
|
||||
print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")
|
||||
@ -0,0 +1,41 @@
|
||||
import config, os
|
||||
from PIL import Image
|
||||
import imagehash
|
||||
|
||||
def generate_phash(image_path):
|
||||
image = Image.open(image_path)
|
||||
return str(imagehash.phash(image))
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
|
||||
results = cursor.fetchall()
|
||||
|
||||
count = 0
|
||||
cacheDir = 'cache'
|
||||
os.makedirs(cacheDir, exist_ok=True)
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
|
||||
for result in results:
|
||||
count += 1
|
||||
itemID = result[0]
|
||||
mediaID = result[1]
|
||||
mediaURL = result[2]
|
||||
|
||||
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if not os.path.exists(localFilePath):
|
||||
print(f"File {localFilePath} does not exist, skipping.")
|
||||
continue
|
||||
|
||||
try:
|
||||
phash = generate_phash(localFilePath)
|
||||
|
||||
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
|
||||
db.commit()
|
||||
|
||||
print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")
|
||||
except Exception as e:
|
||||
print(f"Error processing {mediaID}: {e}")
|
||||
@ -0,0 +1,47 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import config, os, funcs
|
||||
from PIL import Image
|
||||
|
||||
# the hash of the images are different due to optimizer
|
||||
|
||||
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
|
||||
results = cursor.fetchall()
|
||||
|
||||
count = 0
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
cacheDir = 'cache'
|
||||
for result in results:
|
||||
count += 1
|
||||
videoID = result[0]
|
||||
mediaID = result[1]
|
||||
mediaURL = result[2]
|
||||
extension = mediaURL.split('.')[-1]
|
||||
|
||||
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if os.path.exists(localFilePath):
|
||||
print(f"File already exists: {localFilePath}")
|
||||
else:
|
||||
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
|
||||
|
||||
mediaType = funcs.get_media_type(localFilePath)
|
||||
|
||||
if mediaType == 'image':
|
||||
with Image.open(localFilePath) as img:
|
||||
width, height = img.size
|
||||
elif mediaType == 'video':
|
||||
width, height = funcs.get_video_dimensions(localFilePath)
|
||||
|
||||
|
||||
cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
|
||||
db.commit()
|
||||
|
||||
print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")
|
||||
@ -0,0 +1,63 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import config, os, cv2
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN
|
||||
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
|
||||
results = cursor.fetchall()
|
||||
|
||||
count = 0
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
cacheDir = 'cache'
|
||||
|
||||
def DownloadFile(serverPath, cacheDir):
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if os.path.exists(localFilePath):
|
||||
print(f"File already exists: {localFilePath}")
|
||||
return localFilePath
|
||||
|
||||
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
|
||||
print(f"Downloaded {serverPath} to {localFilePath}")
|
||||
return localFilePath
|
||||
|
||||
def ImportMedias():
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
for video in results:
|
||||
serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
executor.submit(DownloadFile, serverPath, cacheDir)
|
||||
|
||||
|
||||
for result in results:
|
||||
count += 1
|
||||
itemID = result[0]
|
||||
mediaID = result[1]
|
||||
mediaURL = result[2]
|
||||
extension = mediaURL.split('.')[-1]
|
||||
|
||||
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
filePath = DownloadFile(serverPath, cacheDir)
|
||||
|
||||
cap = cv2.VideoCapture(localFilePath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite('thumbnail.jpg', frame)
|
||||
cap.release()
|
||||
|
||||
thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
|
||||
|
||||
obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')
|
||||
|
||||
|
||||
cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
|
||||
db.commit()
|
||||
|
||||
print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")
|
||||
@ -0,0 +1,35 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from BunnyCDN.Storage import Storage
|
||||
import config, os
|
||||
|
||||
def DownloadFile(serverPath, cacheDir):
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if os.path.exists(localFilePath):
|
||||
print(f"File already exists: {localFilePath}")
|
||||
return localFilePath
|
||||
|
||||
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
|
||||
print(f"Downloaded {serverPath} to {localFilePath}")
|
||||
return localFilePath
|
||||
|
||||
def ImportMedias(results):
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
for video in results:
|
||||
serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
|
||||
executor.submit(DownloadFile, serverPath, cacheDir)
|
||||
|
||||
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
|
||||
results = cursor.fetchall()
|
||||
|
||||
|
||||
count = 0
|
||||
cacheDir = 'cache'
|
||||
print(f"Found {len(results)} files to process.")
|
||||
|
||||
ImportMedias(results)
|
||||
@ -0,0 +1,24 @@
|
||||
import os, json
|
||||
from funcs import generate_phash
|
||||
|
||||
count = 0
|
||||
cacheDir = 'cache'
|
||||
dataPath = 'pins.json'
|
||||
|
||||
os.makedirs(cacheDir, exist_ok=True)
|
||||
|
||||
medias = json.load(open(dataPath))
|
||||
|
||||
for item in medias:
|
||||
count += 1
|
||||
if item['type'] == 'image':
|
||||
filepath = item['filepath']
|
||||
if not os.path.exists(filepath):
|
||||
print(f"File {filepath} does not exist, skipping.")
|
||||
continue
|
||||
phash = generate_phash(filepath)
|
||||
item['phash'] = phash
|
||||
print(f"Processed {count}/{len(medias)}: with pHash {phash}")
|
||||
|
||||
with open(dataPath, 'w') as f:
|
||||
json.dump(medias, f)
|
||||
@ -0,0 +1,33 @@
|
||||
import config
|
||||
from funcs import generate_phash
|
||||
|
||||
count = 0
|
||||
|
||||
storage = config.get_storage()
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, media_url FROM media WHERE media_type = %s AND phash IS NULL;", ['image'])
|
||||
medias = cursor.fetchall()
|
||||
|
||||
for item in medias:
|
||||
count += 1
|
||||
|
||||
itemID = item[0]
|
||||
media_url = item[1]
|
||||
|
||||
server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
|
||||
filepath = storage.DownloadFile(server_path, 'temp')
|
||||
if not filepath:
|
||||
print(f"Error downloading {server_path}")
|
||||
continue
|
||||
|
||||
phash = generate_phash(filepath)
|
||||
if not phash:
|
||||
print(f"Error generating pHash for {filepath}")
|
||||
continue
|
||||
|
||||
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
|
||||
db.commit()
|
||||
|
||||
print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")
|
||||
@ -0,0 +1,33 @@
|
||||
import config
|
||||
from funcs import generate_phash
|
||||
|
||||
count = 0
|
||||
|
||||
storage = config.get_storage()
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute("SELECT id, thumbnail FROM media WHERE media_type = %s AND phash IS NULL AND thumbnail IS NOT NULL;", ['video'])
|
||||
medias = cursor.fetchall()
|
||||
|
||||
for item in medias:
|
||||
count += 1
|
||||
|
||||
itemID = item[0]
|
||||
media_url = item[1]
|
||||
|
||||
server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
|
||||
filepath = storage.DownloadFile(server_path, 'temp')
|
||||
if not filepath:
|
||||
print(f"Error downloading {server_path}")
|
||||
continue
|
||||
|
||||
phash = generate_phash(filepath)
|
||||
if not phash:
|
||||
print(f"Error generating pHash for {filepath}")
|
||||
continue
|
||||
|
||||
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
|
||||
db.commit()
|
||||
|
||||
print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")
|
||||
@ -0,0 +1,24 @@
|
||||
import config
|
||||
|
||||
altpins_db, altpins_cursor = config.altpins_gen_connection()
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
|
||||
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }
|
||||
|
||||
cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
|
||||
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }
|
||||
|
||||
common_items = set(altpins_results.keys()) & set(media_results.keys())
|
||||
|
||||
for title, hash_value in common_items:
|
||||
altpins_id, altpins_url = altpins_results[(title, hash_value)]
|
||||
media_id, media_url = media_results[(title, hash_value)]
|
||||
|
||||
print(f"Found a match for hash {hash_value} with title {title}")
|
||||
print(f"Altpins URL: {altpins_url}")
|
||||
print(f"Media URL: {media_url}")
|
||||
|
||||
altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
|
||||
altpins_db.commit()
|
||||
print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")
|
||||
@ -0,0 +1,33 @@
|
||||
import bunny, json
|
||||
|
||||
medias = json.load(open('videos.json', 'r'))
|
||||
videoIDS = [media['url'].split('/')[-1] for media in medias]
|
||||
|
||||
videos = bunny.list_videos()
|
||||
|
||||
with open('allVideos.json', 'w') as f:
|
||||
json.dump(videos, f, indent=4)
|
||||
|
||||
missingVideos = []
|
||||
for video in videos:
|
||||
if video['guid'] in videoIDS:
|
||||
continue
|
||||
missingVideos.append(video)
|
||||
|
||||
datas = []
|
||||
for video in missingVideos:
|
||||
data = {
|
||||
'guid': video['guid'],
|
||||
'title': video['title'],
|
||||
'length': video['length'],
|
||||
'width': video['width'],
|
||||
'height': video['height'],
|
||||
'availableResolutions': video['availableResolutions'],
|
||||
'storageSize': video['storageSize'],
|
||||
'hasMP4Fallback': video['hasMP4Fallback'],
|
||||
'category': video['category'],
|
||||
}
|
||||
datas.append(data)
|
||||
|
||||
with open('missing_videos.json', 'w') as f:
|
||||
json.dump(datas, f, indent=4)
|
||||
@ -0,0 +1,27 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
import os, json
|
||||
|
||||
altpins_obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
medias = json.load(open('db_pins.json', 'r'))
|
||||
|
||||
count = 0
|
||||
print(f"Found {len(medias)} files to process.")
|
||||
|
||||
cacheDir = 'old_altpins_cache'
|
||||
for media in medias:
|
||||
count += 1
|
||||
username = media['title']
|
||||
mediaID = media['photo_id']
|
||||
mediaURL = media['url']
|
||||
extension = mediaURL.split('.')[-1]
|
||||
|
||||
serverPath = mediaURL.replace("https://altpins.b-cdn.net/", '').replace('//', '/').replace('\\', '/').replace('https://altpins.b-cdn.net/', '')
|
||||
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
|
||||
|
||||
if os.path.exists(localFilePath):
|
||||
continue
|
||||
|
||||
altpins_obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
|
||||
print(f"Downloaded {count}/{len(medias)}: {localFilePath}")
|
||||
@ -0,0 +1,16 @@
|
||||
import json, bunny, os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
medias = json.load(open('missing_videos.json', 'r'))
|
||||
#videoIDS = [media['url'].split('/')[-1] for media in medias]
|
||||
videoIDS = [media['guid'] for media in medias]
|
||||
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
for id in videoIDS:
|
||||
filePath = f"MISSING_STREAM_VIDEOS/{id}.zip"
|
||||
|
||||
if os.path.exists(filePath):
|
||||
print(f'Video already exists as {filePath}. Skipping...')
|
||||
continue
|
||||
|
||||
executor.submit(bunny.download_video, id)
|
||||
@ -0,0 +1,29 @@
|
||||
import os, json, config
|
||||
|
||||
# Load the data
|
||||
pins = json.load(open('db_pins.json', 'r'))
|
||||
files = os.listdir('STORAGE_IMPORTED/')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute('SELECT hash FROM media WHERE hash IS NOT NULL;')
|
||||
existing_hashes = [hash[0] for hash in cursor.fetchall()]
|
||||
|
||||
for pin in pins:
|
||||
if pin['hash'] in existing_hashes:
|
||||
print(f"Found {pin['hash']} in the imported folder.")
|
||||
pins.remove(pin)
|
||||
|
||||
alreadyImported = []
|
||||
for pin in pins:
|
||||
filepath = pin['filepath']
|
||||
username = pin['title']
|
||||
filename = os.path.basename(filepath)
|
||||
|
||||
if filename in files:
|
||||
print(f"Found {filename} in the imported folder.")
|
||||
alreadyImported.append(pins.pop(pins.index(pin)))
|
||||
|
||||
# Save to the file
|
||||
json.dump(pins, open('db_pins.json', 'w'))
|
||||
json.dump(alreadyImported, open('db_pins_imported.json', 'w'))
|
||||
@ -0,0 +1,14 @@
|
||||
import os, json, bunny
|
||||
|
||||
medias = json.load(open('allVideos.json', 'r'))
|
||||
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
|
||||
|
||||
missing = json.load(open('missing_videos.json', 'r'))
|
||||
|
||||
count = 0
|
||||
cacheDir = 'old_mp4fallback_cache'
|
||||
print(f"Found {len(medias)} files to process.")
|
||||
for media in mp4Medias:
|
||||
count += 1
|
||||
filePath = os.path.join(cacheDir, media['guid'] + '.mp4')
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
import os, json, bunny, config
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
cursor.execute('SELECT media_id FROM media WHERE media_id IS NOT NULL;')
|
||||
mediaIDS = cursor.fetchall()
|
||||
|
||||
|
||||
|
||||
pins = json.load(open('pins.json', 'r'))
|
||||
|
||||
videos = json.load(open('db_videos.json', 'r'))
|
||||
pins = json.load(open('db_pins.json', 'r'))
|
||||
ids = [video['id'] for video in videos]
|
||||
|
||||
for pin in pins:
|
||||
if pin['id'] in ids:
|
||||
pins.remove(pin)
|
||||
|
||||
# save to the file
|
||||
json.dump(pins, open('db_pins.json', 'w'))
|
||||
|
||||
|
||||
medias = json.load(open('allVideos.json', 'r'))
|
||||
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
|
||||
|
||||
missing = json.load(open('missing_videos.json', 'r'))
|
||||
|
||||
count = 0
|
||||
cacheDir = 'old_mp4fallback_cache'
|
||||
print(f"Found {len(medias)} files to process.")
|
||||
for media in mp4Medias:
|
||||
count += 1
|
||||
filePath = os.path.join(cacheDir, media['guid'] + '.mp4')
|
||||
|
||||
|
||||
@ -0,0 +1,53 @@
|
||||
import os, json, funcs
|
||||
|
||||
STORAGE_IMPORTED = 'STORAGE_IMPORTED'
|
||||
pins = json.load(open('db_pins.json', 'r'))
|
||||
|
||||
for pin in pins:
|
||||
filename = pin['url'].split('/')[-1]
|
||||
filepath = os.path.join(STORAGE_IMPORTED, filename)
|
||||
pin['filename'] = filename
|
||||
if not pin['hash']:
|
||||
pin['hash'] = funcs.calculate_file_hash(filepath)
|
||||
|
||||
json.dump(pins, open('db_pins.json', 'w'), indent=4)
|
||||
|
||||
files = os.listdir(STORAGE_IMPORTED)
|
||||
|
||||
for file in files:
|
||||
filepath = os.path.join(STORAGE_IMPORTED, file)
|
||||
fileHash = funcs.calculate_file_hash(filepath)
|
||||
if fileHash not in file:
|
||||
print(f'Renaming {file} to {fileHash}')
|
||||
os.rename(filepath, os.path.join(STORAGE_IMPORTED, fileHash))
|
||||
|
||||
pins_by_username = {}
|
||||
for pin in pins:
|
||||
username = pin['title']
|
||||
if username not in pins_by_username:
|
||||
pins_by_username[username] = []
|
||||
pins_by_username[username].append(pin)
|
||||
|
||||
for username, username_pins in pins_by_username.items():
|
||||
username_folder = os.path.join(STORAGE_IMPORTED, username)
|
||||
os.makedirs(username_folder, exist_ok=True)
|
||||
for pin in username_pins:
|
||||
photo_id = pin['photo_id']
|
||||
photo_url = pin['url']
|
||||
fileHash = pin['hash']
|
||||
|
||||
if not fileHash:
|
||||
continue
|
||||
|
||||
extension = photo_url.split('.')[-1]
|
||||
filename = f'{fileHash}.{extension}'
|
||||
|
||||
filePath = os.path.join(STORAGE_IMPORTED, filename)
|
||||
outputPath = os.path.join(STORAGE_IMPORTED, username, filename)
|
||||
|
||||
if os.path.exists(outputPath):
|
||||
print(f'File {outputPath} already exists. Skipping...')
|
||||
continue
|
||||
|
||||
print(f'Moving {photo_url} to {outputPath}')
|
||||
os.rename(filePath, outputPath)
|
||||
@ -0,0 +1,27 @@
|
||||
import os, json
|
||||
|
||||
|
||||
|
||||
folderPath = 'STREAM_IMPORTED'
|
||||
jsonFile = 'bunnyVideos.json'
|
||||
|
||||
data = json.load(open(jsonFile))
|
||||
|
||||
for item in data:
|
||||
username = item['title']
|
||||
filepath = os.path.join(folderPath, item['guid'] + '.mp4')
|
||||
|
||||
if username in filepath:
|
||||
continue
|
||||
|
||||
username = item['title']
|
||||
output = os.path.join(folderPath, username, os.path.basename(filepath))
|
||||
os.makedirs(os.path.dirname(output), exist_ok=True)
|
||||
if os.path.exists(filepath):
|
||||
os.rename(filepath, output)
|
||||
item['filepath'] = output
|
||||
|
||||
|
||||
# save to fiel
|
||||
with open(jsonFile, 'w') as f:
|
||||
json.dump(data, f, indent=4)
|
||||
@ -0,0 +1,49 @@
|
||||
import json, os
|
||||
from videohash import VideoHash
|
||||
from moviepy.editor import VideoFileClip
|
||||
|
||||
def is_valid_video(file_path):
|
||||
try:
|
||||
with VideoFileClip(file_path) as video:
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Invalid video {file_path}: {str(e)}")
|
||||
return False
|
||||
|
||||
def load_hashes(file_path):
|
||||
try:
|
||||
with open(file_path, 'r') as file:
|
||||
return json.load(file)
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
|
||||
def save_hashes(hashes, file_path):
|
||||
with open(file_path, 'w') as file:
|
||||
json.dump(hashes, file, indent=4)
|
||||
|
||||
hashes = load_hashes('video_hashes.json')
|
||||
video_directory = 'STORAGE'
|
||||
|
||||
for username in os.listdir(video_directory):
|
||||
user_dir = os.path.join(video_directory, username)
|
||||
if not os.path.isdir(user_dir):
|
||||
continue
|
||||
|
||||
for video_file in os.listdir(user_dir):
|
||||
video_path = os.path.join(user_dir, video_file)
|
||||
if not video_file.endswith(('.mp4', '.mkv', '.avi')) or not is_valid_video(video_path):
|
||||
continue
|
||||
|
||||
if username in hashes and any(v[0] == video_file for v in hashes[username]):
|
||||
continue
|
||||
|
||||
try:
|
||||
video_hash = VideoHash(path=video_path)
|
||||
if username in hashes:
|
||||
hashes[username].append((video_file, video_hash.hash))
|
||||
else:
|
||||
hashes[username] = [(video_file, video_hash.hash)]
|
||||
except Exception as e:
|
||||
print(f"Error processing {video_file}: {str(e)}")
|
||||
|
||||
save_hashes(hashes, 'video_hashes.json')
|
||||
@ -0,0 +1,44 @@
|
||||
from moviepy.editor import VideoFileClip
|
||||
import json
|
||||
|
||||
def is_valid_video(file_path):
|
||||
try:
|
||||
with VideoFileClip(file_path) as video:
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Invalid video {file_path}: {str(e)}")
|
||||
return False
|
||||
|
||||
def load_hashes(file_path):
|
||||
try:
|
||||
with open(file_path, 'r') as file:
|
||||
return json.load(file)
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
|
||||
def save_hashes(hashes, file_path):
|
||||
with open(file_path, 'w') as file:
|
||||
json.dump(hashes, file, indent=4)
|
||||
|
||||
def find_duplicates(video_hashes):
|
||||
hash_map = {}
|
||||
for video, v_hash in video_hashes:
|
||||
if v_hash in hash_map:
|
||||
hash_map[v_hash].append(video)
|
||||
else:
|
||||
hash_map[v_hash] = [video]
|
||||
|
||||
duplicates = {h: vids for h, vids in hash_map.items() if len(vids) > 1}
|
||||
return duplicates
|
||||
|
||||
hashes = load_hashes('video_hashes.json')
|
||||
for username, user_hashes in hashes.items():
|
||||
print(f"Checking for duplicates in '{username}' videos:")
|
||||
duplicates = find_duplicates(user_hashes)
|
||||
if duplicates:
|
||||
for dup_hash, dup_videos in duplicates.items():
|
||||
print(f"Duplicate hash: {dup_hash}")
|
||||
for vid in dup_videos:
|
||||
print(f" - {vid}")
|
||||
else:
|
||||
print("No duplicates found.")
|
||||
@ -0,0 +1,48 @@
from videohash import VideoHash
import os

# Directory containing videos grouped by username
video_directory = '/path/to/videos'
hashes = {}

for username in os.listdir(video_directory):
    user_dir = os.path.join(video_directory, username)
    if os.path.isdir(user_dir):
        for video_file in os.listdir(user_dir):
            if video_file.endswith(('.mp4', '.mkv', '.avi')): # Ensure it's a video file
                video_path = os.path.join(user_dir, video_file)
                try:
                    # Calculate the hash for each video
                    video_hash = VideoHash(path=video_path)
                    print(f"Hash for {video_file}: {video_hash.hash}")

                    # Store hashes in a dictionary
                    if username in hashes:
                        hashes[username].append((video_file, video_hash.hash))
                    else:
                        hashes[username] = [(video_file, video_hash.hash)]
                except Exception as e:
                    print(f"Error processing {video_file}: {str(e)}")

def find_duplicates(hashes):
    duplicate_videos = []
    all_hashes = [(user, video, hsh) for user, videos in hashes.items() for video, hsh in videos]
    hash_dict = {}

    for user, video, hsh in all_hashes:
        if hsh in hash_dict:
            hash_dict[hsh].append((user, video))
        else:
            hash_dict[hsh] = [(user, video)]

    for videos in hash_dict.values():
        if len(videos) > 1:
            duplicate_videos.append(videos)

    return duplicate_videos

duplicates = find_duplicates(hashes)
for duplicate in duplicates:
    print("Duplicate videos found:")
    for video_info in duplicate:
        print(f"User: {video_info[0]}, Video: {video_info[1]}")
@ -0,0 +1,49 @@
import os

def get_file_type(filepath):
    if filepath.endswith('.jpg') or filepath.endswith('.png'):
        return 'image'
    elif filepath.endswith('.mp4'):
        return 'video'
    else:
        return None

def get_files(directory):
    files = []
    for root, dirs, filenames in os.walk(directory):
        for filename in filenames:
            files.append(os.path.join(root, filename))
    return files

files = get_files('STORAGE/')
os.makedirs('images', exist_ok=True)
os.makedirs('videos', exist_ok=True)

for filepath in files:
    if not os.path.exists(filepath):
        print(f"File {filepath} does not exist, skipping.")
        continue

    # Extract the username from the filepath, assuming the structure STORAGE/{username}/{filename}
    filepath = filepath.replace('\\', '/')  # Normalize backslashes to forward slashes
    parts = filepath.split('/')  # Split the path into its components
    if len(parts) < 3 or parts[0] != 'STORAGE':  # Check if the structure is valid
        print(f"Unexpected filepath format: {filepath}")
        continue

    username = parts[1]  # Extract the username from the second part
    fileType = get_file_type(filepath)  # Determine the type of the file
    if not fileType:
        print(f"Unknown file type for {filepath}")
        continue

    if fileType == 'image':
        newpath = os.path.join('images', username, os.path.basename(filepath))
    elif fileType == 'video':
        newpath = os.path.join('videos', username, os.path.basename(filepath))
    else:
        print(f"Unknown media type {fileType} for {filepath}")
        continue

    os.makedirs(os.path.dirname(newpath), exist_ok=True)  # Create the directory structure if it doesn't exist
    os.rename(filepath, newpath)  # Move the file to the new location
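# Note (sketch, not part of the original commit): os.rename() raises OSError when
# 'STORAGE/' and the destination directory sit on different drives or filesystems.
# shutil.move() falls back to copy-and-delete in that case, so a more robust move
# would be:
#
#     import shutil
#     shutil.move(filepath, newpath)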
@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Image Gallery</title>
    <style>
        .gallery {
            display: flex;
            flex-wrap: wrap;
        }
        .gallery img {
            margin: 10px;
            max-width: 200px;
            height: auto;
        }
        .gallery div {
            text-align: center;
            margin: 10px;
        }
    </style>
</head>
<body>
    <h1>Image Gallery</h1>
    <div class="gallery">
        {% for image in images %}
        <div>
            <h3>{{ image['username'] }}</h3>
            <img src="{{ image['media_url'] }}" alt="Image for {{ image['username'] }}">
        </div>
        {% endfor %}
    </div>
</body>
</html>
@ -0,0 +1,84 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Media Gallery</title>
    <style>
        body {
            display: flex;
            justify-content: center;
        }
        .container {
            max-width: 1600px;
            width: 100%;
            padding: 20px;
        }
        .media-container {
            column-count: 4;
            column-gap: 10px;
        }
        .media-item {
            break-inside: avoid;
            margin-bottom: 10px;
        }
        img, video {
            width: 100%;
            height: auto;
            display: block;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Media Gallery</h1>
        <div class="media-container" id="media-container"></div>
    </div>

    <script>
        let page = 0;

        async function loadMore() {
            const response = await fetch(`/load-more?page=${page}`);
            const mediaFiles = await response.json();
            const container = document.getElementById('media-container');

            mediaFiles.forEach(file => {
                const mediaItem = document.createElement('div');
                mediaItem.className = 'media-item';

                if (file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.gif')) {
                    const img = document.createElement('img');
                    img.src = `/media/${file}`;
                    img.alt = file;
                    mediaItem.appendChild(img);
                } else if (file.endsWith('.mp4') || file.endsWith('.mkv') || file.endsWith('.mov')) {
                    const video = document.createElement('video');
                    video.controls = false;
                    video.autoplay = true;
                    video.muted = true;
                    video.loop = true;
                    const source = document.createElement('source');
                    source.src = `/media/${file}`;
                    source.type = 'video/mp4';
                    video.appendChild(source);
                    mediaItem.appendChild(video);
                }

                container.appendChild(mediaItem);
            });

            page += 1;
        }

        window.addEventListener('scroll', () => {
            if (window.innerHeight + window.scrollY >= document.body.offsetHeight) {
                loadMore();
            }
        });

        // Initial load
        loadMore();
    </script>
</body>
</html>
@ -0,0 +1,32 @@
from flask import Flask, render_template, send_from_directory, jsonify, request
import os

app = Flask(__name__)
media_dir = 'storysaver'
MEDIA_PER_PAGE = 20

def get_media_files(start, count):
    media_files = []
    for root, dirs, files in os.walk(media_dir):
        for filename in files:
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.mp4', '.mkv', '.mov')):
                file_path = os.path.relpath(os.path.join(root, filename), media_dir)
                media_files.append(file_path)
    return media_files[start:start + count]

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/media/<path:filename>')
def media(filename):
    return send_from_directory(media_dir, filename)

@app.route('/load-more')
def load_more():
    page = int(request.args.get('page', 0))
    media_files = get_media_files(page * MEDIA_PER_PAGE, MEDIA_PER_PAGE)
    return jsonify(media_files)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
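# Note (sketch, not part of the original commit): get_media_files() re-walks
# media_dir on every /load-more request. For a large library the listing could be
# computed once and sliced per page, e.g.:
#
#     from functools import lru_cache
#
#     @lru_cache(maxsize=1)
#     def all_media_files():
#         return [os.path.relpath(os.path.join(root, name), media_dir)
#                 for root, _, names in os.walk(media_dir)
#                 for name in names
#                 if name.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.mp4', '.mkv', '.mov'))]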
@ -0,0 +1,26 @@
import json, requests

def findPost(filePath = 'test.json'):
    params = {'av': '17841401225494803','__a': '1','__req': '1','__hs': '19906.HYP:instagram_web_pkg.2.1..0.1','dpr': '1','__ccg': 'UNKNOWN','__rev': '1014609539','__s': 'guk60j:651i2v:pmhu0r','__hsi': '7386834689999716220','__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1xoswaq0yE6u0nS4oaEd86a3a1YwBgao1aU2swbOU2zxe2GewGw9a362W2K0zEnwhEe82mwww4cwJCwLyES1TwTwFwIwbS1LwTwKG1pg2Xwr86C1mwrd6goK3ibxKi2K7ErwYCz8rwHw','__csr': 'igAzIj5OgR5YBHdRtivbkyFv-zJIZE_ykzfahdAydeHCHAAAqyk4pqBgDzeV4-qlbBF29UlCxFpVokDwAyosyV9KWUmx6iu58WqdwSDCDAFwHxi3C00lWy2FG4k583NxW8yFE0bUyxd06lxO5C2a8yFm2u290ejg1JU2Gw2rQ061U','__comet_req': '7','fb_dtsg': 'NAcPDfX2XufdLkctek6zNxz3DWxPW4t-cJzz39QtOQ5KS-_Rq3erT4A:17843708194158284:1719013044','jazoest': '26262','lsd': 'D0zmaX16yIQu_GwDXKTbMc','__spin_r': '1014609539','__spin_b': 'trunk','__spin_t': '1719881474','__jssesw': '1','fb_api_caller_class': 'RelayModern','fb_api_req_friendly_name': 'PolarisProfilePageContentDirectQuery', 'variables': '{"id":"57771591453","render_surface":"PROFILE"}','server_timestamps': 'true','doc_id': '7663723823674585'}

    # Send the query parameters with the request and parse the JSON body before indexing into it
    response = requests.get('https://www.instagram.com/graphql/query', params=params)
    data = response.json()

    posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
    posts = [post['node'] for post in posts]

    # Return the post whose largest image candidate has the highest resolution
    return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates']))

def getHDProfilePicture():
    url = 'https://www.save-free.com/process'

    zoom_data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'zoom'}
    data = {'instagram_url': 'natahalieeee','type': 'profile','resource': 'save'}

    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36','Referer' : 'https://www.save-free.com/profile-downloader/',}

    response = requests.post(url, data=data, headers=headers)

    response = requests.post(url, data=zoom_data, headers=headers)

    with open('image.jpg', 'wb') as f:
        f.write(response.content)
@ -0,0 +1,149 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image


def UploadMedia(media):
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']
    highlight_id = media['highlight_id']
    thumbnail_url = None
    phash = None

    if media_id and int(media_id) in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)

    post_type = funcs.determine_post_type(filepath, media_type)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0  # slower

    if media_type == 'video':
        try:
            thumbPath = f'temp/{media_id}.jpg'
            cap = cv2.VideoCapture(filepath)
            ret, frame = cap.read()
            cv2.imwrite(thumbPath, frame)
            cap.release()
            obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')  # slower
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
            phash = funcs.generate_phash(thumbPath)
            os.remove(thumbPath)
        except Exception as e:
            print(f'Error generating thumbnail: {e}. Skipping...')
            return False
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    newFilename = f'{media_id}{file_extension}'
    server_path = f'media/{post_type}/{username}/{newFilename}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow: remote upload

    if highlight_id:
        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    post_type = 'story' if post_type == 'stories' else 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True

def get_user_id(username):
    username = username.lower()
    if username in existing_users:
        return existing_users[username]

    return None

def get_media_data(filepath):
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) < 4:
        return False

    username = parts[0]
    timestamp = parts[1]
    media_id = parts[2]
    user_id = parts[3].split('_')[-1].split('.')[0]

    highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None
    if highlight_id:
        user_id = get_user_id(username)

    try:
        media_id = int(media_id)
    except ValueError:
        print(f'Invalid media_id for file {filename}. Skipping...')
        media_id = None

    data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id}

    return data

def get_media(folder_path):
    medias = []

    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            filepath = os.path.join(root, filename)

            data = get_media_data(filepath)
            if data:
                medias.append(data)

    return medias

def dump_instagram(folder_path):
    medias = get_media(folder_path)

    for media in medias:
        UploadMedia(media)
        existing_files.append(media['media_id'])


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}

    dump_instagram('storysaver/')

    print("Processing completed.")
@ -0,0 +1,137 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image


def UploadMedia(media):
    media_id = media['media_id']
    username = media['username']
    post_date = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']
    highlight_id = media['highlight_id']
    post_type = media['post_type']
    thumbnail_url = None
    phash = None

    if media_id and int(media_id) in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)

    file_hash = funcs.calculate_file_hash(filepath)

    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0  # slower

    if media_type == 'video':
        try:
            thumbPath = f'temp/{media_id}.jpg'
            cap = cv2.VideoCapture(filepath)
            ret, frame = cap.read()
            cv2.imwrite(thumbPath, frame)
            cap.release()
            obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')  # slower
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
            phash = funcs.generate_phash(thumbPath)
            os.remove(thumbPath)
        except Exception as e:
            print(f'Error generating thumbnail: {e}. Skipping...')
            return False
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    if media_id:
        newFilename = f'{media_id}{file_extension}'
    else:
        newFilename = f'{file_hash}{file_extension}'

    server_path = f'media/{post_type}/{username}/{newFilename}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow: remote upload

    if highlight_id:
        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True

def get_user_id(username):
    username = username.lower()
    if username in existing_users:
        return existing_users[username]

    return None

def get_media():
    medias = []
    post_types = {
        'posts': 'post',
        'stories': 'story',
        'profile': 'profile',
    }

    for post_type in os.listdir('media'):
        users = os.listdir(f'media/{post_type}')
        for user in users:
            user_path = f'media/{post_type}/{user}'
            for filename in os.listdir(user_path):
                data = {}
                filepath = os.path.join(user_path, filename)

                data['post_type'] = post_types[post_type]
                data['username'] = user
                data['timestamp'] = filename.split('__')[-1].split('.')[0] if 'com.instagram.android__' in filename else datetime.now()
                if 'com.instagram.android__' in filename:
                    # Parse the timestamp string extracted from the filename
                    data['timestamp'] = datetime.strptime(data['timestamp'], '%Y%m%d%H%M%S%f')
                data['filepath'] = filepath
                data['media_id'] = None
                data['user_id'] = get_user_id(data['username'])
                data['highlight_id'] = None
                medias.append(data)

    return medias

def dump_instagram():
    medias = get_media()

    for media in medias:
        UploadMedia(media)
        existing_files.append(media['media_id'])


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}

    dump_instagram()

    print("Processing completed.")
@ -0,0 +1,36 @@
import os, shutil, time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class DownloadHandler(FileSystemEventHandler):
    def process_file(self, file_path):
        file = os.path.basename(file_path)
        if 'crdownload' not in file and file.count('~') == 3:
            print(f'Moving {file}...')
            outputPath = os.path.join('storysaver', file)
            try:
                shutil.move(file_path, outputPath)
            except Exception as e:
                print(f'Failed to move file: {e}')

    def on_created(self, event):
        if not event.is_directory and 'crdownload' not in event.src_path:
            self.process_file(event.src_path)

    def on_moved(self, event):
        if not event.is_directory and 'crdownload' not in event.dest_path:
            self.process_file(event.dest_path)

if __name__ == "__main__":
    downloadPath = os.path.join(os.path.expanduser('~'), 'Downloads')
    event_handler = DownloadHandler()
    observer = Observer()
    observer.schedule(event_handler, downloadPath, recursive=False)
    observer.start()

    try:
        while True:
            time.sleep(1)  # Add a 1-second sleep to reduce CPU usage
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
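# Note (sketch, not part of the original commit): watchdog only reports events that
# occur while the observer is running, so files already sitting in Downloads when the
# script starts are never moved. A one-off sweep before observer.start() would cover
# them, e.g.:
#
#     for existing in os.listdir(downloadPath):
#         event_handler.process_file(os.path.join(downloadPath, existing))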