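# Uploads a local StorySave/ dump of Instagram stories/posts to the BunnyCDN
# "storysave" storage zone and records each file (URL, dimensions, hash) in the
# `media` table, skipping media_ids that are already in the database.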
import hashlib
import os
import uuid

import cv2
from PIL import Image
from BunnyCDN.Storage import Storage

import config
def clean_empty_folders(directory):
    """Remove empty subfolders under `directory`, walking bottom-up."""
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")
def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of a file, hashed in 8 KiB chunks."""
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()
def extract_file_info(filename):
    """Parse "username~timestamp~<media_id>_<user_id>.<ext>" filenames."""
    try:
        parts = filename.split("~")
        username, timestamp = parts[0], parts[1]
        media_id, user_id = parts[2].split("_")
        user_id = user_id.split(".")[0]
        return username, media_id, user_id, timestamp
    except (IndexError, ValueError):
        return None, None, None, None
def extract_file_info2(filename):
    """Parse "username~<media_id>_<user_id>.<ext>" filenames (no timestamp)."""
    try:
        username = filename.split("~")[0]
        elements = filename.split("~")[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
        return username, media_id, user_id
    except (IndexError, ValueError):
        return None, None, None
def upload_file(filepath, username, media_id=None, media_type='image', post_type='story', user_id=None, date=None):
    """Record a media file in the `media` table, push it to BunnyCDN storage, and delete the local copy."""
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    file_hash = calculate_file_hash(filepath)

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, file_hash, date)
    newCursor.execute(query, values)
    newDB.commit()

    existing_files.append(media_id)

    if newCursor.rowcount == 0:
        print(f'Insert ignored (already in media table?): {filename}')

    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)

    print(f'[{newCursor.rowcount}] {filename} {file_url}')
def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height
def get_media_type(filename):
    lowered = filename.lower()
    if lowered.endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
        return 'image'
    if lowered.endswith(('.mp4', '.mov')):
        return 'video'
    return None
def dump_instagram(folder_path):
    """Walk a dump folder of per-user subfolders and upload every parseable file."""
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            user_folder = os.path.join(root, folder)
            for filename in os.listdir(user_folder):
                if "~" not in filename:
                    continue

                username, media_id, user_id, timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, timestamp]:
                    username, media_id, user_id = extract_file_info2(filename)
                if None in [username, media_id, user_id]:
                    print(f"Failed to extract info from {filename}")
                    continue

                media_id = int(media_id) if media_id else None
                if media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(os.path.join(user_folder, filename))
                    continue

                filepath = os.path.join(user_folder, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)
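# Entry point: open the database connection, load the media_ids already stored,
# then walk the local StorySave/ dump.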
if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('StorySave/')

    print("Processing completed.")