You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
112 lines
4.0 KiB
Python
112 lines
4.0 KiB
Python
from BunnyCDN.Storage import Storage
|
|
from PIL import Image
|
|
import os, uuid, cv2, config
|
|
|
|
def scan_dupes(folder_path):
    """Delete local media files whose numeric ID is already recorded.

    Every sub-directory met while walking ``folder_path`` is listed. An entry
    is treated as a duplicate when its name, with any ``.mp4`` / ``.jpg`` text
    stripped, parses as an integer that is present in the module-level
    ``existing_files`` collection; such entries are removed from disk.
    Entries sitting directly in ``folder_path`` itself are not examined.

    Args:
        folder_path: Root directory to walk.
    """
    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            # Separate name so the ``folder_path`` argument is not shadowed.
            subfolder_path = os.path.join(root, folder)
            for filename in os.listdir(subfolder_path):
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                if not media_id:
                    continue

                # Only the integer parse is guarded: a non-numeric name simply
                # cannot match a stored ID. Anything else (e.g. a missing
                # ``existing_files`` global) is a real bug and must surface.
                try:
                    numeric_id = int(media_id)
                except ValueError:
                    continue

                if numeric_id not in existing_files:
                    continue

                filepath = os.path.join(subfolder_path, filename)
                print('Duplicate')
                try:
                    os.remove(filepath)
                except OSError as exc:
                    # Best effort: keep scanning, but say why the entry stayed.
                    print(f'Could not remove duplicate {filepath}: {exc}')
|
|
|
|
def clean_empty_folders(directory):
    """Remove every empty sub-directory found beneath ``directory``.

    The tree is walked bottom-up, so a folder that only becomes empty once its
    own empty children are gone is removed during the same call. ``directory``
    itself is never removed. Each removal is reported on stdout.

    Args:
        directory: Root of the tree to prune.
    """
    for parent, child_names, _files in os.walk(directory, topdown=False):
        # Lazy generator: each path is built right before it is inspected.
        candidates = (os.path.join(parent, name) for name in child_names)
        for candidate in candidates:
            if os.listdir(candidate):
                continue  # still holds something, keep it
            os.rmdir(candidate)
            print(f"Removed empty folder: {candidate}")
|
|
|
|
def upload_file(filepath, username, media_id=None, media_type='image', post_type='story'):
    """Upload one local media file to storage and record it in the ``media`` table.

    Steps, in order:

    1. If ``media_id`` parses as an integer that is already present in the
       module-level ``existing_files`` collection, the local file is deleted
       and nothing is uploaded.
    2. If ``media_id`` is missing or non-numeric, a random UUID hex string is
       used as the ID instead.
    3. The pixel dimensions are read from the local file.
    4. The file is sent to ``users/<stories|posts>/<username>/<media_id>.<ext>``
       through the module-level ``obj_storage``.
    5. A row is inserted (``INSERT IGNORE``) via the module-level
       ``newCursor`` / ``newDB``, and the local file is deleted.

    Args:
        filepath: Path of the local file to upload.
        username: Account name used in the remote path and the DB row.
        media_id: Optional identifier; see steps 1 and 2.
        media_type: ``'image'`` reads dimensions with PIL; any other value
            goes through ``get_video_dimensions``.
        post_type: ``'story'`` selects the ``stories`` remote directory; any
            other value selects ``posts``.

    Returns:
        ``True`` when the file was skipped as a duplicate, otherwise ``None``.
    """
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]

    # Guard only the integer parse. Previously a bare ``except`` also wrapped
    # ``os.remove``, so a failed removal of a known duplicate fell through and
    # re-uploaded the same media under a fresh random ID.
    try:
        numeric_id = int(media_id)
    except (TypeError, ValueError):
        numeric_id = None
        media_id = uuid.uuid4().hex

    if numeric_id is not None and numeric_id in existing_files:
        print('Duplicate')
        try:
            os.remove(filepath)
        except OSError as exc:
            # Still a duplicate: report the leftover file, but do not upload.
            print(f'Could not remove duplicate {filepath}: {exc}')
        return True

    # Read the dimensions before any remote side effect, so an unreadable
    # file fails here instead of leaving an uploaded object with no DB row.
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'

    # NOTE(review): the result of PutFile is not inspected; confirm how the
    # storage client reports a failed upload.
    obj_storage.PutFile(filepath, server_path)

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type)
    newCursor.execute(query, values)
    newDB.commit()

    # The local copy is only dropped once the row has been committed.
    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
|
|
|
|
|
|
def get_video_dimensions(video_path):
    """Return ``(width, height)`` of a video file as integers.

    The values come from the frame-width and frame-height properties of an
    OpenCV ``VideoCapture`` opened on ``video_path``; the capture is released
    before returning.

    NOTE(review): there is no check that the capture actually opened; confirm
    what dimensions come back for an unreadable file.

    Args:
        video_path: Path of the video file to inspect.
    """
    capture = cv2.VideoCapture(video_path)
    dimensions = tuple(
        int(capture.get(prop))
        for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
    )
    capture.release()
    return dimensions
|
|
|
|
|
|
# Suffixes recognised by get_media_type(); matched case-insensitively.
_IMAGE_SUFFIXES = ('.jpg', '.webp', '.jpeg', '.png', '.gif')
_VIDEO_SUFFIXES = ('.mp4', '.mov')


def get_media_type(filename):
    """Classify a file name as ``'image'`` or ``'video'`` by its suffix.

    Args:
        filename: File name (or path) to classify; case is ignored.

    Returns:
        ``'image'`` for .jpg/.webp/.jpeg/.png/.gif, ``'video'`` for .mp4/.mov,
        and ``None`` for anything else.
    """
    # Lower-case once; ``str.endswith`` accepts a tuple of suffixes.
    lowered = filename.lower()
    if lowered.endswith(_IMAGE_SUFFIXES):
        return 'image'
    if lowered.endswith(_VIDEO_SUFFIXES):
        return 'video'
    return None
|
|
|
|
|
|
def dump_instagram(folder_path):
    """Upload every file found in the sub-directories of ``folder_path``.

    Each sub-directory met while walking ``folder_path`` is treated as one
    account: its name is passed on as the username, and every file inside it
    is handed to ``upload_file``. The media ID is the file name with any
    ``.mp4`` / ``.jpg`` text stripped, and the media type comes from
    ``get_media_type``.

    Args:
        folder_path: Root directory to walk.
    """
    for root, dirs, _files in os.walk(folder_path):
        # A folder counts as stories when one of its parent directories is
        # named 'stories'. The earlier test compared only the text before the
        # first backslash of the joined path, which is never 'stories' for a
        # root such as 'media/stories', so every story was labelled 'post'.
        # Only the parents are inspected, so an account that happens to be
        # called 'stories' is not misclassified.
        parent_parts = os.path.normpath(root).replace('\\', '/').split('/')
        post_type = 'story' if 'stories' in parent_parts else 'post'

        for folder in dirs:
            username = folder
            # Separate name so the ``folder_path`` argument is not shadowed.
            user_dir = os.path.join(root, folder)

            for filename in os.listdir(user_dir):
                filepath = os.path.join(user_dir, filename)
                if os.path.isdir(filepath):
                    # Nested folders are visited by os.walk in their own
                    # right; they are not uploadable files.
                    continue

                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, post_type=post_type)
|
|
|
|
if __name__ == '__main__':
    print('Starting processing...')

    # These names are intentionally module-level globals: scan_dupes() and
    # upload_file() read existing_files / obj_storage / newCursor / newDB
    # directly, so this section must not be moved into a function.
    newDB, newCursor = config.gen_connection()

    try:
        # SECURITY: a storage access key is committed in source. It is kept
        # only as a fallback so existing runs keep working; rotate it and
        # supply the real key through the BUNNY_STORAGE_API_KEY environment
        # variable (an optional override introduced here).
        storage_api_key = os.environ.get(
            'BUNNY_STORAGE_API_KEY',
            '345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e',
        )
        obj_storage = Storage(storage_api_key, 'storysave')

        newCursor.execute("SELECT media_id FROM media")
        # A set keeps the per-file ``in existing_files`` checks O(1).
        # NOTE(review): the duplicate checks compare int(media_id) against
        # these values - confirm the column is returned as integers.
        existing_files = {row[0] for row in newCursor.fetchall()}

        media_roots = ('media/posts', 'media/stories', 'StorySave/')

        # Same order as before: drop known duplicates everywhere, then
        # upload, then prune the folders that were emptied.
        for media_root in media_roots:
            scan_dupes(media_root)

        for media_root in media_roots:
            dump_instagram(media_root)

        for media_root in media_roots:
            clean_empty_folders(media_root)
    finally:
        # Release the database handles even when processing fails part-way.
        newCursor.close()
        newDB.close()

    print("Processing completed.")