You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

120 lines
3.6 KiB
Python

from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
def _upload_video_thumbnail(filepath, file_hash):
    """Upload the first frame of a video as its thumbnail.

    The frame is written to ``temp/<file_hash>.jpg``, pushed to storage under
    ``thumbnails/<file_hash>.jpg`` and hashed with ``funcs.generate_phash``.
    The temporary JPEG is removed whether or not those steps succeed.

    Returns:
        A ``(thumbnail_url, phash)`` tuple.

    Raises:
        ValueError: if no frame could be read from the video.
        OSError: if the thumbnail could not be written to disk.
        Anything raised by the storage client or the hashing helper.
    """
    thumb_path = f'temp/{file_hash}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture even when reading fails, so the handle on the
        # video file is not leaked.
        cap.release()
    if not ret:
        raise ValueError(f'could not read a frame from {filepath}')

    try:
        if not cv2.imwrite(thumb_path, frame):
            raise OSError(f'could not write thumbnail to {thumb_path}')
        obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')  # slow: network upload
        phash = funcs.generate_phash(thumb_path)
    finally:
        # Never leave the temporary thumbnail behind, even on failure.
        if os.path.exists(thumb_path):
            os.remove(thumb_path)

    return f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg", phash


def UploadMedia(media):
    """Upload one media file to storage and record it in the ``media`` table.

    Relies on the module-level globals ``existing_files``, ``obj_storage``,
    ``newCursor`` and ``newDB`` that the ``__main__`` section sets up.

    Args:
        media: dict with the keys ``'username'``, ``'timestamp'`` and
            ``'filepath'`` (the shape produced by ``get_media_data``).

    Returns:
        True when the file was uploaded and recorded, or when it was a
        duplicate (by filename) and was simply deleted locally.
        False when thumbnail generation for a video failed; the local file
        is kept in that case.
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']

    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    # Files whose name is already recorded are dropped without re-uploading.
    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part before the first '-' is used as the epoch value.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if media_type == 'video':
        width, height = funcs.get_video_dimensions(filepath)
        duration = funcs.get_video_duration(filepath)  # slow
    else:
        # Context manager closes the image file once the size is read.
        with Image.open(filepath) as img:
            width, height = img.size
        duration = 0

    if media_type == 'video':
        try:
            thumbnail_url, phash = _upload_video_thumbnail(filepath, file_hash)
        except Exception:
            # Best effort: skip this file but keep processing the batch.
            # Ctrl-C / SystemExit are deliberately not swallowed here.
            print('Error generating thumbnail. Skipping...')
            return False
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/snaps/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slowest step: network upload

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')

    newCursor.execute(query, values)  # slow
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only removed after the upload and the DB commit.
    os.remove(filepath)
    return True
def get_media_data(filepath):
    """Extract upload metadata from a ``username~timestamp~...`` file name.

    Args:
        filepath: path to a media file; only its base name is parsed.

    Returns:
        A dict with the keys ``'username'``, ``'timestamp'`` (both strings,
        taken from the first two ``~``-separated fields) and ``'filepath'``
        (the argument, unchanged), or ``False`` when the base name has fewer
        than three ``~``-separated fields.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('~')

    # At least three fields are required; only the first two are used.
    if len(parts) < 3:
        return False

    return {
        'username': parts[0],
        'timestamp': parts[1],
        'filepath': filepath,
    }
def get_media(folder_path):
    """Collect metadata for every recognisable media file under a folder.

    Walks ``folder_path`` recursively and passes each file path to
    ``get_media_data``; files for which it returns a falsy value are skipped.

    Args:
        folder_path: root directory to scan.

    Returns:
        A list of the dicts returned by ``get_media_data``, in ``os.walk``
        order. Empty when the folder has no files or does not exist
        (``os.walk`` yields nothing for an unreadable root).
    """
    medias = []
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            data = get_media_data(filepath)
            if data:
                medias.append(data)
    return medias
def dump(folder_path):
    """Upload every recognisable media file found under ``folder_path``.

    Files are discovered with ``get_media`` and handed to ``UploadMedia`` one
    at a time. The True/False result of each upload is not inspected, and an
    exception raised by an upload propagates and stops the batch.

    Args:
        folder_path: root directory to scan for media files.
    """
    medias = get_media(folder_path)
    for media in medias:
        UploadMedia(media)
if __name__ == '__main__':
    # Script entry point. The names bound here (newDB, newCursor, obj_storage,
    # existing_files) are module-level globals that UploadMedia reads.
    print('Starting processing...')

    directory = 'snapchat/'

    # A missing folder is treated like an empty one instead of crashing
    # with FileNotFoundError from os.listdir.
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        # SystemExit instead of exit(): exit() is a helper injected by the
        # `site` module and is not guaranteed to exist (python -S, frozen apps).
        raise SystemExit(0)

    newDB, newCursor = config.gen_connection()

    # NOTE(review): hard-coded storage credential committed to source.
    # It should be rotated and loaded from configuration or the environment.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
    # A set gives O(1) membership checks for the per-file duplicate test.
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump(directory)

    print("Processing completed.")