You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

139 lines
4.2 KiB
Python

11 months ago
from datetime import datetime
import os, config, funcs, cv2
11 months ago
from uuid import uuid4
11 months ago
11 months ago
# Local folder that the script section checks for files and passes to dump().
directory = 'snapchat'
11 months ago
def UploadMedia(media):
    """Upload one downloaded snap to object storage and record it in the database.

    Relies on module-level globals that the ``__main__`` section creates
    before this function is called: ``existing_files``, ``obj_storage``,
    ``newCursor`` and ``newDB``.

    Args:
        media: dict with the keys ``username``, ``timestamp``, ``filepath``
            and ``media_id``.

    Returns:
        True when the file was uploaded and recorded, or was skipped as a
        duplicate; False when thumbnail generation for a video failed.
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']
    filename = os.path.basename(filepath)
    media_id = media['media_id']

    thumbnail_url = None
    phash = None

    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    # NOTE(review): unlike the filename check above, this branch leaves the
    # local file on disk -- confirm whether that is intended.
    if media_id in existing_files:
        print('Duplicate file detected. Removing...')
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part of the timestamp before the first '-' is parsed.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            # The thumbnail key is derived from the file hash, so two files
            # with the same hash share (and overwrite) one thumbnail object.
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C still stops the
            # run, and report the cause instead of hiding it.
            print(f'Error generating thumbnail. Skipping... ({exc})')
            return False
        finally:
            # Always remove the temporary thumbnail, including on failure.
            if thumb_path is not None and os.path.exists(thumb_path):
                os.remove(thumb_path)

    # The object is stored under its original filename.
    server_path = f'media/snaps/{username}/{filename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    obj_storage.PutFile(filepath, server_path)  # slow step: uploads the media file

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')
    newCursor.execute(query, values)  # slow step: database round trip
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only deleted after the upload and the commit succeeded.
    os.remove(filepath)
    return True
11 months ago
def generate_thumbnail(filepath):
    """Write the first frame of a video to a JPEG under ``temp/``.

    Args:
        filepath: path of the video file to read.

    Returns:
        Path of the JPEG that was written (``temp/<uuid4>.jpg``).

    Raises:
        RuntimeError: if no frame could be read from ``filepath`` or the
            JPEG could not be written.
    """
    thumb_path = f'temp/{uuid4()}.jpg'
    # cv2.imwrite does not create missing directories; make sure it exists.
    os.makedirs(os.path.dirname(thumb_path), exist_ok=True)

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even when reading fails.
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f'Could not read a frame from {filepath}')
    # imwrite signals failure through its return value rather than raising.
    if not cv2.imwrite(thumb_path, frame):
        raise RuntimeError(f'Could not write thumbnail to {thumb_path}')
    return thumb_path
11 months ago
def get_media_data(filepath):
    """Parse a snap file path into the dict consumed by the upload step.

    The base name is split on ``~``; the first three fields are taken as
    username, timestamp and snap id (the snap id has its file extension
    stripped).

    Args:
        filepath: path to the media file.

    Returns:
        A dict with the keys ``username``, ``timestamp``, ``filepath`` and
        ``media_id``, or False when the base name has fewer than three
        ``~``-separated fields.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) < 3:
        return False

    username, timestamp, snap_part = parts[:3]
    snap_id = os.path.splitext(snap_part)[0]

    return {
        'username': username,
        'timestamp': timestamp,
        'filepath': filepath,
        'media_id': snap_id,
    }
def get_media(folder_path):
    """Collect parsed media entries for every file under ``folder_path``.

    Walks the folder recursively and passes each file path to
    ``get_media_data``; files for which it returns a falsy value are skipped.

    Args:
        folder_path: root folder to walk.

    Returns:
        A list of the truthy results of ``get_media_data``, in walk order.
    """
    medias = []
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            data = get_media_data(os.path.join(root, filename))
            if data:
                medias.append(data)
    return medias
def dump(folder_path):
    """Upload every media file found under ``folder_path``.

    Args:
        folder_path: root folder handed to ``get_media``; each entry it
            returns is passed to ``UploadMedia``.
    """
    for media in get_media(folder_path):
        UploadMedia(media)
11 months ago
def process_snap_ids(filenames):
    """Extract the unique snap ids from ``~``-delimited filenames.

    The snap id is the third ``~``-separated field with its file extension
    removed. Filenames with fewer than three fields are skipped instead of
    raising ``IndexError``.

    Args:
        filenames: iterable of filename strings.

    Returns:
        A list of distinct snap ids in first-seen order.
    """
    seen = set()  # O(1) membership test; the list keeps first-seen order
    snap_ids = []
    for filename in filenames:
        parts = filename.split('~')
        if len(parts) < 3:
            continue
        snap_id = os.path.splitext(parts[2])[0]
        if snap_id not in seen:
            seen.add(snap_id)
            snap_ids.append(snap_id)
    return snap_ids
11 months ago
11 months ago
if __name__ == '__main__':
    # Script entry point. The names assigned here (newDB, newCursor,
    # obj_storage, existing_files) are intentionally module-level globals:
    # UploadMedia reads them directly, so they must not be moved into a
    # function.
    print('Starting processing...')

    # A missing folder is handled the same way as an empty one.
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        raise SystemExit(0)

    newDB, newCursor = config.gen_connection()
    try:
        obj_storage = config.get_storage()

        newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
        existing_files = [row[0] for row in newCursor.fetchall()]
        # Only membership tests are run against this collection, so a set
        # gives constant-time duplicate checks.
        existing_files = set(process_snap_ids(existing_files))

        dump(directory)
    finally:
        # Release the database resources even if an upload fails midway.
        newCursor.close()
        newDB.close()

    print("Processing completed.")