You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
4.2 KiB
Python
140 lines
4.2 KiB
Python
from datetime import datetime
|
|
from uuid import uuid4
|
|
import funcs
|
|
import config
|
|
import cv2
|
|
import os
|
|
|
|
# Root folder scanned for per-user subfolders of files to upload; passed to dump_instagram() by the __main__ section.
directory = 'processed_tiktoks'
|
|
|
|
def UploadMedia(media):
    """Upload one local media file to object storage and record it in the database.

    Relies on module-level names created by the ``__main__`` section of this
    script: ``existing_hashes``, ``obj_storage``, ``newCursor`` and ``newDB``.

    Args:
        media: Dict providing at least the ``'username'`` and ``'filepath'`` keys.

    Returns:
        True when the file was uploaded, inserted and deleted locally.
        False when it was skipped: media type or post type could not be
        determined, its hash is already known, or thumbnail creation failed.
    """
    platform = 'TikTok'
    username = media['username']
    filepath = media['filepath']
    file_size = os.path.getsize(filepath)
    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    if not media_type:
        print(f'Error determining media type for {filename}. Skipping...')
        return False

    post_type = funcs.determine_post_type(filepath)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_hashes:
        print(f'File {filename} already exists. Skipping...')
        return False

    post_date = datetime.now()

    width, height = funcs.get_media_dimensions(filepath)

    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            # The object key is derived from the file hash, so files with the
            # same hash share (and overwrite) one thumbnail.
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # stop the run, and report the cause instead of hiding it.
            print(f'Error generating thumbnail. Skipping... ({exc!r})')
            return False
        finally:
            # Always clean up the temporary thumbnail, including when the
            # upload or phash step failed after it was written.
            if thumb_path and os.path.exists(thumb_path):
                try:
                    os.remove(thumb_path)
                except OSError:
                    pass

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/tiktoks/{username}/{newFilename}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow: uploads the whole media file

    # Stored post type is either 'story' or 'post'.
    post_type = 'story' if post_type == 'stories' else 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)

    newCursor.execute(query, values)  # slower still
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only removed once the upload and the commit succeeded.
    os.remove(filepath)

    return True
|
|
|
|
def generate_thumbnail(filepath):
    """Write the first frame of a video to a temporary JPEG and return its path.

    Args:
        filepath: Path of the video file to read.

    Returns:
        Path of the written image, of the form ``temp/<uuid4>.jpg``.

    Raises:
        RuntimeError: If no frame could be read from the video or the image
            could not be written.
    """
    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs('temp', exist_ok=True)
    thumb_path = f'temp/{uuid4()}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even when reading raises.
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f'Could not read a frame from {filepath}')

    # imwrite signals failure through its return value rather than an exception.
    if not cv2.imwrite(thumb_path, frame):
        raise RuntimeError(f'Could not write thumbnail to {thumb_path}')

    return thumb_path
|
|
|
|
def get_media_data(filepath):
    """Parse a ``username~title[~tiktok_id]`` file name into a media dict.

    The base name of ``filepath`` is split on ``'~'`` as-is, so the file
    extension stays attached to the last piece.

    Args:
        filepath: Path to the media file; only its base name is parsed.

    Returns:
        A dict with keys ``'username'``, ``'filepath'``, ``'tiktok_id'`` and
        ``'title'`` (``'tiktok_id'`` is None for two-part names), or False
        when the name does not split into exactly two or three parts.
    """
    pieces = os.path.basename(filepath).split('~')

    # Only two-part and three-part names are recognised.
    if len(pieces) not in (2, 3):
        return False

    username, title = pieces[0], pieces[1]
    tiktok_id = pieces[2] if len(pieces) == 3 else None

    return {
        'username': username,
        'filepath': filepath,
        'tiktok_id': tiktok_id,
        'title': title,
    }
|
|
|
|
def get_media(folder_path):
    """Collect media dicts for every parsable file one level below ``folder_path``.

    Each directory directly inside ``folder_path`` is listed and every entry
    in it is passed to ``get_media_data``; entries of ``folder_path`` that are
    not directories are reported with a "Skipping" message.

    Args:
        folder_path: Directory whose subdirectories hold the media files.

    Returns:
        List of the truthy results returned by ``get_media_data``.
    """
    collected = []

    for entry in os.listdir(folder_path):
        user_dir = os.path.join(folder_path, entry)

        if os.path.isdir(user_dir):
            parsed = (
                get_media_data(os.path.join(user_dir, name))
                for name in os.listdir(user_dir)
            )
            # get_media_data returns False for names it cannot parse.
            collected.extend(item for item in parsed if item)
        else:
            print(f"Skipping {entry}")

    return collected
|
|
|
|
def dump_instagram(folder_path):
    """Upload every media file found under ``folder_path``.

    The full list is gathered with ``get_media`` first, then each entry is
    handed to ``UploadMedia``; the per-file results are ignored.

    Args:
        folder_path: Directory passed on to ``get_media``.
    """
    for item in get_media(folder_path):
        UploadMedia(item)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: open the DB and storage handles used as globals by
    # UploadMedia, load the hashes already stored, then process `directory`.
    print('Starting processing...')

    # Treat a missing directory like an empty one instead of crashing in os.listdir.
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        # exit() is a helper injected by the site module; raise SystemExit directly.
        raise SystemExit(0)

    newDB, newCursor = config.gen_connection()

    try:
        obj_storage = config.get_storage()

        newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
        # A set gives O(1) duplicate checks in UploadMedia (it only uses `in`).
        existing_hashes = {row[0] for row in newCursor.fetchall()}

        dump_instagram(directory)
    finally:
        # Release the database resources even if processing fails part-way.
        newCursor.close()
        newDB.close()

    print("Processing completed.")