You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

133 lines
4.4 KiB
Python

11 months ago
from BunnyCDN.Storage import Storage
from datetime import datetime
11 months ago
import os, config, funcs, cv2
11 months ago
from PIL import Image
11 months ago
def _write_first_frame(video_path, image_path):
    """Save the first frame of *video_path* to *image_path*; return True on success."""
    cap = cv2.VideoCapture(video_path)
    try:
        ok, frame = cap.read()
    finally:
        # Release the capture handle even when reading raises.
        cap.release()
    # Only attempt the write when a frame was actually decoded.
    return bool(ok and cv2.imwrite(image_path, frame))


def UploadMedia(media):
    """Upload one local media file to storage and record it in the database.

    Args:
        media: Mapping with the keys ``media_id``, ``username``,
            ``timestamp``, ``user_id`` and ``filepath``.

    Returns:
        ``True`` after the file was uploaded, its row inserted into ``media``
        and the local file deleted. ``False`` when the media is a video and
        its thumbnail could not be generated or uploaded; in that case the
        media file itself is neither uploaded nor recorded.

    Uses the module-level ``obj_storage``, ``newCursor`` and ``newDB``
    objects, which must be set up before this function is called.
    """
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()
    media_type = funcs.get_media_type(filename)
    post_type = funcs.determine_post_type(filepath, media_type)
    file_hash = funcs.calculate_file_hash(filepath)
    is_video = media_type == 'video'
    duration = funcs.get_video_duration(filepath) if is_video else 0
    # Fall back to the current time when the file name carried no timestamp.
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if is_video:
        width, height = funcs.get_video_dimensions(filepath)
    else:
        # Close the image handle explicitly: the file is deleted at the end
        # of this function and must not still be held open.
        with Image.open(filepath) as image:
            width, height = image.size

    thumbnail_url = None
    if is_video:
        thumb_path = f'temp/{media_id}.jpg'
        try:
            thumbnail_ok = _write_first_frame(filepath, thumb_path)
            if thumbnail_ok:
                obj_storage.PutFile(thumb_path, f'thumbnails/{media_id}.jpg')
        except Exception:
            # Best-effort: any failure here means the video is skipped.
            thumbnail_ok = False
        if not thumbnail_ok:
            print('Error generating thumbnail. Skipping...')
            return False
        thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"

    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if user_id and 'highlight' in user_id:
        # For highlights the "user_id" field carries the highlight id; the
        # real user id is looked up from previously stored media rows.
        highlight_id = user_id.replace('highlight', '')
        user_id = None
        try:
            newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
            rows = newCursor.fetchall()
        except Exception:
            rows = []
        if rows:
            user_id = rows[0][0]
        else:
            # The highlight is still recorded, with a NULL user_id.
            print(f'User {username} not found in database. Skipping...')
        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    # The local copy is no longer needed once it is stored and recorded.
    os.remove(filepath)
    return True
11 months ago
11 months ago
def getMedias(folder_path):
    """Build upload metadata from the file names found in *folder_path*.

    A file name is split on ``~`` and must yield at least four fields:
    username, timestamp, media id, and a fourth field from which the user id
    is taken (the text after its last ``_``, cut at the first ``.``).
    Names with fewer fields are ignored silently; names whose media id is
    empty or not an integer are reported and skipped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list of dicts with the keys ``username``, ``timestamp``,
        ``media_id`` (converted to ``int``), ``user_id`` and ``filepath``,
        in the order ``os.listdir`` returned the files.
    """
    medias = []

    for filename in os.listdir(folder_path):
        parts = filename.split('~')
        if len(parts) < 4:
            continue

        username, timestamp, raw_media_id = parts[:3]
        user_id = parts[3].split('_')[-1].split('.')[0]

        try:
            # int('') also raises ValueError, so an empty id lands here too.
            media_id = int(raw_media_id)
        except ValueError:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        medias.append({
            'username': username,
            'timestamp': timestamp,
            'media_id': media_id,
            'user_id': user_id,
            'filepath': os.path.join(folder_path, filename),
        })

    return medias
def dump_instagram(folder_path):
    """Upload every new media file in *folder_path* and delete known duplicates.

    A file counts as a duplicate when its media id is already present in the
    module-level ``existing_files`` collection. Duplicates are deleted from
    disk and are not uploaded; every other file is passed to ``UploadMedia``.

    Args:
        folder_path: Directory scanned by ``getMedias``.
    """
    # Build the lookup once so each membership test is O(1).
    # NOTE(review): getMedias yields int media ids; this only matches if the
    # ids in existing_files are ints as well - confirm the column type.
    known_ids = set(existing_files)

    pending = []
    for media in getMedias(folder_path):
        if media['media_id'] in known_ids:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])
        else:
            pending.append(media)

    # Only files that still exist on disk are uploaded; a removed duplicate
    # would otherwise be handed to UploadMedia with a dangling path.
    for media in pending:
        UploadMedia(media)
11 months ago
# Script entry point: connect to the database and storage, load the media ids
# that are already recorded, then process the 'storysaver/' folder.
if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()

    # SECURITY: the storage API key is committed in source. It can now be
    # overridden through the BUNNY_STORAGE_API_KEY environment variable; the
    # literal is kept only as a backward-compatible fallback and should be
    # rotated and removed.
    storage_api_key = os.environ.get(
        'BUNNY_STORAGE_API_KEY',
        '345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e',
    )
    obj_storage = Storage(storage_api_key, 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    # A set keeps the duplicate check in dump_instagram cheap.
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump_instagram('storysaver/')
    print("Processing completed.")