|
|
|
|
from BunnyCDN.Storage import Storage
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
import os, config, funcs, cv2
|
|
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def UploadMedia(media):
    """Upload one media file to object storage and record it in the database.

    Relies on the module-level ``obj_storage``, ``newCursor`` and ``newDB``
    objects that are created under the ``__main__`` guard of this file.

    Args:
        media: dict with the keys ``media_id``, ``username``, ``timestamp``,
            ``user_id`` and ``filepath`` (the shape built by ``getMedias``).

    Returns:
        True once the file has been uploaded, inserted and removed from disk;
        False when the media is a video and its thumbnail could not be
        generated or uploaded (nothing else is uploaded in that case).
    """
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    post_type = funcs.determine_post_type(filepath, media_type)
    file_hash = funcs.calculate_file_hash(filepath)
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0

    # Fall back to "now" when the filename carried no timestamp.
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if media_type == 'video':
        width, height = funcs.get_video_dimensions(filepath)
    else:
        # Close the image handle explicitly: the file is deleted at the end
        # of this function and must not still be held open.
        with Image.open(filepath) as img:
            width, height = img.size

    thumbnail_url = None
    if media_type == 'video':
        thumb_path = f'temp/{media_id}.jpg'
        try:
            cap = cv2.VideoCapture(filepath)
            try:
                # The first frame of the video is used as the thumbnail.
                ret, frame = cap.read()
            finally:
                # Always free the capture, even when reading fails.
                cap.release()

            if not ret:
                raise ValueError('could not read a frame from the video')
            if not cv2.imwrite(thumb_path, frame):
                raise OSError(f'could not write thumbnail to {thumb_path}')

            obj_storage.PutFile(thumb_path, f'thumbnails/{media_id}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
        except Exception:
            # Best effort: a video without a thumbnail is skipped entirely.
            print('Error generating thumbnail. Skipping...')
            return False

    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if user_id and 'highlight' in user_id:
        # The user_id slot of the filename carries "highlight<id>" here, so
        # the real user_id has to be looked up from previously stored media.
        highlight_id = user_id.replace('highlight', '')
        user_id = None

        try:
            newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
            user_id = newCursor.fetchall()[0][0]
        except Exception:
            # Covers both an empty result set and a failing query.
            print(f'User {username} not found in database. Skipping...')
            user_id = None

        newCursor.execute(
            "INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)",
            (highlight_id, user_id, media_id),
        )
        newDB.commit()

        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is no longer needed once it is stored and recorded.
    os.remove(filepath)

    return True
|
|
|
|
|
|
|
|
|
|
def getMedias(folder_path):
    """Collect upload metadata from the filenames found in *folder_path*.

    Filenames are split on ``~``; entries with fewer than four parts are
    ignored silently. The first three parts are taken as username, timestamp
    and media_id, and the user_id is the text after the last ``_`` and before
    the first ``.`` of the fourth part.

    Args:
        folder_path: directory to scan (non-recursive, via ``os.listdir``).

    Returns:
        A list of dicts with the keys ``username``, ``timestamp``,
        ``media_id`` (int), ``user_id`` and ``filepath``. Files whose
        media_id is empty or not an integer are reported and skipped.
    """
    medias = []
    for filename in os.listdir(folder_path):
        parts = filename.split('~')
        if len(parts) < 4:
            continue

        username, timestamp, raw_media_id = parts[:3]
        user_id = parts[3].split('_')[-1].split('.')[0]

        try:
            # An empty string also raises ValueError, so this single check
            # covers both the missing and the non-numeric case.
            media_id = int(raw_media_id)
        except ValueError:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        medias.append({
            'username': username,
            'timestamp': timestamp,
            'media_id': media_id,
            'user_id': user_id,
            'filepath': os.path.join(folder_path, filename),
        })

    return medias
|
|
|
|
|
|
|
|
|
|
def dump_instagram(folder_path):
    """Upload every new media file in *folder_path* and delete duplicates.

    A file counts as a duplicate when its media_id is already present in the
    module-level ``existing_files`` collection; such files are removed from
    disk and are not uploaded. All remaining files are passed to
    ``UploadMedia``.

    Args:
        folder_path: directory scanned by ``getMedias``.
    """
    pending = []
    for media in getMedias(folder_path):
        if media['media_id'] in existing_files:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])
        else:
            pending.append(media)

    # Only files that still exist on disk are uploaded; handing a removed
    # duplicate to UploadMedia would make it operate on a missing file.
    for media in pending:
        UploadMedia(media)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point. The names bound here (newDB, newCursor, obj_storage,
    # existing_files) are module globals read by the functions above.
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage access key is hard-coded in source; consider
    # loading it from configuration or the environment and rotating this one.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    # NOTE(review): this filters on platform='instagram', while the INSERT in
    # UploadMedia does not set a platform column - presumably the column has
    # a default; confirm against the schema.
    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    # A set keeps the per-file duplicate check in dump_instagram O(1).
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump_instagram('storysaver/')

    print("Processing completed.")
|