main
oscar 11 months ago
parent eafc36e805
commit 73889be10e

5
.gitignore vendored

@ -1,4 +1,7 @@
# Content
storysaver/
facebook/
media/
media/
cache/
temp/
*.pyc

@ -1,15 +1,16 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
import os, config, funcs, cv2
from PIL import Image
def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
if media_id and int(media_id) in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
def UploadMedia(media):
media_id = media['media_id']
username = media['username']
timestamp = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
@ -25,6 +26,20 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
thumbnail_url = None
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
except:
print('Error generating thumbnail. Skipping...')
return False
server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
@ -47,8 +62,8 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)
newCursor.execute(query, values)
newDB.commit()
@ -58,8 +73,8 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
return True
def dump_instagram(folder_path):
def getMedias(folder_path):
medias = []
for filename in os.listdir(folder_path):
parts = filename.split('~')
if len(parts) < 4:
@ -71,7 +86,36 @@ def dump_instagram(folder_path):
user_id = parts[3].split('_')[-1].split('.')[0]
filepath = os.path.join(folder_path, filename)
UploadMedia(username=username, filepath=filepath, media_id=media_id, timestamp=timestamp, user_id=user_id)
if not media_id:
print(f'Invalid media_id for file {filename}. Skipping...')
continue
try:media_id = int(media_id)
except:
print(f'Invalid media_id for file {filename}. Skipping...')
continue
data = {
'username': username,
'timestamp': timestamp,
'media_id': media_id,
'user_id': user_id,
'filepath': filepath
}
medias.append(data)
return medias
def dump_instagram(folder_path):
    """Upload every new media file found in ``folder_path``.

    Files whose ``media_id`` is already present in ``existing_files`` are
    deleted from disk and are NOT uploaded. Everything else is handed to
    ``UploadMedia`` one at a time.

    Args:
        folder_path: Directory scanned by ``getMedias``.
    """
    # NOTE(review): existing_files is defined outside this view; presumably a
    # collection of integer media ids already stored in the database - confirm.
    pending = []
    for media in getMedias(folder_path):
        if media['media_id'] in existing_files:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])
            # The file no longer exists on disk, so it must not reach
            # UploadMedia, which opens media['filepath'].
            continue
        pending.append(media)

    for media in pending:
        UploadMedia(media)
if __name__ == '__main__':

@ -0,0 +1,47 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image
# Backfill width/height for media rows that were stored with width = 0.
# The hashes of the images are different due to the optimizer.
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
# NOTE(review): the storage credential is hard-coded here; consider moving it
# into config so it is not committed with the script.
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

# Local directory used to keep downloaded files between runs.
cacheDir = 'cache'

for result in results:
    count += 1
    videoID = result[0]
    mediaURL = result[2]

    # Turn the public CDN URL back into the path inside the storage zone.
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    mediaType = funcs.get_media_type(localFilePath)
    if mediaType == 'image':
        with Image.open(localFilePath) as img:
            width, height = img.size
    elif mediaType == 'video':
        width, height = funcs.get_video_dimensions(localFilePath)
    else:
        # Without this branch width/height would be undefined on the first
        # row or silently reuse the previous row's values.
        print(f"[{count}/{len(results)}] Unknown media type {mediaType!r} for {localFilePath}. Skipping...")
        continue

    cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
    db.commit()

    print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")

@ -0,0 +1,63 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor
# This script takes a screenshot of the first frame of each video and uploads
# it as a thumbnail to BunnyCDN.
# NOTE(review): the first Storage argument looks like an API key and the second
# like the storage zone name - confirm against the BunnyCDN library; the key is
# hard-coded here.
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
# Select the public video rows that do not have a thumbnail yet.
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
results = cursor.fetchall()
# Progress counter incremented by the processing loop further down.
count = 0
print(f"Found {len(results)} files to process.")
# Local directory that downloaded videos are written to and looked up in.
cacheDir = 'cache'
def DownloadFile(serverPath, cacheDir):
    """Make sure the file at ``serverPath`` is available inside ``cacheDir``.

    The local name is the basename of ``serverPath``. If a file with that name
    is already in ``cacheDir`` nothing is downloaded; otherwise the file is
    fetched through ``obj_storage``.

    Args:
        serverPath: Path of the file inside the storage zone.
        cacheDir: Local directory the file is stored in.

    Returns:
        The local path ``cacheDir/<basename of serverPath>``.
    """
    target = os.path.join(cacheDir, os.path.basename(serverPath))

    if not os.path.exists(target):
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
        print(f"Downloaded {serverPath} to {target}")
    else:
        print(f"File already exists: {target}")

    return target
def ImportMedias():
    """Download every selected video into the cache using a thread pool.

    Each row of the module-level ``results`` is converted from its CDN URL to a
    storage path and queued on a pool of 10 worker threads. The returned
    futures are not kept, so an exception raised inside a worker is not
    reported here. The function returns once the pool has shut down.
    """
    cdn_prefix = "https://storysave.b-cdn.net/"
    with ThreadPoolExecutor(max_workers=10) as pool:
        for row in results:
            remote = row[2].replace(cdn_prefix, '')
            remote = remote.replace('//', '/').replace('\\', '/')
            pool.submit(DownloadFile, remote, cacheDir)
# For every selected video: make sure it is cached locally, grab its first
# frame, upload that frame as the thumbnail and record the URL in the database.
for result in results:
    count += 1
    itemID = result[0]
    mediaURL = result[2]

    # Turn the public CDN URL back into the path inside the storage zone.
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = DownloadFile(serverPath, cacheDir)

    cap = cv2.VideoCapture(localFilePath)
    try:
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if reading raised.
        cap.release()

    # A corrupt or empty video yields no frame; skip it instead of letting
    # cv2.imwrite fail and abort the whole run.
    if not ret or frame is None:
        print(f"[{count}/{len(results)}] Could not read a frame from {localFilePath}. Skipping...")
        continue

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite('thumbnail.jpg', frame):
        print(f"[{count}/{len(results)}] Could not write thumbnail for {localFilePath}. Skipping...")
        continue

    thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
    obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')

    cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
    db.commit()

    print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")
Loading…
Cancel
Save