new altpins update
parent 1d8bb3c85f
commit e9f3404d1c
@@ -1,25 +1,25 @@
# Content
storysaver/
facebook/
media/
cache/
temp/
*.pyc
/old_altpins_cache
/__pycache__
/STORAGE_IMPORTED
/STREAM_VIDEOS
/STREAM_VIDEOS_IMPORTED
/STORAGE
/other
/Sort
# python files
*.pyc
/images
/sortlater
/videos
/duplicates
/ready_to_upload
/archive
/images
/images
/sorted
__pycache__

# Content
storysaver
facebook
media
cache
temp
ready_to_upload
check_if_exists
data
media
old_snapchats
OnlyFans


process
processed_tiktoks
screenshot_stories
snapchat
snapchat_new
sorted
uploadlater
@@ -1,24 +0,0 @@
import config

altpins_db, altpins_cursor = config.altpins_gen_connection()
db, cursor = config.gen_connection()

altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }

cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }

common_items = set(altpins_results.keys()) & set(media_results.keys())

for title, hash_value in common_items:
    altpins_id, altpins_url = altpins_results[(title, hash_value)]
    media_id, media_url = media_results[(title, hash_value)]

    print(f"Found a match for hash {hash_value} with title {title}")
    print(f"Altpins URL: {altpins_url}")
    print(f"Media URL: {media_url}")

    altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
    altpins_db.commit()
    print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")
@@ -1,17 +0,0 @@
you have not failed enough
you care what other people think
you think you're smarter than everyone
you lack curiosity
you don't ask enough questions
you can't handle the truth
you don't see opportunities

resell to the people you already sold to
staff either save you time or make you money
sell on people's weaknesses, insecurities and addictions
make people think they NEED your product
any business can be started with no money
business is money IN and not money OUT
take money, not make money
use whatever you've got
act with speed
@@ -1,99 +0,0 @@
fix recorder
recover missing streams
re-encode all videos to 10 seconds
scan last dood videos with missing download_link
delete all "deleted" self-hosted videos
dump all useless self-hosted videos to mix/dood
fix camsrip crawler
move camsrip to new server
finish converting download_link faster
check failed mixdrop uploads
add cache for .ts files bunny/nvme
manage all domains
pay for onlycats domain
onlyfans crawler
telegram crawler
optimize instagram crawler
do ethernet cables
get access to xn
paint light switches/phone case/pc cases
microscope shorts
fix / sell scooter
paperless ngx
do hand scan
go to psychiatrist
do general checkup on body
fix and brush teeth
SFP and NTP
phishing ig
xss tate shop
finish and improve opsec
delete internet t
clean cry
warm up pay
install wasabi
install / try gaming linux
finish atrazat on
set up nas
dump last stories
photoshop originals
finish ab recoverer/cleaner
fix controller
fix hdd 100% load on video server
replace exoclick
fake comments bot
advanced tags/streamer data bot
self host all thumbs with bunny
reupload all dmca'd videos with new id's
generate shorts
use user's tokens to record private shows
create alert system
set up streaming server
minimize amount of scripts i need
normalize database
load balancers for web server
set up recu downloader
handle premium expired / purchases and upgrades
create bunny-like database and api for videos
save file sizes for videos
add payment options like paypal, usdt and more
re-generate thumbs for all videos self-hosted
download all mixdrop/dood/xpo videos
add streamate and cherrytv to recorder and website
delete stripchat dupes
delete "fav" dupes
blacklist ruta and other dmca agencies' crawlers
send emails to potential premiums
fix streamers db having 2 queries with and without gender
create storage manager for recorder
visualize nginx logs to track dmca bots
append all cutoff streams
add ssh keys


frontend:
add forums
add width sections for video player
coins/credit system (basically affiliate)
enable user uploaded content
performer accounts
advanced search system
affiliate system - optimize and create a panel where i can easily manage all
sort by dmca and most popular on /profile
change comments, follow and save to js
add payment options
optimize history/following
create contests and affiliates for premium
"copy" saved videos
keep views uncached on main page
add heatmap for player
fix missing animated thumbs in saved page
fix duplicates in saved videos page
add ip logging for security
require phone numbers for logging in?
add recu affiliate?
fix history dupes
try node.js to get the mp4 url from mixdrop
add profile pictures in search
add collections
mark premium videos
add credit card payment with skrill or others
@@ -1,62 +0,0 @@
import os
import tarfile
from datetime import datetime
import sys  # Import sys for command line arguments
from BunnyCDN.Storage import Storage

def is_hidden(path):
    """
    Check if the given path is a hidden folder or file.
    """
    return path.startswith('.') or '/.' in path

def should_exclude(path, excluded_items):
    """
    Check if the given path should be excluded.
    """
    if is_hidden(path):
        return True
    for item in excluded_items:
        if path.startswith(item):
            return True
    return False

def backup(folder_path, excluded_folders=[], excluded_files=[]):
    """
    Create a compressed backup of the specified folder, excluding specified items and hidden folders.
    """
    timestamp = int(datetime.timestamp(datetime.now()))
    backup_file = os.path.join(folder_path, f'backup-{timestamp}.tar.gz')

    with tarfile.open(backup_file, "w:gz") as tar:
        for root, dirs, file_names in os.walk(folder_path):
            if should_exclude(root, excluded_folders):
                continue

            for file_name in file_names:
                file_path = os.path.join(root, file_name)

                if should_exclude(file_path, excluded_files):
                    continue

                print("Adding %s" % file_path)
                tar.add(file_path, arcname=os.path.relpath(file_path, start=folder_path))

    return backup_file

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python script.py <folder_path>")
        sys.exit(1)

    folder_path = sys.argv[1]

    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        sys.exit(1)

    backup_file = backup(folder_path)

    obj_storage = Storage('99f4c72b-2674-4e6a-a1825c269cc0-b959-48a1', 'ab-backups')
    obj_storage.PutFile(backup_file, f'backups/{os.path.basename(backup_file)}')
    print("Backup and upload successful.")
@@ -0,0 +1,79 @@
import os, requests, config
from snapchat import get_stories, get_highlight_stories, get_all_users_data

def get_file_extension(url):
    response = requests.head(url)
    if response.status_code != 200:
        print(f"Failed to access media {url}")
        return None

    content_type = response.headers.get('Content-Type', '')
    if 'image' in content_type:
        return '.jpg'
    elif 'video' in content_type:
        return '.mp4'
    else:
        print(f"Unknown content type for media {url}")
        return None

import re
def extract_file_type(url):
    # Use a regular expression to extract the file type number
    match = re.search(r"/d/[^.]+\.([0-9]+)\.", url)
    if match:
        return match.group(1)  # Return the number as a string
    return None

def map_file_type_to_extension(urls):
    file_type_to_extension = {}
    seen_file_types = set()

    for url in urls:
        # Extract the file type number
        file_type_number = extract_file_type(url)
        if not file_type_number:
            continue

        # Skip if we've already checked this file type
        if file_type_number in seen_file_types:
            continue

        # Use the get_file_extension function to determine the extension
        file_extension = get_file_extension(url)
        if file_extension:
            file_type_to_extension[file_type_number] = file_extension
            seen_file_types.add(file_type_number)

    return file_type_to_extension

def main():
    cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
    usernames = [row[0] for row in cursor.fetchall()]

    snapchat_users_data = get_all_users_data(usernames)

    all_stories = [get_stories(data) + get_highlight_stories(data) for data in snapchat_users_data.values()]

    processed_stories = []
    for stories in all_stories:
        processed_stories.extend(stories)

    all_urls = [story['url'] for story in processed_stories]

    # Map file type numbers to extensions
    file_type_to_extension = map_file_type_to_extension(all_urls)

    # Print the mapping
    print("File Type to Extension Mapping:")
    for file_type, extension in file_type_to_extension.items():
        print(f"File Type {file_type}: {extension}")

if __name__ == '__main__':
    print('Starting snappy...')

    db, cursor = config.gen_connection()
    obj_storage = config.get_storage()

    main()

    print("Processing completed.")
@@ -1,99 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    for filename in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, filename)):
            continue

        username = filename.split("'")[0]

        filepath = os.path.join(folder_path, filename)

        mediatype = funcs.get_media_type(filename)
        post_type = funcs.determine_post_type(filepath, mediatype)

        upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)

    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder

            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)

                mediatype = funcs.get_media_type(filename)
                post_type = funcs.determine_post_type(filepath, mediatype)

                upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)

def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    file_hash = funcs.calculate_file_hash(filepath)

    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0

    if "FB_IMG" in filename: media_id = filename.split("_")[2].split(".")[0]
    else: media_id = uuid.uuid4().hex

    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')

    obj_storage.PutFile(filepath, server_path)

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if post_type == 'stories':
        post_type = 'story'
    else:
        post_type = 'post'

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration)

    try:
        newCursor.execute(query, values)
        newDB.commit()
        print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    except Exception as e:
        print(f"Database error: {e}")
        return False

    try:
        if newCursor.rowcount > 0:
            os.remove(filepath)
    except Exception as e:
        print(f"Failed to remove local file {filepath}: {e}")

    return True

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_facebook('facebook/')

    print("Processing completed.")
@@ -1,19 +0,0 @@
from BunnyCDN.Storage import Storage
import dump_instagram as storysaver
import time, config

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    while True:
        print("Processing...")
        storysaver.dump_instagram('storysaver/')
        print("Processing completed.")
        time.sleep(15)
@@ -1,133 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image


def UploadMedia(media):
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)

    post_type = funcs.determine_post_type(filepath, media_type)

    file_hash = funcs.calculate_file_hash(filepath)

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size

    thumbnail_url = None
    if media_type == 'video':
        try:
            thumbPath = f'temp/{media_id}.jpg'
            cap = cv2.VideoCapture(filepath)
            ret, frame = cap.read()
            cv2.imwrite(thumbPath, frame)
            cap.release()
            obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
        except:
            print('Error generating thumbnail. Skipping...')
            return False

    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if user_id and 'highlight' in user_id:
        highlight_id = user_id.replace('highlight', '')
        user_id = None

        try:
            newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
            user_id = newCursor.fetchall()[0][0]
        except:
            print(f'User {username} not found in database. Skipping...')
            user_id = None

        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()

        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True

def getMedias(folder_path):
    medias = []
    for filename in os.listdir(folder_path):
        parts = filename.split('~')
        if len(parts) < 4:
            continue

        username = parts[0]
        timestamp = parts[1]
        media_id = parts[2]
        user_id = parts[3].split('_')[-1].split('.')[0]

        filepath = os.path.join(folder_path, filename)

        if not media_id:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        try:
            media_id = int(media_id)
        except:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        data = {
            'username': username,
            'timestamp': timestamp,
            'media_id': media_id,
            'user_id': user_id,
            'filepath': filepath
        }
        medias.append(data)
    return medias

def dump_instagram(folder_path):
    medias = getMedias(folder_path)

    for media in medias:
        if media['media_id'] in existing_files:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])

    for media in medias:
        UploadMedia(media)


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('storysaver/')

    print("Processing completed.")
@@ -1,82 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image

def dump_instagram(folder_path):
    for filename in os.listdir(folder_path):
        parts = filename.split('_')

        try:
            username = '_'.join(parts[:-2])  # Join all except last two
            timestamp = int(parts[-2])  # Second last is timestamp
            user_id = int(parts[-1].split('.')[0])  # Last part before extension is user_id
        except ValueError as e:
            print(f"Invalid filename: {filename}. Error: {e}")
            continue

        filepath = os.path.join(folder_path, filename)

        mediatype = funcs.get_media_type(filename)
        post_type = funcs.determine_post_type(filepath, mediatype)

        UploadMedia(username=username, media_type=mediatype, filepath=filepath, post_type=post_type, timestamp=timestamp, user_id=user_id)


def UploadMedia(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    if 'tero' in username:
        pass

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    file_hash = funcs.calculate_file_hash(filepath)

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    dirtype = funcs.determine_post_type(filepath, media_type)

    server_path = f'media/{dirtype}/{username}/{file_hash}{file_extension}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    obj_storage.PutFile(filepath, server_path)

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('storysaver/missing/')

    print("Processing completed.")
@@ -1,67 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder

            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)

                upload_file(username=username, filepath=filepath)

def upload_file(filepath, username):
    filename = os.path.basename(filepath)
    media_id = filename.split('.')[0]

    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)

    file_hash = funcs.calculate_file_hash(filepath)

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0

    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size


    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')

    obj_storage.PutFile(filepath, server_path)

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    if newCursor.rowcount > 0:
        os.remove(filepath)

    return True

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_facebook('tiktok/')

    print("Processing completed.")
@@ -1,13 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image

if __name__ == '__main__':
    print('Starting processing...')

    files = os.listdir('')

    for file in files:
        filePath = os.path.join('storysaver/missing_data/', file)
        file_hash = funcs.calculate_file_hash(filePath)
@@ -1,22 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    files = os.listdir('storysaver/missing_data/')

    for file in files:
        filePath = os.path.join('storysaver/missing_data/', file)
        file_hash = funcs.calculate_file_hash(filePath)

        if file_hash in existing_files:
            print(f'Duplicate file detected. Removing {filePath}...')
            os.rename(filePath, f'storysaver/dupes/{file}')
@@ -1,12 +0,0 @@
import os

def remove_empty_folders(folder):
    for root, dirs, files in os.walk(folder):
        for dir in dirs:
            dirpath = os.path.join(root, dir)
            if not os.listdir(dirpath):
                print(f"Removing empty folder {dirpath}")
                os.rmdir(dirpath)

folder = 'media'
remove_empty_folders(folder)
@@ -1,10 +0,0 @@
ChallengeResolve: Unknown step_name "submit_phone" for "olivercury" in challenge resolver: {'step_name': 'submit_phone', 'step_data': {'phone_number': '+972522618221', 'show_whatsapp_otp_choice': True, 'whatsapp': False}, 'flow_render_type': 3, 'bloks_action': 'com.instagram.challenge.navigation.take_challenge', 'cni': 18436897147040850, 'challenge_context': 'Af6pVKkiomiOMxWvLzouGukazqMMhFbzNERezSMhBU-dHrO_DNGfTJpUPp8-di6HHm8WfAfL6_PQaLkV6sOkb6CC68ugfQtLMd3OgMVasZkOI5O6YdnoqMtBzNBGd944VtUNEEkl9bNVM5yQbfMskCuKTUf7AQOIYD2zEuvd8wC-AUBPziP105a1xq3GbaSeyJ9QnEJHHWgpFenBURUNbdLvQ9lzs5j62zCxo_0fe4Fw', 'challenge_type_enum_str': 'SMS', 'status': 'ok'}
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://www.instagram.com/graphql/query/?variables=%7B%22user_id%22%3A%226208321762%22%2C%22include_reel%22%3Atrue%7D&query_hash=ad99dd9d3646cc3c0dda65debcd266a7

During handling of the above exception, another exception occurred:

instagrapi.exceptions.ClientUnauthorizedError: 401 Client Error: Unauthorized for url: https://www.instagram.com/graphql/query/?variables=%7B%22user_id%22%3A%226208321762%22%2C%22include_reel%22%3Atrue%7D&query_hash=ad99dd9d3646cc3c0dda65debcd266a7

During handling of the above exception, another exception occurred:

requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://i.instagram.com/api/v1/users/6208321762/info/
@@ -1,53 +0,0 @@
import os
import shutil
import hashlib

def clean_empty_folders(directory):
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def calculate_file_hash(file_path, hash_func='sha256'):
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        chunk = file.read(8192)
        while chunk:
            h.update(chunk)
            chunk = file.read(8192)
    return h.hexdigest()

def get_media_type(filename):
    extensions = {
        '.jpg': 'image', '.jpeg': 'image', '.webp': 'image', '.png': 'image', '.gif': 'image',
        '.mp4': 'video', '.mov': 'video'
    }
    for ext, media_type in extensions.items():
        if filename.lower().endswith(ext):
            return media_type
    return None

def move_files(source_root, destination_root):
    for root, dirs, files in os.walk(source_root):
        for file in files:
            if "~" in file or 'FB_IMG' in file or 's instagram' in file:
                username = file.split("'")[0]
                source_path = os.path.join(root, file)
                rel_path = os.path.relpath(root, source_root)
                destination_path = os.path.join(destination_root, username, rel_path)

                if not os.path.exists(destination_path):
                    os.makedirs(destination_path)

                shutil.move(source_path, os.path.join(destination_path, file))
                print(f"Moved {file} to {destination_path}")

if __name__ == '__main__':
    print('Starting processing...')
    source_directory = 'StorySave_Sort/Sort/StorySave'
    destination_directory = 'StorySave_Sort/Final/Stories'
    move_files(source_directory, destination_directory)
    clean_empty_folders(source_directory)
    print("Processing completed.")
@@ -1,85 +0,0 @@
import os
import config
import cv2
from funcs import get_files  # Assuming this is defined elsewhere
import imagehash
from PIL import Image

def generate_thumbnail_phash(filepath, hash_size=8):  # Set hash_size to 8
    cap = cv2.VideoCapture(filepath)
    ret, frame = cap.read()
    cap.release()

    if not ret:
        print(f"Error reading frame from {filepath}")
        return None

    # Resize frame to a standard size
    standard_size = (320, 240)
    resized_frame = cv2.resize(frame, standard_size, interpolation=cv2.INTER_AREA)

    # Convert OpenCV image (BGR) to PIL Image (RGB)
    image_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(image_rgb)

    # Compute pHash
    phash = imagehash.phash(pil_image, hash_size=hash_size)

    return phash

def are_phashes_duplicates(phash1, phash2, threshold=5):
    # Compute Hamming distance between the pHashes
    try:
        distance = phash1 - phash2
    except TypeError as e:
        print(f"Error comparing pHashes: {e}")
        return False

    return distance <= threshold

def get_media_by_phash(phash, username, existing_medias, threshold=5):
    for media in existing_medias:
        existing_phash_str = media[1]
        existing_username = media[2]
        if existing_username != username:
            continue

        # Convert stored phash string to ImageHash object
        existing_phash = imagehash.hex_to_hash(existing_phash_str)

        if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
            return media
    return None

# Database connection
db, cursor = config.gen_connection()

# Fetch existing videos with pHashes
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
existing_medias = cursor.fetchall()

users = os.listdir('videos')
for username in users:
    user_videos_path = os.path.join('videos', username)
    if not os.path.isdir(user_videos_path):
        continue

    videos = os.listdir(user_videos_path)
    for video in videos:
        print(f'Processing {video}...')
        filepath = os.path.join(user_videos_path, video)

        phash = generate_thumbnail_phash(filepath, hash_size=8)  # Use hash_size=8
        if phash is None:
            continue

        phash_str = str(phash)

        duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
        if duplicate_media:
            print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
            print(f'Duplicate video path: {filepath}')
            newpath = filepath.replace('videos', 'duplicates')
            os.makedirs(os.path.dirname(newpath), exist_ok=True)
            os.rename(filepath, newpath)
            print(f'Moved {video} to duplicates/')
@@ -1,2 +0,0 @@
https://www.instagram.com/anya_shtril/
https://www.instagram.com/anyarodionov/
@@ -1,40 +0,0 @@
import config, os, json
from PIL import Image
import imagehash

def find_file(filename, directory):
    filename = filename.lower().split('.')[0]
    for root, dirs, files in os.walk(directory):
        for file in files:
            if filename in file:
                return os.path.join(root, file)
    return None

def generate_phash(image_path):
    image = Image.open(image_path)
    return str(imagehash.phash(image))

count = 0

cacheDir = 'sorted'
dataPath = 'pins.json'

os.makedirs(cacheDir, exist_ok=True)

medias = json.load(open(dataPath))

for item in medias:
    count += 1

    filepath = item['filepath']
    if os.path.exists(filepath):
        continue

    newfilepath = find_file(os.path.basename(filepath), cacheDir)
    if newfilepath:
        print(f"Found file {newfilepath} for {filepath}")
        item['filepath'] = newfilepath


with open(dataPath, 'w') as f:
    json.dump(medias, f)
@@ -1,94 +0,0 @@
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os

def file_hash_from_url(url, hash_algo='sha256'):
    h = hashlib.new(hash_algo)

    response = requests.get(url, stream=True)

    if response.status_code == 200:
        for chunk in response.iter_content(8192):
            h.update(chunk)
        return h.hexdigest()
    else:
        raise Exception(f"Failed to download file: Status code {response.status_code}")

def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    :param file_path: Path to the video file
    :return: Duration in seconds
    """
    try:
        with VideoFileClip(file_path) as video:
            return video.duration
    except:
        return 0

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    # Create a hash object
    h = hashlib.new(hash_algo)

    # Open the file in binary mode and read in chunks
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)

    # Return the hexadecimal digest of the hash
    return h.hexdigest()

# the hash of the images are different due to optimizer

#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

cacheDir = 'cache'
for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]

    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    duration = get_video_duration(localFilePath)

    if duration == 0:
        print(f"Failed to get duration for {localFilePath}")
        continue

    if duration < 1:
        duration = 1

    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
    db.commit()

    print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")
@@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image

# the hash of the images are different due to optimizer

#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

cacheDir = 'cache'
for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]

    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    mediaType = funcs.get_media_type(localFilePath)

    if mediaType == 'image':
        with Image.open(localFilePath) as img:
            width, height = img.size
    elif mediaType == 'video':
        width, height = funcs.get_video_dimensions(localFilePath)


    cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
    db.commit()

    print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")
@@ -1,63 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor

# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN

obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

cacheDir = 'cache'

def DownloadFile(serverPath, cacheDir):
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
        return localFilePath

    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {serverPath} to {localFilePath}")
    return localFilePath

def ImportMedias():
    with ThreadPoolExecutor(max_workers=10) as executor:
        for video in results:
            serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
            executor.submit(DownloadFile, serverPath, cacheDir)


for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]

    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    filePath = DownloadFile(serverPath, cacheDir)

    cap = cv2.VideoCapture(localFilePath)
    ret, frame = cap.read()
    cv2.imwrite('thumbnail.jpg', frame)
    cap.release()

    thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"

    obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')


    cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
    db.commit()

    print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")
@@ -1,19 +0,0 @@
skit idea for movie avigail and the hackers at 05:58

import subprocess
import tkinter as tk

window = tk.Tk()

window.title("ENTER PIN BOOM BOOM HURUMPH HACKER OOOOHHHH")

label = tk.Label(window, text="Enter PIN to hack:")
label.pack()

pin_entry = tk.Entry(window, show=".")
pin_entry.pack()

pin_entry.bind("<Return>", lambda event: subprocess.run(["python", "hack.py", pin_entry.get()]))

while True:
    window.update()
Binary file not shown.
@@ -1,23 +0,0 @@
import json

with open('bunny_data/missing_videos.json', 'r') as f:
    missing_videos = json.load(f)

with open('bunny_data/allVideos.json', 'r') as f:
    all_videos = json.load(f)

all_videos_guids = {video['guid'] for video in all_videos}

for video in missing_videos:
    if video['guid'] in all_videos_guids:
        video['imported'] = True

combined_data = {
    "missing_videos": missing_videos,
    "all_videos": all_videos
}

with open('bunny_data/combined_videos.json', 'w') as f:
    json.dump(combined_data, f, indent=4)

print("Combined data has been written to bunny_data/combined_videos.json")
@@ -1,16 +0,0 @@
import os, json


pins = open('db_pins.json', 'r')
pins = json.load(pins)

importedPins = open('db_pins_imported.json', 'r')
importedPins = json.load(importedPins)

allPins = pins + importedPins
print(len(allPins))

finalPins = open('allPins.json', 'r')
finalPins = json.load(finalPins)

print(len(finalPins))
@@ -1,19 +0,0 @@
import os, config, funcs

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    files = os.listdir('storysaver/missingdata/')

    for file in files:
        filePath = os.path.join('storysaver/missingdata/', file)
        file_hash = funcs.calculate_file_hash(filePath)

        if file_hash in existing_files:
            print(f'Duplicate file detected. Removing {filePath}...')
            os.rename(filePath, f'storysaver/dupes/{file}')
@@ -1,38 +0,0 @@
import os, json

def getMedia(filename, list):
    for item in list:
        if filename.split('.')[0] in item['filepath']:
            return item
    return None


data = json.loads(open('oldpins.json').read())
files = os.listdir('STORAGE')

count = 0
for file in files:
    filepath = f'STORAGE/{file}'

    if os.path.isdir(filepath):
        continue
    media = getMedia(file, data)
    if not media:
        continue

    username = media['title']
    filetype = media['type']
    filetype = 'jpg' if filetype == 'image' else 'mp4'
    filename = media['filepath'].split('/')[-1] + '.' + filetype

    output = os.path.join('STORAGE', username, filename)
    os.makedirs(os.path.dirname(output), exist_ok=True)
    if os.path.exists(output):
        os.remove(output)
    output = os.path.join('STORAGE', username, file)
    os.rename(filepath, output)

    count += 1
    print(f'File: {file}')

print(f'Total: {count}')
@@ -1,45 +0,0 @@
import funcs, json, os, config

db, newCursor = config.gen_connection()

newCursor.execute("SELECT hash FROM media")
hashes = [hash[0] for hash in newCursor.fetchall()]

file = 'bunnyVideos.json'

data = json.loads(open(file).read())

for media in data:
    if media['imported'] == True:
        if os.path.exists(media['filepath']):
            print(f'File {media["filepath"]} does not exist. Skipping...')
            continue


countImported = 0
countSkipped = 0
for media in data:
    filepath = os.path.join('STREAM_VIDEOS_IMPORTED', media['guid'] + '.mp4')
    if media['imported'] == True:
        countImported += 1
        print('File already imported. Skipping...')
        continue

    countSkipped += 1

    if not os.path.exists(filepath):
        print(f'File {filepath} does not exist. Skipping...')
        continue

    hash = funcs.calculate_file_hash(filepath)

    if '67caa15e-390c-4223-b7b9-4d7842f3b443' in filepath:
        print(f'File {filepath} does not exist. Skipping...')
        continue

    if hash in hashes:
        print('Duplicate file detected. Removing...')


print(f'Imported: {countImported}')
print(f'Skipped: {countSkipped}')
@@ -1,17 +0,0 @@
from funcs import get_files, generate_phash
import os, config


db, cursor = config.gen_connection()
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL;")
phashes = [x[0] for x in cursor.fetchall()]

files = get_files('images')
for item in files:
    phash = generate_phash(item)
    if phash in phashes:
        print(item)
        newpath = item.replace('images', 'duplicates')
        newdir = os.path.dirname(newpath)
        os.makedirs(newdir, exist_ok=True)
        os.rename(item, newpath)
@@ -1,56 +0,0 @@
from BunnyCDN.Storage import Storage
import os, config, requests
from moviepy.editor import VideoFileClip

def get_media_type(filename):
    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
    video_extensions = {".mp4", ".mov"}
    extension = os.path.splitext(filename.lower())[1]
    if extension in image_extensions:
        return 'image'
    elif extension in video_extensions:
        return 'video'
    else:
        return 'unknown'

def determine_post_type(media_type):
    # Assuming the post type is directly based on media type.
    return media_type

def get_video_dimensions(filepath):
    with VideoFileClip(filepath) as clip:
        width, height = clip.size
    return width, height

def download_file(url):
    local_filename = url.split('/')[-1]
    # Note: Stream=True to avoid loading the whole file into memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

if __name__ == '__main__':
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    posts = open('fucked', 'r')

    for item in posts:
        username, url = item.strip().split('~')
        media_id = url.split('/')[-1].split('.')[0]
        media_type = get_media_type(url)

        query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
        values = (username, media_type, 'facebook', url)

        try:
            newCursor.execute(query, values)
            newDB.commit()
            print(f'[{newCursor.rowcount}] records updated.{url}')
        except Exception as e:
            print(f"Database error: {e}")

    posts.close()
@@ -1,41 +0,0 @@
import config, os
from PIL import Image
import imagehash

def generate_phash(image_path):
    image = Image.open(image_path)
    return str(imagehash.phash(image))

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()

count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")


for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    mediaURL = result[2]

    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist, skipping.")
        continue

    try:
        phash = generate_phash(localFilePath)

        cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
        db.commit()

        print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")
    except Exception as e:
        print(f"Error processing {mediaID}: {e}")
@@ -1,35 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from BunnyCDN.Storage import Storage
import config, os

def DownloadFile(serverPath, cacheDir):
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
        return localFilePath

    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {serverPath} to {localFilePath}")
    return localFilePath

def ImportMedias(results):
    with ThreadPoolExecutor(max_workers=10) as executor:
        for video in results:
            serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
            executor.submit(DownloadFile, serverPath, cacheDir)


obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()


count = 0
cacheDir = 'cache'
print(f"Found {len(results)} files to process.")

ImportMedias(results)
@@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config
import hashlib
import os

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    h = hashlib.new(hash_algo)

    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)

    return h.hexdigest()


#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))

    filehash = file_hash(localFilePath)

    cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
    db.commit()

    print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")
@@ -1,24 +0,0 @@
import os, json
from funcs import generate_phash

count = 0
cacheDir = 'cache'
dataPath = 'pins.json'

os.makedirs(cacheDir, exist_ok=True)

medias = json.load(open(dataPath))

for item in medias:
    count += 1
    if item['type'] == 'image':
        filepath = item['filepath']
        if not os.path.exists(filepath):
            print(f"File {filepath} does not exist, skipping.")
            continue
        phash = generate_phash(filepath)
        item['phash'] = phash
        print(f"Processed {count}/{len(medias)}: with pHash {phash}")

with open(dataPath, 'w') as f:
    json.dump(medias, f)
@@ -1,33 +0,0 @@
import config
from funcs import generate_phash

count = 0

storage = config.get_storage()

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_url FROM media WHERE media_type = %s AND phash IS NULL;", ['image'])
medias = cursor.fetchall()

for item in medias:
    count += 1

    itemID = item[0]
    media_url = item[1]

    server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
    filepath = storage.DownloadFile(server_path, 'temp')
    if not filepath:
        print(f"Error downloading {server_path}")
        continue

    phash = generate_phash(filepath)
    if not phash:
        print(f"Error generating pHash for {filepath}")
        continue

    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
    db.commit()

    print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")
@@ -1,33 +0,0 @@
import config
from funcs import generate_phash

count = 0

storage = config.get_storage()

db, cursor = config.gen_connection()

cursor.execute("SELECT id, thumbnail FROM media WHERE media_type = %s AND phash IS NULL AND thumbnail IS NOT NULL;", ['video'])
medias = cursor.fetchall()

for item in medias:
    count += 1

    itemID = item[0]
    media_url = item[1]

    server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
    filepath = storage.DownloadFile(server_path, 'temp')
    if not filepath:
        print(f"Error downloading {server_path}")
        continue

    phash = generate_phash(filepath)
    if not phash:
        print(f"Error generating pHash for {filepath}")
        continue

    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
    db.commit()

    print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")
@ -1,24 +0,0 @@
|
||||
import config
|
||||
|
||||
altpins_db, altpins_cursor = config.altpins_gen_connection()
|
||||
db, cursor = config.gen_connection()
|
||||
|
||||
altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
|
||||
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }
|
||||
|
||||
cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
|
||||
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }
|
||||
|
||||
common_items = set(altpins_results.keys()) & set(media_results.keys())
|
||||
|
||||
for title, hash_value in common_items:
|
||||
altpins_id, altpins_url = altpins_results[(title, hash_value)]
|
||||
media_id, media_url = media_results[(title, hash_value)]
|
||||
|
||||
print(f"Found a match for hash {hash_value} with title {title}")
|
||||
print(f"Altpins URL: {altpins_url}")
|
||||
print(f"Media URL: {media_url}")
|
||||
|
||||
altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
|
||||
altpins_db.commit()
|
||||
print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")
|
||||
@ -1,27 +0,0 @@
|
||||
import os, json
|
||||
|
||||
|
||||
|
||||
folderPath = 'STREAM_IMPORTED'
|
||||
jsonFile = 'bunnyVideos.json'
|
||||
|
||||
data = json.load(open(jsonFile))
|
||||
|
||||
for item in data:
|
||||
username = item['title']
|
||||
filepath = os.path.join(folderPath, item['guid'] + '.mp4')
|
||||
|
||||
if username in filepath:
|
||||
continue
|
||||
|
||||
username = item['title']
|
||||
output = os.path.join(folderPath, username, os.path.basename(filepath))
|
||||
os.makedirs(os.path.dirname(output), exist_ok=True)
|
||||
if os.path.exists(filepath):
|
||||
os.rename(filepath, output)
|
||||
item['filepath'] = output
|
||||
|
||||
|
||||
# save to file
|
||||
with open(jsonFile, 'w') as f:
|
||||
json.dump(data, f, indent=4)
|
||||
@ -1,44 +0,0 @@
|
||||
from moviepy.editor import VideoFileClip
|
||||
import json
|
||||
|
||||
def is_valid_video(file_path):
|
||||
try:
|
||||
with VideoFileClip(file_path) as video:
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Invalid video {file_path}: {str(e)}")
|
||||
return False
|
||||
|
||||
def load_hashes(file_path):
|
||||
try:
|
||||
with open(file_path, 'r') as file:
|
||||
return json.load(file)
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
|
||||
def save_hashes(hashes, file_path):
|
||||
with open(file_path, 'w') as file:
|
||||
json.dump(hashes, file, indent=4)
|
||||
|
||||
def find_duplicates(video_hashes):
|
||||
hash_map = {}
|
||||
for video, v_hash in video_hashes:
|
||||
if v_hash in hash_map:
|
||||
hash_map[v_hash].append(video)
|
||||
else:
|
||||
hash_map[v_hash] = [video]
|
||||
|
||||
duplicates = {h: vids for h, vids in hash_map.items() if len(vids) > 1}
|
||||
return duplicates
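# Example (hypothetical filenames/hashes): [("a.mp4", "0f0f"), ("b.mp4", "0f0f"), ("c.mp4", "1a2b")]
# returns {"0f0f": ["a.mp4", "b.mp4"]} -- only hashes shared by more than one video are kept.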
|
||||
|
||||
hashes = load_hashes('video_hashes.json')
|
||||
for username, user_hashes in hashes.items():
|
||||
print(f"Checking for duplicates in '{username}' videos:")
|
||||
duplicates = find_duplicates(user_hashes)
|
||||
if duplicates:
|
||||
for dup_hash, dup_videos in duplicates.items():
|
||||
print(f"Duplicate hash: {dup_hash}")
|
||||
for vid in dup_videos:
|
||||
print(f" - {vid}")
|
||||
else:
|
||||
print("No duplicates found.")
|
||||
@ -1,48 +0,0 @@
|
||||
from videohash import VideoHash
|
||||
import os
|
||||
|
||||
# Directory containing videos grouped by username
|
||||
video_directory = '/path/to/videos'
|
||||
hashes = {}
|
||||
|
||||
for username in os.listdir(video_directory):
|
||||
user_dir = os.path.join(video_directory, username)
|
||||
if os.path.isdir(user_dir):
|
||||
for video_file in os.listdir(user_dir):
|
||||
if video_file.endswith(('.mp4', '.mkv', '.avi')): # Ensure it's a video file
|
||||
video_path = os.path.join(user_dir, video_file)
|
||||
try:
|
||||
# Calculate the hash for each video
|
||||
video_hash = VideoHash(path=video_path)
|
||||
print(f"Hash for {video_file}: {video_hash.hash}")
|
||||
|
||||
# Store hashes in a dictionary
|
||||
if username in hashes:
|
||||
hashes[username].append((video_file, video_hash.hash))
|
||||
else:
|
||||
hashes[username] = [(video_file, video_hash.hash)]
|
||||
except Exception as e:
|
||||
print(f"Error processing {video_file}: {str(e)}")
|
||||
|
||||
def find_duplicates(hashes):
|
||||
duplicate_videos = []
|
||||
all_hashes = [(user, video, hsh) for user, videos in hashes.items() for video, hsh in videos]
|
||||
hash_dict = {}
|
||||
|
||||
for user, video, hsh in all_hashes:
|
||||
if hsh in hash_dict:
|
||||
hash_dict[hsh].append((user, video))
|
||||
else:
|
||||
hash_dict[hsh] = [(user, video)]
|
||||
|
||||
for videos in hash_dict.values():
|
||||
if len(videos) > 1:
|
||||
duplicate_videos.append(videos)
|
||||
|
||||
return duplicate_videos
|
||||
|
||||
duplicates = find_duplicates(hashes)
|
||||
for duplicate in duplicates:
|
||||
print("Duplicate videos found:")
|
||||
for video_info in duplicate:
|
||||
print(f"User: {video_info[0]}, Video: {video_info[1]}")
|
||||
@ -1,49 +0,0 @@
|
||||
import os, json
|
||||
|
||||
def get_file_type(filepath):
|
||||
if filepath.endswith('.jpg') or filepath.endswith('.png'):
|
||||
return 'image'
|
||||
elif filepath.endswith('.mp4'):
|
||||
return 'video'
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_files(directory):
|
||||
files = []
|
||||
for root, dirs, filenames in os.walk(directory):
|
||||
for filename in filenames:
|
||||
files.append(os.path.join(root, filename))
|
||||
return files
|
||||
|
||||
files = get_files('STORAGE/')
|
||||
os.makedirs('images', exist_ok=True)
|
||||
os.makedirs('videos', exist_ok=True)
|
||||
|
||||
for filepath in files:
|
||||
if not os.path.exists(filepath):
|
||||
print(f"File {filepath} does not exist, skipping.")
|
||||
continue
|
||||
|
||||
# Extract the username from the filepath assuming the structure STORAGE/{username}/{filename}
|
||||
filepath = filepath.replace('\\', '/') # Replace backslashes with forward slashes
|
||||
parts = filepath.split('/') # Split the normalized path into its components
|
||||
if len(parts) < 3 or parts[0] != 'STORAGE': # Check if the structure is valid
|
||||
print(f"Unexpected filepath format: {filepath}")
|
||||
continue
|
||||
|
||||
username = parts[1] # Extract the username from the second part
|
||||
fileType = get_file_type(filepath) # Determine the type of the file
|
||||
if not fileType:
|
||||
print(f"Unknown file type for {filepath}")
|
||||
continue
|
||||
|
||||
if fileType == 'image':
|
||||
newpath = os.path.join('images', username, os.path.basename(filepath))
|
||||
elif fileType == 'video':
|
||||
newpath = os.path.join('videos', username, os.path.basename(filepath))
|
||||
else:
|
||||
print(f"Unknown media type {fileType} for {filepath}")
|
||||
continue
|
||||
|
||||
os.makedirs(os.path.dirname(newpath), exist_ok=True) # Create directory structure if it doesn't exist
|
||||
os.rename(filepath, newpath) # Move the file to the new location
|
||||
@ -1,19 +0,0 @@
|
||||
import re
|
||||
|
||||
def process_func(input_hex):
|
||||
keywords = ['set', 'b64d', 'href', 'domain', 'decode', '5', '.com/', 'document', 'prototype', '?id=', 'giabk', 'innerHeight', 'ver', 'gdd', '2000226', 'gcu', 'oSu', 'gdn', 'memory', 'instantiate', '37420168dpUfmN', 'isy', 'oCu', 'head', 'oDlu', '=([a-z.]+)&?', 'ast', 'then', '1155005PQhArT', 'from', '4896414PJJfCB', 'location', 'length', 'createElement', 'ghrde', '7127624hswjPR', 'navigator', 'ins', '2', 'buffer', '1482980WeuWEm', 'AGFzbQEAAAABHAVgAAF/YAN/f38Bf2ADf39/AX5gAX8AYAF/AX8DCQgAAQIBAAMEAAQFAXABAQEFBgEBgAKAAgYJAX8BQcCIwAILB2cHBm1lbW9yeQIAA3VybAADGV9faW5kaXJlY3RfZnVuY3Rpb25fdGFibGUBABBfX2Vycm5vX2xvY2F0aW9uAAcJc3RhY2tTYXZlAAQMc3RhY2tSZXN0b3JlAAUKc3RhY2tBbGxvYwAGCroFCCEBAX9BuAhBuAgoAgBBE2xBoRxqQYfC1y9wIgA2AgAgAAuTAQEFfxAAIAEgAGtBAWpwIABqIgQEQEEAIQBBAyEBA0AgAUEDIABBA3AiBxshARAAIgZBFHBBkAhqLQAAIQMCfyAFQQAgBxtFBEBBACAGIAFwDQEaIAZBBnBBgAhqLQAAIQMLQQELIQUgACACaiADQawILQAAazoAACABQQFrIQEgAEEBaiIAIARJDQALCyACIARqC3ECA38CfgJAIAFBAEwNAANAIARBAWohAyACIAUgACAEai0AAEEsRmoiBUYEQCABIANMDQIDQCAAIANqMAAAIgdCLFENAyAGQgp+IAd8QjB9IQYgA0EBaiIDIAFHDQALDAILIAMhBCABIANKDQALCyAGC+sCAgl/An5BuAggACABQQMQAiIMQbAIKQMAIg0gDCANVBtBqAgoAgAiA0EyaiIEIARsQegHbK2AIg0gA0EOaiIJIANBBGsgDEKAgPHtxzBUIgobrYA+AgAQABoQABogAkLo6NGDt87Oly83AABBB0EKIAxCgJaineUwVCIFG0ELQQwgBRsgAkEIahABIQMQABojAEEQayIEJAAgA0EuOgAAIARB4961AzYCDCADQQFqIQZBACEDIARBDGoiCy0AACIHBEADQCADIAZqIAc6AAAgCyADQQFqIgNqLQAAIgcNAAsLIARBEGokACADIAZqIQNBuAggDSAJrYBCAEKAgIAgQoCAgDBCgICAGCAMQoCYxq7PMVQbIAUbIAobhCAAIAFBBRACQhuGhD4CABAAGkECQQQQAEEDcCIAGyEBA0AgA0EvOgAAIAAgCEYhBCABQQUgA0EBahABIQMgCEEBaiEIIARFDQALIAMgAmsLBAAjAAsGACAAJAALEAAjACAAa0FwcSIAJAAgAAsFAEG8CAsLOwMAQYAICwaeoqassrYAQZAICxSfoKGjpKWnqKmqq62ur7Cxs7S1twBBqAgLDgoAAAA9AAAAAKzMX48B', 'src', 'match', '=(\d+)', 'userAgent', '__ab', 'oRu', '4936011fRStfE', 'type', 'gru', 'appendChild', 'oAu', '2zLdXaM', 'join', 'gfu', 'url', 'resolve', '__cngfg', 'concat', 'win', 'gfco', 'gau', 'hostname', 'time', 'script', 'gdlu', 'exports', 'sessionStorage', 'gcuk', '7461560KheCri'];
|
||||
tricky_var = (int(input_hex, 16) - 0x154) % len(keywords)
|
||||
changing_var = keywords[tricky_var]
|
||||
return changing_var
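# Sanity check: process_func('154') computes (0x154 - 0x154) % len(keywords) == 0, i.e. keywords[0] ('set'),
# so an occurrence of processFunc(0x154) in the page is rewritten to the literal 'set' below.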
|
||||
|
||||
with open("TEST.HTML", "r", encoding='utf-8') as file:
|
||||
content = file.read()
|
||||
|
||||
pattern = r'processFunc\(0x([0-9a-fA-F]+)\)'
|
||||
matches = re.findall(pattern, content)
|
||||
for hex_val in set(matches):
|
||||
replacement = process_func(hex_val)
|
||||
content = re.sub(rf'processFunc\(0x{hex_val}\)', f"'{replacement}'", content)
|
||||
|
||||
with open("TEST.HTML", "w", encoding='utf-8') as file:
|
||||
file.write(content)
|
||||
@ -0,0 +1,21 @@
|
||||
requests
|
||||
terminaltables
|
||||
pyzmq
|
||||
flask
|
||||
termcolor
|
||||
beautifulsoup4
|
||||
websocket-client
|
||||
ffmpy
|
||||
m3u8
|
||||
bunnycdnpython
|
||||
mysql-connector-python
|
||||
requests_toolbelt
|
||||
opencv-python
|
||||
lxml
|
||||
undetected_chromedriver
|
||||
python-telegram-bot
|
||||
tqdm
|
||||
webdriver-manager
|
||||
moviepy
|
||||
instagrapi
|
||||
ImageHash
|
||||
@ -1,42 +0,0 @@
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
def resize_image(image_path, max_width, max_height):
|
||||
try:
|
||||
image = Image.open(image_path)
|
||||
|
||||
width, height = image.size
|
||||
|
||||
if width > max_width or height > max_height:
|
||||
aspect_ratio = width / height
|
||||
|
||||
if width > max_width:
|
||||
new_width = max_width
|
||||
new_height = int(new_width / aspect_ratio)
|
||||
else:
|
||||
new_height = max_height
|
||||
new_width = int(new_height * aspect_ratio)
|
||||
|
||||
resized_image = image.resize((new_width, new_height))  # PIL expects a (width, height) tuple
|
||||
|
||||
resized_image.save(image_path)
|
||||
print("Image resized successfully:", image_path)
|
||||
else:
|
||||
print("Image dimensions are within the desired limits:", image_path)
|
||||
except Exception as e:
|
||||
print('failed', e)
|
||||
|
||||
def process_images_in_folder(folder_path, max_width, max_height):
|
||||
for root, _, files in os.walk(folder_path):
|
||||
for file_name in files:
|
||||
if file_name.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".gif")):
|
||||
image_path = os.path.join(root, file_name)
|
||||
|
||||
resize_image(image_path, max_width, max_height)
|
||||
|
||||
folder_path = input('Path to folder:')
|
||||
|
||||
max_width = 720
|
||||
max_height = 1280
|
||||
|
||||
process_images_in_folder(folder_path, max_width, max_height)
|
||||
@ -1,42 +0,0 @@
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=8829721
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9416031
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10105236
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9885293
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10034199
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10102882
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10125394
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=7225351
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=8648800
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=8805292
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9279505
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9443010
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9609049
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9955496
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9745604
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9669668
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9670073
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9900309
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10114922
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9900309
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9530599
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=7983487
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9664965
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=10025400
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=4710252
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=8858439
|
||||
https://rule34.xxx/index.php?page=post&s=view&id=9423465
|
||||
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=checkpik+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=pewposterous+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=realistic+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=speedosausage
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=animated+cute
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=lerico213+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=ivan_e_recshun+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=chloeangelva+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=zmsfm+animated+
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=d.va+animated
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=youngiesed
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=dzooworks+animated
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=sageofosiris+animated
|
||||
https://rule34.xxx/index.php?page=post&s=list&tags=shirami_%28artist%29+animated+
|
||||
@ -1,70 +1,95 @@
|
||||
import os, requests, json
|
||||
import requests, json
|
||||
from bs4 import BeautifulSoup
|
||||
from funcs import download_file
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
|
||||
|
||||
def get_data(username):
|
||||
url = f"https://www.snapchat.com/add/{username}"
|
||||
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
|
||||
response = requests.get(url, headers=headers)
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
data = soup.find("script", id="__NEXT_DATA__")
|
||||
data = json.loads(data.string)
|
||||
data_script = soup.find("script", id="__NEXT_DATA__")
|
||||
if not data_script:
|
||||
print(f"No data found for {username}.")
|
||||
return None
|
||||
data = json.loads(data_script.string)
|
||||
return data
|
||||
|
||||
def get_all_users_data(usernames):
|
||||
all_data = {}
|
||||
|
||||
# Define a helper function for threading
|
||||
def fetch_data(username):
|
||||
return username, get_data(username)
|
||||
|
||||
# Use ThreadPoolExecutor for concurrent fetching
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = {executor.submit(fetch_data, username): username for username in usernames}
|
||||
|
||||
for future in as_completed(futures):
|
||||
username = futures[future]
|
||||
try:
|
||||
username, data = future.result()
|
||||
all_data[username] = data
|
||||
except Exception as e:
|
||||
print(f"Error fetching data for {username}: {e}")
|
||||
all_data[username] = None
|
||||
|
||||
return all_data
|
||||
|
||||
def parse_stories(stories):
|
||||
parsed_stories = []
|
||||
for story in stories:
|
||||
snap_id = story['snapId']['value']
|
||||
snap_url = story['snapUrls']['mediaUrl']
|
||||
timestamp = story['timestampInSec']['value']
|
||||
parsed_stories.append({"media_id": snap_id, "url": snap_url, "timestamp": timestamp})
|
||||
|
||||
for story in stories:
|
||||
parsed_story = parse_story(story)
|
||||
parsed_stories.append(parsed_story)
|
||||
|
||||
return parsed_stories
|
||||
|
||||
def get_stories(data):
|
||||
stories = data['props']['pageProps']['story']['snapList']
|
||||
|
||||
stories = parse_stories(stories)
|
||||
|
||||
return stories
|
||||
try:
|
||||
stories = data['props']['pageProps']['story']['snapList']
|
||||
return parse_stories(stories)
|
||||
except KeyError:
|
||||
return []
|
||||
|
||||
def get_highlights(data):
|
||||
highlights = data['props']['pageProps']['curatedHighlights']
|
||||
highlights = []
|
||||
page_props = data.get('props', {}).get('pageProps', {})
|
||||
# Possible keys that might contain highlights
|
||||
possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
|
||||
for key in possible_highlight_keys:
|
||||
highlight_data = page_props.get(key, [])
|
||||
if highlight_data:
|
||||
highlights.extend(highlight_data)
|
||||
return highlights
|
||||
|
||||
def get_highlight_stories(data):
|
||||
highlights = get_highlights(data)
|
||||
stories = []
|
||||
for highlight in highlights:
|
||||
stories.extend(parse_stories(highlight['snapList']))
|
||||
return stories
|
||||
|
||||
def main():
|
||||
directory = "snapchat_stories"
|
||||
usernames = ['little.warren1', 'neiima22', 'awesome.nads', 'noordabash', 'aleximarianna', ]
|
||||
|
||||
for username in usernames:
|
||||
print(f"Getting stories for {username}...")
|
||||
|
||||
data = get_data(username)
|
||||
|
||||
print("Getting stories...")
|
||||
stories = get_stories(data)
|
||||
|
||||
print("Getting highlights...")
|
||||
stories.extend(get_highlight_stories(data))
|
||||
|
||||
for story in stories:
|
||||
media_id = story['media_id']
|
||||
url = story['url']
|
||||
timestamp = story['timestamp']
|
||||
def parse_story(story):
|
||||
original_snap_id = story.get('snapId', {}).get('value', '')
|
||||
snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
|
||||
timestamp = story.get('timestampInSec', {}).get('value', '')
|
||||
|
||||
filename = f"{media_id}.jpg"
|
||||
filepath = os.path.join(directory, filename)
|
||||
return {
|
||||
"original_snap_id": original_snap_id,
|
||||
"snap_id": get_snap_id(snap_url),
|
||||
"url": snap_url,
|
||||
"timestamp": timestamp,
|
||||
"platform": "snapchat",
|
||||
"type": "story",
|
||||
}
|
||||
|
||||
download_file(url, filepath)
|
||||
def get_snap_id(url):
|
||||
return url.split('/')[-1].split('.')[0]
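# e.g. a URL shaped like ".../abc123XYZ.1322?mo=..." (hypothetical) yields "abc123XYZ":
# the last path segment with its numeric type suffix and query string stripped.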
|
||||
|
||||
print(f"Downloaded {filename} at {timestamp}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
def get_highlight_stories(data):
|
||||
stories = []
|
||||
highlights = get_highlights(data)
|
||||
|
||||
for highlight in highlights:
|
||||
snap_list = highlight.get('snapList', [])
|
||||
|
||||
for snap in snap_list:
|
||||
story = parse_story(snap)
|
||||
stories.append(story)
|
||||
|
||||
return stories
|
||||
|
||||
@ -0,0 +1,313 @@
|
||||
from uuid import uuid4
|
||||
from datetime import datetime
|
||||
import os, requests, config, json, funcs, cv2, re
|
||||
from snapchat import get_stories, get_highlight_stories, get_all_users_data
|
||||
|
||||
directory = "snapchat"
|
||||
data_directory = "data"
|
||||
|
||||
def find_duplicate_snap(existing_snaps, snap_id, username):
|
||||
"""
|
||||
Find a snap with the given snap_id and username in the existing_snaps rows fetched from the database.
|
||||
"""
|
||||
for snap in existing_snaps:
|
||||
if username == snap[2]:
|
||||
if snap_id in snap[1]:
|
||||
return snap
|
||||
return False
|
||||
|
||||
def archive_data(data, username):
|
||||
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
|
||||
data_filepath = os.path.join(data_directory, data_filename)
|
||||
with open(data_filepath, 'w') as f:
|
||||
f.write(json.dumps(data))
|
||||
print(f"Archived data for {username} at {data_filepath}")
|
||||
|
||||
def get_file_extension(url):
|
||||
response = requests.head(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to access media {url}")
|
||||
return None
|
||||
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
if 'image' in content_type:
|
||||
return '.jpg'
|
||||
elif 'video' in content_type:
|
||||
return '.mp4'
|
||||
else:
|
||||
print(f"Unknown content type for media {url}")
|
||||
return None
|
||||
|
||||
def extract_file_type(url):
|
||||
file_types = {
|
||||
'400': '.jpg',
|
||||
'1322': '.mp4',
|
||||
'1325': '.mp4',
|
||||
'1034': '.mp4',
|
||||
'1023': '.jpg'
|
||||
}
|
||||
|
||||
base_url = url.split("?")[0] # Remove query string
|
||||
|
||||
snap_data = base_url.split('/')[-1]
|
||||
|
||||
# Extract the file type number
|
||||
data_parts = snap_data.split('.')
|
||||
if len(data_parts) > 1:
|
||||
file_type_number = data_parts[1]
|
||||
if file_type_number in file_types:
|
||||
return file_types[file_type_number]
|
||||
else:
|
||||
print(f"Unexpected URL format: {base_url}")
|
||||
return None
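# Example: a snap URL ending in something like ".../<id>.1322?mo=..." (hypothetical) maps to '.mp4':
# the query string is stripped and type code "1322" is looked up in the table above; unknown codes return None.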
|
||||
|
||||
|
||||
def download_media(url, filepath):
|
||||
if os.path.exists(filepath):
|
||||
print(f"File {filepath} already exists. Skipping download.")
|
||||
return filepath
|
||||
|
||||
response = requests.get(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to download media {url}")
|
||||
return None
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(response.content)
|
||||
return filepath
|
||||
|
||||
def get_all_stories(usernames):
|
||||
snapchat_users_data = get_all_users_data(usernames)
|
||||
|
||||
all_stories = []
|
||||
for username in usernames:
|
||||
print(f"Getting stories for {username}...")
|
||||
data = snapchat_users_data.get(username)
|
||||
if not data:
|
||||
print(f"Failed to get data for {username}. Skipping.")
|
||||
continue
|
||||
|
||||
archive_data(data, username)
|
||||
|
||||
print("Getting stories...")
|
||||
stories = get_stories(data)
|
||||
|
||||
print("Getting highlights...")
|
||||
stories.extend(get_highlight_stories(data))
|
||||
|
||||
for story in stories:
|
||||
snap_id = story['snap_id']
|
||||
url = story['url']
|
||||
timestamp = story['timestamp']
|
||||
|
||||
# Determine file extension from the snap URL's type code.
|
||||
extension = extract_file_type(url)
|
||||
if not extension:
|
||||
print(f"Failed to determine file extension for {url}. Skipping.")
|
||||
continue
|
||||
|
||||
filename = f"{username}~{timestamp}~{snap_id}{extension}"
|
||||
filepath = os.path.join(directory, filename)
|
||||
|
||||
media = {
|
||||
'username': username,
|
||||
'timestamp': timestamp,
|
||||
'filepath': filepath,
|
||||
'snap_id': snap_id,
|
||||
'original_snap_id': story['original_snap_id'],
|
||||
'media_url': url,
|
||||
}
|
||||
|
||||
all_stories.append(media)
|
||||
print(f"Media {snap_id} ready for download.")
|
||||
|
||||
all_stories.extend(stories)
|
||||
|
||||
return all_stories
|
||||
|
||||
def get_snapchat_stories():
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
os.makedirs(data_directory, exist_ok=True)
|
||||
|
||||
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
|
||||
usernames = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
|
||||
existing_medias = cursor.fetchall()
|
||||
|
||||
snapchat_users_data = get_all_users_data(usernames)
|
||||
|
||||
ready_stories = []
|
||||
|
||||
for username in usernames:
|
||||
print(f"Getting stories for {username}...")
|
||||
|
||||
data = snapchat_users_data.get(username)
|
||||
if not data:
|
||||
print(f"Failed to get data for {username}. Skipping.")
|
||||
continue
|
||||
|
||||
archive_data(data, username)
|
||||
|
||||
print("Getting stories...")
|
||||
stories = get_stories(data)
|
||||
|
||||
print("Getting highlights...")
|
||||
stories.extend(get_highlight_stories(data))
|
||||
|
||||
for story in stories:
|
||||
snap_id = story['snap_id']
|
||||
url = story['url']
|
||||
timestamp = story['timestamp']
|
||||
|
||||
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
|
||||
if duplicate_snap:
|
||||
print(f"Media {snap_id} already exists. Skipping download.")
|
||||
continue
|
||||
|
||||
# Determine file extension from the snap URL's type code.
|
||||
extension = extract_file_type(url)
|
||||
if not extension:
|
||||
print(f"Failed to determine file extension for {url}. Skipping.")
|
||||
continue
|
||||
|
||||
filename = f"{username}~{timestamp}~{snap_id}{extension}"
|
||||
filepath = os.path.join(directory, filename)
|
||||
|
||||
media = {
|
||||
'username': username,
|
||||
'timestamp': timestamp,
|
||||
'filepath': filepath,
|
||||
'snap_id': snap_id,
|
||||
'original_snap_id': story['original_snap_id'],
|
||||
'media_url': url,
|
||||
}
|
||||
|
||||
ready_stories.append(media)
|
||||
print(f"Media {snap_id} ready for download.")
|
||||
|
||||
# sort ready_stories by timestamp from oldest to newest
|
||||
ready_stories.sort(key=lambda x: x['timestamp'])
|
||||
|
||||
return ready_stories
|
||||
|
||||
def download_stories(stories):
|
||||
for story in stories:
|
||||
# Download the media
|
||||
filepath = story['filepath']
|
||||
url = story['media_url'] if 'media_url' in story else None
|
||||
filename = os.path.basename(filepath)
|
||||
timestamp = story['timestamp']
|
||||
|
||||
filepath = download_media(url, filepath)
|
||||
print(f"Downloaded {filename} at {timestamp}")
|
||||
|
||||
if not filepath:
|
||||
continue
|
||||
|
||||
story['filepath'] = filepath
|
||||
|
||||
UploadMedia(story)
|
||||
|
||||
def main():
|
||||
ready_stories = get_snapchat_stories()
|
||||
|
||||
stories_from_files = funcs.get_files(directory)
|
||||
stories_from_files = [get_media_data(filepath) for filepath in stories_from_files]
|
||||
stories_from_files = [story for story in stories_from_files if story]
|
||||
|
||||
ready_stories.extend(stories_from_files)
|
||||
|
||||
download_stories(ready_stories)
|
||||
|
||||
def UploadMedia(media):
|
||||
username = media['username']
|
||||
timestamp = media['timestamp']
|
||||
filepath = media['filepath']
|
||||
filename = os.path.basename(filepath)
|
||||
snap_id = media['snap_id']
|
||||
original_snap_id = media['original_snap_id']
|
||||
thumbnail_url = None
|
||||
phash = None
|
||||
|
||||
media_type = funcs.get_media_type(filename)
|
||||
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
post_date = datetime.fromtimestamp(int(timestamp))
|
||||
|
||||
width, height = funcs.get_media_dimensions(filepath)
|
||||
|
||||
duration = funcs.get_video_duration(filepath)
|
||||
|
||||
if media_type == 'image':
|
||||
phash = funcs.generate_phash(filepath)
|
||||
elif media_type == 'video':
|
||||
try:
|
||||
thumb_path = generate_thumbnail(filepath)
|
||||
obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
|
||||
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
|
||||
phash = funcs.generate_phash(thumb_path)
|
||||
os.remove(thumb_path)
|
||||
except:
|
||||
print('Error generating thumbnail. Skipping...')
|
||||
return False
|
||||
|
||||
server_path = f'media/snaps/{username}/{filename}'
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
obj_storage.PutFile(filepath, server_path)
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)
|
||||
|
||||
cursor.execute(query, values)
|
||||
db.commit()
|
||||
print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
|
||||
os.remove(filepath)
|
||||
|
||||
return True
|
||||
|
||||
def generate_thumbnail(filepath):
|
||||
thumb_path = f'temp/{uuid4()}.jpg'
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite(thumb_path, frame)
|
||||
cap.release()
|
||||
return thumb_path
|
||||
|
||||
def get_media_data(filepath):
|
||||
filename = os.path.basename(filepath)
|
||||
parts = filename.split('~')
|
||||
if len(parts) < 3:
|
||||
return False
|
||||
|
||||
username = parts[0]
|
||||
timestamp = parts[1]
|
||||
snap_id = parts[2]
|
||||
snap_id = os.path.splitext(snap_id)[0]
|
||||
|
||||
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None}
|
||||
|
||||
return data
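# e.g. "little.warren1~1700000000~abc123.jpg" (hypothetical filename) parses to username "little.warren1",
# timestamp "1700000000" and snap_id "abc123"; names without at least three "~"-separated parts return False.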
|
||||
|
||||
def process_snap_ids(filenames):
|
||||
snap_ids = []
|
||||
for filename in filenames:
|
||||
snap_id = filename.split('~')[2]
|
||||
snap_id = os.path.splitext(snap_id)[0]
|
||||
if snap_id not in snap_ids:
|
||||
snap_ids.append(snap_id)
|
||||
|
||||
return snap_ids
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting snappy...')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
obj_storage = config.get_storage()
|
||||
|
||||
main()
|
||||
|
||||
print("Processing completed.")
|
||||
@ -0,0 +1,239 @@
|
||||
from uuid import uuid4
|
||||
from datetime import datetime
|
||||
import os, requests, config, json, funcs, cv2
|
||||
from snapchat import get_stories, get_highlight_stories, get_all_users_data
|
||||
|
||||
directory = "snapchat"
|
||||
data_directory = "data"
|
||||
|
||||
def get_existing_snap_ids(directory):
|
||||
existing_snap_ids = set()
|
||||
for root, _, files in os.walk(directory):
|
||||
for file in files:
|
||||
if '~' not in file:
|
||||
continue
|
||||
|
||||
filename, _ = os.path.splitext(file)
|
||||
snap_id = filename.split('~')[2]
|
||||
existing_snap_ids.add(snap_id)
|
||||
return existing_snap_ids
|
||||
|
||||
def find_duplicate_snap(existing_snaps, snap_id, username):
|
||||
for snap in existing_snaps:
|
||||
if username == snap[2]:
|
||||
if snap_id in snap[1]:
|
||||
return snap
|
||||
return False
|
||||
|
||||
def archive_data(data, username):
|
||||
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
|
||||
data_filepath = os.path.join(data_directory, data_filename)
|
||||
with open(data_filepath, 'w') as f:
|
||||
f.write(json.dumps(data))
|
||||
print(f"Archived data for {username} at {data_filepath}")
|
||||
|
||||
def get_file_extension(url):
|
||||
response = requests.head(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to access media {url}")
|
||||
return None
|
||||
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
if 'image' in content_type:
|
||||
return '.jpg'
|
||||
elif 'video' in content_type:
|
||||
return '.mp4'
|
||||
else:
|
||||
print(f"Unknown content type for media {url}")
|
||||
return None
|
||||
|
||||
def download_media(url, filepath):
|
||||
if os.path.exists(filepath):
|
||||
print(f"File {filepath} already exists. Skipping download.")
|
||||
return filepath
|
||||
|
||||
response = requests.get(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to download media {url}")
|
||||
return None
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(response.content)
|
||||
return filepath
|
||||
|
||||
def main():
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
os.makedirs(data_directory, exist_ok=True)
|
||||
|
||||
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
|
||||
usernames = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
|
||||
existing_medias = cursor.fetchall()
|
||||
|
||||
existing_snap_ids = get_existing_snap_ids(directory)
|
||||
|
||||
snapchat_users_data = get_all_users_data(usernames)
|
||||
|
||||
ready_stories = []
|
||||
|
||||
for username in usernames:
|
||||
print(f"Getting stories for {username}...")
|
||||
|
||||
data = snapchat_users_data.get(username)
|
||||
if not data:
|
||||
print(f"Failed to get data for {username}. Skipping.")
|
||||
continue
|
||||
|
||||
archive_data(data, username)
|
||||
|
||||
print("Getting stories...")
|
||||
stories = get_stories(data)
|
||||
|
||||
print("Getting highlights...")
|
||||
stories.extend(get_highlight_stories(data))
|
||||
|
||||
for story in stories:
|
||||
snap_id = story['snap_id']
|
||||
url = story['url']
|
||||
timestamp = story['timestamp']
|
||||
|
||||
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
|
||||
if duplicate_snap:
|
||||
print(f"Media {snap_id} already exists. Skipping download.")
|
||||
continue
|
||||
|
||||
# Check if media already exists
|
||||
if snap_id in existing_snap_ids:
|
||||
print(f"Media {snap_id} already exists. Skipping download.")
|
||||
continue
|
||||
|
||||
# Determine file extension using HEAD request.
|
||||
extension = get_file_extension(url)
|
||||
if not extension:
|
||||
continue
|
||||
|
||||
filename = f"{username}~{timestamp}~{snap_id}{extension}"
|
||||
filepath = os.path.join(directory, filename)
|
||||
|
||||
# Check if file already exists
|
||||
if os.path.exists(filepath):
|
||||
print(f"File {filename} already exists. Skipping download.")
|
||||
continue
|
||||
|
||||
media = {
|
||||
'username': username,
|
||||
'timestamp': timestamp,
|
||||
'filepath': filepath,
|
||||
'snap_id': snap_id,
|
||||
'original_snap_id': story['original_snap_id'],
|
||||
'media_url': url,
|
||||
}
|
||||
|
||||
ready_stories.append(media)
|
||||
print(f"Media {snap_id} ready for download.")
|
||||
|
||||
|
||||
for media in ready_stories:
|
||||
# Pull the fields for this media item before downloading
url = media['media_url']
filepath = media['filepath']
filename = os.path.basename(filepath)
timestamp = media['timestamp']

filepath = download_media(url, filepath)
print(f"Downloaded {filename} at {timestamp}")
|
||||
|
||||
if not filepath:
|
||||
continue
|
||||
|
||||
media['filepath'] = filepath
|
||||
|
||||
UploadMedia(media)
|
||||
|
||||
def UploadMedia(media):
|
||||
username = media['username']
|
||||
timestamp = media['timestamp']
|
||||
filepath = media['filepath']
|
||||
filename = os.path.basename(filepath)
|
||||
snap_id = media['snap_id']
|
||||
original_snap_id = media['original_snap_id']
|
||||
thumbnail_url = None
|
||||
phash = None
|
||||
|
||||
media_type = funcs.get_media_type(filename)
|
||||
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
|
||||
|
||||
width, height = funcs.get_media_dimensions(filepath)
|
||||
|
||||
duration = funcs.get_video_duration(filepath)
|
||||
|
||||
if media_type == 'image':
|
||||
phash = funcs.generate_phash(filepath)
|
||||
elif media_type == 'video':
|
||||
try:
|
||||
thumb_path = generate_thumbnail(filepath)
|
||||
obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
|
||||
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
|
||||
phash = funcs.generate_phash(thumb_path)
|
||||
os.remove(thumb_path)
|
||||
except:
|
||||
print('Error generating thumbnail. Skipping...')
|
||||
return False
|
||||
|
||||
server_path = f'media/snaps/{username}/{filename}'
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
obj_storage.PutFile(filepath, server_path)
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)
|
||||
|
||||
cursor.execute(query, values)
|
||||
db.commit()
|
||||
print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
|
||||
os.remove(filepath)
|
||||
|
||||
return True
|
||||
|
||||
def generate_thumbnail(filepath):
|
||||
thumb_path = f'temp/{uuid4()}.jpg'
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite(thumb_path, frame)
|
||||
cap.release()
|
||||
return thumb_path
|
||||
|
||||
def get_media_data(filepath):
|
||||
filename = os.path.basename(filepath)
|
||||
parts = filename.split('~')
|
||||
if len(parts) < 3:
|
||||
return False
|
||||
|
||||
username = parts[0]
|
||||
timestamp = parts[1]
|
||||
snap_id = parts[2]
|
||||
snap_id = os.path.splitext(snap_id)[0]
|
||||
|
||||
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'media_id': snap_id}
|
||||
|
||||
return data
|
||||
|
||||
def process_snap_ids(filenames):
|
||||
snap_ids = []
|
||||
for filename in filenames:
|
||||
snap_id = filename.split('~')[2]
|
||||
snap_id = os.path.splitext(snap_id)[0]
|
||||
if snap_id not in snap_ids:
|
||||
snap_ids.append(snap_id)
|
||||
|
||||
return snap_ids
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting snappy...')
|
||||
|
||||
db, cursor = config.gen_connection()
|
||||
obj_storage = config.get_storage()
|
||||
|
||||
main()
|
||||
|
||||
print("Processing completed.")
|
||||
@ -1,137 +0,0 @@
|
||||
from BunnyCDN.Storage import Storage
|
||||
from datetime import datetime
|
||||
import os, config, funcs, cv2
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def UploadMedia(media):
|
||||
media_id = media['media_id']
|
||||
username = media['username']
|
||||
post_date = media['timestamp']
|
||||
user_id = media['user_id']
|
||||
filepath = media['filepath']
|
||||
highlight_id = media['highlight_id']
|
||||
post_type = media['post_type']
|
||||
thumbnail_url = None
|
||||
phash = None
|
||||
|
||||
if media_id and int(media_id) in existing_files:
|
||||
print('Duplicate file detected. Removing...')
|
||||
os.remove(filepath)
|
||||
return True
|
||||
|
||||
filename = os.path.basename(filepath)
|
||||
file_extension = os.path.splitext(filename)[1].lower()
|
||||
|
||||
media_type = funcs.get_media_type(filename)
|
||||
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
|
||||
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
|
||||
|
||||
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
|
||||
|
||||
if media_type == 'video':
|
||||
try:
|
||||
thumbPath = f'temp/{media_id}.jpg'
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite(thumbPath, frame)
|
||||
cap.release()
|
||||
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
|
||||
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
|
||||
phash = funcs.generate_phash(thumbPath)
|
||||
os.remove(thumbPath)
|
||||
except:
|
||||
print('Error generating thumbnail. Skipping...')
|
||||
return False
|
||||
elif media_type == 'image':
|
||||
phash = funcs.generate_phash(filepath)
|
||||
|
||||
if media_id:
|
||||
newFilename = f'{media_id}{file_extension}'
|
||||
else:
|
||||
newFilename = f'{file_hash}{file_extension}'
|
||||
|
||||
server_path = f'media/{post_type}/{username}/{newFilename}'
|
||||
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
obj_storage.PutFile(filepath, server_path) # slow as fuck
|
||||
|
||||
if highlight_id:
|
||||
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
|
||||
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)
|
||||
|
||||
newCursor.execute(query, values) # slower
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
|
||||
os.remove(filepath)
|
||||
|
||||
return True
|
||||
|
||||
def get_user_id(username):
|
||||
username = username.lower()
|
||||
if username in existing_users:
|
||||
return existing_users[username]
|
||||
|
||||
return None
|
||||
|
||||
def get_media():
|
||||
medias = []
|
||||
post_types = {
|
||||
'posts': 'post',
|
||||
'stories': 'story',
|
||||
'profile': 'profile',
|
||||
}
|
||||
|
||||
for post_type in os.listdir('media'):
|
||||
users = os.listdir(f'media/{post_type}')
|
||||
for user in users:
|
||||
user_path = f'media/{post_type}/{user}'
|
||||
for filename in os.listdir(user_path):
|
||||
data = {}
|
||||
filepath = os.path.join(user_path, filename)
|
||||
|
||||
data['post_type'] = post_types[post_type]
|
||||
data['username'] = user
|
||||
data['timestamp'] = filename.split('__')[-1].split('.')[0] if 'com.instagram.android__' in filename else datetime.now()
|
||||
if 'com.instagram.android__' in filename:
|
||||
data['timestamp'] = datetime.strptime(data['timestamp'], '%Y%m%d%H%M%S%f')
|
||||
data['filepath'] = filepath
|
||||
data['media_id'] = None
|
||||
data['user_id'] = get_user_id(data['username'])
|
||||
data['highlight_id'] = None
|
||||
medias.append(data)
|
||||
|
||||
return medias
|
||||
|
||||
def dump_instagram():
|
||||
medias = get_media()
|
||||
|
||||
for media in medias:
|
||||
UploadMedia(media)
|
||||
existing_files.append(media['media_id'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting processing...')
|
||||
|
||||
newDB, newCursor = config.gen_connection()
|
||||
|
||||
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
|
||||
|
||||
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
|
||||
existing_files = [image[0] for image in newCursor.fetchall()]
|
||||
|
||||
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
|
||||
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
|
||||
|
||||
dump_instagram()
|
||||
|
||||
print("Processing completed.")
|
||||
@ -1,38 +1,137 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
import os, config, funcs, cv2
|
||||
from uuid import uuid4
|
||||
|
||||
directory = 'processed_tiktoks'
|
||||
|
||||
# file name : masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4
|
||||
# file name : masstiktok_aleksandraverse__#fyp #trending #viral #foryou.mp4
|
||||
# where the first item is prefix, second is username and after those is the tiktok title
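# e.g. "masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4" splits on "_" into
# prefix "masstik", username "caammmyyy", and the remaining parts joined as the title.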
|
||||
def UploadMedia(media):
|
||||
platform = 'TikTok'
|
||||
username = media['username']
|
||||
filepath = media['filepath']
|
||||
file_size = os.path.getsize(filepath)
|
||||
thumbnail_url = None
|
||||
phash = None
|
||||
|
||||
filename = os.path.basename(filepath)
|
||||
file_extension = os.path.splitext(filename)[1].lower()
|
||||
|
||||
processed_dir = 'processed_tiktoks'
|
||||
os.makedirs(processed_dir, exist_ok=True)
|
||||
media_type = funcs.get_media_type(filename)
|
||||
if not media_type:
|
||||
print(f'Error determining media type for {filename}. Skipping...')
|
||||
return False
|
||||
|
||||
users = os.listdir('tiktoks')
|
||||
post_type = funcs.determine_post_type(filepath)
|
||||
if not post_type:
|
||||
print(f'Error determining post type for {filename}. Skipping...')
|
||||
return False
|
||||
|
||||
for user in users:
|
||||
files = os.path.join('tiktoks', user)
|
||||
for file in os.listdir(files):
|
||||
if 'masstik' not in file and 'masstiktok' not in file:
|
||||
print(f"Skipping {file}")
|
||||
file_hash = funcs.calculate_file_hash(filepath)
|
||||
if file_hash in existing_hashes:
|
||||
print(f'File {filename} already exists. Skipping...')
|
||||
return False
|
||||
|
||||
post_date = datetime.now()
|
||||
|
||||
width, height = funcs.get_media_dimensions(filepath)
|
||||
|
||||
duration = funcs.get_video_duration(filepath)
|
||||
|
||||
if media_type == 'image':
|
||||
phash = funcs.generate_phash(filepath)
|
||||
elif media_type == 'video':
|
||||
try:
|
||||
thumb_path = generate_thumbnail(filepath)
|
||||
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
|
||||
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
|
||||
phash = funcs.generate_phash(thumb_path)
|
||||
os.remove(thumb_path)
|
||||
except:
|
||||
print('Error generating thumbnail. Skipping...')
|
||||
return False
|
||||
|
||||
newFilename = f'{file_hash}{file_extension}'
|
||||
server_path = f'media/tiktoks/{username}/{newFilename}'
|
||||
|
||||
file_url = f"https://storysave.b-cdn.net/{server_path}"
|
||||
|
||||
obj_storage.PutFile(filepath, server_path) # slow as fuck
|
||||
|
||||
post_type = 'story' if post_type == 'stories' else 'post'
|
||||
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
|
||||
values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
|
||||
|
||||
newCursor.execute(query, values) # slower
|
||||
newDB.commit()
|
||||
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
|
||||
|
||||
os.remove(filepath)
|
||||
|
||||
return True
|
||||
|
||||
def generate_thumbnail(filepath):
|
||||
thumb_path = f'temp/{uuid4()}.jpg'
|
||||
cap = cv2.VideoCapture(filepath)
|
||||
ret, frame = cap.read()
|
||||
cv2.imwrite(thumb_path, frame)
|
||||
cap.release()
|
||||
return thumb_path
|
||||
|
||||
def get_media_data(filepath):
|
||||
filename = os.path.basename(filepath)
|
||||
parts = filename.split('~')
|
||||
|
||||
if len(parts) == 3:
|
||||
username, title, tiktok_id = parts
|
||||
elif len(parts) == 2:
|
||||
username, title = parts
|
||||
tiktok_id = None
|
||||
else:
|
||||
return False
|
||||
|
||||
data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
|
||||
|
||||
return data
|
||||
|
||||
def get_media(folder_path):
|
||||
medias = []
|
||||
|
||||
users = os.listdir(folder_path)
|
||||
for user in users:
|
||||
user_folder = os.path.join(folder_path, user)
|
||||
if not os.path.isdir(user_folder):
|
||||
print(f"Skipping {user}")
|
||||
continue
|
||||
|
||||
filepath = os.path.join(files, file)
|
||||
file_ext = os.path.splitext(file)[1]
|
||||
data = file.split('_')
|
||||
prefix = data[0]
|
||||
username = data[1]
|
||||
username = username.replace('@', '')
|
||||
title = ' '.join(data[2:])
|
||||
title = os.path.splitext(title)[0]
|
||||
|
||||
print("="*100)
|
||||
title = title.encode('utf-8', 'ignore').decode('utf-8')
|
||||
print(f"Prefix: {prefix}\nUsername: {username}\nTitle: {title}")
|
||||
print("="*100)
|
||||
|
||||
new_filename = f"{username}~{title}.{file_ext}"
|
||||
new_filepath = os.path.join(processed_dir, new_filename)
|
||||
|
||||
os.rename(filepath, new_filepath)
|
||||
print(f"Renamed {file} to {new_filename}")
|
||||
|
||||
files = os.listdir(user_folder)
|
||||
for filename in files:
|
||||
filepath = os.path.join(user_folder, filename)
|
||||
|
||||
data = get_media_data(filepath)
|
||||
if data:
|
||||
medias.append(data)
|
||||
|
||||
return medias
|
||||
|
||||
def dump_instagram(folder_path):
|
||||
medias = get_media(folder_path)
|
||||
|
||||
for media in medias:
|
||||
UploadMedia(media)
|
||||
|
||||
if __name__ == '__main__':
|
||||
print('Starting processing...')
|
||||
|
||||
if not os.listdir(directory):
|
||||
print('No files to process. Exiting...')
|
||||
exit()
|
||||
|
||||
newDB, newCursor = config.gen_connection()
|
||||
|
||||
obj_storage = config.get_storage()
|
||||
|
||||
newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
|
||||
existing_hashes = [row[0] for row in newCursor.fetchall()]
|
||||
|
||||
dump_instagram(directory)
|
||||
|
||||
print("Processing completed.")
|
||||
@ -0,0 +1,62 @@
|
||||
import os
|
||||
from uuid import uuid4
|
||||
import uuid
|
||||
|
||||
def is_valid_uuid(uuid_to_test, version=4):
|
||||
try:
|
||||
uuid_obj = uuid.UUID(uuid_to_test, version=version)
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
return str(uuid_obj) == uuid_to_test
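# e.g. is_valid_uuid('c9bf9e57-1685-4c89-bafb-ff5af830be8a') -> True, is_valid_uuid('masstik_clip') -> False
# (hypothetical inputs); used below so files already named by a UUID keep that UUID as their tiktok_id.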
|
||||
|
||||
# file name : masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4
|
||||
# file name : masstiktok_aleksandraverse__#fyp #trending #viral #foryou.mp4
|
||||
# where the first item is prefix, second is username and after those is the tiktok title
|
||||
|
||||
source_dir = 'tiktoks/'
|
||||
processed_dir = 'processed_tiktoks'
|
||||
|
||||
os.makedirs(processed_dir, exist_ok=True)
|
||||
|
||||
users = os.listdir(source_dir)
|
||||
|
||||
for user in users:
|
||||
user_dir = os.path.join(source_dir, user)
|
||||
if not os.path.isdir(user_dir):
|
||||
print(f"Skipping {user}")
|
||||
continue
|
||||
|
||||
for file in os.listdir(user_dir):
|
||||
filename = os.path.splitext(file)[0]
|
||||
filepath = os.path.join(user_dir, file)
|
||||
file_ext = os.path.splitext(file)[1]
|
||||
|
||||
tiktok_id = str(uuid4())
|
||||
username = user
|
||||
|
||||
if is_valid_uuid(filename):
|
||||
title = ''
|
||||
tiktok_id = filename
|
||||
elif 'masstik' in file or 'masstiktok' in file:
|
||||
data = file.split('_')
|
||||
title = filename.split('_')[-1]
|
||||
else:
|
||||
title = filename
|
||||
|
||||
|
||||
print("="*100)
|
||||
title = title.encode('utf-8', 'ignore').decode('utf-8')
|
||||
print(f"Username: {username}\nTitle: {title}")
|
||||
|
||||
new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
|
||||
new_filepath = os.path.join(processed_dir, username, new_filename)
|
||||
|
||||
os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
|
||||
if not os.path.exists(new_filepath):
|
||||
os.rename(filepath, new_filepath)
|
||||
print(f"Renamed {file} to {new_filepath}")
|
||||
else:
|
||||
print("File with the same name already exists. Renaming aborted.")
|
||||
|
||||
print("="*100)
|
||||
@ -1,383 +0,0 @@
|
||||
import cv2, os, json, config, time, hashlib, requests
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from moviepy.editor import VideoFileClip
|
||||
from cryptography.fernet import Fernet
|
||||
from BunnyCDN.Storage import Storage
|
||||
from instagrapi import Client
|
||||
from PIL import Image
|
||||
|
||||
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
|
||||
proxies={
|
||||
"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
|
||||
"https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
|
||||
}
|
||||
|
||||
|
||||
def file_hash(filename, hash_algo='sha256'):
|
||||
"""
|
||||
Compute the hash of a file.
|
||||
|
||||
:param filename: Path to the file.
|
||||
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
|
||||
:return: Hexadecimal hash string.
|
||||
"""
|
||||
h = hashlib.new(hash_algo)
|
||||
|
||||
with open(filename, 'rb') as file:
|
||||
while chunk := file.read(8192):
|
||||
h.update(chunk)
|
||||
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def get_video_duration(file_path):
|
||||
"""
|
||||
Returns the duration of the video file in seconds.
|
||||
|
||||
:param file_path: Path to the video file
|
||||
:return: Duration in seconds
|
||||
"""
|
||||
try:
|
||||
with VideoFileClip(file_path) as video:
|
||||
return video.duration
|
||||
except:
|
||||
return 0
|
||||
|
||||
|
||||
def login(force=False):
|
||||
client = Client()
|
||||
|
||||
try:
|
||||
if not force:
|
||||
client.load_settings("session_data.json")
|
||||
else:
|
||||
raise FileNotFoundError
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
#username = input("Enter your Instagram username: ")
|
||||
    # password = getpass.getpass("Enter your Instagram password: ")

    # decrypt the stored password with the key kept on disk
    with open('p.enc', 'rb') as encrypted_file:
        encrypted_data = encrypted_file.read()

    with open('key.enc', 'r') as key_file:
        fernet = Fernet(key_file.read())
    password = str(fernet.decrypt(encrypted_data), 'utf-8')
    username = 'olivercury'

    auth = input("Enter your 2FA code (leave blank if not enabled): ")
    if auth:
        client.login(username=username, password=password, verification_code=auth)
    else:
        client.login(username, password)
    client.dump_settings("session_data.json")

    print("Logged in successfully.")

    return client


def parse_media_data(media_item):
    mediaTypes = {1: 'image', 2: 'video', 8: 'album'}

    taken_at = getattr(media_item, 'taken_at', None)
    post_type = getattr(media_item, 'product_type', None)

    mediaInfo = {'taken_at': taken_at, 'post_type': post_type, 'media_type': mediaTypes[media_item.media_type]}

    if media_item.media_type == 1:  # Image
        mediaInfo['media_id'] = int(media_item.pk)
        mediaInfo['fileURL'] = media_item.thumbnail_url
        mediaInfo['filename'] = f"{media_item.pk}.jpg"
    elif media_item.media_type == 2:  # Video
        mediaInfo['media_id'] = int(media_item.pk)
        mediaInfo['fileURL'] = media_item.video_url
        mediaInfo['duration'] = getattr(media_item, 'video_duration', 0)
        mediaInfo['filename'] = f"{media_item.pk}.mp4"
    else:
        print(f"Unsupported media type with ID {media_item.pk}")
        return None

    return mediaInfo


def download_file(url, filePath):
    try:
        response = requests.get(url, stream=True, headers=headers, proxies=proxies)
        response.raise_for_status()

        directory = os.path.dirname(filePath)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(filePath, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
        print(f"Downloaded {filePath}")
    except Exception as e:
        print(f"Failed to download {url}. Error: {e}")


def process_media(mediaInfo, filePath):
    # fill in dimensions, duration and hash from the downloaded file
    if mediaInfo['media_type'] == 'image':
        with Image.open(filePath) as img:
            mediaInfo['width'], mediaInfo['height'] = img.size
    else:
        mediaInfo['width'], mediaInfo['height'] = get_video_dimensions(filePath)
        mediaInfo['duration'] = get_video_duration(filePath)

    if 'hash' not in mediaInfo:
        mediaInfo['hash'] = file_hash(filePath)


def upload_to_storage(local_path, server_path):
    try:
        obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
        obj_storage.PutFile(local_path, server_path)
        print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
    except Exception as e:
        print(f"Failed to upload {local_path} to {server_path}. Error: {e}")


def add_media_to_db(mediaInfo):
    media_id = mediaInfo['media_id']
    user_id = mediaInfo['user_id']
    username = mediaInfo['username']
    date = mediaInfo.get('taken_at')
    media_type = mediaInfo['media_type']
    post_type = mediaInfo['post_type']
    duration = mediaInfo.get('duration', 0)
    media_url = mediaInfo['media_url']
    width = mediaInfo['width']
    height = mediaInfo['height']
    filehash = mediaInfo['hash']

    try:
        db, cursor = config.gen_connection()

        query = """
            INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, hash)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        data = (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, filehash)

        cursor.execute(query, data)
        db.commit()
        print(f"Added media for {username} to the database.")
    except Exception as e:
        print(f"Failed to add media for {username} to the database. Error: {e}")


def insert_highlight_items(media_ids, highlight_id, title, user_id):
    try:
        db, cursor = config.gen_connection()

        query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"

        values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
        cursor.executemany(query, values)
        db.commit()
        if cursor.rowcount > 0:
            print(f"Added {cursor.rowcount} highlight items to the database.")
    except Exception as e:
        print(f"Failed to add highlight items to the database. Error: {e}")


def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height


if __name__ == '__main__':
    client = login()
    client.set_proxy(proxies['https'])

    db, cursor = config.gen_connection()

    cursor.execute("SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY id DESC;")
    following = cursor.fetchall()

    # favorites go to the front of the queue, everyone else keeps their order
    new_following = []
    for user in following:
        username, user_id, favorite = user

        if bool(favorite):
            new_following.insert(0, user)
        else:
            new_following.append(user)

    following = new_following

    cursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL;")
    existing_files = [media[0] for media in cursor.fetchall()]

    continueFromLast = input("Continue from the last user? (y/n): ").lower() == 'y'

    if continueFromLast:
        cursor.execute("SELECT username FROM media ORDER BY id DESC LIMIT 1;")
        lastUser = cursor.fetchone()

        if lastUser:
            lastUser = lastUser[0]
            # drop everyone ahead of the last processed user
            while True:
                if lastUser != following[0][0]:
                    following.pop(0)
                else:
                    break

    actionsTaken = 0

    with ThreadPoolExecutor(max_workers=10) as executor:
        for user in following:
            while True:
                try:
                    firstImport = False
                    username, user_id, isFavorite = user

                    if not user_id:
                        firstImport = True
                        user_id = client.user_id_from_username(username)
                        actionsTaken += 1
                        cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
                        db.commit()
                        print(f"Updated user ID for {username} to {user_id}")

                        #################### profile picture ####################
                        profilePath = os.path.join('media', 'profile', username, 'profile.jpg')
                        profileURL = client.user_info(user_id).profile_pic_url_hd
                        download_file(profileURL, profilePath)

                        fileHash = file_hash(profilePath)
                        serverPath = os.path.join(os.path.dirname(profilePath), f"{fileHash}.jpg")

                        upload_to_storage(profilePath, serverPath)

                        mediaInfo = {
                            'username': username,
                            'user_id': user_id,
                            'media_id': None,
                            'media_type': 'image',
                            'post_type': 'profile',
                            'media_url': f"https://storysave.b-cdn.net/{serverPath}",
                            'duration': 0,
                            'hash': fileHash
                        }

                        process_media(mediaInfo, profilePath)
                        add_media_to_db(mediaInfo)
                        #################### profile picture ####################

                    #################### stories ####################
                    print(f"[{username}]\nChecking: Stories")

                    # fetch user stories
                    stories = client.user_stories(user_id)
                    actionsTaken += 1

                    # fetch user's highlights and add to stories
                    if firstImport or isFavorite:
                        highlights = client.user_highlights(user_id)  # API request
                        actionsTaken += 1
                        for highlight in highlights:
                            try:
                                highlight_items = client.highlight_info_v1(highlight.pk).items  # API request
                                actionsTaken += 1
                            except Exception:
                                print(f"Failed to get highlight items for {highlight.pk}")
                                time.sleep(5)
                                continue  # skip this highlight instead of reusing stale items

                            media_ids = [item.pk for item in highlight_items]
                            executor.submit(insert_highlight_items, media_ids, highlight.pk, highlight.title, user_id)
                            stories.extend(highlight_items)

                    # process stories and highlight stories
                    newStoryCount = 0
                    for story in stories:
                        try:
                            mediaInfo = parse_media_data(story)
                            if not mediaInfo:
                                continue

                            # skip duplicates
                            if mediaInfo['media_id'] in existing_files:
                                continue

                            newStoryCount += 1
                            mediaInfo['user_id'] = user_id
                            mediaInfo['username'] = username
                            mediaInfo['post_type'] = 'story'
                            if mediaInfo['fileURL'] and mediaInfo['filename']:
                                filePath = os.path.join('media', 'stories', username, mediaInfo['filename'])
                                mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"

                                download_file(mediaInfo['fileURL'], filePath)
                                process_media(mediaInfo, filePath)
                                upload_to_storage(filePath, filePath)
                                add_media_to_db(mediaInfo)
                                os.remove(filePath)

                            existing_files.append(mediaInfo['media_id'])
                        except Exception as e:
                            print(f"Failed to process story for {username}. Error: {e}")
                    #################### stories ####################

                    #################### posts ####################
                    print("Checking: Posts")
                    medias = client.user_medias(user_id, 36)  # API request
                    actionsTaken += 1

                    # flatten albums into individual items
                    posts = []
                    for post in medias:
                        if post.media_type == 8:
                            for item in post.resources:
                                posts.append(item)
                            continue
                        posts.append(post)

                    newPostsCount = 0
                    for post in posts:
                        mediaInfo = parse_media_data(post)
                        if not mediaInfo:
                            continue
                        if mediaInfo['media_id'] in existing_files:
                            continue

                        newPostsCount += 1
                        mediaInfo['user_id'] = user_id
                        mediaInfo['username'] = username
                        mediaInfo['post_type'] = 'post'
                        if mediaInfo['fileURL'] and mediaInfo['filename']:
                            filePath = os.path.join('media', 'posts', username, mediaInfo['filename'])
                            mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"

                            download_file(mediaInfo['fileURL'], filePath)
                            process_media(mediaInfo, filePath)
                            upload_to_storage(filePath, filePath)
                            add_media_to_db(mediaInfo)
                            os.remove(filePath)

                        existing_files.append(mediaInfo['media_id'])
                    #################### posts ####################

                    print(f"New stories: {newStoryCount}\tNew Posts: {newPostsCount}")
                    print(f"Actions taken: {actionsTaken}")
                    print("=====================================")
                    break
                except Exception as e:
                    if "login_required" in str(e):
                        print("Please log in to your account again.")
                        client = login(force=True)
                    elif "Please wait a few minutes before you try again." in str(e):
                        print("Rate limited. Waiting for 5 minutes...")
                        time.sleep(300)  # actually wait before retrying
                        client = login(force=True)
                    else:
                        print("An unexpected error occurred:", e)
                        break


# TODO
# ADD DATE TO POSTS / STORIES
# FETCH ONLY THE NEW STORIES
# MINIMIZE DATABASE CONNECTIONS
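One way to tackle the MINIMIZE DATABASE CONNECTIONS item above would be to let add_media_to_db reuse the connection already opened in __main__ instead of calling config.gen_connection() for every insert. A minimal sketch, assuming config.gen_connection() returns a (db, cursor) pair as it does elsewhere here; the optional db/cursor parameters are illustrative, not part of the current code:

def add_media_to_db(mediaInfo, db=None, cursor=None):
    # reuse the caller's connection when provided, open a fresh one only as a fallback
    if db is None or cursor is None:
        db, cursor = config.gen_connection()

    query = """
        INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, hash)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    data = (mediaInfo['user_id'], mediaInfo['username'], mediaInfo.get('taken_at'),
            mediaInfo['media_type'], mediaInfo['post_type'], mediaInfo['media_url'],
            mediaInfo.get('duration', 0), mediaInfo['width'], mediaInfo['height'],
            mediaInfo['media_id'], mediaInfo['hash'])
    cursor.execute(query, data)
    db.commit()

# callers in the main loop would then pass the shared handles:
# add_media_to_db(mediaInfo, db=db, cursor=cursor)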
@ -1,32 +0,0 @@
import requests

url = 'https://www.save-free.com/process'

data = {
    'instagram_url': 'natahalieeee',
    'type': 'profile',
    'resource': 'save'
}

zoom_data = {
    'instagram_url': 'natahalieeee',
    'type': 'profile',
    'resource': 'zoom'
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    'Referer': 'https://www.save-free.com/profile-downloader/',
}

# defined but not passed to the requests below
proxies = {
    "http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
    "https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
}

# the first response (resource=save) is discarded; only the zoomed version is written to disk
response = requests.post(url, data=data, headers=headers)

response = requests.post(url, data=zoom_data, headers=headers)

with open('image.jpg', 'wb') as f:
    f.write(response.content)
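Since the script fires two POSTs but only keeps the zoomed response, a small helper that parameterizes the username and checks the status before writing may be easier to reuse. A rough sketch against the same url and headers defined above; fetch_profile_picture is a hypothetical name, not an existing function:

def fetch_profile_picture(username, resource='zoom', out_path='image.jpg'):
    # same endpoint and form fields as above, just parameterized
    payload = {'instagram_url': username, 'type': 'profile', 'resource': resource}
    resp = requests.post(url, data=payload, headers=headers)
    resp.raise_for_status()  # fail loudly instead of writing an error page to disk
    with open(out_path, 'wb') as f:
        f.write(resp.content)
    return out_path

# fetch_profile_picture('natahalieeee')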
@ -1,24 +0,0 @@
import json

filePath = 'test.json'

with open(filePath, 'r', encoding='utf-8') as f:
    data = json.load(f)

print(data)


posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
posts = [post['node'] for post in posts]

# track the post with the highest-resolution image candidate across the timeline
biggestRes = 0
goodPost = None
for post in posts:
    images = post['image_versions2']['candidates']
    for image in images:
        width = image['width']
        height = image['height']
        if width * height > biggestRes:
            biggestRes = width * height
            goodPost = post

print(goodPost)
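The same candidate selection can be done per post with max() instead of a manual loop. A short sketch, assuming each entry in image_versions2['candidates'] carries a url alongside width and height (best_candidate is an illustrative helper, not in the original script):

def best_candidate(candidates):
    # highest-resolution entry from image_versions2['candidates']
    return max(candidates, key=lambda c: c['width'] * c['height'])

# for post in posts:
#     print(best_candidate(post['image_versions2']['candidates'])['url'])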
@ -1,37 +0,0 @@
from bs4 import BeautifulSoup
from zenrows import ZenRowsClient

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"}

def get_tiktok_video(url):
    client = ZenRowsClient("39cf41d4c1ffcb944fca23a95fee8a2722bf4f28")
    data = client.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'html.parser')

    video_url = soup.find('div', class_='video_html5').find('video').get('src')

    return video_url

def get_user_videos(username):
    url = f'https://urlebird.com/user/{username}/'

    client = ZenRowsClient("39cf41d4c1ffcb944fca23a95fee8a2722bf4f28")
    data = client.get(url)
    soup = BeautifulSoup(data.text, 'html.parser')

    video_urls = []
    foundVideos = soup.find_all('div', class_='thumb')
    for video in foundVideos:
        videoURL = video.find_all('a')[-1].get('href')
        video_urls.append(videoURL)

    return video_urls

# smoke test: the result is discarded
get_tiktok_video('https://urlebird.com/video/7295074788165373190/')

videos = get_user_videos('liliashaked')

for video in videos:
    print(get_tiktok_video(video))
@ -1,2 +0,0 @@
https://www.redgifs.com/watch/terrificexhaustedgannet#rel=tag%3Anaomi-soraya%2Ca;order=trending
https://www.sex.com/pins
@ -0,0 +1,70 @@
import os, requests, config
from snapchat import get_data, get_stories, get_highlight_stories

headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
directory = "snapchat"

def get_existing_media_ids(directory):
    # take each filename without its extension, split it on ~ and keep the third element as the media id
    existing_media_ids = set()
    for root, _, files in os.walk(directory):
        for file in files:
            if '~' not in file:
                continue

            filename, _ = os.path.splitext(file)
            media_id = filename.split('~')[2]
            existing_media_ids.add(media_id)
    return existing_media_ids

def get_media_id(url):
    return url.split('/')[-1].split('?')[0].split('.')[0]

def find_duplicate_snap(existing_snaps, snap_id):
    for snap in existing_snaps:
        if snap_id in snap[1]:
            return snap
    return False

def main():
    if not os.path.exists(directory):
        os.makedirs(directory)

    db, cursor = config.gen_connection()

    cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
    usernames = [row[0] for row in cursor.fetchall()]

    cursor.execute("SELECT id, filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' AND snap_id IS NULL")
    existing_medias = cursor.fetchall()

    existing_media_ids = get_existing_media_ids(directory)  # currently unused

    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue

        print("Getting stories...")
        stories = get_stories(data)

        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            media_id = story['media_id']
            url = story['url']
            timestamp = story['timestamp']

            # backfill snap_id on rows whose stored filename already contains this snap
            snap_id = get_media_id(url)
            duplicate_snap = find_duplicate_snap(existing_medias, snap_id)
            if duplicate_snap:
                cursor.execute("UPDATE media SET snap_id = %s WHERE id = %s", (snap_id, duplicate_snap[0]))
                db.commit()
                print(f"{cursor.rowcount} Media {snap_id} updated.")
                continue

if __name__ == "__main__":
    main()
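get_media_id only keeps the last path segment of the story URL, stripped of its query string and extension. A quick worked example with a made-up CDN URL (the domain and id are hypothetical, for illustration only):

# sample URL is invented; the split chain yields the bare id
sample = "https://bolt-gcdn.sc-cdn.net/3/abc123DEF.jpg?x=1"
assert get_media_id(sample) == "abc123DEF"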