update - new scripts

main
oscar 9 months ago
parent 48d2330193
commit ad39eeaed1

@@ -0,0 +1,424 @@
import requests
import hashlib

access_key = "ccd3f9d4-9e6f-4bd2-8f594402b5a7-3646-48fe"
video_library_id = 106867

def create_video(title):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
    payload = {"title": title}  # let requests serialize the JSON so quotes in titles are escaped safely
    headers = {
        "accept": "application/json",
        "content-type": "application/*+json",
        "AccessKey": access_key
    }
    response = requests.post(url, json=payload, headers=headers)
    return response

def generate_signature(library_id, api_key, expiration_time, video_id):
    # sha256(library_id + api_key + expiration + video_id)
    signature = hashlib.sha256((library_id + api_key + str(expiration_time) + video_id).encode()).hexdigest()
    return signature
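# A minimal sketch (assumption: this signature is meant for Bunny Stream's
# presigned TUS uploads; the header names below follow Bunny's docs and are
# not used elsewhere in this file):
#
#   import time
#   expiration = int(time.time()) + 3600  # signature valid for one hour
#   sig = generate_signature(str(video_library_id), access_key, expiration, video_id)
#   tus_headers = {
#       "AuthorizationSignature": sig,
#       "AuthorizationExpire": str(expiration),
#       "VideoId": video_id,
#       "LibraryId": str(video_library_id),
#   }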
def upload_video_process(file_path, video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }
    with open(file_path, "rb") as file:
        file_data = file.read()
    response = requests.put(url, headers=headers, data=file_data)
    return response.status_code

def upload_video(file_path, title=None):
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    status = upload_video_process(file_path, video_id)
    if status != 200:  # don't hand back CDN links if the upload itself failed
        return False
    return {
        "embed_link": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8",
        "animated_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp",
        "default_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg",
    }
def upload_video_recurbate(videoInfo):
    title = f"{videoInfo['username']} {videoInfo['platform']}"
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    upload_video_process(videoInfo['filename'], video_id)
    videoInfo["embed_link"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8"
    videoInfo["animated_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp"
    videoInfo["default_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg"
    return True
def delete_video(video_id):
    # Accept either a bare GUID or a full embed URL
    video_id = video_id.replace('https://vz-58ca89f1-986.b-cdn.net/', '').replace('/playlist.m3u8', '')
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }
    response = requests.delete(url, headers=headers)
    return response.status_code

def list_videos():
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos?page=1&itemsPerPage=2147483647&orderBy=date"
    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }
    response = requests.get(url, headers=headers)
    return response.json()['items']

def get_heatmap(video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}/heatmap"
    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }
    response = requests.get(url, headers=headers).json()
    return response

def get_video(video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }
    response = requests.get(url, headers=headers).json()
    return response
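# Usage sketch for the helpers above (assumes a local "clip.mp4";
# upload_video returns False on failure, or a dict of CDN URLs on success):
#
#   result = upload_video("clip.mp4", title="My clip")
#   if result:
#       print(result["embed_link"])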
import os
import requests
from requests.exceptions import HTTPError
from urllib import parse

class Storage:
    def __init__(self, api_key, storage_zone, storage_zone_region="de"):
        """
        Creates an object for using the BunnyCDN Storage API

        Parameters
        ----------
        api_key : String
            The API key / FTP password of your storage zone
        storage_zone : String
            Name of your storage zone
        storage_zone_region : String, optional
            The storage zone region code as per BunnyCDN
        """
        self.headers = {
            # headers to be passed in HTTP requests
            "AccessKey": api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        # storage_zone must be specified
        assert storage_zone != "", "storage_zone is not specified/missing"
        # Build the base_url for requests
        if storage_zone_region == "de" or storage_zone_region == "":
            self.base_url = "https://storage.bunnycdn.com/" + storage_zone + "/"
        else:
            self.base_url = (
                "https://"
                + storage_zone_region
                + ".storage.bunnycdn.com/"
                + storage_zone
                + "/"
            )
    def DownloadFile(self, storage_path, download_path=os.getcwd()):
        """
        Gets the file at the given path in the storage zone
        and downloads it to the download_path location

        Parameters
        ----------
        storage_path : String
            The path of the file to retrieve
            (including the file name, excluding the storage zone name)
        download_path : String
            The local directory to which the downloaded file is saved
            Note: in download_path use '\\' instead of '\', for example: C:\\Users\\XYZ\\OneDrive
        """
        assert (
            storage_path != ""
        ), "storage_path must be specified"  # make sure storage_path is not empty
        # build the correct url
        if storage_path[0] == "/":
            storage_path = storage_path[1:]
        if storage_path[-1] == "/":
            storage_path = storage_path[:-1]
        url = self.base_url + parse.quote(storage_path)
        file_name = url.split("/")[-1]  # for storing the file name
        # download the file if present, else return an appropriate error message
        try:
            response = requests.get(url, headers=self.headers, stream=True)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"HTTP error occurred: {http}",
            }
        except Exception as err:
            return {
                "status": "error",
                "HTTP": None,  # the request itself failed, so there is no response object
                "msg": f"error occurred: {err}",
            }
        else:
            os.makedirs(download_path, exist_ok=True)  # make sure the target directory exists
            download_path = os.path.join(download_path, file_name)
            # download the file in chunks
            with open(download_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "File downloaded successfully",
            }
    def PutFile(
        self,
        file_name,
        storage_path=None,
        local_upload_file_path=os.getcwd(),
    ):
        """
        Uploads a file to your BunnyCDN storage zone

        Parameters
        ----------
        file_name : String
            The name of the file as stored on the local server
        storage_path : String
            The path in the storage zone to upload to
            (including the desired file name, excluding the storage zone name)
        local_upload_file_path : String
            The local directory (excluding the file name) of the file to upload

        Examples
        --------
        file_name : 'ABC.txt'
        local_upload_file_path : 'C:\\User\\Sample_Directory'
        storage_path : '<Directory name in storage zone>/<file name as to be uploaded on storage zone>.txt'
        # .txt here because the file being uploaded in this example is txt
        """
        local_upload_file_path = os.path.join(local_upload_file_path, file_name)
        # build the correct url
        if storage_path is not None and storage_path != "":
            if storage_path[0] == "/":
                storage_path = storage_path[1:]
            if storage_path[-1] == "/":
                storage_path = storage_path[:-1]
            url = self.base_url + parse.quote(storage_path)
        else:
            url = self.base_url + parse.quote(file_name)
        with open(local_upload_file_path, "rb") as file:
            file_data = file.read()
        response = requests.put(url, data=file_data, headers=self.headers)
        try:
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"Upload failed, HTTP error occurred: {http}",
            }
        else:
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "The file upload was successful",
            }
    def DeleteFile(self, storage_path=""):
        """
        Deletes the file or folder at storage_path from the storage zone

        Parameters
        ----------
        storage_path : The directory path to your file (including file name) or folder which is to be deleted.
                       If this is the root of your storage zone, you can ignore this parameter.
        """
        assert (
            storage_path != ""
        ), "storage_path must be specified"  # make sure storage_path is not empty
        # build the correct url
        if storage_path[0] == "/":
            storage_path = storage_path[1:]
        url = self.base_url + parse.quote(storage_path)
        try:
            response = requests.delete(url, headers=self.headers)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"HTTP error occurred: {http}",
            }
        except Exception as err:
            return {
                "status": "error",
                "HTTP": None,  # the request itself failed, so there is no response object
                "msg": f"Object delete failed, error occurred: {err}",
            }
        else:
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "Object successfully deleted",
            }
    def GetStoragedObjectsList(self, storage_path=None):
        """
        Returns a list of the files and directories located at the given storage_path.

        Parameters
        ----------
        storage_path : The directory path that you want to list.
        """
        # build the correct url
        if storage_path is not None:
            if storage_path[0] == "/":
                storage_path = storage_path[1:]
            if storage_path[-1] != "/":
                storage_path += "/"  # listing requires a trailing slash
            url = self.base_url + parse.quote(storage_path)
        else:
            url = self.base_url
        # send the GET request
        try:
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"HTTP error occurred: {http}",
            }
        else:
            storage_list = []
            for dictionary in response.json():
                temp_dict = {}
                for key in dictionary:
                    if key == "ObjectName" and dictionary["IsDirectory"] is False:
                        temp_dict["File_Name"] = dictionary[key]
                    if key == "ObjectName" and dictionary["IsDirectory"]:
                        temp_dict["Folder_Name"] = dictionary[key]
                storage_list.append(temp_dict)
            return storage_list
    def MoveFile(self, old_path, new_path):
        """
        Moves a file by downloading it from the old path, uploading it to the new path,
        then deleting it from the old path. Uses the existing PutFile and DeleteFile methods.

        Parameters
        ----------
        old_path : str
            The current path (relative to the storage zone root) of the file to move.
        new_path : str
            The new path (relative to the storage zone root) for the file.

        Returns
        -------
        dict
            A dictionary containing 'status', 'msg', and optionally 'HTTP'.
        """
        # Validate arguments
        if not old_path or not new_path:
            return {
                "status": "error",
                "msg": "Both old_path and new_path must be provided."
            }
        # 1. Download from old_path to a temporary local directory
        # If you already have the file locally, you can skip this download step.
        download_response = self.DownloadFile(old_path, download_path="temp")
        if download_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to download file for moving. Reason: {download_response.get('msg', 'unknown')}",
                "HTTP": download_response.get("HTTP")
            }
        # Extract the filename from old_path so we know what we downloaded
        filename = os.path.basename(old_path)
        # 2. Upload to new_path using the existing PutFile
        # We assume new_path includes the desired filename. If it does not, adjust this logic.
        put_response = self.PutFile(
            file_name=filename,
            storage_path=new_path,  # e.g. "folder/newfile.jpg"
            local_upload_file_path="temp"  # where we downloaded it
        )
        if put_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to upload file to new path. Reason: {put_response.get('msg', 'unknown')}",
                "HTTP": put_response.get("HTTP")
            }
        # 3. Delete the original file using the existing DeleteFile
        delete_response = self.DeleteFile(old_path)
        if delete_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to delete old file. Reason: {delete_response.get('msg', 'unknown')}",
                "HTTP": delete_response.get("HTTP")
            }
        # (Optional) Clean up the local temp file
        local_temp_path = os.path.join("temp", filename)
        if os.path.exists(local_temp_path):
            os.remove(local_temp_path)
        return {
            "status": "success",
            "msg": f"File successfully moved from '{old_path}' to '{new_path}'."
        }
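# Usage sketch for the Storage class (zone name, key, and paths here are
# placeholders, not values from this repo):
#
#   storage = Storage(api_key="STORAGE_ZONE_PASSWORD", storage_zone="myzone")
#   storage.PutFile("photo.jpg", storage_path="media/photo.jpg")
#   print(storage.GetStoragedObjectsList("media"))
#   storage.MoveFile("media/photo.jpg", "archive/photo.jpg")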

@@ -0,0 +1,24 @@
import config

db, cursor = config.gen_connection()

cursor.execute("SELECT DISTINCT username FROM media WHERE user_id IS NULL AND platform = 'instagram';")
usernames = [row[0] for row in cursor.fetchall()]

for username in usernames:
    print(f"Username: {username}")
    cursor.execute("SELECT DISTINCT user_id FROM media WHERE username = %s AND user_id IS NOT NULL;", [username])
    possible_user_ids = [user_id for user_id, in cursor.fetchall()]
    if len(possible_user_ids) == 0:
        print(f"No user_id found for {username}")
        continue
    if len(possible_user_ids) > 1:
        print(f"Multiple user_ids found for {username}: {possible_user_ids}")
        continue
    user_id = possible_user_ids[0]
    cursor.execute("UPDATE media SET user_id = %s WHERE username = %s AND user_id IS NULL;", [user_id, username])
    db.commit()
    print(f"[{cursor.rowcount}] Updated user_id for {username}")

File diff suppressed because one or more lines are too long

@@ -0,0 +1,78 @@
import os
import config
import logging

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler()  # or use logging.FileHandler('script.log')
    ]
)

# Prepare database connection
db, cursor = config.gen_connection()

# Ensure local temp directory exists
TEMP_DIR = "temp"
os.makedirs(TEMP_DIR, exist_ok=True)

URL_PREFIX = "https://storysave.b-cdn.net/"

# Retrieve records from the database
query = """
SELECT id, date, media_url, platform, username, hash
FROM media
WHERE media_url LIKE '%none%';
"""
cursor.execute(query)
rows = cursor.fetchall()

# Initialize Bunny.net Storage (credentials redacted)
obj_storage = config.get_custom_storage()

count = 0
total = len(rows)
for row in rows:
    count += 1
    pin_id, date, media_url, platform, username, file_hash = row
    logging.info(f"[{count}/{total}] Processing screenshot ID: {pin_id}")

    # 1. Extract the server path (remove domain and query) and fix the filename
    serverPath = media_url.replace(URL_PREFIX, "").split("?")[0]
    filename = os.path.basename(serverPath)
    filename = filename.replace("none", file_hash).replace("None", file_hash)
    filepath = os.path.join(TEMP_DIR, filename)

    # 2. Build a date-based path (computed here but not used in the final path below)
    year = date.year
    month = str(date.month).zfill(2)
    day = str(date.day).zfill(2)
    formatted_date = os.path.join(str(year), month, day)

    newPath = os.path.join("media", "stories", username, filename)
    new_media_url = f"{URL_PREFIX}{newPath}"

    # 3. Move the file to the new path
    logging.info(f"Moving screenshot from {serverPath} to {newPath}")
    status = obj_storage.MoveFile(serverPath, newPath)
    if status['status'] != 'success':
        logging.info(f"Failed to move file {serverPath} to {newPath}. Error: {status['msg']}")
        continue

    # 4. Update the DB record
    logging.info(f"Updating DB record {pin_id} to new URL\n{new_media_url}\nhttps://altpins.com/pin/{pin_id}")
    cursor.execute("UPDATE media SET media_url = %s WHERE id = %s", [new_media_url, pin_id])
    db.commit()

    logging.info(f"Successfully processed screenshot {pin_id}")

# Close the DB connection
cursor.close()
db.close()
logging.info("All done!")

@@ -0,0 +1,20 @@
from storysave_api import get_hd_profile_picture
import config, funcs, os

db, cursor = config.gen_connection()

cursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND username IN (SELECT username FROM following WHERE platform = 'instagram');")
usernames = cursor.fetchall()

for username, user_id in usernames:
    profilepicurl = get_hd_profile_picture(user_id=user_id)
    if not profilepicurl:
        continue
    filename = os.path.basename(profilepicurl).split('?')[0]
    user_dir = os.path.join('media', 'instagram', 'profile', username)
    os.makedirs(user_dir, exist_ok=True)  # make sure the target directory exists
    filepath = os.path.join(user_dir, filename)
    funcs.download_file(profilepicurl, filepath)
    print(f"Downloaded profile picture for {username}.")

@@ -0,0 +1,126 @@
import os
import json
from tqdm import tqdm
from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username
# import config as altpinsConfig
import altpinsConfig

def get_data(filepath):
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        print(f"Error reading {filepath}")
        return None

def process_story(story, username, story_type, db, cursor):
    snap_urls = story.get('snapUrls', {})
    media_url = snap_urls.get('mediaUrl', '').split('?')[0]
    media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1]
    if media_id in existing_media_ids:
        return False
    media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}"

    media_preview_url = snap_urls.get('mediaPreviewUrl', {}).get('value', '').split('?')[0]
    media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}"

    timestamp = story.get('timestampInSec', {}).get('value', '')
    media_type = story.get('snapMediaType')
    snap_id = story.get('snapId', {}).get('value', '')

    query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
    db.commit()

    existing_media_ids.add(media_id)
    print_emoji = '✅' if cursor.rowcount else '➖'  # ➖ means the row already existed (INSERT IGNORE)
    print(f"{print_emoji} Inserted story {media_id}")

def process_json(json_path, db, cursor):
    """
    Given a path to a JSON file, parse it and insert relevant data
    into the database.
    """
    # Load JSON data
    data = get_data(json_path)
    username = get_username(data)

    ready_stories = []

    # Collect stories (regular)
    stories = get_stories(data)
    for story in stories:
        story['storyType'] = 'story'
        ready_stories.append(story)

    # Collect stories (highlights)
    highlights = get_highlights(data)
    highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])]
    highlight_stories.sort(key=lambda x: x.get('snapIndex'), reverse=True)
    for story in highlight_stories:
        story['storyType'] = 'highlight'
        ready_stories.append(story)

    for story in ready_stories:
        story_type = story.get('storyType')
        process_story(story, username, story_type, db, cursor)

    # Insert spotlight metadata
    spotlight_metadata = get_spotlight_metadata(data)
    for story in spotlight_metadata:
        try:
            media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1]
            deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0]
        except (KeyError, TypeError, AttributeError):
            continue
        if not all((media_id, deepLinkUrl)):
            continue
        if deepLinkUrl in existing_spotlights:
            continue
        deepLinkId = deepLinkUrl.split('/')[-1]
        description = story['description']

        insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId))
        db.commit()

        existing_spotlights.add(deepLinkUrl)
        print_emoji = '✅' if cursor.rowcount else '➖'
        print(f"{print_emoji} Inserted spotlight {media_id}")

    os.remove(json_path)

db, cursor = altpinsConfig.gen_connection()

cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
existing_media_ids = {row[0] for row in cursor.fetchall()}

cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
existing_spotlights = {row[0] for row in cursor.fetchall()}

data_dir = 'data'
files = [f for f in get_files(data_dir) if f.endswith('.json')]

# Wrap the file list with tqdm to show a progress bar
for filepath in tqdm(files, desc="Processing files", unit="file"):
    process_json(filepath, db, cursor)

db.close()

@@ -0,0 +1,66 @@
from snapchat import get_all_users_data, get_stories, get_highlight_stories, get_social_medias, get_related_profiles
import os, config

snapchat_directory = "snapchat"
media_directory = "media"
temp_directory = ".temp"
data_directory = "data"

directory = os.path.join(media_directory, snapchat_directory)

def get_snapchat_stories(usernames):
    usernames = usernames[:5]  # only process the first 5 users per run
    snapchat_users_data = get_all_users_data(usernames)
    snapchat_users_data = dict(sorted(snapchat_users_data.items()))

    ready_stories = []
    for username, data in snapchat_users_data.items():
        print(f"Getting stories for {username}...")
        if not data:
            print(f"Failed to get data for {username}. Skipping.")
            continue

        website_url = get_social_medias(data)
        related_profiles = get_related_profiles(data)

        stories = get_stories(data)
        stories.extend(get_highlight_stories(data))
        for story in stories:
            snap_id = story['snap_id']
            url = story['url']
            timestamp = story['timestamp']

            # Determine the file extension
            extension = '.jpg' if story['media_type'] == 'image' else '.mp4'
            filename = f"{username}~{timestamp}~{snap_id}{extension}"
            filepath = os.path.join(directory, filename)

            story['media_url'] = url
            story['filepath'] = filepath
            story['username'] = username
            ready_stories.append(story)

    # sort ready_stories by timestamp, oldest to newest
    ready_stories.sort(key=lambda x: x['timestamp'])
    return ready_stories

db, cursor = config.gen_connection()

cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]

stories = get_snapchat_stories(usernames)

@@ -0,0 +1,41 @@
import config
import requests

def is_url_accessible(url):
    try:
        response = requests.head(url, timeout=5)  # a HEAD request is usually faster and enough to check availability
        return response.status_code == 200
    except requests.RequestException:
        return False

media_names = ['mediaUrl', 'mediaPreviewUrl']

db, cursor = config.gen_connection()

for media_type in media_names:
    cursor.execute(f"SELECT id, {media_type} FROM snapchat_stories WHERE {media_type} NOT LIKE 'https://cf-st.sc-cdn.net/d/%' AND status != 'inactive'")
    rows = cursor.fetchall()

    total = len(rows)
    count = 0
    for row in rows:
        count += 1
        record_id, original_url = row
        media_id = original_url.split('/')[-1]
        new_url = f'https://cf-st.sc-cdn.net/d/{media_id}'

        if is_url_accessible(new_url):
            print(f"✅ [{count} / {total}] {new_url} is accessible (converted from {original_url})")
            cursor.execute(f"UPDATE snapchat_stories SET {media_type} = %s, status = 'updated' WHERE id = %s", (new_url, record_id))
            db.commit()
            continue

        print(f"❌ [{count} / {total}] {new_url} is NOT accessible (original: {original_url})")
        cursor.execute("UPDATE snapchat_stories SET status = 'inactive' WHERE id = %s", (record_id,))
        db.commit()

cursor.close()
db.close()

@@ -0,0 +1,140 @@
from datetime import datetime
from uuid import uuid4
import funcs
import config
import cv2
import os

directory = 'processed_tiktoks'

def UploadMedia(media):
    platform = 'TikTok'
    username = media['username']
    filepath = media['filepath']
    file_size = os.path.getsize(filepath)

    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    if not media_type:
        print(f'Error determining media type for {filename}. Skipping...')
        return False

    post_type = funcs.determine_post_type(filepath)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_hashes:
        print(f'File {filename} already exists. Skipping...')
        return False

    post_date = datetime.now()
    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        try:
            thumb_path = generate_thumbnail(filepath)
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')  # this might be a problem in case of duplicate hashes
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
            os.remove(thumb_path)
        except Exception:
            print('Error generating thumbnail. Skipping...')
            return False

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/tiktoks/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow as fuck

    post_type = 'story' if post_type == 'stories' else 'post'

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
    newCursor.execute(query, values)  # slower
    newDB.commit()

    existing_hashes.add(file_hash)  # so the same file isn't uploaded twice in one run
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)
    return True

def generate_thumbnail(filepath):
    os.makedirs('temp', exist_ok=True)
    thumb_path = f'temp/{uuid4()}.jpg'
    cap = cv2.VideoCapture(filepath)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        raise ValueError(f'Could not read a frame from {filepath}')
    cv2.imwrite(thumb_path, frame)
    return thumb_path

def get_media_data(filepath):
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) == 3:
        username, title, tiktok_id = parts
    elif len(parts) == 2:
        username, title = parts
        tiktok_id = None
    else:
        return False
    data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
    return data
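# Example of the expected "~"-delimited naming (hypothetical file):
#   "alice~dance clip~9f1b5e1e-....mp4" parses to
#   {'username': 'alice', 'title': 'dance clip', 'tiktok_id': '9f1b5e1e-....mp4'}
# Note the extension stays attached to the last part; UploadMedia re-derives
# the extension from the full filename, so this is harmless here.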
def get_media(folder_path):
    medias = []
    users = os.listdir(folder_path)
    for user in users:
        user_folder = os.path.join(folder_path, user)
        if not os.path.isdir(user_folder):
            print(f"Skipping {user}")
            continue
        files = os.listdir(user_folder)
        for filename in files:
            filepath = os.path.join(user_folder, filename)
            data = get_media_data(filepath)
            if data:
                medias.append(data)
    return medias

def dump_tiktoks(folder_path):
    medias = get_media(folder_path)
    for media in medias:
        UploadMedia(media)

if __name__ == '__main__':
    print('Starting processing...')

    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
    existing_hashes = {row[0] for row in newCursor.fetchall()}

    dump_tiktoks(directory)

    print("Processing completed.")

@@ -0,0 +1,58 @@
from uuid import uuid4
import uuid
import os

def is_valid_uuid(uuid_to_test, version=4):
    try:
        uuid_obj = uuid.UUID(uuid_to_test, version=version)
    except ValueError:
        return False
    return str(uuid_obj) == uuid_to_test
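# Example (hypothetical values): a bare UUID4 string round-trips unchanged,
#   is_valid_uuid("9f1b5e1e-8c4b-4a3e-9d2a-7c5f0e6b1a2d")  -> True
#   is_valid_uuid("not-a-uuid")                            -> False
# so files whose basename is already a UUID keep it as their tiktok_id below.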
source_dir = 'tiktoks/'
processed_dir = 'processed_tiktoks'

os.makedirs(processed_dir, exist_ok=True)

users = os.listdir(source_dir)
for user in users:
    user_dir = os.path.join(source_dir, user)
    if not os.path.isdir(user_dir):
        print(f"Skipping {user}")
        continue
    for file in os.listdir(user_dir):
        filename = os.path.splitext(file)[0]
        filepath = os.path.join(user_dir, file)
        file_ext = os.path.splitext(file)[1]

        tiktok_id = str(uuid4())
        username = user

        if is_valid_uuid(filename):
            title = ''
            tiktok_id = filename
        elif 'masstik' in file or 'masstiktok' in file:
            title = filename.split('_')[-1]
        else:
            title = filename

        print("=" * 100)
        title = title.encode('utf-8', 'ignore').decode('utf-8')
        print(f"Username: {username}\nTitle: {title}")

        new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
        new_filepath = os.path.join(processed_dir, username, new_filename)
        os.makedirs(os.path.dirname(new_filepath), exist_ok=True)

        if not os.path.exists(new_filepath):
            os.rename(filepath, new_filepath)
            print(f"Renamed {file} to {new_filepath}")
        else:
            print("File with the same name already exists. Renaming aborted.")
        print("=" * 100)

@@ -0,0 +1,124 @@
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
from bs4 import BeautifulSoup
import requests
import base64
import re
import os

def format_url(url):
    clean_url = re.sub(r'%[0-9A-F]{2}', '', url)
    return clean_url

def encode_offset(offset_num):
    offset_base64 = str(offset_num).encode('utf-8')
    offset_base64 = base64.b64encode(offset_base64).decode('utf-8')
    return offset_base64
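# Example: encode_offset(20) returns "MjA=" (base64 of "20"), the cursor
# format Twitch's GQL pagination expects. Note that the offset computed in
# get_clips below is never sent with the request, so only the first page of
# up to 100 clips is fetched.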
def get_clips(username):
    url = 'https://gql.twitch.tv/gql'

    offset_num = 20
    offset_base64 = encode_offset(offset_num)  # computed but not used in the request below

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    headers = {
        'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',
        'Content-Type': 'text/plain;charset=UTF-8',
        'User-Agent': user_agent
    }
    data = {
        "operationName": "ClipsCards__User",
        "variables": {"login": username, "limit": 100},
        "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "4eb8f85fc41a36c481d809e8e99b2a32127fdb7647c336d27743ec4a88c4ea44"}}
    }

    response = requests.post(url, headers=headers, json=data)
    clips = response.json()
    clips = clips['data']['user']['clips']['edges']
    cleaned_clips = parse_clips(clips)
    return cleaned_clips

def parse_clips(clips):
    """
    clips is a list of dictionaries
    """
    cleaned_clips = []
    for clip in clips:
        clip = clip['node']
        clip_id = clip['id']
        clip_url = clip['url']
        clip_title = clip['title']
        clip_view_count = clip['viewCount']
        clip_duration = clip['durationSeconds']

        cleaned_clip = {
            'id': clip_id,
            'url': clip_url,
            'title': clip_title,
            'views': clip_view_count,
            'duration': clip_duration
        }
        cleaned_clips.append(cleaned_clip)
    return cleaned_clips

def get_video_url(video_url, driver):
    driver.get(video_url)
    # Get the video element
    video = driver.find_element(By.TAG_NAME, 'video')
    # Get the video source
    video_src = video.get_attribute('src')
    return video_src

def download_video(video_url, filepath):
    if os.path.exists(filepath):
        return filepath
    video = requests.get(video_url, stream=True)  # stream so large clips aren't held in memory
    # Download in chunks
    with open(filepath, 'wb') as f:
        for chunk in video.iter_content(chunk_size=1024):
            f.write(chunk)
    return filepath

# Set up an undetected Chrome driver in headless mode
opts = uc.ChromeOptions()
opts.add_argument("--headless")
opts.add_argument("--window-size=1920,1080")
driver = uc.Chrome(use_subprocess=True, options=opts)

username = 'didicandy666'
clips = get_clips(username)

os.makedirs('clips', exist_ok=True)
for clip in clips:
    clip_url = clip['url']  # parse_clips stores the link under 'url'
    filename = f"{clip['id']}.mp4"
    filepath = os.path.join('clips', filename)
    if os.path.exists(filepath):
        print(f"Already downloaded {filename}")
        continue
    video_url = get_video_url(clip_url, driver)
    download_video(video_url, filepath)
    print(f"Downloaded {filename}")

driver.quit()