update - new scripts
parent 48d2330193
commit ad39eeaed1
@@ -0,0 +1,424 @@
import requests
import hashlib
import json

access_key = "ccd3f9d4-9e6f-4bd2-8f594402b5a7-3646-48fe"
video_library_id = 106867

def create_video(title):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"

    # json.dumps escapes quotes and special characters in the title,
    # unlike the previous hand-built f-string payload
    payload = json.dumps({"title": title})
    headers = {
        "accept": "application/json",
        "content-type": "application/*+json",
        "AccessKey": access_key
    }

    response = requests.post(url, data=payload, headers=headers)

    return response

def generate_signature(library_id, api_key, expiration_time, video_id):
    signature = hashlib.sha256((library_id + api_key + str(expiration_time) + video_id).encode()).hexdigest()
    return signature
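
# A minimal sketch (not part of the original script) of how generate_signature
# might be used to build a token-authenticated embed URL. The URL shape and the
# query parameter names ("token", "expires") are assumptions modeled on Bunny
# Stream's embed token scheme; verify against the docs before relying on them.
import time

def build_signed_embed_url(video_id, ttl_seconds=3600):
    expires = int(time.time()) + ttl_seconds  # assumed: unix-epoch expiry
    token = generate_signature(str(video_library_id), access_key, expires, video_id)
    return (f"https://iframe.mediadelivery.net/embed/{video_library_id}/{video_id}"
            f"?token={token}&expires={expires}")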

def upload_video_process(file_path, video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"

    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }

    with open(file_path, "rb") as file:
        file_data = file.read()

    response = requests.put(url, headers=headers, data=file_data)

    return response.status_code

def upload_video(file_path, title=None):
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False

    video_id = video_item.json()['guid']
    upload_video_process(file_path, video_id)

    return {
        "embed_link": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8",
        "animated_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp",
        "default_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg",
    }
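
# Example usage (a sketch, not part of the original script); "video.mp4" is a
# placeholder path:
#
#     links = upload_video("video.mp4", title="My video")
#     if links:
#         print(links["embed_link"])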


def upload_video_recurbate(videoInfo):
    title = f"{videoInfo['username']} {videoInfo['platform']}"
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False

    video_id = video_item.json()['guid']
    upload_video_process(videoInfo['filename'], video_id)

    videoInfo["embed_link"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8"
    videoInfo["animated_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp"
    videoInfo["default_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg"

    return True

def delete_video(video_id):
    # Accept either a bare video GUID or a full playlist URL
    video_id = video_id.replace('https://vz-58ca89f1-986.b-cdn.net/', '').replace('/playlist.m3u8', '')

    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"

    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }

    response = requests.delete(url, headers=headers)

    return response.status_code

def list_videos():
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos?page=1&itemsPerPage=2147483647&orderBy=date"

    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }

    response = requests.get(url, headers=headers)

    return response.json()['items']

def get_heatmap(video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}/heatmap"

    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }

    response = requests.get(url, headers=headers).json()

    return response

def get_video(video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"

    headers = {
        "accept": "application/json",
        "AccessKey": access_key
    }

    response = requests.get(url, headers=headers).json()

    return response
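
# list_videos above asks for everything in one page via itemsPerPage=2147483647.
# If the Stream API caps the page size (the docs suggest it does), a paged
# variant is safer. A sketch, not part of the original script:
def list_videos_paged(items_per_page=100):
    videos = []
    page = 1
    while True:
        url = (f"https://video.bunnycdn.com/library/{video_library_id}/videos"
               f"?page={page}&itemsPerPage={items_per_page}&orderBy=date")
        headers = {"accept": "application/json", "AccessKey": access_key}
        items = requests.get(url, headers=headers).json().get('items', [])
        if not items:
            break
        videos.extend(items)
        page += 1
    return videos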


import os
import requests
from requests.exceptions import HTTPError
from urllib import parse

class Storage:
    def __init__(self, api_key, storage_zone, storage_zone_region="de"):
        """
        Creates an object for using the BunnyCDN Storage API

        Parameters
        ----------
        api_key : String
            The API key (FTP password) of your storage zone

        storage_zone : String
            Name of your storage zone

        storage_zone_region : String, optional
            The storage zone region code as per BunnyCDN
        """
        self.headers = {
            # headers to be passed in HTTP requests
            "AccessKey": api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        # storage_zone must be specified
        assert storage_zone != "", "storage_zone is not specified/missing"

        # Build the base_url used by all requests
        if storage_zone_region == "de" or storage_zone_region == "":
            self.base_url = "https://storage.bunnycdn.com/" + storage_zone + "/"
        else:
            self.base_url = (
                "https://"
                + storage_zone_region
                + ".storage.bunnycdn.com/"
                + storage_zone
                + "/"
            )

    def DownloadFile(self, storage_path, download_path=os.getcwd()):
        """
        This function gets the file mentioned in storage_path
        and downloads it to the download_path location

        Parameters
        ----------
        storage_path : String
            The path of the directory
            (including file name and excluding storage zone name)
            from which files are to be retrieved
        download_path : String
            The directory on the local server to which the downloaded file is saved
            Note: For download_path use '\\' instead of '\', example: C:\\Users\\XYZ\\OneDrive
        """

        assert (
            storage_path != ""
        ), "storage_path must be specified"  # to make sure storage_path is not null
        # to build correct url
        if storage_path[0] == "/":
            storage_path = storage_path[1:]
        if storage_path[-1] == "/":
            storage_path = storage_path[:-1]
        url = self.base_url + parse.quote(storage_path)
        file_name = url.split("/")[-1]  # For storing file name

        # return an appropriate message depending on whether the file is present, and download it if so
        try:
            response = requests.get(url, headers=self.headers, stream=True)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"Http error occurred {http}",
            }
        except Exception as err:
            return {
                "status": "error",
                # requests.get may have failed before a response object existed
                "HTTP": None,
                "msg": f"error occurred {err}",
            }
        else:
            # make sure the target directory exists (MoveFile below downloads into "temp")
            os.makedirs(download_path, exist_ok=True)
            download_path = os.path.join(download_path, file_name)
            # Downloading file
            with open(download_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "File downloaded Successfully",
            }

    def PutFile(
        self,
        file_name,
        storage_path=None,
        local_upload_file_path=os.getcwd(),
    ):

        """
        This function uploads files to your BunnyCDN storage zone

        Parameters
        ----------
        storage_path : String
            The path of the directory in the storage zone
            (including the name of the file as desired and excluding the storage zone name)
            to which the file is to be uploaded
        file_name : String
            The name of the file as stored on the local server
        local_upload_file_path : String
            The path of the file as stored on the local server (excluding the file name)
            from where the file is to be uploaded
        Examples
        --------
        file_name : 'ABC.txt'
        local_upload_file_path : 'C:\\User\\Sample_Directory'
        storage_path : '<Directory name in storage zone>/<file name as to be uploaded on storage zone>.txt'
        # Here .txt because the file being uploaded in the example is txt
        """
        local_upload_file_path = os.path.join(local_upload_file_path, file_name)

        # to build correct url
        if storage_path is not None and storage_path != "":
            if storage_path[0] == "/":
                storage_path = storage_path[1:]
            if storage_path[-1] == "/":
                storage_path = storage_path[:-1]
            url = self.base_url + parse.quote(storage_path)
        else:
            url = self.base_url + parse.quote(file_name)
        with open(local_upload_file_path, "rb") as file:
            file_data = file.read()
        response = requests.put(url, data=file_data, headers=self.headers)
        try:
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"Upload Failed HTTP Error Occurred: {http}",
            }
        else:
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "The File Upload was Successful",
            }
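
    # Example usage (a sketch, not part of the original module); the zone name
    # and key are placeholders:
    #
    #     storage = Storage(api_key="<storage zone password>", storage_zone="<zone>")
    #     storage.PutFile("photo.jpg", storage_path="media/photo.jpg",
    #                     local_upload_file_path="downloads")
    #     storage.DownloadFile("media/photo.jpg", download_path="temp")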

    def DeleteFile(self, storage_path=""):
        """
        This function deletes the file or folder mentioned in storage_path from the storage zone

        Parameters
        ----------
        storage_path : The directory path to your file (including file name) or folder which is to be deleted.
        """
        assert (
            storage_path != ""
        ), "storage_path must be specified"  # to make sure storage_path is not null
        # to build correct url
        if storage_path[0] == "/":
            storage_path = storage_path[1:]
        url = self.base_url + parse.quote(storage_path)

        try:
            response = requests.delete(url, headers=self.headers)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"HTTP Error occurred: {http}",
            }
        except Exception as err:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"Object Delete failed, Error occurred: {err}",
            }
        else:
            return {
                "status": "success",
                "HTTP": response.status_code,
                "msg": "Object Successfully Deleted",
            }

    def GetStoragedObjectsList(self, storage_path=None):
        """
        This function returns a list of files and directories located in the given storage_path.

        Parameters
        ----------
        storage_path : The directory path that you want to list.
        """
        # to build correct url
        if storage_path is not None:
            if storage_path[0] == "/":
                storage_path = storage_path[1:]
            if storage_path[-1] != "/":
                storage_path += "/"
            url = self.base_url + parse.quote(storage_path)
        else:
            url = self.base_url
        # Sending GET request
        try:
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
        except HTTPError as http:
            return {
                "status": "error",
                "HTTP": response.status_code,
                "msg": f"http error occurred {http}",
            }
        else:
            storage_list = []
            for dictionary in response.json():
                temp_dict = {}
                for key in dictionary:
                    if key == "ObjectName" and dictionary["IsDirectory"] is False:
                        temp_dict["File_Name"] = dictionary[key]
                    if key == "ObjectName" and dictionary["IsDirectory"]:
                        temp_dict["Folder_Name"] = dictionary[key]
                storage_list.append(temp_dict)
            return storage_list

    def MoveFile(self, old_path, new_path):
        """
        Moves a file by downloading from the old path and uploading to the new path,
        then deleting from the old path. Uses the existing PutFile and DeleteFile methods.

        Parameters
        ----------
        old_path : str
            The current path (relative to storage zone root) of the file to move.
        new_path : str
            The new path (relative to storage zone root) for the file.

        Returns
        -------
        dict
            A dictionary containing 'status', 'msg', and optionally 'HTTP'.
        """
        # Validate arguments
        if not old_path or not new_path:
            return {
                "status": "error",
                "msg": "Both old_path and new_path must be provided."
            }

        # 1. Download from old_path to a temporary local directory
        # If you already have the file locally, you can skip this download step.
        download_response = self.DownloadFile(old_path, download_path="temp")
        if download_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to download file for moving. Reason: {download_response.get('msg', 'unknown')}",
                "HTTP": download_response.get("HTTP")
            }

        # Extract the filename from old_path to know what we downloaded
        filename = os.path.basename(old_path)

        # 2. Upload to new_path using existing PutFile
        # We'll assume new_path includes the desired filename. If it does not, adjust logic.
        put_response = self.PutFile(
            file_name=filename,
            storage_path=new_path,  # e.g. "folder/newfile.jpg"
            local_upload_file_path="temp"  # where we downloaded it
        )
        if put_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to upload file to new path. Reason: {put_response.get('msg', 'unknown')}",
                "HTTP": put_response.get("HTTP")
            }

        # 3. Delete the original file using existing DeleteFile
        delete_response = self.DeleteFile(old_path)
        if delete_response.get("status") != "success":
            return {
                "status": "error",
                "msg": f"Failed to delete old file. Reason: {delete_response.get('msg', 'unknown')}",
                "HTTP": delete_response.get("HTTP")
            }

        # (Optional) Clean up the local temp file
        local_temp_path = os.path.join("temp", filename)
        if os.path.exists(local_temp_path):
            os.remove(local_temp_path)

        return {
            "status": "success",
            "msg": f"File successfully moved from '{old_path}' to '{new_path}'."
        }
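
# Example usage of MoveFile (a sketch, not part of the original module). The
# download/re-upload/delete sequence exists because the Storage API apparently
# has no server-side rename:
#
#     result = storage.MoveFile("media/old/photo.jpg", "media/new/photo.jpg")
#     print(result["status"], result["msg"])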
@@ -0,0 +1,24 @@
import config

db, cursor = config.gen_connection()

cursor.execute("SELECT DISTINCT username FROM media WHERE user_id IS NULL AND platform = 'instagram';")
usernames = [username[0] for username in cursor.fetchall()]

for username in usernames:
    print(f"Username: {username}")

    cursor.execute("SELECT DISTINCT user_id FROM media WHERE username = %s AND user_id IS NOT NULL;", [username])
    possible_user_ids = [user_id for user_id, in cursor.fetchall()]
    if len(possible_user_ids) == 0:
        print(f"No user_id found for {username}")
        continue

    if len(possible_user_ids) > 1:
        print(f"Multiple user_ids found for {username}: {possible_user_ids}")
        continue

    user_id = possible_user_ids[0]
    cursor.execute("UPDATE media SET user_id = %s WHERE username = %s AND user_id IS NULL;", [user_id, username])
    db.commit()
    print(f"[{cursor.rowcount}] Updated user_id for {username}")
File diff suppressed because one or more lines are too long
@@ -0,0 +1,78 @@
import os
import config
import logging

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler()  # or use logging.FileHandler('script.log')
    ]
)

# Prepare database connection
db, cursor = config.gen_connection()

# Ensure local temp directory exists
TEMP_DIR = "temp"
os.makedirs(TEMP_DIR, exist_ok=True)

URL_PREFIX = "https://storysave.b-cdn.net/"

# Retrieve records from database
query = """
    SELECT id, date, media_url, platform, username, hash
    FROM media
    WHERE media_url LIKE '%none%';
"""
cursor.execute(query)
rows = cursor.fetchall()

# Initialize Bunny.net Storage (credentials redacted)
obj_storage = config.get_custom_storage()

count = 0
total = len(rows)
for row in rows:
    count += 1
    pin_id, date, media_url, platform, username, file_hash = row
    logging.info(f"[{count}/{total}] Processing screenshot ID: {pin_id}")

    # 1. Extract the server path (remove domain and query)
    serverPath = media_url.replace(URL_PREFIX, "").split("?")[0]

    filename = os.path.basename(serverPath)
    filename = filename.replace("none", file_hash).replace("None", file_hash)

    filepath = os.path.join(TEMP_DIR, filename)

    # Date components (currently unused in the new path)
    year = date.year
    month = str(date.month).zfill(2)
    day = str(date.day).zfill(2)
    formatted_date = os.path.join(str(year), month, day)

    # 2. Build the new path; storage paths and URLs need forward slashes,
    # so avoid os.path.join here (it would use backslashes on Windows)
    newPath = "/".join(["media", "stories", username, filename])
    new_media_url = f"{URL_PREFIX}{newPath}"

    # 3. Move file to new path
    logging.info(f"Moving screenshot from {serverPath} to {newPath}")
    status = obj_storage.MoveFile(serverPath, newPath)

    if status['status'] != 'success':
        logging.error(f"Failed to move file {serverPath} to {newPath}. Error: {status['msg']}")
        continue

    # 4. Update DB
    logging.info(f"Updating DB record {pin_id} to new URL\n{new_media_url}\nhttps://altpins.com/pin/{pin_id}")
    cursor.execute("UPDATE media SET media_url = %s WHERE id = %s", [new_media_url, pin_id])
    db.commit()

    logging.info(f"Successfully processed screenshot {pin_id}")


# Close the DB connection
cursor.close()
db.close()
logging.info("All done!")
@@ -0,0 +1,20 @@
from storysave_api import get_hd_profile_picture
import config, funcs, os


db, cursor = config.gen_connection()

cursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND username IN (SELECT username FROM following WHERE platform = 'instagram');")
usernames = cursor.fetchall()

for username, user_id in usernames:
    profilepicurl = get_hd_profile_picture(user_id=user_id)
    if not profilepicurl:
        continue

    filename = os.path.basename(profilepicurl).split('?')[0]
    user_dir = os.path.join('media', 'instagram', 'profile', username)
    filepath = os.path.join(user_dir, filename)

    os.makedirs(user_dir, exist_ok=True)  # make sure the target directory exists
    funcs.download_file(profilepicurl, filepath)
    print(f"Downloaded profile picture for {username}.")
@@ -0,0 +1,126 @@
import os
import json
from tqdm import tqdm

from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username

import altpinsConfig

def get_data(filepath):
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        print(f"Error reading {filepath}")
        return None

def process_story(story, username, story_type, db, cursor):
    snap_urls = story.get('snapUrls', {})
    media_url = snap_urls.get('mediaUrl', '').split('?')[0]
    media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1]

    if media_id in existing_media_ids:
        return False

    media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}"

    # default to {} so a missing mediaPreviewUrl doesn't break the .get('value') chain
    media_preview_url = snap_urls.get('mediaPreviewUrl', {}).get('value', '').split('?')[0]
    media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}"

    timestamp = story.get('timestampInSec', {}).get('value', '')
    media_type = story.get('snapMediaType')
    snap_id = story.get('snapId', {}).get('value', '')

    query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
    db.commit()

    existing_media_ids.add(media_id)

    print_emoji = '✅' if cursor.rowcount else '❌'
    print(f"{print_emoji} Inserted story {media_id}")

def process_json(json_path, db, cursor):
    """
    Given a path to a JSON file, parse it and insert relevant data
    into the database.
    """

    # Load JSON data
    data = get_data(json_path)
    if data is None:
        return

    username = get_username(data)

    ready_stories = []

    # Insert stories (regular)
    stories = get_stories(data)
    for story in stories:
        story['storyType'] = 'story'
        ready_stories.append(story)

    # Insert stories (highlights)
    highlights = get_highlights(data)
    highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])]
    highlight_stories.sort(key=lambda x: x.get('snapIndex'), reverse=True)
    for story in highlight_stories:
        story['storyType'] = 'highlight'
        ready_stories.append(story)

    for story in ready_stories:
        story_type = story.get('storyType')
        process_story(story, username, story_type, db, cursor)

    # Insert spotlight metadata
    spotlight_metadata = get_spotlight_metadata(data)
    for story in spotlight_metadata:
        try:
            media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1]
            deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0]
        except (KeyError, TypeError):
            continue

        if not all((media_id, deepLinkUrl)):
            continue

        if deepLinkUrl in existing_spotlights:
            continue

        deepLinkId = deepLinkUrl.split('/')[-1]
        description = story['description']

        insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
        cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId))
        db.commit()

        existing_spotlights.add(deepLinkUrl)

        print_emoji = '✅' if cursor.rowcount else '❌'
        print(f"{print_emoji} Inserted spotlight {media_id}")

    os.remove(json_path)


db, cursor = altpinsConfig.gen_connection()

cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
existing_media_ids = {row[0] for row in cursor.fetchall()}

cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
existing_spotlights = {row[0] for row in cursor.fetchall()}

data_dir = 'data'
files = [f for f in get_files(data_dir) if f.endswith('.json')]

# Wrap the file list with tqdm to show a progress bar
for filepath in tqdm(files, desc="Processing files", unit="file"):
    process_json(filepath, db, cursor)

db.close()
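
# Committing once per row is the main cost here. A batched variant (a sketch,
# not part of the original script) could collect the value tuples per file and
# issue one executemany plus a single commit, at the price of losing the
# per-row inserted/skipped emoji feedback:
#
#     cursor.executemany(query, story_rows)
#     db.commit()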
@@ -0,0 +1,66 @@
from snapchat import get_all_users_data, get_stories, get_highlight_stories, get_social_medias, get_related_profiles
import os, config

snapchat_directory = "snapchat"
media_directory = "media"
temp_directory = ".temp"
data_directory = "data"

directory = os.path.join(media_directory, snapchat_directory)

def get_snapchat_stories(usernames):
    usernames = usernames[:5]  # NOTE: only the first 5 accounts are fetched
    snapchat_users_data = get_all_users_data(usernames)
    snapchat_users_data = dict(sorted(snapchat_users_data.items()))

    ready_stories = []

    for username, data in snapchat_users_data.items():
        print(f"Getting stories for {username}...")

        if not data:
            print(f"Failed to get data for {username}. Skipping.")
            continue

        website_url = get_social_medias(data)

        related_profiles = get_related_profiles(data)

        stories = get_stories(data)

        stories.extend(get_highlight_stories(data))

        for story in stories:
            snap_id = story['snap_id']
            url = story['url']
            timestamp = story['timestamp']

            # Determine file extension
            extension = '.jpg' if story['media_type'] == 'image' else '.mp4'

            filename = f"{username}~{timestamp}~{snap_id}{extension}"
            filepath = os.path.join(directory, filename)

            story['media_url'] = url
            story['filepath'] = filepath
            story['username'] = username

            ready_stories.append(story)

    # sort ready_stories by timestamp from oldest to newest
    ready_stories.sort(key=lambda x: x['timestamp'])

    return ready_stories

db, cursor = config.gen_connection()

cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]

stories = get_snapchat_stories(usernames)
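
# The script stops after collecting story metadata. A download pass (a sketch,
# not part of the original; it assumes the funcs.download_file helper used by
# the other scripts in this commit is available here) might look like:
#
#     import funcs
#     for story in stories:
#         os.makedirs(os.path.dirname(story['filepath']), exist_ok=True)
#         funcs.download_file(story['media_url'], story['filepath'])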
@@ -0,0 +1,41 @@
import config
import requests

def is_url_accessible(url):
    try:
        response = requests.head(url, timeout=5)  # HEAD request is usually faster and enough to check availability
        return response.status_code == 200
    except requests.RequestException:
        return False

media_names = ['mediaUrl', 'mediaPreviewUrl']

db, cursor = config.gen_connection()

for media_type in media_names:
    cursor.execute(f"SELECT id, {media_type} FROM snapchat_stories WHERE {media_type} NOT LIKE 'https://cf-st.sc-cdn.net/d/%' AND status != 'inactive'")

    rows = cursor.fetchall()
    total = len(rows)
    count = 0

    for row in rows:
        count += 1
        record_id, original_url = row

        media_id = original_url.split('/')[-1]
        new_url = f'https://cf-st.sc-cdn.net/d/{media_id}'

        if is_url_accessible(new_url):
            print(f"✅ [{count} / {total}] {new_url} is accessible (converted from {original_url})")

            cursor.execute(f"UPDATE snapchat_stories SET {media_type} = %s, status = 'updated' WHERE id = %s", (new_url, record_id))
            db.commit()
            continue

        print(f"❌ [{count} / {total}] {new_url} is NOT accessible (original: {original_url})")
        cursor.execute("UPDATE snapchat_stories SET status = 'inactive' WHERE id = %s", (record_id,))
        db.commit()

cursor.close()
db.close()
@@ -0,0 +1,140 @@
from datetime import datetime
from uuid import uuid4
import funcs
import config
import cv2
import os

directory = 'processed_tiktoks'

def UploadMedia(media):
    platform = 'TikTok'
    username = media['username']
    filepath = media['filepath']
    file_size = os.path.getsize(filepath)
    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    if not media_type:
        print(f'Error determining media type for {filename}. Skipping...')
        return False

    post_type = funcs.determine_post_type(filepath)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_hashes:
        print(f'File {filename} already exists. Skipping...')
        return False

    post_date = datetime.now()

    width, height = funcs.get_media_dimensions(filepath)

    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        try:
            thumb_path = generate_thumbnail(filepath)
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')  # this might be a problem in case of duplicate hashes
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
            os.remove(thumb_path)
        except Exception:
            print('Error generating thumbnail. Skipping...')
            return False

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/tiktoks/{username}/{newFilename}'

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow: uploads the whole file synchronously

    post_type = 'story' if post_type == 'stories' else 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # remember the hash so duplicates within the same run are skipped too
    existing_hashes.add(file_hash)

    os.remove(filepath)

    return True

def generate_thumbnail(filepath):
    os.makedirs('temp', exist_ok=True)
    thumb_path = f'temp/{uuid4()}.jpg'
    cap = cv2.VideoCapture(filepath)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        raise ValueError(f'Could not read a frame from {filepath}')
    cv2.imwrite(thumb_path, frame)
    return thumb_path
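
# The thumbnail above is taken from the very first frame, which is often black
# or a title card. A variant (a sketch, not part of the original) could seek a
# short way into the clip first:
#
#     cap = cv2.VideoCapture(filepath)
#     cap.set(cv2.CAP_PROP_POS_FRAMES, 30)  # ~1 second in at 30 fps
#     ret, frame = cap.read()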

def get_media_data(filepath):
    # strip the extension so the last part is the bare ID, not "id.mp4"
    filename = os.path.splitext(os.path.basename(filepath))[0]
    parts = filename.split('~')

    if len(parts) == 3:
        username, title, tiktok_id = parts
    elif len(parts) == 2:
        username, title = parts
        tiktok_id = None
    else:
        return False

    data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}

    return data

def get_media(folder_path):
    medias = []

    users = os.listdir(folder_path)
    for user in users:
        user_folder = os.path.join(folder_path, user)
        if not os.path.isdir(user_folder):
            print(f"Skipping {user}")
            continue

        files = os.listdir(user_folder)
        for filename in files:
            filepath = os.path.join(user_folder, filename)

            data = get_media_data(filepath)
            if data:
                medias.append(data)

    return medias

def dump_tiktoks(folder_path):
    medias = get_media(folder_path)

    for media in medias:
        UploadMedia(media)

if __name__ == '__main__':
    print('Starting processing...')

    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    newDB, newCursor = config.gen_connection()

    obj_storage = config.get_storage()

    newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
    existing_hashes = {row[0] for row in newCursor.fetchall()}  # set: O(1) membership checks

    dump_tiktoks(directory)

    print("Processing completed.")
@@ -0,0 +1,58 @@
from uuid import uuid4
import uuid
import os

def is_valid_uuid(uuid_to_test, version=4):
    try:
        uuid_obj = uuid.UUID(uuid_to_test, version=version)
    except ValueError:
        return False

    return str(uuid_obj) == uuid_to_test

source_dir = 'tiktoks/'
processed_dir = 'processed_tiktoks'

os.makedirs(processed_dir, exist_ok=True)

users = os.listdir(source_dir)

for user in users:
    user_dir = os.path.join(source_dir, user)
    if not os.path.isdir(user_dir):
        print(f"Skipping {user}")
        continue

    for file in os.listdir(user_dir):
        filename = os.path.splitext(file)[0]
        filepath = os.path.join(user_dir, file)
        file_ext = os.path.splitext(file)[1]

        tiktok_id = str(uuid4())
        username = user

        if is_valid_uuid(filename):
            title = ''
            tiktok_id = filename
        elif 'masstik' in file:  # also matches 'masstiktok'
            title = filename.split('_')[-1]
        else:
            title = filename


        print("="*100)
        title = title.encode('utf-8', 'ignore').decode('utf-8')
        print(f"Username: {username}\nTitle: {title}")

        new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
        new_filepath = os.path.join(processed_dir, username, new_filename)

        os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
        if not os.path.exists(new_filepath):
            os.rename(filepath, new_filepath)
            print(f"Renamed {file} to {new_filepath}")
        else:
            print("File with the same name already exists. Renaming aborted.")

        print("="*100)
@@ -0,0 +1,124 @@
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
import requests
import base64
import re
import os

def format_url(url):
    clean_url = re.sub(r'%[0-9A-F]{2}', '', url)
    return clean_url

def encode_offset(offset_num):
    offset_base64 = str(offset_num).encode('utf-8')
    offset_base64 = base64.b64encode(offset_base64).decode('utf-8')
    return offset_base64

def get_clips(username):
    url = 'https://gql.twitch.tv/gql'

    # currently unused; see the pagination sketch after this function
    offset_num = 20
    offset_base64 = encode_offset(offset_num)

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'

    headers = {
        'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',
        'Content-Type': 'text/plain;charset=UTF-8',
        'User-Agent': user_agent
    }

    data = {
        "operationName": "ClipsCards__User",
        "variables": {"login": username, "limit": 100},
        "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "4eb8f85fc41a36c481d809e8e99b2a32127fdb7647c336d27743ec4a88c4ea44"}}
    }

    response = requests.post(url, headers=headers, json=data)

    clips = response.json()

    clips = clips['data']['user']['clips']['edges']

    cleaned_clips = parse_clips(clips)

    return cleaned_clips
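
# get_clips fetches only the first page of clips. A paginated variant (a
# sketch, not part of the original; the "cursor" variable name is an assumption
# about this persisted GQL query) would pass the base64 offset produced by
# encode_offset above:
#
#     data["variables"]["cursor"] = encode_offset(offset_num)
#
# and keep requesting while the response indicates another page of edges.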


def parse_clips(clips):
    """
    clips is the list of edge dictionaries from the GQL response
    """

    cleaned_clips = []
    for clip in clips:
        clip = clip['node']

        clip_id = clip['id']
        clip_url = clip['url']
        clip_title = clip['title']
        clip_view_count = clip['viewCount']
        clip_duration = clip['durationSeconds']

        cleaned_clip = {
            'id': clip_id,
            'url': clip_url,
            'title': clip_title,
            'views': clip_view_count,
            'duration': clip_duration
        }

        cleaned_clips.append(cleaned_clip)

    return cleaned_clips

def get_video_url(video_url, driver):
    driver.get(video_url)

    # Get the video element
    video = driver.find_element(By.TAG_NAME, 'video')

    # Get the video source
    video_src = video.get_attribute('src')

    return video_src

def download_video(video_url, filepath):
    if os.path.exists(filepath):
        return filepath

    # stream=True so the body is fetched lazily and written in chunks below
    video = requests.get(video_url, stream=True)

    # Download in chunks
    with open(filepath, 'wb') as f:
        for chunk in video.iter_content(chunk_size=1024):
            f.write(chunk)

    return filepath


# Set up an undetected Chrome driver in headless mode
opts = uc.ChromeOptions()
opts.add_argument("--headless")
opts.add_argument("--window-size=1920,1080")

driver = uc.Chrome(use_subprocess=True, options=opts)

username = 'didicandy666'
clips = get_clips(username)

os.makedirs('clips', exist_ok=True)

for clip in clips:
    clip_url = clip['url']  # parse_clips stores the link under 'url'

    filename = f"{clip['id']}.mp4"
    filepath = os.path.join('clips', filename)

    if os.path.exists(filepath):
        print(f"Already downloaded {filename}")
        continue

    video_url = get_video_url(clip_url, driver)

    download_video(video_url, filepath)
    print(f"Downloaded {filename}")

driver.quit()