update
parent
93c9d660f9
commit
322e39b51f
@ -0,0 +1,109 @@
|
||||
import os, config, funcs, cv2, imagehash
|
||||
from PIL import Image
|
||||
|
||||
# Folder scanned for snap files; passed to funcs.get_files() in get_snapchat_files().
directory = "old_snapchats"
|
||||
# Folder that files identified as duplicates are moved into by the script entry point.
# NOTE(review): the path is spelled 'dupelicate_snaps' — confirm nothing relies on
# the misspelling before renaming it.
duplicate_dir = 'dupelicate_snaps'
|
||||
|
||||
|
||||
def generate_video_phash(filepath):
    """Return the perceptual hash of a video's first frame as a string.

    The first frame is read with OpenCV, converted from BGR to RGB, wrapped
    in a PIL image and hashed with ``imagehash.phash``.

    Args:
        filepath: Path of the video file to open.

    Returns:
        The hash as a ``str``, or ``None`` when no frame could be read or
        any error occurred while decoding/hashing (best-effort contract).
    """
    cap = None
    try:
        cap = cv2.VideoCapture(filepath)
        ret, frame = cap.read()
        if not ret:
            return None
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return str(imagehash.phash(Image.fromarray(rgb_frame)))
    except Exception:
        # Stay best-effort, but unlike a bare ``except:`` let
        # KeyboardInterrupt / SystemExit propagate so the script can be stopped.
        return None
    finally:
        # Release the capture handle on every path, including when read() raises.
        if cap is not None:
            cap.release()
|
||||
|
||||
def get_snapchat_files():
    """Collect parsed metadata for the files found under ``directory``.

    Every path returned by ``funcs.get_files(directory)`` is passed through
    ``get_media_data``; entries for which it returns a falsy value are dropped.

    Returns:
        A list of the truthy ``get_media_data`` results, in the order the
        paths were returned by ``funcs.get_files``.
    """
    parsed_entries = map(get_media_data, funcs.get_files(directory))
    return [entry for entry in parsed_entries if entry]
|
||||
|
||||
def get_media_data(filepath):
    """Parse a ``username~timestamp~snap_id.ext`` file name into a dict.

    Args:
        filepath: Path to the file; only its base name is parsed.

    Returns:
        ``False`` when the base name has fewer than three ``~``-separated
        parts. Otherwise a dict with the keys ``username``, ``timestamp``,
        ``filepath``, ``snap_id`` (always ``None``) and ``original_snap_id``
        (the third part with its extension removed).
    """
    pieces = os.path.basename(filepath).split('~')
    if not len(pieces) >= 3:
        return False

    user, stamp, raw_id = pieces[:3]
    # Strip the file extension from the id portion of the name.
    id_without_ext, _extension = os.path.splitext(raw_id)

    # The id parsed from the file name is stored under 'original_snap_id';
    # 'snap_id' is left as None.
    return {
        'username': user,
        'timestamp': stamp,
        'filepath': filepath,
        'snap_id': None,
        'original_snap_id': id_without_ext,
    }
|
||||
|
||||
def process_snap_ids(filenames):
    """Return the unique snap ids found in ``filenames``, in first-seen order.

    The snap id is the third ``~``-separated field of each name with its
    file extension removed.

    Args:
        filenames: Iterable of names shaped like ``username~timestamp~id.ext``.

    Returns:
        A list of distinct snap ids, ordered by first occurrence.

    Raises:
        IndexError: If a name has fewer than three ``~``-separated fields.
    """
    # dict.fromkeys de-duplicates in O(n) while keeping insertion order,
    # replacing the quadratic "not in list" membership scan.
    unique_ids = dict.fromkeys(
        os.path.splitext(filename.split('~')[2])[0] for filename in filenames
    )
    return list(unique_ids)
|
||||
|
||||
def find_duplicate_snap(existing_snaps, current_snap):
    """Return the first existing row that duplicates ``current_snap``.

    Args:
        existing_snaps: Iterable of rows indexed as ``[1]`` filename,
            ``[2]`` username, ``[3]`` file hash, ``[4]`` perceptual hash and
            ``[5]`` original snap id (the column order of the SELECT issued
            by the script entry point).
        current_snap: Dict with the keys ``filepath``, ``original_snap_id``
            and ``username``, as produced by ``get_media_data``.

    Returns:
        The matching row, or ``False`` when no row of the same username
        matches by snap id, file hash or perceptual hash.
    """
    filepath = current_snap['filepath']
    original_snap_id = current_snap['original_snap_id']
    username = current_snap['username']

    snap_hash = funcs.calculate_file_hash(filepath)

    # Only .mp4 and .jpg files get a perceptual hash. Defaulting to None keeps
    # the name bound for every other extension (it previously raised
    # UnboundLocalError when the comparison below was reached).
    phash = None
    if filepath.endswith('.mp4'):
        phash = generate_video_phash(filepath)
    elif filepath.endswith('.jpg'):
        phash = funcs.generate_phash(filepath)

    for snap in existing_snaps:
        # Only rows belonging to the same user are candidates.
        if username != snap[2]:
            continue

        # An empty id is a substring of every filename, so it must not count
        # as a match.
        if original_snap_id and original_snap_id in snap[1]:
            return snap
        if original_snap_id == snap[5]:
            return snap

        # A hash that could not be computed (None) must not be treated as
        # equal to a NULL hash column in the database.
        if snap_hash is not None and snap_hash == snap[3]:
            return snap
        if phash is not None and phash == snap[4]:
            return snap

    return False
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: compare every file found by get_snapchat_files()
    # against the snapchat rows of the media table and move files that
    # already exist there into duplicate_dir.
    print('Starting snappy...')

    db, cursor = config.gen_connection()
    # Not used below; kept in case config.get_storage() has set-up side effects.
    obj_storage = config.get_storage()

    # this script will check if there are any duplicates in old_snapchats folder in the database in table media where platform = 'snapchat'
    cursor.execute("SELECT id, filename, username, hash, phash, original_snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
    existing_medias = cursor.fetchall()

    # Scan the folder once; the earlier duplicate scan whose result was never
    # used has been removed.
    snap_files = get_snapchat_files()

    os.makedirs(duplicate_dir, exist_ok=True)

    for story in snap_files:
        print(f"Processing {story['username']}...")
        original_snap_id = story['original_snap_id']

        # check if the snap is already in the database
        existing_snap = find_duplicate_snap(existing_medias, story)

        if existing_snap:
            print(f"Snap {original_snap_id} already exists in the database.")
            # Move the duplicate out of the source folder, keeping its name.
            new_filename = os.path.basename(story['filepath'])
            new_filepath = os.path.join(duplicate_dir, new_filename)
            os.rename(story['filepath'], new_filepath)

    print("Processing completed.")
|
||||
Loading…
Reference in New Issue