|
|
|
|
@ -17,6 +17,11 @@ media_types = {
|
|
|
|
|
'profile' : 'profile'
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Make sure one output folder per media type exists under `directory`.
# Only the keys of `media_types` are needed, so iterate the dict directly.
for media_type in media_types:
    os.makedirs(os.path.join(directory, media_type), exist_ok=True)

# Starts empty at import time.
# NOTE(review): presumably populated with already-known media ids before
# clean_dupes() tests membership against it - confirm where it is filled.
existing_media_ids = {}

# NOTE(review): looks like a switch for the custom-media upload path - confirm.
UPLOAD_CUSTOM = False

# JSON file located in `temp_directory`.
# NOTE(review): presumably an on-disk cache of existing_media_ids - confirm.
CACHE_FILE = os.path.join(temp_directory, 'existing_media_ids.json')

# 48-hour time span; by its name, the lifetime of the cache file above.
CACHE_TTL = timedelta(hours=48)
|
|
|
|
|
@ -217,6 +222,12 @@ def get_custom_media(failed_medias):
|
|
|
|
|
|
|
|
|
|
return medias
|
|
|
|
|
|
|
|
|
|
def save_highlight_data(highlights, *, output_dir='highlight_data'):
    """Write *highlights* as JSON to a new, uniquely named file.

    The file is named ``<uuid4>.json`` and placed in *output_dir*, which is
    created first if it does not exist yet.

    Args:
        highlights: JSON-serializable data to store.
        output_dir: Folder that receives the file. Defaults to
            ``'highlight_data'`` relative to the current working directory.

    Raises:
        OSError: If the folder cannot be created or the file cannot be written.
        TypeError: If *highlights* contains values ``json`` cannot serialize.
    """
    # Without this, open() fails with FileNotFoundError on a fresh checkout
    # where the output folder has never been created.
    os.makedirs(output_dir, exist_ok=True)

    filename = f'{uuid4()}.json'
    filepath = os.path.join(output_dir, filename)

    # Explicit encoding keeps the output independent of the platform locale.
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(highlights, f)
|
|
|
|
|
|
|
|
|
|
def dump_instagram():
|
|
|
|
|
medias, failed_medias = get_media()
|
|
|
|
|
medias = clean_dupes(medias)
|
|
|
|
|
@ -224,20 +235,42 @@ def dump_instagram():
|
|
|
|
|
|
|
|
|
|
medias.sort(key=lambda x: (x['username'].lower(), x['timestamp']))
|
|
|
|
|
|
|
|
|
|
# Update new user ids and existing user ids
|
|
|
|
|
new_user_ids = {}
|
|
|
|
|
for media in medias:
|
|
|
|
|
user_id = media['user_id']
|
|
|
|
|
username = media['username']
|
|
|
|
|
|
|
|
|
|
if not media['user_id']:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if username in existing_users:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
existing_users[username] = user_id
|
|
|
|
|
new_user_ids[username] = user_id
|
|
|
|
|
|
|
|
|
|
# Assign user ids
|
|
|
|
|
for media in medias:
|
|
|
|
|
if media['user_id']:
|
|
|
|
|
user_id = media['user_id']
|
|
|
|
|
username = media['username']
|
|
|
|
|
if username not in existing_users:
|
|
|
|
|
existing_users[username] = user_id
|
|
|
|
|
new_user_ids[username] = user_id
|
|
|
|
|
continue
|
|
|
|
|
if media['username'] in new_user_ids:
|
|
|
|
|
media['user_id'] = new_user_ids[media['username']]
|
|
|
|
|
|
|
|
|
|
highlights = []
|
|
|
|
|
for media in medias:
|
|
|
|
|
user_id = media['user_id']
|
|
|
|
|
username = media['username']
|
|
|
|
|
if user_id is None and username in new_user_ids:
|
|
|
|
|
media['user_id'] = new_user_ids[username]
|
|
|
|
|
if not media['highlight_id']:
|
|
|
|
|
continue
|
|
|
|
|
highlights.append({
|
|
|
|
|
"media_id": media["media_id"],
|
|
|
|
|
"user_id": media["user_id"],
|
|
|
|
|
"highlight_id": media['highlight_id'],
|
|
|
|
|
"username": media['username'],
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
# save highlights data into folder highlight_Data
|
|
|
|
|
if highlights:
|
|
|
|
|
save_highlight_data(highlights)
|
|
|
|
|
|
|
|
|
|
for media in medias:
|
|
|
|
|
pinid = UploadMedia(media)
|
|
|
|
|
@ -258,8 +291,13 @@ def clean_dupes(medias):
|
|
|
|
|
print(f'Invalid media_id for file {filepath}. Skipping...')
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Check if media_id is in existing_media_ids OR if filepath contains any '(number)'
|
|
|
|
|
if media_id in existing_media_ids or re.search(r'\(\d+\)', filepath):
|
|
|
|
|
if media_id in existing_media_ids:
|
|
|
|
|
removed_count += 1
|
|
|
|
|
print(f'Found duplicate file {filepath}. Removing...')
|
|
|
|
|
os.remove(filepath)
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if re.search(r'\(\d+\)', filepath):
|
|
|
|
|
removed_count += 1
|
|
|
|
|
print(f'Found duplicate file {filepath}. Removing...')
|
|
|
|
|
os.remove(filepath)
|
|
|
|
|
@ -339,5 +377,5 @@ if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
print("Processing completed.")
|
|
|
|
|
|
|
|
|
|
for mediatype, _ in media_types.items():
|
|
|
|
|
funcs.clean_empty_folders(os.path.join(directory, mediatype))
|
|
|
|
|
# for mediatype, _ in media_types.items():
|
|
|
|
|
# funcs.clean_empty_folders(os.path.join(directory, mediatype))
|