From 322e39b51f1ea25da4b9f6d370457be5a1cc91ec Mon Sep 17 00:00:00 2001
From: oscar <oscar@MacBook-Pro.local>
Date: Sun, 1 Dec 2024 17:02:09 +0200
Subject: [PATCH] update

---
 .DS_Store                                     | Bin 20484 -> 18436 bytes
 .gitignore                                    |   1 +
 .../dedupe_phash.py                           |   0
 .../dupecleaner_phash.py                      |   0
 .../find_by_phash.py                          |   0
 .../find_duplicates_by_phash.py               |   0
 .../find_duplicates_by_phash_videos.py        |   0
 .../image_dupe_cleaner.py                     |   0
 dedupe_scripts/snappy_duplicates.py           | 109 ++++++++++++++++++
 funcs.py                                      |   3 -
 .../organize_tiktoks.py                       |   0
 snappy.py => old/snappy.py                    |  26 ++++-
 update_snap_id.py => old/update_snap_id.py    |   0
 snapchat.py                                   |   2 +-
 snappy_master.py                              |  86 +++-----------
 storysave_dump_tiktok.py                      |   2 +-
 storysave_dump_tiktok_process.py              |   4 -
 storysave_dump_unknown.py                     |  10 +-
 18 files changed, 163 insertions(+), 80 deletions(-)
 rename dedupe_phash.py => dedupe_scripts/dedupe_phash.py (100%)
 rename dupecleaner_phash.py => dedupe_scripts/dupecleaner_phash.py (100%)
 rename find_by_phash.py => dedupe_scripts/find_by_phash.py (100%)
 rename find_duplicates_by_phash.py => dedupe_scripts/find_duplicates_by_phash.py (100%)
 rename find_duplicates_by_phash_videos.py => dedupe_scripts/find_duplicates_by_phash_videos.py (100%)
 rename image_dupe_cleaner.py => dedupe_scripts/image_dupe_cleaner.py (100%)
 create mode 100644 dedupe_scripts/snappy_duplicates.py
 rename organize_tiktoks.py => old/organize_tiktoks.py (100%)
 rename snappy.py => old/snappy.py (87%)
 rename update_snap_id.py => old/update_snap_id.py (100%)
diff --git a/.DS_Store b/.DS_Store
index de03c36febe233a79b284312f8a5bdffebfedd1f..dd8492d8676faccc4e9406f245ecfd15b240093d 100644
GIT binary patch
delta 1908
zcmbVNUrbYH6hF83a^+7M7p2f%aa<_27y<jwY(xnKVa$YKGmRRyP$;(|v85d?GKUub
z#mxC4o)<M{(JV$2HF3EyaoLM8nNMnXRv+Ah*~6xh#4KSM)AJQ<?@gOx*5vm4zI(p&
zeZO;lzkANf%iPJcTrB`_U6ZOCz)#a+de|tL>2>jJt#{2KS_$+5Kn+bL;-N?^H2cj}
zboB{ECT7BpW(z(Nr=>REk!EGu{fWobthl`%HwNt9$|Bk#LIPqi0Z9mhs;#OOgrARA
zm`XS@(XY5nt1PY-G>7>G&(&+Ur7ui)+L8Y_G*#1GYszq=e}nebRxigV#=gVZ_EBb#
zBBSn|Be7_F>0GOTZ!I#uw3LeCwyDScDO|KTvQt@#@7HfvfVNw;1Jzwz-bl39HTSu}
zqW}8obRiu|6yniXcQ;E23!~L{`annOL8sBK7PK26h!RS25*Js0n+ssx>&1{<g9|mS
z+MHeDaFb2s_TnFngX!0M)|T^c&P-uYmhsryG5&7=Hm6+wYvO!pR&0Y+J*hOZRv3jb
zNa~Sa4_q1OJ+h2EW6vVbXEz}KIDjXVH)wIx;b#s1<e=9AcEuz>(DyqYwKr7huioOc
z@4ru1GXp+c-^D}lELJ+ARlQ#ZT$739L>hBH$!8qGIN>;#b2w93i2y1L6Dh-}_<h=~
zZVy+wi<A*ADY5$l_^PK3=h_8)zPZts*{K(cJn+JSLr$xB^Ttp8+f$fw%I~K?I%Qry
zKZn0M2iAQ=w~&Hi65=!lN!25y>M>d+A%Q8!sl4E33cBfG0PMI#@fO^#PhQv4=-0n3
z@m9cR1`^R&mdp=xf`ox!3UMW12X{`4M`Ginw2yt*Rhc|#bxjJNn;E9);~VeI9=ysm
zpt;4mHAEE#cw#yNsy@oQwqGL4QD&^!aF(n%)M7IocHwl3jAuOYI*E7*lOj|&L_F<F
zSD#>-w+PLL@t*j7E+P_A{R)nt>c7BpeA&2nW<?hx#a61NR7?xvGbUi+S!`3^QenyC
zH$wq3L8b#^j@jH4b~O4K=~f8i${^E}V`)bcpA^;Rui2N~o3YZr4R3YZu;<gixFH;F
z_n_Jq*5*5#sEYMo#5*G9<)YlsHuQNH=@?e|DkPb|R#uPAQkcK}9JC)c+{shBGeWzv
zPGn(Ge=DQEp7%WtK%rjeGE*1&lgEm_Hr5fDhFN!GBO{D0SZn+ohx#ty?vAGa5i-tq
G+TQ`tYT~;9

literal 20484
zcmeHPZ;Vt|6+d@(fms&VVgJAaYPTgsQYh{Mg%UI@TPjdgR+dsIe`aTA-Y(Ofoj1(P
zvMeigKd5O-YpOA6s-c=_Tj~d>BvzvK0~i(41c-^6Xl*}``o*^Pi>QePf9Jk8Z_d0w
zGqcE2z<J4g_sx6foOka%@BHpL=iNI*q`F7qokUxRD1@EM@GPRbnMCa<JFw?BzR$a7
ztHl+C{X5vFfuAT&5z11EqLlVfS1(k1pxOhsQV-OiGHk<j*k_YT$s+rW(Lva8j<Si^
z;dFn}?1!<x(4S5wvbm0qw^dDT-L&a5LiIBnX3d_{7@phM)Y#nI(mF5PwjdI2?vKWX
z1~cYJD*m9EG&9}F5hD_A%pNpHx5ms=PsTW$Fh+Ohj3c?;#8KR8axv^M;s*+sc8;Y%
z(^9ygg>KFmMr!Y1CK2C0FpxEJyT$pQefn8BV|Z(7z=V8$-u84ZVWzUXj7%2go@gc$
zO${2|WBZfQej~X()otc-=J4(WYRI+H;(F83SRysJc*BR5=IR%BEw#?VwAeZUI=1A`
z>{9KCM{~x;c-)ACq0hRyxNDnzJ48!pJ$;^b(GVS_Z_o*Pm0qVe>34L6-lKmirD{}z
znyu!k`D&?JsaB~^Tbwlvx4bR1iu_SMgfl=ea5@jaw)*-<v$pc+Cwb-lUfIf<5o!qS
z?}bu&bJ5&LwmX{H%fEJ-X0ktj9_`<Su0!uXXr^+}M9Ro$hD6)|$-YMuskk}X&5F)y
z-d^iQs6JF53C|xN?^xdcz?x&r+s9XS+`nddduQjmb;pj?hguhZYW<GLP;xkBK4mpR
z<(oD;-zd9$8s&7oXYr0^@J1RG7tQ+VO>fr1@$t^JE1i~E=bfuox?A>))q=p5J&RE?
zZ)FjTng)%Ec1^LmP&d2BLk(6vtH){&RC}P>15OVxKD1-!vdb<ftWrY<4(vkkeRd)A
zhI7OL`dYhPcG=~GRcco7XjYUKtFl83WwGOWJCmdAvdampwAe{m?4;}`D?36_`sv_n
z*W{$Stg?En_CU1<ygjf4m!`vMbUdr%U}ta49M<vs^p~3FxbF{}n>Ja|0EPar=M~yC
z`m2pzns`~7^g;2QQ<u4BMfEt`n@J_j)F7_2$%j$LoUY{<&Z2Y}-cm&884|Pte$?33
zq#3j8#Srj5*gm+XH~b=+>&qXf5gG=!pM;+S-JTN?=9iQZfgJdylt7()q^Z5G`m~pk
zmV=}r-Qpp%GY!Ajgs*6TE9)npUPCC?Bx<{$jQkP!n?u-58pYA4#`FbO`&)b^W>Kps
zB(V3I@sp$Ob;+6OBPX+YM^DE{HkW~>>hNS!%OmooE!MR9D&(KET7K;s_<eb9o?ktc
zVUTarM0oz&d?@rzIGm61oB+M4d2Y+x*4BCR+gcYaT(r<XNS871REFoA42WgSWU|N1
zCO9guAYk|K>@FL^dvgv>An!!uegXaBu4`vI7&gj8_GzXes)zHkj5fL=`k&DS`ZN8F
z{-tK9u)0SrQ>)cy)CRRheNpXDF*U4GYSa?o#PuJye&>(s2+n-9E@ArVe{4hA)}Kzo
zb?)6W9jCvf;o4W`uvviZa<me@kEeUM!oD(HFh1MQ_<dpHILvyj05m@U&j1>A*z6|(
z`k>kdA)!vE4gmKq067J^PYM8W=mcoo&PfJ_R8E?p<pL|dmB)%|Di6*@iB#Usl$kP+
z#j|bQ!425_WU_eosTFE1n+e`)JBIOO@w#x+(Ur7ts(JSnc~4sPrGMYs^JB}Z9bRe4
ze)pyI>J+eeuFGN@;3B1w8a6S^$5C=h7(>0whL#oF4?PG6a0!b#n}7dqS~jw6e0=4)
zd^$2PX~*eCY=KET;1UJ2PHIdY8+<mHQtB_h>dIdqw4CSwxl)_usikh!6Md*eP<>sR
zX3FYT?SX0!{2%oI4@q@!p9T?c<|Keo^SY18dlIs6J+}Usb|f8M2~vLdB`ndR4?#k6
z^3T<SN)GX3g8DJs@Bou_4KVM3ev9R#nK7Ktba)e}JIApZGF&`}xoL%P0H=Y6^9Ct8
zEL+-qap){4#a-Ff_1`?1&v6t{6kKJ03fbP4C!fOu0K*X-%a7@pnZNvS4kIG#0%dT<
zBY;xY>vKOk`qb4it%E@7LLJLf%lg_m+(!L%%D|Cx5D6N<(V!zZ<6KPE6|+Zm285^#
zmrF+%&|0aZGTg#}xV>HSGVPq4qdvcF{zn_m-+-^}*Bh#PBlMlA=rB9%Iw-Y*b9U4>
zQxD$W937|U>HEl+{gN&rJpYUS+&)t^t5)?1b(iW;_p3E(1Dk6ZN4<;&rB&CRXhD!!
z&N8HRUokJ;y}`2XGS88?TRE4s_PQ5e<HnYGj>}oB2P$xuZUAU72we4KU~ASurnKxj
zgby4%MDG|Ic3LELJ@3fAV`#Z^Pipn#r9aibkq_1E-jMq9yDv30?+h!dAls#rM)WY}
zpgCaUktMBX@MLlH)6WkeN$RJ?;fx{7GoULgr&~r2eHn6I6r*fdzY{4)i(oV#D_0NB
z)EDtD68u(Ml6c2B%dGu^%A_hDS7fd3nY7dhe1(MWQ4`~xPY-fVzvvmQ;u$<ba*es>
zWhH4_k+ls$5@+aR{!gx9-AVdZJZBis8`OF)yaC0U^l1ZYPt6h9Yt)j`+Mmjy=G^a=
zr{c%WUs)vg40s9#biM*d)ZR7L^EVJlJxzNI`kRT1650!*6zDO<Gi0VRlP{Vs!Goxc
zzh{fP5<bFboX>4V6a_cWpF%<iQ9B+WpWp(KTBsfq0gDyFFKCfMY^pm$8Fpie6mhic
z1iHz-BbD&_X(t|V?iLi_y<61z<2vrK`CJ`eVg}}&(arqcRkvHL2bQ8fCEh$~6Fc8O
z^uyBB(oUAV_1(M99o6w^3Hj3dsf<I~(<A%axjRe9ciC8owqsmMVuboE#<dh>g=O8|
z@ua6^QgbzL4z{76JTy{Y55K+cKlfQ_Xdk))!j+<vx~aL`!MgFta%rSldm4jU8wpH?
zX1`U=eem_X*#9XSj;6ua_4PTlfAfU!Z)Si}%%up(VYsM@2SYu#q=ZcIPTj^HimMAm
zLgIY>%T0+{2AF2h+O}Z9qK`H$;h8aTLuVOyGZxO$fy!nX1f?zt9~;ZgD_Br8uVAZs
zOhr_min8U2^aUq!#iky|50`!=T+n0B@9|Pdu(*8Sy$h=?uyDzdfF4XGfr9q-aot>U
zTre@W*lYn499|XyMSv++z<J9U?5fqPv8uplB|#-?Br0g=4K-rtWxsvr33)A8&VLd{
zoaQ%_$Zx7p2_xQSaoI-vF!dr6GfH2lZ_~?k7Wv0NVzJLvRflCha&pjQjJN`wOT#{Q
z^>PF1Q~{SX_7@&``O2$Dv?o^)XTevP9H#_@^LOh=se<RZfT9T*lNf&s(isTOeVKp>
zUX^Oi9!=nS(3gvfcyAf72%N!C30PFr)a!zag#pX|q-EW{h~a>q7dix7*oHqDxNznd
zO?EC}9xf6V*f?{G{Kou>v@QXIE}mjJ74b>PIB@FFsjpf7L4~{p(3mW*gblhh!QD{N
zGhM*J^9!x%)d@gG3XowZ=n+^vj(fP~3IjOtEfeUTwU+@N3D@wJ*?8rur{dRR_)QA~
zoPDpQ+yUk<(abS?`0bod!B%EMq4j;TPm{u_DE1S2X&)V;3H_DwG@YZ1SQPwsx~7){
zv!AdWUntkAF0~Pph#ytED{}Td8ZI*O+6Or4?4gkcJ^I#}UVD<E4_(1{OVJe>YHb&{
z-XqIplssM7`ZtOEI@thY)axr`Oy1zb>G?*feuF%M<eKH-vL_h3Oql_~K>Y*hT~NW_
zxFl2r^4&7t#s9QiJFs~ISbkm2BIv&x@PWtt9}qae;y^@Y03-f<0Fcy&caOJnOBB4R
zgt#))fW;i>R+zXSsBVcewR4|*qZF+~oA3SN{8!Ffx#K|DGSN^8y+ohdRvpj<%66ep
zy@Lus5Wg?MH7~hvWKIzWyApxx%xW+LuLy&RR>8S&gum8)AhMa3a)@}+as{!HFu(KQ
zm|?MR;h69t3|g-L|LNlE)vekC)gHLBdqCA~>fO|hHs;r*xSPvs?c1^UVdstMCahAk
z;9xJu*Y`rW#WX#3*~cYsxHe&xnhn1#wwv}R0IB$O{FM-o&{MZou{!>1UwX7U{_BNP
Lu2t|;H~#+@|08FZ

diff --git a/.gitignore b/.gitignore
index fd9dd52..5a4bdc2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # python files
 *.pyc
 __pycache__
+*.DS_Store
 
 # Content
 storysaver
diff --git a/dedupe_phash.py b/dedupe_scripts/dedupe_phash.py
similarity index 100%
rename from dedupe_phash.py
rename to dedupe_scripts/dedupe_phash.py
diff --git a/dupecleaner_phash.py b/dedupe_scripts/dupecleaner_phash.py
similarity index 100%
rename from dupecleaner_phash.py
rename to dedupe_scripts/dupecleaner_phash.py
diff --git a/find_by_phash.py b/dedupe_scripts/find_by_phash.py
similarity index 100%
rename from find_by_phash.py
rename to dedupe_scripts/find_by_phash.py
diff --git a/find_duplicates_by_phash.py b/dedupe_scripts/find_duplicates_by_phash.py
similarity index 100%
rename from find_duplicates_by_phash.py
rename to dedupe_scripts/find_duplicates_by_phash.py
diff --git a/find_duplicates_by_phash_videos.py b/dedupe_scripts/find_duplicates_by_phash_videos.py
similarity index 100%
rename from find_duplicates_by_phash_videos.py
rename to dedupe_scripts/find_duplicates_by_phash_videos.py
diff --git a/image_dupe_cleaner.py b/dedupe_scripts/image_dupe_cleaner.py
similarity index 100%
rename from image_dupe_cleaner.py
rename to dedupe_scripts/image_dupe_cleaner.py
diff --git a/dedupe_scripts/snappy_duplicates.py b/dedupe_scripts/snappy_duplicates.py
new file mode 100644
index 0000000..0b7138c
--- /dev/null
+++ b/dedupe_scripts/snappy_duplicates.py
@@ -0,0 +1,109 @@
+import os, config, funcs, cv2, imagehash
+from PIL import Image
+
+directory = "old_snapchats"
+duplicate_dir = 'dupelicate_snaps'
+
+
+def generate_video_phash(filepath):
+	"""Return the perceptual hash (as str) of a video's first frame, or None on any failure."""
+	try:
+		cap = cv2.VideoCapture(filepath)
+		ret, frame = cap.read()
+		cap.release()
+		if not ret:
+			return None
+		return str(imagehash.phash(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))))
+	except Exception:  # best-effort hashing, but let KeyboardInterrupt/SystemExit propagate
+		return None
+
+def get_snapchat_files():
+	"""Return a media dict for each file from funcs.get_files(directory) that parses."""
+	parsed = (get_media_data(path) for path in funcs.get_files(directory))
+	# get_media_data returns False for names with fewer than three '~' parts.
+	return [media for media in parsed if media]
+
+def get_media_data(filepath):
+    """Build a media dict from a `username~timestamp~snap_id.ext` filename.
+
+    Returns False when the basename has fewer than three `~`-separated parts.
+    The id parsed from the filename is stored under 'original_snap_id';
+    'snap_id' is always None.
+    """
+    fields = os.path.basename(filepath).split('~')
+    if len(fields) < 3:
+        return False
+
+    username, timestamp, raw_id = fields[0], fields[1], fields[2]
+    original_snap_id, _ext = os.path.splitext(raw_id)
+    return {'username': username, 'timestamp': timestamp, 'filepath': filepath,
+            'snap_id': None, 'original_snap_id': original_snap_id}
+
+def process_snap_ids(filenames):
+    """Return the distinct snap ids found in `filenames`, in first-seen order.
+
+    Each name must look like `username~timestamp~snap_id.ext`; the id is the
+    third `~`-separated field with its extension removed.
+    """
+    stems = (os.path.splitext(name.split('~')[2])[0] for name in filenames)
+    # dict.fromkeys keeps insertion order, so duplicates collapse in place.
+    return list(dict.fromkeys(stems))
+
+def find_duplicate_snap(existing_snaps, current_snap):
+	"""Return the first row in existing_snaps that duplicates current_snap, or False."""
+	filepath = current_snap['filepath']
+	original_snap_id = current_snap['original_snap_id']
+	username = current_snap['username']
+
+	snap_hash = funcs.calculate_file_hash(filepath)
+	phash = None  # stays None for other extensions instead of being left unbound
+	if filepath.endswith('.mp4'):
+		phash = generate_video_phash(filepath)
+	elif filepath.endswith('.jpg'):
+		phash = funcs.generate_phash(filepath)
+	# Rows are indexed as (id, filename, username, hash, phash, original_snap_id), per the caller's SELECT.
+	for snap in existing_snaps:
+		if username != snap[2]:
+			continue
+		if original_snap_id in snap[1] or original_snap_id == snap[5]:
+			return snap
+		if snap_hash == snap[3]:
+			return snap
+		# A failed or absent phash is None and must not match rows whose phash is NULL.
+		if phash is not None and phash == snap[4]:
+			return snap
+
+	return False
+
+if __name__ == '__main__':
+	# Move every file in `directory` that already exists in the media table into duplicate_dir.
+	print('Starting snappy...')
+
+	db, cursor = config.gen_connection()
+	obj_storage = config.get_storage()
+
+	# Load the stored snapchat rows once; every local file is compared against them.
+	cursor.execute("SELECT id, filename, username, hash, phash, original_snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
+	existing_medias = cursor.fetchall()
+
+	# One directory scan is enough; the result is a list of parsed media dicts.
+	snap_files = get_snapchat_files()
+
+	os.makedirs(duplicate_dir, exist_ok=True)
+
+	for story in snap_files:
+		print(f"Processing {story['username']}...")
+		original_snap_id = story['original_snap_id']
+
+		# A match means the snap is already stored (see find_duplicate_snap for the criteria).
+		existing_snap = find_duplicate_snap(existing_medias, story)
+		if not existing_snap:
+			continue
+
+		print(f"Snap {original_snap_id} already exists in the database.")
+		# The file keeps its basename when moved into duplicate_dir.
+		new_filename = os.path.basename(story['filepath'])
+		new_filepath = os.path.join(duplicate_dir, new_filename)
+		os.rename(story['filepath'], new_filepath)
+
+	print("Processing completed.")
\ No newline at end of file
diff --git a/funcs.py b/funcs.py
index 728a6fc..fb63241 100644
--- a/funcs.py
+++ b/funcs.py
@@ -29,9 +29,6 @@ def get_files(directory):
             files.append(os.path.join(root, filename))
     return files
 
-import cv2
-import numpy as np
-
 def compare_images(image_path1, image_path2):
     # Load the images in grayscale
     img1 = cv2.imread(image_path1, cv2.IMREAD_GRAYSCALE)
diff --git a/organize_tiktoks.py b/old/organize_tiktoks.py
similarity index 100%
rename from organize_tiktoks.py
rename to old/organize_tiktoks.py
diff --git a/snappy.py b/old/snappy.py
similarity index 87%
rename from snappy.py
rename to old/snappy.py
index 1a2afd6..d70186c 100644
--- a/snappy.py
+++ b/old/snappy.py
@@ -52,6 +52,29 @@ def get_file_extension(url):
 	else:
 		print(f"Unknown content type for media {url}")
 		return None
+	
+def extract_file_type(url):
+	"""Map the numeric type code embedded in a snap media URL to a file extension.
+
+	The code is the second dot-separated field of the URL's last path segment,
+	query string excluded. Returns '.jpg' or '.mp4' for a known code and None
+	otherwise; a last segment with no dot is also reported on stdout.
+	"""
+	extension_by_code = {
+		'400': '.jpg',
+		'1322': '.mp4',
+		'1325': '.mp4',
+		'1034': '.mp4',
+		'1023': '.jpg',
+	}
+	base_url, _, _query = url.partition("?")
+	segments = base_url.rsplit('/', 1)[-1].split('.')
+
+	if len(segments) <= 1:
+		print(f"Unexpected URL format: {base_url}")
+		return None
+	# dict.get yields None for an unrecognised code.
+	return extension_by_code.get(segments[1])
 
 def download_media(url, filepath):
 	if os.path.exists(filepath):
@@ -112,9 +135,10 @@ def main():
 
 			# Determine file extension using HEAD request.
 			# TODO: find a better way to determine file extension without downloading the file.
-			extension = get_file_extension(url)
+			extension = extract_file_type(url)
 			if not extension:
 				continue
+
 			filename = f"{username}~{timestamp}~{snap_id}{extension}"
 			filepath = os.path.join(directory, filename)
 			
diff --git a/update_snap_id.py b/old/update_snap_id.py
similarity index 100%
rename from update_snap_id.py
rename to old/update_snap_id.py
diff --git a/snapchat.py b/snapchat.py
index da2981d..c1167e0 100644
--- a/snapchat.py
+++ b/snapchat.py
@@ -92,4 +92,4 @@ def get_highlight_stories(data):
             story = parse_story(snap)
             stories.append(story)
 			
-    return stories
+    return stories
\ No newline at end of file
diff --git a/snappy_master.py b/snappy_master.py
index 81cea1e..0977274 100644
--- a/snappy_master.py
+++ b/snappy_master.py
@@ -21,7 +21,6 @@ def archive_data(data, username):
 	data_filepath = os.path.join(data_directory, data_filename)
 	with open(data_filepath, 'w') as f:
 		f.write(json.dumps(data))
-	print(f"Archived data for {username} at {data_filepath}")
 	
 def get_file_extension(url):
 	response = requests.head(url)
@@ -64,7 +63,7 @@ def extract_file_type(url):
 		
 def download_media(url, filepath):
 	if os.path.exists(filepath):
-		print(f"File {filepath} already exists. Skipping download.")
+		# File already exists, skip download and return the filepath as if it was downloaded.
 		return filepath
 	
 	response = requests.get(url)
@@ -76,55 +75,6 @@ def download_media(url, filepath):
 		f.write(response.content)
 	return filepath
 
-def get_all_stories(usernames):
-	snapchat_users_data = get_all_users_data(usernames)
-
-	all_stories = []
-	for username in usernames:
-		print(f"Getting stories for {username}...")
-		data = snapchat_users_data.get(username)
-		if not data:
-			print(f"Failed to get data for {username}. Skipping.")
-			continue
-			
-		archive_data(data, username)
-
-		print("Getting stories...")
-		stories = get_stories(data)
-
-		print("Getting highlights...")
-		stories.extend(get_highlight_stories(data))
-
-		for story in stories:
-			snap_id = story['snap_id']
-			url = story['url']
-			timestamp = story['timestamp']
-
-			# Determine file extension using HEAD request.
-			extension = extract_file_type(url)
-			if not extension:
-				print(f"Failed to determine file extension for {url}. Skipping.")
-				continue
-
-			filename = f"{username}~{timestamp}~{snap_id}{extension}"
-			filepath = os.path.join(directory, filename)
-
-			media = {
-				'username': username,
-				'timestamp': timestamp,
-				'filepath': filepath,
-				'snap_id': snap_id,
-				'original_snap_id': story['original_snap_id'],
-				'media_url': url,
-			}
-
-			all_stories.append(media)
-			print(f"Media {snap_id} ready for download.")
-
-		all_stories.extend(stories)
-
-	return all_stories
-
 def get_snapchat_stories():
 	os.makedirs(directory, exist_ok=True)
 	os.makedirs(data_directory, exist_ok=True)
@@ -149,10 +99,8 @@ def get_snapchat_stories():
 		
 		archive_data(data, username)
 		
-		print("Getting stories...")
 		stories = get_stories(data)
 
-		print("Getting highlights...")
 		stories.extend(get_highlight_stories(data))
 
 		for story in stories:
@@ -162,7 +110,7 @@ def get_snapchat_stories():
 			
 			duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
 			if duplicate_snap:
-				print(f"Media {snap_id} already exists. Skipping download.")
+				# Snap already exists in the database
 				continue
 			
 			# Determine file extension using HEAD request.
@@ -191,11 +139,25 @@ def get_snapchat_stories():
 
 	return ready_stories
 
+def get_snapchat_files():
+	"""Return the truthy get_media_data results for the files from funcs.get_files(directory)."""
+	candidates = (get_media_data(path) for path in funcs.get_files(directory))
+	# Falsy results are dropped so callers only receive usable media entries.
+	return [entry for entry in candidates if entry]
+
+def main():
+	"""Collect stories from get_snapchat_stories plus files on disk, then hand them to download_stories."""
+	pending = get_snapchat_stories()
+	# Entries parsed from disk are appended after the fetched stories,
+	# so download_stories receives the fetched ones first.
+	pending.extend(get_snapchat_files())
+	download_stories(pending)
+
 def download_stories(stories):
 	for story in stories:
 		# Download the media
 		filepath = story['filepath']
-		url = story['media_url'] if 'media_url' in story else None
+		url = story['media_url']
 		filename = os.path.basename(filepath)
 		timestamp = story['timestamp']
 
@@ -209,17 +171,6 @@ def download_stories(stories):
 
 		UploadMedia(story)
 
-def main():
-	ready_stories = get_snapchat_stories()
-
-	stories_from_files = funcs.get_files(directory)
-	stories_from_files = [get_media_data(filepath) for filepath in stories_from_files]
-	stories_from_files = [story for story in stories_from_files if story]
-
-	ready_stories.extend(stories_from_files)
-	
-	download_stories(ready_stories)
-
 def UploadMedia(media):
 	username = media['username']
 	timestamp = media['timestamp']
@@ -288,7 +239,8 @@ def get_media_data(filepath):
     snap_id = parts[2]
     snap_id = os.path.splitext(snap_id)[0]
 
-    data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None}
+    data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None, 'media_url': None}
+    # data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': None, 'original_snap_id': snap_id, 'media_url': None}
 
     return data
 
diff --git a/storysave_dump_tiktok.py b/storysave_dump_tiktok.py
index 5310642..f4ddf10 100644
--- a/storysave_dump_tiktok.py
+++ b/storysave_dump_tiktok.py
@@ -120,7 +120,7 @@ def dump_instagram(folder_path):
 
 if __name__ == '__main__':
     print('Starting processing...')
-
+    
     if not os.listdir(directory):
         print('No files to process. Exiting...')
         exit()
diff --git a/storysave_dump_tiktok_process.py b/storysave_dump_tiktok_process.py
index 6d08495..59452b1 100644
--- a/storysave_dump_tiktok_process.py
+++ b/storysave_dump_tiktok_process.py
@@ -10,10 +10,6 @@ def is_valid_uuid(uuid_to_test, version=4):
 
     return str(uuid_obj) == uuid_to_test
 
-# file name : masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4
-# file name : masstiktok_aleksandraverse__#fyp #trending #viral #foryou.mp4
-# where the first item is prefix, second is username and after those is the tiktok title
-
 source_dir = 'tiktoks/'
 processed_dir = 'processed_tiktoks'
 
diff --git a/storysave_dump_unknown.py b/storysave_dump_unknown.py
index 9eaa8eb..5d0031e 100644
--- a/storysave_dump_unknown.py
+++ b/storysave_dump_unknown.py
@@ -2,7 +2,7 @@ from datetime import datetime
 import os, config, funcs, cv2
 from uuid import uuid4
 
-directory = 'ready_to_upload'
+directory = 'ready_for_upload/instagram'
 
 def UploadMedia(username, user_id, filepath):
     thumbnail_url = None
@@ -80,8 +80,12 @@ def get_user_id(username):
 def get_media(folder_path):
     medias = []
     
-    for user_folder in os.listdir(folder_path):
-        files = os.listdir(os.path.join(folder_path, user_folder))
+    user_folders = os.listdir(folder_path)
+    for user_folder in user_folders:
+        user_folder_path = os.path.join(folder_path, user_folder)
+        if not os.path.isdir(user_folder_path):
+            continue
+        files = os.listdir(user_folder_path)
         for filename in files:
             filepath = os.path.join(folder_path, user_folder, filename)
             media = {