You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			138 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			138 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
# playwright_iframe_extract_debug.py
#
# Debug helper: opens a page in Chromium via Playwright and reports every
# mxdrop embed URL it can find (network, DOM, raw HTML, frames).
import os
import re
import sys
import time

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright

TEST_URL = "https://striphub.cam/play/68f731ea62f66877cc80e54f"  # replace if needed

# Matches an mxdrop embed URL. The tail stops at whitespace, quotes, angle
# brackets and backslashes, so a match taken from inside an escaped JS string
# (\"...\") does not drag the escape character along with it.
MX_PATTERN = re.compile(r"https?://(?:www\.)?mxdrop\.to/e/[^\s\"'<>\\]+", re.I)


def extract_from_html(html: str) -> set:
    """Return every mxdrop embed URL found in raw page markup.

    This is the fallback regex scan over the serialized HTML; unlike the DOM
    walk it also sees URLs that only exist inside inline JS or JSON blobs.

    Args:
        html: Raw markup to scan. Empty or ``None`` yields an empty set.

    Returns:
        A set of the distinct URL strings matched by ``MX_PATTERN``.
    """
    if not html:
        return set()
    # JSON and inline JS commonly escape slashes ("https:\/\/host\/e\/id");
    # undo that first so those URLs are visible to the pattern.
    return set(MX_PATTERN.findall(html.replace("\\/", "/")))


def run():
    """Load ``TEST_URL`` in Chromium and report every mxdrop embed URL found.

    URLs are gathered from four places: network responses seen while the page
    is open, ``src`` / ``data-src`` attributes of ``<iframe>`` tags, a regex
    scan of the serialized HTML, and the URLs of the frames Playwright reports.

    Side effects: prints progress to stdout, writes ``debug_page.png`` and
    ``debug_page.html`` to the working directory, and writes
    ``embedLinks.txt`` when at least one URL was found.

    Errors from navigation, ``page.content()`` or the file writes propagate
    to the caller; the browser is closed first in every case.
    """
    found = set()

    with sync_playwright() as p:
        # Use headless=False while debugging to *see* what's happening
        browser = p.chromium.launch(
            headless=False,
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                           "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                locale="en-US",
                viewport={"width": 1280, "height": 900},
            )
            page = context.new_page()

            # 1) Capture ANY network response hitting mxdrop
            def on_response(resp):
                url = resp.url
                if "mxdrop.to/e/" in url:
                    print("[NET] mxdrop response:", url)
                    found.add(url)

            page.on("response", on_response)

            print("Navigating to", TEST_URL)
            page.goto(TEST_URL, wait_until="domcontentloaded", timeout=60000)

            # 2) Give the page time to settle network/XHR
            try:
                page.wait_for_load_state("networkidle", timeout=20000)
            except Exception:
                print("⚠️ networkidle timed out—continuing")

            # 3) Try a few generic clicks that often reveal the player/iframe.
            # Best effort: a selector that is absent or not clickable is
            # reported and skipped, never fatal.
            for sel in (
                'button:has-text("Play")',
                'button:has-text("I understand")',
                'button:has-text("Continue")',
                'button:has-text("Accept")',
                "#player, .video-player, .plyr__control",
            ):
                try:
                    el = page.locator(sel)
                    if el.count() > 0:
                        el.first.click(timeout=2000)
                except Exception as exc:
                    print(f"[CLICK] {sel!r} skipped: {type(exc).__name__}")

            # 4) Scroll to bottom to trigger lazy-load iframes
            try:
                page.evaluate("""
                    const delay = ms => new Promise(r => setTimeout(r, ms));
                    (async () => {
                      for (let y = 0; y < document.body.scrollHeight; y += 800) {
                        window.scrollTo(0, y);
                        await delay(200);
                      }
                      window.scrollTo(0, document.body.scrollHeight);
                    })();
                """)
                time.sleep(1.0)
            except Exception as exc:
                print(f"[SCROLL] skipped: {type(exc).__name__}")

            # 5) Dump a screenshot & HTML so you can inspect what loaded
            try:
                page.screenshot(path="debug_page.png", full_page=True)
                print("Saved screenshot -> debug_page.png")
            except Exception as exc:
                print(f"[SCREENSHOT] skipped: {type(exc).__name__}")

            html = page.content()
            with open("debug_page.html", "w", encoding="utf-8") as f:
                f.write(html)
            print("Saved HTML -> debug_page.html")

            # 6) Parse DOM for iframes (src and data-src)
            soup = BeautifulSoup(html, "html.parser")
            iframes = soup.find_all("iframe")
            print(f"Found {len(iframes)} <iframe> tags in DOM")

            for iframe in iframes:
                for attr in ("src", "data-src"):
                    val = iframe.get(attr)
                    if val and "mxdrop.to/e/" in val:
                        print("[DOM] iframe", attr, "=", val)
                        found.add(val)

            # 7) Regex over the full HTML (catches script-injected strings)
            regex_hits = extract_from_html(html)
            for u in regex_hits:
                print("[HTML-REGEX]", u)
            found.update(regex_hits)

            # 8) Also list all frame URLs Playwright sees (nested frames)
            for fr in page.frames:
                if fr.url and "mxdrop.to/e/" in fr.url:
                    print("[FRAME] url:", fr.url)
                    found.add(fr.url)

            # 9) Print final result.
            # Sort into a NEW name: on_response still holds `found` and must
            # keep seeing a set if a late response arrives before close().
            results = sorted(found)
            print("\n==== MXDROP RESULTS ====")
            if results:
                for u in results:
                    print(u)
            else:
                title = soup.title.string.strip() if soup.title and soup.title.string else "(no title)"
                print("No mxdrop links detected.")
                print("Page title:", title)
                # Quick hint if you hit a challenge:
                snippet = html[:400].replace("\n", " ")
                if "Just a moment" in snippet or "Cloudflare" in snippet or "cf-chl" in snippet:
                    print("Looks like a Cloudflare challenge / interstitial (human step required).")

            # 10) Save results if any
            if results:
                with open("embedLinks.txt", "w", encoding="utf-8") as f:
                    f.writelines(u + "\n" for u in results)
                print("Saved -> embedLinks.txt")
        finally:
            # Always release the browser, including when navigation or a
            # file write above raised.
            browser.close()


if __name__ == "__main__":
    # Run the extraction only when executed as a script, not on import.
    run()