You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
138 lines
5.0 KiB
Python
138 lines
5.0 KiB
Python
|
3 days ago
|
# playwright_iframe_extract_debug.py
|
||
|
|
from playwright.sync_api import sync_playwright
|
||
|
|
from bs4 import BeautifulSoup
|
||
|
|
import re, time, os, sys
|
||
|
|
|
||
|
|
TEST_URL = "https://striphub.cam/play/68f731ea62f66877cc80e54f"  # replace if needed

# Case-insensitive matcher for mxdrop embed URLs (https://mxdrop.to/e/<id>),
# terminating at whitespace, quotes or angle brackets.
MX_PATTERN = re.compile(r"https?://(?:www\.)?mxdrop\.to/e/[^\s\"'<>]+", re.I)


def extract_from_html(html: str):
    """Fallback regex scan over raw HTML (catches inline JS/JSON).

    Returns the set of distinct mxdrop embed URLs found in *html*.
    """
    return {match.group(0) for match in MX_PATTERN.finditer(html)}
def _click_reveal_controls(page):
    """Best-effort clicks on buttons/overlays that commonly gate the player.

    Missing selectors and click timeouts are ignored on purpose: these are
    opportunistic pokes, not required steps.
    """
    selectors = [
        'button:has-text("Play")',
        'button:has-text("I understand")',
        'button:has-text("Continue")',
        'button:has-text("Accept")',
        "#player, .video-player, .plyr__control",
    ]
    for sel in selectors:
        try:
            el = page.locator(sel)
            if el.count() > 0:
                el.first.click(timeout=2000)
        except Exception:
            pass  # deliberate best-effort: selector absent or not clickable


def _lazy_scroll(page):
    """Scroll the page in steps to trigger lazy-loaded iframes.

    FIX: the script is passed as an async arrow function. Playwright's
    evaluate() rejects a bare multi-statement string (it is evaluated as a
    single expression), and an un-awaited IIFE would return before the
    scrolling finished — evaluate() awaits the returned promise for us.
    """
    try:
        page.evaluate(
            """async () => {
                const delay = ms => new Promise(r => setTimeout(r, ms));
                for (let y = 0; y < document.body.scrollHeight; y += 800) {
                    window.scrollTo(0, y);
                    await delay(200);
                }
                window.scrollTo(0, document.body.scrollHeight);
            }"""
        )
        time.sleep(1.0)
    except Exception:
        pass  # scrolling is a nice-to-have; never fail the run over it


def run():
    """Open TEST_URL in Chromium and hunt for mxdrop embed links.

    Detection layers: network responses, DOM <iframe> src/data-src, a regex
    sweep of the raw HTML, and Playwright's frame tree. Side effects: writes
    debug_page.png, debug_page.html and (when links are found) embedLinks.txt.
    """
    found = set()

    with sync_playwright() as p:
        # Use headless=False while debugging to *see* what's happening.
        browser = p.chromium.launch(
            headless=False,
            args=["--disable-blink-features=AutomationControlled"],
        )
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                           "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                locale="en-US",
                viewport={"width": 1280, "height": 900},
            )
            page = context.new_page()

            # 1) Capture ANY network response hitting mxdrop.
            def on_response(resp):
                url = resp.url
                if "mxdrop.to/e/" in url:
                    print("[NET] mxdrop response:", url)
                    found.add(url)

            page.on("response", on_response)

            print("Navigating to", TEST_URL)
            page.goto(TEST_URL, wait_until="domcontentloaded", timeout=60000)

            # 2) Give the page time to settle network/XHR.
            try:
                page.wait_for_load_state("networkidle", timeout=20000)
            except Exception:
                print("⚠️ networkidle timed out—continuing")

            # 3) Generic clicks that often reveal the player/iframe.
            _click_reveal_controls(page)

            # 4) Scroll to bottom to trigger lazy-load iframes.
            _lazy_scroll(page)

            # 5) Dump a screenshot & HTML so you can inspect what loaded.
            try:
                page.screenshot(path="debug_page.png", full_page=True)
                print("Saved screenshot -> debug_page.png")
            except Exception:
                pass  # screenshot failure must not abort the scan

            html = page.content()
            with open("debug_page.html", "w", encoding="utf-8") as f:
                f.write(html)
            print("Saved HTML -> debug_page.html")

            # 6) Parse DOM for iframes (src and data-src).
            soup = BeautifulSoup(html, "html.parser")
            iframes = soup.find_all("iframe")
            print(f"Found {len(iframes)} <iframe> tags in DOM")

            for iframe in iframes:
                for attr in ("src", "data-src"):
                    val = iframe.get(attr)
                    if val and "mxdrop.to/e/" in val:
                        print("[DOM] iframe", attr, "=", val)
                        found.add(val)

            # 7) Regex over the full HTML (catches script-injected strings).
            regex_hits = extract_from_html(html)
            for u in regex_hits:
                print("[HTML-REGEX]", u)
            found.update(regex_hits)

            # 8) Also list all frame URLs Playwright sees (nested frames).
            for fr in page.frames:
                if fr.url and "mxdrop.to/e/" in fr.url:
                    print("[FRAME] url:", fr.url)
                    found.add(fr.url)

            # 9) Print final result.
            found = sorted(found)
            print("\n==== MXDROP RESULTS ====")
            if found:
                for u in found:
                    print(u)
            else:
                title = soup.title.string.strip() if soup.title and soup.title.string else "(no title)"
                print("No mxdrop links detected.")
                print("Page title:", title)
                # Quick hint if you hit a challenge:
                snippet = html[:400].replace("\n", " ")
                if "Just a moment" in snippet or "Cloudflare" in snippet or "cf-chl" in snippet:
                    print("Looks like a Cloudflare challenge / interstitial (human step required).")

            # 10) Save results if any.
            if found:
                with open("embedLinks.txt", "w", encoding="utf-8") as f:
                    for u in found:
                        f.write(u + "\n")
                print("Saved -> embedLinks.txt")
        finally:
            # FIX: close the browser even when a step above raises, so no
            # Chromium process is leaked on failure.
            browser.close()


if __name__ == "__main__":
    run()