pimpbunny and more fixes

This commit is contained in:
Simon
2026-05-18 16:52:25 +00:00
committed by ForgeCode
parent dc70e2c9a6
commit ee47bbe74d
13 changed files with 1457 additions and 311 deletions

41
src/util/browser.rs Normal file
View File

@@ -0,0 +1,41 @@
use crate::util::{playwright, webdriver};
use std::time::Duration;
use tokio::sync::mpsc;
/// Wait budget that [`wait_for_src`] passes on to [`wait_for_attribute`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Resolves the `src` attribute of the first element matching `xpath` on `url`.
///
/// The chromiumoxide-based (`webdriver`) and the Playwright-based backends are
/// both asked for the value; whichever answers first provides the result.
/// The wait budget handed to the backends is [`DEFAULT_TIMEOUT`].
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
    let attribute = "src";
    wait_for_attribute(url, xpath, attribute, DEFAULT_TIMEOUT).await
}
/// Generalised form of [`wait_for_src`]: the caller picks which attribute to
/// read and the timeout that is forwarded to each backend.
///
/// Both backends run as independent Tokio tasks. The first one to obtain a
/// value delivers it through a channel and that value is returned. `None` is
/// returned only when both tasks have finished without sending anything.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // Capacity 1 suffices because only the first delivered value is ever read.
    // After this function returns the receiver is gone, so a late send from
    // the slower backend simply fails and its value is discarded.
    let (playwright_tx, mut winner_rx) = mpsc::channel::<String>(1);
    let webdriver_tx = playwright_tx.clone();

    // Spawned tasks must own their inputs, hence one set of copies per task.
    let page_url = url.to_string();
    let selector = xpath.to_string();
    let attr_name = attribute.to_string();
    tokio::spawn(async move {
        let found =
            webdriver::wait_for_attribute(&page_url, &selector, &attr_name, wait_timeout).await;
        if let Some(value) = found {
            let _ = webdriver_tx.send(value).await;
        }
    });

    let page_url = url.to_string();
    let selector = xpath.to_string();
    let attr_name = attribute.to_string();
    tokio::spawn(async move {
        let found =
            playwright::wait_for_attribute(&page_url, &selector, &attr_name, wait_timeout).await;
        if let Some(value) = found {
            let _ = playwright_tx.send(value).await;
        }
    });

    // Both senders now live inside the tasks; when both are dropped without a
    // send, `recv` resolves to `None`.
    winner_rx.recv().await
}

View File

@@ -9,7 +9,10 @@ pub mod hoster_proxy;
pub mod proxy;
pub mod requester;
pub mod time;
pub mod browser;
pub mod dean_edwards;
pub mod playwright;
pub mod webdriver;
pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
let s = s.trim();

128
src/util/playwright.rs Normal file
View File

@@ -0,0 +1,128 @@
use playwright::Playwright;
use playwright::api::Page;
use std::path::Path;
use std::time::Duration;
use tokio::time::{sleep, timeout};
/// Pause between two consecutive attribute probes in the polling loop.
const POLL_INTERVAL: Duration = Duration::from_millis(500);
/// Wait budget that [`wait_for_src`] passes on to [`wait_for_attribute`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Extra command-line switches passed to Chrome when `open_page` launches it.
///
/// Headless mode is not part of this list; `open_page` requests it through the
/// launcher's `headless(true)` call.
const CHROME_ARGS: &[&str] = &[
    // ── sandbox & first-run prompts ──
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--no-first-run",
    "--no-default-browser-check",
    // ── JS timer / scheduler: keep Chrome from throttling background timers
    //    and renderers ──
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-renderer-backgrounding",
    "--disable-ipc-flooding-protection",
    "--disable-hang-monitor",
    // ── skip rendering work that is not needed to read an attribute ──
    "--disable-gpu",
    "--disable-software-rasterizer",
    "--disable-accelerated-2d-canvas",
    "--disable-webgl",
    "--disable-3d-apis",
    "--blink-settings=imagesEnabled=false",
    "--mute-audio",
    // ── cut background network / update noise ──
    "--disable-background-networking",
    "--disable-client-side-phishing-detection",
    "--disable-component-update",
    "--disable-domain-reliability",
    "--disable-sync",
    "--metrics-recording-only",
    "--safebrowsing-disable-auto-update",
    // ── misc overhead ──
    "--disable-extensions",
    "--disable-translate",
    "--disable-default-apps",
    "--disable-features=site-per-process",
    "--password-store=basic",
    "--use-mock-keychain",
];
/// Opens `url` in a Playwright-driven headless Chrome and waits until the
/// element selected by `xpath` reports a non-empty `src` attribute.
///
/// Shorthand for [`wait_for_attribute`] with the attribute fixed to `"src"`
/// and the timeout fixed to [`DEFAULT_TIMEOUT`].
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
    let attribute = "src";
    wait_for_attribute(url, xpath, attribute, DEFAULT_TIMEOUT).await
}
/// Opens `url` through Playwright and polls the page until the first node
/// matching `xpath` yields a non-empty value for `attribute`.
///
/// The probe script reads the HTML attribute first and falls back to the DOM
/// property of the same name. Returns `None` when the page cannot be opened or
/// when `wait_timeout` elapses before a value shows up. The timeout covers the
/// polling phase only, not the page setup.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // The Playwright handle has to stay bound for the whole function:
    // dropping it kills the driver process.
    let (_pw, page) = open_page(url).await?;

    // JSON-encode both caller-supplied values so they appear in the script as
    // properly quoted string literals.
    let attr_js = serde_json::to_string(attribute).unwrap_or_default();
    let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
    let probe_script = format!(
        r#"() => {{
var r = document.evaluate({xpath_js}, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
var el = r.singleNodeValue;
if (!el) return '';
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
}}"#
    );

    // Probe, sleep, repeat; the surrounding timeout is the only exit besides
    // a non-empty value.
    let poll = async {
        loop {
            let outcome = page.evaluate::<(), String>(&probe_script, ()).await;
            match outcome {
                Err(e) => eprintln!("[playwright] evaluate error: {e}"),
                Ok(value) => {
                    if !value.is_empty() {
                        return Some(value);
                    }
                }
            }
            sleep(POLL_INTERVAL).await;
        }
    };

    // An elapsed timeout collapses to `None`. Bound to a local so the polling
    // future (which borrows `page`) is finished before the locals are dropped.
    let result = timeout(wait_timeout, poll).await.unwrap_or(None);
    result
}
// ── internals ─────────────────────────────────────────────────────────────────
/// Starts the Playwright driver, launches headless Chrome from
/// `/usr/bin/google-chrome` with [`CHROME_ARGS`], and navigates a fresh page
/// to `url`.
///
/// Each failing step is logged to stderr and turns the result into `None`.
/// The `Playwright` handle is returned next to the page so the caller can keep
/// it alive for as long as the page is in use.
async fn open_page(url: &str) -> Option<(Playwright, Page)> {
    let pw = match Playwright::initialize().await {
        Ok(pw) => pw,
        Err(e) => {
            eprintln!("[playwright] init error: {e}");
            return None;
        }
    };

    // The launcher takes owned strings, so copy the static flag table.
    let launch_args: Vec<String> = CHROME_ARGS.iter().map(|flag| (*flag).to_owned()).collect();
    let launched = pw
        .chromium()
        .launcher()
        .executable(Path::new("/usr/bin/google-chrome"))
        .headless(true)
        .args(&launch_args)
        .launch()
        .await;
    let browser = match launched {
        Ok(browser) => browser,
        Err(e) => {
            eprintln!("[playwright] launch error: {e}");
            return None;
        }
    };

    let built = browser.context_builder().build().await;
    let context = match built {
        Ok(context) => context,
        Err(e) => {
            eprintln!("[playwright] context error: {e}");
            return None;
        }
    };

    let opened = context.new_page().await;
    let page = match opened {
        Ok(page) => page,
        Err(e) => {
            eprintln!("[playwright] new_page error: {e}");
            return None;
        }
    };

    // Only the success/failure of the navigation matters; the response value
    // itself is not used.
    let navigation = page.goto_builder(url).goto().await;
    if let Err(e) = navigation {
        eprintln!("[playwright] goto error: {e}");
        return None;
    }

    Some((pw, page))
}

130
src/util/webdriver.rs Normal file
View File

@@ -0,0 +1,130 @@
use chromiumoxide::{Browser, BrowserConfig};
use futures::StreamExt;
use std::time::Duration;
use tokio::time::{sleep, timeout};
/// Pause between two consecutive attribute probes in the polling loop.
const POLL_INTERVAL: Duration = Duration::from_millis(500);
/// Wait budget that [`wait_for_src`] passes on to [`wait_for_attribute`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Opens `url` in a headless Chrome and waits until the element selected by
/// `xpath` reports a non-empty `src` attribute, which is then returned.
///
/// Shorthand for [`wait_for_attribute`] with the attribute fixed to `"src"`
/// and the timeout fixed to [`DEFAULT_TIMEOUT`]. The Selenium counterpart is:
/// `WebDriverWait(driver, 30).until(lambda d: d.find_element(By.XPATH, xpath).get_attribute("src"))`
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
    let attribute = "src";
    wait_for_attribute(url, xpath, attribute, DEFAULT_TIMEOUT).await
}
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
///
/// Opens `url` in a freshly launched headless Chrome and polls every
/// [`POLL_INTERVAL`] until the first node matching `xpath` yields a non-empty
/// value for `attribute` (HTML attribute first, same-named DOM property as a
/// fallback).
///
/// Returns `None` when the browser or page cannot be opened, or when
/// `wait_timeout` elapses first. The timeout covers the polling phase only,
/// not the browser launch and navigation. The browser is shut down before
/// returning, whether or not a value was found.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // Upper bound for waiting on Chrome to exit after a close request.
    const CLOSE_GRACE: Duration = Duration::from_secs(5);

    let (mut browser, page) = open_page(url).await?;

    // Embed xpath and attribute as JSON strings so they are safely quoted inside
    // the JS template — avoids any injection from untrusted caller values.
    let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
    let attr_js = serde_json::to_string(attribute).unwrap_or_default();
    let js = format!(
        r#"(function() {{
var r = document.evaluate({xpath_js}, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
var el = r.singleNodeValue;
if (!el) return '';
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
}})()"#
    );

    let result = timeout(wait_timeout, async {
        loop {
            match page.evaluate(js.as_str()).await {
                // A value that is not a string, or is empty, counts as
                // "not ready yet" and the loop simply tries again.
                Ok(val) => match val.into_value::<String>() {
                    Ok(s) if !s.is_empty() => return Some(s),
                    _ => {}
                },
                Err(e) => eprintln!("[webdriver] evaluate error: {e}"),
            }
            sleep(POLL_INTERVAL).await;
        }
    })
    .await
    .ok()
    .flatten();

    // `close` only asks Chrome to shut down. Waiting on the child afterwards
    // reaps it so no zombie process is left behind. The wait is bounded so a
    // browser that does not exit cannot stall the caller, and it is skipped
    // when the close request itself failed.
    if browser.close().await.is_ok() {
        let _ = timeout(CLOSE_GRACE, browser.wait()).await;
    }

    result
}
// ── internals ─────────────────────────────────────────────────────────────────
/// Launches headless Chrome through chromiumoxide with a fixed set of
/// command-line flags and opens a page at `url`.
///
/// A background task is spawned to keep draining the browser's event handler
/// stream until it ends. Every failing step is logged to stderr and turns the
/// result into `None`.
async fn open_page(
    url: &str,
) -> Option<(Browser, chromiumoxide::Page)> {
    // Flags are applied to the builder in exactly this order.
    const CHROME_FLAGS: &[&str] = &[
        // ── headless & sandbox ──
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--no-first-run",
        "--no-default-browser-check",
        // ── JS timer / scheduler: prevent Chrome from throttling background
        //    timers and renderers (critical for player init) ──
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-ipc-flooding-protection",
        "--disable-hang-monitor",
        // ── skip rendering work we don't need ──
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--disable-accelerated-2d-canvas",
        "--disable-webgl",
        "--disable-3d-apis",
        "--blink-settings=imagesEnabled=false",
        "--mute-audio",
        // ── cut background network / update noise ──
        "--disable-background-networking",
        "--disable-client-side-phishing-detection",
        "--disable-component-update",
        "--disable-domain-reliability",
        "--disable-sync",
        "--metrics-recording-only",
        "--safebrowsing-disable-auto-update",
        // ── misc overhead ──
        "--disable-extensions",
        "--disable-translate",
        "--disable-default-apps",
        "--disable-features=site-per-process",
        "--password-store=basic",
        "--use-mock-keychain",
    ];

    let builder = CHROME_FLAGS
        .iter()
        .fold(BrowserConfig::builder(), |builder, flag| builder.arg(*flag));
    let config = match builder.build() {
        Ok(config) => config,
        Err(e) => {
            eprintln!("[webdriver] BrowserConfig error: {e}");
            return None;
        }
    };

    let (browser, mut handler) = match Browser::launch(config).await {
        Ok(launched) => launched,
        Err(e) => {
            eprintln!("[webdriver] Browser launch error: {e}");
            return None;
        }
    };

    // Keep polling the handler stream until it is exhausted; individual items
    // (including errors) are ignored.
    tokio::spawn(async move {
        while handler.next().await.is_some() {}
    });

    let opened = browser.new_page(url).await;
    let page = match opened {
        Ok(page) => page,
        Err(e) => {
            eprintln!("[webdriver] new_page error: {e}");
            return None;
        }
    };

    Some((browser, page))
}