pimpbunny and more fixes
This commit is contained in:
41
src/util/browser.rs
Normal file
41
src/util/browser.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use crate::util::{playwright, webdriver};
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Timeout handed to [`wait_for_attribute`] by [`wait_for_src`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Races chromiumoxide (webdriver) and Playwright against each other.
|
||||
/// Navigates to `url` and returns the `src` attribute of the first element
|
||||
/// matched by `xpath` — whichever backend resolves it first wins.
|
||||
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
|
||||
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
|
||||
}
|
||||
|
||||
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
|
||||
pub async fn wait_for_attribute(
|
||||
url: &str,
|
||||
xpath: &str,
|
||||
attribute: &str,
|
||||
wait_timeout: Duration,
|
||||
) -> Option<String> {
|
||||
// Buffer of 1: the first send wins; the losing task's send is silently
|
||||
// dropped when the receiver is gone.
|
||||
let (tx, mut rx) = mpsc::channel::<String>(1);
|
||||
|
||||
let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string());
|
||||
let tx1 = tx.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Some(src) = webdriver::wait_for_attribute(&u, &x, &a, wait_timeout).await {
|
||||
let _ = tx1.send(src).await;
|
||||
}
|
||||
});
|
||||
|
||||
let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string());
|
||||
tokio::spawn(async move {
|
||||
if let Some(src) = playwright::wait_for_attribute(&u, &x, &a, wait_timeout).await {
|
||||
let _ = tx.send(src).await;
|
||||
}
|
||||
});
|
||||
|
||||
rx.recv().await
|
||||
}
|
||||
@@ -9,7 +9,10 @@ pub mod hoster_proxy;
|
||||
pub mod proxy;
|
||||
pub mod requester;
|
||||
pub mod time;
|
||||
pub mod browser;
|
||||
pub mod dean_edwards;
|
||||
pub mod playwright;
|
||||
pub mod webdriver;
|
||||
|
||||
pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
|
||||
let s = s.trim();
|
||||
|
||||
128
src/util/playwright.rs
Normal file
128
src/util/playwright.rs
Normal file
@@ -0,0 +1,128 @@
|
||||
use playwright::Playwright;
|
||||
use playwright::api::Page;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
use tokio::time::{sleep, timeout};
|
||||
|
||||
/// Pause between two consecutive probes of the page while polling for the attribute.
const POLL_INTERVAL: Duration = Duration::from_millis(500);

/// Polling budget used by [`wait_for_src`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Command-line flags passed to Chrome when it is launched through Playwright.
///
/// Headless mode itself is not listed here; it is requested separately on the
/// launcher in `open_page`.
const CHROME_ARGS: &[&str] = &[
    // Sandbox and first-run behaviour.
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--no-first-run",
    "--no-default-browser-check",
    // Background timer / renderer throttling.
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-renderer-backgrounding",
    "--disable-ipc-flooding-protection",
    "--disable-hang-monitor",
    // Rendering features: GPU, canvas, WebGL, images, audio.
    "--disable-gpu",
    "--disable-software-rasterizer",
    "--disable-accelerated-2d-canvas",
    "--disable-webgl",
    "--disable-3d-apis",
    "--blink-settings=imagesEnabled=false",
    "--mute-audio",
    // Background networking and update traffic.
    "--disable-background-networking",
    "--disable-client-side-phishing-detection",
    "--disable-component-update",
    "--disable-domain-reliability",
    "--disable-sync",
    "--metrics-recording-only",
    "--safebrowsing-disable-auto-update",
    // Miscellaneous overhead.
    "--disable-extensions",
    "--disable-translate",
    "--disable-default-apps",
    "--disable-features=site-per-process",
    "--password-store=basic",
    "--use-mock-keychain",
];
|
||||
|
||||
/// Launches a headless Chrome browser via Playwright, navigates to `url`, and
|
||||
/// polls until the element matched by `xpath` has a non-empty `src` attribute.
|
||||
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
|
||||
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
|
||||
}
|
||||
|
||||
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
|
||||
pub async fn wait_for_attribute(
|
||||
url: &str,
|
||||
xpath: &str,
|
||||
attribute: &str,
|
||||
wait_timeout: Duration,
|
||||
) -> Option<String> {
|
||||
// _pw must be kept alive — dropping it kills the driver process.
|
||||
let (_pw, page) = open_page(url).await?;
|
||||
|
||||
let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
|
||||
let attr_js = serde_json::to_string(attribute).unwrap_or_default();
|
||||
let js = format!(
|
||||
r#"() => {{
|
||||
var r = document.evaluate({xpath_js}, document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
var el = r.singleNodeValue;
|
||||
if (!el) return '';
|
||||
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
|
||||
}}"#
|
||||
);
|
||||
|
||||
let result = timeout(wait_timeout, async {
|
||||
loop {
|
||||
match page.evaluate::<(), String>(&js, ()).await {
|
||||
Ok(s) if !s.is_empty() => return Some(s),
|
||||
Ok(_) => {}
|
||||
Err(e) => eprintln!("[playwright] evaluate error: {e}"),
|
||||
}
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
.ok()
|
||||
.flatten();
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// ── internals ─────────────────────────────────────────────────────────────────
|
||||
|
||||
async fn open_page(url: &str) -> Option<(Playwright, Page)> {
|
||||
let pw = Playwright::initialize()
|
||||
.await
|
||||
.map_err(|e| eprintln!("[playwright] init error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
let args: Vec<String> = CHROME_ARGS.iter().map(|s| s.to_string()).collect();
|
||||
|
||||
let browser = pw
|
||||
.chromium()
|
||||
.launcher()
|
||||
.executable(Path::new("/usr/bin/google-chrome"))
|
||||
.headless(true)
|
||||
.args(&args)
|
||||
.launch()
|
||||
.await
|
||||
.map_err(|e| eprintln!("[playwright] launch error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
let context = browser
|
||||
.context_builder()
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| eprintln!("[playwright] context error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
let page = context
|
||||
.new_page()
|
||||
.await
|
||||
.map_err(|e| eprintln!("[playwright] new_page error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
page.goto_builder(url)
|
||||
.goto()
|
||||
.await
|
||||
.map_err(|e| eprintln!("[playwright] goto error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
Some((pw, page))
|
||||
}
|
||||
130
src/util/webdriver.rs
Normal file
130
src/util/webdriver.rs
Normal file
@@ -0,0 +1,130 @@
|
||||
use chromiumoxide::{Browser, BrowserConfig};
|
||||
use futures::StreamExt;
|
||||
use std::time::Duration;
|
||||
use tokio::time::{sleep, timeout};
|
||||
|
||||
/// Pause between two consecutive probes of the page while polling for the attribute.
const POLL_INTERVAL: Duration = Duration::from_millis(500);

/// Polling budget used by [`wait_for_src`]: 30 seconds.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Launches a headless Chrome browser, navigates to `url`, and polls until the
|
||||
/// element matched by `xpath` has a non-empty `src` attribute, then returns it.
|
||||
///
|
||||
/// Equivalent to Selenium's:
|
||||
/// `WebDriverWait(driver, 30).until(lambda d: d.find_element(By.XPATH, xpath).get_attribute("src"))`
|
||||
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
|
||||
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
|
||||
}
|
||||
|
||||
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
|
||||
pub async fn wait_for_attribute(
|
||||
url: &str,
|
||||
xpath: &str,
|
||||
attribute: &str,
|
||||
wait_timeout: Duration,
|
||||
) -> Option<String> {
|
||||
let (mut browser, page) = open_page(url).await?;
|
||||
|
||||
// Embed xpath and attribute as JSON strings so they are safely quoted inside
|
||||
// the JS template — avoids any injection from untrusted caller values.
|
||||
let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
|
||||
let attr_js = serde_json::to_string(attribute).unwrap_or_default();
|
||||
let js = format!(
|
||||
r#"(function() {{
|
||||
var r = document.evaluate({xpath_js}, document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
var el = r.singleNodeValue;
|
||||
if (!el) return '';
|
||||
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
|
||||
}})()"#
|
||||
);
|
||||
|
||||
let result = timeout(wait_timeout, async {
|
||||
loop {
|
||||
match page.evaluate(js.as_str()).await {
|
||||
Ok(val) => {
|
||||
if let Ok(s) = val.into_value::<String>() {
|
||||
if !s.is_empty() {
|
||||
return Some(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!("[webdriver] evaluate error: {e}"),
|
||||
}
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
.ok()
|
||||
.flatten();
|
||||
|
||||
let _ = browser.close().await;
|
||||
result
|
||||
}
|
||||
|
||||
// ── internals ─────────────────────────────────────────────────────────────────
|
||||
|
||||
async fn open_page(
|
||||
url: &str,
|
||||
) -> Option<(Browser, chromiumoxide::Page)> {
|
||||
let config = BrowserConfig::builder()
|
||||
// ── headless & sandbox ────────────────────────────────────────
|
||||
.arg("--headless=new")
|
||||
.arg("--no-sandbox")
|
||||
.arg("--disable-dev-shm-usage")
|
||||
.arg("--no-first-run")
|
||||
.arg("--no-default-browser-check")
|
||||
// ── JS timer / scheduler: prevent Chrome from throttling
|
||||
// background timers and renderers (critical for player init) ──
|
||||
.arg("--disable-background-timer-throttling")
|
||||
.arg("--disable-backgrounding-occluded-windows")
|
||||
.arg("--disable-renderer-backgrounding")
|
||||
.arg("--disable-ipc-flooding-protection")
|
||||
.arg("--disable-hang-monitor")
|
||||
// ── skip rendering work we don't need ────────────────────────
|
||||
.arg("--disable-gpu")
|
||||
.arg("--disable-software-rasterizer")
|
||||
.arg("--disable-accelerated-2d-canvas")
|
||||
.arg("--disable-webgl")
|
||||
.arg("--disable-3d-apis")
|
||||
.arg("--blink-settings=imagesEnabled=false")
|
||||
.arg("--mute-audio")
|
||||
// ── cut background network / update noise ────────────────────
|
||||
.arg("--disable-background-networking")
|
||||
.arg("--disable-client-side-phishing-detection")
|
||||
.arg("--disable-component-update")
|
||||
.arg("--disable-domain-reliability")
|
||||
.arg("--disable-sync")
|
||||
.arg("--metrics-recording-only")
|
||||
.arg("--safebrowsing-disable-auto-update")
|
||||
// ── misc overhead ─────────────────────────────────────────────
|
||||
.arg("--disable-extensions")
|
||||
.arg("--disable-translate")
|
||||
.arg("--disable-default-apps")
|
||||
.arg("--disable-features=site-per-process")
|
||||
.arg("--password-store=basic")
|
||||
.arg("--use-mock-keychain")
|
||||
.build()
|
||||
.map_err(|e| eprintln!("[webdriver] BrowserConfig error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
let (browser, mut handler) = Browser::launch(config)
|
||||
.await
|
||||
.map_err(|e| eprintln!("[webdriver] Browser launch error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
if handler.next().await.is_none() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let page = browser
|
||||
.new_page(url)
|
||||
.await
|
||||
.map_err(|e| eprintln!("[webdriver] new_page error: {e}"))
|
||||
.ok()?;
|
||||
|
||||
Some((browser, page))
|
||||
}
|
||||
Reference in New Issue
Block a user