From 7680a93fab2366428eb8a4191b650f8b800ba9c6 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 17 Mar 2026 09:17:28 +0000 Subject: [PATCH] pimpbunny thumb --- src/providers/pimpbunny.rs | 72 ++++++ src/proxies/mod.rs | 1 + src/proxies/pimpbunnythumb.rs | 97 +++++++ src/proxy.rs | 5 + src/util/requester.rs | 462 ++++++++++++---------------------- 5 files changed, 329 insertions(+), 308 deletions(-) create mode 100644 src/proxies/pimpbunnythumb.rs diff --git a/src/providers/pimpbunny.rs b/src/providers/pimpbunny.rs index 6dd8967..fc669ca 100644 --- a/src/providers/pimpbunny.rs +++ b/src/providers/pimpbunny.rs @@ -15,6 +15,7 @@ use htmlentity::entity::{ICodedDataTrait, decode}; use std::sync::{Arc, RwLock}; use std::{thread, vec}; use titlecase::Titlecase; +use url::Url; use wreq::Version; error_chain! { @@ -167,6 +168,32 @@ impl PimpbunnyProvider { } } + fn is_allowed_thumb_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + matches!(host, "pimpbunny.com" | "www.pimpbunny.com") + && url.path().starts_with("/contents/videos_screenshots/") + } + + fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String { + if thumb.is_empty() || !Self::is_allowed_thumb_url(thumb) { + return thumb.to_string(); + } + + crate::providers::build_proxy_url( + options, + "pimpbunny-thumb", + &crate::providers::strip_url_scheme(thumb), + ) + } + async fn load_stars(base: &str, stars: Arc>>) -> Result<()> { let mut requester = Requester::new(); let text = requester @@ -558,6 +585,7 @@ impl Provider for PimpbunnyProvider { options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1); + let thumb_options = options.clone(); let res = match query { Some(q) => self.to_owned().query(cache, page, &q, options).await, @@ -568,9 +596,53 @@ impl Provider for PimpbunnyProvider { eprintln!("pimpbunny error: {e}"); vec![] }) + .into_iter() + .map(|mut item| { + if !item.thumb.is_empty() { + item.thumb = self.proxied_thumb(&thumb_options, &item.thumb); + } + item + }) + .collect() } fn get_channel(&self, v: ClientVersion) -> Option { Some(self.build_channel(v)) } } + +#[cfg(test)] +mod tests { + use super::PimpbunnyProvider; + use crate::videos::ServerOptions; + + #[test] + fn rewrites_allowed_thumbs_to_proxy_urls() { + let provider = PimpbunnyProvider::new(); + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("https://example.com".to_string()), + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: None, + sexuality: None, + }; + + let proxied = provider.proxied_thumb( + &options, + "https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg", + ); + + assert_eq!( + proxied, + "https://example.com/proxy/pimpbunny-thumb/pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg" + ); + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 4a5cd8a..9176b89 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -8,6 +8,7 @@ pub mod hanimecdn; pub mod hqpornerthumb; pub mod javtiful; pub mod noodlemagazine; +pub mod pimpbunnythumb; pub mod porndish; pub mod porndishthumb; pub mod spankbang; diff --git a/src/proxies/pimpbunnythumb.rs b/src/proxies/pimpbunnythumb.rs new file mode 100644 index 0000000..dfac189 --- /dev/null +++ b/src/proxies/pimpbunnythumb.rs @@ -0,0 +1,97 @@ +use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE}; +use ntex::{ + http::Response, + web::{self, HttpRequest, error}, +}; +use url::Url; + +use crate::util::requester::Requester; + +fn is_allowed_thumb_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + + matches!(host, "pimpbunny.com" | "www.pimpbunny.com") + && url.path().starts_with("/contents/videos_screenshots/") +} + +pub async fn get_image( + req: HttpRequest, + requester: web::types::State, +) -> Result { + let endpoint = req.match_info().query("endpoint").to_string(); + let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") { + endpoint + } else { + format!("https://{}", endpoint.trim_start_matches('/')) + }; + + if !is_allowed_thumb_url(&image_url) { + return Ok(web::HttpResponse::BadRequest().finish()); + } + + let upstream = match requester + .get_ref() + .clone() + .get_raw_with_headers( + image_url.as_str(), + vec![("Referer".to_string(), "https://pimpbunny.com/".to_string())], + ) + .await + { + Ok(response) if response.status().is_success() => response, + _ => return Ok(web::HttpResponse::NotFound().finish()), + }; + + let status = upstream.status(); + let headers = upstream.headers().clone(); + let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?; + + let mut resp = Response::build(status); + + if let Some(ct) = headers.get(CONTENT_TYPE) { + if let Ok(ct_str) = ct.to_str() { + resp.set_header(CONTENT_TYPE, ct_str); + } + } + if let Some(cl) = headers.get(CONTENT_LENGTH) { + if let Ok(cl_str) = cl.to_str() { + resp.set_header(CONTENT_LENGTH, cl_str); + } + } + + Ok(resp.body(bytes.to_vec())) +} + +#[cfg(test)] +mod tests { + use super::is_allowed_thumb_url; + + #[test] + fn allows_expected_pimpbunny_thumb_paths() { + assert!(is_allowed_thumb_url( + "https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg" + )); + assert!(is_allowed_thumb_url( + "https://www.pimpbunny.com/contents/videos_screenshots/1/2/800x450/3.webp" + )); + } + + #[test] + fn rejects_non_thumb_or_non_pimpbunny_urls() { + assert!(!is_allowed_thumb_url("http://pimpbunny.com/contents/videos_screenshots/x.jpg")); + assert!(!is_allowed_thumb_url( + "https://pimpbunny.com/videos/example-video/" + )); + assert!(!is_allowed_thumb_url( + "https://example.com/contents/videos_screenshots/x.jpg" + )); + } +} diff --git a/src/proxy.rs b/src/proxy.rs index fa6e025..12ddc17 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -48,6 +48,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::post().to(crate::proxies::porndishthumb::get_image)) .route(web::get().to(crate::proxies::porndishthumb::get_image)), ); + cfg.service( + web::resource("/pimpbunny-thumb/{endpoint}*") + .route(web::post().to(crate::proxies::pimpbunnythumb::get_image)) + .route(web::get().to(crate::proxies::pimpbunnythumb::get_image)), + ); } async fn proxy2redirect( diff --git a/src/util/requester.rs b/src/util/requester.rs index 870ec42..1e6916a 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -1,137 +1,28 @@ -use base64::Engine; -use futures::TryStreamExt; -use ntex::http::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderName, HeaderValue, USER_AGENT}; -use ntex::http::StatusCode; use serde::Serialize; use std::env; use std::fmt; -use std::io::Write; -use std::process::{Command, Stdio}; use std::sync::Arc; +use wreq::Client; +use wreq::Proxy; +use wreq::Response; use wreq::Version; -use wreq::cookie::{CookieStore, Cookies, Jar}; +use wreq::cookie::Jar; +use wreq::header::{HeaderMap, HeaderValue, USER_AGENT}; use wreq::multipart::Form; -use wreq::Uri; +use wreq::redirect::Policy; +use wreq_util::Emulation; use crate::util::flaresolverr::FlareSolverrRequest; use crate::util::flaresolverr::Flaresolverr; use crate::util::proxy; +// A Send + Sync error type for all async paths type AnyErr = Box; -const CURL_CFFI_SCRIPT: &str = r#" -import base64 -import json -import sys -from curl_cffi import requests - -def main(): - payload = json.load(sys.stdin) - headers = {k: v for k, v in payload.get("headers", [])} - body_b64 = payload.get("body_base64") - data = base64.b64decode(body_b64) if body_b64 else None - - kwargs = { - "method": payload["method"], - "url": payload["url"], - "headers": headers or None, - "timeout": payload.get("timeout_secs", 60), - "allow_redirects": payload.get("follow_redirects", True), - "verify": False, - "impersonate": payload.get("impersonate", "chrome"), - } - - proxy_url = payload.get("proxy_url") - if proxy_url: - kwargs["proxies"] = {"http": proxy_url, "https": proxy_url} - if data is not None: - kwargs["data"] = data - - response = requests.request(**kwargs) - - cookies = [] - cookie_jar = getattr(response.cookies, "jar", None) - if cookie_jar is not None: - for cookie in cookie_jar: - parts = [f"{cookie.name}={cookie.value}"] - if cookie.domain: - parts.append(f"Domain={cookie.domain}") - if cookie.path: - parts.append(f"Path={cookie.path}") - if cookie.secure: - parts.append("Secure") - cookies.append("; ".join(parts)) - - meta = { - "status": response.status_code, - "headers": list(response.headers.items()), - "cookies": cookies, - } - sys.stderr.write(json.dumps(meta)) - sys.stdout.buffer.write(response.content) - -if __name__ == "__main__": - try: - main() - except Exception as exc: - sys.stderr.write(json.dumps({"error": str(exc)})) - sys.exit(1) -"#; - -#[derive(Debug, Clone)] -pub struct Response { - status: StatusCode, - headers: HeaderMap, - body: Vec, -} - -impl Response { - pub fn status(&self) -> StatusCode { - self.status - } - - pub fn headers(&self) -> &HeaderMap { - &self.headers - } - - pub async fn text(self) -> Result { - String::from_utf8(self.body).map_err(|error| error.into()) - } - - pub async fn bytes(self) -> Result, AnyErr> { - Ok(self.body) - } - - pub async fn json(self) -> Result - where - T: serde::de::DeserializeOwned, - { - Ok(serde_json::from_slice(&self.body)?) - } -} - -#[derive(Serialize)] -struct PythonRequestPayload<'a> { - method: &'a str, - url: &'a str, - headers: Vec<(String, String)>, - body_base64: Option, - follow_redirects: bool, - timeout_secs: u64, - proxy_url: Option, - impersonate: &'a str, -} - -#[derive(serde::Deserialize)] -struct PythonResponseMeta { - status: Option, - headers: Option>, - cookies: Option>, - error: Option, -} - #[derive(serde::Serialize, serde::Deserialize, Clone)] pub struct Requester { + #[serde(skip)] + client: Client, #[serde(skip)] cookie_jar: Arc, proxy: bool, @@ -150,9 +41,31 @@ impl fmt::Debug for Requester { } impl Requester { + fn build_client(cookie_jar: Arc, user_agent: Option<&str>) -> Client { + let mut builder = Client::builder() + .cert_verification(false) + .emulation(Emulation::Firefox146) + .cookie_provider(cookie_jar) + .redirect(Policy::default()); + + if let Some(user_agent) = user_agent { + let mut headers = HeaderMap::new(); + if let Ok(value) = HeaderValue::from_str(user_agent) { + headers.insert(USER_AGENT, value); + builder = builder.default_headers(headers); + } + } + + builder.build().expect("Failed to create HTTP client") + } + pub fn new() -> Self { + let cookie_jar = Arc::new(Jar::default()); + let client = Self::build_client(cookie_jar.clone(), None); + let requester = Requester { - cookie_jar: Arc::new(Jar::default()), + client, + cookie_jar, proxy: false, flaresolverr_session: None, user_agent: None, @@ -170,163 +83,41 @@ impl Requester { self.proxy = proxy; } - fn cookie_headers(&self, url: &str) -> Vec<(String, String)> { - let Ok(uri) = url.parse::() else { - return vec![]; - }; + pub async fn get_raw(&mut self, url: &str) -> Result { + let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); - match self.cookie_jar.cookies(&uri) { - Cookies::Compressed(value) => value - .to_str() - .ok() - .map(|value| vec![(COOKIE.to_string(), value.to_string())]) - .unwrap_or_default(), - Cookies::Uncompressed(values) => values - .into_iter() - .filter_map(|value| value.to_str().ok().map(|value| (COOKIE.to_string(), value.to_string()))) - .collect(), - Cookies::Empty => vec![], - _ => vec![], - } - } + let mut request = client.get(url).version(Version::HTTP_11); - fn merged_headers( - &self, - url: &str, - headers: Vec<(String, String)>, - ensure_json: bool, - ) -> Vec<(String, String)> { - let mut merged = headers; - - if ensure_json - && !merged - .iter() - .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) - { - merged.push((CONTENT_TYPE.to_string(), "application/json".to_string())); - } - - if let Some(user_agent) = &self.user_agent { - if !merged - .iter() - .any(|(key, _)| key.eq_ignore_ascii_case(USER_AGENT.as_str())) - { - merged.push((USER_AGENT.to_string(), user_agent.clone())); + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); } } - let has_cookie = merged - .iter() - .any(|(key, _)| key.eq_ignore_ascii_case(COOKIE.as_str())); - if !has_cookie { - merged.extend(self.cookie_headers(url)); - } - - merged - } - - async fn run_python_request( - &mut self, - method: &str, - url: &str, - headers: Vec<(String, String)>, - body: Option>, - follow_redirects: bool, - ) -> Result { - let headers = self.merged_headers(url, headers, false); - let proxy_url = if self.proxy { - env::var("BURP_URL").ok() - } else { - None - }; - - let payload = PythonRequestPayload { - method, - url, - headers, - body_base64: body.map(|body| base64::engine::general_purpose::STANDARD.encode(body)), - follow_redirects, - timeout_secs: 60, - proxy_url, - impersonate: "chrome", - }; - - let payload = serde_json::to_vec(&payload)?; - - let output = tokio::task::spawn_blocking(move || -> Result { - let mut command = Command::new("python3"); - command - .arg("-c") - .arg(CURL_CFFI_SCRIPT) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - - let mut child = command.spawn()?; - if let Some(stdin) = child.stdin.as_mut() { - stdin.write_all(&payload)?; - } - Ok(child.wait_with_output()?) - }) - .await - .map_err(|error| -> AnyErr { format!("spawn_blocking failed: {error}").into() })??; - - let meta_text = String::from_utf8_lossy(&output.stderr).trim().to_string(); - let meta: PythonResponseMeta = serde_json::from_str(&meta_text) - .map_err(|error| format!("failed to parse curl_cffi metadata: {error}; stderr={meta_text}"))?; - - if !output.status.success() { - let error = meta - .error - .unwrap_or_else(|| format!("curl_cffi request failed for {method} {url}")); - return Err(error.into()); - } - - for cookie in meta.cookies.unwrap_or_default() { - self.cookie_jar.add_cookie_str(&cookie, url); - } - - let status = StatusCode::from_u16(meta.status.unwrap_or(500)) - .map_err(|error| format!("invalid status code from curl_cffi: {error}"))?; - - let mut response_headers = HeaderMap::new(); - for (key, value) in meta.headers.unwrap_or_default() { - let Ok(name) = HeaderName::try_from(key.as_str()) else { - continue; - }; - let Ok(value) = HeaderValue::from_str(&value) else { - continue; - }; - response_headers.append(name, value); - } - - Ok(Response { - status, - headers: response_headers, - body: output.stdout, - }) - } - - async fn multipart_to_body(form: Form) -> Result<(Vec, String), AnyErr> { - let boundary = form.boundary().to_string(); - let chunks: Vec<_> = form.into_stream().try_collect().await?; - let mut body = Vec::new(); - for chunk in chunks { - body.extend_from_slice(&chunk); - } - Ok((body, format!("multipart/form-data; boundary={boundary}"))) - } - - pub async fn get_raw(&mut self, url: &str) -> Result { - self.run_python_request("GET", url, vec![], None, false).await + request.send().await } pub async fn get_raw_with_headers( &mut self, url: &str, headers: Vec<(String, String)>, - ) -> Result { - self.run_python_request("GET", url, headers, None, true).await + ) -> Result { + let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); + + let mut request = client.get(url).version(Version::HTTP_11); + + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + // Set custom headers + for (key, value) in headers.iter() { + request = request.header(key, value); + } + request.send().await } pub async fn post_json( @@ -334,21 +125,25 @@ impl Requester { url: &str, data: &S, headers: Vec<(String, String)>, - ) -> Result + ) -> Result where S: Serialize + ?Sized, { - let mut headers = self.merged_headers(url, headers, true); - if !headers - .iter() - .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) - { - headers.push((CONTENT_TYPE.to_string(), "application/json".to_string())); + let mut request = self.client.post(url).version(Version::HTTP_11).json(data); + + // Set custom headers + for (key, value) in headers.iter() { + request = request.header(key, value); } - let body = serde_json::to_vec(data)?; - self.run_python_request("POST", url, headers, Some(body), true) - .await + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + + request.send().await } pub async fn post( @@ -356,14 +151,26 @@ impl Requester { url: &str, data: &str, headers: Vec<(&str, &str)>, - ) -> Result { - let headers = headers - .into_iter() - .map(|(key, value)| (key.to_string(), value.to_string())) - .collect::>(); + ) -> Result { + let mut request = self + .client + .post(url) + .version(Version::HTTP_11) + .body(data.to_string()); - self.run_python_request("POST", url, headers, Some(data.as_bytes().to_vec()), true) - .await + // Set custom headers + for (key, value) in headers.iter() { + request = request.header(key.to_string(), value.to_string()); + } + + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + + request.send().await } pub async fn post_multipart( @@ -372,18 +179,27 @@ impl Requester { form: Form, headers: Vec<(String, String)>, _http_version: Option, - ) -> Result { - let (body, content_type) = Self::multipart_to_body(form).await?; - let mut headers = headers; - if !headers - .iter() - .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) - { - headers.push((CONTENT_TYPE.to_string(), content_type)); + ) -> Result { + let http_version = match _http_version { + Some(v) => v, + None => Version::HTTP_11, + }; + + let mut request = self.client.post(url).multipart(form).version(http_version); + + // Set custom headers + for (key, value) in headers.iter() { + request = request.header(key, value); } - self.run_python_request("POST", url, headers, Some(body), true) - .await + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + + request.send().await } pub async fn get( @@ -400,15 +216,25 @@ impl Requester { headers: Vec<(String, String)>, _http_version: Option, ) -> Result { + let http_version = match _http_version { + Some(v) => v, + None => Version::HTTP_11, + }; loop { - let response = self - .run_python_request("GET", url, headers.clone(), None, true) - .await?; - - if response.status().is_success() || response.status().as_u16() == 404 { - return response.text().await; + let mut request = self.client.get(url).version(http_version); + for (key, value) in headers.iter() { + request = request.header(key, value); + } + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + let response = request.send().await?; + if response.status().is_success() || response.status().as_u16() == 404 { + return Ok(response.text().await?); } - if response.status().as_u16() == 429 { tokio::time::sleep(std::time::Duration::from_secs(1)).await; continue; @@ -422,6 +248,8 @@ impl Requester { } } + // If direct request failed, try FlareSolverr. Map its error to a Send+Sync error immediately, + // so no non-Send error value lives across later `.await`s. let flare_url = match env::var("FLARE_URL") { Ok(url) => url, Err(e) => return Err(format!("FLARE_URL not set: {e}").into()), @@ -440,21 +268,39 @@ impl Requester { .await .map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?; + // Rebuild client and apply UA/cookies from FlareSolverr let cookie_origin = url.split('/').take(3).collect::>().join("/"); - self.user_agent = Some(res.solution.userAgent); - for cookie in res.solution.cookies { - self.cookie_jar - .add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin); + let useragent = res.solution.userAgent; + self.user_agent = Some(useragent); + + if url::Url::parse(&cookie_origin).is_ok() { + for cookie in res.solution.cookies { + self.cookie_jar + .add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin); + } } - let response = self - .run_python_request("GET", url, headers, None, true) - .await?; + self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); + + // Retry the original URL with the updated client & (optional) proxy + let mut request = self.client.get(url).version(Version::HTTP_11); + for (key, value) in headers.iter() { + request = request.header(key, value); + } + if self.proxy { + if let Ok(proxy_url) = env::var("BURP_URL") { + let proxy = Proxy::all(&proxy_url).unwrap(); + request = request.proxy(proxy); + } + } + + let response = request.send().await?; if response.status().is_success() { - return response.text().await; + return Ok(response.text().await?); } + // Fall back to FlareSolverr-provided body Ok(res.solution.response) } }