use std::sync::OnceLock;

use ntex::web;
use regex::Regex;
use serde_json::Value;
use url::Url;
use wreq::Version;

use crate::util::requester::Requester;

/// Proxy that turns a pimpbunny.com detail-page URL into the stream URL
/// advertised by that page (via its JSON-LD `contentUrl`).
#[derive(Debug, Clone)]
pub struct PimpbunnyProxy {}

impl PimpbunnyProxy {
    /// `User-Agent` value sent with every HTML request.
    const FIREFOX_USER_AGENT: &'static str =
        "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
    /// `Accept` value sent with every HTML request.
    const HTML_ACCEPT: &'static str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

    /// Creates a new, stateless proxy.
    pub fn new() -> Self {
        PimpbunnyProxy {}
    }

    /// Turns the caller-supplied `url` into an absolute URL and returns it only
    /// if it passes [`Self::is_allowed_detail_url`].
    ///
    /// Inputs without an `http://` / `https://` prefix are treated as
    /// scheme-less and get `https://` prepended (leading slashes are dropped).
    fn normalize_detail_url(url: &str) -> Option<String> {
        let normalized = if url.starts_with("http://") || url.starts_with("https://") {
            url.to_string()
        } else {
            format!("https://{}", url.trim_start_matches('/'))
        };

        Self::is_allowed_detail_url(&normalized).then_some(normalized)
    }

    /// Returns `true` only for `https` URLs whose host is `pimpbunny.com` or
    /// `www.pimpbunny.com` and whose path is not under
    /// `/contents/videos_screenshots/`.
    fn is_allowed_detail_url(url: &str) -> bool {
        let Ok(url) = Url::parse(url) else {
            return false;
        };
        if url.scheme() != "https" {
            return false;
        }
        let Some(host) = url.host_str() else {
            return false;
        };

        matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
            && !url.path().starts_with("/contents/videos_screenshots/")
    }

    /// Site root, used both as the warm-up target and as the `Referer` value.
    fn root_referer() -> &'static str {
        "https://pimpbunny.com/"
    }

    /// Builds the browser-like header set for an HTML request with the given
    /// `Referer`.
    fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
        vec![
            ("Referer".to_string(), referer.to_string()),
            (
                "User-Agent".to_string(),
                Self::FIREFOX_USER_AGENT.to_string(),
            ),
            ("Accept".to_string(), Self::HTML_ACCEPT.to_string()),
            ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
        ]
    }

    /// Same as [`Self::html_headers_with_referer`], plus a `Cookie` header when
    /// the requester reports one for `request_url`.
    fn headers_with_cookies(
        requester: &Requester,
        request_url: &str,
        referer: &str,
    ) -> Vec<(String, String)> {
        let mut headers = Self::html_headers_with_referer(referer);
        if let Some(cookie) = requester.cookie_header_for_url(request_url) {
            headers.push(("Cookie".to_string(), cookie));
        }
        headers
    }

    /// Issues a GET for the site root over HTTP/1.1 and deliberately ignores the
    /// outcome (best effort).
    ///
    /// NOTE(review): presumably this lets the requester collect cookies before
    /// the detail page is fetched — confirm against `Requester`.
    async fn warm_root_session(requester: &mut Requester) {
        let _ = requester
            .get_with_headers(
                Self::root_referer(),
                Self::html_headers_with_referer(Self::root_referer()),
                Some(Version::HTTP_11),
            )
            .await;
    }

    /// Lazily compiled matcher for `<script ... application/ld+json ...>` blocks;
    /// capture group 1 is the script body. `None` if the pattern fails to compile.
    fn json_ld_script_regex() -> Option<&'static Regex> {
        static RE: OnceLock<Option<Regex>> = OnceLock::new();
        RE.get_or_init(|| {
            Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()
        })
        .as_ref()
    }

    /// Lazily compiled matcher for a `"contentUrl": "..."` pair anywhere in the
    /// text; capture group 1 is the (still JSON-escaped) value.
    fn content_url_regex() -> Option<&'static Regex> {
        static RE: OnceLock<Option<Regex>> = OnceLock::new();
        RE.get_or_init(|| Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#).ok())
            .as_ref()
    }

    /// Scans every JSON-LD `<script>` in `text` and returns the first video-like
    /// object found by [`Self::find_video_object`].
    ///
    /// Scripts whose body is not valid JSON are skipped rather than aborting the
    /// search, so one malformed block cannot hide a valid one later in the page.
    fn extract_json_ld_video(text: &str) -> Option<Value> {
        Self::json_ld_script_regex()?
            .captures_iter(text)
            .filter_map(|captures| captures.get(1))
            .filter_map(|body| serde_json::from_str::<Value>(body.as_str().trim()).ok())
            .find_map(|parsed| Self::find_video_object(&parsed))
    }

    /// `true` when `value` has `"@type": "VideoObject"` or a non-blank string
    /// `contentUrl`.
    fn looks_like_video(value: &Value) -> bool {
        let is_video_type = value
            .get("@type")
            .and_then(Value::as_str)
            .is_some_and(|kind| kind == "VideoObject");
        let has_content_url = value
            .get("contentUrl")
            .and_then(Value::as_str)
            .is_some_and(|content_url| !content_url.trim().is_empty());

        is_video_type || has_content_url
    }

    /// Finds a video-like object in a parsed JSON-LD document.
    ///
    /// Checked in order: the value itself, the direct entries of its `@graph`
    /// array, and finally (recursively) each element when the value is an array.
    /// Returns a clone of the first match.
    fn find_video_object(parsed: &Value) -> Option<Value> {
        if Self::looks_like_video(parsed) {
            return Some(parsed.clone());
        }

        if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
            if let Some(item) = graph.iter().find(|item| Self::looks_like_video(item)) {
                return Some(item.clone());
            }
        }

        parsed
            .as_array()?
            .iter()
            .find_map(Self::find_video_object)
    }

    /// Returns the trimmed, non-empty `contentUrl` string of `json_ld`, if any.
    fn extract_stream_url(json_ld: &Value) -> Option<String> {
        json_ld
            .get("contentUrl")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .map(str::to_string)
    }

    /// Fallback: pulls the first `"contentUrl": "..."` value straight out of the
    /// raw page text.
    ///
    /// The captured text is a JSON string body, so JSON escapes (e.g. `\/`) are
    /// decoded; if decoding fails the captured text is used unchanged.
    fn extract_stream_url_from_html(text: &str) -> Option<String> {
        let raw = Self::content_url_regex()?.captures(text)?.get(1)?.as_str();
        let decoded = serde_json::from_str::<String>(&format!("\"{}\"", raw))
            .unwrap_or_else(|_| raw.to_string());
        let trimmed = decoded.trim();

        (!trimmed.is_empty()).then(|| trimmed.to_string())
    }
}

impl crate::proxies::Proxy for PimpbunnyProxy {
    /// Resolves `url` to a stream URL.
    ///
    /// Returns an empty string when the URL is not an allowed detail URL, when
    /// the detail page cannot be fetched, or when no `contentUrl` is found.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some(detail_url) = Self::normalize_detail_url(&url) else {
            return String::new();
        };

        // Work on a private clone of the shared requester.
        let mut requester = requester.get_ref().clone();
        Self::warm_root_session(&mut requester).await;

        let headers = Self::headers_with_cookies(&requester, &detail_url, Self::root_referer());
        let text = match requester
            .get_with_headers(&detail_url, headers, Some(Version::HTTP_2))
            .await
        {
            Ok(text) => text,
            Err(_) => return String::new(),
        };

        // Prefer structured JSON-LD; fall back to a raw text match.
        Self::extract_json_ld_video(&text)
            .and_then(|json_ld| Self::extract_stream_url(&json_ld))
            .or_else(|| Self::extract_stream_url_from_html(&text))
            .unwrap_or_default()
    }
}

#[cfg(test)]
mod tests {
    use super::PimpbunnyProxy;

    #[test]
    fn allows_only_pimpbunny_detail_urls() {
        assert!(PimpbunnyProxy::is_allowed_detail_url(
            "https://pimpbunny.com/videos/example-video/"
        ));
        assert!(PimpbunnyProxy::is_allowed_detail_url(
            "https://www.pimpbunny.com/video/example/"
        ));
        assert!(!PimpbunnyProxy::is_allowed_detail_url(
            "http://pimpbunny.com/videos/example-video/"
        ));
        assert!(!PimpbunnyProxy::is_allowed_detail_url(
            "https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg"
        ));
        assert!(!PimpbunnyProxy::is_allowed_detail_url(
            "https://example.com/videos/example-video/"
        ));
    }

    #[test]
    fn extracts_content_url_from_json_ld() {
        let html = r#"
            <script type="application/ld+json">{"contentUrl":"https://cdn.example/video.mp4"}</script>
        "#;

        let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("json-ld should parse");

        assert_eq!(
            PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
            Some("https://cdn.example/video.mp4")
        );
    }

    #[test]
    fn extracts_video_object_from_graph_script() {
        let html = r#"
            <script type="application/ld+json">{"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}</script>
        "#;

        let json_ld =
            PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse");

        assert_eq!(
            PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
            Some("https://cdn.example/graph.mp4")
        );
    }

    #[test]
    fn skips_json_ld_scripts_with_invalid_json() {
        let html = r#"
            <script type="application/ld+json">{not valid json</script>
            <script type="application/ld+json">{"@type":"VideoObject","contentUrl":"https://cdn.example/second.mp4"}</script>
        "#;

        let json_ld = PimpbunnyProxy::extract_json_ld_video(html)
            .expect("later valid script should still be found");

        assert_eq!(
            PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
            Some("https://cdn.example/second.mp4")
        );
    }

    #[test]
    fn falls_back_to_raw_content_url_match() {
        let html = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;

        assert_eq!(
            PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(),
            Some("https://cdn.example/fallback.mp4")
        );
    }

    #[test]
    fn fallback_decodes_json_escaped_slashes() {
        let html = r#"{"contentUrl":"https:\/\/cdn.example\/escaped.mp4"}"#;

        assert_eq!(
            PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(),
            Some("https://cdn.example/escaped.mp4")
        );
    }
}