diff --git a/src/providers/pimpbunny.rs b/src/providers/pimpbunny.rs index 01e5f47..719aa6d 100644 --- a/src/providers/pimpbunny.rs +++ b/src/providers/pimpbunny.rs @@ -2,6 +2,7 @@ use crate::DbPool; use crate::api::ClientVersion; use crate::providers::Provider; use crate::status::*; +use crate::util::parse_abbreviated_number; use crate::util::cache::VideoCache; use crate::util::discord::{format_error_chain, send_discord_error_report}; use crate::util::requester::Requester; @@ -10,7 +11,6 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; -use futures::future::join_all; use htmlentity::entity::{ICodedDataTrait, decode}; use std::sync::{Arc, RwLock}; use std::{thread, vec}; @@ -205,6 +205,32 @@ impl PimpbunnyProvider { ) } + fn is_allowed_detail_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + matches!(host, "pimpbunny.com" | "www.pimpbunny.com") + && !url.path().starts_with("/contents/videos_screenshots/") + } + + fn proxied_video(&self, options: &ServerOptions, page_url: &str) -> String { + if page_url.is_empty() || !Self::is_allowed_detail_url(page_url) { + return page_url.to_string(); + } + + crate::providers::build_proxy_url( + options, + "pimpbunny", + &crate::providers::strip_url_scheme(page_url), + ) + } + fn root_referer(&self) -> String { format!("{}/", self.url.trim_end_matches('/')) } @@ -407,9 +433,7 @@ impl PimpbunnyProvider { return Ok(old_items); } }; - let video_items: Vec = self - .get_video_items_from_html(text.clone(), &mut requester) - .await; + let video_items = self.get_video_items_from_html(text.clone(), &options); if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); @@ -509,9 +533,7 @@ impl PimpbunnyProvider { return Ok(old_items); } }; - let video_items: Vec = self - .get_video_items_from_html(text.clone(), &mut requester) - .await; + let video_items = self.get_video_items_from_html(text.clone(), &options); if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); @@ -521,11 +543,7 @@ impl PimpbunnyProvider { Ok(video_items) } - async fn get_video_items_from_html( - &self, - html: String, - requester: &mut Requester, - ) -> Vec { + fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec { if html.is_empty() || html.contains("404 Not Found") { return vec![]; } @@ -539,19 +557,42 @@ impl PimpbunnyProvider { None => return vec![], }; - let futures = block + block .split("
") .skip(1) - .map(|el| self.get_video_item(el.to_string(), requester.clone())); - - join_all(futures) - .await + .filter_map(|el| self.get_video_item(el.to_string(), options).ok()) .into_iter() - .filter_map(Result::ok) .collect() } - async fn get_video_item(&self, seg: String, mut requester: Requester) -> Result { + fn extract_duration_from_segment(&self, seg: &str) -> u32 { + for token in seg.split(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace()) { + let candidate = token.trim(); + if candidate.is_empty() || !candidate.contains(':') { + continue; + } + if let Some(parsed) = parse_time_to_seconds(candidate) { + return parsed as u32; + } + } + 0 + } + + fn extract_views_from_segment(&self, seg: &str) -> u32 { + let Some(before_views) = seg.split("Views").next() else { + return 0; + }; + let candidate = before_views + .split(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace()) + .filter(|value| !value.trim().is_empty()) + .next_back() + .unwrap_or("") + .trim_matches(|ch: char| ch == '(' || ch == ')' || ch == ','); + + parse_abbreviated_number(candidate).unwrap_or(0) + } + + fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result { let video_url = seg .split(" href=\"") .nth(1) @@ -606,71 +647,22 @@ impl PimpbunnyProvider { .and_then(|s| s.split('"').next()) .unwrap_or("") .to_string(); - - let (tags, formats, views, duration) = - self.extract_media(&video_url, &mut requester).await?; + let proxy_url = self.proxied_video(options, &video_url); + let views = self.extract_views_from_segment(&seg); + let duration = self.extract_duration_from_segment(&seg); + let formats = vec![ + VideoFormat::new(proxy_url.clone(), "auto".into(), "video/mp4".into()) + .format_id("auto".into()) + .format_note("proxied".into()), + ]; Ok( - VideoItem::new(id, title, video_url, "pimpbunny".into(), thumb, duration) + VideoItem::new(id, title, proxy_url, "pimpbunny".into(), thumb, duration) .formats(formats) - .tags(tags) .preview(preview) .views(views), ) } - - async fn extract_media( - &self, - url: &str, - requester: &mut Requester, - ) -> Result<(Vec, Vec, u32, u32)> { - self.warm_root_session(requester).await; - let headers = self.headers_with_cookies(requester, url, &self.root_referer()); - let text = requester - .get_with_headers(url, headers, Some(Version::HTTP_2)) - .await - .map_err(|e| Error::from(format!("{}", e)))?; - - let json_str = text - .split("application/ld+json\">") - .nth(1) - .and_then(|s| s.split("").next()) - .ok_or_else(|| ErrorKind::Parse("ld+json".into()))?; - - let json: serde_json::Value = serde_json::from_str(json_str)?; - - let video_url = json["contentUrl"].as_str().unwrap_or("").to_string(); - let quality = video_url - .split('_') - .last() - .and_then(|s| s.split('.').next()) - .unwrap_or("") - .to_string(); - - let views = json["interactionStatistic"] - .as_array() - .and_then(|a| a.first()) - .and_then(|v| v["userInteractionCount"].as_str()) - .and_then(|v| v.parse().ok()) - .unwrap_or(0); - - let duration = json["duration"] - .as_str() - .map(|d| parse_time_to_seconds(&d.replace(['P', 'T', 'H', 'M', 'S'], "")).unwrap_or(0)) - .unwrap_or(0) as u32; - - let mut format = VideoFormat::new(video_url.clone(), quality, "video/mp4".into()) - .http_header("Referer".to_string(), url.to_string()) - .http_header( - "User-Agent".to_string(), - Self::FIREFOX_USER_AGENT.to_string(), - ); - if let Some(cookie) = requester.cookie_header_for_url(&video_url) { - format.add_http_header("Cookie".to_string(), cookie); - } - - Ok((vec![], vec![format], views, duration)) - } } #[async_trait] @@ -716,6 +708,7 @@ impl Provider for PimpbunnyProvider { mod tests { use super::PimpbunnyProvider; use crate::videos::ServerOptions; + use std::sync::{Arc, RwLock}; #[test] fn rewrites_allowed_thumbs_to_proxy_urls() { @@ -746,4 +739,85 @@ mod tests { "https://example.com/proxy/pimpbunny-thumb/pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg" ); } + + #[test] + fn rewrites_video_pages_to_redirect_proxy() { + let provider = PimpbunnyProvider::new(); + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("https://example.com".to_string()), + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: None, + sexuality: None, + }; + + let proxied = provider.proxied_video( + &options, + "https://pimpbunny.com/videos/example-video/", + ); + + assert_eq!( + proxied, + "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/" + ); + } + + #[test] + fn parses_listing_without_detail_requests() { + let provider = PimpbunnyProvider { + url: "https://pimpbunny.com".to_string(), + stars: Arc::new(RwLock::new(vec![])), + categories: Arc::new(RwLock::new(vec![])), + }; + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("https://example.com".to_string()), + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: None, + sexuality: None, + }; + + let html = r#" +
+
+ + -pagination-wrapper + "#; + + let items = provider.get_video_items_from_html(html.to_string(), &options); + + assert_eq!(items.len(), 1); + assert_eq!( + items[0].url, + "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/" + ); + assert_eq!(items[0].duration, 754); + assert_eq!(items[0].views, Some(1200)); + assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1)); + } } diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 9176b89..be20024 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -1,5 +1,6 @@ use ntex::web; +use crate::proxies::pimpbunny::PimpbunnyProxy; use crate::proxies::porndish::PorndishProxy; use crate::proxies::spankbang::SpankbangProxy; use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester}; @@ -8,6 +9,7 @@ pub mod hanimecdn; pub mod hqpornerthumb; pub mod javtiful; pub mod noodlemagazine; +pub mod pimpbunny; pub mod pimpbunnythumb; pub mod porndish; pub mod porndishthumb; @@ -18,6 +20,7 @@ pub mod sxyprn; pub enum AnyProxy { Sxyprn(SxyprnProxy), Javtiful(javtiful::JavtifulProxy), + Pimpbunny(PimpbunnyProxy), Porndish(PorndishProxy), Spankbang(SpankbangProxy), } @@ -31,6 +34,7 @@ impl Proxy for AnyProxy { match self { AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await, AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await, + AnyProxy::Pimpbunny(p) => p.get_video_url(url, requester).await, AnyProxy::Porndish(p) => p.get_video_url(url, requester).await, AnyProxy::Spankbang(p) => p.get_video_url(url, requester).await, } diff --git a/src/proxies/pimpbunny.rs b/src/proxies/pimpbunny.rs new file mode 100644 index 0000000..b24b12d --- /dev/null +++ b/src/proxies/pimpbunny.rs @@ -0,0 +1,160 @@ +use ntex::web; +use serde_json::Value; +use url::Url; +use wreq::Version; + +use crate::util::requester::Requester; + +#[derive(Debug, Clone)] +pub struct PimpbunnyProxy {} + +impl PimpbunnyProxy { + const FIREFOX_USER_AGENT: &'static str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; + const HTML_ACCEPT: &'static str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + + pub fn new() -> Self { + PimpbunnyProxy {} + } + + fn normalize_detail_url(url: &str) -> Option { + let normalized = if url.starts_with("http://") || url.starts_with("https://") { + url.to_string() + } else { + format!("https://{}", url.trim_start_matches('/')) + }; + + Self::is_allowed_detail_url(&normalized).then_some(normalized) + } + + fn is_allowed_detail_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + + matches!(host, "pimpbunny.com" | "www.pimpbunny.com") + && !url.path().starts_with("/contents/videos_screenshots/") + } + + fn root_referer() -> &'static str { + "https://pimpbunny.com/" + } + + fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), Self::FIREFOX_USER_AGENT.to_string()), + ("Accept".to_string(), Self::HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + fn headers_with_cookies( + requester: &Requester, + request_url: &str, + referer: &str, + ) -> Vec<(String, String)> { + let mut headers = Self::html_headers_with_referer(referer); + if let Some(cookie) = requester.cookie_header_for_url(request_url) { + headers.push(("Cookie".to_string(), cookie)); + } + headers + } + + async fn warm_root_session(requester: &mut Requester) { + let _ = requester + .get_with_headers( + Self::root_referer(), + Self::html_headers_with_referer(Self::root_referer()), + Some(Version::HTTP_11), + ) + .await; + } + + fn extract_json_ld_video(text: &str) -> Option { + let json_str = text + .split("application/ld+json\">") + .nth(1) + .and_then(|value| value.split("").next())?; + + serde_json::from_str(json_str).ok() + } + + fn extract_stream_url(json_ld: &Value) -> Option { + json_ld + .get("contentUrl") + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string) + } +} + +impl crate::proxies::Proxy for PimpbunnyProxy { + async fn get_video_url(&self, url: String, requester: web::types::State) -> String { + let Some(detail_url) = Self::normalize_detail_url(&url) else { + return String::new(); + }; + + let mut requester = requester.get_ref().clone(); + Self::warm_root_session(&mut requester).await; + let headers = Self::headers_with_cookies(&requester, &detail_url, Self::root_referer()); + let text = match requester + .get_with_headers(&detail_url, headers, Some(Version::HTTP_2)) + .await + { + Ok(text) => text, + Err(_) => return String::new(), + }; + + let Some(json_ld) = Self::extract_json_ld_video(&text) else { + return String::new(); + }; + + Self::extract_stream_url(&json_ld).unwrap_or_default() + } +} + +#[cfg(test)] +mod tests { + use super::PimpbunnyProxy; + + #[test] + fn allows_only_pimpbunny_detail_urls() { + assert!(PimpbunnyProxy::is_allowed_detail_url( + "https://pimpbunny.com/videos/example-video/" + )); + assert!(PimpbunnyProxy::is_allowed_detail_url( + "https://www.pimpbunny.com/video/example/" + )); + assert!(!PimpbunnyProxy::is_allowed_detail_url( + "http://pimpbunny.com/videos/example-video/" + )); + assert!(!PimpbunnyProxy::is_allowed_detail_url( + "https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg" + )); + assert!(!PimpbunnyProxy::is_allowed_detail_url( + "https://example.com/videos/example-video/" + )); + } + + #[test] + fn extracts_content_url_from_json_ld() { + let html = r#" + + "#; + + let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("json-ld should parse"); + assert_eq!( + PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(), + Some("https://cdn.example/video.mp4") + ); + } +} diff --git a/src/proxy.rs b/src/proxy.rs index 05ba118..4413c96 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -1,6 +1,7 @@ use ntex::web::{self, HttpRequest}; use crate::proxies::javtiful::JavtifulProxy; +use crate::proxies::pimpbunny::PimpbunnyProxy; use crate::proxies::porndish::PorndishProxy; use crate::proxies::spankbang::SpankbangProxy; use crate::proxies::sxyprn::SxyprnProxy; @@ -28,6 +29,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::post().to(proxy2redirect)) .route(web::get().to(proxy2redirect)), ) + .service( + web::resource("/pimpbunny/{endpoint}*") + .route(web::post().to(proxy2redirect)) + .route(web::get().to(proxy2redirect)), + ) .service( web::resource("/noodlemagazine/{endpoint}*") .route(web::post().to(crate::proxies::noodlemagazine::serve_media)) @@ -79,6 +85,7 @@ fn get_proxy(proxy: &str) -> Option { match proxy { "sxyprn" => Some(AnyProxy::Sxyprn(SxyprnProxy::new())), "javtiful" => Some(AnyProxy::Javtiful(JavtifulProxy::new())), + "pimpbunny" => Some(AnyProxy::Pimpbunny(PimpbunnyProxy::new())), "porndish" => Some(AnyProxy::Porndish(PorndishProxy::new())), "spankbang" => Some(AnyProxy::Spankbang(SpankbangProxy::new())), _ => None,