diff --git a/build.rs b/build.rs index 5d3c296..e83e489 100644 --- a/build.rs +++ b/build.rs @@ -301,6 +301,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "thaiporntv", ty: "ThaipornTvProvider", }, + ProviderDef { + id: "allpornstream", + module: "allpornstream", + ty: "AllPornStreamProvider", + }, ]; fn main() { diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index e85774e..43984bb 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -7,6 +7,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | Provider | Group | `/api/uploaders` | Uses local `/proxy` | Notes | | --- | --- | --- | --- | --- | | `all` | `meta-search` | no | no | Aggregates all compiled providers. | +| `allpornstream` | `mainstream-tube` | no | yes | Next.js App Router scraper; extracts cards via `data-thumb-id/href/title/images` attributes; redirect proxy lazy-resolves VOE/DoodStream/StreamTape/FileMoon embeds. | | `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. | | `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. | | `blowjobspro` | `mainstream-tube` | no | no | KVS-style HTML provider with async search pagination and category shortcut routing. | @@ -80,6 +81,7 @@ These resolve a provider-specific input into a `302 Location`. - `/proxy/pornhd3x/{endpoint}*` - `/proxy/shooshtime/{endpoint}*` - `/proxy/pimpbunny/{endpoint}*` +- `/proxy/allpornstream/{endpoint}*` ### Media/image proxies diff --git a/src/providers/allpornstream.rs b/src/providers/allpornstream.rs new file mode 100644 index 0000000..d4dbc32 --- /dev/null +++ b/src/providers/allpornstream.rs @@ -0,0 +1,598 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use chrono::DateTime; +use error_chain::error_chain; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use regex::Regex; +use scraper::{Html, Selector}; +use std::collections::HashMap; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "aggregator", "mixed"], + }; + +const BASE_URL: &str = "https://allpornstream.com"; +const CHANNEL_ID: &str = "allpornstream"; +const BROWSER_UA: &str = + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + } +} + +#[derive(Debug, Clone)] +enum Target { + Latest { page: u32 }, + Search { query: String, page: u32 }, + Category { slug: String, page: u32 }, + Producer { slug: String, page: u32 }, + Actor { slug: String, page: u32 }, +} + +#[derive(Debug, Clone)] +pub struct AllPornStreamProvider {} + +impl AllPornStreamProvider { + pub fn new() -> Self { + Self {} + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "All Porn Stream".to_string(), + description: "Free HD porn videos aggregated from major studios and independent creators.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=allpornstream.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse the latest feed.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }], + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Producer".to_string(), + description: "Jump directly to a studio or producer page. Use the slug from the URL (e.g. brazzers).".to_string(), + systemImage: "building.2".to_string(), + colorName: "purple".to_string(), + options: vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }], + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn build_url(target: &Target) -> String { + match target { + Target::Latest { page } => { + if *page > 1 { + format!("{BASE_URL}/?page={page}") + } else { + BASE_URL.to_string() + } + } + Target::Search { query, page } => { + let encoded = + utf8_percent_encode(query, NON_ALPHANUMERIC).to_string(); + if *page > 1 { + format!("{BASE_URL}/?search={encoded}&page={page}") + } else { + format!("{BASE_URL}/?search={encoded}") + } + } + Target::Category { slug, page } => { + if *page > 1 { + format!("{BASE_URL}/categories/{slug}?page={page}") + } else { + format!("{BASE_URL}/categories/{slug}") + } + } + Target::Producer { slug, page } => { + if *page > 1 { + format!("{BASE_URL}/producers/{slug}?page={page}") + } else { + format!("{BASE_URL}/producers/{slug}") + } + } + Target::Actor { slug, page } => { + if *page > 1 { + format!("{BASE_URL}/actors/{slug}?page={page}") + } else { + format!("{BASE_URL}/actors/{slug}") + } + } + } + } + + fn parse_duration(text: &str) -> u32 { + let parts: Vec = text + .trim() + .split(':') + .filter_map(|p| p.parse::().ok()) + .collect(); + match parts.as_slice() { + [m, s] => m * 60 + s, + [h, m, s] => h * 3600 + m * 60 + s, + _ => 0, + } + } + + fn parse_uploaded_at(dt: &str) -> Option { + DateTime::parse_from_rfc3339(dt) + .ok() + .map(|d| d.timestamp() as u64) + } + + fn extract_first_image(data_images: &str) -> String { + // data_images is a JSON array, already HTML-decoded by scraper's parser. + // Find the first https:// URL in it. + if let Some(start) = data_images.find("https://") { + let rest = &data_images[start..]; + if let Some(end) = rest.find('"') { + return rest[..end].to_string(); + } + } + String::new() + } + + fn slug_to_title(slug: &str) -> String { + slug.split(['-', '_']) + .filter(|s| !s.is_empty()) + .map(|s| { + let mut chars = s.chars(); + match chars.next() { + None => String::new(), + Some(f) => format!("{}{}", f.to_uppercase(), chars.collect::()), + } + }) + .collect::>() + .join(" ") + } + + fn parse_listing(&self, html: &str, options: &ServerOptions) -> Vec { + let document = Html::parse_document(html); + + let card_sel = match Selector::parse("[data-thumb-id][data-href][data-title][data-images]") + { + Ok(s) => s, + Err(_) => return vec![], + }; + let time_sel = match Selector::parse("time[datetime]") { + Ok(s) => s, + Err(_) => return vec![], + }; + let studio_sel = match Selector::parse("[data-ga-category='thumbnail_studio']") { + Ok(s) => s, + Err(_) => return vec![], + }; + let actor_sel = match Selector::parse("[data-ga-category='thumbnail_actor']") { + Ok(s) => s, + Err(_) => return vec![], + }; + + // Duration: span with class starting "absolute bottom-2" containing a time string + let dur_re = match Regex::new( + r#"]*class="absolute[^"]*"[^>]*>(\d+:\d{2}(?::\d{2})?)"#, + ) { + Ok(r) => r, + Err(_) => return vec![], + }; + + // Views: number directly after the eye-icon SVG closing tag + let views_re = + match Regex::new(r"\s*(\d+)\s*") { + Ok(r) => r, + Err(_) => return vec![], + }; + + let mut items = Vec::new(); + + for card in document.select(&card_sel) { + let uuid = match card.value().attr("data-thumb-id") { + Some(v) if !v.is_empty() => v.to_string(), + _ => continue, + }; + + let href = match card.value().attr("data-href") { + Some(v) if v.starts_with('/') => v.to_string(), + _ => continue, + }; + + let title = match card.value().attr("data-title") { + Some(v) if !v.is_empty() => v.to_string(), + _ => continue, + }; + + let images_raw = card.value().attr("data-images").unwrap_or_default(); + let thumb = Self::extract_first_image(images_raw); + + let card_html = card.html(); + + // Duration from the overlay span + let duration = dur_re + .captures(&card_html) + .and_then(|c| c.get(1)) + .map(|m| Self::parse_duration(m.as_str())) + .unwrap_or(0); + + // Views from after the eye icon SVG + let views = views_re + .captures(&card_html) + .and_then(|c| c.get(1)) + .and_then(|m| m.as_str().parse::().ok()); + + // video.url is the stable page URL; the proxy URL goes into formats so the + // client can supply the required Referer header alongside the stream request. + let detail_url = format!("{BASE_URL}{href}"); + let proxy_target = strip_url_scheme(&detail_url); + let proxy_url = build_proxy_url(options, CHANNEL_ID, &proxy_target); + + let mut item = VideoItem::new( + uuid, + title, + detail_url.clone(), + CHANNEL_ID.to_string(), + thumb, + duration, + ); + + if !proxy_url.is_empty() { + let mut format = + VideoFormat::new(proxy_url, "auto".to_string(), "video/mp4".to_string()); + format.add_http_header("Referer".to_string(), detail_url.clone()); + item = item.formats(vec![format]); + } + + // Upload date + if let Some(time_el) = card.select(&time_sel).next() { + if let Some(dt) = time_el.value().attr("datetime") { + if let Some(ts) = Self::parse_uploaded_at(dt) { + item = item.uploaded_at(ts); + } + } + } + + if let Some(v) = views { + item = item.views(v); + } + + // Producer / studio + if let Some(studio_link) = card.select(&studio_sel).next() { + let label = studio_link + .value() + .attr("aria-label") + .unwrap_or_default(); + // "producer: ONLY FANS" → "ONLY FANS" + let raw_name = label + .strip_prefix("producer: ") + .unwrap_or_default() + .trim() + .to_string(); + if !raw_name.is_empty() { + let producer_href = + studio_link.value().attr("href").unwrap_or_default(); + let slug = producer_href + .trim_start_matches("/producers/") + .to_string(); + let display = Self::slug_to_title(&raw_name.to_lowercase().replace(' ', "-")); + item = item.uploader(display.clone()); + if !slug.is_empty() { + item = item.uploader_url(format!("{BASE_URL}/producers/{slug}")); + item.uploaderId = Some(format!("{CHANNEL_ID}:{slug}")); + } + } + } + + // Actors as tags – deduplicate by href, keep the longest text per actor + let mut actor_map: HashMap = HashMap::new(); + for actor_link in card.select(&actor_sel) { + let actor_href = actor_link + .value() + .attr("href") + .unwrap_or_default() + .to_string(); + let text = actor_link + .text() + .collect::() + .trim() + .to_string(); + if !actor_href.is_empty() && !text.is_empty() { + actor_map + .entry(actor_href) + .and_modify(|v| { + if text.len() > v.len() { + *v = text.clone(); + } + }) + .or_insert(text); + } + } + let mut actors: Vec = actor_map.into_values().collect(); + actors.sort(); + if !actors.is_empty() { + item = item.tags(actors); + } + + items.push(item); + } + + items + } + + fn resolve_target(query: &str, _sort: &str, page: u32, options: &ServerOptions) -> Target { + // Explicit shortcuts: "actor:slug", "producer:slug", "category:slug" + if let Some(slug) = query.strip_prefix("actor:") { + return Target::Actor { + slug: slug.to_string(), + page, + }; + } + if let Some(slug) = query.strip_prefix("producer:") { + return Target::Producer { + slug: slug.to_string(), + page, + }; + } + if let Some(slug) = query.strip_prefix("category:") { + return Target::Category { + slug: slug.to_string(), + page, + }; + } + + // Keyword search + if !query.is_empty() { + return Target::Search { + query: query.to_string(), + page, + }; + } + + // Producer filter from options.sites + if let Some(sites) = &options.sites { + let sites = sites.trim(); + if !sites.is_empty() && sites != "all" { + return Target::Producer { + slug: sites.to_string(), + page, + }; + } + } + + Target::Latest { page } + } + + async fn fetch_and_parse( + &self, + cache: VideoCache, + target: Target, + options: ServerOptions, + ) -> Result> { + let url = Self::build_url(&target); + + if let Some((time, items)) = cache.get(&url) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.clone()); + } + } + + let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_and_parse"); + + let html = requester + .get_with_headers( + &url, + vec![ + ("user-agent".to_string(), BROWSER_UA.to_string()), + ("accept".to_string(), "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string()), + ("accept-language".to_string(), "en-US,en;q=0.5".to_string()), + ], + Some(wreq::Version::HTTP_11), + ) + .await + .map_err(|e| Error::from(format!("request failed url={url}: {e}")))?; + + if html.is_empty() { + return Ok(vec![]); + } + + let items = self.parse_listing(&html, &options); + + if !items.is_empty() { + cache.insert(url, items.clone()); + } + + Ok(items) + } +} + +#[async_trait] +impl Provider for AllPornStreamProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let _ = per_page; + let page = page.parse::().unwrap_or(1); + let query_str = query.unwrap_or_default(); + let target = Self::resolve_target(&query_str, &sort, page, &options); + + match self.fetch_and_parse(cache, target, options).await { + Ok(items) => items, + Err(e) => { + report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::AllPornStreamProvider; + use crate::videos::ServerOptions; + + fn make_options() -> ServerOptions { + ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("http://127.0.0.1:18080".to_string()), + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: None, + sexuality: None, + } + } + + #[test] + fn builds_latest_urls() { + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Latest { page: 1 }), + "https://allpornstream.com" + ); + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Latest { page: 2 }), + "https://allpornstream.com/?page=2" + ); + } + + #[test] + fn builds_search_urls() { + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Search { + query: "brazzers".to_string(), + page: 1 + }), + "https://allpornstream.com/?search=brazzers" + ); + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Search { + query: "big tits".to_string(), + page: 2 + }), + "https://allpornstream.com/?search=big%20tits&page=2" + ); + } + + #[test] + fn builds_producer_urls() { + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Producer { + slug: "brazzers".to_string(), + page: 1 + }), + "https://allpornstream.com/producers/brazzers" + ); + assert_eq!( + AllPornStreamProvider::build_url(&super::Target::Producer { + slug: "brazzers".to_string(), + page: 2 + }), + "https://allpornstream.com/producers/brazzers?page=2" + ); + } + + #[test] + fn parses_duration() { + assert_eq!(AllPornStreamProvider::parse_duration("18:42"), 1122); + assert_eq!(AllPornStreamProvider::parse_duration("1:23:45"), 5025); + assert_eq!(AllPornStreamProvider::parse_duration("00:59"), 59); + } + + #[test] + fn extracts_first_image() { + let input = + r#"["https://example.com/thumb1.jpg","https://example.com/thumb2.jpg"]"#; + assert_eq!( + AllPornStreamProvider::extract_first_image(input), + "https://example.com/thumb1.jpg" + ); + } + + #[test] + fn parses_cards_from_html() { + let provider = AllPornStreamProvider::new(); + let options = make_options(); + let html = r#" +
+ 18:42 + OF + BB + Bonnie Blue + +
416
+
+ "#; + + let items = provider.parse_listing(html, &options); + assert_eq!(items.len(), 1); + let item = &items[0]; + assert_eq!(item.id, "34a7e37d-7fca-4f30-ad0b-3ab134a00f9f"); + assert_eq!(item.title, "Test Video Title"); + assert_eq!(item.duration, 1122); + assert_eq!(item.views, Some(416)); + assert!(item.thumb.contains("example.com/thumb.jpg")); + assert_eq!(item.uploader.as_deref(), Some("Only Fans")); + assert!(item.tags.as_ref().unwrap().contains(&"Bonnie Blue".to_string())); + assert!(item.uploadedAt.is_some()); + // video.url is the page URL; proxy URL is in formats[0] + assert_eq!( + item.url, + "https://allpornstream.com/post/34a7e37d-7fca-4f30-ad0b-3ab134a00f9f/test-video" + ); + let format = &item.formats.as_ref().unwrap()[0]; + assert!(format.url.contains("/proxy/allpornstream/")); + assert!(format.url.contains("allpornstream.com/post/34a7e37d")); + let headers = format.http_headers_pairs(); + assert!(headers.iter().any(|(k, _)| k.to_lowercase() == "referer")); + } +} diff --git a/src/providers/sxyprn.rs b/src/providers/sxyprn.rs index 5fe50ca..ef72c7d 100644 --- a/src/providers/sxyprn.rs +++ b/src/providers/sxyprn.rs @@ -298,13 +298,13 @@ impl SxyprnProvider { } // take content before "