diff --git a/build.rs b/build.rs index 4a34229..8edb141 100644 --- a/build.rs +++ b/build.rs @@ -37,7 +37,7 @@ const PROVIDERS: &[ProviderDef] = &[ ty: "PornhubProvider", }, ProviderDef { - id: "pornhub-shorties", + id: "ph-shorties", module: "pornhub_shorties", ty: "PornhubShortiesProvider", }, @@ -302,7 +302,7 @@ const PROVIDERS: &[ProviderDef] = &[ ty: "ThaipornTvProvider", }, ProviderDef { - id: "allpornstream", + id: "aps", module: "allpornstream", ty: "AllPornStreamProvider", }, @@ -346,6 +346,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "xvideos", ty: "XvideosProvider", }, + ProviderDef { + id: "wowxxx", + module: "wowxxx", + ty: "WowxxxProvider", + }, ProviderDef { id: "camsoda", module: "camsoda", diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index acee089..1575984 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -73,6 +73,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. 
| | `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:` query prefix to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). | | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. 
| +| `wowxxx` | `studio-network` | no | no | HTML scraper for wow.xxx premium aggregator; default feed `/latest-updates/`, with the `popular` and `rated` sorts served from `/most-popular/all/` and `/top-rated/all/`; pages N≥2 append a `/{N}/` suffix (for example `/latest-updates/2/`); search `/search/{query}/relevance/` with the same page suffix; supports `site:`/`studio:`/`network:`/`model:`/`pornstar:`/`tag:`/`cat:`/`category:` query shortcuts to direct archive routes; list cards expose preview clips (`cast.wow.xxx/preview/*.mp4`), thumbnails (`img.wow.xxx/.../medium@2x/1.jpg`), duration, rating, views, site (as uploader), and model tags; `video.url` is the detail page URL and yt-dlp resolves HTML5 MP4 formats dynamically; no proxy needed. | ## Proxy Routes diff --git a/src/providers/allpornstream.rs b/src/providers/allpornstream.rs index 287fd3d..de4990b 100644 --- a/src/providers/allpornstream.rs +++ b/src/providers/allpornstream.rs @@ -590,7 +590,7 @@ mod tests { "https://allpornstream.com/post/34a7e37d-7fca-4f30-ad0b-3ab134a00f9f/test-video" ); let format = &item.formats.as_ref().unwrap()[0]; - assert!(format.url.contains("/proxy/allpornstream/")); + assert!(format.url.contains("/proxy/aps/")); assert!(format.url.contains("allpornstream.com/post/34a7e37d")); let headers = format.http_headers_pairs(); assert!(headers.iter().any(|(k, _)| k.to_lowercase() == "referer")); diff --git a/src/providers/wowxxx.rs b/src/providers/wowxxx.rs new file mode 100644 index 0000000..4ecd109 --- /dev/null +++ b/src/providers/wowxxx.rs @@ -0,0 +1,443 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use wreq::Version; + +pub const 
/// Listing that a request resolves to before a concrete URL is built.
///
/// `PartialEq`/`Eq` are derived so that resolved targets can be compared
/// directly (for example in unit tests of the query-routing helpers).
#[derive(Debug, Clone, PartialEq, Eq)]
enum Target {
    /// Default feed, served from `/latest-updates/`.
    Latest,
    /// The `popular` sort, served from `/most-popular/all/`.
    MostPopular,
    /// The `rated` sort, served from `/top-rated/all/`.
    TopRated,
    /// Free-text search; holds the cleaned query, which is URL-encoded into
    /// `/search/{query}/relevance/` when the URL is built.
    Search(String),
    /// Direct archive route; holds the archive's base URL, to which the page
    /// suffix is appended.
    Archive(String),
}
/// Extracts the path component that directly follows `/{segment}/` in `url`.
///
/// Only the first occurrence of the marker is considered. The slug ends at
/// the next `/`, `?` or `#`, so a query string or fragment attached to the
/// last path component never ends up inside the returned slug.
///
/// Returns `None` when the marker is absent or when nothing but separators
/// and whitespace follows it.
fn slug_from_url(url: &str, segment: &str) -> Option<String> {
    let marker = format!("/{segment}/");
    let (_, tail) = url.split_once(marker.as_str())?;

    tail.trim_matches('/')
        // Stop at the first path, query or fragment delimiter.
        .split(['/', '?', '#'])
        .next()
        .map(str::trim)
        .filter(|slug| !slug.is_empty())
        .map(str::to_owned)
}
cleaned + } + + fn page_url(base: &str, page: u16) -> String { + let base = base.trim_end_matches('/'); + if page <= 1 { + format!("{base}/") + } else { + format!("{base}/{page}/") + } + } + + fn target_url(target: &Target, page: u16) -> String { + match target { + Target::Latest => Self::page_url(&format!("{BASE_URL}/latest-updates"), page), + Target::MostPopular => Self::page_url(&format!("{BASE_URL}/most-popular/all"), page), + Target::TopRated => Self::page_url(&format!("{BASE_URL}/top-rated/all"), page), + Target::Search(query) => { + let encoded: String = + url::form_urlencoded::byte_serialize(query.trim().as_bytes()).collect(); + let base = format!("{BASE_URL}/search/{encoded}/relevance"); + Self::page_url(&base, page) + } + Target::Archive(base) => Self::page_url(base, page), + } + } + + fn resolve_query_target(query: &str) -> Option { + let q = query.trim(); + let lower = q.to_ascii_lowercase(); + let prefixes = [ + ("site:", "sites"), + ("studio:", "sites"), + ("network:", "networks"), + ("model:", "models"), + ("pornstar:", "models"), + ("tag:", "tags"), + ("cat:", "categories"), + ("category:", "categories"), + ]; + for (prefix, segment) in prefixes { + if let Some((_, value)) = lower.split_once(prefix) { + let slug = value.trim().trim_matches('/'); + if !slug.is_empty() { + return Some(Target::Archive(format!("{BASE_URL}/{segment}/{slug}/"))); + } + } + } + None + } + + fn resolve_target(query: Option<&str>, sort: &str) -> Target { + if let Some(q) = query { + let trimmed = q.trim(); + if !trimmed.is_empty() { + if let Some(shortcut) = Self::resolve_query_target(trimmed) { + return shortcut; + } + return Target::Search(Self::normalize_search_query(trimmed)); + } + } + + match sort { + "popular" => Target::MostPopular, + "rated" => Target::TopRated, + _ => Target::Latest, + } + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + 
("Accept-Language".to_string(), "en-US,en;q=0.5".to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + async fn fetch_html( + requester: &mut crate::util::requester::Requester, + url: &str, + ) -> Result { + requester + .get_with_headers(url, Self::html_headers(BASE_URL), Some(Version::HTTP_11)) + .await + .map_err(|e| Error::from(format!("request failed for {url}: {e}"))) + } + + fn parse_duration(duration_text: &str) -> u32 { + let cleaned = duration_text + .replace("Full Video", "") + .replace("Full", "") + .trim() + .to_string(); + parse_time_to_seconds(&cleaned).unwrap_or(0) as u32 + } + + fn parse_video_items(html: &str) -> Result> { + let document = Html::parse_document(html); + + let item_sel = Self::selector("div.list-videos div.item")?; + let a_sel = Self::selector("a.thumb_img")?; + let title_sel = Self::selector("a.thumb_title strong.title, a.thumb_title")?; + let img_sel = Self::selector("div.thumb__img img")?; + let thumb_wrap_sel = Self::selector("div.thumb__img")?; + let duration_sel = Self::selector("span.duration")?; + let rating_sel = Self::selector("div.rating")?; + let views_sel = Self::selector("div.views")?; + let site_sel = Self::selector("a.models__item.thumb_cs")?; + let model_sel = Self::selector("a.models__item.thumb_model")?; + let id_sel = Self::selector("span.js-favourites[data-object_id]")?; + + let rating_re = Regex::new(r"(\d{1,3})").map_err(|e| Error::from(e.to_string()))?; + + let mut out = vec![]; + for item in document.select(&item_sel) { + let Some(anchor) = item.select(&a_sel).next() else { + continue; + }; + let href = anchor.value().attr("href").unwrap_or("").trim(); + if href.is_empty() { + continue; + } + let video_url = Self::normalize_url(href); + + let id = item + .select(&id_sel) + .next() + .and_then(|el| el.value().attr("data-object_id")) + .map(|v| v.to_string()) + .or_else(|| { + let tail = video_url.trim_end_matches('/').split('/').next_back()?; + if tail.is_empty() { + None + } else { + 
Some(tail.to_string()) + } + }) + .unwrap_or_default(); + if id.is_empty() { + continue; + } + + let title = item + .select(&title_sel) + .next() + .map(|el| Self::text_of(&el)) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| { + anchor + .value() + .attr("title") + .map(Self::decode_html) + .unwrap_or_else(|| "Untitled".to_string()) + }); + + let thumb = item + .select(&img_sel) + .next() + .and_then(|img| { + img.value() + .attr("data-src") + .or_else(|| img.value().attr("src")) + }) + .map(Self::normalize_url) + .unwrap_or_default(); + if thumb.is_empty() { + continue; + } + + let preview = item + .select(&thumb_wrap_sel) + .next() + .and_then(|el| el.value().attr("data-preview")) + .map(Self::normalize_url) + .filter(|value| !value.is_empty()); + + let duration = item + .select(&duration_sel) + .next() + .map(|el| Self::parse_duration(&Self::text_of(&el))) + .unwrap_or(0); + + let views = item + .select(&views_sel) + .next() + .map(|el| Self::text_of(&el)) + .and_then(|v| parse_abbreviated_number(v.trim())) + .unwrap_or(0); + + let rating = item + .select(&rating_sel) + .next() + .map(|el| Self::text_of(&el)) + .and_then(|raw| { + rating_re + .captures(&raw) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse::().ok()) + }); + + let uploader_name = item + .select(&site_sel) + .next() + .map(|el| Self::text_of(&el)) + .filter(|value| !value.is_empty()); + let uploader_url = item + .select(&site_sel) + .next() + .and_then(|el| el.value().attr("href")) + .map(Self::normalize_url); + let uploader_id = uploader_url + .as_deref() + .and_then(|url| Self::slug_from_url(url, "sites")) + .map(|slug| format!("{CHANNEL_ID}:site:{slug}")); + + let mut tags = vec![]; + for model in item.select(&model_sel) { + let value = Self::text_of(&model); + if !value.is_empty() { + tags.push(value); + } + } + + let mut video = VideoItem::new( + id, + title, + video_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ) + .views(views) + .tags(tags); + + if let 
Some(r) = rating { + video = video.rating(r); + } + if let Some(preview) = preview { + video = video.preview(preview); + } + if let Some(uploader) = uploader_name { + video = video.uploader(uploader); + } + if let Some(url) = uploader_url { + video = video.uploader_url(url); + } + if let Some(uid) = uploader_id { + video.uploaderId = Some(uid); + } + + out.push(video); + } + + Ok(out) + } + + async fn get( + &self, + _db: DbPool, + _cache: VideoCache, + page: u16, + sort: &str, + query: Option, + options: ServerOptions, + ) -> Result> { + let target = Self::resolve_target(query.as_deref(), sort); + let url = Self::target_url(&target, page); + + let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos"); + let html = Self::fetch_html(&mut requester, &url).await?; + + Self::parse_video_items(&html) + } +} + +#[async_trait] +impl Provider for WowxxxProvider { + async fn get_videos( + &self, + cache: VideoCache, + db: DbPool, + sort: String, + query: Option, + page: String, + _per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + match self.get(db, cache, page, &sort, query, options).await { + Ok(v) => v, + Err(e) => { + report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 4f7be05..f98588c 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -1,4 +1,3 @@ -use crate::proxies::allpornstream::AllPornStreamProxy; use crate::proxies::archivebate::ArchivebateProxy; use crate::proxies::clapdat::ClapdatProxy; use crate::proxies::doodstream::DoodstreamProxy; @@ -48,7 +47,6 @@ pub mod vjav; #[derive(Debug, Clone)] pub enum AnyProxy { - AllPornStream(AllPornStreamProxy), Archivebate(ArchivebateProxy), Doodstream(DoodstreamProxy), Sxyprn(SxyprnProxy), @@ -75,7 +73,6 @@ pub trait Proxy { impl 
Proxy for AnyProxy { async fn get_video_url(&self, url: String, requester: web::types::State) -> String { match self { - AnyProxy::AllPornStream(p) => p.get_video_url(url, requester).await, AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await, AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await, AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await, diff --git a/src/proxy.rs b/src/proxy.rs index 02af129..ab4f3c6 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -1,6 +1,5 @@ use ntex::web::{self, HttpRequest}; -use crate::proxies::allpornstream::AllPornStreamProxy; use crate::proxies::archivebate::ArchivebateProxy; use crate::proxies::clapdat::ClapdatProxy; use crate::proxies::doodstream::DoodstreamProxy; @@ -197,7 +196,6 @@ fn get_proxy(proxy: &str) -> Option { "spankbang" => Some(AnyProxy::Spankbang(SpankbangProxy::new())), "lulustream" => Some(AnyProxy::Lulustream(LulustreamProxy::new())), "thaiporntv" => Some(AnyProxy::ThaipornTv(ThaipornTvProxy::new())), - "allpornstream" => Some(AnyProxy::AllPornStream(AllPornStreamProxy::new())), "tube8" => Some(AnyProxy::Tube8(Tube8Proxy::new())), "supjav" => Some(AnyProxy::Supjav(SupjavProxy::new())), _ => None,