use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ Provider, report_provider_error, report_provider_error_background, requester_or_default, build_proxy_url, strip_url_scheme, }; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use chrono::{DateTime, NaiveDate, Utc}; use error_chain::error_chain; use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use scraper::{ElementRef, Html, Selector}; use serde_json::Value; use std::collections::HashSet; use std::collections::HashMap; use std::sync::{Arc, RwLock}; use std::thread; use url::Url; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "mainstream-tube", tags: &["mainstream", "studio", "general"], }; const BASE_URL: &str = "https://www.pornhub.com"; const CHANNEL_ID: &str = "pornhub"; const DETAIL_ENRICH_LIMIT: usize = 12; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); Json(serde_json::Error); Url(url::ParseError); } errors { Parse(msg: String) { description("parse error") display("parse error: {}", msg) } } } #[derive(Debug, Clone)] pub struct PornhubProvider { url: String, tag_map: Arc>>, } #[derive(Debug, Clone, Copy)] enum ListingScope { Browse, Search, Creator, } #[derive(Debug, Clone, Copy)] enum QueryTargetKind { Channel, Pornstar, Model, User, } #[derive(Debug, Clone)] struct QueryTarget { kind: QueryTargetKind, slug: String, } #[derive(Debug, Clone)] struct TagInfo { kind: QueryTargetKind, slug: String, title: String, } impl QueryTargetKind { fn path_segment(self) -> &'static str { match self { Self::Channel => "channels", Self::Pornstar => "pornstar", Self::Model => "model", Self::User => "users", } } } impl PornhubProvider { pub fn new() -> Self { let provider = Self { url: BASE_URL.to_string(), tag_map: Arc::new(RwLock::new(HashMap::new())), }; provider.spawn_initial_load(); provider } fn spawn_initial_load(&self) { let url = self.url.clone(); let tag_map = Arc::clone(&self.tag_map); thread::spawn(move || { let rt = tokio::runtime::Builder::new_current_thread() .enable_all() .build() .unwrap(); rt.block_on(async { let _ = Self::load_tags(&url, tag_map).await; }); }); } async fn load_tags(base_url: &str, tag_map: Arc>>) -> Result<()> { Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?; Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?; Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?; Self::load_kind(base_url, "user", QueryTargetKind::User, &tag_map).await?; Ok(()) } async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc>>) -> Result<()> { let url = format!("{}/{}/top", base_url, path_segment); let mut requester = crate::util::requester::Requester::new(); let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?; let document = Html::parse_document(&body); let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?; for element in document.select(&selector) { if let Some(href) = element.attr("href") { if let Some(slug) = Self::slug_from_url(href, path_segment) { let title = element.text().collect::().trim().to_string(); if !title.is_empty() && !slug.is_empty() { let info = TagInfo { kind, slug: slug.clone(), title: title.clone(), }; let mut map = tag_map.write().unwrap(); map.insert(title.to_ascii_lowercase(), info.clone()); map.insert(slug.to_ascii_lowercase(), info); } } } } Ok(()) } fn slug_from_url(url: &str, path_segment: &str) -> Option { let parsed = Url::parse(url).ok()?; let mut segments = parsed.path_segments()?; if segments.next() == Some(path_segment) { segments.next().map(|s| s.to_string()) } else { None } } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { id: CHANNEL_ID.to_string(), name: "Pornhub".to_string(), description: "Pornhub listings with creator queries and direct HLS playback links." .to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(), status: "active".to_string(), categories: vec![], options: vec![ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse Pornhub charts by sort order.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "mr".to_string(), title: "Featured Recently".to_string(), }, FilterOption { id: "mv".to_string(), title: "Most Viewed".to_string(), }, FilterOption { id: "tr".to_string(), title: "Top Rated".to_string(), }, FilterOption { id: "ht".to_string(), title: "Hottest".to_string(), }, FilterOption { id: "lg".to_string(), title: "Longest".to_string(), }, FilterOption { id: "cm".to_string(), title: "Newest".to_string(), }, ], multiSelect: false, }], nsfw: true, cacheDuration: Some(1800), } } fn selector(value: &str) -> Result { Selector::parse(value) .map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}")))) } fn regex(value: &str) -> Result { Regex::new(value) .map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}")))) } fn text_of(element: &ElementRef<'_>) -> String { element .text() .collect::>() .join(" ") .split_whitespace() .collect::>() .join(" ") } fn decode_html(value: &str) -> String { decode(value.as_bytes()) .to_string() .unwrap_or_else(|_| value.to_string()) } fn normalize_url(&self, value: &str) -> String { let trimmed = value.trim(); if trimmed.is_empty() { return String::new(); } if let Ok(url) = Url::parse(trimmed) { return url.to_string(); } Url::parse(BASE_URL) .and_then(|base| base.join(trimmed)) .map(|value| value.to_string()) .unwrap_or_default() } fn normalize_sort(sort: &str) -> &'static str { match sort.trim().to_ascii_lowercase().as_str() { "mv" => "mv", "tr" => "tr", "ht" => "ht", "lg" => "lg", "cm" => "cm", "mr" | "new" => "mr", value if value.contains("date") => "mr", _ => "mr", } } fn parse_query_target(&self, query: &str) -> Option { let normalized = query.trim().to_ascii_lowercase(); if let Some(info) = self.tag_map.read().unwrap().get(&normalized) { return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() }); } // Fallback to kind:slug without @ let trimmed = query.trim(); let (kind_str, raw_slug) = trimmed.split_once(':')?; let slug = raw_slug .trim() .trim_matches('/') .replace(' ', "-") .to_ascii_lowercase(); if slug.is_empty() { return None; } let kind = match kind_str.trim().to_ascii_lowercase().as_str() { "channel" | "channels" => QueryTargetKind::Channel, "pornstar" | "pornstars" => QueryTargetKind::Pornstar, "model" | "models" => QueryTargetKind::Model, "user" | "users" => QueryTargetKind::User, _ => return None, }; Some(QueryTarget { kind, slug }) } fn build_browse_url(&self, page: u8, sort: &str) -> String { let order = Self::normalize_sort(sort); if order == "mr" { format!("{}/video?page={page}", self.url) } else { format!("{}/video?o={order}&page={page}", self.url) } } fn build_creator_url(&self, page: u8, sort: &str, target: &QueryTarget) -> String { let mut url = format!( "{}/{}/{}/videos?page={page}", self.url, target.kind.path_segment(), target.slug ); let mapped_sort = match target.kind { QueryTargetKind::Channel => match Self::normalize_sort(sort) { "mv" => Some("vi"), "tr" => Some("ra"), _ => None, }, _ => match Self::normalize_sort(sort) { "mv" => Some("mv"), "tr" => Some("tr"), "lg" => Some("lg"), _ => None, }, }; if let Some(order) = mapped_sort { url.push_str("&o="); url.push_str(order); } url } fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) { match query.map(str::trim).filter(|value| !value.is_empty()) { Some(query) => { if let Some(target) = self.parse_query_target(query) { (self.build_creator_url(page, sort, &target), ListingScope::Creator) } else { let encoded = query.to_ascii_lowercase().replace(' ', "+"); ( format!("{}/video/search?search={encoded}&page={page}", self.url), ListingScope::Search, ) } } None => (self.build_browse_url(page, sort), ListingScope::Browse), } } fn parse_listing_page(&self, html: &str, scope: ListingScope) -> Result> { let document = Html::parse_document(html); let item_selector = Self::selector("li.pcVideoListItem")?; let container_selectors = match scope { ListingScope::Browse => vec!["#videoCategory"], ListingScope::Search => vec!["#videoSearchResult"], ListingScope::Creator => vec!["#showAllChanelVideos", "#mostRecentVideosSection"], }; for selector_text in container_selectors { let container_selector = Self::selector(selector_text)?; if let Some(container) = document.select(&container_selector).next() { if container.select(&item_selector).next().is_some() { return self.parse_listing_items(container); } } } Err(ErrorKind::Parse(format!("missing listing container for scope {scope:?}")).into()) } fn parse_listing_items(&self, container: ElementRef<'_>) -> Result> { let item_selector = Self::selector("li.pcVideoListItem")?; let link_selector = Self::selector("a[href*=\"/view_video.php\"]")?; let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?; let image_selector = Self::selector("img")?; let duration_selector = Self::selector(".duration")?; let views_selector = Self::selector(".views var")?; let uploader_selector = Self::selector( ".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]", )?; let verified_selector = Self::selector(".verified-icon, .channel-icon")?; let mut items = Vec::new(); let mut seen_ids = HashSet::new(); for card in container.select(&item_selector) { let Some(link) = card.select(&link_selector).next() else { continue; }; let href = link.value().attr("href").unwrap_or_default(); let page_url = self.normalize_url(href); if page_url.is_empty() || !page_url.contains("/view_video.php") { continue; } let id = card .value() .attr("data-video-vkey") .filter(|value| !value.trim().is_empty()) .map(ToOwned::to_owned) .or_else(|| Self::viewkey_from_url(&page_url)) .or_else(|| { card.value() .attr("data-video-id") .filter(|value| !value.trim().is_empty()) .map(ToOwned::to_owned) }); let Some(id) = id else { continue; }; if !seen_ids.insert(id.clone()) { continue; } let title = link .value() .attr("title") .filter(|value| !value.trim().is_empty()) .map(Self::decode_html) .or_else(|| { card.select(&title_selector) .next() .map(|value| Self::decode_html(&Self::text_of(&value))) }) .unwrap_or_default(); if title.is_empty() { continue; } let image = card.select(&image_selector).next(); let thumb = image .as_ref() .and_then(|value| { value .value() .attr("src") .or_else(|| value.value().attr("data-mediumthumb")) }) .map(|value| self.normalize_url(value)) .unwrap_or_default(); let duration = card .select(&duration_selector) .next() .map(|value| Self::text_of(&value)) .and_then(|value| parse_time_to_seconds(&value)) .unwrap_or(0) as u32; let views = card .select(&views_selector) .next() .and_then(|value| parse_abbreviated_number(&Self::text_of(&value))); let uploader_link = card.select(&uploader_selector).next(); let uploader = uploader_link .as_ref() .map(|value| Self::decode_html(&Self::text_of(value))) .filter(|value| !value.is_empty()); let uploader_url = uploader_link .and_then(|value| value.value().attr("href")) .map(|value| self.normalize_url(value)) .filter(|value| !value.is_empty()); let mut item = VideoItem::new( id, title, page_url, CHANNEL_ID.to_string(), thumb, duration, ); item.views = views; item.preview = image .and_then(|value| value.value().attr("data-mediabook")) .map(|value| self.normalize_url(value)) .filter(|value| !value.is_empty()); item.verified = card .select(&verified_selector) .next() .map(|_| true); item.uploader = uploader.clone(); item.uploaderUrl = uploader_url.clone(); item.uploaderId = uploader_url .as_deref() .and_then(Self::uploader_identity_from_url); let mut tags = Vec::new(); if let Some(tag) = uploader_url .as_deref() .and_then(|url| self.query_tag_from_uploader_url(url)) { tags.push(tag); } if !tags.is_empty() { item.tags = Some(tags); } items.push(item); } Ok(items) } fn viewkey_from_url(url: &str) -> Option { let parsed = Url::parse(url).ok()?; parsed .query_pairs() .find(|(key, _)| key == "viewkey") .map(|(_, value)| value.into_owned()) } fn uploader_identity_from_url(url: &str) -> Option { let parsed = Url::parse(url).ok()?; let mut segments = parsed.path_segments()?; let kind = segments.next()?.trim_matches('/'); let slug = segments.next()?.trim_matches('/'); if kind.is_empty() || slug.is_empty() { return None; } Some(format!("{CHANNEL_ID}:{kind}:{slug}")) } fn query_tag_from_uploader_url(&self, url: &str) -> Option { let parsed = Url::parse(url).ok()?; let mut segments = parsed.path_segments()?; let kind_str = segments.next()?.trim_matches('/'); let slug = segments.next()?.trim_matches('/'); if kind_str.is_empty() || slug.is_empty() { return None; } let normalized_slug = slug.to_ascii_lowercase(); if let Some(info) = self.tag_map.read().unwrap().get(&normalized_slug) { return Some(info.title.clone()); } Some(slug.replace('-', " ")) } fn push_unique(values: &mut Vec, value: String) { let normalized = value.trim(); if normalized.is_empty() { return; } if values.iter().any(|existing| existing.eq_ignore_ascii_case(normalized)) { return; } values.push(normalized.to_string()); } fn collect_named_links(&self, document: &Html, selector_text: &str) -> Result> { let selector = Self::selector(selector_text)?; let mut values = Vec::new(); for element in document.select(&selector) { Self::push_unique(&mut values, Self::decode_html(&Self::text_of(&element))); } Ok(values) } fn parse_upload_date(value: &str) -> Option { if let Ok(parsed) = DateTime::parse_from_rfc3339(value.trim()) { return Some(parsed.timestamp() as u64); } NaiveDate::parse_from_str(value.trim(), "%Y-%m-%d") .ok() .and_then(|date| date.and_hms_opt(0, 0, 0)) .map(|date| DateTime::::from_naive_utc_and_offset(date, Utc).timestamp() as u64) } fn json_string(value: Option<&Value>) -> Option { value.and_then(|value| match value { Value::String(value) => Some(value.to_string()), Value::Number(value) => Some(value.to_string()), _ => None, }) } fn json_u32(value: Option<&Value>) -> Option { match value { Some(Value::Number(value)) => value.as_u64().and_then(|value| u32::try_from(value).ok()), Some(Value::String(value)) => value.parse::().ok(), _ => None, } } fn extract_flashvars(&self, html: &str) -> Result> { let regex = Self::regex(r#"(?s)var\s+flashvars_\d+\s*=\s*(\{.*?\});"#)?; let Some(raw) = regex .captures(html) .and_then(|captures| captures.get(1)) .map(|value| value.as_str()) else { return Ok(None); }; Ok(Some(serde_json::from_str::(raw)?)) } fn extract_ld_video_object(&self, document: &Html) -> Result> { let script_selector = Self::selector("script[type=\"application/ld+json\"]")?; for script in document.select(&script_selector) { let raw = script.inner_html(); let Ok(value) = serde_json::from_str::(&raw) else { continue; }; if Self::is_video_object(&value) { return Ok(Some(value)); } if let Some(array) = value.as_array() { for entry in array { if Self::is_video_object(entry) { return Ok(Some(entry.clone())); } } } } Ok(None) } fn is_video_object(value: &Value) -> bool { value .get("@type") .and_then(|value| value.as_str()) .is_some_and(|value| value.eq_ignore_ascii_case("VideoObject")) } fn build_formats_from_flashvars(&self, flashvars: &Value) -> Vec { let mut entries = flashvars .get("mediaDefinitions") .and_then(|value| value.as_array()) .into_iter() .flatten() .filter_map(|entry| { let format = entry .get("format") .and_then(|value| value.as_str()) .unwrap_or_default() .to_ascii_lowercase(); if format != "hls" { return None; } let url = entry .get("videoUrl") .and_then(|value| value.as_str()) .map(|value| self.normalize_url(value)) .filter(|value| !value.is_empty())?; let quality = entry .get("quality") .and_then(|value| value.as_str()) .unwrap_or("auto"); let label = match quality { "auto" => "auto".to_string(), value if value.ends_with('p') => value.to_string(), value => format!("{value}p"), }; let rank = if label == "auto" { 0 } else { label .trim_end_matches('p') .parse::() .unwrap_or(0) }; Some((rank, label, url)) }) .collect::>(); entries.sort_by_key(|(rank, _, _)| *rank); entries.dedup_by(|a, b| a.2 == b.2); entries .into_iter() .map(|(_, label, url)| { VideoFormat::new(url, label.clone(), "m3u8".to_string()) .format_id(label.clone()) .format_note(label) }) .collect() } fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result { let document = Html::parse_document(html); if let Some(flashvars) = self.extract_flashvars(html)? { if let Some(title) = Self::json_string(flashvars.get("video_title")) { let decoded = Self::decode_html(&title); if !decoded.is_empty() { item.title = decoded; } } if let Some(thumb) = Self::json_string(flashvars.get("image_url")) { let normalized = self.normalize_url(&thumb); if !normalized.is_empty() { item.thumb = normalized; } } if let Some(duration) = Self::json_u32(flashvars.get("video_duration")) { item.duration = duration; } if let Some(link_url) = Self::json_string(flashvars.get("link_url")) { let normalized = self.normalize_url(&link_url); if !normalized.is_empty() { item.url = normalized; } } } if let Some(ld_video) = self.extract_ld_video_object(&document)? { if let Some(thumb) = ld_video .get("thumbnailUrl") .and_then(|value| match value { Value::String(value) => Some(value.to_string()), Value::Array(values) => values .iter() .find_map(|entry| entry.as_str().map(ToOwned::to_owned)), _ => None, }) { let normalized = self.normalize_url(&thumb); if !normalized.is_empty() { item.thumb = normalized; } } if let Some(uploaded_at) = ld_video .get("uploadDate") .and_then(|value| value.as_str()) .and_then(Self::parse_upload_date) { item.uploadedAt = Some(uploaded_at); } if item.views.is_none() { item.views = Self::json_string(ld_video.get("interactionCount")) .and_then(|value| value.parse::().ok()); } if item.uploader.is_none() { item.uploader = ld_video .get("author") .and_then(|value| match value { Value::String(value) => Some(value.to_string()), Value::Object(values) => values .get("name") .and_then(|value| value.as_str()) .map(ToOwned::to_owned), _ => None, }) .filter(|value| !value.trim().is_empty()); } } let mut tags = item.tags.clone().unwrap_or_default(); for value in self.collect_named_links( &document, ".categoriesWrapper a.item, .categoriesWrapper a[href*=\"/categories/\"]", )? { Self::push_unique(&mut tags, value); } for value in self.collect_named_links( &document, ".tagsWrapper a.item, .tagsWrapper a[href*=\"/video/search\"]", )? { Self::push_unique(&mut tags, value); } for value in self.collect_named_links( &document, ".pornstarsWrapper a.item, .pornstarsWrapper a[href*=\"/pornstar/\"], a[href*=\"/pornstar/\"]", )? { Self::push_unique(&mut tags, value); } for value in self.collect_named_links( &document, ".modelsWrapper a.item, .modelsWrapper a[href*=\"/model/\"], a[href*=\"/model/\"]", )? { Self::push_unique(&mut tags, value); } if !tags.is_empty() { item.tags = Some(tags); } Ok(item) } async fn enrich_listing_items(&self, items: Vec, options: &ServerOptions) -> Vec { let requester = requester_or_default(options, CHANNEL_ID, "enrich_listing_items.requester"); let mut enriched = stream::iter(items.into_iter().enumerate().map(|(index, item)| { let provider = self.clone(); let requester = requester.clone(); async move { if index >= DETAIL_ENRICH_LIMIT || item.url.is_empty() { return (index, item); } let fallback = item.clone(); let enriched = match provider.fetch_detail(item, requester).await { Ok(value) => value, Err(error) => { report_provider_error_background( CHANNEL_ID, "enrich_listing_items.detail", &format!("url={}; error={error}", fallback.url), ); fallback } }; (index, enriched) } })) .buffer_unordered(4) .collect::>() .await; enriched.sort_by_key(|(index, _)| *index); enriched.into_iter().map(|(_, item)| item).collect() } async fn fetch_detail(&self, item: VideoItem, mut requester: crate::util::requester::Requester) -> Result { let html = requester .get(&item.url, None) .await .map_err(|error| ErrorKind::Parse(format!("detail request failed: {error}")))?; self.apply_detail_video(item, &html) } async fn fetch_listing( &self, cache: VideoCache, page: u8, sort: &str, query: Option<&str>, options: ServerOptions, ) -> Result> { let (video_url, scope) = self.build_listing_request(page, sort, query); let old_items = match cache.get(&video_url) { Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => { return Ok(items.clone()); } Some((_, items)) => items.clone(), None => vec![], }; let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_listing.requester"); let text = match requester.get(&video_url, None).await { Ok(text) => text, Err(error) => { report_provider_error( CHANNEL_ID, "fetch_listing.request", &format!("url={video_url}; error={error}"), ) .await; return Ok(old_items); } }; let items = match self.parse_listing_page(&text, scope) { Ok(items) => items, Err(error) => { report_provider_error( CHANNEL_ID, "fetch_listing.parse", &format!("url={video_url}; error={error}"), ) .await; return Ok(old_items); } }; if items.is_empty() { return Ok(old_items); } let mut items = self.enrich_listing_items(items, &options).await; // Rewrite thumbs and previews to use the proxy when appropriate for item in items.iter_mut() { let proxied = self.proxied_thumb(&options, &item.thumb); if !proxied.is_empty() { item.thumb = proxied; } if let Some(prev) = item.preview.clone() { let proxied_prev = self.proxied_thumb(&options, &prev); if !proxied_prev.is_empty() { item.preview = Some(proxied_prev); } } } cache.remove(&video_url); cache.insert(video_url, items.clone()); Ok(items) } } impl PornhubProvider { fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String { if thumb.is_empty() { return String::new(); } if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) { return String::new(); } build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb)) } } struct PornhubThumbPolicy; impl PornhubThumbPolicy { fn is_allowed_thumb_url(url: &str) -> bool { let Some(url) = Url::parse(url).ok() else { return false; }; if url.scheme() != "https" { return false; } let Some(host) = url.host_str() else { return false; }; // Only allow the specific Pornhub CDN host used for thumbnails host.eq_ignore_ascii_case("pix-cdn77.phncdn.com") } } #[async_trait] impl Provider for PornhubProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let _ = pool; let _ = per_page; let page = page.parse::().unwrap_or(1); let sort = Self::normalize_sort(&sort).to_string(); match self .fetch_listing(cache, page, &sort, query.as_deref(), options) .await { Ok(items) => items, Err(error) => { report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::*; #[test] fn parses_creator_queries() { let provider = PornhubProvider::new(); let target = provider.parse_query_target("channels:Brazzers") .expect("channel target should parse"); assert!(matches!(target.kind, QueryTargetKind::Channel)); assert_eq!(target.slug, "brazzers"); let target = provider.parse_query_target("pornstar:Alex Mack") .expect("pornstar target should parse"); assert!(matches!(target.kind, QueryTargetKind::Pornstar)); assert_eq!(target.slug, "alex-mack"); assert!(provider.parse_query_target("teacher").is_none()); } #[test] fn parses_browse_listing_cards() { let provider = PornhubProvider::new(); let html = r#" "#; let items = provider .parse_listing_page(html, ListingScope::Browse) .expect("browse listing should parse"); assert_eq!(items.len(), 1); assert_eq!(items[0].id, "67ed937c986b1"); assert_eq!(items[0].uploader.as_deref(), Some("Honeycore")); assert_eq!( items[0].uploaderUrl.as_deref(), Some("https://www.pornhub.com/model/honeycore") ); assert_eq!(items[0].views, Some(199000)); assert_eq!(items[0].duration, 738); assert_eq!( items[0].preview.as_deref(), Some("https://example.com/preview.webm") ); assert!(items[0] .tags .as_ref() .is_some_and(|values| values.iter().any(|value| value == "@model:honeycore"))); } #[test] fn applies_detail_video_metadata() { let provider = PornhubProvider::new(); let item = VideoItem::new( "69cfa159b1377".to_string(), "placeholder".to_string(), "https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377".to_string(), CHANNEL_ID.to_string(), "https://example.com/thumb.jpg".to_string(), 0, ); let html = r#" "#; let item = provider .apply_detail_video(item, html) .expect("detail page should enrich item"); assert_eq!(item.title, "Brazzers Detail Title"); assert_eq!(item.thumb, "https://example.com/ld-thumb.jpg"); assert_eq!(item.duration, 930); assert_eq!(item.views, Some(5700)); assert_eq!(item.uploader.as_deref(), Some("Brazzers")); assert!(item.uploadedAt.is_some()); assert_eq!(item.formats.as_ref().map(|values| values.len()), Some(2)); assert!(item.tags.as_ref().is_some_and(|values| values .iter() .any(|value| value == "Big Tits"))); assert!(item.tags.as_ref().is_some_and(|values| values .iter() .any(|value| value == "Maid"))); } }