Files
hottub/src/providers/pornhub.rs
2026-04-05 16:18:53 +00:00

1096 lines
38 KiB
Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
build_proxy_url, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde_json::Value;
use std::collections::HashSet;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use url::Url;
/// Static metadata describing this provider's channel: the group it is listed
/// under and the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "studio", "general"],
};
/// Site root used to build listing URLs and to resolve relative links.
const BASE_URL: &str = "https://www.pornhub.com";
/// Channel identifier; also passed as the provider label in error reports.
const CHANNEL_ID: &str = "pornhub";
/// Only listing items whose index is below this limit get a detail-page fetch;
/// the remaining items are returned exactly as parsed from the listing.
const DETAIL_ENRICH_LIMIT: usize = 12;
// Error plumbing for this module, generated by `error_chain!`.
// `foreign_links` lets `?` convert I/O, HTTP, JSON and URL-parse errors into
// this module's error type; `Parse` carries a free-form message and is used
// for scraping failures (bad selectors, missing containers, failed requests).
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
Url(url::ParseError);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider that scrapes listing and detail pages of the site.
///
/// Cloning is cheap with respect to the tag map: clones share the same
/// `Arc<RwLock<..>>`.
#[derive(Debug, Clone)]
pub struct PornhubProvider {
/// Base URL that listing URLs are built from (initialised from `BASE_URL`).
url: String,
/// Known creators, keyed by lower-cased title and by lower-cased slug.
/// Filled by the background load started in `new`.
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
}
/// Which kind of listing page is being parsed; selects the container element
/// that `parse_listing_page` looks for.
#[derive(Debug, Clone, Copy)]
enum ListingScope {
/// Default chart listing (no query).
Browse,
/// Free-text search results.
Search,
/// Video list of a channel, pornstar, model or user.
Creator,
}
/// Kind of creator page a query can point at. Each variant maps to a URL path
/// segment via `path_segment`.
#[derive(Debug, Clone, Copy)]
enum QueryTargetKind {
Channel,
Pornstar,
Model,
User,
}
/// A query resolved to a specific creator page.
#[derive(Debug, Clone)]
struct QueryTarget {
/// Which kind of creator page the slug belongs to.
kind: QueryTargetKind,
/// Path component identifying the creator in its listing URL.
slug: String,
}
/// One creator entry stored in the provider's tag map.
#[derive(Debug, Clone)]
struct TagInfo {
/// Kind of creator page this entry was scraped from.
kind: QueryTargetKind,
/// Slug taken from the creator link's URL path.
slug: String,
/// Trimmed link text of the creator link.
title: String,
}
impl QueryTargetKind {
fn path_segment(self) -> &'static str {
match self {
Self::Channel => "channels",
Self::Pornstar => "pornstar",
Self::Model => "model",
Self::User => "users",
}
}
}
impl PornhubProvider {
/// Creates a provider rooted at `BASE_URL` with an empty tag map and kicks
/// off the background tag load before returning.
pub fn new() -> Self {
    let tag_map = Arc::new(RwLock::new(HashMap::new()));
    let this = Self {
        url: BASE_URL.to_string(),
        tag_map,
    };
    this.spawn_initial_load();
    this
}
/// Starts a detached OS thread that runs `load_tags` to completion on its own
/// single-threaded Tokio runtime. The load is best-effort: its result is
/// discarded.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let shared_tags = Arc::clone(&self.tag_map);
    let worker = move || {
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        // Errors are intentionally ignored; the tag map simply stays as it is.
        let _ = runtime.block_on(Self::load_tags(&base_url, shared_tags));
    };
    thread::spawn(worker);
}
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?;
Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?;
Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?;
Self::load_kind(base_url, "user", QueryTargetKind::User, &tag_map).await?;
Ok(())
}
async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
let url = format!("{}/{}/top", base_url, path_segment);
let mut requester = crate::util::requester::Requester::new();
let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
let document = Html::parse_document(&body);
let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
for element in document.select(&selector) {
if let Some(href) = element.attr("href") {
if let Some(slug) = Self::slug_from_url(href, path_segment) {
let title = element.text().collect::<String>().trim().to_string();
if !title.is_empty() && !slug.is_empty() {
let info = TagInfo {
kind,
slug: slug.clone(),
title: title.clone(),
};
let mut map = tag_map.write().unwrap();
map.insert(title.to_ascii_lowercase(), info.clone());
map.insert(slug.to_ascii_lowercase(), info);
}
}
}
}
Ok(())
}
fn slug_from_url(url: &str, path_segment: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
let mut segments = parsed.path_segments()?;
if segments.next() == Some(path_segment) {
segments.next().map(|s| s.to_string())
} else {
None
}
}
/// Builds the channel descriptor for this provider, including the single
/// "sort" option and its selectable sort orders. The client version is
/// currently not consulted.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // (id, title) pairs in the order they are presented.
    const SORT_CHOICES: [(&str, &str); 6] = [
        ("mr", "Featured Recently"),
        ("mv", "Most Viewed"),
        ("tr", "Top Rated"),
        ("ht", "Hottest"),
        ("lg", "Longest"),
        ("cm", "Newest"),
    ];
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse Pornhub charts by sort order.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: SORT_CHOICES
            .iter()
            .map(|&(id, title)| FilterOption {
                id: id.to_string(),
                title: title.to_string(),
            })
            .collect(),
        multiSelect: false,
    };
    Channel {
        id: CHANNEL_ID.to_string(),
        name: "Pornhub".to_string(),
        description: "Pornhub listings with creator queries and direct HLS playback links."
            .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value)
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
}
/// Returns the element's text content with every run of whitespace collapsed
/// to a single space and no leading or trailing whitespace.
fn text_of(element: &ElementRef<'_>) -> String {
    // Splitting each text node on whitespace yields the same words as joining
    // the nodes with spaces first and splitting afterwards.
    let words: Vec<&str> = element.text().flat_map(str::split_whitespace).collect();
    words.join(" ")
}
/// Decodes HTML entities in `value`; if the decoded bytes cannot be turned
/// into a string, the input is returned unchanged.
fn decode_html(value: &str) -> String {
    match decode(value.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => value.to_string(),
    }
}
/// Turns a possibly relative link into an absolute URL string.
///
/// Blank input yields an empty string. Input that already parses as a URL is
/// returned in its serialized form; anything else is resolved against
/// `BASE_URL`, and an empty string is returned if that fails too.
fn normalize_url(&self, value: &str) -> String {
    let candidate = value.trim();
    if candidate.is_empty() {
        return String::new();
    }
    match Url::parse(candidate) {
        Ok(absolute) => absolute.to_string(),
        Err(_) => Url::parse(BASE_URL)
            .and_then(|base| base.join(candidate))
            .map(|joined| joined.to_string())
            .unwrap_or_default(),
    }
}
/// Maps a user-supplied sort value to one of the supported sort ids.
///
/// Matching ignores surrounding whitespace and ASCII case. The ids `mv`,
/// `tr`, `ht`, `lg` and `cm` pass through; every other value (including
/// `mr`, `new` and anything mentioning `date`) resolves to `mr`.
fn normalize_sort(sort: &str) -> &'static str {
    const PASSTHROUGH: [&str; 5] = ["mv", "tr", "ht", "lg", "cm"];
    let requested = sort.trim();
    PASSTHROUGH
        .iter()
        .copied()
        .find(|known| known.eq_ignore_ascii_case(requested))
        .unwrap_or("mr")
}
/// Resolves a query to a creator page, if it names one.
///
/// The trimmed, lower-cased query is first looked up in the tag map. When
/// that misses, the query is read as `kind:slug`, where `kind` is one of
/// `channel(s)`, `pornstar(s)`, `model(s)` or `user(s)`; the slug is trimmed,
/// stripped of surrounding slashes, has spaces replaced by dashes and is
/// lower-cased. Returns `None` for anything else.
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
    let trimmed = query.trim();
    let known = self
        .tag_map
        .read()
        .unwrap()
        .get(&trimmed.to_ascii_lowercase())
        .map(|info| QueryTarget {
            kind: info.kind,
            slug: info.slug.clone(),
        });
    if known.is_some() {
        return known;
    }
    // Fallback to kind:slug without @
    let (prefix, rest) = trimmed.split_once(':')?;
    let slug = rest
        .trim()
        .trim_matches('/')
        .replace(' ', "-")
        .to_ascii_lowercase();
    if slug.is_empty() {
        return None;
    }
    let kind = match prefix.trim().to_ascii_lowercase().as_str() {
        "channel" | "channels" => QueryTargetKind::Channel,
        "pornstar" | "pornstars" => QueryTargetKind::Pornstar,
        "model" | "models" => QueryTargetKind::Model,
        "user" | "users" => QueryTargetKind::User,
        _ => return None,
    };
    Some(QueryTarget { kind, slug })
}
/// Builds the browse listing URL for `page`. The `o=` parameter is omitted
/// when the normalized sort is `mr`.
fn build_browse_url(&self, page: u8, sort: &str) -> String {
    match Self::normalize_sort(sort) {
        "mr" => format!("{}/video?page={page}", self.url),
        order => format!("{}/video?o={order}&page={page}", self.url),
    }
}
/// Builds the video listing URL for a creator page.
///
/// Channels translate `mv` to `vi` and `tr` to `ra`; other creator kinds pass
/// `mv`, `tr` and `lg` through unchanged. Any other sort adds no `o=`
/// parameter.
fn build_creator_url(&self, page: u8, sort: &str, target: &QueryTarget) -> String {
    let order = Self::normalize_sort(sort);
    let order_param = match (target.kind, order) {
        (QueryTargetKind::Channel, "mv") => Some("vi"),
        (QueryTargetKind::Channel, "tr") => Some("ra"),
        (QueryTargetKind::Channel, _) => None,
        (_, "mv" | "tr" | "lg") => Some(order),
        _ => None,
    };
    let listing = format!(
        "{}/{}/{}/videos?page={page}",
        self.url,
        target.kind.path_segment(),
        target.slug
    );
    match order_param {
        Some(value) => format!("{listing}&o={value}"),
        None => listing,
    }
}
fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) {
match query.map(str::trim).filter(|value| !value.is_empty()) {
Some(query) => {
if let Some(target) = self.parse_query_target(query) {
(self.build_creator_url(page, sort, &target), ListingScope::Creator)
} else {
let encoded = query.to_ascii_lowercase().replace(' ', "+");
(
format!("{}/video/search?search={encoded}&page={page}", self.url),
ListingScope::Search,
)
}
}
None => (self.build_browse_url(page, sort), ListingScope::Browse),
}
}
/// Parses a listing page into video items.
///
/// For each container id associated with `scope`, the first matching element
/// is used if it holds at least one `li.pcVideoListItem`.
///
/// # Errors
/// Returns a `Parse` error when no candidate container holds a video card.
fn parse_listing_page(&self, html: &str, scope: ListingScope) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let card_selector = Self::selector("li.pcVideoListItem")?;
    let candidates: &[&str] = match scope {
        ListingScope::Browse => &["#videoCategory"],
        ListingScope::Search => &["#videoSearchResult"],
        ListingScope::Creator => &["#showAllChanelVideos", "#mostRecentVideosSection"],
    };
    for &candidate in candidates {
        let container_selector = Self::selector(candidate)?;
        let populated = document
            .select(&container_selector)
            .next()
            .filter(|container| container.select(&card_selector).next().is_some());
        if let Some(container) = populated {
            return self.parse_listing_items(container);
        }
    }
    Err(ErrorKind::Parse(format!("missing listing container for scope {scope:?}")).into())
}
/// Converts every `li.pcVideoListItem` card inside `container` into a
/// `VideoItem`.
///
/// Cards are skipped when they have no `/view_video.php` link, no usable id,
/// an id already seen in this container, or an empty title.
///
/// # Errors
/// Returns a `Parse` error only if one of the fixed selectors fails to parse.
fn parse_listing_items(&self, container: ElementRef<'_>) -> Result<Vec<VideoItem>> {
let item_selector = Self::selector("li.pcVideoListItem")?;
let link_selector = Self::selector("a[href*=\"/view_video.php\"]")?;
let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?;
let image_selector = Self::selector("img")?;
let duration_selector = Self::selector(".duration")?;
let views_selector = Self::selector(".views var")?;
let uploader_selector = Self::selector(
".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]",
)?;
let verified_selector = Self::selector(".verified-icon, .channel-icon")?;
let mut items = Vec::new();
// Ids already emitted, so the same video is not returned twice.
let mut seen_ids = HashSet::new();
for card in container.select(&item_selector) {
let Some(link) = card.select(&link_selector).next() else {
continue;
};
let href = link.value().attr("href").unwrap_or_default();
let page_url = self.normalize_url(href);
if page_url.is_empty() || !page_url.contains("/view_video.php") {
continue;
}
// Id preference: `data-video-vkey`, then the `viewkey` query parameter of
// the page URL, then `data-video-id`.
let id = card
.value()
.attr("data-video-vkey")
.filter(|value| !value.trim().is_empty())
.map(ToOwned::to_owned)
.or_else(|| Self::viewkey_from_url(&page_url))
.or_else(|| {
card.value()
.attr("data-video-id")
.filter(|value| !value.trim().is_empty())
.map(ToOwned::to_owned)
});
let Some(id) = id else {
continue;
};
if !seen_ids.insert(id.clone()) {
continue;
}
// Title preference: the link's `title` attribute, then the text of the
// first title element inside the card.
let title = link
.value()
.attr("title")
.filter(|value| !value.trim().is_empty())
.map(Self::decode_html)
.or_else(|| {
card.select(&title_selector)
.next()
.map(|value| Self::decode_html(&Self::text_of(&value)))
})
.unwrap_or_default();
if title.is_empty() {
continue;
}
// The first <img> in the card supplies both the thumbnail and the preview.
let image = card.select(&image_selector).next();
let thumb = image
.as_ref()
.and_then(|value| {
value
.value()
.attr("src")
.or_else(|| value.value().attr("data-mediumthumb"))
})
.map(|value| self.normalize_url(value))
.unwrap_or_default();
// A missing or unparsable duration is stored as 0.
let duration = card
.select(&duration_selector)
.next()
.map(|value| Self::text_of(&value))
.and_then(|value| parse_time_to_seconds(&value))
.unwrap_or(0) as u32;
let views = card
.select(&views_selector)
.next()
.and_then(|value| parse_abbreviated_number(&Self::text_of(&value)));
let uploader_link = card.select(&uploader_selector).next();
let uploader = uploader_link
.as_ref()
.map(|value| Self::decode_html(&Self::text_of(value)))
.filter(|value| !value.is_empty());
let uploader_url = uploader_link
.and_then(|value| value.value().attr("href"))
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty());
let mut item = VideoItem::new(
id,
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
item.views = views;
item.preview = image
.and_then(|value| value.value().attr("data-mediabook"))
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty());
// `Some(true)` when a verified/channel icon is present, otherwise `None`.
item.verified = card
.select(&verified_selector)
.next()
.map(|_| true);
item.uploader = uploader.clone();
item.uploaderUrl = uploader_url.clone();
item.uploaderId = uploader_url
.as_deref()
.and_then(Self::uploader_identity_from_url);
// The only tag available at listing level is the one derived from the
// uploader URL.
let mut tags = Vec::new();
if let Some(tag) = uploader_url
.as_deref()
.and_then(|url| self.query_tag_from_uploader_url(url))
{
tags.push(tag);
}
if !tags.is_empty() {
item.tags = Some(tags);
}
items.push(item);
}
Ok(items)
}
/// Returns the value of the first `viewkey` query parameter of an absolute
/// URL, or `None` if the URL does not parse or has no such parameter.
fn viewkey_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    for (key, value) in parsed.query_pairs() {
        if key == "viewkey" {
            return Some(value.into_owned());
        }
    }
    None
}
/// Derives a stable uploader id of the form `{CHANNEL_ID}:{kind}:{slug}` from
/// the first two path components of an absolute uploader URL. Returns `None`
/// when the URL does not parse or either component is missing or empty.
fn uploader_identity_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let mut parts = parsed
        .path_segments()?
        .map(|part| part.trim_matches('/'));
    match (parts.next()?, parts.next()?) {
        ("", _) | (_, "") => None,
        (kind, slug) => Some(format!("{CHANNEL_ID}:{kind}:{slug}")),
    }
}
/// Produces the tag shown for an uploader, based on its absolute URL.
///
/// When the lower-cased slug is present in the tag map, the stored title is
/// used; otherwise the slug with dashes replaced by spaces. Returns `None`
/// when the URL does not parse or lacks a non-empty kind and slug component.
fn query_tag_from_uploader_url(&self, url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let mut parts = parsed.path_segments()?;
    let kind_part = parts.next()?.trim_matches('/');
    let slug = parts.next()?.trim_matches('/');
    if kind_part.is_empty() || slug.is_empty() {
        return None;
    }
    let known_title = self
        .tag_map
        .read()
        .unwrap()
        .get(&slug.to_ascii_lowercase())
        .map(|info| info.title.clone());
    Some(known_title.unwrap_or_else(|| slug.replace('-', " ")))
}
/// Appends the trimmed `value` to `values` unless it is blank or an entry
/// equal to it (ignoring ASCII case) is already present.
fn push_unique(values: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !candidate.is_empty() && !already_present {
        values.push(candidate.to_string());
    }
}
/// Collects the decoded, whitespace-normalized text of every element matching
/// `selector_text`, in document order, without blanks or case-insensitive
/// duplicates.
///
/// # Errors
/// Returns a `Parse` error if `selector_text` is not a valid selector.
fn collect_named_links(&self, document: &Html, selector_text: &str) -> Result<Vec<String>> {
    let selector = Self::selector(selector_text)?;
    let names = document
        .select(&selector)
        .fold(Vec::new(), |mut names, element| {
            Self::push_unique(&mut names, Self::decode_html(&Self::text_of(&element)));
            names
        });
    Ok(names)
}
/// Parses an upload date into seconds since the Unix epoch.
///
/// Accepts an RFC 3339 timestamp or a plain `YYYY-MM-DD` date, which is taken
/// as midnight UTC. Returns `None` when neither format matches, or when the
/// instant lies before the epoch: a negative timestamp has no `u64`
/// representation and must not wrap around into a huge value.
fn parse_upload_date(value: &str) -> Option<u64> {
    let value = value.trim();
    let seconds = match DateTime::parse_from_rfc3339(value) {
        Ok(parsed) => parsed.timestamp(),
        Err(_) => {
            let midnight = NaiveDate::parse_from_str(value, "%Y-%m-%d")
                .ok()?
                .and_hms_opt(0, 0, 0)?;
            DateTime::<Utc>::from_naive_utc_and_offset(midnight, Utc).timestamp()
        }
    };
    u64::try_from(seconds).ok()
}
/// Returns a JSON string as-is or a JSON number rendered as text; any other
/// JSON type, or a missing value, yields `None`.
fn json_string(value: Option<&Value>) -> Option<String> {
    match value? {
        Value::String(text) => Some(text.clone()),
        Value::Number(number) => Some(number.to_string()),
        _ => None,
    }
}
fn json_u32(value: Option<&Value>) -> Option<u32> {
match value {
Some(Value::Number(value)) => value.as_u64().and_then(|value| u32::try_from(value).ok()),
Some(Value::String(value)) => value.parse::<u32>().ok(),
_ => None,
}
}
/// Finds the first `var flashvars_<digits> = {...};` assignment in the page
/// and parses the object literal as JSON.
///
/// Returns `Ok(None)` when the page has no such assignment.
///
/// # Errors
/// Fails if the pattern cannot be compiled or the captured text is not valid
/// JSON.
fn extract_flashvars(&self, html: &str) -> Result<Option<Value>> {
    let pattern = Self::regex(r#"(?s)var\s+flashvars_\d+\s*=\s*(\{.*?\});"#)?;
    match pattern.captures(html).and_then(|captures| captures.get(1)) {
        Some(object) => Ok(Some(serde_json::from_str::<Value>(object.as_str())?)),
        None => Ok(None),
    }
}
/// Returns the first JSON-LD `VideoObject` found in the document's
/// `application/ld+json` scripts.
///
/// A script may hold the object directly or inside a top-level array. Scripts
/// that are not valid JSON are ignored.
///
/// # Errors
/// Returns a `Parse` error only if the script selector fails to parse.
fn extract_ld_video_object(&self, document: &Html) -> Result<Option<Value>> {
    let script_selector = Self::selector("script[type=\"application/ld+json\"]")?;
    let found = document
        .select(&script_selector)
        .filter_map(|script| serde_json::from_str::<Value>(&script.inner_html()).ok())
        .find_map(|value| {
            if Self::is_video_object(&value) {
                return Some(value);
            }
            value
                .as_array()?
                .iter()
                .find(|entry| Self::is_video_object(entry))
                .cloned()
        });
    Ok(found)
}
fn is_video_object(value: &Value) -> bool {
value
.get("@type")
.and_then(|value| value.as_str())
.is_some_and(|value| value.eq_ignore_ascii_case("VideoObject"))
}
/// Builds the list of HLS formats from the `mediaDefinitions` array of a
/// flashvars object.
///
/// Only entries whose `format` is `hls` (ASCII case-insensitive) and whose
/// `videoUrl` normalizes to a non-empty URL are kept. The result is ordered by
/// ascending quality, with `auto` first. Returns an empty vector when
/// `mediaDefinitions` is missing or not an array.
fn build_formats_from_flashvars(&self, flashvars: &Value) -> Vec<VideoFormat> {
let mut entries = flashvars
.get("mediaDefinitions")
.and_then(|value| value.as_array())
.into_iter()
.flatten()
.filter_map(|entry| {
let format = entry
.get("format")
.and_then(|value| value.as_str())
.unwrap_or_default()
.to_ascii_lowercase();
if format != "hls" {
return None;
}
let url = entry
.get("videoUrl")
.and_then(|value| value.as_str())
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty())?;
// A `quality` that is missing or not a JSON string is treated as "auto".
let quality = entry
.get("quality")
.and_then(|value| value.as_str())
.unwrap_or("auto");
// Labels always end in `p` (e.g. "720p"), except for "auto".
let label = match quality {
"auto" => "auto".to_string(),
value if value.ends_with('p') => value.to_string(),
value => format!("{value}p"),
};
// Sort key: the numeric quality; "auto" and unparsable labels rank 0.
let rank = if label == "auto" {
0
} else {
label
.trim_end_matches('p')
.parse::<u32>()
.unwrap_or(0)
};
Some((rank, label, url))
})
.collect::<Vec<_>>();
entries.sort_by_key(|(rank, _, _)| *rank);
// Drops neighbouring entries with the same URL; only adjacent duplicates
// are removed.
entries.dedup_by(|a, b| a.2 == b.2);
entries
.into_iter()
.map(|(_, label, url)| {
VideoFormat::new(url, label.clone(), "m3u8".to_string())
.format_id(label.clone())
.format_note(label)
})
.collect()
}
/// Enriches a listing item with data scraped from its detail page.
///
/// Sources, in order of application:
/// 1. the `flashvars_*` object: title, thumbnail, duration, canonical URL and
///    the HLS formats (via `build_formats_from_flashvars`);
/// 2. the JSON-LD `VideoObject`: thumbnail (overrides the flashvars one),
///    upload date, and views/uploader when the listing did not supply them;
/// 3. category, tag, pornstar and model links, merged into the item's tags
///    without case-insensitive duplicates.
///
/// Fields are only overwritten by non-empty values.
///
/// # Errors
/// Fails when the flashvars JSON is malformed or a selector/regex cannot be
/// built.
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
    let document = Html::parse_document(html);
    if let Some(flashvars) = self.extract_flashvars(html)? {
        if let Some(title) = Self::json_string(flashvars.get("video_title")) {
            let decoded = Self::decode_html(&title);
            if !decoded.is_empty() {
                item.title = decoded;
            }
        }
        if let Some(thumb) = Self::json_string(flashvars.get("image_url")) {
            let normalized = self.normalize_url(&thumb);
            if !normalized.is_empty() {
                item.thumb = normalized;
            }
        }
        if let Some(duration) = Self::json_u32(flashvars.get("video_duration")) {
            item.duration = duration;
        }
        if let Some(link_url) = Self::json_string(flashvars.get("link_url")) {
            let normalized = self.normalize_url(&link_url);
            if !normalized.is_empty() {
                item.url = normalized;
            }
        }
        // Attach the HLS renditions; without this the media definitions are
        // parsed nowhere and the item never carries playable formats.
        let formats = self.build_formats_from_flashvars(&flashvars);
        if !formats.is_empty() {
            item.formats = Some(formats);
        }
    }
    if let Some(ld_video) = self.extract_ld_video_object(&document)? {
        // `thumbnailUrl` may be a single string or an array of strings.
        let ld_thumb = ld_video.get("thumbnailUrl").and_then(|value| match value {
            Value::String(value) => Some(value.to_string()),
            Value::Array(values) => values
                .iter()
                .find_map(|entry| entry.as_str().map(ToOwned::to_owned)),
            _ => None,
        });
        if let Some(thumb) = ld_thumb {
            let normalized = self.normalize_url(&thumb);
            if !normalized.is_empty() {
                item.thumb = normalized;
            }
        }
        if let Some(uploaded_at) = ld_video
            .get("uploadDate")
            .and_then(|value| value.as_str())
            .and_then(Self::parse_upload_date)
        {
            item.uploadedAt = Some(uploaded_at);
        }
        if item.views.is_none() {
            item.views = Self::json_string(ld_video.get("interactionCount"))
                .and_then(|value| value.parse::<u32>().ok());
        }
        if item.uploader.is_none() {
            // `author` may be a plain string or an object with a `name`.
            item.uploader = ld_video
                .get("author")
                .and_then(|value| match value {
                    Value::String(value) => Some(value.to_string()),
                    Value::Object(values) => values
                        .get("name")
                        .and_then(|value| value.as_str())
                        .map(ToOwned::to_owned),
                    _ => None,
                })
                .filter(|value| !value.trim().is_empty());
        }
    }
    // Selector groups are visited in this order so tag order stays stable.
    let tag_selectors = [
        ".categoriesWrapper a.item, .categoriesWrapper a[href*=\"/categories/\"]",
        ".tagsWrapper a.item, .tagsWrapper a[href*=\"/video/search\"]",
        ".pornstarsWrapper a.item, .pornstarsWrapper a[href*=\"/pornstar/\"], a[href*=\"/pornstar/\"]",
        ".modelsWrapper a.item, .modelsWrapper a[href*=\"/model/\"], a[href*=\"/model/\"]",
    ];
    let mut tags = item.tags.clone().unwrap_or_default();
    for selector_text in tag_selectors {
        for value in self.collect_named_links(&document, selector_text)? {
            Self::push_unique(&mut tags, value);
        }
    }
    if !tags.is_empty() {
        item.tags = Some(tags);
    }
    Ok(item)
}
/// Fetches detail pages for the leading listing items and merges the scraped
/// data into them.
///
/// Only items with an index below `DETAIL_ENRICH_LIMIT` and a non-empty URL
/// are fetched; at most four fetches are in flight at once. An item whose
/// detail fetch fails is reported in the background and returned unchanged.
/// The returned vector has the same order as `items`.
async fn enrich_listing_items(&self, items: Vec<VideoItem>, options: &ServerOptions) -> Vec<VideoItem> {
let requester = requester_or_default(options, CHANNEL_ID, "enrich_listing_items.requester");
let mut enriched = stream::iter(items.into_iter().enumerate().map(|(index, item)| {
// Each future owns its own provider and requester clone.
let provider = self.clone();
let requester = requester.clone();
async move {
if index >= DETAIL_ENRICH_LIMIT || item.url.is_empty() {
return (index, item);
}
// Kept so the listing-level data survives a failed detail fetch.
let fallback = item.clone();
let enriched = match provider.fetch_detail(item, requester).await {
Ok(value) => value,
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"enrich_listing_items.detail",
&format!("url={}; error={error}", fallback.url),
);
fallback
}
};
(index, enriched)
}
}))
.buffer_unordered(4)
.collect::<Vec<_>>()
.await;
// `buffer_unordered` yields in completion order; restore listing order.
enriched.sort_by_key(|(index, _)| *index);
enriched.into_iter().map(|(_, item)| item).collect()
}
async fn fetch_detail(&self, item: VideoItem, mut requester: crate::util::requester::Requester) -> Result<VideoItem> {
let html = requester
.get(&item.url, None)
.await
.map_err(|error| ErrorKind::Parse(format!("detail request failed: {error}")))?;
self.apply_detail_video(item, &html)
}
/// Returns the video items for one listing page, using `cache` where possible.
///
/// A cache entry younger than five minutes is returned directly. Otherwise
/// the page is requested, parsed and enriched, thumbnails and previews are
/// rewritten to proxy URLs where the thumb policy allows it, and the cache
/// entry is replaced.
///
/// When the request fails, the page cannot be parsed, or it yields no items,
/// the stale cached items (or an empty vector) are returned instead; request
/// and parse failures are also reported.
async fn fetch_listing(
&self,
cache: VideoCache,
page: u8,
sort: &str,
query: Option<&str>,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let (video_url, scope) = self.build_listing_request(page, sort, query);
// Fresh entry: return it. Stale entry: keep it as the fallback result.
let old_items = match cache.get(&video_url) {
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_listing.requester");
let text = match requester.get(&video_url, None).await {
Ok(text) => text,
Err(error) => {
report_provider_error(
CHANNEL_ID,
"fetch_listing.request",
&format!("url={video_url}; error={error}"),
)
.await;
return Ok(old_items);
}
};
let items = match self.parse_listing_page(&text, scope) {
Ok(items) => items,
Err(error) => {
report_provider_error(
CHANNEL_ID,
"fetch_listing.parse",
&format!("url={video_url}; error={error}"),
)
.await;
return Ok(old_items);
}
};
// An empty page does not overwrite previously cached results.
if items.is_empty() {
return Ok(old_items);
}
let mut items = self.enrich_listing_items(items, &options).await;
// Rewrite thumbs and previews to use the proxy when appropriate
for item in items.iter_mut() {
// An empty result means "not proxied"; the original URL is kept.
let proxied = self.proxied_thumb(&options, &item.thumb);
if !proxied.is_empty() {
item.thumb = proxied;
}
if let Some(prev) = item.preview.clone() {
let proxied_prev = self.proxied_thumb(&options, &prev);
if !proxied_prev.is_empty() {
item.preview = Some(proxied_prev);
}
}
}
cache.remove(&video_url);
cache.insert(video_url, items.clone());
Ok(items)
}
}
impl PornhubProvider {
    /// Returns the proxy URL for `thumb`, or an empty string when `thumb` is
    /// empty or not permitted by `PornhubThumbPolicy`.
    fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
        let allowed = !thumb.is_empty() && PornhubThumbPolicy::is_allowed_thumb_url(thumb);
        if allowed {
            build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
        } else {
            String::new()
        }
    }
}
/// Allow-list deciding which thumbnail URLs may be routed through the proxy.
struct PornhubThumbPolicy;
impl PornhubThumbPolicy {
    /// Accepts only `https` URLs whose host is the single permitted thumbnail
    /// CDN host (compared ignoring ASCII case). Unparsable URLs are rejected.
    fn is_allowed_thumb_url(url: &str) -> bool {
        match Url::parse(url) {
            // Only allow the specific Pornhub CDN host used for thumbnails
            Ok(parsed) if parsed.scheme() == "https" => parsed
                .host_str()
                .is_some_and(|host| host.eq_ignore_ascii_case("pix-cdn77.phncdn.com")),
            _ => false,
        }
    }
}
#[async_trait]
impl Provider for PornhubProvider {
    /// Returns the videos for the requested page, sort order and optional
    /// query. `pool` and `per_page` are not used by this provider. A `page`
    /// that does not parse as `u8` falls back to page 1. Errors are reported
    /// and result in an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        let page_number: u8 = page.parse().unwrap_or(1);
        let order = Self::normalize_sort(&sort);
        let outcome = self
            .fetch_listing(cache, page_number, order, query.as_deref(), options)
            .await;
        match outcome {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                Vec::new()
            }
        }
    }

    /// Returns this provider's channel descriptor; always `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Checks the `kind:slug` fallback of `parse_query_target`: plural kind names
// are accepted, slugs are lower-cased with spaces turned into dashes, and a
// query without a `kind:` prefix is expected not to resolve.
#[test]
fn parses_creator_queries() {
let provider = PornhubProvider::new();
let target = provider.parse_query_target("channels:Brazzers")
.expect("channel target should parse");
assert!(matches!(target.kind, QueryTargetKind::Channel));
assert_eq!(target.slug, "brazzers");
let target = provider.parse_query_target("pornstar:Alex Mack")
.expect("pornstar target should parse");
assert!(matches!(target.kind, QueryTargetKind::Pornstar));
assert_eq!(target.slug, "alex-mack");
assert!(provider.parse_query_target("teacher").is_none());
}
// Parses a minimal browse listing and checks every field taken from a card.
//
// The provider is built directly with an empty tag map instead of through
// `new()`, so no background network load runs and the uploader tag is
// deterministic: with no tag-map entry it is the slug with dashes replaced by
// spaces.
#[test]
fn parses_browse_listing_cards() {
    let provider = PornhubProvider {
        url: BASE_URL.to_string(),
        tag_map: Arc::new(RwLock::new(HashMap::new())),
    };
    let html = r#"
<ul id="videoCategory" class="nf-videos videos search-video-thumbs">
<li class="sniperModeEngaged"></li>
<li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
data-video-id="466705435"
data-video-vkey="67ed937c986b1">
<a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
<img src="https://example.com/thumb.jpg"
data-mediabook="https://example.com/preview.webm" />
<div class="marker-overlays"><var class="duration">12:18</var></div>
<div class="videoUploaderBlock">
<div class="usernameWrap">
<a href="/model/honeycore">Honeycore</a>
</div>
</div>
<div class="videoDetailsBlock">
<span class="views"><var>199K</var> views</span>
</div>
</li>
</ul>
"#;
    let items = provider
        .parse_listing_page(html, ListingScope::Browse)
        .expect("browse listing should parse");
    assert_eq!(items.len(), 1);
    assert_eq!(items[0].id, "67ed937c986b1");
    assert_eq!(items[0].uploader.as_deref(), Some("Honeycore"));
    assert_eq!(
        items[0].uploaderUrl.as_deref(),
        Some("https://www.pornhub.com/model/honeycore")
    );
    assert_eq!(items[0].views, Some(199000));
    assert_eq!(items[0].duration, 738);
    assert_eq!(
        items[0].preview.as_deref(),
        Some("https://example.com/preview.webm")
    );
    // The uploader tag is derived from the URL slug, not a `@kind:slug` form.
    assert!(items[0]
        .tags
        .as_ref()
        .is_some_and(|values| values.iter().any(|value| value == "honeycore")));
}
// Feeds a detail page containing a flashvars object, a JSON-LD VideoObject and
// category/tag links to `apply_detail_video` and checks the merged result:
// title and duration from flashvars, thumbnail/views/uploader/upload date from
// JSON-LD, two HLS formats (the mp4 entry is expected to be ignored), and the
// category and tag names in the item's tags.
#[test]
fn applies_detail_video_metadata() {
let provider = PornhubProvider::new();
let item = VideoItem::new(
"69cfa159b1377".to_string(),
"placeholder".to_string(),
"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377".to_string(),
CHANNEL_ID.to_string(),
"https://example.com/thumb.jpg".to_string(),
0,
);
let html = r#"
<script>
var flashvars_482929735 = {
"video_title":"Brazzers Detail Title",
"image_url":"https://example.com/detail.jpg",
"video_duration":"930",
"link_url":"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377",
"mediaDefinitions":[
{"format":"hls","quality":"240","videoUrl":"https://cdn.example.com/master-240.m3u8"},
{"format":"hls","quality":"720","videoUrl":"https://cdn.example.com/master-720.m3u8"},
{"format":"mp4","quality":"720","videoUrl":"https://cdn.example.com/video.mp4","remote":true}
]
};
</script>
<script type="application/ld+json">
{
"@type":"VideoObject",
"thumbnailUrl":"https://example.com/ld-thumb.jpg",
"uploadDate":"2026-04-03T00:00:00+00:00",
"interactionCount":"5700",
"author":{"name":"Brazzers"}
}
</script>
<div class="categoriesWrapper">
<a class="item" href="/categories/big-tits">Big Tits</a>
</div>
<div class="tagsWrapper">
<a class="item" href="/video/search?search=maid">Maid</a>
</div>
"#;
let item = provider
.apply_detail_video(item, html)
.expect("detail page should enrich item");
assert_eq!(item.title, "Brazzers Detail Title");
// The JSON-LD thumbnail is applied after the flashvars one.
assert_eq!(item.thumb, "https://example.com/ld-thumb.jpg");
assert_eq!(item.duration, 930);
assert_eq!(item.views, Some(5700));
assert_eq!(item.uploader.as_deref(), Some("Brazzers"));
assert!(item.uploadedAt.is_some());
assert_eq!(item.formats.as_ref().map(|values| values.len()), Some(2));
assert!(item.tags.as_ref().is_some_and(|values| values
.iter()
.any(|value| value == "Big Tits")));
assert!(item.tags.as_ref().is_some_and(|values| values
.iter()
.any(|value| value == "Maid")));
}
}