// Listing metadata from the source viewer: 1096 lines, 38 KiB, Rust.
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
|
build_proxy_url, strip_url_scheme,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
|
|
use async_trait::async_trait;
|
|
use chrono::{DateTime, NaiveDate, Utc};
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use regex::Regex;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use serde_json::Value;
|
|
use std::collections::HashSet;
|
|
use std::collections::HashMap;
|
|
use std::sync::{Arc, RwLock};
|
|
use std::thread;
|
|
use url::Url;
|
|
|
|
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
|
crate::providers::ProviderChannelMetadata {
|
|
group_id: "mainstream-tube",
|
|
tags: &["mainstream", "studio", "general"],
|
|
};
|
|
|
|
/// Root URL of the site; the provider's request URLs are built on top of it
/// and relative links are resolved against it.
const BASE_URL: &str = "https://www.pornhub.com";

/// Identifier of this provider's channel; also used as the source name when
/// reporting provider errors.
const CHANNEL_ID: &str = "pornhub";

/// Number of leading listing items that are enriched with a detail-page fetch;
/// items at or beyond this index are returned as parsed from the listing.
const DETAIL_ENRICH_LIMIT: usize = 12;
|
|
|
|
// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    // Errors from other crates that are wrapped as-is.
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
        Json(serde_json::Error);
        Url(url::ParseError);
    }

    // Provider-specific failures, carrying a free-form message.
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct PornhubProvider {
|
|
url: String,
|
|
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
|
|
}
|
|
|
|
/// Kind of listing page being requested; it decides which container elements
/// are searched for video cards when the page is parsed.
#[derive(Clone, Copy, Debug)]
enum ListingScope {
    /// Plain browse listing (no query).
    Browse,
    /// Free-text search results.
    Search,
    /// Video listing of a single creator page.
    Creator,
}
|
|
|
|
/// Category of creator page a query can point at.
#[derive(Clone, Copy, Debug)]
enum QueryTargetKind {
    /// A channel page.
    Channel,
    /// A pornstar page.
    Pornstar,
    /// A model page.
    Model,
    /// A user page.
    User,
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct QueryTarget {
|
|
kind: QueryTargetKind,
|
|
slug: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
struct TagInfo {
|
|
kind: QueryTargetKind,
|
|
slug: String,
|
|
title: String,
|
|
}
|
|
|
|
impl QueryTargetKind {
|
|
fn path_segment(self) -> &'static str {
|
|
match self {
|
|
Self::Channel => "channels",
|
|
Self::Pornstar => "pornstar",
|
|
Self::Model => "model",
|
|
Self::User => "users",
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PornhubProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let url = self.url.clone();
|
|
let tag_map = Arc::clone(&self.tag_map);
|
|
thread::spawn(move || {
|
|
let rt = tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
.unwrap();
|
|
rt.block_on(async {
|
|
let _ = Self::load_tags(&url, tag_map).await;
|
|
});
|
|
});
|
|
}
|
|
|
|
/// Fills `tag_map` with the creators of every supported kind, one kind after
/// the other. Stops at, and returns, the first error.
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
    // NOTE(review): these segments are singular ("channel", "user") while
    // `QueryTargetKind::path_segment` uses "channels"/"users" — confirm that
    // the `/<segment>/top` pages really live under the singular paths.
    let sources = [
        ("channel", QueryTargetKind::Channel),
        ("pornstar", QueryTargetKind::Pornstar),
        ("model", QueryTargetKind::Model),
        ("user", QueryTargetKind::User),
    ];

    for (path_segment, kind) in sources {
        Self::load_kind(base_url, path_segment, kind, &tag_map).await?;
    }
    Ok(())
}
|
|
|
|
async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
|
|
let url = format!("{}/{}/top", base_url, path_segment);
|
|
let mut requester = crate::util::requester::Requester::new();
|
|
let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
|
|
let document = Html::parse_document(&body);
|
|
let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
|
|
for element in document.select(&selector) {
|
|
if let Some(href) = element.attr("href") {
|
|
if let Some(slug) = Self::slug_from_url(href, path_segment) {
|
|
let title = element.text().collect::<String>().trim().to_string();
|
|
if !title.is_empty() && !slug.is_empty() {
|
|
let info = TagInfo {
|
|
kind,
|
|
slug: slug.clone(),
|
|
title: title.clone(),
|
|
};
|
|
let mut map = tag_map.write().unwrap();
|
|
map.insert(title.to_ascii_lowercase(), info.clone());
|
|
map.insert(slug.to_ascii_lowercase(), info);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn slug_from_url(url: &str, path_segment: &str) -> Option<String> {
|
|
let parsed = Url::parse(url).ok()?;
|
|
let mut segments = parsed.path_segments()?;
|
|
if segments.next() == Some(path_segment) {
|
|
segments.next().map(|s| s.to_string())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
Channel {
|
|
id: CHANNEL_ID.to_string(),
|
|
name: "Pornhub".to_string(),
|
|
description: "Pornhub listings with creator queries and direct HLS playback links."
|
|
.to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
|
|
status: "active".to_string(),
|
|
categories: vec![],
|
|
options: vec![ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse Pornhub charts by sort order.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![
|
|
FilterOption {
|
|
id: "mr".to_string(),
|
|
title: "Featured Recently".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "mv".to_string(),
|
|
title: "Most Viewed".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "tr".to_string(),
|
|
title: "Top Rated".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "ht".to_string(),
|
|
title: "Hottest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "lg".to_string(),
|
|
title: "Longest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "cm".to_string(),
|
|
title: "Newest".to_string(),
|
|
},
|
|
],
|
|
multiSelect: false,
|
|
}],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value)
|
|
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
|
|
}
|
|
|
|
fn text_of(element: &ElementRef<'_>) -> String {
|
|
element
|
|
.text()
|
|
.collect::<Vec<_>>()
|
|
.join(" ")
|
|
.split_whitespace()
|
|
.collect::<Vec<_>>()
|
|
.join(" ")
|
|
}
|
|
|
|
fn decode_html(value: &str) -> String {
|
|
decode(value.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| value.to_string())
|
|
}
|
|
|
|
fn normalize_url(&self, value: &str) -> String {
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() {
|
|
return String::new();
|
|
}
|
|
if let Ok(url) = Url::parse(trimmed) {
|
|
return url.to_string();
|
|
}
|
|
Url::parse(BASE_URL)
|
|
.and_then(|base| base.join(trimmed))
|
|
.map(|value| value.to_string())
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
/// Maps a client-supplied sort value onto one of the supported sort ids.
///
/// Matching ignores surrounding whitespace and ASCII case. `mv`, `tr`, `ht`,
/// `lg` and `cm` map to themselves; every other value (including `mr`, `new`
/// and anything mentioning "date") maps to the default `mr`.
fn normalize_sort(sort: &str) -> &'static str {
    const RECOGNISED: [&str; 5] = ["mv", "tr", "ht", "lg", "cm"];

    let wanted = sort.trim();
    RECOGNISED
        .iter()
        .copied()
        .find(|known| wanted.eq_ignore_ascii_case(known))
        .unwrap_or("mr")
}
|
|
|
|
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
|
|
let normalized = query.trim().to_ascii_lowercase();
|
|
if let Some(info) = self.tag_map.read().unwrap().get(&normalized) {
|
|
return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() });
|
|
}
|
|
// Fallback to kind:slug without @
|
|
let trimmed = query.trim();
|
|
let (kind_str, raw_slug) = trimmed.split_once(':')?;
|
|
let slug = raw_slug
|
|
.trim()
|
|
.trim_matches('/')
|
|
.replace(' ', "-")
|
|
.to_ascii_lowercase();
|
|
if slug.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let kind = match kind_str.trim().to_ascii_lowercase().as_str() {
|
|
"channel" | "channels" => QueryTargetKind::Channel,
|
|
"pornstar" | "pornstars" => QueryTargetKind::Pornstar,
|
|
"model" | "models" => QueryTargetKind::Model,
|
|
"user" | "users" => QueryTargetKind::User,
|
|
_ => return None,
|
|
};
|
|
|
|
Some(QueryTarget { kind, slug })
|
|
}
|
|
|
|
fn build_browse_url(&self, page: u8, sort: &str) -> String {
|
|
let order = Self::normalize_sort(sort);
|
|
if order == "mr" {
|
|
format!("{}/video?page={page}", self.url)
|
|
} else {
|
|
format!("{}/video?o={order}&page={page}", self.url)
|
|
}
|
|
}
|
|
|
|
fn build_creator_url(&self, page: u8, sort: &str, target: &QueryTarget) -> String {
|
|
let mut url = format!(
|
|
"{}/{}/{}/videos?page={page}",
|
|
self.url,
|
|
target.kind.path_segment(),
|
|
target.slug
|
|
);
|
|
|
|
let mapped_sort = match target.kind {
|
|
QueryTargetKind::Channel => match Self::normalize_sort(sort) {
|
|
"mv" => Some("vi"),
|
|
"tr" => Some("ra"),
|
|
_ => None,
|
|
},
|
|
_ => match Self::normalize_sort(sort) {
|
|
"mv" => Some("mv"),
|
|
"tr" => Some("tr"),
|
|
"lg" => Some("lg"),
|
|
_ => None,
|
|
},
|
|
};
|
|
|
|
if let Some(order) = mapped_sort {
|
|
url.push_str("&o=");
|
|
url.push_str(order);
|
|
}
|
|
|
|
url
|
|
}
|
|
|
|
fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) {
|
|
match query.map(str::trim).filter(|value| !value.is_empty()) {
|
|
Some(query) => {
|
|
if let Some(target) = self.parse_query_target(query) {
|
|
(self.build_creator_url(page, sort, &target), ListingScope::Creator)
|
|
} else {
|
|
let encoded = query.to_ascii_lowercase().replace(' ', "+");
|
|
(
|
|
format!("{}/video/search?search={encoded}&page={page}", self.url),
|
|
ListingScope::Search,
|
|
)
|
|
}
|
|
}
|
|
None => (self.build_browse_url(page, sort), ListingScope::Browse),
|
|
}
|
|
}
|
|
|
|
fn parse_listing_page(&self, html: &str, scope: ListingScope) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_document(html);
|
|
let item_selector = Self::selector("li.pcVideoListItem")?;
|
|
|
|
let container_selectors = match scope {
|
|
ListingScope::Browse => vec!["#videoCategory"],
|
|
ListingScope::Search => vec!["#videoSearchResult"],
|
|
ListingScope::Creator => vec!["#showAllChanelVideos", "#mostRecentVideosSection"],
|
|
};
|
|
|
|
for selector_text in container_selectors {
|
|
let container_selector = Self::selector(selector_text)?;
|
|
if let Some(container) = document.select(&container_selector).next() {
|
|
if container.select(&item_selector).next().is_some() {
|
|
return self.parse_listing_items(container);
|
|
}
|
|
}
|
|
}
|
|
|
|
Err(ErrorKind::Parse(format!("missing listing container for scope {scope:?}")).into())
|
|
}
|
|
|
|
fn parse_listing_items(&self, container: ElementRef<'_>) -> Result<Vec<VideoItem>> {
|
|
let item_selector = Self::selector("li.pcVideoListItem")?;
|
|
let link_selector = Self::selector("a[href*=\"/view_video.php\"]")?;
|
|
let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?;
|
|
let image_selector = Self::selector("img")?;
|
|
let duration_selector = Self::selector(".duration")?;
|
|
let views_selector = Self::selector(".views var")?;
|
|
let uploader_selector = Self::selector(
|
|
".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]",
|
|
)?;
|
|
let verified_selector = Self::selector(".verified-icon, .channel-icon")?;
|
|
|
|
let mut items = Vec::new();
|
|
let mut seen_ids = HashSet::new();
|
|
|
|
for card in container.select(&item_selector) {
|
|
let Some(link) = card.select(&link_selector).next() else {
|
|
continue;
|
|
};
|
|
|
|
let href = link.value().attr("href").unwrap_or_default();
|
|
let page_url = self.normalize_url(href);
|
|
if page_url.is_empty() || !page_url.contains("/view_video.php") {
|
|
continue;
|
|
}
|
|
|
|
let id = card
|
|
.value()
|
|
.attr("data-video-vkey")
|
|
.filter(|value| !value.trim().is_empty())
|
|
.map(ToOwned::to_owned)
|
|
.or_else(|| Self::viewkey_from_url(&page_url))
|
|
.or_else(|| {
|
|
card.value()
|
|
.attr("data-video-id")
|
|
.filter(|value| !value.trim().is_empty())
|
|
.map(ToOwned::to_owned)
|
|
});
|
|
let Some(id) = id else {
|
|
continue;
|
|
};
|
|
if !seen_ids.insert(id.clone()) {
|
|
continue;
|
|
}
|
|
|
|
let title = link
|
|
.value()
|
|
.attr("title")
|
|
.filter(|value| !value.trim().is_empty())
|
|
.map(Self::decode_html)
|
|
.or_else(|| {
|
|
card.select(&title_selector)
|
|
.next()
|
|
.map(|value| Self::decode_html(&Self::text_of(&value)))
|
|
})
|
|
.unwrap_or_default();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let image = card.select(&image_selector).next();
|
|
let thumb = image
|
|
.as_ref()
|
|
.and_then(|value| {
|
|
value
|
|
.value()
|
|
.attr("src")
|
|
.or_else(|| value.value().attr("data-mediumthumb"))
|
|
})
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_default();
|
|
|
|
let duration = card
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.and_then(|value| parse_time_to_seconds(&value))
|
|
.unwrap_or(0) as u32;
|
|
|
|
let views = card
|
|
.select(&views_selector)
|
|
.next()
|
|
.and_then(|value| parse_abbreviated_number(&Self::text_of(&value)));
|
|
|
|
let uploader_link = card.select(&uploader_selector).next();
|
|
let uploader = uploader_link
|
|
.as_ref()
|
|
.map(|value| Self::decode_html(&Self::text_of(value)))
|
|
.filter(|value| !value.is_empty());
|
|
let uploader_url = uploader_link
|
|
.and_then(|value| value.value().attr("href"))
|
|
.map(|value| self.normalize_url(value))
|
|
.filter(|value| !value.is_empty());
|
|
|
|
let mut item = VideoItem::new(
|
|
id,
|
|
title,
|
|
page_url,
|
|
CHANNEL_ID.to_string(),
|
|
thumb,
|
|
duration,
|
|
);
|
|
item.views = views;
|
|
item.preview = image
|
|
.and_then(|value| value.value().attr("data-mediabook"))
|
|
.map(|value| self.normalize_url(value))
|
|
.filter(|value| !value.is_empty());
|
|
item.verified = card
|
|
.select(&verified_selector)
|
|
.next()
|
|
.map(|_| true);
|
|
item.uploader = uploader.clone();
|
|
item.uploaderUrl = uploader_url.clone();
|
|
item.uploaderId = uploader_url
|
|
.as_deref()
|
|
.and_then(Self::uploader_identity_from_url);
|
|
|
|
let mut tags = Vec::new();
|
|
if let Some(tag) = uploader_url
|
|
.as_deref()
|
|
.and_then(|url| self.query_tag_from_uploader_url(url))
|
|
{
|
|
tags.push(tag);
|
|
}
|
|
if !tags.is_empty() {
|
|
item.tags = Some(tags);
|
|
}
|
|
|
|
items.push(item);
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn viewkey_from_url(url: &str) -> Option<String> {
|
|
let parsed = Url::parse(url).ok()?;
|
|
parsed
|
|
.query_pairs()
|
|
.find(|(key, _)| key == "viewkey")
|
|
.map(|(_, value)| value.into_owned())
|
|
}
|
|
|
|
fn uploader_identity_from_url(url: &str) -> Option<String> {
|
|
let parsed = Url::parse(url).ok()?;
|
|
let mut segments = parsed.path_segments()?;
|
|
let kind = segments.next()?.trim_matches('/');
|
|
let slug = segments.next()?.trim_matches('/');
|
|
if kind.is_empty() || slug.is_empty() {
|
|
return None;
|
|
}
|
|
Some(format!("{CHANNEL_ID}:{kind}:{slug}"))
|
|
}
|
|
|
|
fn query_tag_from_uploader_url(&self, url: &str) -> Option<String> {
|
|
let parsed = Url::parse(url).ok()?;
|
|
let mut segments = parsed.path_segments()?;
|
|
let kind_str = segments.next()?.trim_matches('/');
|
|
let slug = segments.next()?.trim_matches('/');
|
|
if kind_str.is_empty() || slug.is_empty() {
|
|
return None;
|
|
}
|
|
let normalized_slug = slug.to_ascii_lowercase();
|
|
if let Some(info) = self.tag_map.read().unwrap().get(&normalized_slug) {
|
|
return Some(info.title.clone());
|
|
}
|
|
Some(slug.replace('-', " "))
|
|
}
|
|
|
|
/// Appends the trimmed `value` to `values` unless it is blank or an entry
/// equal to it (ignoring ASCII case) is already present.
fn push_unique(values: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));

    if !candidate.is_empty() && !already_present {
        values.push(candidate.to_owned());
    }
}
|
|
|
|
fn collect_named_links(&self, document: &Html, selector_text: &str) -> Result<Vec<String>> {
|
|
let selector = Self::selector(selector_text)?;
|
|
let mut values = Vec::new();
|
|
for element in document.select(&selector) {
|
|
Self::push_unique(&mut values, Self::decode_html(&Self::text_of(&element)));
|
|
}
|
|
Ok(values)
|
|
}
|
|
|
|
fn parse_upload_date(value: &str) -> Option<u64> {
|
|
if let Ok(parsed) = DateTime::parse_from_rfc3339(value.trim()) {
|
|
return Some(parsed.timestamp() as u64);
|
|
}
|
|
|
|
NaiveDate::parse_from_str(value.trim(), "%Y-%m-%d")
|
|
.ok()
|
|
.and_then(|date| date.and_hms_opt(0, 0, 0))
|
|
.map(|date| DateTime::<Utc>::from_naive_utc_and_offset(date, Utc).timestamp() as u64)
|
|
}
|
|
|
|
fn json_string(value: Option<&Value>) -> Option<String> {
|
|
value.and_then(|value| match value {
|
|
Value::String(value) => Some(value.to_string()),
|
|
Value::Number(value) => Some(value.to_string()),
|
|
_ => None,
|
|
})
|
|
}
|
|
|
|
fn json_u32(value: Option<&Value>) -> Option<u32> {
|
|
match value {
|
|
Some(Value::Number(value)) => value.as_u64().and_then(|value| u32::try_from(value).ok()),
|
|
Some(Value::String(value)) => value.parse::<u32>().ok(),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn extract_flashvars(&self, html: &str) -> Result<Option<Value>> {
|
|
let regex = Self::regex(r#"(?s)var\s+flashvars_\d+\s*=\s*(\{.*?\});"#)?;
|
|
let Some(raw) = regex
|
|
.captures(html)
|
|
.and_then(|captures| captures.get(1))
|
|
.map(|value| value.as_str())
|
|
else {
|
|
return Ok(None);
|
|
};
|
|
|
|
Ok(Some(serde_json::from_str::<Value>(raw)?))
|
|
}
|
|
|
|
fn extract_ld_video_object(&self, document: &Html) -> Result<Option<Value>> {
|
|
let script_selector = Self::selector("script[type=\"application/ld+json\"]")?;
|
|
for script in document.select(&script_selector) {
|
|
let raw = script.inner_html();
|
|
let Ok(value) = serde_json::from_str::<Value>(&raw) else {
|
|
continue;
|
|
};
|
|
|
|
if Self::is_video_object(&value) {
|
|
return Ok(Some(value));
|
|
}
|
|
|
|
if let Some(array) = value.as_array() {
|
|
for entry in array {
|
|
if Self::is_video_object(entry) {
|
|
return Ok(Some(entry.clone()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(None)
|
|
}
|
|
|
|
fn is_video_object(value: &Value) -> bool {
|
|
value
|
|
.get("@type")
|
|
.and_then(|value| value.as_str())
|
|
.is_some_and(|value| value.eq_ignore_ascii_case("VideoObject"))
|
|
}
|
|
|
|
fn build_formats_from_flashvars(&self, flashvars: &Value) -> Vec<VideoFormat> {
|
|
let mut entries = flashvars
|
|
.get("mediaDefinitions")
|
|
.and_then(|value| value.as_array())
|
|
.into_iter()
|
|
.flatten()
|
|
.filter_map(|entry| {
|
|
let format = entry
|
|
.get("format")
|
|
.and_then(|value| value.as_str())
|
|
.unwrap_or_default()
|
|
.to_ascii_lowercase();
|
|
if format != "hls" {
|
|
return None;
|
|
}
|
|
|
|
let url = entry
|
|
.get("videoUrl")
|
|
.and_then(|value| value.as_str())
|
|
.map(|value| self.normalize_url(value))
|
|
.filter(|value| !value.is_empty())?;
|
|
|
|
let quality = entry
|
|
.get("quality")
|
|
.and_then(|value| value.as_str())
|
|
.unwrap_or("auto");
|
|
let label = match quality {
|
|
"auto" => "auto".to_string(),
|
|
value if value.ends_with('p') => value.to_string(),
|
|
value => format!("{value}p"),
|
|
};
|
|
|
|
let rank = if label == "auto" {
|
|
0
|
|
} else {
|
|
label
|
|
.trim_end_matches('p')
|
|
.parse::<u32>()
|
|
.unwrap_or(0)
|
|
};
|
|
|
|
Some((rank, label, url))
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
entries.sort_by_key(|(rank, _, _)| *rank);
|
|
entries.dedup_by(|a, b| a.2 == b.2);
|
|
|
|
entries
|
|
.into_iter()
|
|
.map(|(_, label, url)| {
|
|
VideoFormat::new(url, label.clone(), "m3u8".to_string())
|
|
.format_id(label.clone())
|
|
.format_note(label)
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
|
let document = Html::parse_document(html);
|
|
|
|
if let Some(flashvars) = self.extract_flashvars(html)? {
|
|
if let Some(title) = Self::json_string(flashvars.get("video_title")) {
|
|
let decoded = Self::decode_html(&title);
|
|
if !decoded.is_empty() {
|
|
item.title = decoded;
|
|
}
|
|
}
|
|
|
|
if let Some(thumb) = Self::json_string(flashvars.get("image_url")) {
|
|
let normalized = self.normalize_url(&thumb);
|
|
if !normalized.is_empty() {
|
|
item.thumb = normalized;
|
|
}
|
|
}
|
|
|
|
if let Some(duration) = Self::json_u32(flashvars.get("video_duration")) {
|
|
item.duration = duration;
|
|
}
|
|
|
|
if let Some(link_url) = Self::json_string(flashvars.get("link_url")) {
|
|
let normalized = self.normalize_url(&link_url);
|
|
if !normalized.is_empty() {
|
|
item.url = normalized;
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(ld_video) = self.extract_ld_video_object(&document)? {
|
|
if let Some(thumb) = ld_video
|
|
.get("thumbnailUrl")
|
|
.and_then(|value| match value {
|
|
Value::String(value) => Some(value.to_string()),
|
|
Value::Array(values) => values
|
|
.iter()
|
|
.find_map(|entry| entry.as_str().map(ToOwned::to_owned)),
|
|
_ => None,
|
|
})
|
|
{
|
|
let normalized = self.normalize_url(&thumb);
|
|
if !normalized.is_empty() {
|
|
item.thumb = normalized;
|
|
}
|
|
}
|
|
|
|
if let Some(uploaded_at) = ld_video
|
|
.get("uploadDate")
|
|
.and_then(|value| value.as_str())
|
|
.and_then(Self::parse_upload_date)
|
|
{
|
|
item.uploadedAt = Some(uploaded_at);
|
|
}
|
|
|
|
if item.views.is_none() {
|
|
item.views = Self::json_string(ld_video.get("interactionCount"))
|
|
.and_then(|value| value.parse::<u32>().ok());
|
|
}
|
|
|
|
if item.uploader.is_none() {
|
|
item.uploader = ld_video
|
|
.get("author")
|
|
.and_then(|value| match value {
|
|
Value::String(value) => Some(value.to_string()),
|
|
Value::Object(values) => values
|
|
.get("name")
|
|
.and_then(|value| value.as_str())
|
|
.map(ToOwned::to_owned),
|
|
_ => None,
|
|
})
|
|
.filter(|value| !value.trim().is_empty());
|
|
}
|
|
}
|
|
|
|
let mut tags = item.tags.clone().unwrap_or_default();
|
|
for value in self.collect_named_links(
|
|
&document,
|
|
".categoriesWrapper a.item, .categoriesWrapper a[href*=\"/categories/\"]",
|
|
)? {
|
|
Self::push_unique(&mut tags, value);
|
|
}
|
|
for value in self.collect_named_links(
|
|
&document,
|
|
".tagsWrapper a.item, .tagsWrapper a[href*=\"/video/search\"]",
|
|
)? {
|
|
Self::push_unique(&mut tags, value);
|
|
}
|
|
for value in self.collect_named_links(
|
|
&document,
|
|
".pornstarsWrapper a.item, .pornstarsWrapper a[href*=\"/pornstar/\"], a[href*=\"/pornstar/\"]",
|
|
)? {
|
|
Self::push_unique(&mut tags, value);
|
|
}
|
|
for value in self.collect_named_links(
|
|
&document,
|
|
".modelsWrapper a.item, .modelsWrapper a[href*=\"/model/\"], a[href*=\"/model/\"]",
|
|
)? {
|
|
Self::push_unique(&mut tags, value);
|
|
}
|
|
if !tags.is_empty() {
|
|
item.tags = Some(tags);
|
|
}
|
|
|
|
Ok(item)
|
|
}
|
|
|
|
async fn enrich_listing_items(&self, items: Vec<VideoItem>, options: &ServerOptions) -> Vec<VideoItem> {
|
|
let requester = requester_or_default(options, CHANNEL_ID, "enrich_listing_items.requester");
|
|
let mut enriched = stream::iter(items.into_iter().enumerate().map(|(index, item)| {
|
|
let provider = self.clone();
|
|
let requester = requester.clone();
|
|
async move {
|
|
if index >= DETAIL_ENRICH_LIMIT || item.url.is_empty() {
|
|
return (index, item);
|
|
}
|
|
|
|
let fallback = item.clone();
|
|
let enriched = match provider.fetch_detail(item, requester).await {
|
|
Ok(value) => value,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"enrich_listing_items.detail",
|
|
&format!("url={}; error={error}", fallback.url),
|
|
);
|
|
fallback
|
|
}
|
|
};
|
|
(index, enriched)
|
|
}
|
|
}))
|
|
.buffer_unordered(4)
|
|
.collect::<Vec<_>>()
|
|
.await;
|
|
|
|
enriched.sort_by_key(|(index, _)| *index);
|
|
enriched.into_iter().map(|(_, item)| item).collect()
|
|
}
|
|
|
|
async fn fetch_detail(&self, item: VideoItem, mut requester: crate::util::requester::Requester) -> Result<VideoItem> {
|
|
let html = requester
|
|
.get(&item.url, None)
|
|
.await
|
|
.map_err(|error| ErrorKind::Parse(format!("detail request failed: {error}")))?;
|
|
self.apply_detail_video(item, &html)
|
|
}
|
|
|
|
async fn fetch_listing(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u8,
|
|
sort: &str,
|
|
query: Option<&str>,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let (video_url, scope) = self.build_listing_request(page, sort, query);
|
|
let old_items = match cache.get(&video_url) {
|
|
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
|
|
return Ok(items.clone());
|
|
}
|
|
Some((_, items)) => items.clone(),
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_listing.requester");
|
|
let text = match requester.get(&video_url, None).await {
|
|
Ok(text) => text,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"fetch_listing.request",
|
|
&format!("url={video_url}; error={error}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
let items = match self.parse_listing_page(&text, scope) {
|
|
Ok(items) => items,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"fetch_listing.parse",
|
|
&format!("url={video_url}; error={error}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
if items.is_empty() {
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let mut items = self.enrich_listing_items(items, &options).await;
|
|
|
|
// Rewrite thumbs and previews to use the proxy when appropriate
|
|
for item in items.iter_mut() {
|
|
let proxied = self.proxied_thumb(&options, &item.thumb);
|
|
if !proxied.is_empty() {
|
|
item.thumb = proxied;
|
|
}
|
|
if let Some(prev) = item.preview.clone() {
|
|
let proxied_prev = self.proxied_thumb(&options, &prev);
|
|
if !proxied_prev.is_empty() {
|
|
item.preview = Some(proxied_prev);
|
|
}
|
|
}
|
|
}
|
|
|
|
cache.remove(&video_url);
|
|
cache.insert(video_url, items.clone());
|
|
Ok(items)
|
|
}
|
|
}
|
|
|
|
impl PornhubProvider {
|
|
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
|
|
if thumb.is_empty() {
|
|
return String::new();
|
|
}
|
|
if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) {
|
|
return String::new();
|
|
}
|
|
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
|
|
}
|
|
}
|
|
|
|
struct PornhubThumbPolicy;
|
|
|
|
impl PornhubThumbPolicy {
|
|
fn is_allowed_thumb_url(url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else { return false; };
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else { return false; };
|
|
// Only allow the specific Pornhub CDN host used for thumbnails
|
|
host.eq_ignore_ascii_case("pix-cdn77.phncdn.com")
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for PornhubProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let _ = per_page;
|
|
|
|
let page = page.parse::<u8>().unwrap_or(1);
|
|
let sort = Self::normalize_sort(&sort).to_string();
|
|
|
|
match self
|
|
.fetch_listing(cache, page, &sort, query.as_deref(), options)
|
|
.await
|
|
{
|
|
Ok(items) => items,
|
|
Err(error) => {
|
|
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `kind:slug` queries resolve to creator targets; plain words do not.
    #[test]
    fn parses_creator_queries() {
        let provider = PornhubProvider::new();

        let target = provider
            .parse_query_target("channels:Brazzers")
            .expect("channel target should parse");
        assert!(matches!(target.kind, QueryTargetKind::Channel));
        assert_eq!(target.slug, "brazzers");

        let target = provider
            .parse_query_target("pornstar:Alex Mack")
            .expect("pornstar target should parse");
        assert!(matches!(target.kind, QueryTargetKind::Pornstar));
        assert_eq!(target.slug, "alex-mack");

        assert!(provider.parse_query_target("teacher").is_none());
    }

    /// A browse listing card is turned into a fully populated item.
    #[test]
    fn parses_browse_listing_cards() {
        let provider = PornhubProvider::new();
        let html = r#"
            <ul id="videoCategory" class="nf-videos videos search-video-thumbs">
                <li class="sniperModeEngaged"></li>
                <li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
                    data-video-id="466705435"
                    data-video-vkey="67ed937c986b1">
                    <a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
                    <img src="https://example.com/thumb.jpg"
                         data-mediabook="https://example.com/preview.webm" />
                    <div class="marker-overlays"><var class="duration">12:18</var></div>
                    <div class="videoUploaderBlock">
                        <div class="usernameWrap">
                            <a href="/model/honeycore">Honeycore</a>
                        </div>
                    </div>
                    <div class="videoDetailsBlock">
                        <span class="views"><var>199K</var> views</span>
                    </div>
                </li>
            </ul>
        "#;

        let items = provider
            .parse_listing_page(html, ListingScope::Browse)
            .expect("browse listing should parse");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].id, "67ed937c986b1");
        assert_eq!(items[0].uploader.as_deref(), Some("Honeycore"));
        assert_eq!(
            items[0].uploaderUrl.as_deref(),
            Some("https://www.pornhub.com/model/honeycore")
        );
        assert_eq!(items[0].views, Some(199000));
        assert_eq!(items[0].duration, 738);
        assert_eq!(
            items[0].preview.as_deref(),
            Some("https://example.com/preview.webm")
        );
        // The uploader tag is the creator's title from the tag map when it is
        // known, otherwise the de-hyphenated slug; it is never an
        // `@kind:slug` string. The comparison ignores case because the
        // background tag load may or may not have supplied the title.
        assert!(items[0]
            .tags
            .as_ref()
            .is_some_and(|values| values
                .iter()
                .any(|value| value.eq_ignore_ascii_case("honeycore"))));
    }

    /// Detail-page metadata overrides and complements the listing data.
    #[test]
    fn applies_detail_video_metadata() {
        let provider = PornhubProvider::new();
        let item = VideoItem::new(
            "69cfa159b1377".to_string(),
            "placeholder".to_string(),
            "https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377".to_string(),
            CHANNEL_ID.to_string(),
            "https://example.com/thumb.jpg".to_string(),
            0,
        );
        let html = r#"
            <script>
                var flashvars_482929735 = {
                    "video_title":"Brazzers Detail Title",
                    "image_url":"https://example.com/detail.jpg",
                    "video_duration":"930",
                    "link_url":"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377",
                    "mediaDefinitions":[
                        {"format":"hls","quality":"240","videoUrl":"https://cdn.example.com/master-240.m3u8"},
                        {"format":"hls","quality":"720","videoUrl":"https://cdn.example.com/master-720.m3u8"},
                        {"format":"mp4","quality":"720","videoUrl":"https://cdn.example.com/video.mp4","remote":true}
                    ]
                };
            </script>
            <script type="application/ld+json">
                {
                    "@type":"VideoObject",
                    "thumbnailUrl":"https://example.com/ld-thumb.jpg",
                    "uploadDate":"2026-04-03T00:00:00+00:00",
                    "interactionCount":"5700",
                    "author":{"name":"Brazzers"}
                }
            </script>
            <div class="categoriesWrapper">
                <a class="item" href="/categories/big-tits">Big Tits</a>
            </div>
            <div class="tagsWrapper">
                <a class="item" href="/video/search?search=maid">Maid</a>
            </div>
        "#;

        let item = provider
            .apply_detail_video(item, html)
            .expect("detail page should enrich item");
        assert_eq!(item.title, "Brazzers Detail Title");
        assert_eq!(item.thumb, "https://example.com/ld-thumb.jpg");
        assert_eq!(item.duration, 930);
        assert_eq!(item.views, Some(5700));
        assert_eq!(item.uploader.as_deref(), Some("Brazzers"));
        assert!(item.uploadedAt.is_some());
        assert_eq!(item.formats.as_ref().map(|values| values.len()), Some(2));
        assert!(item
            .tags
            .as_ref()
            .is_some_and(|values| values.iter().any(|value| value == "Big Tits")));
        assert!(item
            .tags
            .as_ref()
            .is_some_and(|values| values.iter().any(|value| value == "Maid")));
    }
}
|