From e6803195411efe2a1bc01672234bf6df7ef40ea0 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 3 Apr 2026 18:01:03 +0000 Subject: [PATCH] pornhub --- src/providers/mod.rs | 100 +++- src/providers/pornhub.rs | 1032 ++++++++++++++++++++++++++++++-------- 2 files changed, 925 insertions(+), 207 deletions(-) diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 2c99d47..8cef800 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -11,7 +11,9 @@ use std::time::{Duration, Instant}; use crate::{ DbPool, api::ClientVersion, - status::{Channel, ChannelGroup, ChannelView, FilterOption, Status, StatusResponse}, + status::{ + Channel, ChannelGroup, ChannelOption, ChannelView, FilterOption, Status, StatusResponse, + }, uploaders::UploaderProfile, util::{cache::VideoCache, discord::send_discord_error_report, requester::Requester}, videos::{FlexibleNumber, ServerOptions, VideoItem, VideosRequest}, @@ -756,6 +758,25 @@ fn channel_group_order(group_id: &str) -> usize { } } +fn should_hide_status_option(option: &ChannelOption) -> bool { + let id = option.id.trim().to_ascii_lowercase(); + let title = option.title.trim().to_ascii_lowercase(); + + matches!(id.as_str(), "stars" | "networks" | "categories") + || title.contains("models") + || title.contains("pornstars") + || title.contains("genres") + || title.contains("networks") + || (id == "filter" && title.contains("tags")) +} + +fn sanitize_status_options(options: Vec) -> Vec { + options + .into_iter() + .filter(|option| !should_hide_status_option(option)) + .collect() +} + pub fn decorate_channel(channel: Channel) -> ChannelView { let metadata = channel_metadata_for(&channel.id); let runtime_status = current_provider_channel_status(&channel.id); @@ -770,8 +791,8 @@ pub fn decorate_channel(channel: Channel) -> ChannelView { premium: channel.premium, favicon: channel.favicon, status: runtime_status.unwrap_or(channel.status), - categories: channel.categories, - options: channel.options, + categories: vec![], + options: sanitize_status_options(channel.options), nsfw: channel.nsfw, groupKey: metadata.map(|value| value.group_id.to_string()), sortOrder: None, @@ -987,6 +1008,79 @@ mod tests { } } + #[test] + fn decorate_channel_strips_heavy_status_filters() { + let mut channel = base_channel("status-clean"); + channel.categories = vec!["Drama".to_string(), "Action".to_string()]; + channel.options = vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: String::new(), + systemImage: "arrow.up.arrow.down".to_string(), + colorName: "blue".to_string(), + options: vec![FilterOption { + id: "new".to_string(), + title: "Newest".to_string(), + }], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Genres".to_string(), + description: String::new(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: vec![FilterOption { + id: "drama".to_string(), + title: "Drama".to_string(), + }], + multiSelect: true, + }, + ChannelOption { + id: "filter".to_string(), + title: "Tags".to_string(), + description: String::new(), + systemImage: "tag".to_string(), + colorName: "green".to_string(), + options: vec![FilterOption { + id: "tag".to_string(), + title: "Tag".to_string(), + }], + multiSelect: true, + }, + ChannelOption { + id: "stars".to_string(), + title: "Models".to_string(), + description: String::new(), + systemImage: "person.2".to_string(), + colorName: "pink".to_string(), + options: vec![FilterOption { + id: "model".to_string(), + title: "Model".to_string(), + }], + multiSelect: true, + }, + ChannelOption { + id: "networks".to_string(), + title: "Networks".to_string(), + description: String::new(), + systemImage: "network".to_string(), + colorName: "purple".to_string(), + options: vec![FilterOption { + id: "network".to_string(), + title: "Network".to_string(), + }], + multiSelect: true, + }, + ]; + + let decorated = decorate_channel(channel); + assert!(decorated.categories.is_empty()); + assert_eq!(decorated.options.len(), 1); + assert_eq!(decorated.options[0].id, "sort"); + } + fn test_db_pool() -> DbPool { let unique = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/src/providers/pornhub.rs b/src/providers/pornhub.rs index d433ebc..c883f6e 100644 --- a/src/providers/pornhub.rs +++ b/src/providers/pornhub.rs @@ -1,16 +1,24 @@ use crate::DbPool; use crate::api::ClientVersion; -use crate::providers::Provider; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoItem}; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; +use chrono::{DateTime, NaiveDate, Utc}; use error_chain::error_chain; +use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; -use std::vec; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use serde_json::Value; +use std::collections::HashSet; +use url::Url; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { @@ -18,10 +26,16 @@ pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = tags: &["mainstream", "studio", "general"], }; +const BASE_URL: &str = "https://www.pornhub.com"; +const CHANNEL_ID: &str = "pornhub"; +const DETAIL_ENRICH_LIMIT: usize = 12; + error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); + Json(serde_json::Error); + Url(url::ParseError); } errors { Parse(msg: String) { @@ -36,18 +50,51 @@ pub struct PornhubProvider { url: String, } +#[derive(Debug, Clone, Copy)] +enum ListingScope { + Browse, + Search, + Creator, +} + +#[derive(Debug, Clone, Copy)] +enum QueryTargetKind { + Channel, + Pornstar, + Model, + User, +} + +#[derive(Debug, Clone)] +struct QueryTarget { + kind: QueryTargetKind, + slug: String, +} + +impl QueryTargetKind { + fn path_segment(self) -> &'static str { + match self { + Self::Channel => "channels", + Self::Pornstar => "pornstar", + Self::Model => "model", + Self::User => "users", + } + } +} + impl PornhubProvider { pub fn new() -> Self { Self { - url: "https://www.pornhub.com".to_string(), + url: BASE_URL.to_string(), } } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { - id: "pornhub".to_string(), + id: CHANNEL_ID.to_string(), name: "Pornhub".to_string(), - description: "Pornhub Free Videos".to_string(), + description: "Pornhub listings with creator queries and direct HLS playback links." + .to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(), status: "active".to_string(), @@ -55,13 +102,13 @@ impl PornhubProvider { options: vec![ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), - description: "Sort the Videos".to_string(), + description: "Browse Pornhub charts by sort order.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "mr".to_string(), - title: "Most Recent".to_string(), + title: "Featured Recently".to_string(), }, FilterOption { id: "mv".to_string(), @@ -71,6 +118,10 @@ impl PornhubProvider { id: "tr".to_string(), title: "Top Rated".to_string(), }, + FilterOption { + id: "ht".to_string(), + title: "Hottest".to_string(), + }, FilterOption { id: "lg".to_string(), title: "Longest".to_string(), @@ -87,225 +138,679 @@ impl PornhubProvider { } } - async fn get( - &self, - cache: VideoCache, - page: u8, - sort: &str, - options: ServerOptions, - ) -> Result> { - let video_url = format!("{}/video?o={}&page={}", self.url, sort, page); + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| ErrorKind::Parse(format!("selector parse failed for {value}: {error}")).into()) + } - let old_items = match cache.get(&video_url) { - Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => { - return Ok(items.clone()); - } - Some((_, items)) => items.clone(), - None => vec![], - }; + fn regex(value: &str) -> Result { + Regex::new(value) + .map_err(|error| ErrorKind::Parse(format!("regex parse failed for {value}: {error}")).into()) + } - let mut requester = match options.requester.clone() { - Some(r) => r, - None => return Ok(old_items), - }; + fn text_of(element: &ElementRef<'_>) -> String { + element + .text() + .collect::>() + .join(" ") + .split_whitespace() + .collect::>() + .join(" ") + } - let text = match requester.get(&video_url, None).await { - Ok(t) => t, - Err(_) => return Ok(old_items), - }; + fn decode_html(value: &str) -> String { + decode(value.as_bytes()) + .to_string() + .unwrap_or_else(|_| value.to_string()) + } - let video_items = self.get_video_items_from_html(text, "