pornhub fix

This commit is contained in:
Simon
2026-04-05 15:53:16 +00:00
parent 4d50e0a9fb
commit 78e852c29d
4 changed files with 197 additions and 17 deletions

View File

@@ -2,6 +2,7 @@ use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
build_proxy_url, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
@@ -18,6 +19,9 @@ use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde_json::Value;
use std::collections::HashSet;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use url::Url;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -48,6 +52,7 @@ error_chain! {
/// Provider state: the site base URL plus a shared lookup of scraped creators.
#[derive(Debug, Clone)]
pub struct PornhubProvider {
// Base URL used to build request URLs; `new` initialises it from `BASE_URL`.
url: String,
// Creator lookup keyed by lowercased title and by lowercased slug. Wrapped in
// `Arc<RwLock<..>>` because a background loader thread writes to it while
// query handling reads from it.
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
}
#[derive(Debug, Clone, Copy)]
@@ -71,6 +76,13 @@ struct QueryTarget {
slug: String,
}
/// One scraped creator entry stored in the provider's tag map.
#[derive(Debug, Clone)]
struct TagInfo {
// Which creator category the entry was scraped under.
kind: QueryTargetKind,
// Path segment that follows the category segment in the creator's link.
slug: String,
// Trimmed text content of the link the entry was scraped from.
title: String,
}
impl QueryTargetKind {
fn path_segment(self) -> &'static str {
match self {
@@ -84,8 +96,69 @@ impl QueryTargetKind {
impl PornhubProvider {
/// Creates a provider rooted at `BASE_URL` with an empty tag map, then starts
/// the background tag load via `spawn_initial_load` before returning.
pub fn new() -> Self {
Self {
let provider = Self {
url: BASE_URL.to_string(),
// Starts empty; the loader thread gets its own `Arc` handle to this map.
tag_map: Arc::new(RwLock::new(HashMap::new())),
};
provider.spawn_initial_load();
provider
}
fn spawn_initial_load(&self) {
let url = self.url.clone();
let tag_map = Arc::clone(&self.tag_map);
thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
rt.block_on(async {
let _ = Self::load_tags(&url, tag_map).await;
});
});
}
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?;
Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?;
Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?;
Self::load_kind(base_url, "user", QueryTargetKind::User, &tag_map).await?;
Ok(())
}
async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
let url = format!("{}/{}/top", base_url, path_segment);
let mut requester = crate::util::requester::Requester::new();
let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
let document = Html::parse_document(&body);
let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
for element in document.select(&selector) {
if let Some(href) = element.attr("href") {
if let Some(slug) = Self::slug_from_url(href, path_segment) {
let title = element.text().collect::<String>().trim().to_string();
if !title.is_empty() && !slug.is_empty() {
let info = TagInfo {
kind,
slug: slug.clone(),
title: title.clone(),
};
let mut map = tag_map.write().unwrap();
map.insert(title.to_ascii_lowercase(), info.clone());
map.insert(slug.to_ascii_lowercase(), info);
}
}
}
}
Ok(())
}
fn slug_from_url(url: &str, path_segment: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
let mut segments = parsed.path_segments()?;
if segments.next() == Some(path_segment) {
segments.next().map(|s| s.to_string())
} else {
None
}
}
@@ -140,12 +213,12 @@ impl PornhubProvider {
/// Parses `value` as a CSS selector.
///
/// A parse failure is converted into `ErrorKind::Parse` with a message that
/// names the offending selector text.
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| ErrorKind::Parse(format!("selector parse failed for {value}: {error}")).into())
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
}
/// Compiles `value` as a regular expression.
///
/// A compile failure is converted into `ErrorKind::Parse` with a message that
/// names the offending pattern.
fn regex(value: &str) -> Result<Regex> {
Regex::new(value)
.map_err(|error| ErrorKind::Parse(format!("regex parse failed for {value}: {error}")).into())
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
}
fn text_of(element: &ElementRef<'_>) -> String {
@@ -191,10 +264,14 @@ impl PornhubProvider {
}
}
fn parse_query_target(query: &str) -> Option<QueryTarget> {
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
let normalized = query.trim().to_ascii_lowercase();
if let Some(info) = self.tag_map.read().unwrap().get(&normalized) {
return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() });
}
// Fallback to kind:slug without @
let trimmed = query.trim();
let trimmed = trimmed.strip_prefix('@')?;
let (kind, raw_slug) = trimmed.split_once(':')?;
let (kind_str, raw_slug) = trimmed.split_once(':')?;
let slug = raw_slug
.trim()
.trim_matches('/')
@@ -204,7 +281,7 @@ impl PornhubProvider {
return None;
}
let kind = match kind.trim().to_ascii_lowercase().as_str() {
let kind = match kind_str.trim().to_ascii_lowercase().as_str() {
"channel" | "channels" => QueryTargetKind::Channel,
"pornstar" | "pornstars" => QueryTargetKind::Pornstar,
"model" | "models" => QueryTargetKind::Model,
@@ -257,7 +334,7 @@ impl PornhubProvider {
fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) {
match query.map(str::trim).filter(|value| !value.is_empty()) {
Some(query) => {
if let Some(target) = Self::parse_query_target(query) {
if let Some(target) = self.parse_query_target(query) {
(self.build_creator_url(page, sort, &target), ListingScope::Creator)
} else {
let encoded = query.to_ascii_lowercase().replace(' ', "+");
@@ -413,7 +490,7 @@ impl PornhubProvider {
let mut tags = Vec::new();
if let Some(tag) = uploader_url
.as_deref()
.and_then(Self::query_tag_from_uploader_url)
.and_then(|url| self.query_tag_from_uploader_url(url))
{
tags.push(tag);
}
@@ -446,15 +523,19 @@ impl PornhubProvider {
Some(format!("{CHANNEL_ID}:{kind}:{slug}"))
}
fn query_tag_from_uploader_url(url: &str) -> Option<String> {
fn query_tag_from_uploader_url(&self, url: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
let mut segments = parsed.path_segments()?;
let kind = segments.next()?.trim_matches('/');
let kind_str = segments.next()?.trim_matches('/');
let slug = segments.next()?.trim_matches('/');
if kind.is_empty() || slug.is_empty() {
if kind_str.is_empty() || slug.is_empty() {
return None;
}
Some(format!("@{kind}:{}", slug.replace('-', " ")))
let normalized_slug = slug.to_ascii_lowercase();
if let Some(info) = self.tag_map.read().unwrap().get(&normalized_slug) {
return Some(info.title.clone());
}
Some(slug.replace('-', " "))
}
fn push_unique(values: &mut Vec<String>, value: String) {
@@ -802,13 +883,54 @@ impl PornhubProvider {
return Ok(old_items);
}
let items = self.enrich_listing_items(items, &options).await;
let mut items = self.enrich_listing_items(items, &options).await;
// Rewrite thumbs and previews to use the proxy when appropriate
for item in items.iter_mut() {
let proxied = self.proxied_thumb(&options, &item.thumb);
if !proxied.is_empty() {
item.thumb = proxied;
}
if let Some(prev) = item.preview.clone() {
let proxied_prev = self.proxied_thumb(&options, &prev);
if !proxied_prev.is_empty() {
item.preview = Some(proxied_prev);
}
}
}
cache.remove(&video_url);
cache.insert(video_url, items.clone());
Ok(items)
}
}
impl PornhubProvider {
    /// Rewrites a thumbnail URL so it is served through the `pornhub-thumb`
    /// proxy route.
    ///
    /// Returns an empty string when `thumb` is empty or is rejected by
    /// `PornhubThumbPolicy::is_allowed_thumb_url`; callers treat an empty
    /// result as "leave the original URL untouched".
    fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
        let permitted = !thumb.is_empty() && PornhubThumbPolicy::is_allowed_thumb_url(thumb);
        if permitted {
            build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
        } else {
            String::new()
        }
    }
}
/// Decides which thumbnail URLs may be routed through the thumbnail proxy.
struct PornhubThumbPolicy;

impl PornhubThumbPolicy {
    /// Domains whose hosts (the domain itself or any subdomain) are accepted:
    /// the official site and its CDN.
    const ALLOWED_DOMAINS: [&'static str; 2] = ["pornhub.com", "phncdn.com"];

    /// Returns `true` only for `https` URLs whose host is one of
    /// `ALLOWED_DOMAINS` or a subdomain of one.
    ///
    /// The host is matched on a label boundary. A plain substring or suffix
    /// test would also accept look-alike hosts such as `evilpornhub.com` or
    /// `phncdn.attacker.example`.
    fn is_allowed_thumb_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        if parsed.scheme() != "https" {
            return false;
        }
        let Some(host) = parsed.host_str() else {
            return false;
        };
        Self::ALLOWED_DOMAINS.iter().any(|domain| {
            host.strip_suffix(domain)
                .map_or(false, |prefix| prefix.is_empty() || prefix.ends_with('.'))
        })
    }
}
#[async_trait]
impl Provider for PornhubProvider {
async fn get_videos(
@@ -850,17 +972,18 @@ mod tests {
#[test]
fn parses_creator_queries() {
let target = PornhubProvider::parse_query_target("@channels:Brazzers")
let provider = PornhubProvider::new();
let target = provider.parse_query_target("channels:Brazzers")
.expect("channel target should parse");
assert!(matches!(target.kind, QueryTargetKind::Channel));
assert_eq!(target.slug, "brazzers");
let target = PornhubProvider::parse_query_target("@pornstar:Alex Mack")
let target = provider.parse_query_target("pornstar:Alex Mack")
.expect("pornstar target should parse");
assert!(matches!(target.kind, QueryTargetKind::Pornstar));
assert_eq!(target.slug, "alex-mack");
assert!(PornhubProvider::parse_query_target("teacher").is_none());
assert!(provider.parse_query_target("teacher").is_none());
}
#[test]

View File

@@ -10,6 +10,7 @@ use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
pub mod doodstream;
pub mod hanimecdn;
pub mod hqpornerthumb;
pub mod pornhubthumb;
pub mod javtiful;
pub mod noodlemagazine;
pub mod pimpbunny;

View File

@@ -0,0 +1,51 @@
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use crate::util::requester::Requester;
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint
} else {
format!("https://{}", endpoint.trim_start_matches('/'))
};
let upstream = match requester
.get_ref()
.clone()
.get_raw_with_headers(
image_url.as_str(),
vec![("Referer".to_string(), "https://www.pornhub.com/".to_string())],
)
.await
{
Ok(response) => response,
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
};
let status = upstream.status();
let headers = upstream.headers().clone();
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if let Some(ct) = headers.get(CONTENT_TYPE) {
if let Ok(ct_str) = ct.to_str() {
resp.set_header(CONTENT_TYPE, ct_str);
}
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}

View File

@@ -76,6 +76,11 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::post().to(crate::proxies::porndishthumb::get_image))
.route(web::get().to(crate::proxies::porndishthumb::get_image)),
);
cfg.service(
web::resource("/pornhub-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pornhubthumb::get_image))
.route(web::get().to(crate::proxies::pornhubthumb::get_image)),
);
cfg.service(
web::resource("/pimpbunny-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pimpbunnythumb::get_image))