pornhub fix
This commit is contained in:
@@ -2,6 +2,7 @@ use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
build_proxy_url, strip_url_scheme,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
@@ -18,6 +19,9 @@ use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread;
|
||||
use url::Url;
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
@@ -48,6 +52,7 @@ error_chain! {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PornhubProvider {
|
||||
url: String,
|
||||
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -71,6 +76,13 @@ struct QueryTarget {
|
||||
slug: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct TagInfo {
|
||||
kind: QueryTargetKind,
|
||||
slug: String,
|
||||
title: String,
|
||||
}
|
||||
|
||||
impl QueryTargetKind {
|
||||
fn path_segment(self) -> &'static str {
|
||||
match self {
|
||||
@@ -84,8 +96,69 @@ impl QueryTargetKind {
|
||||
|
||||
impl PornhubProvider {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
let provider = Self {
|
||||
url: BASE_URL.to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
};
|
||||
provider.spawn_initial_load();
|
||||
provider
|
||||
}
|
||||
|
||||
fn spawn_initial_load(&self) {
|
||||
let url = self.url.clone();
|
||||
let tag_map = Arc::clone(&self.tag_map);
|
||||
thread::spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
rt.block_on(async {
|
||||
let _ = Self::load_tags(&url, tag_map).await;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
|
||||
Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?;
|
||||
Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?;
|
||||
Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?;
|
||||
Self::load_kind(base_url, "user", QueryTargetKind::User, &tag_map).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> {
|
||||
let url = format!("{}/{}/top", base_url, path_segment);
|
||||
let mut requester = crate::util::requester::Requester::new();
|
||||
let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
|
||||
let document = Html::parse_document(&body);
|
||||
let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
|
||||
for element in document.select(&selector) {
|
||||
if let Some(href) = element.attr("href") {
|
||||
if let Some(slug) = Self::slug_from_url(href, path_segment) {
|
||||
let title = element.text().collect::<String>().trim().to_string();
|
||||
if !title.is_empty() && !slug.is_empty() {
|
||||
let info = TagInfo {
|
||||
kind,
|
||||
slug: slug.clone(),
|
||||
title: title.clone(),
|
||||
};
|
||||
let mut map = tag_map.write().unwrap();
|
||||
map.insert(title.to_ascii_lowercase(), info.clone());
|
||||
map.insert(slug.to_ascii_lowercase(), info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Extracts the slug that follows `path_segment` in a URL or URL path.
///
/// Accepts both relative references (`/channel/acme`) and absolute URLs
/// (`https://host/channel/acme`). Relative references matter most: the
/// listing pages are scanned for anchors whose `href` starts with `/`, and a
/// strict absolute-URL parser rejects those outright.
///
/// Returns `None` when the first path segment is not `path_segment` or when
/// no non-empty segment follows it. Query strings and fragments are ignored.
/// The slug is returned verbatim (no percent-decoding, no case folding).
fn slug_from_url(url: &str, path_segment: &str) -> Option<String> {
    // Query strings and fragments never contribute to the slug.
    let reference = url
        .trim()
        .split(|c| c == '?' || c == '#')
        .next()
        .unwrap_or("");

    // Reduce absolute (`scheme://host/...`) and protocol-relative
    // (`//host/...`) references to their path component.
    let path = match reference.split_once("://") {
        Some((scheme, rest)) if !scheme.contains('/') => &rest[rest.find('/')?..],
        _ => match reference.strip_prefix("//") {
            Some(rest) => &rest[rest.find('/')?..],
            None => reference,
        },
    };

    let mut segments = path.trim_start_matches('/').split('/');
    if segments.next() != Some(path_segment) {
        return None;
    }
    segments
        .next()
        .filter(|segment| !segment.is_empty())
        .map(str::to_string)
}
|
||||
|
||||
@@ -140,12 +213,12 @@ impl PornhubProvider {
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|error| ErrorKind::Parse(format!("selector parse failed for {value}: {error}")).into())
|
||||
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
|
||||
}
|
||||
|
||||
fn regex(value: &str) -> Result<Regex> {
|
||||
Regex::new(value)
|
||||
.map_err(|error| ErrorKind::Parse(format!("regex parse failed for {value}: {error}")).into())
|
||||
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
|
||||
}
|
||||
|
||||
fn text_of(element: &ElementRef<'_>) -> String {
|
||||
@@ -191,10 +264,14 @@ impl PornhubProvider {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_query_target(query: &str) -> Option<QueryTarget> {
|
||||
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
|
||||
let normalized = query.trim().to_ascii_lowercase();
|
||||
if let Some(info) = self.tag_map.read().unwrap().get(&normalized) {
|
||||
return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() });
|
||||
}
|
||||
// Fallback to kind:slug without @
|
||||
let trimmed = query.trim();
|
||||
let trimmed = trimmed.strip_prefix('@')?;
|
||||
let (kind, raw_slug) = trimmed.split_once(':')?;
|
||||
let (kind_str, raw_slug) = trimmed.split_once(':')?;
|
||||
let slug = raw_slug
|
||||
.trim()
|
||||
.trim_matches('/')
|
||||
@@ -204,7 +281,7 @@ impl PornhubProvider {
|
||||
return None;
|
||||
}
|
||||
|
||||
let kind = match kind.trim().to_ascii_lowercase().as_str() {
|
||||
let kind = match kind_str.trim().to_ascii_lowercase().as_str() {
|
||||
"channel" | "channels" => QueryTargetKind::Channel,
|
||||
"pornstar" | "pornstars" => QueryTargetKind::Pornstar,
|
||||
"model" | "models" => QueryTargetKind::Model,
|
||||
@@ -257,7 +334,7 @@ impl PornhubProvider {
|
||||
fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) {
|
||||
match query.map(str::trim).filter(|value| !value.is_empty()) {
|
||||
Some(query) => {
|
||||
if let Some(target) = Self::parse_query_target(query) {
|
||||
if let Some(target) = self.parse_query_target(query) {
|
||||
(self.build_creator_url(page, sort, &target), ListingScope::Creator)
|
||||
} else {
|
||||
let encoded = query.to_ascii_lowercase().replace(' ', "+");
|
||||
@@ -413,7 +490,7 @@ impl PornhubProvider {
|
||||
let mut tags = Vec::new();
|
||||
if let Some(tag) = uploader_url
|
||||
.as_deref()
|
||||
.and_then(Self::query_tag_from_uploader_url)
|
||||
.and_then(|url| self.query_tag_from_uploader_url(url))
|
||||
{
|
||||
tags.push(tag);
|
||||
}
|
||||
@@ -446,15 +523,19 @@ impl PornhubProvider {
|
||||
Some(format!("{CHANNEL_ID}:{kind}:{slug}"))
|
||||
}
|
||||
|
||||
fn query_tag_from_uploader_url(url: &str) -> Option<String> {
|
||||
fn query_tag_from_uploader_url(&self, url: &str) -> Option<String> {
|
||||
let parsed = Url::parse(url).ok()?;
|
||||
let mut segments = parsed.path_segments()?;
|
||||
let kind = segments.next()?.trim_matches('/');
|
||||
let kind_str = segments.next()?.trim_matches('/');
|
||||
let slug = segments.next()?.trim_matches('/');
|
||||
if kind.is_empty() || slug.is_empty() {
|
||||
if kind_str.is_empty() || slug.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(format!("@{kind}:{}", slug.replace('-', " ")))
|
||||
let normalized_slug = slug.to_ascii_lowercase();
|
||||
if let Some(info) = self.tag_map.read().unwrap().get(&normalized_slug) {
|
||||
return Some(info.title.clone());
|
||||
}
|
||||
Some(slug.replace('-', " "))
|
||||
}
|
||||
|
||||
fn push_unique(values: &mut Vec<String>, value: String) {
|
||||
@@ -802,13 +883,54 @@ impl PornhubProvider {
|
||||
return Ok(old_items);
|
||||
}
|
||||
|
||||
let items = self.enrich_listing_items(items, &options).await;
|
||||
let mut items = self.enrich_listing_items(items, &options).await;
|
||||
|
||||
// Rewrite thumbs and previews to use the proxy when appropriate
|
||||
for item in items.iter_mut() {
|
||||
let proxied = self.proxied_thumb(&options, &item.thumb);
|
||||
if !proxied.is_empty() {
|
||||
item.thumb = proxied;
|
||||
}
|
||||
if let Some(prev) = item.preview.clone() {
|
||||
let proxied_prev = self.proxied_thumb(&options, &prev);
|
||||
if !proxied_prev.is_empty() {
|
||||
item.preview = Some(proxied_prev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cache.remove(&video_url);
|
||||
cache.insert(video_url, items.clone());
|
||||
Ok(items)
|
||||
}
|
||||
}
|
||||
|
||||
impl PornhubProvider {
|
||||
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
|
||||
if thumb.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) {
|
||||
return String::new();
|
||||
}
|
||||
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
|
||||
}
|
||||
}
|
||||
|
||||
struct PornhubThumbPolicy;
|
||||
|
||||
impl PornhubThumbPolicy {
|
||||
fn is_allowed_thumb_url(url: &str) -> bool {
|
||||
let Some(url) = Url::parse(url).ok() else { return false; };
|
||||
if url.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = url.host_str() else { return false; };
|
||||
// Allow official Pornhub hosts and their CDN hosts
|
||||
host.contains("phncdn") || host.ends_with("pornhub.com")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for PornhubProvider {
|
||||
async fn get_videos(
|
||||
@@ -850,17 +972,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn parses_creator_queries() {
|
||||
let target = PornhubProvider::parse_query_target("@channels:Brazzers")
|
||||
let provider = PornhubProvider::new();
|
||||
let target = provider.parse_query_target("channels:Brazzers")
|
||||
.expect("channel target should parse");
|
||||
assert!(matches!(target.kind, QueryTargetKind::Channel));
|
||||
assert_eq!(target.slug, "brazzers");
|
||||
|
||||
let target = PornhubProvider::parse_query_target("@pornstar:Alex Mack")
|
||||
let target = provider.parse_query_target("pornstar:Alex Mack")
|
||||
.expect("pornstar target should parse");
|
||||
assert!(matches!(target.kind, QueryTargetKind::Pornstar));
|
||||
assert_eq!(target.slug, "alex-mack");
|
||||
|
||||
assert!(PornhubProvider::parse_query_target("teacher").is_none());
|
||||
assert!(provider.parse_query_target("teacher").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
|
||||
pub mod doodstream;
|
||||
pub mod hanimecdn;
|
||||
pub mod hqpornerthumb;
|
||||
pub mod pornhubthumb;
|
||||
pub mod javtiful;
|
||||
pub mod noodlemagazine;
|
||||
pub mod pimpbunny;
|
||||
|
||||
51
src/proxies/pornhubthumb.rs
Normal file
51
src/proxies/pornhubthumb.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
|
||||
use ntex::{
|
||||
http::Response,
|
||||
web::{self, HttpRequest, error},
|
||||
};
|
||||
|
||||
use crate::util::requester::Requester;
|
||||
|
||||
pub async fn get_image(
|
||||
req: HttpRequest,
|
||||
requester: web::types::State<Requester>,
|
||||
) -> Result<impl web::Responder, web::Error> {
|
||||
let endpoint = req.match_info().query("endpoint").to_string();
|
||||
let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
||||
endpoint
|
||||
} else {
|
||||
format!("https://{}", endpoint.trim_start_matches('/'))
|
||||
};
|
||||
|
||||
let upstream = match requester
|
||||
.get_ref()
|
||||
.clone()
|
||||
.get_raw_with_headers(
|
||||
image_url.as_str(),
|
||||
vec![("Referer".to_string(), "https://www.pornhub.com/".to_string())],
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => response,
|
||||
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
|
||||
};
|
||||
|
||||
let status = upstream.status();
|
||||
let headers = upstream.headers().clone();
|
||||
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
|
||||
|
||||
let mut resp = Response::build(status);
|
||||
|
||||
if let Some(ct) = headers.get(CONTENT_TYPE) {
|
||||
if let Ok(ct_str) = ct.to_str() {
|
||||
resp.set_header(CONTENT_TYPE, ct_str);
|
||||
}
|
||||
}
|
||||
if let Some(cl) = headers.get(CONTENT_LENGTH) {
|
||||
if let Ok(cl_str) = cl.to_str() {
|
||||
resp.set_header(CONTENT_LENGTH, cl_str);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(resp.body(bytes.to_vec()))
|
||||
}
|
||||
@@ -76,6 +76,11 @@ pub fn config(cfg: &mut web::ServiceConfig) {
|
||||
.route(web::post().to(crate::proxies::porndishthumb::get_image))
|
||||
.route(web::get().to(crate::proxies::porndishthumb::get_image)),
|
||||
);
|
||||
cfg.service(
|
||||
web::resource("/pornhub-thumb/{endpoint}*")
|
||||
.route(web::post().to(crate::proxies::pornhubthumb::get_image))
|
||||
.route(web::get().to(crate::proxies::pornhubthumb::get_image)),
|
||||
);
|
||||
cfg.service(
|
||||
web::resource("/pimpbunny-thumb/{endpoint}*")
|
||||
.route(web::post().to(crate::proxies::pimpbunnythumb::get_image))
|
||||
|
||||
Reference in New Issue
Block a user