Files
hottub/src/proxies/pornhubthumb.rs
2026-04-05 20:31:38 +00:00

221 lines
6.6 KiB
Rust

use ntex::web::{self, HttpRequest};
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use crate::util::requester::Requester;
/// Site root URL; sent as the `Referer` header when a video page is fetched.
const PORNHUB_ROOT: &str = "https://www.pornhub.com/";
/// Rebuilds the upstream page URL from the `endpoint` route parameter.
///
/// Leading slashes are stripped from the parameter. If it does not already
/// start with `http://` or `https://`, `https://` is prepended. The request's
/// own query string is appended after a `?` unless it is empty or the URL
/// already contains a `?`.
fn endpoint_to_page_url(req: &HttpRequest) -> String {
    let raw = req.match_info().query("endpoint").trim_start_matches('/');

    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|prefix| raw.starts_with(*prefix));
    let base = if has_scheme {
        raw.to_string()
    } else {
        format!("https://{raw}")
    };

    match req.query_string() {
        // Only attach the request query when the URL has none of its own.
        qs if !qs.is_empty() && !base.contains('?') => format!("{base}?{qs}"),
        _ => base,
    }
}
/// Returns `true` when `url` is an HTTPS pornhub.com video page.
///
/// The URL must parse, use the `https` scheme, have a host that is
/// `pornhub.com` or one of its subdomains, and have a path beginning with
/// `/view_video.php` or `/video/`. Anything else yields `false`.
fn is_allowed_video_page_url(url: &str) -> bool {
    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };

    let scheme_ok = parsed.scheme() == "https";
    let host_ok = matches!(
        parsed.host_str(),
        Some(host)
            if host == "pornhub.com"
                || host == "www.pornhub.com"
                || host.ends_with(".pornhub.com")
    );
    let path = parsed.path();
    let path_ok = path.starts_with("/view_video.php") || path.starts_with("/video/");

    scheme_ok && host_ok && path_ok
}
/// Turns a thumbnail reference found in a page into an absolute URL string.
///
/// * empty input -> `None`
/// * protocol-relative (`//host/path`) -> prefixed with `https:`
/// * already absolute (`http://` or `https://`) -> returned unchanged
/// * root-relative (`/path`) -> resolved against the scheme, host and
///   explicit port (if any) of `page_url`; `None` if `page_url` has no host
/// * anything else (e.g. a document-relative path) -> `None`
///
/// The result is not validated here; callers are expected to check it
/// against the thumbnail allow-list.
fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option<String> {
    if candidate.is_empty() {
        return None;
    }
    if candidate.starts_with("//") {
        return Some(format!("https:{candidate}"));
    }
    if candidate.starts_with("https://") || candidate.starts_with("http://") {
        return Some(candidate.to_string());
    }
    if candidate.starts_with('/') {
        let host = page_url.host_str()?;
        // `port()` is `None` for the scheme's default port, so the common
        // case is unchanged; a non-default port must be kept or the
        // reference would resolve to a different origin than the page's.
        return Some(match page_url.port() {
            Some(port) => format!("{}://{}:{}{}", page_url.scheme(), host, port, candidate),
            None => format!("{}://{}{}", page_url.scheme(), host, candidate),
        });
    }
    None
}
/// Returns `true` when `url` is an HTTPS image on an allowed host.
///
/// Allowed hosts are `pornhub.com`, its subdomains, and subdomains of
/// `phncdn.com`. The URL path, compared case-insensitively, must end in one
/// of `.jpg`, `.jpeg`, `.png`, `.webp` or `.avif`.
fn is_allowed_thumb_url(url: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = [".jpg", ".jpeg", ".png", ".webp", ".avif"];

    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };
    if parsed.scheme() != "https" {
        return false;
    }

    let host_ok = matches!(
        parsed.host_str(),
        Some(host)
            if host == "pornhub.com"
                || host == "www.pornhub.com"
                || host.ends_with(".pornhub.com")
                || host.ends_with(".phncdn.com")
    );
    if !host_ok {
        return false;
    }

    let lowered_path = parsed.path().to_ascii_lowercase();
    IMAGE_EXTENSIONS
        .iter()
        .any(|ext| lowered_path.ends_with(*ext))
}
/// Decodes the escapes that appear in URLs embedded in JavaScript/JSON
/// string literals.
///
/// Handles `\/` (escaped slash) and any `\uXXXX` escape whose four hex
/// digits, in either letter case, name a valid Unicode scalar value, e.g.
/// `\u002f` -> `/`, `\u003A` -> `:`, `\u0026` -> `&`. Every other backslash
/// sequence, including malformed or lone-surrogate `\u` escapes, is copied
/// through unchanged. The input is scanned once, so decoded output is never
/// re-interpreted as a new escape.
fn decode_js_string(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value;

    while let Some(pos) = rest.find('\\') {
        decoded.push_str(&rest[..pos]);
        // `\` is a single ASCII byte, so `pos + 1` is a valid char boundary.
        let after = &rest[pos + 1..];

        if let Some(tail) = after.strip_prefix('/') {
            decoded.push('/');
            rest = tail;
        } else if let Some((ch, tail)) = after.strip_prefix('u').and_then(parse_unicode_escape) {
            decoded.push(ch);
            rest = tail;
        } else {
            // Unknown escape: keep the backslash and continue after it.
            decoded.push('\\');
            rest = after;
        }
    }

    decoded.push_str(rest);
    decoded
}

/// Parses the four hex digits following `\u`; returns the decoded character
/// and the remaining input, or `None` if the digits are missing, not hex, or
/// do not form a Unicode scalar value.
fn parse_unicode_escape(after_u: &str) -> Option<(char, &str)> {
    let digits = after_u.get(..4)?;
    // `from_str_radix` would also accept a leading sign, so check explicitly.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    let code = u32::from_str_radix(digits, 16).ok()?;
    let ch = char::from_u32(code)?;
    Some((ch, &after_u[4..]))
}
/// Extracts a thumbnail URL for the video from the page's HTML.
///
/// Lookup order:
/// 1. the `content` attribute of `og:image`, `twitter:image` and
///    `thumbnailUrl` meta tags, in the order the selector yields them;
/// 2. the first `"image_url": "..."` string in the raw markup, after
///    JS-unescaping.
///
/// A candidate is returned only after it has been made absolute against
/// `page_url` and has passed `is_allowed_thumb_url`. Returns `None` when
/// nothing qualifies.
fn find_thumb_in_html(html: &str, page_url: &Url) -> Option<String> {
    // Shared acceptance check: absolutise, then enforce the thumbnail allow-list.
    let accept = |raw: &str| {
        normalize_candidate_url(raw, page_url).filter(|resolved| is_allowed_thumb_url(resolved))
    };

    let document = Html::parse_document(html);
    let meta_tags = Selector::parse(
        "meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]",
    )
    .ok()?;
    let from_meta = document
        .select(&meta_tags)
        .filter_map(|tag| tag.value().attr("content"))
        .find_map(|content| accept(content.trim()));
    if from_meta.is_some() {
        return from_meta;
    }

    // Fallback: only the first `image_url` occurrence in the markup is considered.
    let image_url_pattern = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?;
    let escaped = image_url_pattern
        .captures(html)?
        .get(1)
        .map_or("", |group| group.as_str());
    accept(&decode_js_string(escaped))
}
/// HTTP handler: redirects to the thumbnail of the video page named by the
/// `endpoint` route parameter.
///
/// Responses:
/// * `400 Bad Request` - the reconstructed page URL is not an allowed video
///   page, or it fails to parse;
/// * `404 Not Found` - fetching the page failed, or no allowed thumbnail URL
///   was found in it;
/// * `302 Found` - `Location` is set to the thumbnail URL.
pub async fn get_image(
    req: HttpRequest,
    requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
    let target = endpoint_to_page_url(&req);
    if !is_allowed_video_page_url(&target) {
        return Ok(web::HttpResponse::BadRequest().finish());
    }

    // Work on a clone of the shared requester so it can be borrowed mutably.
    let mut client = requester.get_ref().clone();
    let referer = vec![("Referer".to_string(), PORNHUB_ROOT.to_string())];
    let Ok(body) = client
        .get_with_headers(target.as_str(), referer, None)
        .await
    else {
        return Ok(web::HttpResponse::NotFound().finish());
    };

    let Ok(base) = Url::parse(&target) else {
        return Ok(web::HttpResponse::BadRequest().finish());
    };

    match find_thumb_in_html(&body, &base) {
        Some(thumb) => Ok(web::HttpResponse::Found()
            .header("Location", thumb)
            .finish()),
        None => Ok(web::HttpResponse::NotFound().finish()),
    }
}
#[cfg(test)]
mod tests {
    use super::{
        decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url,
    };
    use url::Url;

    /// Video-page allow-list: right host, HTTPS and video path are all required.
    #[test]
    fn validates_allowed_video_pages() {
        let accepted = [
            "https://www.pornhub.com/view_video.php?viewkey=abc123",
            "https://www.pornhub.com/video/search?search=test",
        ];
        let rejected = [
            "https://example.com/view_video.php?viewkey=abc123",
            "http://www.pornhub.com/view_video.php?viewkey=abc123",
        ];

        for page in accepted {
            assert!(is_allowed_video_page_url(page), "should accept {}", page);
        }
        for page in rejected {
            assert!(!is_allowed_video_page_url(page), "should reject {}", page);
        }
    }

    /// Thumbnail allow-list: host and image extension are both enforced.
    #[test]
    fn validates_allowed_thumb_hosts_and_extensions() {
        let accepted = [
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg",
            "https://www.pornhub.com/webmasters/thumb.webp",
        ];
        let rejected = [
            "https://example.com/thumb.jpg",
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8",
        ];

        for thumb in accepted {
            assert!(is_allowed_thumb_url(thumb), "should accept {}", thumb);
        }
        for thumb in rejected {
            assert!(!is_allowed_thumb_url(thumb), "should reject {}", thumb);
        }
    }

    /// `//host/path` gains `https:`; `/path` is resolved against the page origin.
    #[test]
    fn normalizes_protocol_relative_and_root_relative_urls() {
        let page_url = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap();
        let cases = [
            (
                "//pix-cdn77.phncdn.com/thumb.jpg",
                "https://pix-cdn77.phncdn.com/thumb.jpg",
            ),
            (
                "/assets/thumb.jpg",
                "https://www.pornhub.com/assets/thumb.jpg",
            ),
        ];

        for (input, expected) in cases {
            let normalized = normalize_candidate_url(input, &page_url);
            assert_eq!(normalized.as_deref(), Some(expected));
        }
    }

    /// Escaped slashes from a JS string literal are unescaped.
    #[test]
    fn decodes_js_escaped_urls() {
        let decoded = decode_js_string(r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#);
        assert_eq!(decoded, "https://pix-cdn77.phncdn.com/thumb.jpg");
    }
}