221 lines
6.6 KiB
Rust
221 lines
6.6 KiB
Rust
use ntex::web::{self, HttpRequest};
|
|
use regex::Regex;
|
|
use scraper::{Html, Selector};
|
|
use url::Url;
|
|
|
|
use crate::util::requester::Requester;
|
|
|
|
/// Root URL of the upstream site; `get_image` sends it as the `Referer` header when fetching
/// a video page.
const PORNHUB_ROOT: &str = "https://www.pornhub.com/";
|
|
|
|
fn endpoint_to_page_url(req: &HttpRequest) -> String {
|
|
let endpoint = req.match_info().query("endpoint").trim_start_matches('/');
|
|
let mut page_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
|
endpoint.to_string()
|
|
} else {
|
|
format!("https://{endpoint}")
|
|
};
|
|
|
|
let query = req.query_string();
|
|
if !query.is_empty() && !page_url.contains('?') {
|
|
page_url.push('?');
|
|
page_url.push_str(query);
|
|
}
|
|
|
|
page_url
|
|
}
|
|
|
|
fn is_allowed_video_page_url(url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else {
|
|
return false;
|
|
};
|
|
if host != "pornhub.com" && host != "www.pornhub.com" && !host.ends_with(".pornhub.com") {
|
|
return false;
|
|
}
|
|
url.path().starts_with("/view_video.php") || url.path().starts_with("/video/")
|
|
}
|
|
|
|
fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option<String> {
|
|
if candidate.is_empty() {
|
|
return None;
|
|
}
|
|
if candidate.starts_with("//") {
|
|
return Some(format!("https:{candidate}"));
|
|
}
|
|
if candidate.starts_with("https://") || candidate.starts_with("http://") {
|
|
return Some(candidate.to_string());
|
|
}
|
|
if candidate.starts_with('/') {
|
|
let host = page_url.host_str()?;
|
|
return Some(format!("{}://{}{}", page_url.scheme(), host, candidate));
|
|
}
|
|
None
|
|
}
|
|
|
|
fn is_allowed_thumb_url(url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else {
|
|
return false;
|
|
};
|
|
let allowed_host = host == "pornhub.com"
|
|
|| host == "www.pornhub.com"
|
|
|| host.ends_with(".pornhub.com")
|
|
|| host.ends_with(".phncdn.com");
|
|
if !allowed_host {
|
|
return false;
|
|
}
|
|
let path = url.path().to_ascii_lowercase();
|
|
[".jpg", ".jpeg", ".png", ".webp", ".avif"]
|
|
.iter()
|
|
.any(|ext| path.ends_with(ext))
|
|
}
|
|
|
|
/// Decodes the JavaScript/JSON string escapes that commonly appear in embedded URLs.
///
/// Handled escapes:
/// * `\/` becomes `/`
/// * `\uXXXX` (four hex digits, upper or lower case) becomes the corresponding character,
///   e.g. `\u002f` -> `/`, `\u003A` -> `:`, `\u0026` -> `&`
///
/// Everything else is copied through unchanged, including other backslash sequences,
/// malformed or truncated `\u` escapes, and lone UTF-16 surrogates such as `\uD83D`.
fn decode_js_string(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value;

    while let Some(pos) = rest.find('\\') {
        decoded.push_str(&rest[..pos]);
        let after = &rest[pos + 1..];

        if let Some(tail) = after.strip_prefix('/') {
            decoded.push('/');
            rest = tail;
        } else if let Some((ch, tail)) = after
            .strip_prefix('u')
            .and_then(|after_u| parse_unicode_escape(after_u))
        {
            decoded.push(ch);
            rest = tail;
        } else {
            // Not an escape we understand: keep the backslash and continue after it.
            decoded.push('\\');
            rest = after;
        }
    }

    decoded.push_str(rest);
    decoded
}

/// Parses the four hex digits following `\u`, returning the character and the remaining text.
fn parse_unicode_escape(after_u: &str) -> Option<(char, &str)> {
    // `get` returns `None` when fewer than four bytes remain or the cut is not a char boundary.
    let hex = after_u.get(..4)?;
    // Checked explicitly because `from_str_radix` would also accept a leading `+`.
    if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    let code = u32::from_str_radix(hex, 16).ok()?;
    let ch = char::from_u32(code)?;
    Some((ch, &after_u[4..]))
}
|
|
|
|
fn find_thumb_in_html(html: &str, page_url: &Url) -> Option<String> {
|
|
let document = Html::parse_document(html);
|
|
let selector = Selector::parse(
|
|
"meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]",
|
|
)
|
|
.ok()?;
|
|
|
|
for meta in document.select(&selector) {
|
|
let value = meta.value().attr("content").unwrap_or_default().trim();
|
|
if let Some(candidate) = normalize_candidate_url(value, page_url) {
|
|
if is_allowed_thumb_url(&candidate) {
|
|
return Some(candidate);
|
|
}
|
|
}
|
|
}
|
|
|
|
let image_url_re = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?;
|
|
if let Some(captures) = image_url_re.captures(html) {
|
|
let raw = captures
|
|
.get(1)
|
|
.map(|value| value.as_str())
|
|
.unwrap_or_default();
|
|
let decoded = decode_js_string(raw);
|
|
if let Some(candidate) = normalize_candidate_url(&decoded, page_url) {
|
|
if is_allowed_thumb_url(&candidate) {
|
|
return Some(candidate);
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
pub async fn get_image(
|
|
req: HttpRequest,
|
|
requester: web::types::State<Requester>,
|
|
) -> Result<impl web::Responder, web::Error> {
|
|
let page_url = endpoint_to_page_url(&req);
|
|
if !is_allowed_video_page_url(&page_url) {
|
|
return Ok(web::HttpResponse::BadRequest().finish());
|
|
}
|
|
|
|
let mut requester = requester.get_ref().clone();
|
|
let html = match requester
|
|
.get_with_headers(
|
|
page_url.as_str(),
|
|
vec![("Referer".to_string(), PORNHUB_ROOT.to_string())],
|
|
None,
|
|
)
|
|
.await
|
|
{
|
|
Ok(value) => value,
|
|
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
|
|
};
|
|
|
|
let parsed_page_url = match Url::parse(&page_url) {
|
|
Ok(value) => value,
|
|
Err(_) => return Ok(web::HttpResponse::BadRequest().finish()),
|
|
};
|
|
|
|
let Some(image_url) = find_thumb_in_html(&html, &parsed_page_url) else {
|
|
return Ok(web::HttpResponse::NotFound().finish());
|
|
};
|
|
|
|
Ok(web::HttpResponse::Found()
|
|
.header("Location", image_url)
|
|
.finish())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{
        decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url,
    };
    use url::Url;

    /// Only https pages on the expected host with a video path are accepted.
    #[test]
    fn validates_allowed_video_pages() {
        let accepted = [
            "https://www.pornhub.com/view_video.php?viewkey=abc123",
            "https://www.pornhub.com/video/search?search=test",
        ];
        let rejected = [
            "https://example.com/view_video.php?viewkey=abc123",
            "http://www.pornhub.com/view_video.php?viewkey=abc123",
        ];

        for url in accepted {
            assert!(is_allowed_video_page_url(url));
        }
        for url in rejected {
            assert!(!is_allowed_video_page_url(url));
        }
    }

    /// Thumbnails must live on an allowed host and end in an image extension.
    #[test]
    fn validates_allowed_thumb_hosts_and_extensions() {
        let accepted = [
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg",
            "https://www.pornhub.com/webmasters/thumb.webp",
        ];
        let rejected = [
            "https://example.com/thumb.jpg",
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8",
        ];

        for url in accepted {
            assert!(is_allowed_thumb_url(url));
        }
        for url in rejected {
            assert!(!is_allowed_thumb_url(url));
        }
    }

    /// `//host/path` gets an https scheme; `/path` is resolved against the page's host.
    #[test]
    fn normalizes_protocol_relative_and_root_relative_urls() {
        let base = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap();
        let cases = [
            (
                "//pix-cdn77.phncdn.com/thumb.jpg",
                "https://pix-cdn77.phncdn.com/thumb.jpg",
            ),
            (
                "/assets/thumb.jpg",
                "https://www.pornhub.com/assets/thumb.jpg",
            ),
        ];

        for (candidate, expected) in cases {
            let normalized = normalize_candidate_url(candidate, &base);
            assert_eq!(normalized.as_deref(), Some(expected));
        }
    }

    /// Escaped forward slashes from inline JS are unescaped.
    #[test]
    fn decodes_js_escaped_urls() {
        let escaped = r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#;
        let decoded = decode_js_string(escaped);
        assert_eq!(decoded, "https://pix-cdn77.phncdn.com/thumb.jpg");
    }
}
|