From dd7c4ec6a1b1108af899052df9693b4b3b384709 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 20 Mar 2026 13:52:06 +0000 Subject: [PATCH] noodlemagazine thumb proxy --- src/providers/noodlemagazine.rs | 160 ++++++++++++++++++++------ src/proxies/noodlemagazine.rs | 191 +++++++++++++++++++++++++++++++- src/proxy.rs | 5 + 3 files changed, 323 insertions(+), 33 deletions(-) diff --git a/src/providers/noodlemagazine.rs b/src/providers/noodlemagazine.rs index 30df8ec..dd02f6e 100644 --- a/src/providers/noodlemagazine.rs +++ b/src/providers/noodlemagazine.rs @@ -10,6 +10,7 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; +use url::Url; use std::vec; use titlecase::Titlecase; use wreq::Version; @@ -86,8 +87,7 @@ impl NoodlemagazineProvider { .await .unwrap_or_default(); - let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default(); - let items = self.get_video_items_from_html(text, proxy_base_url); + let items = self.get_video_items_from_html(text, &options); if items.is_empty() { Ok(old_items) @@ -124,8 +124,7 @@ impl NoodlemagazineProvider { .await .unwrap_or_default(); - let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default(); - let items = self.get_video_items_from_html(text, proxy_base_url); + let items = self.get_video_items_from_html(text, &options); if items.is_empty() { Ok(old_items) @@ -136,7 +135,7 @@ impl NoodlemagazineProvider { } } - fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Vec { + fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec { if html.is_empty() || html.contains("404 Not Found") { return vec![]; } @@ -157,31 +156,76 @@ impl NoodlemagazineProvider { list.split("
") .skip(1) .filter_map(|segment| { - self.get_video_item(segment.to_string(), proxy_base_url) - .ok() + self.get_video_item(segment.to_string(), options).ok() }) .collect() } - fn proxy_url(&self, proxy_base_url: &str, video_url: &str) -> String { - let target = video_url - .strip_prefix("https://") - .or_else(|| video_url.strip_prefix("http://")) - .unwrap_or(video_url) - .trim_start_matches('/'); - - if proxy_base_url.is_empty() { - return format!("/proxy/noodlemagazine/{target}"); - } - - format!( - "{}/proxy/noodlemagazine/{}", - proxy_base_url.trim_end_matches('/'), - target + fn proxy_url(&self, options: &ServerOptions, video_url: &str) -> String { + crate::providers::build_proxy_url( + options, + "noodlemagazine", + &crate::providers::strip_url_scheme(video_url), ) } - fn get_video_item(&self, video_segment: String, proxy_base_url: &str) -> Result { + fn normalize_thumb_url(&self, thumb: &str) -> String { + let thumb = thumb.trim(); + if thumb.is_empty() { + return String::new(); + } + + if thumb.starts_with("http://") || thumb.starts_with("https://") { + return thumb.to_string(); + } + + if thumb.starts_with("//") { + return format!("https:{thumb}"); + } + + if thumb.starts_with('/') { + return format!("{}{}", self.url, thumb); + } + + format!("{}/{}", self.url.trim_end_matches('/'), thumb.trim_start_matches('/')) + } + + fn has_allowed_image_extension(path: &str) -> bool { + let path = path.to_ascii_lowercase(); + [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"] + .iter() + .any(|ext| path.ends_with(ext)) + } + + fn is_allowed_thumb_url(&self, url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + let is_noodlemagazine_host = host == "noodlemagazine.com" || host.ends_with(".noodlemagazine.com"); + + is_noodlemagazine_host && Self::has_allowed_image_extension(url.path()) + } + + fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String { + let normalized = self.normalize_thumb_url(thumb); + if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) { + return String::new(); + } + + crate::providers::build_proxy_url( + options, + "noodlemagazine-thumb", + &crate::providers::strip_url_scheme(&normalized), + ) + } + + fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result { let href = video_segment .split(" ServerOptions { + ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("https://example.com".to_string()), + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: None, + sexuality: None, + } + } #[test] fn rewrites_video_pages_to_hottub_proxy() { let provider = NoodlemagazineProvider::new(); + let options = options(); assert_eq!( - provider.proxy_url( - "https://example.com", - "https://noodlemagazine.com/watch/-123_456" - ), + provider.proxy_url(&options, "https://noodlemagazine.com/watch/-123_456"), "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456" ); } @@ -304,11 +366,12 @@ mod tests { #[test] fn parses_listing_without_detail_page_requests() { let provider = NoodlemagazineProvider::new(); + let options = options(); let html = r#"
- +
sample & title
#clock-o">12:34< @@ -317,13 +380,48 @@ mod tests { >Show more
"#; - let items = provider.get_video_items_from_html(html.to_string(), "https://example.com"); + let items = provider.get_video_items_from_html(html.to_string(), &options); assert_eq!(items.len(), 1); assert_eq!( items[0].url, "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456" ); + assert_eq!( + items[0].thumb, + "https://example.com/proxy/noodlemagazine-thumb/noodlemagazine.com/thumbs/test.jpg" + ); assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1)); } + + #[test] + fn drops_non_noodlemagazine_or_non_image_thumbs() { + let provider = NoodlemagazineProvider::new(); + let options = options(); + let html = r#" +
+
+ + + +
sample
+ #clock-o">12:34< + #eye">1.2K< +
+
+ + + +
sample 2
+ #clock-o">00:42< + #eye">123< +
+ >Show more
+ "#; + + let items = provider.get_video_items_from_html(html.to_string(), &options); + + assert_eq!(items.len(), 2); + assert!(items.iter().all(|item| item.thumb.is_empty())); + } } diff --git a/src/proxies/noodlemagazine.rs b/src/proxies/noodlemagazine.rs index e981614..60364f2 100644 --- a/src/proxies/noodlemagazine.rs +++ b/src/proxies/noodlemagazine.rs @@ -1,11 +1,21 @@ -use ntex::http::header::CONTENT_TYPE; -use ntex::web::{self, HttpRequest, error}; +use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE}; +use ntex::{ + http::Response, + web::{self, HttpRequest, error}, +}; use serde_json::Value; use url::Url; use wreq::Version; use crate::util::requester::Requester; +const FIREFOX_USER_AGENT: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; +const IMAGE_ACCEPT: &str = + "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5"; + #[derive(Debug, Clone)] pub struct NoodlemagazineProxy {} @@ -57,6 +67,75 @@ impl NoodlemagazineProxy { } } + fn normalize_image_url(url: &str) -> String { + if url.starts_with("http://") || url.starts_with("https://") { + url.to_string() + } else { + format!("https://{}", url.trim_start_matches('/')) + } + } + + fn root_referer() -> &'static str { + "https://noodlemagazine.com/" + } + + fn root_html_headers() -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), Self::root_referer().to_string()), + ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> { + let mut headers = vec![ + ("Referer".to_string(), Self::root_referer().to_string()), + ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), + ("Accept".to_string(), IMAGE_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ]; + + if let Some(cookie) = requester.cookie_header_for_url(image_url) { + headers.push(("Cookie".to_string(), cookie)); + } + + headers + } + + fn has_allowed_image_extension(path: &str) -> bool { + let path = path.to_ascii_lowercase(); + [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"] + .iter() + .any(|ext| path.ends_with(ext)) + } + + fn is_allowed_thumb_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + + (host == "noodlemagazine.com" || host.ends_with(".noodlemagazine.com")) + && Self::has_allowed_image_extension(url.path()) + } + + fn is_binary_image_content_type(content_type: &str) -> bool { + let media_type = content_type + .split(';') + .next() + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + + media_type.starts_with("image/") + } + fn is_hls_url(url: &str) -> bool { Url::parse(url) .ok() @@ -184,6 +263,82 @@ pub async fn serve_media( .body(rewritten_manifest)) } +pub async fn get_image( + req: HttpRequest, + requester: web::types::State, +) -> Result { + let endpoint = req.match_info().query("endpoint").to_string(); + let image_url = NoodlemagazineProxy::normalize_image_url(&endpoint); + + if !NoodlemagazineProxy::is_allowed_thumb_url(&image_url) { + return Ok(web::HttpResponse::BadRequest().finish()); + } + + let mut requester = requester.get_ref().clone(); + let _ = requester + .get_with_headers( + NoodlemagazineProxy::root_referer(), + NoodlemagazineProxy::root_html_headers(), + Some(Version::HTTP_11), + ) + .await; + + let mut headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str()); + let mut upstream = requester + .get_raw_with_headers(image_url.as_str(), headers.clone()) + .await + .ok(); + + let needs_warmup = upstream + .as_ref() + .map(|response| !response.status().is_success()) + .unwrap_or(true); + + if needs_warmup { + let _ = requester + .get_with_headers( + image_url.as_str(), + headers.clone(), + Some(Version::HTTP_11), + ) + .await; + headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str()); + upstream = requester + .get_raw_with_headers(image_url.as_str(), headers) + .await + .ok(); + } + + let Some(upstream) = upstream.filter(|response| response.status().is_success()) else { + return Ok(web::HttpResponse::NotFound().finish()); + }; + + let status = upstream.status(); + let headers = upstream.headers().clone(); + let content_type = headers + .get(CONTENT_TYPE) + .and_then(|value| value.to_str().ok()) + .map(str::to_string) + .unwrap_or_default(); + + if !NoodlemagazineProxy::is_binary_image_content_type(&content_type) { + return Ok(web::HttpResponse::BadGateway().finish()); + } + + let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?; + let mut resp = Response::build(status); + if !content_type.is_empty() { + resp.set_header(CONTENT_TYPE, content_type); + } + if let Some(cl) = headers.get(CONTENT_LENGTH) { + if let Ok(cl_str) = cl.to_str() { + resp.set_header(CONTENT_LENGTH, cl_str); + } + } + + Ok(resp.body(bytes.to_vec())) +} + #[cfg(test)] mod tests { use super::NoodlemagazineProxy; @@ -231,4 +386,36 @@ mod tests { "#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nhttps://cdn.example/hls/low/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"https://cdn.example/hls/keys/key.bin\"\nhttps://cdn.example/hls/segment0.ts" ); } + + #[test] + fn allows_only_noodlemagazine_image_thumbs() { + assert!(NoodlemagazineProxy::is_allowed_thumb_url( + "https://noodlemagazine.com/thumbs/example.webp" + )); + assert!(NoodlemagazineProxy::is_allowed_thumb_url( + "https://img.noodlemagazine.com/previews/example.jpg" + )); + assert!(!NoodlemagazineProxy::is_allowed_thumb_url( + "https://noodlemagazine.com/watch/-123_456" + )); + assert!(!NoodlemagazineProxy::is_allowed_thumb_url( + "https://cdn.example/thumb.jpg" + )); + } + + #[test] + fn recognizes_binary_image_content_types() { + assert!(NoodlemagazineProxy::is_binary_image_content_type( + "image/webp" + )); + assert!(NoodlemagazineProxy::is_binary_image_content_type( + "image/jpeg; charset=binary" + )); + assert!(!NoodlemagazineProxy::is_binary_image_content_type( + "text/html; charset=utf-8" + )); + assert!(!NoodlemagazineProxy::is_binary_image_content_type( + "application/json" + )); + } } diff --git a/src/proxy.rs b/src/proxy.rs index 12ddc17..05ba118 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -33,6 +33,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::post().to(crate::proxies::noodlemagazine::serve_media)) .route(web::get().to(crate::proxies::noodlemagazine::serve_media)), ) + .service( + web::resource("/noodlemagazine-thumb/{endpoint}*") + .route(web::post().to(crate::proxies::noodlemagazine::get_image)) + .route(web::get().to(crate::proxies::noodlemagazine::get_image)), + ) .service( web::resource("/hanime-cdn/{endpoint}*") .route(web::post().to(crate::proxies::hanimecdn::get_image))