noodlemagazine thumb proxy

This commit is contained in:
Simon
2026-03-20 13:52:06 +00:00
parent 99e4a77507
commit dd7c4ec6a1
3 changed files with 323 additions and 33 deletions

View File

@@ -10,6 +10,7 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use url::Url;
use std::vec;
use titlecase::Titlecase;
use wreq::Version;
@@ -86,8 +87,7 @@ impl NoodlemagazineProvider {
.await
.unwrap_or_default();
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let items = self.get_video_items_from_html(text, proxy_base_url);
let items = self.get_video_items_from_html(text, &options);
if items.is_empty() {
Ok(old_items)
@@ -124,8 +124,7 @@ impl NoodlemagazineProvider {
.await
.unwrap_or_default();
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let items = self.get_video_items_from_html(text, proxy_base_url);
let items = self.get_video_items_from_html(text, &options);
if items.is_empty() {
Ok(old_items)
@@ -136,7 +135,7 @@ impl NoodlemagazineProvider {
}
}
fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Vec<VideoItem> {
fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
@@ -157,31 +156,76 @@ impl NoodlemagazineProvider {
list.split("<div class=\"item\">")
.skip(1)
.filter_map(|segment| {
self.get_video_item(segment.to_string(), proxy_base_url)
.ok()
self.get_video_item(segment.to_string(), options).ok()
})
.collect()
}
fn proxy_url(&self, proxy_base_url: &str, video_url: &str) -> String {
let target = video_url
.strip_prefix("https://")
.or_else(|| video_url.strip_prefix("http://"))
.unwrap_or(video_url)
.trim_start_matches('/');
if proxy_base_url.is_empty() {
return format!("/proxy/noodlemagazine/{target}");
}
format!(
"{}/proxy/noodlemagazine/{}",
proxy_base_url.trim_end_matches('/'),
target
/// Builds the proxied URL for a video page of this provider.
///
/// The scheme-stripped form of `video_url` is passed to the shared proxy-URL
/// builder under the `noodlemagazine` route name.
fn proxy_url(&self, options: &ServerOptions, video_url: &str) -> String {
    let target = crate::providers::strip_url_scheme(video_url);
    crate::providers::build_proxy_url(options, "noodlemagazine", &target)
}
fn get_video_item(&self, video_segment: String, proxy_base_url: &str) -> Result<VideoItem> {
/// Turns a thumbnail reference scraped from listing HTML into an absolute URL.
///
/// * blank input (after trimming whitespace) yields an empty string;
/// * `http://` and `https://` references are returned unchanged;
/// * protocol-relative references (`//host/path`) get `https:` prepended;
/// * everything else is joined onto `self.url` as a path.
fn normalize_thumb_url(&self, thumb: &str) -> String {
    let thumb = thumb.trim();
    if thumb.is_empty() {
        return String::new();
    }
    if thumb.starts_with("http://") || thumb.starts_with("https://") {
        return thumb.to_string();
    }
    if thumb.starts_with("//") {
        return format!("https:{thumb}");
    }
    // Root-relative ("/a.jpg") and bare-relative ("a.jpg") references share one
    // join that normalises the slash on both sides, so a base URL configured
    // with a trailing '/' can no longer produce "//" inside the path.
    format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        thumb.trim_start_matches('/')
    )
}
/// Reports whether `path` ends with one of the accepted image suffixes.
///
/// The comparison is ASCII case-insensitive.
fn has_allowed_image_extension(path: &str) -> bool {
    const ALLOWED_SUFFIXES: [&str; 6] = [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"];
    let lowered = path.to_ascii_lowercase();
    for suffix in ALLOWED_SUFFIXES.iter() {
        if lowered.ends_with(suffix) {
            return true;
        }
    }
    false
}
/// Decides whether an absolute thumbnail URL may be routed through the thumb proxy.
///
/// Only parseable `https` URLs are accepted, and only when the host is
/// `noodlemagazine.com` or one of its subdomains and the path carries an
/// allowed image extension.
fn is_allowed_thumb_url(&self, url: &str) -> bool {
    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };
    if parsed.scheme() != "https" {
        return false;
    }
    match parsed.host_str() {
        Some(host) => {
            let on_site = host == "noodlemagazine.com" || host.ends_with(".noodlemagazine.com");
            on_site && Self::has_allowed_image_extension(parsed.path())
        }
        None => false,
    }
}
/// Maps a scraped thumbnail reference to its `noodlemagazine-thumb` proxy URL.
///
/// Returns an empty string when the reference normalises to nothing or the
/// resulting URL is rejected by `is_allowed_thumb_url`.
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
    let absolute = self.normalize_thumb_url(thumb);
    let usable = !absolute.is_empty() && self.is_allowed_thumb_url(&absolute);
    if !usable {
        return String::new();
    }
    let target = crate::providers::strip_url_scheme(&absolute);
    crate::providers::build_proxy_url(options, "noodlemagazine-thumb", &target)
}
fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> {
let href = video_segment
.split("<a href=\"")
.nth(1)
@@ -231,14 +275,15 @@ impl NoodlemagazineProvider {
.and_then(|s| s.split('<').next())
.and_then(|v| parse_abbreviated_number(v.trim()))
.unwrap_or(0);
let proxy_url = self.proxy_url(proxy_base_url, &video_url);
let proxy_url = self.proxy_url(options, &video_url);
let proxied_thumb = self.proxied_thumb(options, &thumb);
Ok(VideoItem::new(
id,
title,
proxy_url.clone(),
"noodlemagazine".into(),
thumb,
proxied_thumb,
duration,
)
.views(views)
@@ -287,16 +332,33 @@ impl Provider for NoodlemagazineProvider {
#[cfg(test)]
mod tests {
use super::NoodlemagazineProvider;
use crate::videos::ServerOptions;
// Test fixture: `ServerOptions` with only `public_url_base` populated
// ("https://example.com"); every other field is `None`. The tests below
// expect generated proxy URLs to start with that base.
fn options() -> ServerOptions {
ServerOptions {
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: Some("https://example.com".to_string()),
requester: None,
network: None,
stars: None,
categories: None,
duration: None,
sort: None,
sexuality: None,
}
}
#[test]
fn rewrites_video_pages_to_hottub_proxy() {
let provider = NoodlemagazineProvider::new();
let options = options();
assert_eq!(
provider.proxy_url(
"https://example.com",
"https://noodlemagazine.com/watch/-123_456"
),
provider.proxy_url(&options, "https://noodlemagazine.com/watch/-123_456"),
"https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
);
}
@@ -304,11 +366,12 @@ mod tests {
#[test]
fn parses_listing_without_detail_page_requests() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item">
<a href="/watch/-123_456">
<img data-src="https://thumb.example/test.jpg" />
<img data-src="https://noodlemagazine.com/thumbs/test.jpg" />
</a>
<div class="title">sample &amp; title</div>
<svg><use></use></svg>#clock-o"></use></svg>12:34<
@@ -317,13 +380,48 @@ mod tests {
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), "https://example.com");
let items = provider.get_video_items_from_html(html.to_string(), &options);
assert_eq!(items.len(), 1);
assert_eq!(
items[0].url,
"https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
);
assert_eq!(
items[0].thumb,
"https://example.com/proxy/noodlemagazine-thumb/noodlemagazine.com/thumbs/test.jpg"
);
assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
}
// Both listing items must still be parsed, but each must end up with an empty
// thumbnail: the first points at a foreign host (`cdn.example`), the second is
// on noodlemagazine.com but its path has no image extension.
#[test]
fn drops_non_noodlemagazine_or_non_image_thumbs() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item">
<a href="/watch/-123_456">
<img data-src="https://cdn.example/thumb.jpg" />
</a>
<div class="title">sample</div>
<svg><use></use></svg>#clock-o"></use></svg>12:34<
<svg><use></use></svg>#eye"></use></svg>1.2K<
</div>
<div class="item">
<a href="/watch/-555_666">
<img data-src="https://noodlemagazine.com/watch/not-an-image" />
</a>
<div class="title">sample 2</div>
<svg><use></use></svg>#clock-o"></use></svg>00:42<
<svg><use></use></svg>#eye"></use></svg>123<
</div>
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), &options);
// Rejected thumbnails do not cause the item itself to be dropped.
assert_eq!(items.len(), 2);
assert!(items.iter().all(|item| item.thumb.is_empty()));
}
}

View File

@@ -1,11 +1,21 @@
use ntex::http::header::CONTENT_TYPE;
use ntex::web::{self, HttpRequest, error};
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use serde_json::Value;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
/// `User-Agent` value sent with the upstream requests built in this module.
const FIREFOX_USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` value used by `root_html_headers` for the site-root page request.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// `Accept` value used by `image_headers` for thumbnail requests.
const IMAGE_ACCEPT: &str =
"image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5";
#[derive(Debug, Clone)]
pub struct NoodlemagazineProxy {}
@@ -57,6 +67,75 @@ impl NoodlemagazineProxy {
}
}
/// Rebuilds an absolute URL from the proxied endpoint string.
///
/// Inputs that already start with `http://` or `https://` are returned as-is;
/// anything else has its leading slashes removed and `https://` prepended.
fn normalize_image_url(url: &str) -> String {
    let has_scheme = url.starts_with("http://") || url.starts_with("https://");
    if has_scheme {
        return url.to_string();
    }
    let host_and_path = url.trim_start_matches('/');
    format!("https://{}", host_and_path)
}
/// Site root URL; used both as the `Referer` value and as the root page to request.
fn root_referer() -> &'static str {
    const SITE_ROOT: &str = "https://noodlemagazine.com/";
    SITE_ROOT
}
/// Request headers for fetching the site root as an HTML page.
///
/// Returns, in order: `Referer`, `User-Agent`, `Accept` and `Accept-Language`.
fn root_html_headers() -> Vec<(String, String)> {
    let pairs: [(&str, &str); 4] = [
        ("Referer", Self::root_referer()),
        ("User-Agent", FIREFOX_USER_AGENT),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Request headers for fetching a thumbnail from upstream.
///
/// Always contains `Referer`, `User-Agent`, `Accept` and `Accept-Language`
/// (in that order); a `Cookie` entry is appended when the requester reports a
/// cookie header for `image_url`.
fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> {
    let fixed: [(&str, &str); 4] = [
        ("Referer", Self::root_referer()),
        ("User-Agent", FIREFOX_USER_AGENT),
        ("Accept", IMAGE_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    let mut out: Vec<(String, String)> = fixed
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect();
    out.extend(
        requester
            .cookie_header_for_url(image_url)
            .map(|cookie| ("Cookie".to_string(), cookie)),
    );
    out
}
/// Returns `true` when `path`, compared ASCII case-insensitively, ends with an
/// accepted image file suffix.
fn has_allowed_image_extension(path: &str) -> bool {
    let lowered = path.to_ascii_lowercase();
    let matches_suffix = |suffix: &str| lowered.ends_with(suffix);
    matches_suffix(".jpg")
        || matches_suffix(".jpeg")
        || matches_suffix(".png")
        || matches_suffix(".webp")
        || matches_suffix(".avif")
        || matches_suffix(".gif")
}
/// Allow-list check applied to a thumbnail URL before it is fetched upstream.
///
/// The URL must parse, use the `https` scheme, have a host equal to
/// `noodlemagazine.com` or ending in `.noodlemagazine.com`, and have a path
/// with an allowed image extension.
fn is_allowed_thumb_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    if parsed.scheme() != "https" {
        return false;
    }
    let host_ok = match parsed.host_str() {
        Some(host) => host == "noodlemagazine.com" || host.ends_with(".noodlemagazine.com"),
        None => return false,
    };
    host_ok && Self::has_allowed_image_extension(parsed.path())
}
/// Reports whether a `Content-Type` header value names a binary (raster) image type.
///
/// Parameters after `;` are ignored and the media type is compared
/// ASCII case-insensitively. The value must be `image/<subtype>` with a
/// non-empty subtype that is not an SVG type.
fn is_binary_image_content_type(content_type: &str) -> bool {
    let media_type = content_type
        .split(';')
        .next()
        .unwrap_or_default()
        .trim()
        .to_ascii_lowercase();
    match media_type.strip_prefix("image/") {
        // `image/svg+xml` is XML text that can carry script; it is not a binary
        // image and must not be re-served by the proxy. A bare `image/` with no
        // subtype is malformed and is rejected as well.
        Some(subtype) => !subtype.is_empty() && !subtype.starts_with("svg"),
        None => false,
    }
}
fn is_hls_url(url: &str) -> bool {
Url::parse(url)
.ok()
@@ -184,6 +263,82 @@ pub async fn serve_media(
.body(rewritten_manifest))
}
/// Proxies a single Noodlemagazine thumbnail to the caller.
///
/// The `endpoint` path parameter is rebuilt into an absolute URL and answered
/// with `400 Bad Request` unless it passes `is_allowed_thumb_url`. If the
/// upstream fetch is still unsuccessful after one retry the handler answers
/// `404 Not Found`. A successful upstream response whose `Content-Type` is not
/// accepted by `is_binary_image_content_type` yields `502 Bad Gateway`, as does
/// a failure while reading the upstream body.
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = NoodlemagazineProxy::normalize_image_url(&endpoint);
// Refuse anything outside the allow-list before any upstream request is made.
if !NoodlemagazineProxy::is_allowed_thumb_url(&image_url) {
return Ok(web::HttpResponse::BadRequest().finish());
}
// Work on a per-request clone of the shared requester state.
let mut requester = requester.get_ref().clone();
// Request the site root first and discard the result.
// NOTE(review): presumably this seeds session cookies that `image_headers`
// later reads through `cookie_header_for_url` — confirm against `Requester`.
let _ = requester
.get_with_headers(
NoodlemagazineProxy::root_referer(),
NoodlemagazineProxy::root_html_headers(),
Some(Version::HTTP_11),
)
.await;
let mut headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
// First attempt; a transport error is collapsed to `None`.
let mut upstream = requester
.get_raw_with_headers(image_url.as_str(), headers.clone())
.await
.ok();
// Both a transport error and a non-success status count as a failed attempt.
let needs_warmup = upstream
.as_ref()
.map(|response| !response.status().is_success())
.unwrap_or(true);
if needs_warmup {
// Retry once: request the image URL through `get_with_headers` (result
// ignored), rebuild the headers so a newly available cookie is included,
// then repeat the raw fetch.
let _ = requester
.get_with_headers(
image_url.as_str(),
headers.clone(),
Some(Version::HTTP_11),
)
.await;
headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
upstream = requester
.get_raw_with_headers(image_url.as_str(), headers)
.await
.ok();
}
// Any remaining failure, whatever its cause, is reported as 404.
let Some(upstream) = upstream.filter(|response| response.status().is_success()) else {
return Ok(web::HttpResponse::NotFound().finish());
};
let status = upstream.status();
let headers = upstream.headers().clone();
// A missing or non-UTF-8 Content-Type becomes "" and is rejected just below.
let content_type = headers
.get(CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.map(str::to_string)
.unwrap_or_default();
if !NoodlemagazineProxy::is_binary_image_content_type(&content_type) {
return Ok(web::HttpResponse::BadGateway().finish());
}
// The whole body is buffered in memory before the response is built.
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if !content_type.is_empty() {
resp.set_header(CONTENT_TYPE, content_type);
}
// NOTE(review): the upstream Content-Length is forwarded verbatim although the
// body sent is the buffered `bytes`; confirm the two always agree (e.g. when
// the HTTP client transparently decodes a compressed body).
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}
#[cfg(test)]
mod tests {
use super::NoodlemagazineProxy;
@@ -231,4 +386,36 @@ mod tests {
"#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nhttps://cdn.example/hls/low/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"https://cdn.example/hls/keys/key.bin\"\nhttps://cdn.example/hls/segment0.ts"
);
}
// Table-driven check of the thumbnail allow-list: site and subdomain image
// URLs pass, non-image paths and foreign hosts do not.
#[test]
fn allows_only_noodlemagazine_image_thumbs() {
    let cases = [
        ("https://noodlemagazine.com/thumbs/example.webp", true),
        ("https://img.noodlemagazine.com/previews/example.jpg", true),
        ("https://noodlemagazine.com/watch/-123_456", false),
        ("https://cdn.example/thumb.jpg", false),
    ];
    for &(url, expected) in cases.iter() {
        assert_eq!(
            NoodlemagazineProxy::is_allowed_thumb_url(url),
            expected,
            "unexpected allow-list verdict for {url}"
        );
    }
}
// Table-driven check of the Content-Type filter: image media types pass
// (parameters ignored), non-image media types do not.
#[test]
fn recognizes_binary_image_content_types() {
    let cases = [
        ("image/webp", true),
        ("image/jpeg; charset=binary", true),
        ("text/html; charset=utf-8", false),
        ("application/json", false),
    ];
    for &(content_type, expected) in cases.iter() {
        assert_eq!(
            NoodlemagazineProxy::is_binary_image_content_type(content_type),
            expected,
            "unexpected verdict for {content_type}"
        );
    }
}
}

View File

@@ -33,6 +33,11 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::post().to(crate::proxies::noodlemagazine::serve_media))
.route(web::get().to(crate::proxies::noodlemagazine::serve_media)),
)
.service(
web::resource("/noodlemagazine-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::noodlemagazine::get_image))
.route(web::get().to(crate::proxies::noodlemagazine::get_image)),
)
.service(
web::resource("/hanime-cdn/{endpoint}*")
.route(web::post().to(crate::proxies::hanimecdn::get_image))