Files
hottub/src/proxies/noodlemagazine.rs
2026-03-22 15:56:25 +00:00

442 lines
14 KiB
Rust

use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use serde_json::Value;
use std::net::IpAddr;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
/// `User-Agent` value sent with upstream requests; a desktop Firefox-on-Linux string.
const FIREFOX_USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` value used when requesting HTML pages (see `root_html_headers`).
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// `Accept` value used when requesting images (see `image_headers`).
const IMAGE_ACCEPT: &str = "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5";
/// Stateless helper that groups the noodlemagazine proxy logic.
///
/// The type carries no data; every helper is an associated function and the
/// only `&self` method does not read any state. `Default` is derived so the
/// type can be built with `NoodlemagazineProxy::default()` as well as through
/// its argument-less `new()` constructor.
#[derive(Debug, Clone, Default)]
pub struct NoodlemagazineProxy {}
impl NoodlemagazineProxy {
pub fn new() -> Self {
NoodlemagazineProxy {}
}
/// Extracts the value assigned to `window.playlist` from a video page.
///
/// Returns `None` when the `window.playlist = ` marker is absent.
///
/// When the assigned value starts with `{` or `[`, the returned slice is the
/// balanced JSON value: brackets are counted while skipping over string
/// literals (including backslash escapes). This keeps playlists intact when a
/// string value, such as a URL query, contains a `;`. For any other value, or
/// when the brackets never balance, the text up to the first `;` is returned.
fn extract_playlist(text: &str) -> Option<&str> {
    let rest = text.split("window.playlist = ").nth(1)?;
    // Fallback: everything before the first semicolon.
    let up_to_semicolon = rest.split(';').next();

    let candidate = rest.trim_start();
    if !candidate.starts_with('{') && !candidate.starts_with('[') {
        return up_to_semicolon;
    }

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;
    for (index, byte) in candidate.bytes().enumerate() {
        if in_string {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }
        match byte {
            b'"' => in_string = true,
            b'{' | b'[' => depth += 1,
            b'}' | b']' => {
                // `depth` is at least 1 here: the first byte opened a bracket
                // and the scan returns as soon as it drops back to zero.
                depth -= 1;
                if depth == 0 {
                    // The closing bracket is ASCII, so `index` is a char boundary.
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }
    up_to_semicolon
}
/// Ranks a playlist source; larger tuples are better.
///
/// The first component is `1` when the `file` string contains `.m3u8` and `0`
/// otherwise, so HLS sources always outrank others. The second component is
/// the quality parsed from `label`.
///
/// Only the first run of ASCII digits in the label is used. Concatenating
/// every digit would turn a label such as `"1080p60"` into `108060` and rank
/// it above `"2160p"`. Missing or non-string fields, labels without digits,
/// and values that do not fit in `u32` score `0`.
fn source_score(source: &Value) -> (u8, u32) {
    let file = source["file"].as_str().unwrap_or_default();
    let label = source["label"].as_str().unwrap_or_default();
    let is_hls = u8::from(file.contains(".m3u8"));
    let quality = label
        .trim_start_matches(|c: char| !c.is_ascii_digit())
        .split(|c: char| !c.is_ascii_digit())
        .next()
        .and_then(|digits| digits.parse::<u32>().ok())
        .unwrap_or(0);
    (is_hls, quality)
}
/// Parses the playlist JSON and returns the `file` URL of the best source.
///
/// Sources whose `file` is missing, not a string, or empty are skipped. The
/// remaining ones are ranked with `source_score`; on equal scores the later
/// entry wins. Returns `None` when the JSON is invalid, `sources` is not an
/// array, or no usable source exists.
fn select_best_source(playlist: &str) -> Option<String> {
    let parsed: Value = serde_json::from_str(playlist).ok()?;
    let mut best: Option<(&str, (u8, u32))> = None;
    for candidate in parsed["sources"].as_array()? {
        let Some(file) = candidate["file"].as_str().filter(|file| !file.is_empty()) else {
            continue;
        };
        let score = Self::source_score(candidate);
        // `>=` keeps the last of several equally ranked sources.
        if best.map_or(true, |(_, top)| score >= top) {
            best = Some((file, score));
        }
    }
    best.map(|(file, _)| file.to_string())
}
/// Turns the requested endpoint into an absolute video page URL.
///
/// Input already starting with `http://` or `https://` is returned as is.
/// Anything else has its leading slashes removed and `https://` prepended.
/// No host validation happens here.
fn normalize_video_page_url(url: &str) -> String {
    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme));
    if has_scheme {
        return url.to_owned();
    }
    let mut normalized = String::from("https://");
    normalized.push_str(url.trim_start_matches('/'));
    normalized
}
/// Turns the requested endpoint into an absolute image URL.
///
/// Values that already carry an `http://` or `https://` prefix pass through
/// untouched; everything else loses its leading slashes and gains an
/// `https://` prefix.
fn normalize_image_url(url: &str) -> String {
    match url {
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            absolute.to_owned()
        }
        bare => ["https://", bare.trim_start_matches('/')].concat(),
    }
}
/// Site root URL, used as the `Referer` header and as the warm-up request target.
fn root_referer() -> &'static str {
    const SITE_ROOT: &str = "https://noodlemagazine.com/";
    SITE_ROOT
}
/// Request headers for fetching the site root as an HTML page.
///
/// Yields, in order: `Referer`, `User-Agent`, `Accept` (HTML) and
/// `Accept-Language`.
fn root_html_headers() -> Vec<(String, String)> {
    [
        ("Referer", Self::root_referer()),
        ("User-Agent", FIREFOX_USER_AGENT),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ]
    .iter()
    .map(|&(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Request headers for fetching an image from `image_url`.
///
/// Always contains `Referer`, `User-Agent`, `Accept` (image) and
/// `Accept-Language`, in that order. A trailing `Cookie` header is appended
/// when `requester.cookie_header_for_url(image_url)` yields a value.
fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> {
    let fixed = [
        ("Referer", Self::root_referer()),
        ("User-Agent", FIREFOX_USER_AGENT),
        ("Accept", IMAGE_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    let cookie = requester
        .cookie_header_for_url(image_url)
        .map(|value| ("Cookie".to_string(), value));
    fixed
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .chain(cookie)
        .collect()
}
/// Reports whether `path` ends in one of the accepted image extensions
/// (`jpg`, `jpeg`, `png`, `webp`, `avif`, `gif`), ignoring ASCII case.
fn has_allowed_image_extension(path: &str) -> bool {
    const ALLOWED: [&str; 6] = ["jpg", "jpeg", "png", "webp", "avif", "gif"];
    // The text after the last dot is the extension; none of the allowed
    // extensions contains a dot itself.
    match path.rsplit_once('.') {
        Some((_, extension)) => ALLOWED
            .iter()
            .any(|allowed| extension.eq_ignore_ascii_case(allowed)),
        None => false,
    }
}
/// Reports whether `host` names a loopback, private or otherwise internal
/// address that the thumbnail proxy must never fetch from.
///
/// Rejected hosts:
/// * `localhost` and any `*.localhost` name, ignoring ASCII case and a
///   trailing dot;
/// * IPv4 literals that are private, loopback, link-local, broadcast,
///   documentation, unspecified or multicast;
/// * IPv6 literals that are loopback, unspecified, multicast, unique-local or
///   unicast link-local;
/// * IPv4-mapped IPv6 literals (`::ffff:a.b.c.d`) whose embedded IPv4 address
///   is rejected by the IPv4 rules.
///
/// IPv6 literals are accepted both bare (`::1`) and in the bracketed form
/// used inside URLs (`[::1]`).
///
/// Host names that are not IP literals are not resolved here, so a public
/// name that resolves to an internal address is not detected.
fn is_disallowed_thumb_host(host: &str) -> bool {
    /// IPv4 ranges the proxy must not reach.
    fn is_internal_v4(ip: std::net::Ipv4Addr) -> bool {
        ip.is_private()
            || ip.is_loopback()
            || ip.is_link_local()
            || ip.is_broadcast()
            || ip.is_documentation()
            || ip.is_unspecified()
            || ip.is_multicast()
    }

    // A fully qualified name may carry a trailing root dot ("localhost.").
    let host = host.trim_end_matches('.');
    if host.eq_ignore_ascii_case("localhost")
        || host.to_ascii_lowercase().ends_with(".localhost")
    {
        return true;
    }

    // `IpAddr` parsing rejects brackets, so strip them before parsing.
    let literal = host
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(host);

    match literal.parse::<IpAddr>() {
        Ok(IpAddr::V4(ip)) => is_internal_v4(ip),
        Ok(IpAddr::V6(ip)) => {
            // `::ffff:127.0.0.1` reaches the same endpoint as `127.0.0.1`.
            if let Some(mapped) = ip.to_ipv4_mapped() {
                return is_internal_v4(mapped);
            }
            ip.is_loopback()
                || ip.is_unspecified()
                || ip.is_multicast()
                || ip.is_unique_local()
                || ip.is_unicast_link_local()
        }
        Err(_) => false,
    }
}
/// Decides whether the thumbnail proxy may fetch `url`.
///
/// The URL must parse, use the `https` scheme, have a host that
/// `is_disallowed_thumb_host` does not reject, and a path accepted by
/// `has_allowed_image_extension`.
fn is_allowed_thumb_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    if parsed.scheme() != "https" {
        return false;
    }
    match parsed.host_str() {
        Some(host) => {
            !Self::is_disallowed_thumb_host(host)
                && Self::has_allowed_image_extension(parsed.path())
        }
        None => false,
    }
}
/// Reports whether a `Content-Type` header value denotes a binary (raster)
/// image that is safe to relay.
///
/// Parameters after `;` are ignored and the media type is compared without
/// regard to surrounding whitespace or ASCII case. The type must be `image/`
/// followed by a non-empty subtype.
///
/// SVG (`image/svg+xml`) is refused: it is XML text that can embed scripts,
/// so it is neither binary nor safe to re-serve from the proxy's origin.
fn is_binary_image_content_type(content_type: &str) -> bool {
    let media_type = content_type
        .split(';')
        .next()
        .unwrap_or_default()
        .trim()
        .to_ascii_lowercase();
    match media_type.strip_prefix("image/") {
        Some(subtype) => !subtype.is_empty() && !subtype.starts_with("svg"),
        None => false,
    }
}
/// Reports whether `url` parses as an absolute URL whose path ends in `.m3u8`.
///
/// Query string and fragment are not part of the path, so they do not affect
/// the result. Unparseable input yields `false`.
fn is_hls_url(url: &str) -> bool {
    match Url::parse(url) {
        Ok(parsed) => parsed.path().ends_with(".m3u8"),
        Err(_) => false,
    }
}
/// Resolves a URI found in a manifest against `base_url`.
///
/// Empty values and values starting with `#`, `data:`, `http://` or
/// `https://` are returned unchanged. Everything else is joined onto
/// `base_url`; if the join fails the original value is returned.
fn absolutize_uri(base_url: &Url, value: &str) -> String {
    const PASSTHROUGH_PREFIXES: [&str; 4] = ["#", "data:", "http://", "https://"];
    let keep_verbatim = value.is_empty()
        || PASSTHROUGH_PREFIXES
            .iter()
            .any(|prefix| value.starts_with(*prefix));
    if keep_verbatim {
        return value.to_owned();
    }
    match base_url.join(value) {
        Ok(joined) => joined.to_string(),
        Err(_) => value.to_owned(),
    }
}
/// Rewrites one playlist line so that its URI is absolute.
///
/// * Blank lines are returned unchanged.
/// * Lines not starting with `#` are treated as a URI and resolved whole.
/// * Lines starting with `#` have the value of their first `URI="..."`
///   attribute resolved. Lines without that attribute, or without a closing
///   quote, are returned unchanged.
fn rewrite_manifest_line(base_url: &Url, line: &str) -> String {
    const URI_ATTR: &str = "URI=\"";

    if line.trim().is_empty() {
        return line.to_owned();
    }
    if !line.starts_with('#') {
        return Self::absolutize_uri(base_url, line);
    }
    let Some((before, after)) = line.split_once(URI_ATTR) else {
        return line.to_owned();
    };
    let Some((uri, remainder)) = after.split_once('"') else {
        return line.to_owned();
    };
    let absolute = Self::absolutize_uri(base_url, uri);
    // Reassemble: prefix, attribute opener, new value, closing quote, rest.
    [before, URI_ATTR, absolute.as_str(), "\"", remainder].concat()
}
/// Rewrites every line of a playlist so that its URIs are absolute
/// relative to `manifest_url`.
///
/// Returns `None` when `manifest_url` cannot be parsed. Output lines are
/// joined with `\n`, and no newline is added after the last line.
fn rewrite_manifest(manifest_url: &str, body: &str) -> Option<String> {
    let base_url = Url::parse(manifest_url).ok()?;
    let mut rewritten = String::with_capacity(body.len());
    for (index, line) in body.lines().enumerate() {
        if index > 0 {
            rewritten.push('\n');
        }
        rewritten.push_str(&Self::rewrite_manifest_line(&base_url, line));
    }
    Some(rewritten)
}
/// Fetches a video page and picks the best media source from its playlist.
///
/// `url` is normalized with `normalize_video_page_url` and requested over
/// HTTP/2 through a clone of the shared requester. A failed request is
/// treated like an empty page.
///
/// Returns `Some((page_url, source_url))` on success. Returns `None` when the
/// page is empty, holds no `window.playlist` assignment, or the playlist has
/// no usable source.
async fn resolve_source_url(
    &self,
    url: String,
    requester: web::types::State<Requester>,
) -> Option<(String, String)> {
    let page_url = Self::normalize_video_page_url(&url);
    let mut client = requester.get_ref().clone();
    let html = client
        .get(&page_url, Some(Version::HTTP_2))
        .await
        .unwrap_or_default();
    if html.is_empty() {
        return None;
    }
    let playlist = Self::extract_playlist(&html)?;
    let source_url = Self::select_best_source(playlist)?;
    Some((page_url, source_url))
}
}
/// Handler that resolves a video page to its media source.
///
/// The `endpoint` path parameter is the video page URL, with or without a
/// scheme.
///
/// * If no source can be resolved, responds `502 Bad Gateway`.
/// * If the best source is not an HLS playlist, responds `302 Found` with
///   the source URL in `Location`.
/// * Otherwise the playlist is fetched with the video page as `Referer`, its
///   URIs are rewritten to absolute URLs, and it is returned as
///   `application/vnd.apple.mpegurl`.
///
/// A failed fetch, a non-success upstream status, an unreadable body, or an
/// unparseable source URL all produce `502 Bad Gateway`.
pub async fn serve_media(
    req: HttpRequest,
    requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
    let endpoint = req.match_info().query("endpoint").to_string();
    let proxy = NoodlemagazineProxy::new();
    let Some((video_page_url, source_url)) =
        proxy.resolve_source_url(endpoint, requester.clone()).await
    else {
        return Ok(web::HttpResponse::BadGateway().finish());
    };
    if !NoodlemagazineProxy::is_hls_url(&source_url) {
        return Ok(web::HttpResponse::Found()
            .header("Location", source_url)
            .finish());
    }
    let mut upstream_requester = requester.get_ref().clone();
    let upstream = match upstream_requester
        .get_raw_with_headers(&source_url, vec![("Referer".to_string(), video_page_url)])
        .await
    {
        Ok(response) => response,
        Err(_) => return Ok(web::HttpResponse::BadGateway().finish()),
    };
    // Never relay an upstream error page as if it were a playlist.
    if !upstream.status().is_success() {
        return Ok(web::HttpResponse::BadGateway().finish());
    }
    let manifest_body = upstream.text().await.map_err(error::ErrorBadGateway)?;
    let rewritten_manifest =
        match NoodlemagazineProxy::rewrite_manifest(&source_url, &manifest_body) {
            Some(body) => body,
            None => return Ok(web::HttpResponse::BadGateway().finish()),
        };
    Ok(web::HttpResponse::Ok()
        .header(CONTENT_TYPE, "application/vnd.apple.mpegurl")
        .body(rewritten_manifest))
}
/// Handler that fetches a thumbnail image and relays it to the client.
///
/// The `endpoint` path parameter is the image URL, with or without a scheme.
///
/// Responses:
/// * `400 Bad Request` when the normalized URL fails `is_allowed_thumb_url`.
/// * `404 Not Found` when the image cannot be fetched with a success status,
///   even after one retry.
/// * `502 Bad Gateway` when the upstream `Content-Type` is not an image type
///   or the body cannot be read.
/// * Otherwise the upstream status, `Content-Type`, `Content-Length` (when
///   present and readable as a string) and body bytes.
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = NoodlemagazineProxy::normalize_image_url(&endpoint);
// Refuse anything that is not an https image URL on an allowed host.
if !NoodlemagazineProxy::is_allowed_thumb_url(&image_url) {
return Ok(web::HttpResponse::BadRequest().finish());
}
let mut requester = requester.get_ref().clone();
// Best-effort request to the site root; its result is deliberately ignored.
// NOTE(review): presumably this lets the requester pick up session cookies
// before the image fetch — confirm against `Requester`.
let _ = requester
.get_with_headers(
NoodlemagazineProxy::root_referer(),
NoodlemagazineProxy::root_html_headers(),
Some(Version::HTTP_11),
)
.await;
// First attempt; the headers include a Cookie when the requester has one for this URL.
let mut headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
let mut upstream = requester
.get_raw_with_headers(image_url.as_str(), headers.clone())
.await
.ok();
// Retry when the first attempt errored or returned a non-success status.
let needs_warmup = upstream
.as_ref()
.map(|response| !response.status().is_success())
.unwrap_or(true);
if needs_warmup {
// Request the image URL once through `get_with_headers` (result ignored),
// then rebuild the headers so any cookie set meanwhile is included.
let _ = requester
.get_with_headers(image_url.as_str(), headers.clone(), Some(Version::HTTP_11))
.await;
headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
upstream = requester
.get_raw_with_headers(image_url.as_str(), headers)
.await
.ok();
}
// Only a success response is relayed; anything else becomes 404.
let Some(upstream) = upstream.filter(|response| response.status().is_success()) else {
return Ok(web::HttpResponse::NotFound().finish());
};
let status = upstream.status();
let headers = upstream.headers().clone();
// A missing or non-textual Content-Type becomes "", which fails the image check below.
let content_type = headers
.get(CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.map(str::to_string)
.unwrap_or_default();
if !NoodlemagazineProxy::is_binary_image_content_type(&content_type) {
return Ok(web::HttpResponse::BadGateway().finish());
}
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if !content_type.is_empty() {
resp.set_header(CONTENT_TYPE, content_type);
}
// NOTE(review): the upstream Content-Length is forwarded verbatim. If the
// requester transparently decodes compressed bodies it may not match
// `bytes.len()` — confirm.
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}
#[cfg(test)]
mod tests {
    use super::NoodlemagazineProxy as Proxy;

    /// The playlist JSON is cut out of the surrounding `<script>` markup.
    #[test]
    fn extracts_playlist_from_page() {
        let page = r#"
<script>
window.playlist = {"sources":[{"file":"https://cdn.example/360.mp4","label":"360p"}]};
</script>
"#;
        let expected = r#"{"sources":[{"file":"https://cdn.example/360.mp4","label":"360p"}]}"#;
        assert_eq!(Proxy::extract_playlist(page), Some(expected));
    }

    /// An HLS source wins over progressive files regardless of their labels.
    #[test]
    fn prefers_hls_then_highest_quality() {
        let playlist = r#"{
"sources": [
{"file":"https://cdn.example/360.mp4","label":"360p"},
{"file":"https://cdn.example/720.mp4","label":"720p"},
{"file":"https://cdn.example/master.m3u8","label":"1080p"}
]
}"#;
        let chosen = Proxy::select_best_source(playlist);
        assert_eq!(chosen.as_deref(), Some("https://cdn.example/master.m3u8"));
    }

    /// Variant playlists, key URIs and segments all become absolute URLs.
    #[test]
    fn rewrites_manifest_to_direct_absolute_urls() {
        let manifest = "#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nlow/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"keys/key.bin\"\nsegment0.ts";
        let expected = "#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nhttps://cdn.example/hls/low/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"https://cdn.example/hls/keys/key.bin\"\nhttps://cdn.example/hls/segment0.ts";
        let rewritten = Proxy::rewrite_manifest("https://cdn.example/hls/master.m3u8", manifest)
            .unwrap();
        assert_eq!(rewritten, expected);
    }

    /// Only https image URLs on non-local hosts pass the thumbnail filter.
    #[test]
    fn allows_https_image_thumbs_but_rejects_local_or_non_images() {
        let accepted = [
            "https://noodlemagazine.com/thumbs/example.webp",
            "https://cdn.example/previews/example.jpg",
        ];
        for url in accepted {
            assert!(Proxy::is_allowed_thumb_url(url));
        }
        let rejected = [
            "https://noodlemagazine.com/watch/-123_456",
            "https://localhost/thumb.jpg",
        ];
        for url in rejected {
            assert!(!Proxy::is_allowed_thumb_url(url));
        }
    }

    /// `image/*` media types are accepted, with or without parameters.
    #[test]
    fn recognizes_binary_image_content_types() {
        for content_type in ["image/webp", "image/jpeg; charset=binary"] {
            assert!(Proxy::is_binary_image_content_type(content_type));
        }
        for content_type in ["text/html; charset=utf-8", "application/json"] {
            assert!(!Proxy::is_binary_image_content_type(content_type));
        }
    }
}