442 lines
14 KiB
Rust
442 lines
14 KiB
Rust
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
|
|
use ntex::{
|
|
http::Response,
|
|
web::{self, HttpRequest, error},
|
|
};
|
|
use serde_json::Value;
|
|
use std::net::IpAddr;
|
|
use url::Url;
|
|
use wreq::Version;
|
|
|
|
use crate::util::requester::Requester;
|
|
|
|
/// `User-Agent` value attached to the HTML warm-up and image requests
/// (identifies as desktop Firefox on Linux).
const FIREFOX_USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";

/// `Accept` value used when requesting an HTML page.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

/// `Accept` value used when requesting an image.
const IMAGE_ACCEPT: &str = "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5";
|
|
|
|
/// Stateless helper type grouping the noodlemagazine proxy routines.
///
/// The struct has no fields; `Default` is derived so that it agrees with the
/// argument-less `new()` constructor.
#[derive(Debug, Clone, Default)]
pub struct NoodlemagazineProxy {}
|
|
|
|
impl NoodlemagazineProxy {
|
|
pub fn new() -> Self {
|
|
NoodlemagazineProxy {}
|
|
}
|
|
|
|
/// Extracts the value assigned to `window.playlist` from a page's HTML.
///
/// Returns the text that follows the first `window.playlist = ` marker, or
/// `None` when the marker is absent.
///
/// When the value starts with `{` or `[`, the slice ends at the matching
/// closing bracket. Brackets and semicolons inside JSON string literals are
/// ignored, so a URL such as `...?a=1;b=2` no longer truncates the playlist.
/// For any other value (or an unbalanced one) the text up to the first `;`
/// is returned.
fn extract_playlist(text: &str) -> Option<&str> {
    let after_marker = text.split("window.playlist = ").nth(1)?;

    if after_marker.starts_with('{') || after_marker.starts_with('[') {
        let mut depth = 0usize;
        let mut in_string = false;
        let mut escaped = false;

        for (index, byte) in after_marker.bytes().enumerate() {
            if in_string {
                if escaped {
                    escaped = false;
                } else if byte == b'\\' {
                    escaped = true;
                } else if byte == b'"' {
                    in_string = false;
                }
                continue;
            }

            match byte {
                b'"' => in_string = true,
                b'{' | b'[' => depth += 1,
                b'}' | b']' => {
                    // The first byte is an opener and we return as soon as the
                    // depth reaches zero, so `depth` is at least 1 here.
                    depth -= 1;
                    if depth == 0 {
                        // `index` points at an ASCII byte, so the slice ends
                        // on a character boundary.
                        return Some(&after_marker[..=index]);
                    }
                }
                _ => {}
            }
        }
    }

    // Not a bracketed value, or the brackets never balanced.
    after_marker.split(';').next()
}
|
|
|
|
fn source_score(source: &Value) -> (u8, u32) {
|
|
let file = source["file"].as_str().unwrap_or_default();
|
|
let label = source["label"].as_str().unwrap_or_default();
|
|
let is_hls = u8::from(file.contains(".m3u8"));
|
|
let quality = label
|
|
.chars()
|
|
.filter(|c| c.is_ascii_digit())
|
|
.collect::<String>()
|
|
.parse::<u32>()
|
|
.unwrap_or(0);
|
|
|
|
(is_hls, quality)
|
|
}
|
|
|
|
fn select_best_source(playlist: &str) -> Option<String> {
|
|
let json: Value = serde_json::from_str(playlist).ok()?;
|
|
let sources = json["sources"].as_array()?;
|
|
|
|
sources
|
|
.iter()
|
|
.filter(|source| {
|
|
source["file"]
|
|
.as_str()
|
|
.map(|file| !file.is_empty())
|
|
.unwrap_or(false)
|
|
})
|
|
.max_by_key(|source| Self::source_score(source))
|
|
.and_then(|source| source["file"].as_str())
|
|
.map(str::to_string)
|
|
}
|
|
|
|
/// Returns `url` unchanged when it already starts with `http://` or
/// `https://`; otherwise strips leading slashes and prefixes `https://`.
fn normalize_video_page_url(url: &str) -> String {
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme));

    if already_absolute {
        return url.to_owned();
    }

    ["https://", url.trim_start_matches('/')].concat()
}
|
|
|
|
/// Turns an image endpoint into an absolute URL: values that already start
/// with `http://` or `https://` are returned as-is, anything else has its
/// leading slashes removed and `https://` prepended.
fn normalize_image_url(url: &str) -> String {
    match url {
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            absolute.to_owned()
        }
        relative => {
            let mut normalized = String::from("https://");
            normalized.push_str(relative.trim_start_matches('/'));
            normalized
        }
    }
}
|
|
|
|
/// Site root URL, used as the `Referer` value and as the warm-up target.
fn root_referer() -> &'static str {
    const SITE_ROOT: &str = "https://noodlemagazine.com/";
    SITE_ROOT
}
|
|
|
|
fn root_html_headers() -> Vec<(String, String)> {
|
|
vec![
|
|
("Referer".to_string(), Self::root_referer().to_string()),
|
|
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
|
|
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
|
]
|
|
}
|
|
|
|
fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> {
|
|
let mut headers = vec![
|
|
("Referer".to_string(), Self::root_referer().to_string()),
|
|
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
|
|
("Accept".to_string(), IMAGE_ACCEPT.to_string()),
|
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
|
];
|
|
|
|
if let Some(cookie) = requester.cookie_header_for_url(image_url) {
|
|
headers.push(("Cookie".to_string(), cookie));
|
|
}
|
|
|
|
headers
|
|
}
|
|
|
|
/// Reports whether `path` ends in one of the accepted image extensions
/// (`.jpg`, `.jpeg`, `.png`, `.webp`, `.avif`, `.gif`), ignoring ASCII case.
fn has_allowed_image_extension(path: &str) -> bool {
    const ALLOWED: [&str; 6] = ["jpg", "jpeg", "png", "webp", "avif", "gif"];

    // None of the allowed extensions contains a dot, so comparing the text
    // after the last dot is the same as a case-insensitive suffix match.
    match path.rsplit_once('.') {
        Some((_, extension)) => ALLOWED
            .iter()
            .any(|allowed| extension.eq_ignore_ascii_case(allowed)),
        None => false,
    }
}
|
|
|
|
/// Reports whether `host` must not be contacted by the thumbnail proxy.
///
/// Rejected hosts:
/// - `localhost` and any name whose last label is `localhost`
///   (a trailing root dot is ignored);
/// - IPv4 literals that are private, loopback, link-local, broadcast,
///   documentation or unspecified;
/// - IPv6 literals that are loopback, unspecified, multicast, unique-local
///   or unicast link-local;
/// - IPv4-mapped IPv6 literals (`::ffff:a.b.c.d`) whose embedded IPv4
///   address falls under the IPv4 rules.
///
/// IPv6 literals are accepted with or without surrounding brackets. The
/// bracketed form is what `Url::host_str()` returns, and `IpAddr`'s parser
/// rejects it, so without stripping them the IPv6 checks never run.
///
/// Any other host name is allowed; names are not resolved here.
fn is_disallowed_thumb_host(host: &str) -> bool {
    /// IPv4 ranges that must never be proxied.
    fn is_disallowed_v4(ip: std::net::Ipv4Addr) -> bool {
        ip.is_private()
            || ip.is_loopback()
            || ip.is_link_local()
            || ip.is_broadcast()
            || ip.is_documentation()
            || ip.is_unspecified()
    }

    // `localhost.` is the fully-qualified spelling of `localhost`.
    let host = host.strip_suffix('.').unwrap_or(host);

    let last_label_is_localhost = host
        .rsplit('.')
        .next()
        .map_or(false, |label| label.eq_ignore_ascii_case("localhost"));
    if last_label_is_localhost {
        return true;
    }

    // Drop the brackets of a URL-style IPv6 literal before parsing.
    let literal = host
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .unwrap_or(host);

    match literal.parse::<IpAddr>() {
        Ok(IpAddr::V4(ip)) => is_disallowed_v4(ip),
        Ok(IpAddr::V6(ip)) => match ip.to_ipv4_mapped() {
            // `::ffff:127.0.0.1` reaches the same target as `127.0.0.1`.
            Some(mapped) => is_disallowed_v4(mapped),
            None => {
                ip.is_loopback()
                    || ip.is_unspecified()
                    || ip.is_multicast()
                    || ip.is_unique_local()
                    || ip.is_unicast_link_local()
            }
        },
        Err(_) => false,
    }
}
|
|
|
|
fn is_allowed_thumb_url(url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else {
|
|
return false;
|
|
};
|
|
|
|
!Self::is_disallowed_thumb_host(host) && Self::has_allowed_image_extension(url.path())
|
|
}
|
|
|
|
/// Reports whether a `Content-Type` value names an `image/*` media type.
///
/// Parameters after the first `;` are ignored, surrounding whitespace is
/// trimmed, and the comparison ignores ASCII case.
fn is_binary_image_content_type(content_type: &str) -> bool {
    const IMAGE_PREFIX: &str = "image/";

    let media_type = match content_type.split_once(';') {
        Some((essence, _parameters)) => essence,
        None => content_type,
    }
    .trim();

    // `get` yields `None` when the value is shorter than the prefix (or the
    // cut would split a multi-byte character), which can never be a match.
    media_type
        .get(..IMAGE_PREFIX.len())
        .map_or(false, |prefix| prefix.eq_ignore_ascii_case(IMAGE_PREFIX))
}
|
|
|
|
fn is_hls_url(url: &str) -> bool {
|
|
Url::parse(url)
|
|
.ok()
|
|
.map(|parsed| parsed.path().ends_with(".m3u8"))
|
|
.unwrap_or(false)
|
|
}
|
|
|
|
fn absolutize_uri(base_url: &Url, value: &str) -> String {
|
|
if value.is_empty() {
|
|
return String::new();
|
|
}
|
|
|
|
if value.starts_with('#')
|
|
|| value.starts_with("data:")
|
|
|| value.starts_with("http://")
|
|
|| value.starts_with("https://")
|
|
{
|
|
return value.to_string();
|
|
}
|
|
|
|
base_url
|
|
.join(value)
|
|
.map(|url| url.to_string())
|
|
.unwrap_or_else(|_| value.to_string())
|
|
}
|
|
|
|
fn rewrite_manifest_line(base_url: &Url, line: &str) -> String {
|
|
if line.trim().is_empty() {
|
|
return line.to_string();
|
|
}
|
|
|
|
if !line.starts_with('#') {
|
|
return Self::absolutize_uri(base_url, line);
|
|
}
|
|
|
|
let Some(uri_start) = line.find("URI=\"") else {
|
|
return line.to_string();
|
|
};
|
|
let value_start = uri_start + 5;
|
|
let Some(relative_end) = line[value_start..].find('"') else {
|
|
return line.to_string();
|
|
};
|
|
let value_end = value_start + relative_end;
|
|
let value = &line[value_start..value_end];
|
|
let rewritten = Self::absolutize_uri(base_url, value);
|
|
|
|
format!(
|
|
"{}{}{}",
|
|
&line[..value_start],
|
|
rewritten,
|
|
&line[value_end..]
|
|
)
|
|
}
|
|
|
|
fn rewrite_manifest(manifest_url: &str, body: &str) -> Option<String> {
|
|
let base_url = Url::parse(manifest_url).ok()?;
|
|
|
|
Some(
|
|
body.lines()
|
|
.map(|line| Self::rewrite_manifest_line(&base_url, line))
|
|
.collect::<Vec<_>>()
|
|
.join("\n"),
|
|
)
|
|
}
|
|
|
|
async fn resolve_source_url(
|
|
&self,
|
|
url: String,
|
|
requester: web::types::State<Requester>,
|
|
) -> Option<(String, String)> {
|
|
let mut requester = requester.get_ref().clone();
|
|
let url = Self::normalize_video_page_url(&url);
|
|
let text = requester
|
|
.get(&url, Some(Version::HTTP_2))
|
|
.await
|
|
.unwrap_or_default();
|
|
if text.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let Some(playlist) = Self::extract_playlist(&text) else {
|
|
return None;
|
|
};
|
|
|
|
Self::select_best_source(playlist).map(|source_url| (url, source_url))
|
|
}
|
|
}
|
|
|
|
pub async fn serve_media(
|
|
req: HttpRequest,
|
|
requester: web::types::State<Requester>,
|
|
) -> Result<impl web::Responder, web::Error> {
|
|
let endpoint = req.match_info().query("endpoint").to_string();
|
|
let proxy = NoodlemagazineProxy::new();
|
|
let Some((video_page_url, source_url)) =
|
|
proxy.resolve_source_url(endpoint, requester.clone()).await
|
|
else {
|
|
return Ok(web::HttpResponse::BadGateway().finish());
|
|
};
|
|
|
|
if !NoodlemagazineProxy::is_hls_url(&source_url) {
|
|
return Ok(web::HttpResponse::Found()
|
|
.header("Location", source_url)
|
|
.finish());
|
|
}
|
|
|
|
let mut upstream_requester = requester.get_ref().clone();
|
|
let upstream = match upstream_requester
|
|
.get_raw_with_headers(&source_url, vec![("Referer".to_string(), video_page_url)])
|
|
.await
|
|
{
|
|
Ok(response) => response,
|
|
Err(_) => return Ok(web::HttpResponse::BadGateway().finish()),
|
|
};
|
|
|
|
let manifest_body = upstream.text().await.map_err(error::ErrorBadGateway)?;
|
|
let rewritten_manifest =
|
|
match NoodlemagazineProxy::rewrite_manifest(&source_url, &manifest_body) {
|
|
Some(body) => body,
|
|
None => return Ok(web::HttpResponse::BadGateway().finish()),
|
|
};
|
|
|
|
Ok(web::HttpResponse::Ok()
|
|
.header(CONTENT_TYPE, "application/vnd.apple.mpegurl")
|
|
.body(rewritten_manifest))
|
|
}
|
|
|
|
pub async fn get_image(
|
|
req: HttpRequest,
|
|
requester: web::types::State<Requester>,
|
|
) -> Result<impl web::Responder, web::Error> {
|
|
let endpoint = req.match_info().query("endpoint").to_string();
|
|
let image_url = NoodlemagazineProxy::normalize_image_url(&endpoint);
|
|
|
|
if !NoodlemagazineProxy::is_allowed_thumb_url(&image_url) {
|
|
return Ok(web::HttpResponse::BadRequest().finish());
|
|
}
|
|
|
|
let mut requester = requester.get_ref().clone();
|
|
let _ = requester
|
|
.get_with_headers(
|
|
NoodlemagazineProxy::root_referer(),
|
|
NoodlemagazineProxy::root_html_headers(),
|
|
Some(Version::HTTP_11),
|
|
)
|
|
.await;
|
|
|
|
let mut headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
|
|
let mut upstream = requester
|
|
.get_raw_with_headers(image_url.as_str(), headers.clone())
|
|
.await
|
|
.ok();
|
|
|
|
let needs_warmup = upstream
|
|
.as_ref()
|
|
.map(|response| !response.status().is_success())
|
|
.unwrap_or(true);
|
|
|
|
if needs_warmup {
|
|
let _ = requester
|
|
.get_with_headers(image_url.as_str(), headers.clone(), Some(Version::HTTP_11))
|
|
.await;
|
|
headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
|
|
upstream = requester
|
|
.get_raw_with_headers(image_url.as_str(), headers)
|
|
.await
|
|
.ok();
|
|
}
|
|
|
|
let Some(upstream) = upstream.filter(|response| response.status().is_success()) else {
|
|
return Ok(web::HttpResponse::NotFound().finish());
|
|
};
|
|
|
|
let status = upstream.status();
|
|
let headers = upstream.headers().clone();
|
|
let content_type = headers
|
|
.get(CONTENT_TYPE)
|
|
.and_then(|value| value.to_str().ok())
|
|
.map(str::to_string)
|
|
.unwrap_or_default();
|
|
|
|
if !NoodlemagazineProxy::is_binary_image_content_type(&content_type) {
|
|
return Ok(web::HttpResponse::BadGateway().finish());
|
|
}
|
|
|
|
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
|
|
let mut resp = Response::build(status);
|
|
if !content_type.is_empty() {
|
|
resp.set_header(CONTENT_TYPE, content_type);
|
|
}
|
|
if let Some(cl) = headers.get(CONTENT_LENGTH) {
|
|
if let Ok(cl_str) = cl.to_str() {
|
|
resp.set_header(CONTENT_LENGTH, cl_str);
|
|
}
|
|
}
|
|
|
|
Ok(resp.body(bytes.to_vec()))
|
|
}
|
|
|
|
/// Unit tests for the pure helpers of `NoodlemagazineProxy`.
#[cfg(test)]
mod tests {
    use super::NoodlemagazineProxy;

    /// The playlist object is cut out of the surrounding script tag.
    #[test]
    fn extracts_playlist_from_page() {
        let html = r#"
        <script>
            window.playlist = {"sources":[{"file":"https://cdn.example/360.mp4","label":"360p"}]};
        </script>
        "#;
        let expected = r#"{"sources":[{"file":"https://cdn.example/360.mp4","label":"360p"}]}"#;

        let extracted = NoodlemagazineProxy::extract_playlist(html);

        assert_eq!(extracted, Some(expected));
    }

    /// An HLS source wins over MP4 sources.
    #[test]
    fn prefers_hls_then_highest_quality() {
        let playlist = r#"{
            "sources": [
                {"file":"https://cdn.example/360.mp4","label":"360p"},
                {"file":"https://cdn.example/720.mp4","label":"720p"},
                {"file":"https://cdn.example/master.m3u8","label":"1080p"}
            ]
        }"#;

        let best = NoodlemagazineProxy::select_best_source(playlist);

        assert_eq!(best.as_deref(), Some("https://cdn.example/master.m3u8"));
    }

    /// Segment lines and `URI="..."` attributes become absolute URLs.
    #[test]
    fn rewrites_manifest_to_direct_absolute_urls() {
        let manifest = "#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nlow/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"keys/key.bin\"\nsegment0.ts";
        let expected = "#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nhttps://cdn.example/hls/low/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"https://cdn.example/hls/keys/key.bin\"\nhttps://cdn.example/hls/segment0.ts";

        let rewritten =
            NoodlemagazineProxy::rewrite_manifest("https://cdn.example/hls/master.m3u8", manifest)
                .unwrap();

        assert_eq!(rewritten, expected);
    }

    /// HTTPS image URLs pass; non-image paths and local hosts do not.
    #[test]
    fn allows_https_image_thumbs_but_rejects_local_or_non_images() {
        let accepted = [
            "https://noodlemagazine.com/thumbs/example.webp",
            "https://cdn.example/previews/example.jpg",
        ];
        let rejected = [
            "https://noodlemagazine.com/watch/-123_456",
            "https://localhost/thumb.jpg",
        ];

        for url in accepted.iter() {
            assert!(NoodlemagazineProxy::is_allowed_thumb_url(url));
        }
        for url in rejected.iter() {
            assert!(!NoodlemagazineProxy::is_allowed_thumb_url(url));
        }
    }

    /// Only `image/*` media types count as image content.
    #[test]
    fn recognizes_binary_image_content_types() {
        let image_types = ["image/webp", "image/jpeg; charset=binary"];
        let other_types = ["text/html; charset=utf-8", "application/json"];

        for content_type in image_types.iter() {
            assert!(NoodlemagazineProxy::is_binary_image_content_type(content_type));
        }
        for content_type in other_types.iter() {
            assert!(!NoodlemagazineProxy::is_binary_image_content_type(content_type));
        }
    }
}
|