pimpbunny fix

This commit is contained in:
Simon
2026-03-20 22:08:02 +00:00
parent 259a07686d
commit 1b32df0c35

View File

@@ -1,4 +1,5 @@
use ntex::web; use ntex::web;
use regex::Regex;
use serde_json::Value; use serde_json::Value;
use url::Url; use url::Url;
use wreq::Version; use wreq::Version;
@@ -79,12 +80,65 @@ impl PimpbunnyProxy {
} }
fn extract_json_ld_video(text: &str) -> Option<Value> { fn extract_json_ld_video(text: &str) -> Option<Value> {
let json_str = text let script_regex = Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;
.split("application/ld+json\">")
.nth(1)
.and_then(|value| value.split("</script>").next())?;
serde_json::from_str(json_str).ok() for captures in script_regex.captures_iter(text) {
let raw = captures.get(1).map(|value| value.as_str().trim())?;
let parsed: Value = serde_json::from_str(raw).ok()?;
if let Some(video) = Self::find_video_object(&parsed) {
return Some(video);
}
}
None
}
fn find_video_object(parsed: &Value) -> Option<Value> {
if parsed
.get("@type")
.and_then(Value::as_str)
.is_some_and(|value| value == "VideoObject")
{
return Some(parsed.clone());
}
if parsed
.get("contentUrl")
.and_then(Value::as_str)
.is_some_and(|value| !value.trim().is_empty())
{
return Some(parsed.clone());
}
if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
for item in graph {
if item
.get("@type")
.and_then(Value::as_str)
.is_some_and(|value| value == "VideoObject")
{
return Some(item.clone());
}
if item
.get("contentUrl")
.and_then(Value::as_str)
.is_some_and(|value| !value.trim().is_empty())
{
return Some(item.clone());
}
}
}
if let Some(array) = parsed.as_array() {
for item in array {
if let Some(video) = Self::find_video_object(item) {
return Some(video);
}
}
}
None
} }
fn extract_stream_url(json_ld: &Value) -> Option<String> { fn extract_stream_url(json_ld: &Value) -> Option<String> {
@@ -95,6 +149,15 @@ impl PimpbunnyProxy {
.filter(|value| !value.is_empty()) .filter(|value| !value.is_empty())
.map(str::to_string) .map(str::to_string)
} }
fn extract_stream_url_from_html(text: &str) -> Option<String> {
Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
.ok()?
.captures(text)
.and_then(|captures| captures.get(1))
.map(|value| value.as_str().trim().to_string())
.filter(|value| !value.is_empty())
}
} }
impl crate::proxies::Proxy for PimpbunnyProxy { impl crate::proxies::Proxy for PimpbunnyProxy {
@@ -114,11 +177,10 @@ impl crate::proxies::Proxy for PimpbunnyProxy {
Err(_) => return String::new(), Err(_) => return String::new(),
}; };
let Some(json_ld) = Self::extract_json_ld_video(&text) else { Self::extract_json_ld_video(&text)
return String::new(); .and_then(|json_ld| Self::extract_stream_url(&json_ld))
}; .or_else(|| Self::extract_stream_url_from_html(&text))
.unwrap_or_default()
Self::extract_stream_url(&json_ld).unwrap_or_default()
} }
} }
@@ -157,4 +219,29 @@ mod tests {
Some("https://cdn.example/video.mp4") Some("https://cdn.example/video.mp4")
); );
} }
// A `VideoObject` that is not the first member of `@graph` must still be found
// and must yield its `contentUrl`.
#[test]
fn extracts_video_object_from_graph_script() {
    let page = r#"
<script type="application/ld+json">
{"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
</script>
"#;

    let video = PimpbunnyProxy::extract_json_ld_video(page).expect("video object should parse");
    let stream_url = PimpbunnyProxy::extract_stream_url(&video);

    assert_eq!(stream_url.as_deref(), Some("https://cdn.example/graph.mp4"));
}
// The raw-text fallback must find `contentUrl` even without a surrounding
// `<script>` element.
#[test]
fn falls_back_to_raw_content_url_match() {
    let snippet = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;

    let found = PimpbunnyProxy::extract_stream_url_from_html(snippet);

    assert_eq!(found.as_deref(), Some("https://cdn.example/fallback.mp4"));
}
} }