253 lines
7.8 KiB
Rust
253 lines
7.8 KiB
Rust
use std::sync::OnceLock;

use ntex::web;
use regex::Regex;
use serde_json::Value;
use url::Url;
use wreq::Version;

use crate::util::requester::Requester;
|
|
|
|
/// Proxy that resolves a pimpbunny.com detail-page URL to the stream URL
/// advertised in that page's markup.
///
/// The type holds no state. `Default` is derived so the argument-less
/// constructor has the matching standard trait.
#[derive(Debug, Clone, Default)]
pub struct PimpbunnyProxy {}
|
|
|
|
impl PimpbunnyProxy {
|
|
const FIREFOX_USER_AGENT: &'static str =
|
|
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
|
|
const HTML_ACCEPT: &'static str =
|
|
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
|
|
|
pub fn new() -> Self {
|
|
PimpbunnyProxy {}
|
|
}
|
|
|
|
fn normalize_detail_url(url: &str) -> Option<String> {
|
|
let normalized = if url.starts_with("http://") || url.starts_with("https://") {
|
|
url.to_string()
|
|
} else {
|
|
format!("https://{}", url.trim_start_matches('/'))
|
|
};
|
|
|
|
Self::is_allowed_detail_url(&normalized).then_some(normalized)
|
|
}
|
|
|
|
fn is_allowed_detail_url(url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else {
|
|
return false;
|
|
};
|
|
|
|
matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
|
|
&& !url.path().starts_with("/contents/videos_screenshots/")
|
|
}
|
|
|
|
fn root_referer() -> &'static str {
|
|
"https://pimpbunny.com/"
|
|
}
|
|
|
|
fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("Referer".to_string(), referer.to_string()),
|
|
(
|
|
"User-Agent".to_string(),
|
|
Self::FIREFOX_USER_AGENT.to_string(),
|
|
),
|
|
("Accept".to_string(), Self::HTML_ACCEPT.to_string()),
|
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
|
]
|
|
}
|
|
|
|
fn headers_with_cookies(
|
|
requester: &Requester,
|
|
request_url: &str,
|
|
referer: &str,
|
|
) -> Vec<(String, String)> {
|
|
let mut headers = Self::html_headers_with_referer(referer);
|
|
if let Some(cookie) = requester.cookie_header_for_url(request_url) {
|
|
headers.push(("Cookie".to_string(), cookie));
|
|
}
|
|
headers
|
|
}
|
|
|
|
async fn warm_root_session(requester: &mut Requester) {
|
|
let _ = requester
|
|
.get_with_headers(
|
|
Self::root_referer(),
|
|
Self::html_headers_with_referer(Self::root_referer()),
|
|
Some(Version::HTTP_11),
|
|
)
|
|
.await;
|
|
}
|
|
|
|
fn extract_json_ld_video(text: &str) -> Option<Value> {
|
|
let script_regex =
|
|
Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;
|
|
|
|
for captures in script_regex.captures_iter(text) {
|
|
let raw = captures.get(1).map(|value| value.as_str().trim())?;
|
|
let parsed: Value = serde_json::from_str(raw).ok()?;
|
|
|
|
if let Some(video) = Self::find_video_object(&parsed) {
|
|
return Some(video);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
fn find_video_object(parsed: &Value) -> Option<Value> {
|
|
if parsed
|
|
.get("@type")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| value == "VideoObject")
|
|
{
|
|
return Some(parsed.clone());
|
|
}
|
|
|
|
if parsed
|
|
.get("contentUrl")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| !value.trim().is_empty())
|
|
{
|
|
return Some(parsed.clone());
|
|
}
|
|
|
|
if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
|
|
for item in graph {
|
|
if item
|
|
.get("@type")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| value == "VideoObject")
|
|
{
|
|
return Some(item.clone());
|
|
}
|
|
if item
|
|
.get("contentUrl")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| !value.trim().is_empty())
|
|
{
|
|
return Some(item.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(array) = parsed.as_array() {
|
|
for item in array {
|
|
if let Some(video) = Self::find_video_object(item) {
|
|
return Some(video);
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
fn extract_stream_url(json_ld: &Value) -> Option<String> {
|
|
json_ld
|
|
.get("contentUrl")
|
|
.and_then(Value::as_str)
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(str::to_string)
|
|
}
|
|
|
|
fn extract_stream_url_from_html(text: &str) -> Option<String> {
|
|
Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
|
|
.ok()?
|
|
.captures(text)
|
|
.and_then(|captures| captures.get(1))
|
|
.map(|value| value.as_str().trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
}
|
|
}
|
|
|
|
impl crate::proxies::Proxy for PimpbunnyProxy {
|
|
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
|
|
let Some(detail_url) = Self::normalize_detail_url(&url) else {
|
|
return String::new();
|
|
};
|
|
|
|
let mut requester = requester.get_ref().clone();
|
|
Self::warm_root_session(&mut requester).await;
|
|
let headers = Self::headers_with_cookies(&requester, &detail_url, Self::root_referer());
|
|
let text = match requester
|
|
.get_with_headers(&detail_url, headers, Some(Version::HTTP_2))
|
|
.await
|
|
{
|
|
Ok(text) => text,
|
|
Err(_) => return String::new(),
|
|
};
|
|
|
|
Self::extract_json_ld_video(&text)
|
|
.and_then(|json_ld| Self::extract_stream_url(&json_ld))
|
|
.or_else(|| Self::extract_stream_url_from_html(&text))
|
|
.unwrap_or_default()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::PimpbunnyProxy;

    /// The allow-list accepts HTTPS pages on the two pimpbunny hosts and
    /// rejects plain HTTP, screenshot paths and foreign hosts.
    #[test]
    fn allows_only_pimpbunny_detail_urls() {
        let accepted = [
            "https://pimpbunny.com/videos/example-video/",
            "https://www.pimpbunny.com/video/example/",
        ];
        let rejected = [
            "http://pimpbunny.com/videos/example-video/",
            "https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg",
            "https://example.com/videos/example-video/",
        ];

        for url in accepted {
            assert!(PimpbunnyProxy::is_allowed_detail_url(url));
        }
        for url in rejected {
            assert!(!PimpbunnyProxy::is_allowed_detail_url(url));
        }
    }

    /// A JSON-LD script holding only `contentUrl` yields that URL.
    #[test]
    fn extracts_content_url_from_json_ld() {
        let page = r#"
            <script type="application/ld+json">{"contentUrl":"https://cdn.example/video.mp4"}</script>
        "#;

        let video = PimpbunnyProxy::extract_json_ld_video(page).expect("json-ld should parse");
        let stream_url = PimpbunnyProxy::extract_stream_url(&video);

        assert_eq!(stream_url.as_deref(), Some("https://cdn.example/video.mp4"));
    }

    /// A `VideoObject` nested inside `@graph` is found after other entries.
    #[test]
    fn extracts_video_object_from_graph_script() {
        let page = r#"
            <script type="application/ld+json">
            {"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
            </script>
        "#;

        let video =
            PimpbunnyProxy::extract_json_ld_video(page).expect("video object should parse");
        let stream_url = PimpbunnyProxy::extract_stream_url(&video);

        assert_eq!(stream_url.as_deref(), Some("https://cdn.example/graph.mp4"));
    }

    /// The raw-text fallback reads `contentUrl` without any script tag.
    #[test]
    fn falls_back_to_raw_content_url_match() {
        let page = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;

        let stream_url = PimpbunnyProxy::extract_stream_url_from_html(page);

        assert_eq!(
            stream_url.as_deref(),
            Some("https://cdn.example/fallback.mp4")
        );
    }
}
|