hottub/src/proxies/pimpbunny.rs

use ntex::web;
use regex::Regex;
use serde_json::Value;
use url::Url;
use wreq::Version;

use crate::util::requester::Requester;

/// Proxy that resolves a Pimpbunny detail-page URL to the video stream URL
/// advertised in that page's markup.
///
/// The type holds no state; all helpers are associated functions.
#[derive(Debug, Clone)]
pub struct PimpbunnyProxy {}

impl PimpbunnyProxy {
    /// `User-Agent` header value placed in the headers built by
    /// `html_headers_with_referer`.
    const FIREFOX_USER_AGENT: &'static str =
        "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
    /// `Accept` header value placed in the headers built by
    /// `html_headers_with_referer`.
    const HTML_ACCEPT: &'static str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

    pub fn new() -> Self {
        PimpbunnyProxy {}
    }

    /// Turns caller input into an absolute detail-page URL, or rejects it.
    ///
    /// Input that already starts with `http://` or `https://` is kept as is;
    /// anything else has its leading slashes removed and `https://` prepended.
    /// The candidate is returned only if `is_allowed_detail_url` accepts it.
    fn normalize_detail_url(url: &str) -> Option<String> {
        let has_scheme = ["http://", "https://"]
            .iter()
            .any(|scheme| url.starts_with(*scheme));

        let candidate = if has_scheme {
            url.to_owned()
        } else {
            let without_slashes = url.trim_start_matches('/');
            format!("https://{}", without_slashes)
        };

        if Self::is_allowed_detail_url(&candidate) {
            Some(candidate)
        } else {
            None
        }
    }

    /// Reports whether `url` may be fetched as a detail page.
    ///
    /// A URL is accepted only when it parses, uses the `https` scheme, has the
    /// host `pimpbunny.com` or `www.pimpbunny.com`, and its path does not start
    /// with `/contents/videos_screenshots/`.
    fn is_allowed_detail_url(url: &str) -> bool {
        match Url::parse(url) {
            Err(_) => false,
            Ok(parsed) => {
                let is_https = parsed.scheme() == "https";
                let host_allowed = matches!(
                    parsed.host_str(),
                    Some("pimpbunny.com" | "www.pimpbunny.com")
                );
                let is_screenshot = parsed
                    .path()
                    .starts_with("/contents/videos_screenshots/");

                is_https && host_allowed && !is_screenshot
            }
        }
    }

    /// Returns the site root URL, used both as the `Referer` value and as the
    /// address of the warm-up request.
    fn root_referer() -> &'static str {
        const SITE_ROOT: &str = "https://pimpbunny.com/";
        SITE_ROOT
    }

    /// Builds the header list for an HTML page request.
    ///
    /// The list contains, in order: `Referer` (the given `referer`),
    /// `User-Agent`, `Accept` and `Accept-Language`.
    fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
        let pairs: [(&str, &str); 4] = [
            ("Referer", referer),
            ("User-Agent", Self::FIREFOX_USER_AGENT),
            ("Accept", Self::HTML_ACCEPT),
            ("Accept-Language", "en-US,en;q=0.9"),
        ];

        pairs
            .iter()
            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
            .collect()
    }

    /// Builds the HTML request headers for `request_url` and appends a
    /// `Cookie` header when the requester supplies one for that URL.
    fn headers_with_cookies(
        requester: &Requester,
        request_url: &str,
        referer: &str,
    ) -> Vec<(String, String)> {
        let mut headers = Self::html_headers_with_referer(referer);
        // `extend` over an `Option` appends zero or one entry.
        headers.extend(
            requester
                .cookie_header_for_url(request_url)
                .map(|cookie| ("Cookie".to_string(), cookie)),
        );
        headers
    }

    /// Requests the site root over HTTP/1.1 before the detail page is fetched.
    ///
    /// The outcome is deliberately ignored: this is a best-effort step.
    /// NOTE(review): presumably this lets the requester pick up session
    /// cookies for the follow-up request; confirm against `Requester`.
    async fn warm_root_session(requester: &mut Requester) {
        let root = Self::root_referer();
        let headers = Self::html_headers_with_referer(root);
        let _ = requester
            .get_with_headers(root, headers, Some(Version::HTTP_11))
            .await;
    }

    fn extract_json_ld_video(text: &str) -> Option<Value> {
        let script_regex =
            Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;

        for captures in script_regex.captures_iter(text) {
            let raw = captures.get(1).map(|value| value.as_str().trim())?;
            let parsed: Value = serde_json::from_str(raw).ok()?;

            if let Some(video) = Self::find_video_object(&parsed) {
                return Some(video);
            }
        }

        None
    }

    /// Locates a video-like node inside a parsed JSON-LD value.
    ///
    /// A node counts as video-like when its `@type` is the string
    /// `"VideoObject"` or it has a non-blank string `contentUrl`. The search
    /// order is: the value itself, then the direct entries of its `@graph`
    /// array, then (recursively) the elements of the value when it is an
    /// array. The first match is returned as a clone.
    fn find_video_object(parsed: &Value) -> Option<Value> {
        /// True when `node` is typed as a `VideoObject` or carries a
        /// non-blank `contentUrl` string.
        fn looks_like_video(node: &Value) -> bool {
            let typed_as_video =
                node.get("@type").and_then(Value::as_str) == Some("VideoObject");
            let has_content_url = node
                .get("contentUrl")
                .and_then(Value::as_str)
                .is_some_and(|content_url| !content_url.trim().is_empty());

            typed_as_video || has_content_url
        }

        if looks_like_video(parsed) {
            return Some(parsed.clone());
        }

        // `@graph` entries are checked one level deep only.
        let graph_hit = parsed
            .get("@graph")
            .and_then(Value::as_array)
            .and_then(|nodes| nodes.iter().find(|node| looks_like_video(node)));
        if let Some(node) = graph_hit {
            return Some(node.clone());
        }

        parsed
            .as_array()?
            .iter()
            .find_map(|element| Self::find_video_object(element))
    }

    fn extract_stream_url(json_ld: &Value) -> Option<String> {
        json_ld
            .get("contentUrl")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .map(str::to_string)
    }

    fn extract_stream_url_from_html(text: &str) -> Option<String> {
        Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
            .ok()?
            .captures(text)
            .and_then(|captures| captures.get(1))
            .map(|value| value.as_str().trim().to_string())
            .filter(|value| !value.is_empty())
    }
}

impl crate::proxies::Proxy for PimpbunnyProxy {
    /// Resolves `url` to the stream URL advertised by the detail page.
    ///
    /// Steps: validate and normalize `url`, request the site root, fetch the
    /// detail page over HTTP/2 with any cookies the requester supplies, then
    /// read `contentUrl` from JSON-LD, falling back to a raw text match.
    ///
    /// Returns an empty string when the URL is rejected, the page request
    /// fails, or no stream URL can be found.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let detail_url = match Self::normalize_detail_url(&url) {
            Some(allowed) => allowed,
            None => return String::new(),
        };

        // A local clone is used because the warm-up step needs `&mut` access.
        let mut session = requester.get_ref().clone();
        Self::warm_root_session(&mut session).await;

        let request_headers =
            Self::headers_with_cookies(&session, &detail_url, Self::root_referer());
        let Ok(page) = session
            .get_with_headers(&detail_url, request_headers, Some(Version::HTTP_2))
            .await
        else {
            return String::new();
        };

        let from_json_ld = Self::extract_json_ld_video(&page)
            .and_then(|video| Self::extract_stream_url(&video));
        match from_json_ld {
            Some(stream_url) => stream_url,
            None => Self::extract_stream_url_from_html(&page).unwrap_or_default(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::PimpbunnyProxy;

    /// Only HTTPS pages on the two Pimpbunny hosts pass; plain HTTP,
    /// screenshot paths and foreign hosts are rejected.
    #[test]
    fn allows_only_pimpbunny_detail_urls() {
        let accepted = [
            "https://pimpbunny.com/videos/example-video/",
            "https://www.pimpbunny.com/video/example/",
        ];
        let rejected = [
            "http://pimpbunny.com/videos/example-video/",
            "https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg",
            "https://example.com/videos/example-video/",
        ];

        for candidate in accepted.iter() {
            assert!(
                PimpbunnyProxy::is_allowed_detail_url(candidate),
                "{candidate} should be allowed"
            );
        }
        for candidate in rejected.iter() {
            assert!(
                !PimpbunnyProxy::is_allowed_detail_url(candidate),
                "{candidate} should be rejected"
            );
        }
    }

    /// A bare object with `contentUrl` inside a JSON-LD script is picked up.
    #[test]
    fn extracts_content_url_from_json_ld() {
        let page = r#"
            <script type="application/ld+json">{"contentUrl":"https://cdn.example/video.mp4"}</script>
        "#;

        let video = PimpbunnyProxy::extract_json_ld_video(page).expect("json-ld should parse");
        let stream_url = PimpbunnyProxy::extract_stream_url(&video);

        assert_eq!(stream_url.as_deref(), Some("https://cdn.example/video.mp4"));
    }

    /// A `VideoObject` nested in `@graph` is found past unrelated entries.
    #[test]
    fn extracts_video_object_from_graph_script() {
        let page = r#"
            <script type="application/ld+json">
                {"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
            </script>
        "#;

        let video =
            PimpbunnyProxy::extract_json_ld_video(page).expect("video object should parse");
        let stream_url = PimpbunnyProxy::extract_stream_url(&video);

        assert_eq!(stream_url.as_deref(), Some("https://cdn.example/graph.mp4"));
    }

    /// The raw-text fallback matches `contentUrl` outside any script tag.
    #[test]
    fn falls_back_to_raw_content_url_match() {
        let page = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;

        let stream_url = PimpbunnyProxy::extract_stream_url_from_html(page);

        assert_eq!(
            stream_url.as_deref(),
            Some("https://cdn.example/fallback.mp4")
        );
    }
}