archivebate fix

2026-04-22 10:18:15 +00:00
parent 47631b8a70
commit a47a69962f
4 changed files with 474 additions and 37 deletions
--- a/src/proxies/archivebate.rs
+++ b/src/proxies/archivebate.rs
@@ -0,0 +1,321 @@
+use std::time::Duration as StdDuration;
+
+use ntex::web;
+use regex::Regex;
+use scraper::{Html, Selector};
+use url::Url;
+use wreq::Version;
+
+use crate::util::requester::Requester;
+
+/// Desktop Firefox (Linux) user-agent string; `html_headers` sends it as the `User-Agent` header.
+const FIREFOX_UA: &str =
+    "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
+
+/// Proxy that turns an archivebate.com watch-page URL into the media URL found in the page's
+/// Mixdrop embed.
+///
+/// The struct has no fields; the resolution logic lives in its associated functions.
+#[derive(Debug, Clone)]
+pub struct ArchivebateProxy {}
+
+impl ArchivebateProxy {
+    /// Creates the proxy. The type holds no state, so there is nothing to configure.
+    pub fn new() -> Self {
+        ArchivebateProxy {}
+    }
+
+    /// Turns the raw proxy endpoint into a full detail-page URL, or `None` when it is unusable.
+    ///
+    /// Surrounding whitespace and leading slashes are stripped first. Input without an explicit
+    /// `http://` / `https://` prefix gets `https://` prepended. The candidate is returned only
+    /// when [`Self::is_allowed_detail_url`] accepts it.
+    fn normalize_detail_request(endpoint: &str) -> Option<String> {
+        let cleaned = endpoint.trim().trim_start_matches('/');
+        if cleaned.is_empty() {
+            return None;
+        }
+
+        let has_scheme = ["http://", "https://"]
+            .iter()
+            .any(|prefix| cleaned.starts_with(*prefix));
+        let candidate = if has_scheme {
+            cleaned.to_owned()
+        } else {
+            // `cleaned` no longer starts with a slash, so plain concatenation is enough.
+            format!("https://{cleaned}")
+        };
+
+        if Self::is_allowed_detail_url(&candidate) {
+            Some(candidate)
+        } else {
+            None
+        }
+    }
+
+    /// Allow-list check for detail pages.
+    ///
+    /// Returns `true` only when `url` parses, uses the `https` scheme, has the host
+    /// `archivebate.com` or `www.archivebate.com`, and its path starts with `/watch/`.
+    fn is_allowed_detail_url(url: &str) -> bool {
+        let Ok(target) = Url::parse(url) else {
+            return false;
+        };
+        let host_ok = matches!(
+            target.host_str(),
+            Some("archivebate.com") | Some("www.archivebate.com")
+        );
+        target.scheme() == "https" && host_ok && target.path().starts_with("/watch/")
+    }
+
+    /// Parses `url` and returns its host in ASCII lowercase.
+    ///
+    /// Yields `None` when the URL does not parse or has no host component.
+    fn host_from_url(url: &str) -> Option<String> {
+        Url::parse(url)
+            .ok()
+            .and_then(|parsed| parsed.host_str().map(str::to_ascii_lowercase))
+    }
+
+    /// Reports whether the host of `url` contains `mixdrop` or `m1xdrop`.
+    ///
+    /// This is a substring match on the lowercased host; a URL without a usable host is
+    /// treated as not Mixdrop.
+    fn is_mixdrop_host(url: &str) -> bool {
+        match Self::host_from_url(url) {
+            Some(host) => ["mixdrop", "m1xdrop"]
+                .iter()
+                .any(|needle| host.contains(*needle)),
+            None => false,
+        }
+    }
+
+    /// Builds the header list used for HTML page requests.
+    ///
+    /// `referer` becomes the `Referer` value; the remaining headers (`User-Agent`, `Accept`,
+    /// `Accept-Language`) are fixed. Header order matches the listing below.
+    fn html_headers(referer: &str) -> Vec<(String, String)> {
+        const ACCEPT: &str =
+            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
+
+        let pairs = [
+            ("Referer", referer),
+            ("User-Agent", FIREFOX_UA),
+            ("Accept", ACCEPT),
+            ("Accept-Language", "en-US,en;q=0.9"),
+        ];
+        pairs
+            .iter()
+            .map(|&(name, value)| (name.to_string(), value.to_string()))
+            .collect()
+    }
+
+    /// Returns the `src` attribute of the first `<iframe>` carrying one, exactly as written
+    /// in the document (no URL resolution). `None` when no such element exists.
+    fn first_iframe_source_from_html(html: &str) -> Option<String> {
+        let iframe_selector = Selector::parse("iframe[src]").ok()?;
+        let page = Html::parse_document(html);
+        let first_iframe = page.select(&iframe_selector).next()?;
+        let src = first_iframe.value().attr("src")?;
+        Some(src.to_owned())
+    }
+
+    /// Reads the `value` of the first `<input name='fid'>` element in the detail page.
+    ///
+    /// The value is trimmed; a missing element or a value that is blank after trimming
+    /// gives `None`.
+    fn download_fid_from_detail_html(html: &str) -> Option<String> {
+        let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
+        let page = Html::parse_document(html);
+        let input = page.select(&fid_selector).next()?;
+        let fid = input.value().attr("value")?.trim();
+        if fid.is_empty() {
+            None
+        } else {
+            Some(fid.to_owned())
+        }
+    }
+
+    /// Rewrites a Mixdrop `/e/<id>` or `/f/<id>` link into its `/e/<id>` embed form.
+    ///
+    /// Returns `None` when the URL does not parse, its host does not contain `mixdrop` /
+    /// `m1xdrop`, the first non-empty path segment is neither `e` nor `f` (case-insensitive),
+    /// or no media id follows. Scheme and host are carried over; everything after the media
+    /// id (further segments, query, fragment) is dropped.
+    fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
+        let link = Url::parse(url).ok()?;
+        let host = link.host_str()?;
+        let lowered = host.to_ascii_lowercase();
+        let looks_like_mixdrop = lowered.contains("mixdrop") || lowered.contains("m1xdrop");
+        if !looks_like_mixdrop {
+            return None;
+        }
+
+        let mut parts = link.path_segments()?.filter(|part| !part.is_empty());
+        let kind = parts.next()?;
+        if !(kind.eq_ignore_ascii_case("e") || kind.eq_ignore_ascii_case("f")) {
+            return None;
+        }
+        let media_id = parts.next().map(str::trim).filter(|id| !id.is_empty())?;
+
+        let scheme = link.scheme();
+        Some(format!("{scheme}://{host}/e/{media_id}"))
+    }
+
+    /// Trims `value` and, when it is protocol-relative (starts with `//`), prefixes `https:`.
+    /// Any other input is returned trimmed but otherwise unchanged.
+    fn normalize_possible_protocol_relative(value: &str) -> String {
+        let candidate = value.trim();
+        let scheme = if candidate.starts_with("//") { "https:" } else { "" };
+        format!("{scheme}{candidate}")
+    }
+
+    /// Finds the `MDCore.wurl = "..."` assignment in a Mixdrop embed page and returns its value.
+    ///
+    /// The raw HTML is searched first. If the assignment is not there, the packed
+    /// `eval(function(p,a,c,k,e,d){...})` script is unpacked via
+    /// [`Self::parse_mixin_packed_eval`] and searched instead. A protocol-relative result
+    /// (`//host/...`) is given an `https:` prefix. Returns `None` when neither search matches.
+    fn extract_mixdrop_media_url(html: &str) -> Option<String> {
+        // One pattern serves both the raw page and the unpacked script, so compile it once.
+        let wurl_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
+        let find_wurl = |text: &str| -> Option<String> {
+            let raw = wurl_regex.captures(text)?.get(1)?.as_str();
+            Some(Self::normalize_possible_protocol_relative(raw))
+        };
+
+        if let Some(url) = find_wurl(html) {
+            return Some(url);
+        }
+
+        // No plain assignment in the page: fall back to the packed script.
+        let unpacked = Self::parse_mixin_packed_eval(html)?;
+        find_wurl(&unpacked)
+    }
+
+    /// Unpacks the first `eval(function(p,a,c,k,e,d){...}('payload',radix,count,'a|b|c'.split('|')`
+    /// script found in `html` and returns the expanded source text.
+    ///
+    /// Every word in the payload that is the base-`radix` spelling of an index below `count`
+    /// is replaced by the token at that index; empty tokens leave the word untouched.
+    /// Returns `None` when no packed script is found, a number fails to parse, or `radix`
+    /// is outside `2..=36`.
+    fn parse_mixin_packed_eval(html: &str) -> Option<String> {
+        let eval_regex = Regex::new(
+            r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
+        )
+        .ok()?;
+        let captures = eval_regex.captures(html)?;
+        let payload_raw = captures.name("payload")?.as_str();
+        let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
+        let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
+        if !(2..=36).contains(&radix) {
+            return None;
+        }
+
+        let payload = Self::unescape_js_single_quoted(payload_raw);
+        let tokens = captures.name("tokens")?.as_str().split('|').collect::<Vec<_>>();
+
+        // `count` comes from the remote page. Indices past the token list can never
+        // substitute anything, so cap the work at the number of tokens actually present.
+        let usable = count.min(tokens.len());
+        let mut dictionary = std::collections::HashMap::with_capacity(usable);
+        for (index, token) in tokens.iter().take(usable).enumerate() {
+            if !token.is_empty() {
+                dictionary.insert(Self::to_radix(index, radix), *token);
+            }
+        }
+
+        // Single pass over the payload with one compiled pattern. The closure's return
+        // value is inserted literally, so a `$` inside a token is not read as a capture
+        // reference, and substituted text is never scanned again.
+        let word_regex = Regex::new(r"\b\w+\b").ok()?;
+        let unpacked = word_regex.replace_all(&payload, |found: &regex::Captures<'_>| {
+            let word = &found[0];
+            dictionary.get(word).copied().unwrap_or(word).to_string()
+        });
+
+        Some(unpacked.into_owned())
+    }
+
+    /// Decodes backslash escapes in the body of a JavaScript string literal.
+    ///
+    /// `\n`, `\r` and `\t` become the corresponding control characters; for any other
+    /// escaped character the backslash is dropped and the character kept as-is (this covers
+    /// `\\`, `\'` and `\"`). A lone backslash at the very end of the input is discarded.
+    fn unescape_js_single_quoted(value: &str) -> String {
+        let mut decoded = String::with_capacity(value.len());
+        let mut pending_escape = false;
+
+        for current in value.chars() {
+            if pending_escape {
+                pending_escape = false;
+                decoded.push(match current {
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    other => other,
+                });
+            } else if current == '\\' {
+                pending_escape = true;
+            } else {
+                decoded.push(current);
+            }
+        }
+
+        decoded
+    }
+
+    /// Spells `value` in the given `radix` using the digit alphabet `0-9`, `a-z`, `A-Z`.
+    ///
+    /// Digits 36..=61 map to `A`..=`Z`, the same digit set the p.a.c.k.e.d encoder uses for
+    /// bases above 36. A `radix` outside `2..=62` is clamped into that range rather than
+    /// dividing by zero (radix 0), looping forever (radix 1), or indexing past the alphabet
+    /// (radix above 62). Zero is rendered as `"0"`.
+    fn to_radix(mut value: usize, radix: u32) -> String {
+        const DIGITS: &[u8; 62] =
+            b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+        // After clamping, `value % base` is always a valid index into `DIGITS`.
+        let base = radix.clamp(2, 62) as usize;
+
+        // Digits come out least-significant first; reverse them when building the string.
+        let mut reversed = Vec::new();
+        loop {
+            reversed.push(DIGITS[value % base]);
+            value /= base;
+            if value == 0 {
+                break;
+            }
+        }
+        reversed.iter().rev().map(|&digit| char::from(digit)).collect()
+    }
+
+    /// Makes a link found on an archivebate page absolute.
+    ///
+    /// * `http://` / `https://` links are returned unchanged.
+    /// * Protocol-relative links (`//host/...`) get an `https:` prefix.
+    /// * Anything else is treated as a path on `https://archivebate.com/`, with leading
+    ///   slashes removed before joining.
+    fn absolute_url(value: &str) -> String {
+        match value {
+            full if full.starts_with("http://") || full.starts_with("https://") => full.to_owned(),
+            relative if relative.starts_with("//") => format!("https:{relative}"),
+            path => format!("https://archivebate.com/{}", path.trim_start_matches('/')),
+        }
+    }
+
+    /// Downloads a Mixdrop embed page and extracts the media URL from it.
+    ///
+    /// `detail_url` is sent as the `Referer` header. The request is issued through
+    /// `requester` with an 8-second duration passed as its timeout argument. Returns `None`
+    /// when the request fails, the status is not a success, the body cannot be read, or no
+    /// media URL is found in the body.
+    async fn resolve_mixdrop_media_from_embed(
+        detail_url: &str,
+        embed_url: &str,
+        requester: &mut Requester,
+    ) -> Option<String> {
+        let timeout = Some(StdDuration::from_secs(8));
+        let headers = Self::html_headers(detail_url);
+        let response = requester
+            .get_raw_with_headers_timeout(embed_url, headers, timeout)
+            .await
+            .ok()?;
+
+        if response.status().is_success() {
+            let body = response.text().await.ok()?;
+            Self::extract_mixdrop_media_url(&body)
+        } else {
+            None
+        }
+    }
+}
+
+impl crate::proxies::Proxy for ArchivebateProxy {
+    /// Resolves an archivebate watch-page reference to a media URL.
+    ///
+    /// Steps:
+    /// 1. Normalise and allow-list `url`; anything else yields an empty string.
+    /// 2. Fetch the detail page (HTTP/1.1, archivebate home page as `Referer`).
+    /// 3. Try the first iframe on the page if it points at a Mixdrop host.
+    /// 4. Otherwise try the `fid` form value, rewritten to a Mixdrop embed link.
+    ///
+    /// An empty string is returned whenever no media URL can be determined.
+    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
+        let Some(detail_url) = Self::normalize_detail_request(&url) else {
+            return String::new();
+        };
+
+        let mut client = requester.get_ref().clone();
+        let page = client
+            .get_with_headers(
+                &detail_url,
+                Self::html_headers("https://archivebate.com/"),
+                Some(Version::HTTP_11),
+            )
+            .await
+            .unwrap_or_default();
+        if page.is_empty() {
+            return String::new();
+        }
+
+        // First choice: the embedded player iframe, provided it is served from Mixdrop.
+        let iframe_embed = Self::first_iframe_source_from_html(&page)
+            .map(|src| Self::absolute_url(&src))
+            .filter(|src| Self::is_mixdrop_host(src));
+        if let Some(embed_url) = iframe_embed {
+            let resolved =
+                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut client).await;
+            if let Some(media_url) = resolved {
+                return media_url;
+            }
+        }
+
+        // Second choice: the `fid` form value, rewritten into a Mixdrop embed link.
+        let download_embed = Self::download_fid_from_detail_html(&page)
+            .map(|fid| Self::absolute_url(&fid))
+            .and_then(|link| Self::mixdrop_embed_url_from_download_url(&link));
+        if let Some(embed_url) = download_embed {
+            let resolved =
+                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut client).await;
+            if let Some(media_url) = resolved {
+                return media_url;
+            }
+        }
+
+        String::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ArchivebateProxy;
+
+    /// A scheme-less `host/path` input gains an `https://` prefix.
+    #[test]
+    fn normalizes_detail_request() {
+        let normalized = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456");
+        assert_eq!(
+            normalized,
+            Some("https://archivebate.com/watch/123456".to_string())
+        );
+    }
+
+    /// Paths outside `/watch/` are refused even on the right host.
+    #[test]
+    fn rejects_non_watch_paths() {
+        let rejected = ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test");
+        assert_eq!(rejected, None);
+    }
+
+    /// The media URL is recovered from a packed `eval(...)` script and made absolute.
+    #[test]
+    fn extracts_mixdrop_wurl_from_packed_eval() {
+        let expected =
+            "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168";
+        let html = r#"
+<script>
+eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//o230m5y6z.3.4/5/6.7?8=9&a=b";',12,12,'|MDCore|wurl|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
+</script>
+"#;
+
+        let extracted = ArchivebateProxy::extract_mixdrop_media_url(html)
+            .expect("expected extracted media url");
+        assert_eq!(extracted, expected);
+    }
+}
--- a/src/proxies/mod.rs
+++ b/src/proxies/mod.rs
@@ -1,3 +1,4 @@
+use crate::proxies::archivebate::ArchivebateProxy;
 use crate::proxies::doodstream::DoodstreamProxy;
 use crate::proxies::heavyfetish::HeavyfetishProxy;
 use crate::proxies::hqporner::HqpornerProxy;
@@ -11,6 +12,7 @@ use crate::proxies::spankbang::SpankbangProxy;
 use crate::proxies::vjav::VjavProxy;
 use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};

+pub mod archivebate;
 pub mod doodstream;
 pub mod hanimecdn;
 pub mod heavyfetish;
@@ -30,6 +32,7 @@ pub mod vjav;

 #[derive(Debug, Clone)]
 pub enum AnyProxy {
+    Archivebate(ArchivebateProxy),
    Doodstream(DoodstreamProxy),
    Sxyprn(SxyprnProxy),
    Javtiful(javtiful::JavtifulProxy),
@@ -50,6 +53,7 @@ pub trait Proxy {
 impl Proxy for AnyProxy {
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        match self {
+            AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await,
            AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
            AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
            AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,