archivebate fix

This commit is contained in:
Simon
2026-04-22 10:18:15 +00:00
parent 47631b8a70
commit a47a69962f
4 changed files with 474 additions and 37 deletions

321
src/proxies/archivebate.rs Normal file
View File

@@ -0,0 +1,321 @@
use std::time::Duration as StdDuration;
use ntex::web;
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
/// User-Agent value placed in the request headers built by `html_headers`;
/// it presents the client as desktop Firefox 146 on Linux.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
/// Proxy that turns an archivebate.com watch-page URL into a direct media URL.
///
/// The type has no fields; all helpers are associated functions.
#[derive(Debug, Clone)]
pub struct ArchivebateProxy {}
impl ArchivebateProxy {
pub fn new() -> Self {
Self {}
}
/// Turns a raw endpoint into a full watch-page URL, or `None` if it is not allowed.
///
/// Surrounding whitespace and leading slashes are stripped. An endpoint without an
/// explicit `http://`/`https://` scheme is prefixed with `https://`. The result is
/// only returned when `is_allowed_detail_url` accepts it.
fn normalize_detail_request(endpoint: &str) -> Option<String> {
    let cleaned = endpoint.trim().trim_start_matches('/');
    if cleaned.is_empty() {
        return None;
    }

    let has_scheme = cleaned.starts_with("http://") || cleaned.starts_with("https://");
    let candidate = match has_scheme {
        true => cleaned.to_owned(),
        // `cleaned` no longer starts with '/', so it can be appended directly.
        false => format!("https://{cleaned}"),
    };

    if Self::is_allowed_detail_url(&candidate) {
        Some(candidate)
    } else {
        None
    }
}
/// Returns `true` only for `https` URLs on `archivebate.com` / `www.archivebate.com`
/// whose path starts with `/watch/`. Unparseable input yields `false`.
fn is_allowed_detail_url(url: &str) -> bool {
    match Url::parse(url) {
        Ok(parsed) if parsed.scheme() == "https" => {
            let host_allowed = matches!(
                parsed.host_str(),
                Some("archivebate.com" | "www.archivebate.com")
            );
            host_allowed && parsed.path().starts_with("/watch/")
        }
        _ => false,
    }
}
/// Parses `url` and returns its host in ASCII lowercase, or `None` when the URL
/// does not parse or has no host.
fn host_from_url(url: &str) -> Option<String> {
    Url::parse(url)
        .ok()
        .and_then(|parsed| parsed.host_str().map(str::to_ascii_lowercase))
}
/// Reports whether the host of `url` contains `mixdrop` or `m1xdrop`.
/// URLs without a parseable host are never considered a match.
fn is_mixdrop_host(url: &str) -> bool {
    match Self::host_from_url(url) {
        Some(host) => ["mixdrop", "m1xdrop"]
            .iter()
            .any(|&needle| host.contains(needle)),
        None => false,
    }
}
/// Builds the header list used for HTML page requests: the given `referer`,
/// the shared Firefox User-Agent, and browser-like `Accept` / `Accept-Language` values.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs: [(&str, &str); 4] = [
        ("Referer", referer),
        ("User-Agent", FIREFOX_UA),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];

    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Returns the `src` attribute of the first `<iframe src=...>` in `html`, if any.
fn first_iframe_source_from_html(html: &str) -> Option<String> {
    let iframe_selector = Selector::parse("iframe[src]").ok()?;
    let dom = Html::parse_document(html);
    let first_iframe = dom.select(&iframe_selector).next()?;
    first_iframe.value().attr("src").map(String::from)
}
/// Returns the trimmed `value` of the first `<input name='fid' value=...>` in `html`.
/// A missing element or a blank value yields `None`.
fn download_fid_from_detail_html(html: &str) -> Option<String> {
    let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
    let dom = Html::parse_document(html);
    let raw_value = dom.select(&fid_selector).next()?.value().attr("value")?;

    match raw_value.trim() {
        "" => None,
        fid => Some(fid.to_owned()),
    }
}
/// Rewrites a mixdrop `/e/<id>` or `/f/<id>` URL into its `/e/<id>` embed form.
///
/// Returns `None` when the URL does not parse, the host does not contain
/// `mixdrop`/`m1xdrop`, the first non-empty path segment is neither `e` nor `f`
/// (case-insensitive), or no media id follows it. Scheme and host are kept as parsed.
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?;

    let lowered_host = host.to_ascii_lowercase();
    let looks_like_mixdrop = ["mixdrop", "m1xdrop"]
        .iter()
        .any(|&needle| lowered_host.contains(needle));
    if !looks_like_mixdrop {
        return None;
    }

    // Skip empty segments so doubled slashes do not shift the positions.
    let mut parts = parsed.path_segments()?.filter(|part| !part.is_empty());

    let kind = parts.next()?;
    if !matches!(kind.to_ascii_lowercase().as_str(), "e" | "f") {
        return None;
    }

    let media_id = parts.next().map(str::trim).filter(|id| !id.is_empty())?;
    Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
}
/// Trims `value` and, if it is protocol-relative (starts with `//`), prefixes it
/// with `https:`. Any other value is returned trimmed but otherwise unchanged.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    match candidate.strip_prefix("//") {
        Some(rest) => format!("https://{rest}"),
        None => candidate.to_owned(),
    }
}
/// Extracts the media URL assigned to `MDCore.wurl` in a mixdrop embed page.
///
/// The page is searched as-is first. If the assignment is not present in plain
/// form, the packed `eval(function(p,a,c,k,e,d){...})` script is unpacked and
/// searched instead. A protocol-relative result is upgraded to `https:`.
/// Returns `None` when neither form yields a URL.
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
    // Compiled once and shared by both lookups (the pattern is the same for
    // plain and unpacked script text).
    let wurl_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
    let find_wurl = |text: &str| -> Option<String> {
        wurl_regex
            .captures(text)
            .and_then(|captures| captures.get(1))
            .map(|value| Self::normalize_possible_protocol_relative(value.as_str()))
    };

    if let Some(url) = find_wurl(html) {
        return Some(url);
    }

    let unpacked = Self::parse_mixin_packed_eval(html)?;
    find_wurl(&unpacked)
}
/// Unpacks a `eval(function(p,a,c,k,e,d){...}('payload',radix,count,'a|b|c'.split('|')...` script.
///
/// The payload is a template whose words are indexes (written in `radix`) into
/// the `|`-separated token list. Each whole word that names a non-empty token
/// is replaced by that token; everything else is left untouched.
///
/// Returns `None` when the packed call is not found, the numbers do not parse,
/// or `radix` is outside `2..=36` (the range `to_radix` supports).
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
    let eval_regex = Regex::new(
        r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
    )
    .ok()?;
    let captures = eval_regex.captures(html)?;
    let payload_raw = captures.name("payload")?.as_str();
    let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
    let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
    if !(2..=36).contains(&radix) {
        return None;
    }
    let payload = Self::unescape_js_single_quoted(payload_raw);
    let tokens_raw = captures.name("tokens")?.as_str();

    // `count` comes from the remote page. Iterating over the tokens that are
    // actually present (capped at `count`) keeps the work proportional to the
    // input size instead of to an arbitrary number in the script.
    let dictionary: std::collections::HashMap<String, &str> = tokens_raw
        .split('|')
        .enumerate()
        .take(count)
        .filter(|(_, token)| !token.is_empty())
        .map(|(index, token)| (Self::to_radix(index, radix), token))
        .collect();

    // One pass over every whole word. A closure replacer inserts the token
    // verbatim (no `$` group expansion) and substituted text is never scanned
    // again, so one token cannot be mistaken for another token's key.
    let word_regex = Regex::new(r"\b\w+\b").ok()?;
    let unpacked = word_regex.replace_all(&payload, |found: &regex::Captures<'_>| {
        let word = &found[0];
        dictionary.get(word).copied().unwrap_or(word).to_owned()
    });

    Some(unpacked.into_owned())
}
/// Resolves backslash escapes in the body of a JavaScript string literal.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; any other
/// escaped character (including `\\`, `\'` and `\"`) is emitted as itself.
/// A lone backslash at the very end of the input is dropped.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    // True while the previous character was an unconsumed backslash.
    let mut pending_escape = false;

    for current in value.chars() {
        if pending_escape {
            pending_escape = false;
            decoded.push(match current {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                literal => literal,
            });
        } else if current == '\\' {
            pending_escape = true;
        } else {
            decoded.push(current);
        }
    }

    decoded
}
/// Formats `value` in the given `radix` using digits `0-9` then lowercase `a-z`.
///
/// The digit table has 36 entries, so callers are expected to pass a radix in
/// `2..=36`; zero is always rendered as `"0"`.
fn to_radix(mut value: usize, radix: u32) -> String {
    const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

    if value == 0 {
        return String::from("0");
    }

    let base = radix as usize;
    // Digits are produced least-significant first and reversed at the end.
    let mut little_endian: Vec<u8> = Vec::new();
    while value != 0 {
        little_endian.push(DIGITS[value % base]);
        value /= base;
    }

    little_endian.into_iter().rev().map(char::from).collect()
}
/// Makes `value` absolute.
///
/// * `http://` / `https://` values are returned unchanged.
/// * Protocol-relative values (`//host/...`) get an `https:` prefix.
/// * Anything else is treated as a path on `https://archivebate.com/`.
fn absolute_url(value: &str) -> String {
    match value {
        full if full.starts_with("http://") || full.starts_with("https://") => full.to_owned(),
        scheme_less if scheme_less.starts_with("//") => format!("https:{scheme_less}"),
        path => format!("https://archivebate.com/{}", path.trim_start_matches('/')),
    }
}
/// Fetches a mixdrop embed page and extracts its media URL.
///
/// The request carries the headers from `html_headers` with `detail_url` as the
/// referer and an 8-second timeout. A request error, a non-success status, an
/// unreadable body, or a page without a recognisable media URL all yield `None`.
async fn resolve_mixdrop_media_from_embed(
    detail_url: &str,
    embed_url: &str,
    requester: &mut Requester,
) -> Option<String> {
    let fetched = requester
        .get_raw_with_headers_timeout(
            embed_url,
            Self::html_headers(detail_url),
            Some(StdDuration::from_secs(8)),
        )
        .await;

    let response = match fetched {
        Ok(response) if response.status().is_success() => response,
        _ => return None,
    };

    let body = response.text().await.ok()?;
    Self::extract_mixdrop_media_url(&body)
}
}
impl crate::proxies::Proxy for ArchivebateProxy {
    /// Resolves an archivebate watch-page endpoint to a media URL.
    ///
    /// Steps:
    /// 1. Validate and normalise `url` into an allowed watch-page URL.
    /// 2. Download the watch page.
    /// 3. Try the page's first iframe, if it points at a mixdrop host.
    /// 4. Otherwise try the `fid` form value, rewritten to a mixdrop embed URL.
    ///
    /// Returns an empty string whenever any step fails to produce a media URL.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some(detail_url) = Self::normalize_detail_request(&url) else {
            return String::new();
        };

        let mut requester = requester.get_ref().clone();
        let detail_html = requester
            .get_with_headers(
                &detail_url,
                Self::html_headers("https://archivebate.com/"),
                Some(Version::HTTP_11),
            )
            .await
            .unwrap_or_default();
        if detail_html.is_empty() {
            return String::new();
        }

        // First choice: the embedded player iframe.
        let iframe_embed = Self::first_iframe_source_from_html(&detail_html)
            .map(|src| Self::absolute_url(&src))
            .filter(|candidate| Self::is_mixdrop_host(candidate));
        if let Some(embed_url) = iframe_embed {
            let resolved =
                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester).await;
            if let Some(media_url) = resolved {
                return media_url;
            }
        }

        // Fallback: the download form's `fid` value.
        let download_embed = Self::download_fid_from_detail_html(&detail_html)
            .map(|fid| Self::absolute_url(&fid))
            .and_then(|download_url| Self::mixdrop_embed_url_from_download_url(&download_url));
        if let Some(embed_url) = download_embed {
            let resolved =
                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester).await;
            if let Some(media_url) = resolved {
                return media_url;
            }
        }

        String::new()
    }
}
// Unit tests for the pure helpers: request normalisation and packed-script extraction.
#[cfg(test)]
mod tests {
use super::ArchivebateProxy;
// A scheme-less endpoint on the allowed host is promoted to an https:// URL.
#[test]
fn normalizes_detail_request() {
let detail = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456");
assert_eq!(detail.as_deref(), Some("https://archivebate.com/watch/123456"));
}
// Paths outside /watch/ are rejected even on the allowed host.
#[test]
fn rejects_non_watch_paths() {
assert!(ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test").is_none());
}
// The fixture has no plain `MDCore.wurl` assignment, so the URL must come from
// unpacking the eval script (radix 12, 12 tokens) and then be upgraded from
// protocol-relative to https.
#[test]
fn extracts_mixdrop_wurl_from_packed_eval() {
let html = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//o230m5y6z.3.4/5/6.7?8=9&a=b";',12,12,'|MDCore|wurl|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;
let extracted = ArchivebateProxy::extract_mixdrop_media_url(html)
.expect("expected extracted media url");
assert_eq!(
extracted,
"https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
);
}
}

View File

@@ -1,3 +1,4 @@
use crate::proxies::archivebate::ArchivebateProxy;
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::heavyfetish::HeavyfetishProxy;
use crate::proxies::hqporner::HqpornerProxy;
@@ -11,6 +12,7 @@ use crate::proxies::spankbang::SpankbangProxy;
use crate::proxies::vjav::VjavProxy;
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
pub mod archivebate;
pub mod doodstream;
pub mod hanimecdn;
pub mod heavyfetish;
@@ -30,6 +32,7 @@ pub mod vjav;
#[derive(Debug, Clone)]
pub enum AnyProxy {
Archivebate(ArchivebateProxy),
Doodstream(DoodstreamProxy),
Sxyprn(SxyprnProxy),
Javtiful(javtiful::JavtifulProxy),
@@ -50,6 +53,7 @@ pub trait Proxy {
impl Proxy for AnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
match self {
AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await,
AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,