archivebate repaired

This commit is contained in:
Simon
2026-06-18 10:10:26 +00:00
parent c19e3ebbea
commit d263854b35

View File

@@ -64,12 +64,10 @@ const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[
("tiktok", "dGlrdG9r"), ("tiktok", "dGlrdG9r"),
]; ];
const GENDER_SHORTCUTS: &[(&str, &str)] = &[ // NOTE: archivebate.com also exposes `/gender/<base64>` routes in its nav, but the
("female", "ZmVtYWxl"), // Livewire `filter.platform` component always responds with a redirect to the home
("couple", "Y291cGxl"), // page for gender values (server-side bug on the site), so gender browsing returns
("male", "bWFsZQ=="), // no videos. Gender shortcuts are intentionally not exposed here.
("trans", "dHJhbnM="),
];
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ArchivebateProvider { pub struct ArchivebateProvider {
@@ -85,10 +83,6 @@ enum Target {
id: String, id: String,
title: String, title: String,
}, },
Gender {
id: String,
title: String,
},
Profile { Profile {
username: String, username: String,
platform: Option<String>, platform: Option<String>,
@@ -120,12 +114,6 @@ struct LivewireInitialData {
server_memo_json: String, server_memo_json: String,
} }
/// Pair of URLs produced when a Mixdrop embed (iframe) page is resolved to its media file.
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
/// Media URL extracted from the embed page's HTML.
media_url: String,
/// URL of the iframe (embed) page the media URL was extracted from.
embed_url: String,
}
impl ArchivebateProvider { impl ArchivebateProvider {
pub fn new() -> Self { pub fn new() -> Self {
let provider = Self { let provider = Self {
@@ -191,13 +179,6 @@ impl ArchivebateProvider {
title: "All".to_string(), title: "All".to_string(),
}]; }];
for (title, id) in GENDER_SHORTCUTS {
filters.push(FilterOption {
id: format!("gender:{id}"),
title: format!("Gender: {}", Self::title_case(title)),
});
}
for (title, id) in PLATFORM_SHORTCUTS { for (title, id) in PLATFORM_SHORTCUTS {
filters.push(FilterOption { filters.push(FilterOption {
id: format!("platform:{id}"), id: format!("platform:{id}"),
@@ -223,7 +204,7 @@ impl ArchivebateProvider {
Channel { Channel {
id: CHANNEL_ID.to_string(), id: CHANNEL_ID.to_string(),
name: "ArchiveBate".to_string(), name: "ArchiveBate".to_string(),
description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and yt-dlp compatible watch URLs.".to_string(), description: "ArchiveBate cam recording feeds with platform and profile shortcuts and yt-dlp compatible watch URLs.".to_string(),
premium: false, premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(),
status: "work in progress".to_string(), status: "work in progress".to_string(),
@@ -244,7 +225,7 @@ impl ArchivebateProvider {
ChannelOption { ChannelOption {
id: "filter".to_string(), id: "filter".to_string(),
title: "Shortcuts".to_string(), title: "Shortcuts".to_string(),
description: "Direct platform and gender archive routes.".to_string(), description: "Direct platform archive routes.".to_string(),
systemImage: "tag.fill".to_string(), systemImage: "tag.fill".to_string(),
colorName: "green".to_string(), colorName: "green".to_string(),
options: filters, options: filters,
@@ -365,13 +346,6 @@ impl ArchivebateProvider {
format!("{}/platform/{id}?page={page}", self.url) format!("{}/platform/{id}?page={page}", self.url)
} }
} }
Target::Gender { id, .. } => {
if page <= 1 {
format!("{}/gender/{id}", self.url)
} else {
format!("{}/gender/{id}?page={page}", self.url)
}
}
Target::Profile { username, .. } => { Target::Profile { username, .. } => {
if page <= 1 { if page <= 1 {
format!("{}/profile/{username}", self.url) format!("{}/profile/{username}", self.url)
@@ -435,15 +409,6 @@ impl ArchivebateProvider {
} }
} }
if let Some(id) = trimmed.strip_prefix("gender:") {
if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) {
return Some(Target::Gender {
id: id.to_string(),
title: Self::title_case(name),
});
}
}
let normalized = Self::normalize_title(trimmed); let normalized = Self::normalize_title(trimmed);
if let Some((name, id)) = PLATFORM_SHORTCUTS if let Some((name, id)) = PLATFORM_SHORTCUTS
.iter() .iter()
@@ -455,16 +420,6 @@ impl ArchivebateProvider {
}); });
} }
if let Some((name, id)) = GENDER_SHORTCUTS
.iter()
.find(|(name, _)| Self::normalize_title(name) == normalized)
{
return Some(Target::Gender {
id: (*id).to_string(),
title: Self::title_case(name),
});
}
if let Some(option) = self.find_uploader_option(trimmed) { if let Some(option) = self.find_uploader_option(trimmed) {
if let Some(username) = option.id.strip_prefix("profile:") { if let Some(username) = option.id.strip_prefix("profile:") {
return Some(Target::Profile { return Some(Target::Profile {
@@ -605,7 +560,6 @@ impl ArchivebateProvider {
fn extra_tags_for_target(target: &Target) -> Vec<String> { fn extra_tags_for_target(target: &Target) -> Vec<String> {
match target { match target {
Target::Platform { title, .. } => vec![title.clone()], Target::Platform { title, .. } => vec![title.clone()],
Target::Gender { title, .. } => vec![title.clone()],
Target::Profile { platform, gender, .. } => { Target::Profile { platform, gender, .. } => {
let mut tags = Vec::new(); let mut tags = Vec::new();
if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) {
@@ -746,104 +700,6 @@ impl ArchivebateProvider {
Ok(items) Ok(items)
} }
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
let eval_regex = Regex::new(
r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
)
.ok()?;
let captures = eval_regex.captures(html)?;
let payload_raw = captures.name("payload")?.as_str();
let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
if !(2..=36).contains(&radix) {
return None;
}
let payload = Self::unescape_js_single_quoted(payload_raw);
let tokens_raw = captures.name("tokens")?.as_str();
let tokens = tokens_raw.split('|').collect::<Vec<_>>();
let mut unpacked = payload;
for index in (0..count).rev() {
let Some(token) = tokens.get(index) else {
continue;
};
if token.is_empty() {
continue;
}
let key = Self::to_radix(index, radix);
let pattern = format!(r"\b{}\b", regex::escape(&key));
let re = Regex::new(&pattern).ok()?;
unpacked = re.replace_all(&unpacked, *token).into_owned();
}
Some(unpacked)
}
/// Decodes backslash escapes in the contents of a JavaScript string literal.
///
/// `\n`, `\r` and `\t` become the matching control characters; any other escaped
/// character (including `\\`, `\'` and `\"`) is emitted as itself. A lone trailing
/// backslash is dropped.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut pending_escape = false;
    for ch in value.chars() {
        if pending_escape {
            // Character right after a backslash: translate the known control escapes,
            // pass everything else through untouched.
            decoded.push(match ch {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            });
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            decoded.push(ch);
        }
    }
    decoded
}
/// Formats `value` in base `radix` using the digits `0-9` followed by lowercase `a-z`.
///
/// Zero is always rendered as `"0"`. The digit table only covers radixes up to 36.
fn to_radix(mut value: usize, radix: u32) -> String {
    const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
    if value == 0 {
        return String::from("0");
    }
    let base = radix as usize;
    // Digits come out least-significant first; flip them when building the result.
    let mut reversed = String::new();
    while value != 0 {
        reversed.push(char::from(DIGITS[value % base]));
        value /= base;
    }
    reversed.chars().rev().collect()
}
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
if let Some(url) = direct_regex
.captures(html)
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
{
return Some(Self::normalize_possible_protocol_relative(&url));
}
let unpacked = Self::parse_mixin_packed_eval(html)?;
let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
unpacked_regex
.captures(&unpacked)
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
.map(|value| Self::normalize_possible_protocol_relative(&value))
}
/// Trims `value` and, when it is protocol-relative (starts with `//`), prefixes `https:`.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    let scheme = if candidate.starts_with("//") {
        "https:"
    } else {
        ""
    };
    [scheme, candidate].concat()
}
fn host_from_url(url: &str) -> Option<String> { fn host_from_url(url: &str) -> Option<String> {
let parsed = url::Url::parse(url).ok()?; let parsed = url::Url::parse(url).ok()?;
parsed.host_str().map(|value| value.to_ascii_lowercase()) parsed.host_str().map(|value| value.to_ascii_lowercase())
@@ -931,25 +787,6 @@ impl ArchivebateProvider {
format format
} }
/// Returns the `src` of the first `<video><source src>` element in `html`, falling back
/// to the first `<video src>` element when no nested source is found.
fn first_video_source_from_html(html: &str) -> Option<String> {
    let document = Html::parse_document(html);
    // Both selectors are built up front, in priority order.
    let nested_source = Selector::parse("video source[src]").ok()?;
    let inline_source = Selector::parse("video[src]").ok()?;

    [nested_source, inline_source]
        .iter()
        .find_map(|selector| document.select(selector).next()?.value().attr("src"))
        .map(str::to_string)
}
fn first_iframe_source_from_html(html: &str) -> Option<String> { fn first_iframe_source_from_html(html: &str) -> Option<String> {
let document = Html::parse_document(html); let document = Html::parse_document(html);
let iframe_selector = Selector::parse("iframe[src]").ok()?; let iframe_selector = Selector::parse("iframe[src]").ok()?;
@@ -960,32 +797,6 @@ impl ArchivebateProvider {
.map(|value| value.to_string()) .map(|value| value.to_string())
} }
/// Fetches the embed page at `iframe_url` (sending `referer`-based HTML headers, with a
/// 6 second timeout) and extracts the Mixdrop media URL from it.
///
/// Returns `None` if the request fails, the response status is not a success, the body
/// cannot be read, or no media URL is found in the page.
async fn resolve_mixdrop_media_from_iframe(
    &self,
    iframe_url: &str,
    referer: &str,
    options: &ServerOptions,
) -> Option<ResolvedMixdropMedia> {
    let mut client = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
    let fetched = client
        .get_raw_with_headers_timeout(
            iframe_url,
            self.html_headers(referer),
            Some(StdDuration::from_secs(6)),
        )
        .await;
    let embed_page = match fetched {
        Ok(page) if page.status().is_success() => page,
        _ => return None,
    };
    let body = embed_page.text().await.ok()?;
    Self::extract_mixdrop_media_url(&body).map(|media_url| ResolvedMixdropMedia {
        media_url,
        embed_url: iframe_url.to_string(),
    })
}
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
let page_url = item.url.clone(); let page_url = item.url.clone();
let format_url = Self::proxied_video(options, &page_url); let format_url = Self::proxied_video(options, &page_url);
@@ -1405,23 +1216,3 @@ impl Provider for ArchivebateProvider {
Some(self.build_channel(clientversion)) Some(self.build_channel(clientversion))
} }
} }
// Unit tests for the Mixdrop media URL extraction helpers.
#[cfg(test)]
mod tests {
use super::ArchivebateProvider;
// The fixture has no plain `MDCore.wurl = "..."` assignment, only a packed `eval`
// script (radix 13, 13 tokens), so the URL must come from the unpacking path. The
// unpacked URL is protocol-relative and is expected back with an `https:` scheme.
#[test]
fn extracts_mixdrop_wurl_from_packed_eval() {
let html = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//3.4.5/6/7.8?9=a&b=c";',13,13,'|MDCore|wurl|o230m5y6z|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;
let actual = ArchivebateProvider::extract_mixdrop_media_url(html)
.expect("expected mixdrop media url");
assert_eq!(
actual,
"https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
);
}
}