archivebate fix
This commit is contained in:
321
src/proxies/archivebate.rs
Normal file
321
src/proxies/archivebate.rs
Normal file
@@ -0,0 +1,321 @@
|
||||
use std::time::Duration as StdDuration;
|
||||
|
||||
use ntex::web;
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use url::Url;
|
||||
use wreq::Version;
|
||||
|
||||
use crate::util::requester::Requester;
|
||||
|
||||
/// Desktop-Firefox `User-Agent` string attached to the HTML requests this proxy issues.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
|
||||
|
||||
/// Proxy that resolves an archivebate.com watch page to a playable media URL.
///
/// The type holds no state; every request is driven entirely by the arguments
/// passed to its methods. `Default` is derived so the type can be built with
/// `ArchivebateProxy::default()` as well as `ArchivebateProxy::new()`.
#[derive(Debug, Clone, Default)]
pub struct ArchivebateProxy {}
|
||||
|
||||
impl ArchivebateProxy {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
fn normalize_detail_request(endpoint: &str) -> Option<String> {
|
||||
let endpoint = endpoint.trim().trim_start_matches('/');
|
||||
if endpoint.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
||||
endpoint.to_string()
|
||||
} else {
|
||||
format!("https://{}", endpoint.trim_start_matches('/'))
|
||||
};
|
||||
|
||||
Self::is_allowed_detail_url(&detail_url).then_some(detail_url)
|
||||
}
|
||||
|
||||
fn is_allowed_detail_url(url: &str) -> bool {
|
||||
let Some(parsed) = Url::parse(url).ok() else {
|
||||
return false;
|
||||
};
|
||||
if parsed.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = parsed.host_str() else {
|
||||
return false;
|
||||
};
|
||||
(host == "archivebate.com" || host == "www.archivebate.com")
|
||||
&& parsed.path().starts_with("/watch/")
|
||||
}
|
||||
|
||||
fn host_from_url(url: &str) -> Option<String> {
|
||||
let parsed = Url::parse(url).ok()?;
|
||||
parsed.host_str().map(|value| value.to_ascii_lowercase())
|
||||
}
|
||||
|
||||
fn is_mixdrop_host(url: &str) -> bool {
|
||||
let Some(host) = Self::host_from_url(url) else {
|
||||
return false;
|
||||
};
|
||||
host.contains("mixdrop") || host.contains("m1xdrop")
|
||||
}
|
||||
|
||||
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
(
|
||||
"Accept".to_string(),
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
|
||||
.to_string(),
|
||||
),
|
||||
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
fn first_iframe_source_from_html(html: &str) -> Option<String> {
|
||||
let document = Html::parse_document(html);
|
||||
let selector = Selector::parse("iframe[src]").ok()?;
|
||||
document
|
||||
.select(&selector)
|
||||
.next()
|
||||
.and_then(|node| node.value().attr("src"))
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn download_fid_from_detail_html(html: &str) -> Option<String> {
|
||||
let document = Html::parse_document(html);
|
||||
let selector = Selector::parse("input[name='fid'][value]").ok()?;
|
||||
document
|
||||
.select(&selector)
|
||||
.next()
|
||||
.and_then(|node| node.value().attr("value"))
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
}
|
||||
|
||||
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
|
||||
let parsed = Url::parse(url).ok()?;
|
||||
let host = parsed.host_str()?;
|
||||
let host_lc = host.to_ascii_lowercase();
|
||||
if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty());
|
||||
let kind = segments.next()?.to_ascii_lowercase();
|
||||
if kind != "e" && kind != "f" {
|
||||
return None;
|
||||
}
|
||||
let media_id = segments.next()?.trim();
|
||||
if media_id.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
|
||||
}
|
||||
|
||||
/// Trims `value` and, if it is protocol-relative (`//host/…`), prefixes it with `https:`.
///
/// Any other input is returned trimmed but otherwise unchanged.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    let scheme = if candidate.starts_with("//") {
        "https:"
    } else {
        ""
    };
    format!("{scheme}{candidate}")
}
|
||||
|
||||
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
|
||||
let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
|
||||
if let Some(url) = direct_regex
|
||||
.captures(html)
|
||||
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
||||
{
|
||||
return Some(Self::normalize_possible_protocol_relative(&url));
|
||||
}
|
||||
|
||||
let unpacked = Self::parse_mixin_packed_eval(html)?;
|
||||
let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
|
||||
unpacked_regex
|
||||
.captures(&unpacked)
|
||||
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
||||
.map(|value| Self::normalize_possible_protocol_relative(&value))
|
||||
}
|
||||
|
||||
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
|
||||
let eval_regex = Regex::new(
|
||||
r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
|
||||
)
|
||||
.ok()?;
|
||||
let captures = eval_regex.captures(html)?;
|
||||
let payload_raw = captures.name("payload")?.as_str();
|
||||
let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
|
||||
let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
|
||||
if !(2..=36).contains(&radix) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let payload = Self::unescape_js_single_quoted(payload_raw);
|
||||
let tokens_raw = captures.name("tokens")?.as_str();
|
||||
let tokens = tokens_raw.split('|').collect::<Vec<_>>();
|
||||
let mut unpacked = payload;
|
||||
|
||||
for index in (0..count).rev() {
|
||||
let Some(token) = tokens.get(index) else {
|
||||
continue;
|
||||
};
|
||||
if token.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let key = Self::to_radix(index, radix);
|
||||
let pattern = format!(r"\b{}\b", regex::escape(&key));
|
||||
let re = Regex::new(&pattern).ok()?;
|
||||
unpacked = re.replace_all(&unpacked, *token).into_owned();
|
||||
}
|
||||
|
||||
Some(unpacked)
|
||||
}
|
||||
|
||||
/// Decodes backslash escapes in the body of a JavaScript string literal.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; for any
/// other escaped character (including `\\`, `\'` and `\"`) the backslash is
/// dropped and the character kept. A lone trailing backslash is discarded.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut pending_escape = false;

    for symbol in value.chars() {
        if pending_escape {
            let resolved = match symbol {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            };
            decoded.push(resolved);
            pending_escape = false;
        } else if symbol == '\\' {
            // Remember the backslash; the next character decides the output.
            pending_escape = true;
        } else {
            decoded.push(symbol);
        }
    }

    decoded
}
|
||||
|
||||
/// Writes `value` in base `radix` using the digits `0-9` followed by `a-z`.
///
/// Zero is rendered as `"0"`. Callers are expected to pass a radix in
/// `2..=36`; the digit table has 36 entries.
fn to_radix(mut value: usize, radix: u32) -> String {
    const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

    if value == 0 {
        return String::from("0");
    }

    let base = radix as usize;
    // Digits come out least-significant first and are flipped at the end.
    let mut reversed = Vec::new();
    while value != 0 {
        reversed.push(DIGITS[value % base]);
        value /= base;
    }

    reversed.reverse();
    reversed.into_iter().map(char::from).collect()
}
|
||||
|
||||
/// Resolves `value` to an absolute URL.
///
/// * `http://…` / `https://…` input is returned unchanged.
/// * Protocol-relative input (`//host/…`) is prefixed with `https:`.
/// * Anything else is treated as a path on `https://archivebate.com/`, with
///   leading slashes removed before joining.
fn absolute_url(value: &str) -> String {
    match value {
        full if full.starts_with("http://") || full.starts_with("https://") => full.to_owned(),
        relative if relative.starts_with("//") => format!("https:{relative}"),
        path => format!("https://archivebate.com/{}", path.trim_start_matches('/')),
    }
}
|
||||
|
||||
async fn resolve_mixdrop_media_from_embed(
|
||||
detail_url: &str,
|
||||
embed_url: &str,
|
||||
requester: &mut Requester,
|
||||
) -> Option<String> {
|
||||
let response = requester
|
||||
.get_raw_with_headers_timeout(
|
||||
embed_url,
|
||||
Self::html_headers(detail_url),
|
||||
Some(StdDuration::from_secs(8)),
|
||||
)
|
||||
.await
|
||||
.ok()?;
|
||||
if !response.status().is_success() {
|
||||
return None;
|
||||
}
|
||||
let html = response.text().await.ok()?;
|
||||
Self::extract_mixdrop_media_url(&html)
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::proxies::Proxy for ArchivebateProxy {
|
||||
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
|
||||
let Some(detail_url) = Self::normalize_detail_request(&url) else {
|
||||
return String::new();
|
||||
};
|
||||
|
||||
let mut requester = requester.get_ref().clone();
|
||||
let detail_html = requester
|
||||
.get_with_headers(
|
||||
&detail_url,
|
||||
Self::html_headers("https://archivebate.com/"),
|
||||
Some(Version::HTTP_11),
|
||||
)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
if detail_html.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
if let Some(iframe_url) = Self::first_iframe_source_from_html(&detail_html).map(|value| Self::absolute_url(&value)) {
|
||||
if Self::is_mixdrop_host(&iframe_url) {
|
||||
if let Some(media_url) =
|
||||
Self::resolve_mixdrop_media_from_embed(&detail_url, &iframe_url, &mut requester).await
|
||||
{
|
||||
return media_url;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(download_fid) = Self::download_fid_from_detail_html(&detail_html).map(|value| Self::absolute_url(&value)) {
|
||||
if let Some(embed_url) = Self::mixdrop_embed_url_from_download_url(&download_fid) {
|
||||
if let Some(media_url) =
|
||||
Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester).await
|
||||
{
|
||||
return media_url;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ArchivebateProxy;

    /// A scheme-less `host/path` endpoint is promoted to an `https://` watch URL.
    #[test]
    fn normalizes_detail_request() {
        let normalized = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456");
        assert_eq!(
            normalized,
            Some(String::from("https://archivebate.com/watch/123456"))
        );
    }

    /// Pages outside `/watch/` are refused.
    #[test]
    fn rejects_non_watch_paths() {
        let rejected = ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test");
        assert_eq!(rejected, None);
    }

    /// The `MDCore.wurl` assignment is recovered from a packed `eval` script and
    /// its protocol-relative value is upgraded to `https:`.
    #[test]
    fn extracts_mixdrop_wurl_from_packed_eval() {
        let packed_page = r#"
        <script>
        eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//o230m5y6z.3.4/5/6.7?8=9&a=b";',12,12,'|MDCore|wurl|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
        </script>
        "#;

        let Some(media_url) = ArchivebateProxy::extract_mixdrop_media_url(packed_page) else {
            panic!("expected extracted media url");
        };
        assert_eq!(
            media_url,
            "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
        );
    }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::proxies::archivebate::ArchivebateProxy;
|
||||
use crate::proxies::doodstream::DoodstreamProxy;
|
||||
use crate::proxies::heavyfetish::HeavyfetishProxy;
|
||||
use crate::proxies::hqporner::HqpornerProxy;
|
||||
@@ -11,6 +12,7 @@ use crate::proxies::spankbang::SpankbangProxy;
|
||||
use crate::proxies::vjav::VjavProxy;
|
||||
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
|
||||
|
||||
pub mod archivebate;
|
||||
pub mod doodstream;
|
||||
pub mod hanimecdn;
|
||||
pub mod heavyfetish;
|
||||
@@ -30,6 +32,7 @@ pub mod vjav;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum AnyProxy {
|
||||
Archivebate(ArchivebateProxy),
|
||||
Doodstream(DoodstreamProxy),
|
||||
Sxyprn(SxyprnProxy),
|
||||
Javtiful(javtiful::JavtifulProxy),
|
||||
@@ -50,6 +53,7 @@ pub trait Proxy {
|
||||
impl Proxy for AnyProxy {
|
||||
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
|
||||
match self {
|
||||
AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await,
|
||||
AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
|
||||
AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
|
||||
AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,
|
||||
|
||||
Reference in New Issue
Block a user