diff --git a/src/providers/archivebate.rs b/src/providers/archivebate.rs index 6996232..ee22a2d 100644 --- a/src/providers/archivebate.rs +++ b/src/providers/archivebate.rs @@ -1,7 +1,8 @@ use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ - Provider, report_provider_error, report_provider_error_background, requester_or_default, + Provider, build_proxy_url, report_provider_error, report_provider_error_background, + requester_or_default, strip_url_scheme, }; use crate::status::*; use crate::util::cache::VideoCache; @@ -18,7 +19,7 @@ use regex::Regex; use scraper::{Html, Selector}; use serde::Deserialize; use serde_json::Value; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::{Arc, RwLock}; use std::thread; use std::time::Duration as StdDuration; @@ -119,6 +120,12 @@ struct LivewireInitialData { server_memo_json: String, } +#[derive(Debug, Clone)] +struct ResolvedMixdropMedia { + media_url: String, + embed_url: String, +} + impl ArchivebateProvider { pub fn new() -> Self { let provider = Self { @@ -526,6 +533,28 @@ impl ArchivebateProvider { .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) } + fn is_allowed_detail_watch_url(url: &str) -> bool { + let Some(parsed) = url::Url::parse(url).ok() else { + return false; + }; + if parsed.scheme() != "https" { + return false; + } + let Some(host) = parsed.host_str() else { + return false; + }; + (host == "archivebate.com" || host == "www.archivebate.com") + && parsed.path().starts_with("/watch/") + } + + fn proxied_video(options: &ServerOptions, detail_url: &str) -> String { + if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) { + return String::new(); + } + + build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url)) + } + fn parse_duration(text: &str) -> u32 { let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else { return 0; @@ -827,6 +856,81 @@ impl ArchivebateProvider { host.contains("mixdrop") || host.contains("m1xdrop") } + fn download_fid_from_detail_html(html: &str) -> Option { + let document = Html::parse_document(html); + let selector = Selector::parse("input[name='fid'][value]").ok()?; + document + .select(&selector) + .next() + .and_then(|node| node.value().attr("value")) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + } + + fn mixdrop_embed_url_from_download_url(url: &str) -> Option { + let parsed = url::Url::parse(url).ok()?; + let host = parsed.host_str()?; + let host_lc = host.to_ascii_lowercase(); + if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") { + return None; + } + + let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty()); + let kind = segments.next()?.to_ascii_lowercase(); + if kind != "e" && kind != "f" { + return None; + } + let media_id = segments.next()?.trim(); + if media_id.is_empty() { + return None; + } + + Some(format!("{}://{host}/e/{media_id}", parsed.scheme())) + } + + fn video_format_with_headers( + format: VideoFormat, + headers: Vec<(String, String)>, + ) -> VideoFormat { + if headers.is_empty() { + return format; + } + + let header_map: HashMap = headers + .into_iter() + .filter_map(|(key, value)| { + let key = key.trim().to_string(); + let value = value.trim().to_string(); + if key.is_empty() || value.is_empty() { + return None; + } + Some((key, value)) + }) + .collect(); + + if header_map.is_empty() { + return format; + } + + let mut value = match serde_json::to_value(&format) { + Ok(value) => value, + Err(_) => return format, + }; + + if let Value::Object(object) = &mut value { + let Ok(headers_value) = serde_json::to_value(header_map) else { + return format; + }; + object.insert("http_headers".to_string(), headers_value); + if let Ok(updated) = serde_json::from_value::(value) { + return updated; + } + } + + format + } + fn first_video_source_from_html(html: &str) -> Option { let document = Html::parse_document(html); let source_selector = Selector::parse("video source[src]").ok()?; @@ -861,23 +965,37 @@ impl ArchivebateProvider { iframe_url: &str, referer: &str, options: &ServerOptions, - ) -> Option { + ) -> Option { let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media"); - let iframe_html = requester - .get_with_headers( + let response = requester + .get_raw_with_headers_timeout( iframe_url, self.html_headers(referer), - Some(wreq::Version::HTTP_11), + Some(StdDuration::from_secs(6)), ) .await .ok()?; - Self::extract_mixdrop_media_url(&iframe_html) + if !response.status().is_success() { + return None; + } + let iframe_html = response.text().await.ok()?; + let media_url = Self::extract_mixdrop_media_url(&iframe_html)?; + Some(ResolvedMixdropMedia { + media_url, + embed_url: iframe_url.to_string(), + }) } async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { let page_url = item.url.clone(); + let format_url = Self::proxied_video(options, &page_url); + if format_url.is_empty() { + return item; + } + + let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string()); let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video"); - let detail_html = match requester + if let Ok(detail_html) = requester .get_with_headers( &page_url, self.html_headers(&format!("{}/", self.url)), @@ -885,40 +1003,27 @@ impl ArchivebateProvider { ) .await { - Ok(value) => value, - Err(error) => { - report_provider_error_background( - CHANNEL_ID, - "enrich_video.fetch_detail", - &format!("url={page_url}; error={error}"), - ); - return item; + let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html) + .map(|value| self.absolute_url(&value)) + .filter(|value| Self::is_mixdrop_host(value)); + + if mixdrop_embed_url.is_none() { + mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html) + .map(|value| self.absolute_url(&value)) + .and_then(|value| Self::mixdrop_embed_url_from_download_url(&value)); } - }; - let mut media_url = Self::first_video_source_from_html(&detail_html) - .map(|value| self.absolute_url(&value)); - - if media_url.is_none() { - let iframe_url = Self::first_iframe_source_from_html(&detail_html) - .map(|value| self.absolute_url(&value)); - if let Some(iframe_url) = iframe_url { - if Self::is_mixdrop_host(&iframe_url) { - if let Some(resolved) = self - .resolve_mixdrop_media_from_iframe(&iframe_url, &page_url, options) - .await - { - media_url = Some(resolved); - } - } + if let Some(embed_url) = mixdrop_embed_url { + format = Self::video_format_with_headers( + format, + vec![ + ("Referer".to_string(), embed_url), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ], + ); } } - let Some(media_url) = media_url else { - return item; - }; - - let format = VideoFormat::new(media_url, "source".to_string(), "mp4".to_string()); let mut enriched = item; enriched.formats = Some(vec![format]); enriched diff --git a/src/proxies/archivebate.rs b/src/proxies/archivebate.rs new file mode 100644 index 0000000..bd65dfb --- /dev/null +++ b/src/proxies/archivebate.rs @@ -0,0 +1,321 @@ +use std::time::Duration as StdDuration; + +use ntex::web; +use regex::Regex; +use scraper::{Html, Selector}; +use url::Url; +use wreq::Version; + +use crate::util::requester::Requester; + +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; + +#[derive(Debug, Clone)] +pub struct ArchivebateProxy {} + +impl ArchivebateProxy { + pub fn new() -> Self { + Self {} + } + + fn normalize_detail_request(endpoint: &str) -> Option { + let endpoint = endpoint.trim().trim_start_matches('/'); + if endpoint.is_empty() { + return None; + } + + let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") { + endpoint.to_string() + } else { + format!("https://{}", endpoint.trim_start_matches('/')) + }; + + Self::is_allowed_detail_url(&detail_url).then_some(detail_url) + } + + fn is_allowed_detail_url(url: &str) -> bool { + let Some(parsed) = Url::parse(url).ok() else { + return false; + }; + if parsed.scheme() != "https" { + return false; + } + let Some(host) = parsed.host_str() else { + return false; + }; + (host == "archivebate.com" || host == "www.archivebate.com") + && parsed.path().starts_with("/watch/") + } + + fn host_from_url(url: &str) -> Option { + let parsed = Url::parse(url).ok()?; + parsed.host_str().map(|value| value.to_ascii_lowercase()) + } + + fn is_mixdrop_host(url: &str) -> bool { + let Some(host) = Self::host_from_url(url) else { + return false; + }; + host.contains("mixdrop") || host.contains("m1xdrop") + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ( + "Accept".to_string(), + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" + .to_string(), + ), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + fn first_iframe_source_from_html(html: &str) -> Option { + let document = Html::parse_document(html); + let selector = Selector::parse("iframe[src]").ok()?; + document + .select(&selector) + .next() + .and_then(|node| node.value().attr("src")) + .map(str::to_string) + } + + fn download_fid_from_detail_html(html: &str) -> Option { + let document = Html::parse_document(html); + let selector = Selector::parse("input[name='fid'][value]").ok()?; + document + .select(&selector) + .next() + .and_then(|node| node.value().attr("value")) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + } + + fn mixdrop_embed_url_from_download_url(url: &str) -> Option { + let parsed = Url::parse(url).ok()?; + let host = parsed.host_str()?; + let host_lc = host.to_ascii_lowercase(); + if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") { + return None; + } + + let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty()); + let kind = segments.next()?.to_ascii_lowercase(); + if kind != "e" && kind != "f" { + return None; + } + let media_id = segments.next()?.trim(); + if media_id.is_empty() { + return None; + } + + Some(format!("{}://{host}/e/{media_id}", parsed.scheme())) + } + + fn normalize_possible_protocol_relative(value: &str) -> String { + let trimmed = value.trim(); + if trimmed.starts_with("//") { + format!("https:{trimmed}") + } else { + trimmed.to_string() + } + } + + fn extract_mixdrop_media_url(html: &str) -> Option { + let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; + if let Some(url) = direct_regex + .captures(html) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + { + return Some(Self::normalize_possible_protocol_relative(&url)); + } + + let unpacked = Self::parse_mixin_packed_eval(html)?; + let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; + unpacked_regex + .captures(&unpacked) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + .map(|value| Self::normalize_possible_protocol_relative(&value)) + } + + fn parse_mixin_packed_eval(html: &str) -> Option { + let eval_regex = Regex::new( + r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P.*?)',\s*(?P[0-9]+),\s*(?P[0-9]+),\s*'(?P.*?)'\.split\('\|'\)"#, + ) + .ok()?; + let captures = eval_regex.captures(html)?; + let payload_raw = captures.name("payload")?.as_str(); + let radix = captures.name("radix")?.as_str().parse::().ok()?; + let count = captures.name("count")?.as_str().parse::().ok()?; + if !(2..=36).contains(&radix) { + return None; + } + + let payload = Self::unescape_js_single_quoted(payload_raw); + let tokens_raw = captures.name("tokens")?.as_str(); + let tokens = tokens_raw.split('|').collect::>(); + let mut unpacked = payload; + + for index in (0..count).rev() { + let Some(token) = tokens.get(index) else { + continue; + }; + if token.is_empty() { + continue; + } + let key = Self::to_radix(index, radix); + let pattern = format!(r"\b{}\b", regex::escape(&key)); + let re = Regex::new(&pattern).ok()?; + unpacked = re.replace_all(&unpacked, *token).into_owned(); + } + + Some(unpacked) + } + + fn unescape_js_single_quoted(value: &str) -> String { + let mut output = String::with_capacity(value.len()); + let mut chars = value.chars(); + while let Some(character) = chars.next() { + if character != '\\' { + output.push(character); + continue; + } + let Some(next) = chars.next() else { + break; + }; + match next { + '\\' => output.push('\\'), + '\'' => output.push('\''), + '"' => output.push('"'), + 'n' => output.push('\n'), + 'r' => output.push('\r'), + 't' => output.push('\t'), + _ => output.push(next), + } + } + output + } + + fn to_radix(mut value: usize, radix: u32) -> String { + if value == 0 { + return "0".to_string(); + } + let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz"; + let mut out = Vec::new(); + while value > 0 { + let digit = value % radix as usize; + out.push(alphabet[digit] as char); + value /= radix as usize; + } + out.iter().rev().collect() + } + + fn absolute_url(value: &str) -> String { + if value.starts_with("http://") || value.starts_with("https://") { + return value.to_string(); + } + if value.starts_with("//") { + return format!("https:{value}"); + } + format!("https://archivebate.com/{}", value.trim_start_matches('/')) + } + + async fn resolve_mixdrop_media_from_embed( + detail_url: &str, + embed_url: &str, + requester: &mut Requester, + ) -> Option { + let response = requester + .get_raw_with_headers_timeout( + embed_url, + Self::html_headers(detail_url), + Some(StdDuration::from_secs(8)), + ) + .await + .ok()?; + if !response.status().is_success() { + return None; + } + let html = response.text().await.ok()?; + Self::extract_mixdrop_media_url(&html) + } +} + +impl crate::proxies::Proxy for ArchivebateProxy { + async fn get_video_url(&self, url: String, requester: web::types::State) -> String { + let Some(detail_url) = Self::normalize_detail_request(&url) else { + return String::new(); + }; + + let mut requester = requester.get_ref().clone(); + let detail_html = requester + .get_with_headers( + &detail_url, + Self::html_headers("https://archivebate.com/"), + Some(Version::HTTP_11), + ) + .await + .unwrap_or_default(); + if detail_html.is_empty() { + return String::new(); + } + + if let Some(iframe_url) = Self::first_iframe_source_from_html(&detail_html).map(|value| Self::absolute_url(&value)) { + if Self::is_mixdrop_host(&iframe_url) { + if let Some(media_url) = + Self::resolve_mixdrop_media_from_embed(&detail_url, &iframe_url, &mut requester).await + { + return media_url; + } + } + } + + if let Some(download_fid) = Self::download_fid_from_detail_html(&detail_html).map(|value| Self::absolute_url(&value)) { + if let Some(embed_url) = Self::mixdrop_embed_url_from_download_url(&download_fid) { + if let Some(media_url) = + Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester).await + { + return media_url; + } + } + } + + String::new() + } +} + +#[cfg(test)] +mod tests { + use super::ArchivebateProxy; + + #[test] + fn normalizes_detail_request() { + let detail = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456"); + assert_eq!(detail.as_deref(), Some("https://archivebate.com/watch/123456")); + } + + #[test] + fn rejects_non_watch_paths() { + assert!(ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test").is_none()); + } + + #[test] + fn extracts_mixdrop_wurl_from_packed_eval() { + let html = r#" + +"#; + + let extracted = ArchivebateProxy::extract_mixdrop_media_url(html) + .expect("expected extracted media url"); + assert_eq!( + extracted, + "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168" + ); + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 307214e..27b3f54 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -1,3 +1,4 @@ +use crate::proxies::archivebate::ArchivebateProxy; use crate::proxies::doodstream::DoodstreamProxy; use crate::proxies::heavyfetish::HeavyfetishProxy; use crate::proxies::hqporner::HqpornerProxy; @@ -11,6 +12,7 @@ use crate::proxies::spankbang::SpankbangProxy; use crate::proxies::vjav::VjavProxy; use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester}; +pub mod archivebate; pub mod doodstream; pub mod hanimecdn; pub mod heavyfetish; @@ -30,6 +32,7 @@ pub mod vjav; #[derive(Debug, Clone)] pub enum AnyProxy { + Archivebate(ArchivebateProxy), Doodstream(DoodstreamProxy), Sxyprn(SxyprnProxy), Javtiful(javtiful::JavtifulProxy), @@ -50,6 +53,7 @@ pub trait Proxy { impl Proxy for AnyProxy { async fn get_video_url(&self, url: String, requester: web::types::State) -> String { match self { + AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await, AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await, AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await, AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await, diff --git a/src/proxy.rs b/src/proxy.rs index 5d777cd..80982e0 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -1,5 +1,6 @@ use ntex::web::{self, HttpRequest}; +use crate::proxies::archivebate::ArchivebateProxy; use crate::proxies::doodstream::DoodstreamProxy; use crate::proxies::heavyfetish::HeavyfetishProxy; use crate::proxies::hqporner::HqpornerProxy; @@ -16,6 +17,11 @@ use crate::util::requester::Requester; pub fn config(cfg: &mut web::ServiceConfig) { cfg.service( + web::resource("/archivebate/{endpoint}*") + .route(web::post().to(proxy2redirect)) + .route(web::get().to(proxy2redirect)), + ) + .service( web::resource("/doodstream/{endpoint}*") .route(web::post().to(proxy2redirect)) .route(web::get().to(proxy2redirect)), @@ -124,6 +130,7 @@ async fn proxy2redirect( fn get_proxy(proxy: &str) -> Option { match proxy { + "archivebate" => Some(AnyProxy::Archivebate(ArchivebateProxy::new())), "doodstream" => Some(AnyProxy::Doodstream(DoodstreamProxy::new())), "sxyprn" => Some(AnyProxy::Sxyprn(SxyprnProxy::new())), "javtiful" => Some(AnyProxy::Javtiful(JavtifulProxy::new())),