From e4941b120971107ee1c5ede8b9eff5277e744c74 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 May 2026 09:19:17 +0000 Subject: [PATCH] removed archivebate1 --- build.rs | 5 - src/providers/archivebate1.rs | 1504 --------------------------------- src/util/requester.rs | 15 - 3 files changed, 1524 deletions(-) delete mode 100644 src/providers/archivebate1.rs diff --git a/build.rs b/build.rs index 00636b7..e617878 100644 --- a/build.rs +++ b/build.rs @@ -291,11 +291,6 @@ const PROVIDERS: &[ProviderDef] = &[ module: "archivebate", ty: "ArchivebateProvider", }, - ProviderDef { - id: "archivebate1", - module: "archivebate1", - ty: "ArchivebateProvider", - }, ProviderDef { id: "thaiporntv", module: "thaiporntv", diff --git a/src/providers/archivebate1.rs b/src/providers/archivebate1.rs deleted file mode 100644 index ed87a62..0000000 --- a/src/providers/archivebate1.rs +++ /dev/null @@ -1,1504 +0,0 @@ -use crate::DbPool; -use crate::api::ClientVersion; -use crate::providers::{ - Provider, build_proxy_url, report_provider_error, report_provider_error_background, - requester_or_default, strip_url_scheme, -}; -use crate::status::*; -use crate::util::cache::VideoCache; -use crate::util::parse_abbreviated_number; -use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoFormat, VideoItem}; -use async_trait::async_trait; -use chrono::{Duration as ChronoDuration, Utc}; -use error_chain::error_chain; -use futures::stream::{self, StreamExt}; -use htmlentity::entity::{ICodedDataTrait, decode}; -use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode}; -use regex::Regex; -use scraper::{Html, Selector}; -use serde::Deserialize; -use serde_json::Value; -use std::collections::{HashMap, HashSet}; -use std::sync::{Arc, RwLock}; -use std::thread; -use std::time::Duration as StdDuration; -use tokio::time::timeout; - -pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = - crate::providers::ProviderChannelMetadata { - group_id: "live-cams", - tags: &["archive", "cams", "recordings"], - }; - -error_chain! { - foreign_links { - Io(std::io::Error); - Json(serde_json::Error); - } - errors { - Parse(msg: String) { - description("parse error") - display("parse error: {}", msg) - } - } -} - -const BASE_URL: &str = "https://archivebate1.com"; -const CHANNEL_ID: &str = "archivebate1"; -const FIREFOX_UA: &str = - "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; -const HTML_ACCEPT: &str = - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; - -const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[ - ("chaturbate", "Y2hhdHVyYmF0ZQ=="), - ("stripchat", "c3RyaXBjaGF0"), - ("camsoda", "Y2Ftc29kYQ=="), - ("cam4", "Y2FtNA=="), - ("bongacams", "Ym9uZ2FjYW1z"), - ("onlyfans", "b25seWZhbnM="), - ("twitch", "dHdpdGNo"), - ("youtube", "eW91dHViZQ=="), - ("instagram", "aW5zdGFncmFt"), - ("tiktok", "dGlrdG9r"), -]; - -const GENDER_SHORTCUTS: &[(&str, &str)] = &[ - ("female", "ZmVtYWxl"), - ("couple", "Y291cGxl"), - ("male", "bWFsZQ=="), - ("trans", "dHJhbnM="), -]; - -#[derive(Debug, Clone)] -pub struct ArchivebateProvider { - url: String, - filters: Arc>>, - uploaders: Arc>>, -} - -#[derive(Debug, Clone)] -enum Target { - Home, - Platform { - id: String, - title: String, - }, - Gender { - id: String, - title: String, - }, - Profile { - username: String, - platform: Option, - gender: Option, - }, -} - -#[derive(Debug, Deserialize, Clone)] -struct SearchResponse { - #[serde(default)] - data: Vec, -} - -#[derive(Debug, Deserialize, Clone)] -struct SearchProfile { - #[serde(default)] - username: String, - #[serde(default)] - platform: Option, - #[serde(default)] - gender: Option, -} - -#[derive(Debug, Clone)] -struct LivewireInitialData { - component: String, - method: String, - fingerprint_json: String, - server_memo_json: String, -} - -#[derive(Debug, Clone)] -struct ResolvedMixdropMedia { - media_url: String, - embed_url: String, -} - -impl ArchivebateProvider { - pub fn new() -> Self { - let provider = Self { - url: BASE_URL.to_string(), - filters: Arc::new(RwLock::new(Self::build_default_filters())), - uploaders: Arc::new(RwLock::new(vec![FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }])), - }; - provider.spawn_initial_load(); - provider - } - - fn spawn_initial_load(&self) { - let provider = self.clone(); - thread::spawn(move || { - let runtime = match tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - { - Ok(runtime) => runtime, - Err(error) => { - report_provider_error_background( - CHANNEL_ID, - "spawn_initial_load.runtime_build", - &error.to_string(), - ); - return; - } - }; - - runtime.block_on(async move { - let options = ServerOptions { - featured: None, - category: None, - sites: None, - filter: None, - language: None, - public_url_base: None, - requester: None, - network: None, - stars: None, - categories: None, - duration: None, - sort: Some("new".to_string()), - sexuality: None, - }; - if let Err(error) = provider.refresh_uploaders_from_home(&options).await { - report_provider_error_background( - CHANNEL_ID, - "spawn_initial_load.refresh_uploaders", - &error.to_string(), - ); - } - }); - }); - } - - fn build_default_filters() -> Vec { - let mut filters = vec![FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }]; - - for (title, id) in GENDER_SHORTCUTS { - filters.push(FilterOption { - id: format!("gender:{id}"), - title: format!("Gender: {}", Self::title_case(title)), - }); - } - - for (title, id) in PLATFORM_SHORTCUTS { - filters.push(FilterOption { - id: format!("platform:{id}"), - title: format!("Platform: {}", Self::title_case(title)), - }); - } - - filters - } - - fn build_channel(&self, _clientversion: ClientVersion) -> Channel { - let filters = self - .filters - .read() - .map(|value| value.clone()) - .unwrap_or_default(); - let uploaders = self - .uploaders - .read() - .map(|value| value.clone()) - .unwrap_or_default(); - - Channel { - id: CHANNEL_ID.to_string(), - name: "ArchiveBate 1".to_string(), - description: "ArchiveBate 1 cam recording feeds.".to_string(), - premium: false, - favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), - status: "work in progress".to_string(), - categories: filters.iter().skip(1).map(|value| value.title.clone()).collect(), - options: vec![ - ChannelOption { - id: "sort".to_string(), - title: "Sort".to_string(), - description: "Browse ArchiveBate newest uploads.".to_string(), - systemImage: "list.number".to_string(), - colorName: "blue".to_string(), - options: vec![ - FilterOption { - id: "new".to_string(), - title: "Latest".to_string(), - }, - FilterOption { - id: "popular".to_string(), - title: "Popular".to_string(), - }, - ], - multiSelect: false, - }, - ChannelOption { - id: "filter".to_string(), - title: "Shortcuts".to_string(), - description: "Direct platform and gender archive routes.".to_string(), - systemImage: "tag.fill".to_string(), - colorName: "green".to_string(), - options: filters, - multiSelect: false, - }, - ChannelOption { - id: "sites".to_string(), - title: "Uploaders".to_string(), - description: "Profile shortcuts discovered from latest listings.".to_string(), - systemImage: "person.crop.square".to_string(), - colorName: "purple".to_string(), - options: uploaders, - multiSelect: false, - }, - ], - nsfw: true, - cacheDuration: Some(900), - } - } - - fn selector(value: &str) -> Result { - Selector::parse(value) - .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) - } - - fn regex(value: &str) -> Result { - Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` parse failed: {error}"))) - } - - fn decode_text(text: &str) -> String { - decode(text.as_bytes()) - .to_string() - .unwrap_or_else(|_| text.to_string()) - .replace('\u{a0}', " ") - .trim() - .to_string() - } - - fn collapse_whitespace(text: &str) -> String { - text.split_whitespace().collect::>().join(" ") - } - - fn absolute_url(&self, value: &str) -> String { - if value.starts_with("http://") || value.starts_with("https://") { - return value.to_string(); - } - if value.starts_with("//") { - return format!("https:{value}"); - } - format!( - "{}/{}", - self.url.trim_end_matches('/'), - value.trim_start_matches('/') - ) - } - - fn normalize_title(value: &str) -> String { - let mut normalized = String::new(); - let mut last_space = true; - for character in value.chars().flat_map(|character| character.to_lowercase()) { - if character.is_alphanumeric() { - normalized.push(character); - last_space = false; - } else if !last_space { - normalized.push(' '); - last_space = true; - } - } - normalized.trim().to_string() - } - - fn title_case(value: &str) -> String { - let mut chars = value.chars(); - let Some(first) = chars.next() else { - return String::new(); - }; - let mut out = first.to_uppercase().to_string(); - out.push_str(chars.as_str()); - out - } - - fn html_headers(&self, referer: &str) -> Vec<(String, String)> { - vec![ - ("Referer".to_string(), referer.to_string()), - ("User-Agent".to_string(), FIREFOX_UA.to_string()), - ("Accept".to_string(), HTML_ACCEPT.to_string()), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ] - } - - fn search_headers(&self, referer: &str) -> Vec<(String, String)> { - vec![ - ("Referer".to_string(), referer.to_string()), - ("User-Agent".to_string(), FIREFOX_UA.to_string()), - ( - "Accept".to_string(), - "application/json, text/javascript, */*; q=0.01".to_string(), - ), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()), - ] - } - - fn build_target_url(&self, target: &Target, page: u16) -> String { - let page = page.max(1); - match target { - Target::Home => { - if page <= 1 { - format!("{}/", self.url) - } else { - format!("{}/?page={page}", self.url) - } - } - Target::Platform { id, .. } => { - if page <= 1 { - format!("{}/platform/{id}", self.url) - } else { - format!("{}/platform/{id}?page={page}", self.url) - } - } - Target::Gender { id, .. } => { - if page <= 1 { - format!("{}/gender/{id}", self.url) - } else { - format!("{}/gender/{id}?page={page}", self.url) - } - } - Target::Profile { username, .. } => { - if page <= 1 { - format!("{}/profile/{username}", self.url) - } else { - format!("{}/profile/{username}?page={page}", self.url) - } - } - } - } - - fn canonical_uploader_id(username: &str) -> String { - format!( - "{CHANNEL_ID}:{}", - utf8_percent_encode(username, NON_ALPHANUMERIC) - ) - } - - fn username_from_uploader_id(value: &str) -> Option { - let suffix = match value.split_once(':') { - Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix, - _ => return None, - }; - percent_decode_str(suffix) - .decode_utf8() - .ok() - .map(|value| value.into_owned()) - .and_then(|value| (!value.trim().is_empty()).then_some(value)) - } - - fn target_from_shortcut(&self, value: &str) -> Option { - let trimmed = value.trim(); - if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("all") { - return None; - } - - if let Some(username) = Self::username_from_uploader_id(trimmed) { - return Some(Target::Profile { - username, - platform: None, - gender: None, - }); - } - - if let Some(username) = trimmed.strip_prefix("profile:") { - let username = username.trim(); - if !username.is_empty() { - return Some(Target::Profile { - username: username.to_string(), - platform: None, - gender: None, - }); - } - } - - if let Some(id) = trimmed.strip_prefix("platform:") { - if let Some((name, _)) = PLATFORM_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { - return Some(Target::Platform { - id: id.to_string(), - title: Self::title_case(name), - }); - } - } - - if let Some(id) = trimmed.strip_prefix("gender:") { - if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { - return Some(Target::Gender { - id: id.to_string(), - title: Self::title_case(name), - }); - } - } - - let normalized = Self::normalize_title(trimmed); - if let Some((name, id)) = PLATFORM_SHORTCUTS - .iter() - .find(|(name, _)| Self::normalize_title(name) == normalized) - { - return Some(Target::Platform { - id: (*id).to_string(), - title: Self::title_case(name), - }); - } - - if let Some((name, id)) = GENDER_SHORTCUTS - .iter() - .find(|(name, _)| Self::normalize_title(name) == normalized) - { - return Some(Target::Gender { - id: (*id).to_string(), - title: Self::title_case(name), - }); - } - - if let Some(option) = self.find_uploader_option(trimmed) { - if let Some(username) = option.id.strip_prefix("profile:") { - return Some(Target::Profile { - username: username.to_string(), - platform: None, - gender: None, - }); - } - } - - None - } - - fn resolve_option_target(&self, options: &ServerOptions) -> Target { - if let Some(value) = options.sites.as_deref() { - if let Some(target) = self.target_from_shortcut(value) { - return target; - } - } - if let Some(value) = options.filter.as_deref() { - if let Some(target) = self.target_from_shortcut(value) { - return target; - } - } - if let Some(value) = options.sort.as_deref() { - if value == "popular" { - return Target::Home; - } - } - Target::Home - } - - fn find_uploader_option(&self, value: &str) -> Option { - let normalized = Self::normalize_title(value); - self.uploaders - .read() - .ok()? - .iter() - .find(|option| { - option.id.eq_ignore_ascii_case(value) - || Self::normalize_title(&option.title) == normalized - }) - .cloned() - } - - fn upsert_uploader_option(&self, username: &str) { - let trimmed = username.trim(); - if trimmed.is_empty() { - return; - } - let option = FilterOption { - id: format!("profile:{trimmed}"), - title: trimmed.to_string(), - }; - let Ok(mut uploaders) = self.uploaders.write() else { - return; - }; - if uploaders - .iter() - .any(|value| value.id.eq_ignore_ascii_case(option.id.as_str())) - { - return; - } - uploaders.push(option); - } - - fn parse_watch_id(url: &str) -> Option { - let watch_regex = Regex::new(r"/watch/(?P[0-9]+)").ok()?; - if let Some(id) = watch_regex - .captures(url) - .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) - { - return Some(id); - } - - let video_regex = Regex::new(r"/video/(?P[A-Za-z0-9_-]+)").ok()?; - if let Some(id) = video_regex - .captures(url) - .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) - { - return Some(id); - } - - let parsed = url::Url::parse(url).ok()?; - parsed - .path_segments() - .and_then(|segments| segments.rev().find(|segment| !segment.trim().is_empty())) - .map(|segment| segment.to_string()) - } - - fn is_allowed_detail_watch_url(url: &str) -> bool { - let Some(parsed) = url::Url::parse(url).ok() else { - return false; - }; - if parsed.scheme() != "https" { - return false; - } - let Some(host) = parsed.host_str() else { - return false; - }; - (host == "archivebate.com" || host == "www.archivebate.com") - && parsed.path().starts_with("/watch/") - } - - fn proxied_video(options: &ServerOptions, detail_url: &str) -> String { - if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) { - return String::new(); - } - - build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url)) - } - - fn parse_duration(text: &str) -> u32 { - let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else { - return 0; - }; - let Some(captures) = regex.captures(text) else { - return 0; - }; - let Some(value) = captures.get(1) else { - return 0; - }; - parse_time_to_seconds(value.as_str()).unwrap_or(0) as u32 - } - - fn parse_views(value: &str) -> Option { - let lowered = value.to_lowercase(); - let cleaned = lowered.replace("views", "").trim().to_string(); - parse_abbreviated_number(&cleaned) - } - - fn parse_uploaded_at(value: &str) -> Option { - let text = value.trim().to_lowercase(); - if text.is_empty() { - return None; - } - if text == "just now" { - return Some(Utc::now().timestamp() as u64); - } - - let regex = - Regex::new(r"^([0-9]+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$").ok()?; - let captures = regex.captures(&text)?; - let amount = captures.get(1)?.as_str().parse::().ok()?; - let unit = captures.get(2)?.as_str(); - let now = Utc::now(); - let parsed = match unit { - "second" => now - ChronoDuration::seconds(amount), - "minute" => now - ChronoDuration::minutes(amount), - "hour" => now - ChronoDuration::hours(amount), - "day" => now - ChronoDuration::days(amount), - "week" => now - ChronoDuration::weeks(amount), - "month" => now - ChronoDuration::days(amount * 30), - "year" => now - ChronoDuration::days(amount * 365), - _ => return None, - }; - Some(parsed.timestamp() as u64) - } - - fn extra_tags_for_target(target: &Target) -> Vec { - match target { - Target::Platform { title, .. } => vec![title.clone()], - Target::Gender { title, .. } => vec![title.clone()], - Target::Profile { platform, gender, .. } => { - let mut tags = Vec::new(); - if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { - tags.push(platform.trim().to_string()); - } - if let Some(gender) = gender.as_ref().filter(|value| !value.trim().is_empty()) { - tags.push(gender.trim().to_string()); - } - tags - } - Target::Home => Vec::new(), - } - } - - fn parse_video_items(&self, html: &str, target: &Target, limit: usize) -> Result> { - let document = Html::parse_fragment(html); - let item_selector = Self::selector("section.video_item, div.n-blog-post-card")?; - let watch_selector = Self::selector("a[href*='/watch/'], a[href*='/video/']")?; - let uploader_selector = Self::selector("div.info a[href*='/profile/'], .n-bpc-details a[href*='search=']")?; - let info_selector = Self::selector("div.info p, .n-bpc-details")?; - let duration_selector = Self::selector("div.duration span, .n-bpc-details")?; - let video_selector = Self::selector("video[poster], img[src]")?; - let source_selector = Self::selector("video source[src], a.room_thumb[data-preview]")?; - - let base_tags = Self::extra_tags_for_target(target); - let mut items = Vec::new(); - - for element in document.select(&item_selector) { - let Some(watch_link) = element.select(&watch_selector).next() else { - continue; - }; - let Some(watch_href) = watch_link.value().attr("href") else { - continue; - }; - let watch_url = self.absolute_url(watch_href); - let Some(video_id) = Self::parse_watch_id(&watch_url) else { - continue; - }; - - let duration_text = element - .select(&duration_selector) - .next() - .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) - .unwrap_or_default(); - let duration = Self::parse_duration(&duration_text); - - let thumb = element - .select(&video_selector) - .next() - .and_then(|value| { - value - .value() - .attr("poster") - .or_else(|| value.value().attr("src")) - .or_else(|| value.value().attr("data-src")) - }) - .map(|value| self.absolute_url(value)) - .unwrap_or_default(); - if thumb.is_empty() { - continue; - } - - let preview = element - .select(&source_selector) - .next() - .and_then(|value| { - value - .value() - .attr("src") - .or_else(|| value.value().attr("data-preview")) - }) - .map(|value| self.absolute_url(value)); - - let uploader_element = element.select(&uploader_selector).next(); - let uploader_name = uploader_element - .as_ref() - .map(|value| Self::decode_text(&value.text().collect::())) - .filter(|value| !value.is_empty()); - let uploader_url = uploader_element.as_ref().and_then(|value| { - value.value().attr("href").map(|value| { - if value.starts_with("http://") || value.starts_with("https://") { - value.to_string() - } else if let Some(search) = value.strip_prefix("/showall/?search=") { - format!("{}/profile/{search}", self.url) - } else { - self.absolute_url(value) - } - }) - }); - let uploader_id = uploader_name - .as_ref() - .map(|value| Self::canonical_uploader_id(value)); - - let info_text = element - .select(&info_selector) - .next() - .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) - .unwrap_or_default(); - let info_parts = info_text - .split('|') - .map(|value| value.trim()) - .filter(|value| !value.is_empty()) - .collect::>(); - - let uploaded_at = info_parts.first().and_then(|value| { - if let Ok(parsed) = chrono::NaiveDateTime::parse_from_str(value, "%Y-%m-%d %H:%M:%S") { - Some(parsed.and_utc().timestamp() as u64) - } else { - Self::parse_uploaded_at(value) - } - }); - let platform = info_parts - .iter() - .find_map(|value| value.strip_prefix("Platform:").map(|v| v.trim().to_string())) - .or_else(|| { - watch_url - .split('/') - .find(|segment| PLATFORM_SHORTCUTS.iter().any(|(name, _)| name == segment)) - .map(|value| value.to_string()) - }); - let views = info_parts.iter().find_map(|value| Self::parse_views(value)); - - let title = watch_link - .value() - .attr("title") - .map(Self::decode_text) - .filter(|value| !value.trim().is_empty()) - .or_else(|| { - element - .select(&Self::selector(".my-3 div, .my-3") .ok()?) - .next() - .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) - .filter(|value| !value.trim().is_empty()) - }) - .unwrap_or_else(|| match (uploader_name.as_deref(), platform.as_deref()) { - (Some(uploader), Some(platform)) => format!("{uploader} on {platform}"), - (Some(uploader), None) => format!("{uploader} on ArchiveBate"), - _ => format!("ArchiveBate video {video_id}"), - }); - - let mut tags = base_tags.clone(); - if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { - tags.push(platform.clone()); - } - let mut unique = HashSet::new(); - let mut deduped = Vec::new(); - for tag in tags { - let normalized = Self::normalize_title(&tag); - if normalized.is_empty() || unique.contains(&normalized) { - continue; - } - unique.insert(normalized); - deduped.push(tag); - } - - let mut item = VideoItem::new( - video_id, - title, - watch_url, - CHANNEL_ID.to_string(), - thumb, - duration, - ); - item.uploader = uploader_name.clone(); - item.uploaderUrl = uploader_url; - item.uploaderId = uploader_id; - item.uploadedAt = uploaded_at; - item.views = views; - item.tags = (!deduped.is_empty()).then_some(deduped); - item.preview = preview; - - if let Some(uploader) = uploader_name { - self.upsert_uploader_option(&uploader); - } - - items.push(item); - if items.len() >= limit { - break; - } - } - - Ok(items) - } - - fn parse_mixin_packed_eval(html: &str) -> Option { - let eval_regex = Regex::new( - r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P.*?)',\s*(?P[0-9]+),\s*(?P[0-9]+),\s*'(?P.*?)'\.split\('\|'\)"#, - ) - .ok()?; - let captures = eval_regex.captures(html)?; - let payload_raw = captures.name("payload")?.as_str(); - let radix = captures.name("radix")?.as_str().parse::().ok()?; - let count = captures.name("count")?.as_str().parse::().ok()?; - if !(2..=36).contains(&radix) { - return None; - } - - let payload = Self::unescape_js_single_quoted(payload_raw); - let tokens_raw = captures.name("tokens")?.as_str(); - let tokens = tokens_raw.split('|').collect::>(); - let mut unpacked = payload; - - for index in (0..count).rev() { - let Some(token) = tokens.get(index) else { - continue; - }; - if token.is_empty() { - continue; - } - let key = Self::to_radix(index, radix); - let pattern = format!(r"\b{}\b", regex::escape(&key)); - let re = Regex::new(&pattern).ok()?; - unpacked = re.replace_all(&unpacked, *token).into_owned(); - } - - Some(unpacked) - } - - fn unescape_js_single_quoted(value: &str) -> String { - let mut output = String::with_capacity(value.len()); - let mut chars = value.chars(); - while let Some(character) = chars.next() { - if character != '\\' { - output.push(character); - continue; - } - let Some(next) = chars.next() else { - break; - }; - match next { - '\\' => output.push('\\'), - '\'' => output.push('\''), - '"' => output.push('"'), - 'n' => output.push('\n'), - 'r' => output.push('\r'), - 't' => output.push('\t'), - _ => output.push(next), - } - } - output - } - - fn to_radix(mut value: usize, radix: u32) -> String { - if value == 0 { - return "0".to_string(); - } - let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz"; - let mut out = Vec::new(); - while value > 0 { - let digit = value % radix as usize; - out.push(alphabet[digit] as char); - value /= radix as usize; - } - out.iter().rev().collect() - } - - fn extract_mixdrop_media_url(html: &str) -> Option { - let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; - if let Some(url) = direct_regex - .captures(html) - .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) - { - return Some(Self::normalize_possible_protocol_relative(&url)); - } - - let unpacked = Self::parse_mixin_packed_eval(html)?; - let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; - unpacked_regex - .captures(&unpacked) - .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) - .map(|value| Self::normalize_possible_protocol_relative(&value)) - } - - fn normalize_possible_protocol_relative(value: &str) -> String { - let trimmed = value.trim(); - if trimmed.starts_with("//") { - format!("https:{trimmed}") - } else { - trimmed.to_string() - } - } - - fn host_from_url(url: &str) -> Option { - let parsed = url::Url::parse(url).ok()?; - parsed.host_str().map(|value| value.to_ascii_lowercase()) - } - - fn is_mixdrop_host(url: &str) -> bool { - let Some(host) = Self::host_from_url(url) else { - return false; - }; - host.contains("mixdrop") || host.contains("m1xdrop") - } - - fn download_fid_from_detail_html(html: &str) -> Option { - let document = Html::parse_document(html); - let selector = Selector::parse("input[name='fid'][value]").ok()?; - document - .select(&selector) - .next() - .and_then(|node| node.value().attr("value")) - .map(str::trim) - .filter(|value| !value.is_empty()) - .map(ToOwned::to_owned) - } - - fn mixdrop_embed_url_from_download_url(url: &str) -> Option { - let parsed = url::Url::parse(url).ok()?; - let host = parsed.host_str()?; - let host_lc = host.to_ascii_lowercase(); - if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") { - return None; - } - - let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty()); - let kind = segments.next()?.to_ascii_lowercase(); - if kind != "e" && kind != "f" { - return None; - } - let media_id = segments.next()?.trim(); - if media_id.is_empty() { - return None; - } - - Some(format!("{}://{host}/e/{media_id}", parsed.scheme())) - } - - fn video_format_with_headers( - format: VideoFormat, - headers: Vec<(String, String)>, - ) -> VideoFormat { - if headers.is_empty() { - return format; - } - - let header_map: HashMap = headers - .into_iter() - .filter_map(|(key, value)| { - let key = key.trim().to_string(); - let value = value.trim().to_string(); - if key.is_empty() || value.is_empty() { - return None; - } - Some((key, value)) - }) - .collect(); - - if header_map.is_empty() { - return format; - } - - let mut value = match serde_json::to_value(&format) { - Ok(value) => value, - Err(_) => return format, - }; - - if let Value::Object(object) = &mut value { - let Ok(headers_value) = serde_json::to_value(header_map) else { - return format; - }; - object.insert("http_headers".to_string(), headers_value); - if let Ok(updated) = serde_json::from_value::(value) { - return updated; - } - } - - format - } - - fn first_video_source_from_html(html: &str) -> Option { - let document = Html::parse_document(html); - let source_selector = Selector::parse("video source[src]").ok()?; - let video_src_selector = Selector::parse("video[src]").ok()?; - - if let Some(value) = document - .select(&source_selector) - .next() - .and_then(|node| node.value().attr("src")) - { - return Some(value.to_string()); - } - document - .select(&video_src_selector) - .next() - .and_then(|node| node.value().attr("src")) - .map(|value| value.to_string()) - } - - fn first_iframe_source_from_html(html: &str) -> Option { - let document = Html::parse_document(html); - let iframe_selector = Selector::parse("iframe[src]").ok()?; - document - .select(&iframe_selector) - .next() - .and_then(|node| node.value().attr("src")) - .map(|value| value.to_string()) - } - - async fn resolve_mixdrop_media_from_iframe( - &self, - iframe_url: &str, - referer: &str, - options: &ServerOptions, - ) -> Option { - let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media"); - let response = requester - .get_raw_with_headers_timeout( - iframe_url, - self.html_headers(referer), - Some(StdDuration::from_secs(6)), - ) - .await - .ok()?; - if !response.status().is_success() { - return None; - } - let iframe_html = response.text().await.ok()?; - let media_url = Self::extract_mixdrop_media_url(&iframe_html)?; - Some(ResolvedMixdropMedia { - media_url, - embed_url: iframe_url.to_string(), - }) - } - - async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { - let page_url = item.url.clone(); - let format_url = Self::proxied_video(options, &page_url); - if format_url.is_empty() { - return item; - } - - let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string()); - let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video"); - if let Ok(detail_html) = requester - .get_with_headers( - &page_url, - self.html_headers(&format!("{}/", self.url)), - Some(wreq::Version::HTTP_11), - ) - .await - { - let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html) - .map(|value| self.absolute_url(&value)) - .filter(|value| Self::is_mixdrop_host(value)); - - if mixdrop_embed_url.is_none() { - mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html) - .map(|value| self.absolute_url(&value)) - .and_then(|value| Self::mixdrop_embed_url_from_download_url(&value)); - } - - if let Some(embed_url) = mixdrop_embed_url { - format = Self::video_format_with_headers( - format, - vec![ - ("Referer".to_string(), embed_url), - ("User-Agent".to_string(), FIREFOX_UA.to_string()), - ], - ); - } - } - - let mut enriched = item; - enriched.formats = Some(vec![format]); - enriched - } - - fn extract_csrf_token(html: &str) -> Option { - let regex = Regex::new(r#" Option { - let decoded = Self::decode_text(raw); - (!decoded.trim().is_empty()).then_some(decoded) - } - - fn livewire_method(component: &str) -> Option<&'static str> { - match component { - "home-videos" => Some("loadVideos"), - "filter.platform" => Some("load_platform_videos"), - "profile.model-videos" => Some("load_profile_videos"), - _ => None, - } - } - - fn extract_json_object(source: &str, key: &str) -> Option { - let needle = format!("\"{key}\":"); - let start = source.find(&needle)? + needle.len(); - let bytes = source.as_bytes(); - let mut index = start; - while index < bytes.len() && bytes[index].is_ascii_whitespace() { - index += 1; - } - if index >= bytes.len() || bytes[index] != b'{' { - return None; - } - - let mut depth = 0usize; - let mut in_string = false; - let mut escaped = false; - for end in index..bytes.len() { - let byte = bytes[end]; - if in_string { - if escaped { - escaped = false; - } else if byte == b'\\' { - escaped = true; - } else if byte == b'"' { - in_string = false; - } - continue; - } - - match byte { - b'"' => in_string = true, - b'{' => depth += 1, - b'}' => { - depth = depth.saturating_sub(1); - if depth == 0 { - return Some(source[index..=end].to_string()); - } - } - _ => {} - } - } - - None - } - - fn extract_livewire_initial(html: &str) -> Result> { - let regex = Self::regex(r#"wire:initial-data="([^"]+)""#)?; - for captures in regex.captures_iter(html) { - let Some(raw) = captures.get(1).map(|value| value.as_str()) else { - continue; - }; - let Some(decoded) = Self::decode_livewire_json_attr(raw) else { - continue; - }; - let Some(fingerprint_json) = Self::extract_json_object(&decoded, "fingerprint") else { - continue; - }; - let Some(server_memo_json) = Self::extract_json_object(&decoded, "serverMemo") else { - continue; - }; - let component_regex = Self::regex(r#""name":"([^"]+)""#)?; - let Some(component) = component_regex - .captures(&fingerprint_json) - .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) - else { - continue; - }; - let Some(method) = Self::livewire_method(&component) else { - continue; - }; - return Ok(Some(LivewireInitialData { - component, - method: method.to_string(), - fingerprint_json, - server_memo_json, - })); - } - Ok(None) - } - - async fn fetch_livewire_html( - &self, - target_url: &str, - options: &ServerOptions, - ) -> Result { - let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_livewire_html.page"); - let page_html = requester - .get_with_headers(target_url, self.html_headers(&format!("{}/", self.url)), Some(wreq::Version::HTTP_11)) - .await - .map_err(|error| Error::from(format!("fetch page failed for {target_url}: {error}")))?; - - let csrf = Self::extract_csrf_token(&page_html); - let initial = Self::extract_livewire_initial(&page_html)?; - let csrf_present = csrf.is_some(); - let initial_present = initial.is_some(); - let (Some(csrf), Some(initial)) = (csrf, initial) else { - crate::flow_debug!( - "archivebate1 livewire fallback target={} csrf_present={} initial_present={}", - crate::util::flow_debug::preview(target_url, 120), - csrf_present, - initial_present - ); - return Ok(page_html); - }; - - let update_id = format!("ab{}", Utc::now().timestamp_micros()); - let payload = format!( - "{{\"fingerprint\":{},\"serverMemo\":{},\"updates\":[{{\"type\":\"callMethod\",\"payload\":{{\"id\":\"{}\",\"method\":\"{}\",\"params\":[]}}}}]}}", - initial.fingerprint_json, initial.server_memo_json, update_id, initial.method - ); - let livewire_url = format!("{}/livewire/message/{}", self.url, initial.component); - let referer = target_url.to_string(); - let user_agent = FIREFOX_UA.to_string(); - let accept = "application/json, text/plain, */*".to_string(); - let response = requester - .post( - &livewire_url, - &payload, - vec![ - ("Content-Type", "application/json"), - ("X-Requested-With", "XMLHttpRequest"), - ("X-CSRF-TOKEN", csrf.as_str()), - ("Referer", referer.as_str()), - ("User-Agent", user_agent.as_str()), - ("Accept", accept.as_str()), - ], - ) - .await - .map_err(|error| { - Error::from(format!( - "livewire request failed for {target_url} component={}: {error}", - initial.component - )) - })?; - let response_text = response - .text() - .await - .map_err(|error| Error::from(format!("livewire response text failed: {error}")))?; - let parsed: Value = serde_json::from_str(&response_text)?; - let effects_html = parsed - .get("effects") - .and_then(|value| value.get("html")) - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - crate::flow_debug!( - "archivebate livewire component={} target={} effects_html_len={} body_preview={}", - initial.component, - crate::util::flow_debug::preview(target_url, 120), - effects_html.len(), - crate::util::flow_debug::preview(&response_text, 120) - ); - if !effects_html.trim().is_empty() { - return Ok(effects_html); - } - - Ok(page_html) - } - - async fn fetch_items_for_target( - &self, - cache: VideoCache, - target: &Target, - page: u16, - per_page: usize, - options: &ServerOptions, - ) -> Result> { - let target_url = self.build_target_url(target, page); - let cache_key = format!("{CHANNEL_ID}:{target_url}"); - if let Some((time, items)) = cache.get(&cache_key) { - if time.elapsed().unwrap_or_default().as_secs() < 60 * 10 { - return Ok(items.iter().take(per_page).cloned().collect()); - } - } - - let html = self.fetch_livewire_html(&target_url, options).await?; - let items = self.parse_video_items(&html, target, per_page)?; - crate::flow_debug!( - "archivebate parsed target={} page={} items={} html_preview={}", - crate::util::flow_debug::preview(&target_url, 120), - page, - items.len(), - crate::util::flow_debug::preview(&html, 120) - ); - if !items.is_empty() { - cache.insert(cache_key, items.clone()); - } - Ok(items) - } - - async fn refresh_uploaders_from_home(&self, options: &ServerOptions) -> Result<()> { - let items = self - .fetch_items_for_target( - VideoCache::new(), - &Target::Home, - 1, - 120, - options, - ) - .await?; - for item in items { - if let Some(uploader) = item.uploader { - self.upsert_uploader_option(&uploader); - } - } - Ok(()) - } - - async fn search_profiles( - &self, - query: &str, - page: u16, - options: &ServerOptions, - ) -> Result> { - let query_string = { - let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - serializer.append_pair("query", query); - serializer.append_pair("page", &page.max(1).to_string()); - serializer.finish() - }; - let search_url = format!("{}/api/v1/search?{query_string}", self.url); - - let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_search_target"); - let response = requester - .get_raw_with_headers(&search_url, self.search_headers(&format!("{}/", self.url))) - .await - .map_err(|error| Error::from(format!("search request failed: {error}")))?; - if !response.status().is_success() { - return Err(Error::from(format!( - "search request returned status {}", - response.status() - ))); - } - let body = response - .text() - .await - .map_err(|error| Error::from(format!("search response text failed: {error}")))?; - let parsed: SearchResponse = serde_json::from_str(&body)?; - Ok(parsed.data) - } - - async fn get_default( - &self, - cache: VideoCache, - page: u16, - per_page: usize, - options: ServerOptions, - ) -> Result> { - let target = self.resolve_option_target(&options); - self.fetch_items_for_target(cache, &target, page, per_page, &options) - .await - } - - async fn query( - &self, - cache: VideoCache, - page: u16, - per_page: usize, - query: &str, - options: ServerOptions, - ) -> Result> { - if let Some(target) = self.target_from_shortcut(query) { - return self - .fetch_items_for_target(cache, &target, page, per_page, &options) - .await; - } - - let mut candidates = self.search_profiles(query, page, &options).await?; - if candidates.is_empty() { - return Ok(vec![]); - } - - let normalized_query = Self::normalize_title(query); - candidates.sort_by_key(|entry| { - if Self::normalize_title(&entry.username) == normalized_query { - 0 - } else { - 1 - } - }); - - for candidate in candidates.into_iter().take(8) { - let username = candidate.username.trim(); - if username.is_empty() { - continue; - } - let target = Target::Profile { - username: username.to_string(), - platform: candidate.platform.clone(), - gender: candidate.gender.clone(), - }; - let items = self - .fetch_items_for_target(cache.clone(), &target, 1, per_page, &options) - .await?; - if !items.is_empty() { - return Ok(items); - } - } - - Ok(vec![]) - } -} - -#[async_trait] -impl Provider for ArchivebateProvider { - async fn get_videos( - &self, - cache: VideoCache, - pool: DbPool, - _sort: String, - query: Option, - page: String, - per_page: String, - options: ServerOptions, - ) -> Vec { - let _ = pool; - let page = page.parse::().unwrap_or(1); - let per_page = per_page.parse::().unwrap_or(36).clamp(1, 120); - - let result = match query { - Some(query) if !query.trim().is_empty() => { - self.query(cache, page, per_page, &query, options.clone()).await - } - _ => self.get_default(cache, page, per_page, options.clone()).await, - }; - - match result { - Ok(videos) => { - if videos.is_empty() { - return videos; - } - stream::iter(videos.into_iter().map(|video| { - let provider = self.clone(); - let options = options.clone(); - async move { - let timeout_result = timeout( - StdDuration::from_secs(8), - provider.enrich_video(video.clone(), &options), - ) - .await; - match timeout_result { - Ok(enriched) => enriched, - Err(_) => video, - } - } - })) - .buffer_unordered(4) - .collect::>() - .await - } - Err(error) => { - report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; - vec![] - } - } - } - - fn get_channel(&self, clientversion: ClientVersion) -> Option { - Some(self.build_channel(clientversion)) - } -} - -#[cfg(test)] -mod tests { - use super::ArchivebateProvider; - - #[test] - fn extracts_mixdrop_wurl_from_packed_eval() { - let html = r#" - -"#; - let actual = ArchivebateProvider::extract_mixdrop_media_url(html) - .expect("expected mixdrop media url"); - assert_eq!( - actual, - "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168" - ); - } -} diff --git a/src/util/requester.rs b/src/util/requester.rs index c9fa8a1..eaf9a41 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -175,13 +175,6 @@ impl Requester { Ok(response.text().await?) } - #[cfg(any( - not(hottub_single_provider), - hottub_provider = "archivebate", - hottub_provider = "archivebate1", - hottub_provider = "hypnotube", - hottub_provider = "vjav", - ))] fn debug_cookie_preview_from_borrowed_headers( &self, url: &str, @@ -377,14 +370,6 @@ impl Requester { Ok(response) } - #[cfg(any( - not(hottub_single_provider), - hottub_provider = "archivebate", - hottub_provider = "archivebate1", - hottub_provider = "hypnotube", - hottub_provider = "freeuseporn", - hottub_provider = "vjav", - ))] pub async fn post( &mut self, url: &str,