From 6a72f84c17a2996bd5827cea9c71f89900989290 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 22 Apr 2026 14:01:08 +0000 Subject: [PATCH] archivebate1 --- build.rs | 5 + src/providers/archivebate1.rs | 1504 +++++++++++++++++++++++++++++++++ src/providers/chaturbate.rs | 2 +- src/util/requester.rs | 2 + 4 files changed, 1512 insertions(+), 1 deletion(-) create mode 100644 src/providers/archivebate1.rs diff --git a/build.rs b/build.rs index 707fc04..63ae4a4 100644 --- a/build.rs +++ b/build.rs @@ -271,6 +271,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "archivebate", ty: "ArchivebateProvider", }, + ProviderDef { + id: "archivebate1", + module: "archivebate1", + ty: "ArchivebateProvider", + }, ]; fn main() { diff --git a/src/providers/archivebate1.rs b/src/providers/archivebate1.rs new file mode 100644 index 0000000..ed87a62 --- /dev/null +++ b/src/providers/archivebate1.rs @@ -0,0 +1,1504 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, build_proxy_url, report_provider_error, report_provider_error_background, + requester_or_default, strip_url_scheme, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use chrono::{Duration as ChronoDuration, Utc}; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode}; +use regex::Regex; +use scraper::{Html, Selector}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, RwLock}; +use std::thread; +use std::time::Duration as StdDuration; +use tokio::time::timeout; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "live-cams", + tags: &["archive", "cams", "recordings"], + }; + +error_chain! { + foreign_links { + Io(std::io::Error); + Json(serde_json::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +const BASE_URL: &str = "https://archivebate1.com"; +const CHANNEL_ID: &str = "archivebate1"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[ + ("chaturbate", "Y2hhdHVyYmF0ZQ=="), + ("stripchat", "c3RyaXBjaGF0"), + ("camsoda", "Y2Ftc29kYQ=="), + ("cam4", "Y2FtNA=="), + ("bongacams", "Ym9uZ2FjYW1z"), + ("onlyfans", "b25seWZhbnM="), + ("twitch", "dHdpdGNo"), + ("youtube", "eW91dHViZQ=="), + ("instagram", "aW5zdGFncmFt"), + ("tiktok", "dGlrdG9r"), +]; + +const GENDER_SHORTCUTS: &[(&str, &str)] = &[ + ("female", "ZmVtYWxl"), + ("couple", "Y291cGxl"), + ("male", "bWFsZQ=="), + ("trans", "dHJhbnM="), +]; + +#[derive(Debug, Clone)] +pub struct ArchivebateProvider { + url: String, + filters: Arc>>, + uploaders: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + Home, + Platform { + id: String, + title: String, + }, + Gender { + id: String, + title: String, + }, + Profile { + username: String, + platform: Option, + gender: Option, + }, +} + +#[derive(Debug, Deserialize, Clone)] +struct SearchResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +struct SearchProfile { + #[serde(default)] + username: String, + #[serde(default)] + platform: Option, + #[serde(default)] + gender: Option, +} + +#[derive(Debug, Clone)] +struct LivewireInitialData { + component: String, + method: String, + fingerprint_json: String, + server_memo_json: String, +} + +#[derive(Debug, Clone)] +struct ResolvedMixdropMedia { + media_url: String, + embed_url: String, +} + +impl ArchivebateProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + filters: Arc::new(RwLock::new(Self::build_default_filters())), + uploaders: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let provider = self.clone(); + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: None, + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: Some("new".to_string()), + sexuality: None, + }; + if let Err(error) = provider.refresh_uploaders_from_home(&options).await { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.refresh_uploaders", + &error.to_string(), + ); + } + }); + }); + } + + fn build_default_filters() -> Vec { + let mut filters = vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }]; + + for (title, id) in GENDER_SHORTCUTS { + filters.push(FilterOption { + id: format!("gender:{id}"), + title: format!("Gender: {}", Self::title_case(title)), + }); + } + + for (title, id) in PLATFORM_SHORTCUTS { + filters.push(FilterOption { + id: format!("platform:{id}"), + title: format!("Platform: {}", Self::title_case(title)), + }); + } + + filters + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let filters = self + .filters + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let uploaders = self + .uploaders + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "ArchiveBate 1".to_string(), + description: "ArchiveBate 1 cam recording feeds.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), + status: "work in progress".to_string(), + categories: filters.iter().skip(1).map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse ArchiveBate newest uploads.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Popular".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Shortcuts".to_string(), + description: "Direct platform and gender archive routes.".to_string(), + systemImage: "tag.fill".to_string(), + colorName: "green".to_string(), + options: filters, + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Uploaders".to_string(), + description: "Profile shortcuts discovered from latest listings.".to_string(), + systemImage: "person.crop.square".to_string(), + colorName: "purple".to_string(), + options: uploaders, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(900), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` parse failed: {error}"))) + } + + fn decode_text(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + .replace('\u{a0}', " ") + .trim() + .to_string() + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn absolute_url(&self, value: &str) -> String { + if value.starts_with("http://") || value.starts_with("https://") { + return value.to_string(); + } + if value.starts_with("//") { + return format!("https:{value}"); + } + format!( + "{}/{}", + self.url.trim_end_matches('/'), + value.trim_start_matches('/') + ) + } + + fn normalize_title(value: &str) -> String { + let mut normalized = String::new(); + let mut last_space = true; + for character in value.chars().flat_map(|character| character.to_lowercase()) { + if character.is_alphanumeric() { + normalized.push(character); + last_space = false; + } else if !last_space { + normalized.push(' '); + last_space = true; + } + } + normalized.trim().to_string() + } + + fn title_case(value: &str) -> String { + let mut chars = value.chars(); + let Some(first) = chars.next() else { + return String::new(); + }; + let mut out = first.to_uppercase().to_string(); + out.push_str(chars.as_str()); + out + } + + fn html_headers(&self, referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + fn search_headers(&self, referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ( + "Accept".to_string(), + "application/json, text/javascript, */*; q=0.01".to_string(), + ), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()), + ] + } + + fn build_target_url(&self, target: &Target, page: u16) -> String { + let page = page.max(1); + match target { + Target::Home => { + if page <= 1 { + format!("{}/", self.url) + } else { + format!("{}/?page={page}", self.url) + } + } + Target::Platform { id, .. } => { + if page <= 1 { + format!("{}/platform/{id}", self.url) + } else { + format!("{}/platform/{id}?page={page}", self.url) + } + } + Target::Gender { id, .. } => { + if page <= 1 { + format!("{}/gender/{id}", self.url) + } else { + format!("{}/gender/{id}?page={page}", self.url) + } + } + Target::Profile { username, .. } => { + if page <= 1 { + format!("{}/profile/{username}", self.url) + } else { + format!("{}/profile/{username}?page={page}", self.url) + } + } + } + } + + fn canonical_uploader_id(username: &str) -> String { + format!( + "{CHANNEL_ID}:{}", + utf8_percent_encode(username, NON_ALPHANUMERIC) + ) + } + + fn username_from_uploader_id(value: &str) -> Option { + let suffix = match value.split_once(':') { + Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix, + _ => return None, + }; + percent_decode_str(suffix) + .decode_utf8() + .ok() + .map(|value| value.into_owned()) + .and_then(|value| (!value.trim().is_empty()).then_some(value)) + } + + fn target_from_shortcut(&self, value: &str) -> Option { + let trimmed = value.trim(); + if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("all") { + return None; + } + + if let Some(username) = Self::username_from_uploader_id(trimmed) { + return Some(Target::Profile { + username, + platform: None, + gender: None, + }); + } + + if let Some(username) = trimmed.strip_prefix("profile:") { + let username = username.trim(); + if !username.is_empty() { + return Some(Target::Profile { + username: username.to_string(), + platform: None, + gender: None, + }); + } + } + + if let Some(id) = trimmed.strip_prefix("platform:") { + if let Some((name, _)) = PLATFORM_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { + return Some(Target::Platform { + id: id.to_string(), + title: Self::title_case(name), + }); + } + } + + if let Some(id) = trimmed.strip_prefix("gender:") { + if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { + return Some(Target::Gender { + id: id.to_string(), + title: Self::title_case(name), + }); + } + } + + let normalized = Self::normalize_title(trimmed); + if let Some((name, id)) = PLATFORM_SHORTCUTS + .iter() + .find(|(name, _)| Self::normalize_title(name) == normalized) + { + return Some(Target::Platform { + id: (*id).to_string(), + title: Self::title_case(name), + }); + } + + if let Some((name, id)) = GENDER_SHORTCUTS + .iter() + .find(|(name, _)| Self::normalize_title(name) == normalized) + { + return Some(Target::Gender { + id: (*id).to_string(), + title: Self::title_case(name), + }); + } + + if let Some(option) = self.find_uploader_option(trimmed) { + if let Some(username) = option.id.strip_prefix("profile:") { + return Some(Target::Profile { + username: username.to_string(), + platform: None, + gender: None, + }); + } + } + + None + } + + fn resolve_option_target(&self, options: &ServerOptions) -> Target { + if let Some(value) = options.sites.as_deref() { + if let Some(target) = self.target_from_shortcut(value) { + return target; + } + } + if let Some(value) = options.filter.as_deref() { + if let Some(target) = self.target_from_shortcut(value) { + return target; + } + } + if let Some(value) = options.sort.as_deref() { + if value == "popular" { + return Target::Home; + } + } + Target::Home + } + + fn find_uploader_option(&self, value: &str) -> Option { + let normalized = Self::normalize_title(value); + self.uploaders + .read() + .ok()? + .iter() + .find(|option| { + option.id.eq_ignore_ascii_case(value) + || Self::normalize_title(&option.title) == normalized + }) + .cloned() + } + + fn upsert_uploader_option(&self, username: &str) { + let trimmed = username.trim(); + if trimmed.is_empty() { + return; + } + let option = FilterOption { + id: format!("profile:{trimmed}"), + title: trimmed.to_string(), + }; + let Ok(mut uploaders) = self.uploaders.write() else { + return; + }; + if uploaders + .iter() + .any(|value| value.id.eq_ignore_ascii_case(option.id.as_str())) + { + return; + } + uploaders.push(option); + } + + fn parse_watch_id(url: &str) -> Option { + let watch_regex = Regex::new(r"/watch/(?P[0-9]+)").ok()?; + if let Some(id) = watch_regex + .captures(url) + .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) + { + return Some(id); + } + + let video_regex = Regex::new(r"/video/(?P[A-Za-z0-9_-]+)").ok()?; + if let Some(id) = video_regex + .captures(url) + .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) + { + return Some(id); + } + + let parsed = url::Url::parse(url).ok()?; + parsed + .path_segments() + .and_then(|segments| segments.rev().find(|segment| !segment.trim().is_empty())) + .map(|segment| segment.to_string()) + } + + fn is_allowed_detail_watch_url(url: &str) -> bool { + let Some(parsed) = url::Url::parse(url).ok() else { + return false; + }; + if parsed.scheme() != "https" { + return false; + } + let Some(host) = parsed.host_str() else { + return false; + }; + (host == "archivebate.com" || host == "www.archivebate.com") + && parsed.path().starts_with("/watch/") + } + + fn proxied_video(options: &ServerOptions, detail_url: &str) -> String { + if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) { + return String::new(); + } + + build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url)) + } + + fn parse_duration(text: &str) -> u32 { + let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else { + return 0; + }; + let Some(captures) = regex.captures(text) else { + return 0; + }; + let Some(value) = captures.get(1) else { + return 0; + }; + parse_time_to_seconds(value.as_str()).unwrap_or(0) as u32 + } + + fn parse_views(value: &str) -> Option { + let lowered = value.to_lowercase(); + let cleaned = lowered.replace("views", "").trim().to_string(); + parse_abbreviated_number(&cleaned) + } + + fn parse_uploaded_at(value: &str) -> Option { + let text = value.trim().to_lowercase(); + if text.is_empty() { + return None; + } + if text == "just now" { + return Some(Utc::now().timestamp() as u64); + } + + let regex = + Regex::new(r"^([0-9]+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$").ok()?; + let captures = regex.captures(&text)?; + let amount = captures.get(1)?.as_str().parse::().ok()?; + let unit = captures.get(2)?.as_str(); + let now = Utc::now(); + let parsed = match unit { + "second" => now - ChronoDuration::seconds(amount), + "minute" => now - ChronoDuration::minutes(amount), + "hour" => now - ChronoDuration::hours(amount), + "day" => now - ChronoDuration::days(amount), + "week" => now - ChronoDuration::weeks(amount), + "month" => now - ChronoDuration::days(amount * 30), + "year" => now - ChronoDuration::days(amount * 365), + _ => return None, + }; + Some(parsed.timestamp() as u64) + } + + fn extra_tags_for_target(target: &Target) -> Vec { + match target { + Target::Platform { title, .. } => vec![title.clone()], + Target::Gender { title, .. } => vec![title.clone()], + Target::Profile { platform, gender, .. } => { + let mut tags = Vec::new(); + if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(platform.trim().to_string()); + } + if let Some(gender) = gender.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(gender.trim().to_string()); + } + tags + } + Target::Home => Vec::new(), + } + } + + fn parse_video_items(&self, html: &str, target: &Target, limit: usize) -> Result> { + let document = Html::parse_fragment(html); + let item_selector = Self::selector("section.video_item, div.n-blog-post-card")?; + let watch_selector = Self::selector("a[href*='/watch/'], a[href*='/video/']")?; + let uploader_selector = Self::selector("div.info a[href*='/profile/'], .n-bpc-details a[href*='search=']")?; + let info_selector = Self::selector("div.info p, .n-bpc-details")?; + let duration_selector = Self::selector("div.duration span, .n-bpc-details")?; + let video_selector = Self::selector("video[poster], img[src]")?; + let source_selector = Self::selector("video source[src], a.room_thumb[data-preview]")?; + + let base_tags = Self::extra_tags_for_target(target); + let mut items = Vec::new(); + + for element in document.select(&item_selector) { + let Some(watch_link) = element.select(&watch_selector).next() else { + continue; + }; + let Some(watch_href) = watch_link.value().attr("href") else { + continue; + }; + let watch_url = self.absolute_url(watch_href); + let Some(video_id) = Self::parse_watch_id(&watch_url) else { + continue; + }; + + let duration_text = element + .select(&duration_selector) + .next() + .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) + .unwrap_or_default(); + let duration = Self::parse_duration(&duration_text); + + let thumb = element + .select(&video_selector) + .next() + .and_then(|value| { + value + .value() + .attr("poster") + .or_else(|| value.value().attr("src")) + .or_else(|| value.value().attr("data-src")) + }) + .map(|value| self.absolute_url(value)) + .unwrap_or_default(); + if thumb.is_empty() { + continue; + } + + let preview = element + .select(&source_selector) + .next() + .and_then(|value| { + value + .value() + .attr("src") + .or_else(|| value.value().attr("data-preview")) + }) + .map(|value| self.absolute_url(value)); + + let uploader_element = element.select(&uploader_selector).next(); + let uploader_name = uploader_element + .as_ref() + .map(|value| Self::decode_text(&value.text().collect::())) + .filter(|value| !value.is_empty()); + let uploader_url = uploader_element.as_ref().and_then(|value| { + value.value().attr("href").map(|value| { + if value.starts_with("http://") || value.starts_with("https://") { + value.to_string() + } else if let Some(search) = value.strip_prefix("/showall/?search=") { + format!("{}/profile/{search}", self.url) + } else { + self.absolute_url(value) + } + }) + }); + let uploader_id = uploader_name + .as_ref() + .map(|value| Self::canonical_uploader_id(value)); + + let info_text = element + .select(&info_selector) + .next() + .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) + .unwrap_or_default(); + let info_parts = info_text + .split('|') + .map(|value| value.trim()) + .filter(|value| !value.is_empty()) + .collect::>(); + + let uploaded_at = info_parts.first().and_then(|value| { + if let Ok(parsed) = chrono::NaiveDateTime::parse_from_str(value, "%Y-%m-%d %H:%M:%S") { + Some(parsed.and_utc().timestamp() as u64) + } else { + Self::parse_uploaded_at(value) + } + }); + let platform = info_parts + .iter() + .find_map(|value| value.strip_prefix("Platform:").map(|v| v.trim().to_string())) + .or_else(|| { + watch_url + .split('/') + .find(|segment| PLATFORM_SHORTCUTS.iter().any(|(name, _)| name == segment)) + .map(|value| value.to_string()) + }); + let views = info_parts.iter().find_map(|value| Self::parse_views(value)); + + let title = watch_link + .value() + .attr("title") + .map(Self::decode_text) + .filter(|value| !value.trim().is_empty()) + .or_else(|| { + element + .select(&Self::selector(".my-3 div, .my-3") .ok()?) + .next() + .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) + .filter(|value| !value.trim().is_empty()) + }) + .unwrap_or_else(|| match (uploader_name.as_deref(), platform.as_deref()) { + (Some(uploader), Some(platform)) => format!("{uploader} on {platform}"), + (Some(uploader), None) => format!("{uploader} on ArchiveBate"), + _ => format!("ArchiveBate video {video_id}"), + }); + + let mut tags = base_tags.clone(); + if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(platform.clone()); + } + let mut unique = HashSet::new(); + let mut deduped = Vec::new(); + for tag in tags { + let normalized = Self::normalize_title(&tag); + if normalized.is_empty() || unique.contains(&normalized) { + continue; + } + unique.insert(normalized); + deduped.push(tag); + } + + let mut item = VideoItem::new( + video_id, + title, + watch_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.uploader = uploader_name.clone(); + item.uploaderUrl = uploader_url; + item.uploaderId = uploader_id; + item.uploadedAt = uploaded_at; + item.views = views; + item.tags = (!deduped.is_empty()).then_some(deduped); + item.preview = preview; + + if let Some(uploader) = uploader_name { + self.upsert_uploader_option(&uploader); + } + + items.push(item); + if items.len() >= limit { + break; + } + } + + Ok(items) + } + + fn parse_mixin_packed_eval(html: &str) -> Option { + let eval_regex = Regex::new( + r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P.*?)',\s*(?P[0-9]+),\s*(?P[0-9]+),\s*'(?P.*?)'\.split\('\|'\)"#, + ) + .ok()?; + let captures = eval_regex.captures(html)?; + let payload_raw = captures.name("payload")?.as_str(); + let radix = captures.name("radix")?.as_str().parse::().ok()?; + let count = captures.name("count")?.as_str().parse::().ok()?; + if !(2..=36).contains(&radix) { + return None; + } + + let payload = Self::unescape_js_single_quoted(payload_raw); + let tokens_raw = captures.name("tokens")?.as_str(); + let tokens = tokens_raw.split('|').collect::>(); + let mut unpacked = payload; + + for index in (0..count).rev() { + let Some(token) = tokens.get(index) else { + continue; + }; + if token.is_empty() { + continue; + } + let key = Self::to_radix(index, radix); + let pattern = format!(r"\b{}\b", regex::escape(&key)); + let re = Regex::new(&pattern).ok()?; + unpacked = re.replace_all(&unpacked, *token).into_owned(); + } + + Some(unpacked) + } + + fn unescape_js_single_quoted(value: &str) -> String { + let mut output = String::with_capacity(value.len()); + let mut chars = value.chars(); + while let Some(character) = chars.next() { + if character != '\\' { + output.push(character); + continue; + } + let Some(next) = chars.next() else { + break; + }; + match next { + '\\' => output.push('\\'), + '\'' => output.push('\''), + '"' => output.push('"'), + 'n' => output.push('\n'), + 'r' => output.push('\r'), + 't' => output.push('\t'), + _ => output.push(next), + } + } + output + } + + fn to_radix(mut value: usize, radix: u32) -> String { + if value == 0 { + return "0".to_string(); + } + let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz"; + let mut out = Vec::new(); + while value > 0 { + let digit = value % radix as usize; + out.push(alphabet[digit] as char); + value /= radix as usize; + } + out.iter().rev().collect() + } + + fn extract_mixdrop_media_url(html: &str) -> Option { + let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; + if let Some(url) = direct_regex + .captures(html) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + { + return Some(Self::normalize_possible_protocol_relative(&url)); + } + + let unpacked = Self::parse_mixin_packed_eval(html)?; + let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; + unpacked_regex + .captures(&unpacked) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + .map(|value| Self::normalize_possible_protocol_relative(&value)) + } + + fn normalize_possible_protocol_relative(value: &str) -> String { + let trimmed = value.trim(); + if trimmed.starts_with("//") { + format!("https:{trimmed}") + } else { + trimmed.to_string() + } + } + + fn host_from_url(url: &str) -> Option { + let parsed = url::Url::parse(url).ok()?; + parsed.host_str().map(|value| value.to_ascii_lowercase()) + } + + fn is_mixdrop_host(url: &str) -> bool { + let Some(host) = Self::host_from_url(url) else { + return false; + }; + host.contains("mixdrop") || host.contains("m1xdrop") + } + + fn download_fid_from_detail_html(html: &str) -> Option { + let document = Html::parse_document(html); + let selector = Selector::parse("input[name='fid'][value]").ok()?; + document + .select(&selector) + .next() + .and_then(|node| node.value().attr("value")) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + } + + fn mixdrop_embed_url_from_download_url(url: &str) -> Option { + let parsed = url::Url::parse(url).ok()?; + let host = parsed.host_str()?; + let host_lc = host.to_ascii_lowercase(); + if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") { + return None; + } + + let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty()); + let kind = segments.next()?.to_ascii_lowercase(); + if kind != "e" && kind != "f" { + return None; + } + let media_id = segments.next()?.trim(); + if media_id.is_empty() { + return None; + } + + Some(format!("{}://{host}/e/{media_id}", parsed.scheme())) + } + + fn video_format_with_headers( + format: VideoFormat, + headers: Vec<(String, String)>, + ) -> VideoFormat { + if headers.is_empty() { + return format; + } + + let header_map: HashMap = headers + .into_iter() + .filter_map(|(key, value)| { + let key = key.trim().to_string(); + let value = value.trim().to_string(); + if key.is_empty() || value.is_empty() { + return None; + } + Some((key, value)) + }) + .collect(); + + if header_map.is_empty() { + return format; + } + + let mut value = match serde_json::to_value(&format) { + Ok(value) => value, + Err(_) => return format, + }; + + if let Value::Object(object) = &mut value { + let Ok(headers_value) = serde_json::to_value(header_map) else { + return format; + }; + object.insert("http_headers".to_string(), headers_value); + if let Ok(updated) = serde_json::from_value::(value) { + return updated; + } + } + + format + } + + fn first_video_source_from_html(html: &str) -> Option { + let document = Html::parse_document(html); + let source_selector = Selector::parse("video source[src]").ok()?; + let video_src_selector = Selector::parse("video[src]").ok()?; + + if let Some(value) = document + .select(&source_selector) + .next() + .and_then(|node| node.value().attr("src")) + { + return Some(value.to_string()); + } + document + .select(&video_src_selector) + .next() + .and_then(|node| node.value().attr("src")) + .map(|value| value.to_string()) + } + + fn first_iframe_source_from_html(html: &str) -> Option { + let document = Html::parse_document(html); + let iframe_selector = Selector::parse("iframe[src]").ok()?; + document + .select(&iframe_selector) + .next() + .and_then(|node| node.value().attr("src")) + .map(|value| value.to_string()) + } + + async fn resolve_mixdrop_media_from_iframe( + &self, + iframe_url: &str, + referer: &str, + options: &ServerOptions, + ) -> Option { + let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media"); + let response = requester + .get_raw_with_headers_timeout( + iframe_url, + self.html_headers(referer), + Some(StdDuration::from_secs(6)), + ) + .await + .ok()?; + if !response.status().is_success() { + return None; + } + let iframe_html = response.text().await.ok()?; + let media_url = Self::extract_mixdrop_media_url(&iframe_html)?; + Some(ResolvedMixdropMedia { + media_url, + embed_url: iframe_url.to_string(), + }) + } + + async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { + let page_url = item.url.clone(); + let format_url = Self::proxied_video(options, &page_url); + if format_url.is_empty() { + return item; + } + + let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string()); + let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video"); + if let Ok(detail_html) = requester + .get_with_headers( + &page_url, + self.html_headers(&format!("{}/", self.url)), + Some(wreq::Version::HTTP_11), + ) + .await + { + let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html) + .map(|value| self.absolute_url(&value)) + .filter(|value| Self::is_mixdrop_host(value)); + + if mixdrop_embed_url.is_none() { + mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html) + .map(|value| self.absolute_url(&value)) + .and_then(|value| Self::mixdrop_embed_url_from_download_url(&value)); + } + + if let Some(embed_url) = mixdrop_embed_url { + format = Self::video_format_with_headers( + format, + vec![ + ("Referer".to_string(), embed_url), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ], + ); + } + } + + let mut enriched = item; + enriched.formats = Some(vec![format]); + enriched + } + + fn extract_csrf_token(html: &str) -> Option { + let regex = Regex::new(r#" Option { + let decoded = Self::decode_text(raw); + (!decoded.trim().is_empty()).then_some(decoded) + } + + fn livewire_method(component: &str) -> Option<&'static str> { + match component { + "home-videos" => Some("loadVideos"), + "filter.platform" => Some("load_platform_videos"), + "profile.model-videos" => Some("load_profile_videos"), + _ => None, + } + } + + fn extract_json_object(source: &str, key: &str) -> Option { + let needle = format!("\"{key}\":"); + let start = source.find(&needle)? + needle.len(); + let bytes = source.as_bytes(); + let mut index = start; + while index < bytes.len() && bytes[index].is_ascii_whitespace() { + index += 1; + } + if index >= bytes.len() || bytes[index] != b'{' { + return None; + } + + let mut depth = 0usize; + let mut in_string = false; + let mut escaped = false; + for end in index..bytes.len() { + let byte = bytes[end]; + if in_string { + if escaped { + escaped = false; + } else if byte == b'\\' { + escaped = true; + } else if byte == b'"' { + in_string = false; + } + continue; + } + + match byte { + b'"' => in_string = true, + b'{' => depth += 1, + b'}' => { + depth = depth.saturating_sub(1); + if depth == 0 { + return Some(source[index..=end].to_string()); + } + } + _ => {} + } + } + + None + } + + fn extract_livewire_initial(html: &str) -> Result> { + let regex = Self::regex(r#"wire:initial-data="([^"]+)""#)?; + for captures in regex.captures_iter(html) { + let Some(raw) = captures.get(1).map(|value| value.as_str()) else { + continue; + }; + let Some(decoded) = Self::decode_livewire_json_attr(raw) else { + continue; + }; + let Some(fingerprint_json) = Self::extract_json_object(&decoded, "fingerprint") else { + continue; + }; + let Some(server_memo_json) = Self::extract_json_object(&decoded, "serverMemo") else { + continue; + }; + let component_regex = Self::regex(r#""name":"([^"]+)""#)?; + let Some(component) = component_regex + .captures(&fingerprint_json) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + else { + continue; + }; + let Some(method) = Self::livewire_method(&component) else { + continue; + }; + return Ok(Some(LivewireInitialData { + component, + method: method.to_string(), + fingerprint_json, + server_memo_json, + })); + } + Ok(None) + } + + async fn fetch_livewire_html( + &self, + target_url: &str, + options: &ServerOptions, + ) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_livewire_html.page"); + let page_html = requester + .get_with_headers(target_url, self.html_headers(&format!("{}/", self.url)), Some(wreq::Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("fetch page failed for {target_url}: {error}")))?; + + let csrf = Self::extract_csrf_token(&page_html); + let initial = Self::extract_livewire_initial(&page_html)?; + let csrf_present = csrf.is_some(); + let initial_present = initial.is_some(); + let (Some(csrf), Some(initial)) = (csrf, initial) else { + crate::flow_debug!( + "archivebate1 livewire fallback target={} csrf_present={} initial_present={}", + crate::util::flow_debug::preview(target_url, 120), + csrf_present, + initial_present + ); + return Ok(page_html); + }; + + let update_id = format!("ab{}", Utc::now().timestamp_micros()); + let payload = format!( + "{{\"fingerprint\":{},\"serverMemo\":{},\"updates\":[{{\"type\":\"callMethod\",\"payload\":{{\"id\":\"{}\",\"method\":\"{}\",\"params\":[]}}}}]}}", + initial.fingerprint_json, initial.server_memo_json, update_id, initial.method + ); + let livewire_url = format!("{}/livewire/message/{}", self.url, initial.component); + let referer = target_url.to_string(); + let user_agent = FIREFOX_UA.to_string(); + let accept = "application/json, text/plain, */*".to_string(); + let response = requester + .post( + &livewire_url, + &payload, + vec![ + ("Content-Type", "application/json"), + ("X-Requested-With", "XMLHttpRequest"), + ("X-CSRF-TOKEN", csrf.as_str()), + ("Referer", referer.as_str()), + ("User-Agent", user_agent.as_str()), + ("Accept", accept.as_str()), + ], + ) + .await + .map_err(|error| { + Error::from(format!( + "livewire request failed for {target_url} component={}: {error}", + initial.component + )) + })?; + let response_text = response + .text() + .await + .map_err(|error| Error::from(format!("livewire response text failed: {error}")))?; + let parsed: Value = serde_json::from_str(&response_text)?; + let effects_html = parsed + .get("effects") + .and_then(|value| value.get("html")) + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + crate::flow_debug!( + "archivebate livewire component={} target={} effects_html_len={} body_preview={}", + initial.component, + crate::util::flow_debug::preview(target_url, 120), + effects_html.len(), + crate::util::flow_debug::preview(&response_text, 120) + ); + if !effects_html.trim().is_empty() { + return Ok(effects_html); + } + + Ok(page_html) + } + + async fn fetch_items_for_target( + &self, + cache: VideoCache, + target: &Target, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let target_url = self.build_target_url(target, page); + let cache_key = format!("{CHANNEL_ID}:{target_url}"); + if let Some((time, items)) = cache.get(&cache_key) { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 10 { + return Ok(items.iter().take(per_page).cloned().collect()); + } + } + + let html = self.fetch_livewire_html(&target_url, options).await?; + let items = self.parse_video_items(&html, target, per_page)?; + crate::flow_debug!( + "archivebate parsed target={} page={} items={} html_preview={}", + crate::util::flow_debug::preview(&target_url, 120), + page, + items.len(), + crate::util::flow_debug::preview(&html, 120) + ); + if !items.is_empty() { + cache.insert(cache_key, items.clone()); + } + Ok(items) + } + + async fn refresh_uploaders_from_home(&self, options: &ServerOptions) -> Result<()> { + let items = self + .fetch_items_for_target( + VideoCache::new(), + &Target::Home, + 1, + 120, + options, + ) + .await?; + for item in items { + if let Some(uploader) = item.uploader { + self.upsert_uploader_option(&uploader); + } + } + Ok(()) + } + + async fn search_profiles( + &self, + query: &str, + page: u16, + options: &ServerOptions, + ) -> Result> { + let query_string = { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("query", query); + serializer.append_pair("page", &page.max(1).to_string()); + serializer.finish() + }; + let search_url = format!("{}/api/v1/search?{query_string}", self.url); + + let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_search_target"); + let response = requester + .get_raw_with_headers(&search_url, self.search_headers(&format!("{}/", self.url))) + .await + .map_err(|error| Error::from(format!("search request failed: {error}")))?; + if !response.status().is_success() { + return Err(Error::from(format!( + "search request returned status {}", + response.status() + ))); + } + let body = response + .text() + .await + .map_err(|error| Error::from(format!("search response text failed: {error}")))?; + let parsed: SearchResponse = serde_json::from_str(&body)?; + Ok(parsed.data) + } + + async fn get_default( + &self, + cache: VideoCache, + page: u16, + per_page: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_option_target(&options); + self.fetch_items_for_target(cache, &target, page, per_page, &options) + .await + } + + async fn query( + &self, + cache: VideoCache, + page: u16, + per_page: usize, + query: &str, + options: ServerOptions, + ) -> Result> { + if let Some(target) = self.target_from_shortcut(query) { + return self + .fetch_items_for_target(cache, &target, page, per_page, &options) + .await; + } + + let mut candidates = self.search_profiles(query, page, &options).await?; + if candidates.is_empty() { + return Ok(vec![]); + } + + let normalized_query = Self::normalize_title(query); + candidates.sort_by_key(|entry| { + if Self::normalize_title(&entry.username) == normalized_query { + 0 + } else { + 1 + } + }); + + for candidate in candidates.into_iter().take(8) { + let username = candidate.username.trim(); + if username.is_empty() { + continue; + } + let target = Target::Profile { + username: username.to_string(), + platform: candidate.platform.clone(), + gender: candidate.gender.clone(), + }; + let items = self + .fetch_items_for_target(cache.clone(), &target, 1, per_page, &options) + .await?; + if !items.is_empty() { + return Ok(items); + } + } + + Ok(vec![]) + } +} + +#[async_trait] +impl Provider for ArchivebateProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + _sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let page = page.parse::().unwrap_or(1); + let per_page = per_page.parse::().unwrap_or(36).clamp(1, 120); + + let result = match query { + Some(query) if !query.trim().is_empty() => { + self.query(cache, page, per_page, &query, options.clone()).await + } + _ => self.get_default(cache, page, per_page, options.clone()).await, + }; + + match result { + Ok(videos) => { + if videos.is_empty() { + return videos; + } + stream::iter(videos.into_iter().map(|video| { + let provider = self.clone(); + let options = options.clone(); + async move { + let timeout_result = timeout( + StdDuration::from_secs(8), + provider.enrich_video(video.clone(), &options), + ) + .await; + match timeout_result { + Ok(enriched) => enriched, + Err(_) => video, + } + } + })) + .buffer_unordered(4) + .collect::>() + .await + } + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::ArchivebateProvider; + + #[test] + fn extracts_mixdrop_wurl_from_packed_eval() { + let html = r#" + +"#; + let actual = ArchivebateProvider::extract_mixdrop_media_url(html) + .expect("expected mixdrop media url"); + assert_eq!( + actual, + "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168" + ); + } +} diff --git a/src/providers/chaturbate.rs b/src/providers/chaturbate.rs index e1798f6..ee0151c 100644 --- a/src/providers/chaturbate.rs +++ b/src/providers/chaturbate.rs @@ -38,7 +38,7 @@ impl ChaturbateProvider { Channel { id: "chaturbate".to_string(), - name: "WORK IN PROGRESS Chaturbate".to_string(), + name: "Chaturbate".to_string(), description: "Free Adult Webcams".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=chaturbate.com".to_string(), diff --git a/src/util/requester.rs b/src/util/requester.rs index 474fcee..9f8b04c 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -175,6 +175,7 @@ impl Requester { #[cfg(any( not(hottub_single_provider), hottub_provider = "archivebate", + hottub_provider = "archivebate1", hottub_provider = "hypnotube", hottub_provider = "vjav", ))] @@ -383,6 +384,7 @@ impl Requester { #[cfg(any( not(hottub_single_provider), hottub_provider = "archivebate", + hottub_provider = "archivebate1", hottub_provider = "hypnotube", hottub_provider = "freeuseporn", hottub_provider = "vjav",