1428 lines
48 KiB
Rust
1428 lines
48 KiB
Rust
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
|
|
requester_or_default, strip_url_scheme,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
use async_trait::async_trait;
|
|
use chrono::{Duration as ChronoDuration, Utc};
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
|
|
use regex::Regex;
|
|
use scraper::{Html, Selector};
|
|
use serde::Deserialize;
|
|
use serde_json::Value;
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::sync::{Arc, RwLock};
|
|
use std::thread;
|
|
use std::time::Duration as StdDuration;
|
|
use tokio::time::timeout;
|
|
|
|
/// Static metadata for this channel: the provider group it is listed under and its tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        tags: &["archive", "cams", "recordings"],
        group_id: "live-cams",
    };
|
|
|
|
// Error, ErrorKind and Result types for this module, generated by `error_chain`.
error_chain! {
    // Foreign error types that convert into this module's `Error`.
    foreign_links {
        Io(std::io::Error);
        Json(serde_json::Error);
    }

    // Module-specific error kinds.
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
|
|
|
/// Site origin used as the provider's base URL.
const BASE_URL: &str = "https://archivebate.com";

/// Channel identifier used in the channel descriptor, uploader ids, proxy URLs and error reports.
const CHANNEL_ID: &str = "archivebate";

/// User-Agent header value sent with outgoing requests.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";

/// Accept header value sent when requesting HTML pages.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
|
|
|
/// Known platform shortcuts as `(name, id)` pairs.
///
/// The id is the value placed in `/platform/{id}` URLs; each id looks like the
/// base64 encoding of its name (NOTE(review): confirm against the site).
const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[
    ("chaturbate", "Y2hhdHVyYmF0ZQ=="),
    ("stripchat", "c3RyaXBjaGF0"),
    ("camsoda", "Y2Ftc29kYQ=="),
    ("cam4", "Y2FtNA=="),
    ("bongacams", "Ym9uZ2FjYW1z"),
    ("onlyfans", "b25seWZhbnM="),
    ("twitch", "dHdpdGNo"),
    ("youtube", "eW91dHViZQ=="),
    ("instagram", "aW5zdGFncmFt"),
    ("tiktok", "dGlrdG9r"),
];
|
|
|
|
/// Known gender shortcuts as `(name, id)` pairs; the id is the value placed in `/gender/{id}` URLs.
const GENDER_SHORTCUTS: &[(&str, &str)] = &[
    ("female", "ZmVtYWxl"),
    ("couple", "Y291cGxl"),
    ("male", "bWFsZQ=="),
    ("trans", "dHJhbnM="),
];
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ArchivebateProvider {
|
|
url: String,
|
|
filters: Arc<RwLock<Vec<FilterOption>>>,
|
|
uploaders: Arc<RwLock<Vec<FilterOption>>>,
|
|
}
|
|
|
|
/// The listing a request should be served from.
#[derive(Debug, Clone)]
enum Target {
    /// The site's front page listing.
    Home,
    /// Listing for one platform; `id` goes into the URL, `title` is used as a tag.
    Platform { id: String, title: String },
    /// Listing for one gender; `id` goes into the URL, `title` is used as a tag.
    Gender { id: String, title: String },
    /// Listing for one profile; `platform` and `gender`, when known, become tags.
    Profile {
        username: String,
        platform: Option<String>,
        gender: Option<String>,
    },
}
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
struct SearchResponse {
|
|
#[serde(default)]
|
|
data: Vec<SearchProfile>,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|
|
struct SearchProfile {
|
|
#[serde(default)]
|
|
username: String,
|
|
#[serde(default)]
|
|
platform: Option<String>,
|
|
#[serde(default)]
|
|
gender: Option<String>,
|
|
}
|
|
|
|
/// Pieces pulled out of a page's `wire:initial-data` attribute that are needed
/// to build a follow-up Livewire request.
#[derive(Debug, Clone)]
struct LivewireInitialData {
    /// Component name taken from the fingerprint's `"name"` entry.
    component: String,
    /// Name of the method to call on that component.
    method: String,
    /// Raw JSON text of the `fingerprint` object.
    fingerprint_json: String,
    /// Raw JSON text of the `serverMemo` object.
    server_memo_json: String,
}
|
|
|
|
/// A media URL resolved from an embed page, paired with the embed page it came from.
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
    /// Media URL extracted from the embed page.
    media_url: String,
    /// URL of the embed (iframe) page that was fetched.
    embed_url: String,
}
|
|
|
|
impl ArchivebateProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
filters: Arc::new(RwLock::new(Self::build_default_filters())),
|
|
uploaders: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let provider = self.clone();
|
|
thread::spawn(move || {
|
|
let runtime = match tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
{
|
|
Ok(runtime) => runtime,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"spawn_initial_load.runtime_build",
|
|
&error.to_string(),
|
|
);
|
|
return;
|
|
}
|
|
};
|
|
|
|
runtime.block_on(async move {
|
|
let options = ServerOptions {
|
|
featured: None,
|
|
category: None,
|
|
sites: None,
|
|
filter: None,
|
|
language: None,
|
|
public_url_base: None,
|
|
requester: None,
|
|
network: None,
|
|
stars: None,
|
|
categories: None,
|
|
duration: None,
|
|
sort: Some("new".to_string()),
|
|
sexuality: None,
|
|
};
|
|
if let Err(error) = provider.refresh_uploaders_from_home(&options).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"spawn_initial_load.refresh_uploaders",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
fn build_default_filters() -> Vec<FilterOption> {
|
|
let mut filters = vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}];
|
|
|
|
for (title, id) in GENDER_SHORTCUTS {
|
|
filters.push(FilterOption {
|
|
id: format!("gender:{id}"),
|
|
title: format!("Gender: {}", Self::title_case(title)),
|
|
});
|
|
}
|
|
|
|
for (title, id) in PLATFORM_SHORTCUTS {
|
|
filters.push(FilterOption {
|
|
id: format!("platform:{id}"),
|
|
title: format!("Platform: {}", Self::title_case(title)),
|
|
});
|
|
}
|
|
|
|
filters
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
let filters = self
|
|
.filters
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
let uploaders = self
|
|
.uploaders
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
|
|
Channel {
|
|
id: CHANNEL_ID.to_string(),
|
|
name: "ArchiveBate".to_string(),
|
|
description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and yt-dlp compatible watch URLs.".to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(),
|
|
status: "work in progress".to_string(),
|
|
categories: filters.iter().skip(1).map(|value| value.title.clone()).collect(),
|
|
options: vec![
|
|
ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse ArchiveBate newest uploads.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![FilterOption {
|
|
id: "new".to_string(),
|
|
title: "Latest".to_string(),
|
|
}],
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "filter".to_string(),
|
|
title: "Shortcuts".to_string(),
|
|
description: "Direct platform and gender archive routes.".to_string(),
|
|
systemImage: "tag.fill".to_string(),
|
|
colorName: "green".to_string(),
|
|
options: filters,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "sites".to_string(),
|
|
title: "Uploaders".to_string(),
|
|
description: "Profile shortcuts discovered from latest listings.".to_string(),
|
|
systemImage: "person.crop.square".to_string(),
|
|
colorName: "purple".to_string(),
|
|
options: uploaders,
|
|
multiSelect: false,
|
|
},
|
|
],
|
|
nsfw: true,
|
|
cacheDuration: Some(900),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn decode_text(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
.replace('\u{a0}', " ")
|
|
.trim()
|
|
.to_string()
|
|
}
|
|
|
|
/// Joins the whitespace-separated words of `text` with single spaces,
/// dropping leading and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
|
|
|
fn absolute_url(&self, value: &str) -> String {
|
|
if value.starts_with("http://") || value.starts_with("https://") {
|
|
return value.to_string();
|
|
}
|
|
if value.starts_with("//") {
|
|
return format!("https:{value}");
|
|
}
|
|
format!(
|
|
"{}/{}",
|
|
self.url.trim_end_matches('/'),
|
|
value.trim_start_matches('/')
|
|
)
|
|
}
|
|
|
|
/// Produces a comparison key: characters are lowercased one by one, every run
/// of non-alphanumeric characters becomes a single space, and leading and
/// trailing separators are dropped.
fn normalize_title(value: &str) -> String {
    // Lowercase per character (not `str::to_lowercase`) to keep the mapping context-free.
    let lowered: String = value.chars().flat_map(char::to_lowercase).collect();
    lowered
        .split(|character: char| !character.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
|
|
|
|
/// Uppercases the first character of `value` and leaves the rest untouched.
/// An empty input yields an empty string.
fn title_case(value: &str) -> String {
    let mut rest = value.chars();
    match rest.next() {
        None => String::new(),
        Some(initial) => initial.to_uppercase().chain(rest).collect(),
    }
}
|
|
|
|
fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("Referer".to_string(), referer.to_string()),
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
|
]
|
|
}
|
|
|
|
fn search_headers(&self, referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("Referer".to_string(), referer.to_string()),
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
(
|
|
"Accept".to_string(),
|
|
"application/json, text/javascript, */*; q=0.01".to_string(),
|
|
),
|
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
|
("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
|
|
]
|
|
}
|
|
|
|
fn build_target_url(&self, target: &Target, page: u16) -> String {
|
|
let page = page.max(1);
|
|
match target {
|
|
Target::Home => {
|
|
if page <= 1 {
|
|
format!("{}/", self.url)
|
|
} else {
|
|
format!("{}/?page={page}", self.url)
|
|
}
|
|
}
|
|
Target::Platform { id, .. } => {
|
|
if page <= 1 {
|
|
format!("{}/platform/{id}", self.url)
|
|
} else {
|
|
format!("{}/platform/{id}?page={page}", self.url)
|
|
}
|
|
}
|
|
Target::Gender { id, .. } => {
|
|
if page <= 1 {
|
|
format!("{}/gender/{id}", self.url)
|
|
} else {
|
|
format!("{}/gender/{id}?page={page}", self.url)
|
|
}
|
|
}
|
|
Target::Profile { username, .. } => {
|
|
if page <= 1 {
|
|
format!("{}/profile/{username}", self.url)
|
|
} else {
|
|
format!("{}/profile/{username}?page={page}", self.url)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn canonical_uploader_id(username: &str) -> String {
|
|
format!(
|
|
"{CHANNEL_ID}:{}",
|
|
utf8_percent_encode(username, NON_ALPHANUMERIC)
|
|
)
|
|
}
|
|
|
|
fn username_from_uploader_id(value: &str) -> Option<String> {
|
|
let suffix = match value.split_once(':') {
|
|
Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix,
|
|
_ => return None,
|
|
};
|
|
percent_decode_str(suffix)
|
|
.decode_utf8()
|
|
.ok()
|
|
.map(|value| value.into_owned())
|
|
.and_then(|value| (!value.trim().is_empty()).then_some(value))
|
|
}
|
|
|
|
fn target_from_shortcut(&self, value: &str) -> Option<Target> {
|
|
let trimmed = value.trim();
|
|
if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("all") {
|
|
return None;
|
|
}
|
|
|
|
if let Some(username) = Self::username_from_uploader_id(trimmed) {
|
|
return Some(Target::Profile {
|
|
username,
|
|
platform: None,
|
|
gender: None,
|
|
});
|
|
}
|
|
|
|
if let Some(username) = trimmed.strip_prefix("profile:") {
|
|
let username = username.trim();
|
|
if !username.is_empty() {
|
|
return Some(Target::Profile {
|
|
username: username.to_string(),
|
|
platform: None,
|
|
gender: None,
|
|
});
|
|
}
|
|
}
|
|
|
|
if let Some(id) = trimmed.strip_prefix("platform:") {
|
|
if let Some((name, _)) = PLATFORM_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) {
|
|
return Some(Target::Platform {
|
|
id: id.to_string(),
|
|
title: Self::title_case(name),
|
|
});
|
|
}
|
|
}
|
|
|
|
if let Some(id) = trimmed.strip_prefix("gender:") {
|
|
if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) {
|
|
return Some(Target::Gender {
|
|
id: id.to_string(),
|
|
title: Self::title_case(name),
|
|
});
|
|
}
|
|
}
|
|
|
|
let normalized = Self::normalize_title(trimmed);
|
|
if let Some((name, id)) = PLATFORM_SHORTCUTS
|
|
.iter()
|
|
.find(|(name, _)| Self::normalize_title(name) == normalized)
|
|
{
|
|
return Some(Target::Platform {
|
|
id: (*id).to_string(),
|
|
title: Self::title_case(name),
|
|
});
|
|
}
|
|
|
|
if let Some((name, id)) = GENDER_SHORTCUTS
|
|
.iter()
|
|
.find(|(name, _)| Self::normalize_title(name) == normalized)
|
|
{
|
|
return Some(Target::Gender {
|
|
id: (*id).to_string(),
|
|
title: Self::title_case(name),
|
|
});
|
|
}
|
|
|
|
if let Some(option) = self.find_uploader_option(trimmed) {
|
|
if let Some(username) = option.id.strip_prefix("profile:") {
|
|
return Some(Target::Profile {
|
|
username: username.to_string(),
|
|
platform: None,
|
|
gender: None,
|
|
});
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
fn resolve_option_target(&self, options: &ServerOptions) -> Target {
|
|
if let Some(value) = options.sites.as_deref() {
|
|
if let Some(target) = self.target_from_shortcut(value) {
|
|
return target;
|
|
}
|
|
}
|
|
if let Some(value) = options.filter.as_deref() {
|
|
if let Some(target) = self.target_from_shortcut(value) {
|
|
return target;
|
|
}
|
|
}
|
|
Target::Home
|
|
}
|
|
|
|
fn find_uploader_option(&self, value: &str) -> Option<FilterOption> {
|
|
let normalized = Self::normalize_title(value);
|
|
self.uploaders
|
|
.read()
|
|
.ok()?
|
|
.iter()
|
|
.find(|option| {
|
|
option.id.eq_ignore_ascii_case(value)
|
|
|| Self::normalize_title(&option.title) == normalized
|
|
})
|
|
.cloned()
|
|
}
|
|
|
|
fn upsert_uploader_option(&self, username: &str) {
|
|
let trimmed = username.trim();
|
|
if trimmed.is_empty() {
|
|
return;
|
|
}
|
|
let option = FilterOption {
|
|
id: format!("profile:{trimmed}"),
|
|
title: trimmed.to_string(),
|
|
};
|
|
let Ok(mut uploaders) = self.uploaders.write() else {
|
|
return;
|
|
};
|
|
if uploaders
|
|
.iter()
|
|
.any(|value| value.id.eq_ignore_ascii_case(option.id.as_str()))
|
|
{
|
|
return;
|
|
}
|
|
uploaders.push(option);
|
|
}
|
|
|
|
fn parse_watch_id(url: &str) -> Option<String> {
|
|
let regex = Regex::new(r"/watch/(?P<id>[0-9]+)").ok()?;
|
|
regex
|
|
.captures(url)
|
|
.and_then(|captures| captures.name("id").map(|value| value.as_str().to_string()))
|
|
}
|
|
|
|
fn is_allowed_detail_watch_url(url: &str) -> bool {
|
|
let Some(parsed) = url::Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if parsed.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = parsed.host_str() else {
|
|
return false;
|
|
};
|
|
(host == "archivebate.com" || host == "www.archivebate.com")
|
|
&& parsed.path().starts_with("/watch/")
|
|
}
|
|
|
|
fn proxied_video(options: &ServerOptions, detail_url: &str) -> String {
|
|
if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) {
|
|
return String::new();
|
|
}
|
|
|
|
build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url))
|
|
}
|
|
|
|
fn parse_duration(text: &str) -> u32 {
|
|
let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else {
|
|
return 0;
|
|
};
|
|
let Some(captures) = regex.captures(text) else {
|
|
return 0;
|
|
};
|
|
let Some(value) = captures.get(1) else {
|
|
return 0;
|
|
};
|
|
parse_time_to_seconds(value.as_str()).unwrap_or(0) as u32
|
|
}
|
|
|
|
fn parse_views(value: &str) -> Option<u32> {
|
|
let lowered = value.to_lowercase();
|
|
let cleaned = lowered.replace("views", "").trim().to_string();
|
|
parse_abbreviated_number(&cleaned)
|
|
}
|
|
|
|
fn parse_uploaded_at(value: &str) -> Option<u64> {
|
|
let text = value.trim().to_lowercase();
|
|
if text.is_empty() {
|
|
return None;
|
|
}
|
|
if text == "just now" {
|
|
return Some(Utc::now().timestamp() as u64);
|
|
}
|
|
|
|
let regex =
|
|
Regex::new(r"^([0-9]+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$").ok()?;
|
|
let captures = regex.captures(&text)?;
|
|
let amount = captures.get(1)?.as_str().parse::<i64>().ok()?;
|
|
let unit = captures.get(2)?.as_str();
|
|
let now = Utc::now();
|
|
let parsed = match unit {
|
|
"second" => now - ChronoDuration::seconds(amount),
|
|
"minute" => now - ChronoDuration::minutes(amount),
|
|
"hour" => now - ChronoDuration::hours(amount),
|
|
"day" => now - ChronoDuration::days(amount),
|
|
"week" => now - ChronoDuration::weeks(amount),
|
|
"month" => now - ChronoDuration::days(amount * 30),
|
|
"year" => now - ChronoDuration::days(amount * 365),
|
|
_ => return None,
|
|
};
|
|
Some(parsed.timestamp() as u64)
|
|
}
|
|
|
|
fn extra_tags_for_target(target: &Target) -> Vec<String> {
|
|
match target {
|
|
Target::Platform { title, .. } => vec![title.clone()],
|
|
Target::Gender { title, .. } => vec![title.clone()],
|
|
Target::Profile { platform, gender, .. } => {
|
|
let mut tags = Vec::new();
|
|
if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) {
|
|
tags.push(platform.trim().to_string());
|
|
}
|
|
if let Some(gender) = gender.as_ref().filter(|value| !value.trim().is_empty()) {
|
|
tags.push(gender.trim().to_string());
|
|
}
|
|
tags
|
|
}
|
|
Target::Home => Vec::new(),
|
|
}
|
|
}
|
|
|
|
fn parse_video_items(&self, html: &str, target: &Target, limit: usize) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_fragment(html);
|
|
let item_selector = Self::selector("section.video_item")?;
|
|
let watch_selector = Self::selector("a[href*='/watch/']")?;
|
|
let uploader_selector = Self::selector("div.info a[href*='/profile/']")?;
|
|
let info_selector = Self::selector("div.info p")?;
|
|
let duration_selector = Self::selector("div.duration span")?;
|
|
let video_selector = Self::selector("video[poster]")?;
|
|
let source_selector = Self::selector("video source[src]")?;
|
|
|
|
let base_tags = Self::extra_tags_for_target(target);
|
|
let mut items = Vec::new();
|
|
|
|
for element in document.select(&item_selector) {
|
|
let Some(watch_link) = element.select(&watch_selector).next() else {
|
|
continue;
|
|
};
|
|
let Some(watch_href) = watch_link.value().attr("href") else {
|
|
continue;
|
|
};
|
|
let watch_url = self.absolute_url(watch_href);
|
|
let Some(video_id) = Self::parse_watch_id(&watch_url) else {
|
|
continue;
|
|
};
|
|
|
|
let duration_text = element
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::<String>())))
|
|
.unwrap_or_default();
|
|
let duration = Self::parse_duration(&duration_text);
|
|
|
|
let thumb = element
|
|
.select(&video_selector)
|
|
.next()
|
|
.and_then(|value| value.value().attr("poster"))
|
|
.map(|value| self.absolute_url(value))
|
|
.unwrap_or_default();
|
|
if thumb.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let preview = element
|
|
.select(&source_selector)
|
|
.next()
|
|
.and_then(|value| value.value().attr("src"))
|
|
.map(|value| self.absolute_url(value));
|
|
|
|
let uploader_element = element.select(&uploader_selector).next();
|
|
let uploader_name = uploader_element
|
|
.as_ref()
|
|
.map(|value| Self::decode_text(&value.text().collect::<String>()))
|
|
.filter(|value| !value.is_empty());
|
|
let uploader_url = uploader_element
|
|
.as_ref()
|
|
.and_then(|value| value.value().attr("href"))
|
|
.map(|value| self.absolute_url(value));
|
|
let uploader_id = uploader_name
|
|
.as_ref()
|
|
.map(|value| Self::canonical_uploader_id(value));
|
|
|
|
let info_text = element
|
|
.select(&info_selector)
|
|
.next()
|
|
.map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::<String>())))
|
|
.unwrap_or_default();
|
|
let info_parts = info_text
|
|
.split('·')
|
|
.map(|value| value.trim())
|
|
.filter(|value| !value.is_empty())
|
|
.collect::<Vec<_>>();
|
|
|
|
let uploaded_at = info_parts.first().and_then(|value| Self::parse_uploaded_at(value));
|
|
let platform = info_parts.get(1).map(|value| value.trim().to_string());
|
|
let views = info_parts.get(2).and_then(|value| Self::parse_views(value));
|
|
|
|
let title = match (uploader_name.as_deref(), platform.as_deref()) {
|
|
(Some(uploader), Some(platform)) => format!("{uploader} on {platform}"),
|
|
(Some(uploader), None) => format!("{uploader} on ArchiveBate"),
|
|
_ => format!("ArchiveBate video {video_id}"),
|
|
};
|
|
|
|
let mut tags = base_tags.clone();
|
|
if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) {
|
|
tags.push(platform.clone());
|
|
}
|
|
let mut unique = HashSet::new();
|
|
let mut deduped = Vec::new();
|
|
for tag in tags {
|
|
let normalized = Self::normalize_title(&tag);
|
|
if normalized.is_empty() || unique.contains(&normalized) {
|
|
continue;
|
|
}
|
|
unique.insert(normalized);
|
|
deduped.push(tag);
|
|
}
|
|
|
|
let mut item = VideoItem::new(
|
|
video_id,
|
|
title,
|
|
watch_url,
|
|
CHANNEL_ID.to_string(),
|
|
thumb,
|
|
duration,
|
|
);
|
|
item.uploader = uploader_name.clone();
|
|
item.uploaderUrl = uploader_url;
|
|
item.uploaderId = uploader_id;
|
|
item.uploadedAt = uploaded_at;
|
|
item.views = views;
|
|
item.tags = (!deduped.is_empty()).then_some(deduped);
|
|
item.preview = preview;
|
|
|
|
if let Some(uploader) = uploader_name {
|
|
self.upsert_uploader_option(&uploader);
|
|
}
|
|
|
|
items.push(item);
|
|
if items.len() >= limit {
|
|
break;
|
|
}
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
|
|
let eval_regex = Regex::new(
|
|
r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
|
|
)
|
|
.ok()?;
|
|
let captures = eval_regex.captures(html)?;
|
|
let payload_raw = captures.name("payload")?.as_str();
|
|
let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
|
|
let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
|
|
if !(2..=36).contains(&radix) {
|
|
return None;
|
|
}
|
|
|
|
let payload = Self::unescape_js_single_quoted(payload_raw);
|
|
let tokens_raw = captures.name("tokens")?.as_str();
|
|
let tokens = tokens_raw.split('|').collect::<Vec<_>>();
|
|
let mut unpacked = payload;
|
|
|
|
for index in (0..count).rev() {
|
|
let Some(token) = tokens.get(index) else {
|
|
continue;
|
|
};
|
|
if token.is_empty() {
|
|
continue;
|
|
}
|
|
let key = Self::to_radix(index, radix);
|
|
let pattern = format!(r"\b{}\b", regex::escape(&key));
|
|
let re = Regex::new(&pattern).ok()?;
|
|
unpacked = re.replace_all(&unpacked, *token).into_owned();
|
|
}
|
|
|
|
Some(unpacked)
|
|
}
|
|
|
|
/// Resolves backslash escapes in the body of a JavaScript string literal.
///
/// `\n`, `\r` and `\t` become the control characters; any other escaped
/// character stands for itself. A trailing lone backslash is dropped.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut output = String::with_capacity(value.len());
    let mut after_backslash = false;
    for character in value.chars() {
        if after_backslash {
            let resolved = match character {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            };
            output.push(resolved);
            after_backslash = false;
        } else if character == '\\' {
            after_backslash = true;
        } else {
            output.push(character);
        }
    }
    output
}
|
|
|
|
/// Formats `value` in the given radix using the digits `0-9a-z`.
///
/// The radix is clamped to `2..=36`. Without the clamp, radix 0 divides by
/// zero, radix 1 never terminates, and a radix above 36 indexes past the
/// digit alphabet.
fn to_radix(mut value: usize, radix: u32) -> String {
    const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

    if value == 0 {
        return "0".to_string();
    }
    let base = radix.clamp(2, 36) as usize;
    // Digits come out least-significant first and are reversed at the end.
    let mut digits = Vec::new();
    while value > 0 {
        digits.push(ALPHABET[value % base] as char);
        value /= base;
    }
    digits.iter().rev().collect()
}
|
|
|
|
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
|
|
let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
|
|
if let Some(url) = direct_regex
|
|
.captures(html)
|
|
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
|
{
|
|
return Some(Self::normalize_possible_protocol_relative(&url));
|
|
}
|
|
|
|
let unpacked = Self::parse_mixin_packed_eval(html)?;
|
|
let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
|
|
unpacked_regex
|
|
.captures(&unpacked)
|
|
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
|
.map(|value| Self::normalize_possible_protocol_relative(&value))
|
|
}
|
|
|
|
/// Trims `value` and, if it is protocol-relative (`//host/...`), prefixes
/// it with `https:`. Other values are returned trimmed but otherwise unchanged.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    let scheme = if candidate.starts_with("//") {
        "https:"
    } else {
        ""
    };
    [scheme, candidate].concat()
}
|
|
|
|
fn host_from_url(url: &str) -> Option<String> {
|
|
let parsed = url::Url::parse(url).ok()?;
|
|
parsed.host_str().map(|value| value.to_ascii_lowercase())
|
|
}
|
|
|
|
fn is_mixdrop_host(url: &str) -> bool {
|
|
let Some(host) = Self::host_from_url(url) else {
|
|
return false;
|
|
};
|
|
host.contains("mixdrop") || host.contains("m1xdrop")
|
|
}
|
|
|
|
fn download_fid_from_detail_html(html: &str) -> Option<String> {
|
|
let document = Html::parse_document(html);
|
|
let selector = Selector::parse("input[name='fid'][value]").ok()?;
|
|
document
|
|
.select(&selector)
|
|
.next()
|
|
.and_then(|node| node.value().attr("value"))
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(ToOwned::to_owned)
|
|
}
|
|
|
|
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
|
|
let parsed = url::Url::parse(url).ok()?;
|
|
let host = parsed.host_str()?;
|
|
let host_lc = host.to_ascii_lowercase();
|
|
if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") {
|
|
return None;
|
|
}
|
|
|
|
let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty());
|
|
let kind = segments.next()?.to_ascii_lowercase();
|
|
if kind != "e" && kind != "f" {
|
|
return None;
|
|
}
|
|
let media_id = segments.next()?.trim();
|
|
if media_id.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
|
|
}
|
|
|
|
fn video_format_with_headers(
|
|
format: VideoFormat,
|
|
headers: Vec<(String, String)>,
|
|
) -> VideoFormat {
|
|
if headers.is_empty() {
|
|
return format;
|
|
}
|
|
|
|
let header_map: HashMap<String, String> = headers
|
|
.into_iter()
|
|
.filter_map(|(key, value)| {
|
|
let key = key.trim().to_string();
|
|
let value = value.trim().to_string();
|
|
if key.is_empty() || value.is_empty() {
|
|
return None;
|
|
}
|
|
Some((key, value))
|
|
})
|
|
.collect();
|
|
|
|
if header_map.is_empty() {
|
|
return format;
|
|
}
|
|
|
|
let mut value = match serde_json::to_value(&format) {
|
|
Ok(value) => value,
|
|
Err(_) => return format,
|
|
};
|
|
|
|
if let Value::Object(object) = &mut value {
|
|
let Ok(headers_value) = serde_json::to_value(header_map) else {
|
|
return format;
|
|
};
|
|
object.insert("http_headers".to_string(), headers_value);
|
|
if let Ok(updated) = serde_json::from_value::<VideoFormat>(value) {
|
|
return updated;
|
|
}
|
|
}
|
|
|
|
format
|
|
}
|
|
|
|
fn first_video_source_from_html(html: &str) -> Option<String> {
|
|
let document = Html::parse_document(html);
|
|
let source_selector = Selector::parse("video source[src]").ok()?;
|
|
let video_src_selector = Selector::parse("video[src]").ok()?;
|
|
|
|
if let Some(value) = document
|
|
.select(&source_selector)
|
|
.next()
|
|
.and_then(|node| node.value().attr("src"))
|
|
{
|
|
return Some(value.to_string());
|
|
}
|
|
document
|
|
.select(&video_src_selector)
|
|
.next()
|
|
.and_then(|node| node.value().attr("src"))
|
|
.map(|value| value.to_string())
|
|
}
|
|
|
|
fn first_iframe_source_from_html(html: &str) -> Option<String> {
|
|
let document = Html::parse_document(html);
|
|
let iframe_selector = Selector::parse("iframe[src]").ok()?;
|
|
document
|
|
.select(&iframe_selector)
|
|
.next()
|
|
.and_then(|node| node.value().attr("src"))
|
|
.map(|value| value.to_string())
|
|
}
|
|
|
|
async fn resolve_mixdrop_media_from_iframe(
|
|
&self,
|
|
iframe_url: &str,
|
|
referer: &str,
|
|
options: &ServerOptions,
|
|
) -> Option<ResolvedMixdropMedia> {
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
|
|
let response = requester
|
|
.get_raw_with_headers_timeout(
|
|
iframe_url,
|
|
self.html_headers(referer),
|
|
Some(StdDuration::from_secs(6)),
|
|
)
|
|
.await
|
|
.ok()?;
|
|
if !response.status().is_success() {
|
|
return None;
|
|
}
|
|
let iframe_html = response.text().await.ok()?;
|
|
let media_url = Self::extract_mixdrop_media_url(&iframe_html)?;
|
|
Some(ResolvedMixdropMedia {
|
|
media_url,
|
|
embed_url: iframe_url.to_string(),
|
|
})
|
|
}
|
|
|
|
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
|
let page_url = item.url.clone();
|
|
let format_url = Self::proxied_video(options, &page_url);
|
|
if format_url.is_empty() {
|
|
return item;
|
|
}
|
|
|
|
let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string());
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
|
|
if let Ok(detail_html) = requester
|
|
.get_with_headers(
|
|
&page_url,
|
|
self.html_headers(&format!("{}/", self.url)),
|
|
Some(wreq::Version::HTTP_11),
|
|
)
|
|
.await
|
|
{
|
|
let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html)
|
|
.map(|value| self.absolute_url(&value))
|
|
.filter(|value| Self::is_mixdrop_host(value));
|
|
|
|
if mixdrop_embed_url.is_none() {
|
|
mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html)
|
|
.map(|value| self.absolute_url(&value))
|
|
.and_then(|value| Self::mixdrop_embed_url_from_download_url(&value));
|
|
}
|
|
|
|
if let Some(embed_url) = mixdrop_embed_url {
|
|
format = Self::video_format_with_headers(
|
|
format,
|
|
vec![
|
|
("Referer".to_string(), embed_url),
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
],
|
|
);
|
|
}
|
|
}
|
|
|
|
let mut enriched = item;
|
|
enriched.formats = Some(vec![format]);
|
|
enriched
|
|
}
|
|
|
|
fn extract_csrf_token(html: &str) -> Option<String> {
|
|
let regex = Regex::new(r#"<meta name="csrf-token" content="([^"]+)""#).ok()?;
|
|
regex
|
|
.captures(html)
|
|
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
|
}
|
|
|
|
fn decode_livewire_json_attr(raw: &str) -> Option<String> {
|
|
let decoded = Self::decode_text(raw);
|
|
(!decoded.trim().is_empty()).then_some(decoded)
|
|
}
|
|
|
|
/// Maps a supported Livewire component name to the method to call on it.
/// Unknown components yield `None`.
fn livewire_method(component: &str) -> Option<&'static str> {
    const METHODS: [(&str, &str); 3] = [
        ("home-videos", "loadVideos"),
        ("filter.platform", "load_platform_videos"),
        ("profile.model-videos", "load_profile_videos"),
    ];
    METHODS
        .iter()
        .find(|(name, _)| *name == component)
        .map(|(_, method)| *method)
}
|
|
|
|
/// Returns the raw text of the JSON object that follows the first
/// occurrence of `"key":` in `source`.
///
/// Braces are balanced while skipping string contents, including escaped
/// quotes. Returns `None` when the key is absent, its value does not start
/// with `{`, or the object is never closed.
fn extract_json_object(source: &str, key: &str) -> Option<String> {
    let needle = format!("\"{key}\":");
    let after_key = source.find(&needle)? + needle.len();
    let body =
        source[after_key..].trim_start_matches(|character: char| character.is_ascii_whitespace());
    if !body.starts_with('{') {
        return None;
    }

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;
    for (offset, byte) in body.bytes().enumerate() {
        if in_string {
            match byte {
                _ if escaped => escaped = false,
                b'\\' => escaped = true,
                b'"' => in_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    // `offset` is on an ASCII `}`, so the slice ends on a char boundary.
                    return Some(body[..=offset].to_string());
                }
            }
            _ => {}
        }
    }

    None
}
|
|
|
|
fn extract_livewire_initial(html: &str) -> Result<Option<LivewireInitialData>> {
|
|
let regex = Self::regex(r#"wire:initial-data="([^"]+)""#)?;
|
|
for captures in regex.captures_iter(html) {
|
|
let Some(raw) = captures.get(1).map(|value| value.as_str()) else {
|
|
continue;
|
|
};
|
|
let Some(decoded) = Self::decode_livewire_json_attr(raw) else {
|
|
continue;
|
|
};
|
|
let Some(fingerprint_json) = Self::extract_json_object(&decoded, "fingerprint") else {
|
|
continue;
|
|
};
|
|
let Some(server_memo_json) = Self::extract_json_object(&decoded, "serverMemo") else {
|
|
continue;
|
|
};
|
|
let component_regex = Self::regex(r#""name":"([^"]+)""#)?;
|
|
let Some(component) = component_regex
|
|
.captures(&fingerprint_json)
|
|
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
|
|
else {
|
|
continue;
|
|
};
|
|
let Some(method) = Self::livewire_method(&component) else {
|
|
continue;
|
|
};
|
|
return Ok(Some(LivewireInitialData {
|
|
component,
|
|
method: method.to_string(),
|
|
fingerprint_json,
|
|
server_memo_json,
|
|
}));
|
|
}
|
|
Ok(None)
|
|
}
|
|
|
|
async fn fetch_livewire_html(
|
|
&self,
|
|
target_url: &str,
|
|
options: &ServerOptions,
|
|
) -> Result<String> {
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_livewire_html.page");
|
|
let page_html = requester
|
|
.get_with_headers(target_url, self.html_headers(&format!("{}/", self.url)), Some(wreq::Version::HTTP_11))
|
|
.await
|
|
.map_err(|error| Error::from(format!("fetch page failed for {target_url}: {error}")))?;
|
|
|
|
let csrf = Self::extract_csrf_token(&page_html)
|
|
.ok_or_else(|| Error::from(format!("missing csrf token on {target_url}")))?;
|
|
let Some(initial) = Self::extract_livewire_initial(&page_html)? else {
|
|
return Err(Error::from(format!(
|
|
"missing supported livewire initial-data on {target_url}"
|
|
)));
|
|
};
|
|
|
|
let update_id = format!("ab{}", Utc::now().timestamp_micros());
|
|
let payload = format!(
|
|
"{{\"fingerprint\":{},\"serverMemo\":{},\"updates\":[{{\"type\":\"callMethod\",\"payload\":{{\"id\":\"{}\",\"method\":\"{}\",\"params\":[]}}}}]}}",
|
|
initial.fingerprint_json, initial.server_memo_json, update_id, initial.method
|
|
);
|
|
let livewire_url = format!("{}/livewire/message/{}", self.url, initial.component);
|
|
let referer = target_url.to_string();
|
|
let user_agent = FIREFOX_UA.to_string();
|
|
let accept = "application/json, text/plain, */*".to_string();
|
|
let response = requester
|
|
.post(
|
|
&livewire_url,
|
|
&payload,
|
|
vec![
|
|
("Content-Type", "application/json"),
|
|
("X-Requested-With", "XMLHttpRequest"),
|
|
("X-CSRF-TOKEN", csrf.as_str()),
|
|
("Referer", referer.as_str()),
|
|
("User-Agent", user_agent.as_str()),
|
|
("Accept", accept.as_str()),
|
|
],
|
|
)
|
|
.await
|
|
.map_err(|error| {
|
|
Error::from(format!(
|
|
"livewire request failed for {target_url} component={}: {error}",
|
|
initial.component
|
|
))
|
|
})?;
|
|
|
|
let response_text = response
|
|
.text()
|
|
.await
|
|
.map_err(|error| Error::from(format!("livewire response text failed: {error}")))?;
|
|
let parsed: Value = serde_json::from_str(&response_text)?;
|
|
let effects_html = parsed
|
|
.get("effects")
|
|
.and_then(|value| value.get("html"))
|
|
.and_then(Value::as_str)
|
|
.unwrap_or("")
|
|
.to_string();
|
|
crate::flow_debug!(
|
|
"archivebate livewire component={} target={} effects_html_len={} body_preview={}",
|
|
initial.component,
|
|
crate::util::flow_debug::preview(target_url, 120),
|
|
effects_html.len(),
|
|
crate::util::flow_debug::preview(&response_text, 120)
|
|
);
|
|
if !effects_html.trim().is_empty() {
|
|
return Ok(effects_html);
|
|
}
|
|
|
|
Ok(page_html)
|
|
}
|
|
|
|
async fn fetch_items_for_target(
|
|
&self,
|
|
cache: VideoCache,
|
|
target: &Target,
|
|
page: u16,
|
|
per_page: usize,
|
|
options: &ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target_url = self.build_target_url(target, page);
|
|
let cache_key = format!("{CHANNEL_ID}:{target_url}");
|
|
if let Some((time, items)) = cache.get(&cache_key) {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 10 {
|
|
return Ok(items.iter().take(per_page).cloned().collect());
|
|
}
|
|
}
|
|
|
|
let html = self.fetch_livewire_html(&target_url, options).await?;
|
|
let items = self.parse_video_items(&html, target, per_page)?;
|
|
crate::flow_debug!(
|
|
"archivebate parsed target={} page={} items={} html_preview={}",
|
|
crate::util::flow_debug::preview(&target_url, 120),
|
|
page,
|
|
items.len(),
|
|
crate::util::flow_debug::preview(&html, 120)
|
|
);
|
|
if !items.is_empty() {
|
|
cache.insert(cache_key, items.clone());
|
|
}
|
|
Ok(items)
|
|
}
|
|
|
|
async fn refresh_uploaders_from_home(&self, options: &ServerOptions) -> Result<()> {
|
|
let items = self
|
|
.fetch_items_for_target(
|
|
VideoCache::new(),
|
|
&Target::Home,
|
|
1,
|
|
120,
|
|
options,
|
|
)
|
|
.await?;
|
|
for item in items {
|
|
if let Some(uploader) = item.uploader {
|
|
self.upsert_uploader_option(&uploader);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
async fn search_profiles(
|
|
&self,
|
|
query: &str,
|
|
page: u16,
|
|
options: &ServerOptions,
|
|
) -> Result<Vec<SearchProfile>> {
|
|
let query_string = {
|
|
let mut serializer = url::form_urlencoded::Serializer::new(String::new());
|
|
serializer.append_pair("query", query);
|
|
serializer.append_pair("page", &page.max(1).to_string());
|
|
serializer.finish()
|
|
};
|
|
let search_url = format!("{}/api/v1/search?{query_string}", self.url);
|
|
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_search_target");
|
|
let response = requester
|
|
.get_raw_with_headers(&search_url, self.search_headers(&format!("{}/", self.url)))
|
|
.await
|
|
.map_err(|error| Error::from(format!("search request failed: {error}")))?;
|
|
if !response.status().is_success() {
|
|
return Err(Error::from(format!(
|
|
"search request returned status {}",
|
|
response.status()
|
|
)));
|
|
}
|
|
let body = response
|
|
.text()
|
|
.await
|
|
.map_err(|error| Error::from(format!("search response text failed: {error}")))?;
|
|
let parsed: SearchResponse = serde_json::from_str(&body)?;
|
|
Ok(parsed.data)
|
|
}
|
|
|
|
async fn get_default(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
per_page: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target = self.resolve_option_target(&options);
|
|
self.fetch_items_for_target(cache, &target, page, per_page, &options)
|
|
.await
|
|
}
|
|
|
|
async fn query(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
per_page: usize,
|
|
query: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
if let Some(target) = self.target_from_shortcut(query) {
|
|
return self
|
|
.fetch_items_for_target(cache, &target, page, per_page, &options)
|
|
.await;
|
|
}
|
|
|
|
let mut candidates = self.search_profiles(query, page, &options).await?;
|
|
if candidates.is_empty() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let normalized_query = Self::normalize_title(query);
|
|
candidates.sort_by_key(|entry| {
|
|
if Self::normalize_title(&entry.username) == normalized_query {
|
|
0
|
|
} else {
|
|
1
|
|
}
|
|
});
|
|
|
|
for candidate in candidates.into_iter().take(8) {
|
|
let username = candidate.username.trim();
|
|
if username.is_empty() {
|
|
continue;
|
|
}
|
|
let target = Target::Profile {
|
|
username: username.to_string(),
|
|
platform: candidate.platform.clone(),
|
|
gender: candidate.gender.clone(),
|
|
};
|
|
let items = self
|
|
.fetch_items_for_target(cache.clone(), &target, 1, per_page, &options)
|
|
.await?;
|
|
if !items.is_empty() {
|
|
return Ok(items);
|
|
}
|
|
}
|
|
|
|
Ok(vec![])
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for ArchivebateProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
_sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let page = page.parse::<u16>().unwrap_or(1);
|
|
let per_page = per_page.parse::<usize>().unwrap_or(36).clamp(1, 120);
|
|
|
|
let result = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
self.query(cache, page, per_page, &query, options.clone()).await
|
|
}
|
|
_ => self.get_default(cache, page, per_page, options.clone()).await,
|
|
};
|
|
|
|
match result {
|
|
Ok(videos) => {
|
|
if videos.is_empty() {
|
|
return videos;
|
|
}
|
|
stream::iter(videos.into_iter().map(|video| {
|
|
let provider = self.clone();
|
|
let options = options.clone();
|
|
async move {
|
|
let timeout_result = timeout(
|
|
StdDuration::from_secs(8),
|
|
provider.enrich_video(video.clone(), &options),
|
|
)
|
|
.await;
|
|
match timeout_result {
|
|
Ok(enriched) => enriched,
|
|
Err(_) => video,
|
|
}
|
|
}
|
|
}))
|
|
.buffer_unordered(4)
|
|
.collect::<Vec<_>>()
|
|
.await
|
|
}
|
|
Err(error) => {
|
|
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::ArchivebateProvider;

    /// A packed `eval(function(p,a,c,k,e,d){...})` script that assigns
    /// `MDCore.wurl` must be resolved to the absolute `https://` media URL.
    #[test]
    fn extracts_mixdrop_wurl_from_packed_eval() {
        const EXPECTED_URL: &str = "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168";

        let packed_page = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//3.4.5/6/7.8?9=a&b=c";',13,13,'|MDCore|wurl|o230m5y6z|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;

        let resolved = ArchivebateProvider::extract_mixdrop_media_url(packed_page)
            .expect("expected mixdrop media url");

        assert_eq!(resolved, EXPECTED_URL);
    }
}
|