archivebate fix

This commit is contained in:
Simon
2026-04-22 10:18:15 +00:00
parent 47631b8a70
commit a47a69962f
4 changed files with 474 additions and 37 deletions

View File

@@ -1,7 +1,8 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
@@ -18,7 +19,7 @@ use regex::Regex;
use scraper::{Html, Selector};
use serde::Deserialize;
use serde_json::Value;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration as StdDuration;
@@ -119,6 +120,12 @@ struct LivewireInitialData {
server_memo_json: String,
}
/// Result of resolving a Mixdrop embed page to a playable media location.
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
    /// Media URL extracted from the embed page's HTML.
    media_url: String,
    /// URL of the embed (iframe) page the media URL was extracted from.
    embed_url: String,
}
impl ArchivebateProvider {
pub fn new() -> Self {
let provider = Self {
@@ -526,6 +533,28 @@ impl ArchivebateProvider {
.and_then(|captures| captures.name("id").map(|value| value.as_str().to_string()))
}
/// Reports whether `url` is an acceptable detail-page address.
///
/// A URL is accepted only when all of the following hold:
/// - it parses as a URL,
/// - its scheme is `https`,
/// - its host is exactly `archivebate.com` or `www.archivebate.com`,
/// - its path begins with `/watch/`.
fn is_allowed_detail_watch_url(url: &str) -> bool {
    match url::Url::parse(url) {
        Ok(target) => {
            let host_ok = matches!(
                target.host_str(),
                Some("archivebate.com" | "www.archivebate.com")
            );
            target.scheme() == "https" && host_ok && target.path().starts_with("/watch/")
        }
        // Anything that does not parse is rejected outright.
        Err(_) => false,
    }
}
/// Builds the proxy URL for a detail page.
///
/// Returns an empty string when `detail_url` is empty or is rejected by
/// [`Self::is_allowed_detail_watch_url`]; otherwise returns whatever
/// `build_proxy_url` produces for this channel and the output of
/// `strip_url_scheme(detail_url)`.
fn proxied_video(options: &ServerOptions, detail_url: &str) -> String {
    let accepted = !detail_url.is_empty() && Self::is_allowed_detail_watch_url(detail_url);
    if accepted {
        let target = strip_url_scheme(detail_url);
        build_proxy_url(options, CHANNEL_ID, &target)
    } else {
        String::new()
    }
}
fn parse_duration(text: &str) -> u32 {
let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else {
return 0;
@@ -827,6 +856,81 @@ impl ArchivebateProvider {
host.contains("mixdrop") || host.contains("m1xdrop")
}
/// Extracts the `fid` form value from a detail page.
///
/// Looks at the first element matching `input[name='fid'][value]` and returns
/// its `value` attribute with surrounding whitespace removed. Returns `None`
/// when no such element exists or the trimmed value is empty.
fn download_fid_from_detail_html(html: &str) -> Option<String> {
    let document = Html::parse_document(html);
    let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
    // Only the first matching input is considered.
    let first_input = document.select(&fid_selector).next()?;
    let fid = first_input.value().attr("value")?.trim();
    if fid.is_empty() {
        None
    } else {
        Some(fid.to_owned())
    }
}
/// Converts a Mixdrop `/e/<id>` or `/f/<id>` link into its `/e/<id>` embed form.
///
/// Returns `None` when the URL does not parse, has no host, the host does not
/// contain `mixdrop` or `m1xdrop` (compared case-insensitively), the first
/// non-empty path segment is not `e` or `f`, or no non-empty id follows it.
/// The result keeps the original scheme and host and uses only the id segment.
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
    let link = url::Url::parse(url).ok()?;
    let host = link.host_str()?;

    let lowered_host = host.to_ascii_lowercase();
    let looks_like_mixdrop = ["mixdrop", "m1xdrop"]
        .iter()
        .any(|needle| lowered_host.contains(*needle));
    if !looks_like_mixdrop {
        return None;
    }

    // Empty segments (e.g. from doubled slashes) are ignored.
    let mut parts = link.path_segments()?.filter(|part| !part.is_empty());

    let prefix = parts.next()?;
    let is_known_prefix = prefix.eq_ignore_ascii_case("e") || prefix.eq_ignore_ascii_case("f");
    if !is_known_prefix {
        return None;
    }

    let media_id = parts.next()?.trim();
    if media_id.is_empty() {
        return None;
    }

    let scheme = link.scheme();
    Some(format!("{scheme}://{host}/e/{media_id}"))
}
/// Returns `format` with an `http_headers` entry set from `headers`.
///
/// Header names and values are trimmed; pairs whose name or value is empty
/// after trimming are dropped. If no usable pair remains, `format` is returned
/// unchanged. The headers are attached by serializing `format` to JSON,
/// inserting the `http_headers` key, and deserializing back; if any step of
/// that round trip fails (or the serialized form is not a JSON object), the
/// original `format` is returned unchanged.
fn video_format_with_headers(
    format: VideoFormat,
    headers: Vec<(String, String)>,
) -> VideoFormat {
    let mut cleaned: HashMap<String, String> = HashMap::new();
    for (name, content) in headers {
        let name = name.trim();
        let content = content.trim();
        if !name.is_empty() && !content.is_empty() {
            // A later duplicate name replaces an earlier one.
            cleaned.insert(name.to_string(), content.to_string());
        }
    }
    if cleaned.is_empty() {
        return format;
    }

    let Ok(Value::Object(mut fields)) = serde_json::to_value(&format) else {
        return format;
    };
    let Ok(encoded_headers) = serde_json::to_value(cleaned) else {
        return format;
    };
    fields.insert("http_headers".to_string(), encoded_headers);

    match serde_json::from_value::<VideoFormat>(Value::Object(fields)) {
        Ok(updated) => updated,
        Err(_) => format,
    }
}
fn first_video_source_from_html(html: &str) -> Option<String> {
let document = Html::parse_document(html);
let source_selector = Selector::parse("video source[src]").ok()?;
@@ -861,23 +965,37 @@ impl ArchivebateProvider {
iframe_url: &str,
referer: &str,
options: &ServerOptions,
) -> Option<String> {
) -> Option<ResolvedMixdropMedia> {
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
let iframe_html = requester
.get_with_headers(
let response = requester
.get_raw_with_headers_timeout(
iframe_url,
self.html_headers(referer),
Some(wreq::Version::HTTP_11),
Some(StdDuration::from_secs(6)),
)
.await
.ok()?;
Self::extract_mixdrop_media_url(&iframe_html)
if !response.status().is_success() {
return None;
}
let iframe_html = response.text().await.ok()?;
let media_url = Self::extract_mixdrop_media_url(&iframe_html)?;
Some(ResolvedMixdropMedia {
media_url,
embed_url: iframe_url.to_string(),
})
}
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
let page_url = item.url.clone();
let format_url = Self::proxied_video(options, &page_url);
if format_url.is_empty() {
return item;
}
let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string());
let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
let detail_html = match requester
if let Ok(detail_html) = requester
.get_with_headers(
&page_url,
self.html_headers(&format!("{}/", self.url)),
@@ -885,40 +1003,27 @@ impl ArchivebateProvider {
)
.await
{
Ok(value) => value,
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"enrich_video.fetch_detail",
&format!("url={page_url}; error={error}"),
);
return item;
let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value))
.filter(|value| Self::is_mixdrop_host(value));
if mixdrop_embed_url.is_none() {
mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html)
.map(|value| self.absolute_url(&value))
.and_then(|value| Self::mixdrop_embed_url_from_download_url(&value));
}
};
let mut media_url = Self::first_video_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value));
if media_url.is_none() {
let iframe_url = Self::first_iframe_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value));
if let Some(iframe_url) = iframe_url {
if Self::is_mixdrop_host(&iframe_url) {
if let Some(resolved) = self
.resolve_mixdrop_media_from_iframe(&iframe_url, &page_url, options)
.await
{
media_url = Some(resolved);
}
}
if let Some(embed_url) = mixdrop_embed_url {
format = Self::video_format_with_headers(
format,
vec![
("Referer".to_string(), embed_url),
("User-Agent".to_string(), FIREFOX_UA.to_string()),
],
);
}
}
let Some(media_url) = media_url else {
return item;
};
let format = VideoFormat::new(media_url, "source".to_string(), "mp4".to_string());
let mut enriched = item;
enriched.formats = Some(vec![format]);
enriched