xfree and beeg bug fix
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{Provider, report_provider_error_background};
|
||||
use crate::providers::{Provider, report_provider_error, report_provider_error_background};
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::videos::{ServerOptions, VideoItem};
|
||||
@@ -11,6 +11,7 @@ use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use serde_json::Value;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use std::vec;
|
||||
|
||||
error_chain! {
|
||||
@@ -73,14 +74,15 @@ impl BeegProvider {
|
||||
};
|
||||
|
||||
rt.block_on(async move {
|
||||
if let Err(e) = Self::load_sites(sites).await {
|
||||
eprintln!("beeg load_sites failed: {}", e);
|
||||
}
|
||||
if let Err(e) = Self::load_categories(categories).await {
|
||||
eprintln!("beeg load_categories failed: {}", e);
|
||||
}
|
||||
if let Err(e) = Self::load_stars(stars).await {
|
||||
eprintln!("beeg load_stars failed: {}", e);
|
||||
match Self::fetch_tags().await {
|
||||
Ok(json) => {
|
||||
Self::load_sites(&json, sites);
|
||||
Self::load_categories(&json, categories);
|
||||
Self::load_stars(&json, stars);
|
||||
}
|
||||
Err(e) => {
|
||||
report_provider_error("beeg", "init.fetch_tags", &e.to_string()).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -88,24 +90,36 @@ impl BeegProvider {
|
||||
|
||||
async fn fetch_tags() -> Result<Value> {
|
||||
let mut requester = util::requester::Requester::new();
|
||||
let text = match requester
|
||||
.get(
|
||||
"https://store.externulls.com/tag/facts/tags?get_original=true&slug=index",
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(text) => text,
|
||||
Err(e) => {
|
||||
eprintln!("beeg fetch_tags failed: {}", e);
|
||||
return Err(ErrorKind::Parse("failed to fetch tags".into()).into());
|
||||
let endpoints = [
|
||||
"https://store.externulls.com/tag/facts/tags?get_original=true&slug=index",
|
||||
"https://store.externulls.com/tag/facts/tags?slug=index",
|
||||
];
|
||||
let mut errors: Vec<String> = vec![];
|
||||
|
||||
for endpoint in endpoints {
|
||||
for attempt in 1..=3 {
|
||||
match requester.get(endpoint, None).await {
|
||||
Ok(text) => match serde_json::from_str::<Value>(&text) {
|
||||
Ok(json) => return Ok(json),
|
||||
Err(e) => {
|
||||
errors
|
||||
.push(format!("endpoint={endpoint}; attempt={attempt}; parse={e}"));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
errors.push(format!(
|
||||
"endpoint={endpoint}; attempt={attempt}; request={e}"
|
||||
));
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(250 * attempt as u64)).await;
|
||||
}
|
||||
};
|
||||
Ok(serde_json::from_str(&text)?)
|
||||
}
|
||||
|
||||
Err(ErrorKind::Parse(format!("failed to fetch tags; {}", errors.join(" | "))).into())
|
||||
}
|
||||
|
||||
async fn load_stars(stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
||||
let json = Self::fetch_tags().await?;
|
||||
fn load_stars(json: &Value, stars: Arc<RwLock<Vec<FilterOption>>>) {
|
||||
let arr = json
|
||||
.get("human")
|
||||
.and_then(|v| v.as_array().map(|v| v.as_slice()))
|
||||
@@ -124,11 +138,9 @@ impl BeegProvider {
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn load_categories(categories: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
||||
let json = Self::fetch_tags().await?;
|
||||
fn load_categories(json: &Value, categories: Arc<RwLock<Vec<FilterOption>>>) {
|
||||
let arr = json
|
||||
.get("other")
|
||||
.and_then(|v| v.as_array().map(|v| v.as_slice()))
|
||||
@@ -147,11 +159,9 @@ impl BeegProvider {
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn load_sites(sites: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
||||
let json = Self::fetch_tags().await?;
|
||||
fn load_sites(json: &Value, sites: Arc<RwLock<Vec<FilterOption>>>) {
|
||||
let arr = json
|
||||
.get("productions")
|
||||
.and_then(|v| v.as_array().map(|v| v.as_slice()))
|
||||
@@ -170,7 +180,6 @@ impl BeegProvider {
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
||||
|
||||
@@ -36,6 +36,7 @@ pub mod porn00;
|
||||
pub mod pornzog;
|
||||
pub mod sxyprn;
|
||||
pub mod tnaflix;
|
||||
pub mod xfree;
|
||||
pub mod xxthots;
|
||||
pub mod youjizz;
|
||||
// pub mod pornxp;
|
||||
@@ -143,6 +144,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
|
||||
"xxdbx",
|
||||
Arc::new(xxdbx::XxdbxProvider::new()) as DynProvider,
|
||||
);
|
||||
m.insert(
|
||||
"xfree",
|
||||
Arc::new(xfree::XfreeProvider::new()) as DynProvider,
|
||||
);
|
||||
m.insert(
|
||||
"hqporner",
|
||||
Arc::new(hqporner::HqpornerProvider::new()) as DynProvider,
|
||||
|
||||
751
src/providers/xfree.rs
Normal file
751
src/providers/xfree.rs
Normal file
@@ -0,0 +1,751 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{Provider, report_provider_error_background, requester_or_default};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::discord::send_discord_error_report;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use regex::Regex;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Write;
|
||||
use std::vec;
|
||||
use url::form_urlencoded::{Serializer, parse};
|
||||
|
||||
// Provider-local error machinery generated by `error_chain`: defines this
// module's `Error`/`ErrorKind`/`Result` and lets `?` auto-convert the
// listed foreign error types.
error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
}
|
||||
|
||||
/// Scraper-backed provider for xfree.com listings and video detail pages.
#[derive(Debug, Clone)]
pub struct XfreeProvider {
    // Base site URL; relative hrefs are resolved against it (see `absolute_url`).
    url: String,
}
|
||||
|
||||
/// One video entry scraped from a listing page, before the detail page
/// has been fetched to resolve actual media URLs.
#[derive(Debug, Clone)]
struct RawListingItem {
    // Numeric video id taken from the `id` query parameter of the href.
    id: String,
    // Best-effort title (title attribute, link body, or href slug).
    title: String,
    // Absolute URL of the video's detail page.
    detail_url: String,
    // Thumbnail URL found near the listing link; may be empty.
    thumb: String,
    // Duration in seconds; 0 when not found in the listing segment.
    duration: u32,
    // View count parsed from e.g. "12.5K views", when present.
    views: Option<u32>,
    // Uploader display name, when a profile link is found nearby.
    uploader: Option<String>,
    // Lowercased hashtags found in the listing segment.
    tags: Vec<String>,
}
|
||||
|
||||
impl XfreeProvider {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
url: "https://www.xfree.com".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the channel descriptor shown to clients: static metadata plus
/// two single-select option groups ("sort" and "category").
/// The client version is currently unused but kept for interface parity
/// with other providers.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    Channel {
        id: "xfree".to_string(),
        name: "XFree".to_string(),
        description: "Short NSFW clips from xfree.com".to_string(),
        premium: false,
        // Google favicon service avoids hotlinking the site directly.
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=xfree.com".to_string(),
        status: "active".to_string(),
        // These ids must match what `category_value`/`category_suffix` accept.
        categories: vec![
            "all".to_string(),
            "straight".to_string(),
            "gay".to_string(),
            "trans".to_string(),
        ],
        options: vec![
            ChannelOption {
                id: "sort".to_string(),
                title: "Sort".to_string(),
                description: "Sort listing preference".to_string(),
                systemImage: "list.number".to_string(),
                colorName: "blue".to_string(),
                options: vec![
                    FilterOption {
                        id: "trending".to_string(),
                        title: "Trending".to_string(),
                    },
                    FilterOption {
                        id: "latest".to_string(),
                        title: "Latest".to_string(),
                    },
                ],
                multiSelect: false,
            },
            ChannelOption {
                id: "category".to_string(),
                title: "Category".to_string(),
                description: "Audience/category feed".to_string(),
                systemImage: "line.horizontal.3.decrease.circle".to_string(),
                colorName: "green".to_string(),
                options: vec![
                    FilterOption {
                        id: "all".to_string(),
                        title: "All".to_string(),
                    },
                    FilterOption {
                        id: "straight".to_string(),
                        title: "Straight".to_string(),
                    },
                    FilterOption {
                        id: "gay".to_string(),
                        title: "Gay".to_string(),
                    },
                    FilterOption {
                        id: "trans".to_string(),
                        title: "Trans".to_string(),
                    },
                ],
                multiSelect: false,
            },
        ],
        nsfw: true,
        // Matches the 300-second freshness window used in `fetch`.
        cacheDuration: Some(300),
    }
}
|
||||
|
||||
/// Collapses every run of whitespace (spaces, tabs, newlines) into a
/// single space and trims leading/trailing whitespace.
fn normalize_ws(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
|
||||
|
||||
fn decode_html(input: &str) -> String {
|
||||
decode(input.as_bytes())
|
||||
.to_string()
|
||||
.unwrap_or_else(|_| input.to_string())
|
||||
}
|
||||
|
||||
/// Normalizes a media URL scraped out of HTML/JS: strips surrounding
/// quote/backslash/whitespace noise, un-escapes JSON/JS escapes and
/// HTML-escaped ampersands, trims trailing punctuation dragged in by the
/// regex match, and upgrades protocol-relative URLs to HTTPS.
fn clean_media_url(raw: &str) -> String {
    let mut out = raw
        .trim_matches(|c: char| c == '"' || c == '\'' || c == '\\' || c.is_whitespace())
        .to_string();
    out = out
        .replace("\\u0026", "&")
        .replace("\\u002F", "/")
        .replace("\\/", "/")
        // BUG FIX: was `.replace("&", "&")` — a no-op. HTML-escaped
        // ampersands ("&amp;") in scraped URLs must be decoded or the
        // query string stays broken.
        .replace("&amp;", "&");
    // Drop trailing JS/JSON punctuation (e.g. "...mp4", or "...mp4");)
    // that the extraction regex can capture along with the URL.
    out = out
        .trim_end_matches(|c: char| matches!(c, ',' | ';' | ')' | ']' | '}'))
        .to_string();
    // Protocol-relative URLs are assumed HTTPS.
    if out.starts_with("//") {
        return format!("https:{out}");
    }
    out
}
|
||||
|
||||
/// True when `url` is an absolute http(s) URL that references an MP4 or
/// HLS (m3u8) resource (case-insensitive).
fn is_downloadable_media_url(url: &str) -> bool {
    let lower = url.to_ascii_lowercase();
    let has_scheme = ["http://", "https://"].iter().any(|p| lower.starts_with(p));
    let has_media_ext = [".mp4", ".m3u8"].iter().any(|ext| lower.contains(ext));
    has_scheme && has_media_ext
}
|
||||
|
||||
fn absolute_url(&self, path: &str) -> String {
|
||||
if path.starts_with("http://") || path.starts_with("https://") {
|
||||
return path.to_string();
|
||||
}
|
||||
if path.starts_with("//") {
|
||||
return format!("https:{path}");
|
||||
}
|
||||
if path.starts_with('/') {
|
||||
return format!("{}{}", self.url, path);
|
||||
}
|
||||
format!("{}/{}", self.url, path.trim_start_matches('/'))
|
||||
}
|
||||
|
||||
fn encode_query_value(value: &str) -> String {
|
||||
let mut serializer = Serializer::new(String::new());
|
||||
serializer.append_pair("q", value);
|
||||
let encoded = serializer.finish();
|
||||
encoded.strip_prefix("q=").unwrap_or(&encoded).to_string()
|
||||
}
|
||||
|
||||
fn category_value(options: &ServerOptions) -> String {
|
||||
options
|
||||
.category
|
||||
.clone()
|
||||
.unwrap_or_else(|| "all".to_string())
|
||||
.to_ascii_lowercase()
|
||||
}
|
||||
|
||||
fn sort_value(options: &ServerOptions) -> String {
|
||||
options
|
||||
.sort
|
||||
.clone()
|
||||
.unwrap_or_else(|| "trending".to_string())
|
||||
.to_ascii_lowercase()
|
||||
}
|
||||
|
||||
/// Maps a known category id to the URL path segment the site uses for
/// it; "all" (or anything unrecognized) has no dedicated path.
fn category_suffix(category: &str) -> Option<&'static str> {
    const KNOWN: [&str; 3] = ["gay", "trans", "straight"];
    KNOWN.iter().copied().find(|&known| known == category)
}
|
||||
|
||||
/// Appends a `page=N` query parameter for pages beyond the first,
/// picking '?' or '&' depending on whether a query string exists.
fn with_page(mut url: String, page: u8) -> String {
    if page > 1 {
        let sep = if url.contains('?') { '&' } else { '?' };
        let _ = write!(url, "{sep}page={page}");
    }
    url
}
|
||||
|
||||
/// Builds the ordered list of candidate listing URLs for `fetch` to try.
/// Order matters: earlier entries are more specific; `fetch` stops at the
/// first URL that yields items.
///
/// With a non-empty query: category-scoped search first (if the category
/// has a path), then the generic search. Without a query: the category
/// feed, optionally prefixed by "latest"-sorted variants.
fn build_listing_urls(&self, page: u8, query: &str, options: &ServerOptions) -> Vec<String> {
    let category = Self::category_value(options);
    let sort = Self::sort_value(options);
    let encoded_query = Self::encode_query_value(query.trim());
    let category_suffix = Self::category_suffix(&category);
    let mut urls = Vec::new();

    // Search mode: return only search URLs, ignoring the sort option.
    if !query.trim().is_empty() {
        if let Some(suffix) = category_suffix {
            urls.push(Self::with_page(
                format!("{}/search-{suffix}?q={encoded_query}", self.url),
                page,
            ));
        }
        urls.push(Self::with_page(
            format!("{}/search?q={encoded_query}", self.url),
            page,
        ));
        return urls;
    }

    // Browse mode: base feed is the category path, or the site root for "all".
    let base_category_url = match category_suffix {
        Some(suffix) => format!("{}/{}", self.url, suffix),
        None => self.url.clone(),
    };

    // Two guesses at the site's "latest" URL shape; the plain feed is the
    // final fallback either way.
    if sort == "latest" {
        urls.push(Self::with_page(
            format!("{}/latest", base_category_url),
            page,
        ));
        urls.push(Self::with_page(
            format!("{base_category_url}?sort=latest"),
            page,
        ));
    }
    urls.push(Self::with_page(base_category_url, page));

    urls
}
|
||||
|
||||
fn extract_href_param(href: &str, key: &str) -> Option<String> {
|
||||
let query = href.split('?').nth(1)?;
|
||||
for (k, v) in parse(query.as_bytes()) {
|
||||
if k == key {
|
||||
return Some(v.into_owned());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn strip_html_tags(text: &str) -> String {
|
||||
let Ok(tags_re) = Regex::new(r"(?is)<[^>]+>") else {
|
||||
return text.to_string();
|
||||
};
|
||||
tags_re.replace_all(text, " ").to_string()
|
||||
}
|
||||
|
||||
/// Finds a duration in `text`: first a clock-style "M:SS" / "H:MM:SS"
/// token, then an ISO-8601 "PTxHxMxS" token as fallback. Returns seconds,
/// or `None` when neither form is present (or the total is zero).
fn extract_duration_seconds(text: &str) -> Option<u32> {
    let Ok(duration_re) = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b") else {
        return None;
    };
    if let Some(caps) = duration_re.captures(text) {
        if let Some(raw) = caps.get(1) {
            // parse_time_to_seconds is a project helper; presumably it
            // converts "M:SS"/"H:MM:SS" to total seconds — verify at its
            // definition if behavior here looks off.
            return parse_time_to_seconds(raw.as_str()).map(|v| v as u32);
        }
    }

    // Fallback: ISO-8601 duration as used in schema.org video metadata.
    let Ok(iso_re) = Regex::new(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?") else {
        return None;
    };
    let caps = iso_re.captures(text)?;
    let h = caps
        .get(1)
        .and_then(|m| m.as_str().parse::<u32>().ok())
        .unwrap_or(0);
    let m = caps
        .get(2)
        .and_then(|m| m.as_str().parse::<u32>().ok())
        .unwrap_or(0);
    let s = caps
        .get(3)
        .and_then(|m| m.as_str().parse::<u32>().ok())
        .unwrap_or(0);
    let total = h.saturating_mul(3600) + m.saturating_mul(60) + s;
    // A zero total means the regex matched bare "PT" — treat as not found.
    if total > 0 { Some(total) } else { None }
}
|
||||
|
||||
/// Extracts a view count such as "12.5K views" from `text`, delegating
/// the abbreviated-number parsing ("12.5K" -> 12500) to the project
/// helper `parse_abbreviated_number`.
fn extract_views(text: &str) -> Option<u32> {
    let Ok(views_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s*(?:views?|view)\b")
    else {
        return None;
    };
    let raw = views_re
        .captures(text)
        .and_then(|c| c.get(1))
        .map(|m| m.as_str().to_string())?;
    parse_abbreviated_number(&raw)
}
|
||||
|
||||
fn extract_tags(text: &str) -> Vec<String> {
|
||||
let Ok(tag_re) = Regex::new(r"#([A-Za-z0-9_]+)") else {
|
||||
return vec![];
|
||||
};
|
||||
let mut seen = HashSet::new();
|
||||
let mut tags = vec![];
|
||||
for caps in tag_re.captures_iter(text) {
|
||||
let Some(raw) = caps.get(1).map(|m| m.as_str()) else {
|
||||
continue;
|
||||
};
|
||||
let tag = raw.to_ascii_lowercase();
|
||||
if seen.insert(tag.clone()) {
|
||||
tags.push(tag);
|
||||
}
|
||||
}
|
||||
tags
|
||||
}
|
||||
|
||||
/// Finds a thumbnail URL in an HTML fragment. Prefers URLs on known
/// xfree image hosts (thumbs/peek/prbn subdomains); falls back to the
/// first img-style src attribute, resolved to an absolute URL. Returns
/// an empty string when nothing matches.
fn extract_thumb_from_segment(&self, segment: &str) -> String {
    let Ok(thumb_re) = Regex::new(
        r#"(?is)(https?://[^"' <]*(?:thumbs|peek|prbn)\.xfree\.com[^"' <]*\.(?:jpg|jpeg|png|webp))"#,
    ) else {
        return String::new();
    };
    if let Some(m) = thumb_re.captures(segment).and_then(|c| c.get(1)) {
        return m.as_str().to_string();
    }

    // Fallback: any src/data-src/data-original attribute, possibly relative.
    let Ok(img_attr_re) = Regex::new(r#"(?is)(?:src|data-src|data-original)="([^"]+)""#) else {
        return String::new();
    };
    if let Some(m) = img_attr_re.captures(segment).and_then(|c| c.get(1)) {
        return self.absolute_url(m.as_str());
    }

    String::new()
}
|
||||
|
||||
fn extract_quality_from_url(url: &str) -> String {
|
||||
let Ok(q_re) = Regex::new(r"(?i)(\d{3,4})p") else {
|
||||
return "1080".to_string();
|
||||
};
|
||||
if let Some(q) = q_re.captures(url).and_then(|c| c.get(1)) {
|
||||
return q.as_str().to_string();
|
||||
}
|
||||
if url.to_ascii_lowercase().contains(".m3u8") {
|
||||
return "hls".to_string();
|
||||
}
|
||||
"1080".to_string()
|
||||
}
|
||||
|
||||
/// Parses a listing page into `RawListingItem`s by regex-scanning for
/// anchors of the form `/video?id=N` (or `/video-<cat>?id=N`). For each
/// unique id, metadata (thumb, duration, views, uploader, tags) is
/// scraped from a window of surrounding HTML, since the card markup
/// around the anchor is not parsed structurally.
fn parse_listing_items(&self, html: &str) -> Vec<RawListingItem> {
    if html.trim().is_empty() {
        return vec![];
    }
    let Ok(link_re) = Regex::new(
        r#"(?is)<a[^>]+href="(?P<href>/(?:video(?:-[a-z]+)?\?id=\d+[^"]*))"[^>]*>(?P<body>.*?)</a>"#,
    ) else {
        return vec![];
    };
    let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else {
        return vec![];
    };
    let Ok(uploader_re) =
        Regex::new(r#"(?is)href="/(?:u|user|profile)/[^"]+"[^>]*>\s*([^<]{2,64})\s*<"#)
    else {
        return vec![];
    };

    let mut items = vec![];
    // The same video can be linked multiple times per page; keep the first.
    let mut seen_ids = HashSet::new();

    for caps in link_re.captures_iter(html) {
        let Some(full) = caps.get(0) else {
            continue;
        };
        let href = caps.name("href").map(|m| m.as_str()).unwrap_or("");
        let body = caps.name("body").map(|m| m.as_str()).unwrap_or("");
        let Some(id) = Self::extract_href_param(href, "id") else {
            continue;
        };
        if !seen_ids.insert(id.clone()) {
            continue;
        }

        // Metadata window: 400 bytes before and 1600 after the anchor.
        // NOTE(review): these are byte offsets; `html.get(..)` returns None
        // if a boundary lands mid-UTF-8-codepoint, in which case only the
        // anchor itself is scanned — intentional-looking fallback.
        let seg_start = full.start().saturating_sub(400);
        let seg_end = (full.end() + 1600).min(html.len());
        let segment = html.get(seg_start..seg_end).unwrap_or(full.as_str());

        // Title preference: title attribute > anchor body text > href slug.
        let title_from_attr = title_attr_re
            .captures(full.as_str())
            .and_then(|c| c.get(1))
            .map(|m| m.as_str().to_string())
            .unwrap_or_default();
        let title_from_body = Self::strip_html_tags(body);
        let title_from_href = Self::extract_href_param(href, "title")
            .map(|s| s.replace('-', " "))
            .unwrap_or_default();
        let title = Self::normalize_ws(&Self::decode_html(if !title_from_attr.is_empty() {
            &title_from_attr
        } else if !title_from_body.trim().is_empty() {
            &title_from_body
        } else {
            &title_from_href
        }));
        if title.is_empty() {
            continue;
        }

        let thumb = self.extract_thumb_from_segment(segment);
        let duration = Self::extract_duration_seconds(segment).unwrap_or(0);
        let views = Self::extract_views(segment);
        let uploader = uploader_re
            .captures(segment)
            .and_then(|c| c.get(1))
            .map(|m| Self::normalize_ws(m.as_str()))
            .filter(|s| !s.is_empty());
        let tags = Self::extract_tags(segment);

        items.push(RawListingItem {
            id,
            title,
            detail_url: self.absolute_url(href),
            thumb,
            duration,
            views,
            uploader,
            tags,
        });
    }

    items
}
|
||||
|
||||
/// Scans a detail page for playable media URLs (.mp4 / .m3u8). Two
/// patterns are tried: one for backslash-escaped URLs embedded in JS
/// string literals, one for plain URLs. Matches are normalized via
/// `clean_media_url`, filtered to downloadable http(s) media, and
/// deduplicated while preserving discovery order.
fn extract_media_urls(&self, html: &str) -> Vec<String> {
    let mut urls = vec![];
    let mut seen = HashSet::new();

    let patterns = [
        // JS-escaped form: https:\/\/host\/path.mp4...
        r#"https?:\\?/\\?/[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#,
        // Plain form.
        r#"https?://[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#,
    ];

    for pattern in patterns {
        let Ok(re) = Regex::new(pattern) else {
            continue;
        };
        for m in re.find_iter(html) {
            let cleaned = Self::clean_media_url(m.as_str());
            if !Self::is_downloadable_media_url(&cleaned) {
                continue;
            }
            if seen.insert(cleaned.clone()) {
                urls.push(cleaned);
            }
        }
    }

    urls
}
|
||||
|
||||
/// Collects tag names from `/tag/...` (or `/tag-<cat>/...`) links on a
/// detail page: the path slug is de-hyphenated, space-decoded, trimmed,
/// and lowercased; duplicates are dropped in first-seen order.
fn extract_detail_tags(html: &str) -> Vec<String> {
    let Ok(tag_link_re) = Regex::new(r#"(?is)href="/tag(?:-[a-z]+)?/([^"?#]+)"#) else {
        return vec![];
    };
    let mut seen = HashSet::new();
    let mut tags = vec![];
    for caps in tag_link_re.captures_iter(html) {
        let Some(raw) = caps.get(1).map(|m| m.as_str()) else {
            continue;
        };
        let tag = raw
            .replace('-', " ")
            .replace("%20", " ")
            .trim()
            .to_ascii_lowercase();
        if tag.is_empty() {
            continue;
        }
        if seen.insert(tag.clone()) {
            tags.push(tag);
        }
    }
    tags
}
|
||||
|
||||
/// Extracts a thumbnail from a full detail page by running the same
/// heuristics used on listing segments over the whole document.
fn extract_detail_thumb(&self, html: &str) -> String {
    self.extract_thumb_from_segment(html)
}
|
||||
|
||||
/// Fetches one listing item's detail page and upgrades it to a full
/// `VideoItem` with resolved media URLs. Returns `None` (after reporting
/// the error in the background) when the request fails or no playable
/// media URL is found.
async fn fetch_detailed_video_item(
    &self,
    raw: RawListingItem,
    mut requester: crate::util::requester::Requester,
) -> Option<VideoItem> {
    let detail_html = match requester.get(&raw.detail_url, None).await {
        Ok(t) => t,
        Err(e) => {
            report_provider_error_background(
                "xfree",
                "detail.request",
                &format!("url={}; error={e}", raw.detail_url),
            );
            return None;
        }
    };

    // No media URL means the item is unplayable — drop it.
    let media_urls = self.extract_media_urls(&detail_html);
    if media_urls.is_empty() {
        report_provider_error_background(
            "xfree",
            "detail.media",
            &format!("no_media_url_found; url={}", raw.detail_url),
        );
        return None;
    }

    // Listing-derived metadata wins; the detail page only fills gaps.
    let thumb = if raw.thumb.is_empty() {
        self.extract_detail_thumb(&detail_html)
    } else {
        raw.thumb.clone()
    };

    let duration = if raw.duration > 0 {
        raw.duration
    } else {
        Self::extract_duration_seconds(&detail_html).unwrap_or(0)
    };

    // Merge listing hashtags with detail-page tag links, de-duplicated.
    let mut tags = raw.tags.clone();
    for tag in Self::extract_detail_tags(&detail_html) {
        if !tags.iter().any(|t| t == &tag) {
            tags.push(tag);
        }
    }

    // One VideoFormat per discovered URL, typed by extension.
    let mut formats = vec![];
    for media_url in media_urls.iter() {
        let format_kind = if media_url.to_ascii_lowercase().contains(".m3u8") {
            "m3u8".to_string()
        } else {
            "mp4".to_string()
        };
        let quality = Self::extract_quality_from_url(media_url);
        formats.push(VideoFormat::new(media_url.clone(), quality, format_kind));
    }

    // Primary URL: prefer a direct MP4 over an HLS playlist.
    let selected_url = media_urls
        .iter()
        .find(|u| u.to_ascii_lowercase().contains(".mp4"))
        .cloned()
        .unwrap_or_else(|| media_urls.first().cloned().unwrap_or_default());
    if selected_url.is_empty() {
        return None;
    }

    let mut item = VideoItem::new(
        raw.id,
        raw.title,
        selected_url,
        "xfree".to_string(),
        thumb,
        duration,
    )
    .formats(formats)
    // Preview falls back to the detail-page URL when no media URL exists
    // (unreachable here since media_urls is non-empty, but kept defensive).
    .preview(
        media_urls
            .first()
            .cloned()
            .unwrap_or_else(|| raw.detail_url.clone()),
    );

    if let Some(views) = raw.views {
        item = item.views(views);
    }
    if let Some(uploader) = raw.uploader {
        item = item.uploader(uploader);
    }
    if !tags.is_empty() {
        item = item.tags(tags);
    }

    Some(item)
}
|
||||
|
||||
/// Parses a listing page and resolves each item's detail page with at
/// most `MAX_IN_FLIGHT` concurrent requests, returning the successfully
/// resolved items (completion order, not listing order).
async fn parse_video_items_from_html(
    &self,
    html: String,
    requester: crate::util::requester::Requester,
) -> Vec<VideoItem> {
    let listing_items = self.parse_listing_items(&html);
    if listing_items.is_empty() {
        return vec![];
    }

    let mut in_flight = FuturesUnordered::new();
    let mut items = vec![];
    let mut iter = listing_items.into_iter();
    // Cap on concurrent detail-page requests to avoid hammering the site.
    const MAX_IN_FLIGHT: usize = 5;

    loop {
        // Top up the in-flight set from the remaining listing items.
        while in_flight.len() < MAX_IN_FLIGHT {
            let Some(raw) = iter.next() else {
                break;
            };
            in_flight.push(self.fetch_detailed_video_item(raw, requester.clone()));
        }

        // `None` here means both the iterator and in-flight set are drained.
        let Some(result) = in_flight.next().await else {
            break;
        };
        if let Some(item) = result {
            items.push(item);
        }
    }

    items
}
|
||||
|
||||
/// Fetches videos for a page/query/options combination. Candidate URLs
/// are tried in order; for each one the cache is consulted first
/// (fresh entries — under 300s — are returned immediately; the first
/// non-empty stale entry is remembered as a fallback). The first URL
/// that scrapes any items wins and refreshes the cache; if every URL
/// fails or parses empty, the stale fallback (possibly empty) is
/// returned rather than an error.
async fn fetch(
    &self,
    cache: VideoCache,
    page: u8,
    query: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let urls = self.build_listing_urls(page, query, &options);
    let mut requester = requester_or_default(&options, "xfree", "fetch");
    let mut stale_items = vec![];

    for url in urls {
        if let Some((time, items)) = cache.get(&url) {
            // Freshness window matches the channel's cacheDuration (300s).
            if time.elapsed().unwrap_or_default().as_secs() < 300 {
                return Ok(items.clone());
            }
            if stale_items.is_empty() && !items.is_empty() {
                stale_items = items.clone();
            }
        }

        let html = match requester.get(&url, None).await {
            Ok(text) => text,
            Err(e) => {
                // Report and move on to the next candidate URL.
                report_provider_error_background(
                    "xfree",
                    "listing.request",
                    &format!("url={url}; error={e}"),
                );
                continue;
            }
        };

        let items = self
            .parse_video_items_from_html(html, requester.clone())
            .await;
        if !items.is_empty() {
            // Replace any stale cache entry for this URL with fresh items.
            cache.remove(&url);
            cache.insert(url, items.clone());
            return Ok(items);
        }
    }

    // Best-effort: serve stale data (or nothing) instead of failing hard.
    Ok(stale_items)
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl Provider for XfreeProvider {
    /// Provider entry point: parses paging, runs `fetch`, and on error
    /// reports the full error chain to Discord and returns an empty list
    /// (the API contract never surfaces provider errors to clients).
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        _sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Non-numeric or out-of-range page strings fall back to page 1.
        let page = page.parse::<u8>().unwrap_or(1);
        let query = query.unwrap_or_default();

        match self.fetch(cache, page, &query, options).await {
            Ok(v) => v,
            Err(e) => {
                // Render the error-chain causes as a numbered list.
                let mut chain_str = String::new();
                for (i, cause) in e.iter().enumerate() {
                    let _ = writeln!(chain_str, "{}. {}", i + 1, cause);
                }
                send_discord_error_report(
                    e.to_string(),
                    Some(chain_str),
                    Some("Xfree Provider"),
                    Some("Failed to fetch videos"),
                    file!(),
                    line!(),
                    module_path!(),
                )
                .await;
                vec![]
            }
        }
    }

    /// Channel metadata for this provider (always available).
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::XfreeProvider;

    // Listing parser: one anchor card should yield one item with id,
    // decoded title, parsed duration, and abbreviated view count.
    #[test]
    fn parses_listing_items_from_html() {
        let provider = XfreeProvider::new();
        let html = r#"
            <a href="/video?id=12345&title=bbc-anal-test" title="BBC Anal Test">
                <img src="https://thumbs.xfree.com/ab/cd/test.jpg" />
                <span>1:23</span>
                <span>12.5K views</span>
            </a>
        "#;
        let items = provider.parse_listing_items(html);
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].id, "12345");
        assert_eq!(items[0].title, "BBC Anal Test");
        assert_eq!(items[0].duration, 83);
        assert_eq!(items[0].views, Some(12_500));
    }

    // Media-URL extractor: both JS-escaped and plain URLs are found and
    // normalized (escapes removed, query intact).
    #[test]
    fn extracts_media_urls_from_escaped_html() {
        let provider = XfreeProvider::new();
        let html = r#"
            <script>
                const a = "https:\/\/cdn.xfree.com\/v\/clip_720p.mp4?token=1\u0026x=2";
                const b = "https://cdn.xfree.com/hls/master.m3u8";
            </script>
        "#;
        let urls = provider.extract_media_urls(html);
        assert_eq!(urls.len(), 2);
        assert!(urls.iter().any(|u| u.contains("clip_720p.mp4")));
        assert!(urls.iter().any(|u| u.contains("master.m3u8")));
    }
}
|
||||
Reference in New Issue
Block a user