diff --git a/src/providers/javtiful.rs b/src/providers/javtiful.rs index bfde36f..c650c5d 100644 --- a/src/providers/javtiful.rs +++ b/src/providers/javtiful.rs @@ -362,13 +362,18 @@ impl JavtifulProvider { .unwrap_or("") .to_string(); let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32; - let (tags, formats, views) = self + let (tags, mut formats, views) = self .extract_media(&video_url, &mut requester, options) .await?; if preview.len() == 0 { preview = format!("https://trailers.jav.si/preview/{id}.mp4"); } + if formats.is_empty() && !preview.is_empty() { + let mut format = VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string()); + format.add_http_header("Referer".to_string(), video_url.clone()); + formats.push(format); + } let video_item = VideoItem::new(id, title, video_url, "javtiful".into(), thumb, duration) .formats(formats) .tags(tags) @@ -428,23 +433,55 @@ impl JavtifulProvider { .unwrap_or(0); let quality = "1080p".to_string(); - let stripped_url = crate::providers::strip_url_scheme(url); - let proxy_target = stripped_url - .strip_prefix("www.javtiful.com/") - .or_else(|| stripped_url.strip_prefix("javtiful.com/")) - .unwrap_or(stripped_url.as_str()) - .trim_start_matches('/') - .to_string(); - let video_url = crate::providers::build_proxy_url( - options, - "javtiful", - &proxy_target, - ); - Ok(( - tags, - vec![VideoFormat::new(video_url, quality, "video/mp4".into())], - views, - )) + let mut formats = Vec::new(); + let video_id = url + .split("/video/") + .nth(1) + .and_then(|value| value.split('/').next()) + .unwrap_or("") + .trim(); + let token = text + .split("data-csrf-token=\"") + .nth(1) + .and_then(|value| value.split('"').next()) + .unwrap_or("") + .trim(); + + if !video_id.is_empty() && !token.is_empty() { + let form = wreq::multipart::Form::new() + .text("video_id", video_id.to_string()) + .text("pid_c", "".to_string()) + .text("token", token.to_string()); + + if let Ok(response) = requester + .post_multipart( + "https://javtiful.com/ajax/get_cdn", + form, + vec![("Referer".to_string(), url.to_string())], + Some(Version::HTTP_11), + ) + .await + { + let payload = response.text().await.unwrap_or_default(); + if let Ok(json) = serde_json::from_str::(&payload) { + if let Some(cdn_url) = json.get("playlists").and_then(|value| value.as_str()) { + if !cdn_url.trim().is_empty() { + let mut format = VideoFormat::new( + cdn_url.to_string(), + quality.clone(), + "m3u8".into(), + ); + format.add_http_header("Referer".to_string(), url.to_string()); + formats.push(format); + } + } + } + } + } + + let _ = options; + + Ok((tags, formats, views)) } } diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 8c9ccc2..a9d8991 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -36,10 +36,11 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| const CHANNEL_STATUS_ERROR: &str = "error"; const VALIDATION_RESULTS_REQUIRED: usize = 5; -const VALIDATION_MIN_SUCCESS: usize = 3; +const VALIDATION_MIN_SUCCESS: usize = 1; const VALIDATION_COOLDOWN: Duration = Duration::from_secs(3600); const VALIDATION_MEDIA_TIMEOUT: Duration = Duration::from_secs(100); -const VALIDATION_ERROR_RETEST_INTERVAL: Duration = Duration::from_secs(5 * 60); +const VALIDATION_ERROR_RETEST_INTERVAL: Duration = VALIDATION_COOLDOWN; +const VALIDATION_FAILURES_FOR_ERROR: u8 = 5; #[derive(Clone)] struct ProviderValidationContext { @@ -48,10 +49,18 @@ struct ProviderValidationContext { requester: Requester, } +#[derive(Clone, Copy)] +struct ValidationFailureState { + consecutive_failures: u8, + last_counted_at: Instant, +} + static PROVIDER_VALIDATION_CONTEXT: OnceLock = OnceLock::new(); static PROVIDER_RUNTIME_STATUS: Lazy> = Lazy::new(DashMap::new); static PROVIDER_VALIDATION_INFLIGHT: Lazy> = Lazy::new(DashSet::new); static PROVIDER_VALIDATION_LAST_RUN: Lazy> = Lazy::new(DashMap::new); +static PROVIDER_VALIDATION_FAILURE_STATE: Lazy> = + Lazy::new(DashMap::new); static PROVIDER_ERROR_REVALIDATION_STARTED: OnceLock<()> = OnceLock::new(); fn validation_client_version() -> ClientVersion { @@ -107,12 +116,27 @@ fn validation_request_for_channel(channel: &Channel) -> VideosRequest { } } -fn media_target(item: &VideoItem) -> (String, Vec<(String, String)>) { - if let Some(format) = item.formats.as_ref().and_then(|formats| formats.first()) { - return (format.url.clone(), format.http_headers_pairs()); +fn media_targets(item: &VideoItem) -> Vec<(String, Vec<(String, String)>)> { + let mut targets = Vec::new(); + + if let Some(formats) = item.formats.as_ref() { + for format in formats { + if format.url.trim().is_empty() { + continue; + } + targets.push((format.url.clone(), format.http_headers_pairs())); + } } - (item.url.clone(), Vec::new()) + if !item.url.trim().is_empty() + && !targets + .iter() + .any(|(url, _)| url.eq_ignore_ascii_case(item.url.as_str())) + { + targets.push((item.url.clone(), Vec::new())); + } + + targets } fn looks_like_media(content_type: &str, body: &[u8]) -> bool { @@ -131,6 +155,23 @@ fn looks_like_media(content_type: &str, body: &[u8]) -> bool { || body.windows(4).any(|window| window == b"mdat") } +fn is_transient_validation_error(error: &str) -> bool { + let value = error.to_ascii_lowercase(); + value.contains("client error (connect)") + || value.contains("timed out") + || value.contains("timeout") + || value.contains("dns") + || value.contains("connection reset") + || value.contains("connection refused") + || value.contains("temporarily unavailable") + || value.contains("request returned 403") + || value.contains("request returned 429") + || value.contains("request returned 500") + || value.contains("request returned 502") + || value.contains("request returned 503") + || value.contains("request returned 504") +} + async fn validate_media_response( provider_id: &str, item_index: usize, @@ -256,42 +297,100 @@ async fn run_provider_validation(provider_id: &str) -> Result<(), String> { } let mut successes = 0usize; - let mut failures = Vec::new(); + let mut hard_failures = Vec::new(); + let mut transient_failures = Vec::new(); for (item_index, item) in items.iter().take(VALIDATION_RESULTS_REQUIRED).enumerate() { - let (url, headers) = media_target(item); - if url.is_empty() { - failures.push(format!( + let targets = media_targets(item); + if targets.is_empty() { + hard_failures.push(format!( "{provider_id} item {} returned an empty media url", item_index + 1 )); continue; } - match validate_media_response( - provider_id, - item_index, - &url, - headers, - context.requester.clone(), - ) - .await - { - Ok(()) => { - successes += 1; - if successes >= VALIDATION_MIN_SUCCESS { - return Ok(()); + let mut item_errors = Vec::new(); + let mut item_validated = false; + for (url, headers) in targets { + if url.starts_with('/') { + continue; + } + item_validated = true; + match validate_media_response( + provider_id, + item_index, + &url, + headers, + context.requester.clone(), + ) + .await + { + Ok(()) => { + successes += 1; + if successes >= VALIDATION_MIN_SUCCESS { + return Ok(()); + } + item_errors.clear(); + break; + } + Err(error) => item_errors.push(error), + } + } + + if item_validated && !item_errors.is_empty() { + for error in item_errors { + if is_transient_validation_error(&error) { + transient_failures.push(error); + } else { + hard_failures.push(error); } } - Err(error) => failures.push(error), } } + if successes >= VALIDATION_MIN_SUCCESS { + return Ok(()); + } + + if hard_failures.is_empty() && !transient_failures.is_empty() { + crate::flow_debug!( + "provider validation inconclusive provider={} transient_failures={}", + provider_id, + transient_failures.len() + ); + return Ok(()); + } + Err(format!( - "{provider_id} validation failed: only {successes} media checks passed (required at least {VALIDATION_MIN_SUCCESS}); failures={}", - failures.join(" | ") + "{provider_id} validation failed: only {successes} media checks passed (required at least {VALIDATION_MIN_SUCCESS}); hard_failures={}; transient_failures={}", + hard_failures.join(" | "), + transient_failures.join(" | ") )) } +fn reset_validation_failure_state(provider_id: &str) { + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); +} + +fn record_validation_failure(provider_id: &str, now: Instant) -> u8 { + if let Some(mut state) = PROVIDER_VALIDATION_FAILURE_STATE.get_mut(provider_id) { + if now.duration_since(state.last_counted_at) >= VALIDATION_COOLDOWN { + state.consecutive_failures = state.consecutive_failures.saturating_add(1); + state.last_counted_at = now; + } + return state.consecutive_failures; + } + + PROVIDER_VALIDATION_FAILURE_STATE.insert( + provider_id.to_string(), + ValidationFailureState { + consecutive_failures: 1, + last_counted_at: now, + }, + ); + 1 +} + fn start_periodic_error_revalidation() { if PROVIDER_ERROR_REVALIDATION_STARTED.set(()).is_err() { return; @@ -383,14 +482,20 @@ pub fn schedule_provider_validation(provider_id: &str, context: &str, msg: &str) let validation_result = run_provider_validation(&provider_id).await; match validation_result { Ok(()) => { + reset_validation_failure_state(&provider_id); PROVIDER_RUNTIME_STATUS.remove(&provider_id); } Err(_validation_error) => { - PROVIDER_RUNTIME_STATUS - .insert(provider_id.clone(), CHANNEL_STATUS_ERROR.to_string()); + let failures = record_validation_failure(&provider_id, Instant::now()); + if failures >= VALIDATION_FAILURES_FOR_ERROR { + PROVIDER_RUNTIME_STATUS + .insert(provider_id.clone(), CHANNEL_STATUS_ERROR.to_string()); + } crate::flow_debug!( - "provider validation failed provider={} error={}", + "provider validation failed provider={} failures={} threshold={} error={}", &provider_id, + failures, + VALIDATION_FAILURES_FOR_ERROR, crate::util::flow_debug::preview(&_validation_error, 160) ); } @@ -790,6 +895,8 @@ mod tests { #[derive(Debug, Deserialize)] struct ApiVideoItem { + #[serde(default)] + title: String, url: String, formats: Option>, } @@ -880,6 +987,41 @@ mod tests { } } + fn request_for_channel_with_query(channel: &Channel, query: String) -> VideosRequest { + let mut request = request_for_channel(channel); + request.query = Some(query); + request + } + + fn search_queries_for_channel(provider_id: &str, items: &[ApiVideoItem]) -> Vec { + let mut candidates = Vec::new(); + match provider_id { + "yesporn" => candidates.push("anal".to_string()), + _ => {} + } + + for item in items { + for token in item.title.split_whitespace() { + let cleaned = token + .chars() + .filter(|ch| ch.is_alphanumeric()) + .collect::(); + if cleaned.len() >= 3 + && !candidates + .iter() + .any(|existing| existing.eq_ignore_ascii_case(&cleaned)) + { + candidates.push(cleaned); + } + } + } + + if candidates.is_empty() { + candidates.push("video".to_string()); + } + candidates + } + fn skip_reason_for_provider(provider_id: &str) -> Option<&'static str> { if std::env::var("FLARE_URL").is_ok() { return None; @@ -893,6 +1035,13 @@ mod tests { } } + fn provider_filter_matches(provider_id: &str) -> bool { + match std::env::var("HOTTUB_TEST_PROVIDER") { + Ok(filter) => filter.trim().is_empty() || filter.trim() == provider_id, + Err(_) => true, + } + } + fn media_target(item: &ApiVideoItem) -> (String, Vec<(String, String)>) { if let Some(format) = item.formats.as_ref().and_then(|formats| formats.first()) { let headers = format @@ -940,7 +1089,12 @@ mod tests { let response = requester .get_raw_with_headers_timeout(url, headers, Some(VALIDATION_MEDIA_TIMEOUT)) .await - .map_err(|err| format!("{provider_id} item {} request failed for {url}: {err}", item_index + 1))?; + .map_err(|err| { + format!( + "{provider_id} item {} request failed for {url}: {err}", + item_index + 1 + ) + })?; let status = response.status(); if !status.is_success() { @@ -956,10 +1110,12 @@ mod tests { .and_then(|value| value.to_str().ok()) .unwrap_or("") .to_string(); - let body = response - .bytes() - .await - .map_err(|err| format!("{provider_id} item {} body read failed for {url}: {err}", item_index + 1))?; + let body = response.bytes().await.map_err(|err| { + format!( + "{provider_id} item {} body read failed for {url}: {err}", + item_index + 1 + ) + })?; if body.is_empty() { return Err(format!( @@ -997,6 +1153,61 @@ mod tests { ); } + #[test] + fn validation_failure_streak_requires_hourly_spacing() { + let provider_id = "hsex"; + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + + let now = Instant::now(); + assert_eq!(record_validation_failure(provider_id, now), 1); + assert_eq!(record_validation_failure(provider_id, now), 1); + assert_eq!( + record_validation_failure(provider_id, now + VALIDATION_COOLDOWN), + 2 + ); + assert_eq!( + record_validation_failure(provider_id, now + VALIDATION_COOLDOWN * 2), + 3 + ); + + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + } + + #[test] + fn validation_failure_streak_resets_after_success() { + let provider_id = "hsex"; + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + + let now = Instant::now(); + assert_eq!(record_validation_failure(provider_id, now), 1); + assert_eq!( + record_validation_failure(provider_id, now + VALIDATION_COOLDOWN), + 2 + ); + reset_validation_failure_state(provider_id); + assert_eq!( + record_validation_failure(provider_id, now + VALIDATION_COOLDOWN * 2), + 1 + ); + + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + } + + #[test] + fn validation_failure_threshold_matches_channel_error_policy() { + let provider_id = "hsex"; + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + + let now = Instant::now(); + let mut counted = 0; + for step in 0..VALIDATION_FAILURES_FOR_ERROR { + counted = record_validation_failure(provider_id, now + VALIDATION_COOLDOWN * step as u32); + } + assert_eq!(counted, VALIDATION_FAILURES_FOR_ERROR); + + PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id); + } + #[test] fn builds_group_index() { PROVIDER_RUNTIME_STATUS.remove("all"); @@ -1114,6 +1325,7 @@ mod tests { let mut channels = ALL_PROVIDERS .iter() .filter(|(provider_id, _)| **provider_id != "all") + .filter(|(provider_id, _)| provider_filter_matches(provider_id)) .filter_map(|(_, provider)| provider.get_channel(client_version.clone())) .collect::>(); channels.sort_by(|a, b| a.id.cmp(&b.id)); @@ -1199,4 +1411,163 @@ mod tests { eprintln!("skipped providers:\n{}", skipped.join("\n")); } } + + #[ntex::test] + #[ignore = "live search sweep across all providers"] + async fn api_videos_search_returns_working_media_urls_for_all_channels() { + let app = test::init_service( + web::App::new() + .state(test_db_pool()) + .state(VideoCache::new().max_size(10_000).to_owned()) + .state(Requester::new()) + .service(web::scope("/api").configure(crate::api::config)), + ) + .await; + + let client_version = ClientVersion::new(22, 'c' as u32, "Hot%20Tub".to_string()); + let mut channels = ALL_PROVIDERS + .iter() + .filter(|(provider_id, _)| **provider_id != "all") + .filter(|(provider_id, _)| provider_filter_matches(provider_id)) + .filter_map(|(_, provider)| provider.get_channel(client_version.clone())) + .collect::>(); + channels.sort_by(|a, b| a.id.cmp(&b.id)); + + let mut failures = Vec::new(); + let mut skipped = Vec::new(); + + for channel in channels { + let provider_id = channel.id.clone(); + + if let Some(reason) = skip_reason_for_provider(&provider_id) { + skipped.push(format!("{provider_id}: {reason}")); + continue; + } + + let baseline_payload = request_for_channel(&channel); + let baseline_request = test::TestRequest::post() + .uri("/api/videos") + .header( + header::USER_AGENT, + "Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0", + ) + .set_json(&baseline_payload) + .to_request(); + + let baseline_response = test::call_service(&app, baseline_request).await; + let baseline_status = baseline_response.status(); + let baseline_body = test::read_body(baseline_response).await; + if !baseline_status.is_success() { + failures.push(format!( + "{provider_id} baseline request returned status {baseline_status}: {}", + String::from_utf8_lossy(&baseline_body) + )); + continue; + } + let baseline: ApiVideosResponse = match serde_json::from_slice(&baseline_body) { + Ok(payload) => payload, + Err(error) => { + failures.push(format!( + "{provider_id} baseline returned invalid JSON: {error}; body={}", + String::from_utf8_lossy(&baseline_body) + )); + continue; + } + }; + if baseline.items.is_empty() { + failures.push(format!( + "{provider_id} baseline returned no items for search seed" + )); + continue; + } + + let mut selected_payload: Option = None; + let mut last_error: Option = None; + for search_query in search_queries_for_channel(&provider_id, &baseline.items) + .into_iter() + .take(12) + { + if search_query.trim().is_empty() { + continue; + } + + let payload = request_for_channel_with_query(&channel, search_query.clone()); + let request = test::TestRequest::post() + .uri("/api/videos") + .header( + header::USER_AGENT, + "Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0", + ) + .set_json(&payload) + .to_request(); + + let response = test::call_service(&app, request).await; + let status = response.status(); + let body = test::read_body(response).await; + + if !status.is_success() { + last_error = Some(format!( + "{provider_id} search query={search_query} returned status {status}: {}", + String::from_utf8_lossy(&body) + )); + continue; + } + + let payload: ApiVideosResponse = match serde_json::from_slice(&body) { + Ok(payload) => payload, + Err(error) => { + last_error = Some(format!( + "{provider_id} search query={search_query} returned invalid JSON: {error}; body={}", + String::from_utf8_lossy(&body) + )); + continue; + } + }; + + if payload.items.len() >= 5 { + selected_payload = Some(payload); + break; + } + last_error = Some(format!( + "{provider_id} search query={search_query} returned fewer than 5 items: {}", + payload.items.len() + )); + } + + let Some(payload) = selected_payload else { + failures.push(last_error.unwrap_or_else(|| { + format!("{provider_id} search did not yield at least 5 items") + })); + continue; + }; + + for (item_index, item) in payload.items.iter().take(5).enumerate() { + let (url, headers) = media_target(item); + if url.is_empty() { + failures.push(format!( + "{provider_id} search item {} returned an empty media url", + item_index + 1 + )); + break; + } + + if let Err(error) = + assert_media_response(&provider_id, item_index, &url, headers).await + { + failures.push(error); + break; + } + } + } + + assert!( + failures.is_empty(), + "provider live search sweep failed:\n{}", + failures.join("\n") + ); + + if !skipped.is_empty() { + eprintln!("skipped providers:\n{}", skipped.join("\n")); + } + } } diff --git a/src/providers/sextb.rs b/src/providers/sextb.rs index 1bb7e24..3184991 100644 --- a/src/providers/sextb.rs +++ b/src/providers/sextb.rs @@ -817,7 +817,7 @@ impl SextbProvider { } } - let line_regex = Self::regex(r#"(?m)^(?PDirector|Label|Studio|Cast\(s\)|Genre\(s\)|Quality|Release Date|Runtimes|Added|Viewed|Description):\s*(?P.+)$"#)?; + let line_regex = Self::regex(r#"(?m)^\s*(?PDirector|Label|Studio|Cast\(s\)|Genre\(s\)|Quality|Release Date|Runtimes|Added|Viewed|Description):\s*(?P.+)$"#)?; for captures in line_regex.captures_iter(markdown) { let key = captures.name("key").map(|value| value.as_str()).unwrap_or_default(); let value = captures.name("value").map(|value| value.as_str()).unwrap_or_default().trim(); diff --git a/src/providers/spankbang.rs b/src/providers/spankbang.rs index 45b2bf1..c645ff3 100644 --- a/src/providers/spankbang.rs +++ b/src/providers/spankbang.rs @@ -456,6 +456,7 @@ impl SpankbangProvider { .select(video_link_selector) .find_map(|link| link.value().attr("href")) .map(ToString::to_string)?; + let detail_url = self.normalize_url(&href); let thumb = card .select(thumb_selector) .find_map(|img| img.value().attr("src")) @@ -511,7 +512,10 @@ impl SpankbangProvider { item = item.rating(rating); } if let Some(preview) = preview { - item = item.preview(preview); + let mut format = + VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string()); + format.add_http_header("Referer".to_string(), detail_url.clone()); + item = item.preview(preview).formats(vec![format]); } if let Some(meta_link) = card.select(meta_link_selector).next() { diff --git a/src/providers/viralxxxporn.rs b/src/providers/viralxxxporn.rs index ccb94e9..88909b5 100644 --- a/src/providers/viralxxxporn.rs +++ b/src/providers/viralxxxporn.rs @@ -5,7 +5,7 @@ use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoItem}; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; @@ -384,6 +384,9 @@ impl ViralxxxpornProvider { } let thumb = self.extract_thumb_url(segment); + let preview = Self::first_non_empty_attr(segment, &["data-preview=\""]) + .map(|value| self.normalize_url(&value)) + .unwrap_or_default(); let text_segment = Self::normalize_ws(&Self::decode_html(&Self::strip_tags(segment))); let duration = Self::extract_duration_seconds(segment) @@ -398,6 +401,15 @@ impl ViralxxxpornProvider { if views > 0 { item = item.views(views); } + if !preview.is_empty() { + let mut format = VideoFormat::new( + preview.clone(), + "preview".to_string(), + "video/mp4".to_string(), + ); + format.add_http_header("Referer".to_string(), item.url.clone()); + item = item.preview(preview).formats(vec![format]); + } items.push(item); } @@ -460,6 +472,9 @@ impl ViralxxxpornProvider { } let thumb = self.extract_thumb_url(segment); + let preview = Self::first_non_empty_attr(segment, &["data-preview=\""]) + .map(|value| self.normalize_url(&value)) + .unwrap_or_default(); let raw_duration = Self::extract_between(segment, "
", "<") .or_else(|| Self::extract_between(segment, "
", "<")) @@ -490,6 +505,15 @@ impl ViralxxxpornProvider { if views > 0 { item = item.views(views); } + if !preview.is_empty() { + let mut format = VideoFormat::new( + preview.clone(), + "preview".to_string(), + "video/mp4".to_string(), + ); + format.add_http_header("Referer".to_string(), item.url.clone()); + item = item.preview(preview).formats(vec![format]); + } items.push(item); } diff --git a/src/providers/xxthots.rs b/src/providers/xxthots.rs index 7cc3b6e..6f5abd7 100644 --- a/src/providers/xxthots.rs +++ b/src/providers/xxthots.rs @@ -5,7 +5,7 @@ use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoItem}; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; @@ -299,8 +299,20 @@ impl XxthotsProvider { .unwrap_or_default() .to_string(); let views = parse_abbreviated_number(&views_part).unwrap_or(0) as u32; + let preview = video_segment + .split("data-preview=\"") + .collect::>() + .get(1) + .copied() + .unwrap_or_default() + .split('"') + .collect::>() + .first() + .copied() + .unwrap_or_default() + .to_string(); - let video_item = VideoItem::new( + let mut video_item = VideoItem::new( id, title, video_url.to_string(), @@ -309,6 +321,15 @@ impl XxthotsProvider { duration, ) .views(views); + if !preview.is_empty() { + let mut format = VideoFormat::new( + preview.clone(), + "preview".to_string(), + "video/mp4".to_string(), + ); + format.add_http_header("Referer".to_string(), video_url.clone()); + video_item = video_item.preview(preview).formats(vec![format]); + } items.push(video_item); } return items;