From 2f8951601be19219ba049a858d48a80134c60c53 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 5 Mar 2026 19:49:30 +0000 Subject: [PATCH] viralxxxporn and xfree bugfix --- src/providers/mod.rs | 5 + src/providers/viralxxxporn.rs | 722 ++++++++++++++++++++++++++++++++++ src/providers/xfree.rs | 55 ++- 3 files changed, 772 insertions(+), 10 deletions(-) create mode 100644 src/providers/viralxxxporn.rs diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 17d9471..72024d8 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -36,6 +36,7 @@ pub mod porn00; pub mod pornzog; pub mod sxyprn; pub mod tnaflix; +pub mod viralxxxporn; pub mod xfree; pub mod xxthots; pub mod youjizz; @@ -135,6 +136,10 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| "tnaflix", Arc::new(tnaflix::TnaflixProvider::new()) as DynProvider, ); + m.insert( + "viralxxxporn", + Arc::new(viralxxxporn::ViralxxxpornProvider::new()) as DynProvider, + ); // m.insert("pornxp", Arc::new(pornxp::PornxpProvider::new()) as DynProvider); m.insert( "rule34gen", diff --git a/src/providers/viralxxxporn.rs b/src/providers/viralxxxporn.rs new file mode 100644 index 0000000..3bfec50 --- /dev/null +++ b/src/providers/viralxxxporn.rs @@ -0,0 +1,722 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error_background, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::discord::send_discord_error_report; +use crate::util::parse_abbreviated_number; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use futures::stream::{FuturesUnordered, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use std::collections::HashSet; +use std::fmt::Write; +use std::vec; +use url::form_urlencoded::Serializer; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + } +} + +#[derive(Debug, Clone)] +pub struct ViralxxxpornProvider { + url: String, +} + +#[derive(Debug, Clone)] +struct RawVideo { + id: String, + title: String, + detail_url: String, + thumb: String, + duration: u32, + views: Option, + rating: Option, + quality: Option, +} + +impl ViralxxxpornProvider { + pub fn new() -> Self { + Self { + url: "https://viralxxxporn.com".to_string(), + } + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: "viralxxxporn".to_string(), + name: "ViralXXXPorn".to_string(), + description: "Trending free porn and onlyfans leaks".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=viralxxxporn.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse different video feeds".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "latest".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "top-rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "most-viewed".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "premium".to_string(), + title: "Premium".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Period".to_string(), + description: "Time range for top-rated feed".to_string(), + systemImage: "clock".to_string(), + colorName: "green".to_string(), + options: vec![ + FilterOption { + id: "all-time".to_string(), + title: "All Time".to_string(), + }, + FilterOption { + id: "month".to_string(), + title: "This Month".to_string(), + }, + FilterOption { + id: "week".to_string(), + title: "This Week".to_string(), + }, + FilterOption { + id: "today".to_string(), + title: "Today".to_string(), + }, + ], + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(300), + } + } + + fn normalize_ws(s: &str) -> String { + s.split_whitespace().collect::>().join(" ") + } + + fn decode_html(s: &str) -> String { + decode(s.as_bytes()) + .to_string() + .unwrap_or_else(|_| s.to_string()) + } + + fn absolute_url(&self, raw: &str) -> String { + if raw.starts_with("http://") || raw.starts_with("https://") { + return raw.to_string(); + } + if raw.starts_with("//") { + return format!("https:{raw}"); + } + if raw.starts_with('/') { + return format!("{}{}", self.url, raw); + } + format!("{}/{}", self.url, raw.trim_start_matches('/')) + } + + fn slugify_query(query: &str) -> String { + let mut out = String::new(); + let mut prev_dash = false; + for c in query.to_ascii_lowercase().chars() { + if c.is_ascii_alphanumeric() { + out.push(c); + prev_dash = false; + } else if !prev_dash { + out.push('-'); + prev_dash = true; + } + } + out.trim_matches('-').to_string() + } + + fn encode_query_value(value: &str) -> String { + let mut serializer = Serializer::new(String::new()); + serializer.append_pair("q", value); + let encoded = serializer.finish(); + encoded.strip_prefix("q=").unwrap_or(&encoded).to_string() + } + + fn with_page(url: &str, page: u8) -> Vec { + if page <= 1 { + return vec![url.to_string()]; + } + let base = url.trim_end_matches('/'); + let mut candidates = vec![ + format!("{base}/page/{page}/"), + format!("{base}/{page}/"), + format!("{base}/?page={page}"), + ]; + if url.contains('?') { + candidates.push(format!("{url}&page={page}")); + } + candidates + } + + fn unique_urls(urls: Vec) -> Vec { + let mut seen = HashSet::new(); + let mut out = vec![]; + for u in urls { + if seen.insert(u.clone()) { + out.push(u); + } + } + out + } + + fn build_listing_urls(&self, page: u8, query: &str, options: &ServerOptions) -> Vec { + let sort = options + .sort + .clone() + .unwrap_or_else(|| "latest".to_string()) + .to_ascii_lowercase(); + let filter = options + .filter + .clone() + .unwrap_or_else(|| "all-time".to_string()) + .to_ascii_lowercase(); + let query = query.trim(); + let slug = Self::slugify_query(query); + let encoded = Self::encode_query_value(query); + + let mut urls = vec![]; + + if !query.is_empty() { + urls.extend(Self::with_page(&format!("{}/tags/{slug}/", self.url), page)); + urls.extend(Self::with_page( + &format!("{}/search/{}/", self.url, slug), + page, + )); + urls.extend(Self::with_page( + &format!("{}/?s={}", self.url, encoded), + page, + )); + urls.extend(Self::with_page( + &format!("{}/search?q={}", self.url, encoded), + page, + )); + return Self::unique_urls(urls); + } + + match sort.as_str() { + "top-rated" => { + urls.extend(Self::with_page(&format!("{}/top-rated/", self.url), page)); + urls.extend(Self::with_page( + &format!("{}/top-rated/?period={}", self.url, filter), + page, + )); + urls.extend(Self::with_page( + &format!("{}/top-rated/{}/", self.url, filter), + page, + )); + } + "most-viewed" => { + urls.extend(Self::with_page( + &format!("{}/most-popular/", self.url), + page, + )); + urls.extend(Self::with_page(&format!("{}/most-viewed/", self.url), page)); + } + "premium" => { + urls.extend(Self::with_page(&format!("{}/premium/", self.url), page)); + } + _ => { + urls.extend(Self::with_page( + &format!("{}/latest-updates/", self.url), + page, + )); + urls.extend(Self::with_page(&self.url, page)); + } + } + + Self::unique_urls(urls) + } + + fn strip_tags(text: &str) -> String { + let Ok(tag_re) = Regex::new(r"(?is)<[^>]+>") else { + return text.to_string(); + }; + tag_re.replace_all(text, " ").to_string() + } + + fn clean_media_url(raw: &str) -> String { + let mut out = raw + .trim_matches(|c: char| c == '"' || c == '\'' || c == '\\' || c.is_whitespace()) + .to_string(); + out = out + .replace("\\u0026", "&") + .replace("\\u002F", "/") + .replace("\\/", "/") + .replace("&", "&"); + out = out + .trim_end_matches(|c: char| matches!(c, ',' | ';' | ')' | ']' | '}')) + .to_string(); + if out.starts_with("//") { + return format!("https:{out}"); + } + out + } + + fn is_media_url(url: &str) -> bool { + let lower = url.to_ascii_lowercase(); + (lower.starts_with("http://") || lower.starts_with("https://")) + && (lower.contains(".mp4") || lower.contains(".m3u8")) + } + + fn extract_duration_seconds(text: &str) -> Option { + let Ok(duration_re) = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b") else { + return None; + }; + duration_re + .captures(text) + .and_then(|c| c.get(1)) + .and_then(|m| parse_time_to_seconds(m.as_str())) + .map(|v| v as u32) + } + + fn extract_quality(text: &str) -> Option { + let Ok(q_re) = Regex::new(r"(?i)\b((?:\d{3,4})p|hd|4k)\b") else { + return None; + }; + q_re.captures(text) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_ascii_lowercase()) + } + + fn extract_rating(text: &str) -> Option { + let Ok(r_re) = Regex::new(r"(?i)\b(\d{1,3})%") else { + return None; + }; + r_re.captures(text) + .and_then(|c| c.get(1)) + .and_then(|m| m.as_str().parse::().ok()) + } + + fn extract_views(text: &str) -> Option { + let Ok(v_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s+\d{1,3}%") else { + return None; + }; + let raw = v_re + .captures(text) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string())?; + parse_abbreviated_number(&raw) + } + + fn parse_listing_items(&self, html: &str) -> Vec { + if html.trim().is_empty() { + return vec![]; + } + + let Ok(link_re) = Regex::new( + r#"(?is)]+href="(?P/video/(?P\d+)/[^"]+)"[^>]*>(?P.*?)"#, + ) else { + return vec![]; + }; + let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else { + return vec![]; + }; + let Ok(thumb_re) = Regex::new(r#"(?is)(?:src|data-src|data-original|poster)="([^"]+)""#) + else { + return vec![]; + }; + + let mut out = vec![]; + let mut seen = HashSet::new(); + + for caps in link_re.captures_iter(html) { + let Some(id) = caps.name("id").map(|m| m.as_str().to_string()) else { + continue; + }; + if !seen.insert(id.clone()) { + continue; + } + let href = caps.name("href").map(|m| m.as_str()).unwrap_or_default(); + let body = caps.name("body").map(|m| m.as_str()).unwrap_or_default(); + + let m0 = match caps.get(0) { + Some(v) => v, + None => continue, + }; + let seg_start = m0.start().saturating_sub(600); + let seg_end = (m0.end() + 1800).min(html.len()); + let segment = html.get(seg_start..seg_end).unwrap_or(body); + + let title_from_attr = title_attr_re + .captures(m0.as_str()) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string()) + .unwrap_or_default(); + let title_from_body = Self::strip_tags(body); + let title = Self::normalize_ws(&Self::decode_html(if !title_from_attr.is_empty() { + &title_from_attr + } else { + &title_from_body + })); + if title.is_empty() { + continue; + } + + let thumb = thumb_re + .captures(segment) + .and_then(|c| c.get(1)) + .map(|m| self.absolute_url(m.as_str())) + .unwrap_or_default(); + + let duration = Self::extract_duration_seconds(segment).unwrap_or(0); + let rating = Self::extract_rating(segment); + let views = Self::extract_views(segment); + let quality = Self::extract_quality(segment); + + out.push(RawVideo { + id, + title, + detail_url: self.absolute_url(href), + thumb, + duration, + views, + rating, + quality, + }); + } + + out + } + + fn extract_media_urls(&self, html: &str) -> Vec { + let patterns = [ + r#"https?:\\?/\\?/[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, + r#"https?://[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, + r#"(?is)]+src=["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, + r#"(?is)["']file["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, + r#"(?is)["']src["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, + ]; + + let mut urls = vec![]; + let mut seen = HashSet::new(); + + for pattern in patterns { + let Ok(re) = Regex::new(pattern) else { + continue; + }; + for caps in re.captures_iter(html) { + let raw = caps + .get(1) + .map(|m| m.as_str()) + .unwrap_or_else(|| caps.get(0).map(|m| m.as_str()).unwrap_or("")); + if raw.is_empty() { + continue; + } + let url = Self::clean_media_url(raw); + if !Self::is_media_url(&url) { + continue; + } + if seen.insert(url.clone()) { + urls.push(url); + } + } + } + + urls + } + + fn extract_tags_from_detail(html: &str) -> Vec { + let Ok(tag_re) = Regex::new(r#"(?is)href="/tags?/([^"?#/]+)/""#) else { + return vec![]; + }; + let mut out = vec![]; + let mut seen = HashSet::new(); + for caps in tag_re.captures_iter(html) { + let Some(raw) = caps.get(1).map(|m| m.as_str()) else { + continue; + }; + let tag = raw.replace('-', " ").to_ascii_lowercase(); + if !tag.is_empty() && seen.insert(tag.clone()) { + out.push(tag); + } + } + out + } + + fn quality_from_url(url: &str, fallback: Option<&String>) -> String { + let Ok(q_re) = Regex::new(r"(?i)(\d{3,4})p") else { + return fallback + .cloned() + .unwrap_or_else(|| "1080p".to_string()) + .replace('p', ""); + }; + if let Some(q) = q_re.captures(url).and_then(|c| c.get(1)) { + return q.as_str().to_string(); + } + if url.to_ascii_lowercase().contains(".m3u8") { + return "hls".to_string(); + } + fallback + .cloned() + .unwrap_or_else(|| "1080p".to_string()) + .replace('p', "") + } + + async fn fetch_detailed_item( + &self, + raw: RawVideo, + mut requester: crate::util::requester::Requester, + ) -> Option { + let detail_html = match requester.get(&raw.detail_url, None).await { + Ok(text) => text, + Err(e) => { + report_provider_error_background( + "viralxxxporn", + "detail.request", + &format!("url={}; error={e}", raw.detail_url), + ); + return None; + } + }; + + let media_urls = self.extract_media_urls(&detail_html); + if media_urls.is_empty() { + report_provider_error_background( + "viralxxxporn", + "detail.media", + &format!("no_media_url_found; url={}", raw.detail_url), + ); + return None; + } + + let mut formats = vec![]; + for media in &media_urls { + let format_kind = if media.to_ascii_lowercase().contains(".m3u8") { + "m3u8".to_string() + } else { + "mp4".to_string() + }; + let quality = Self::quality_from_url(media, raw.quality.as_ref()); + formats.push(VideoFormat::new(media.clone(), quality, format_kind)); + } + + let url = media_urls + .iter() + .find(|u| u.to_ascii_lowercase().contains(".mp4")) + .cloned() + .unwrap_or_else(|| media_urls.first().cloned().unwrap_or_default()); + if url.is_empty() { + return None; + } + + let mut tags = Self::extract_tags_from_detail(&detail_html); + if tags.is_empty() { + tags = vec![]; + } + + let mut item = VideoItem::new( + raw.id, + raw.title, + url, + "viralxxxporn".to_string(), + raw.thumb, + raw.duration, + ) + .formats(formats) + .preview( + media_urls + .first() + .cloned() + .unwrap_or_else(|| raw.detail_url.clone()), + ); + + if let Some(views) = raw.views { + item = item.views(views); + } + if let Some(rating) = raw.rating { + item = item.rating(rating); + } + if !tags.is_empty() { + item = item.tags(tags); + } + + Some(item) + } + + async fn collect_video_items( + &self, + html: String, + requester: crate::util::requester::Requester, + ) -> Vec { + let raw_items = self.parse_listing_items(&html); + if raw_items.is_empty() { + return vec![]; + } + + let mut in_flight = FuturesUnordered::new(); + let mut out = vec![]; + let mut iter = raw_items.into_iter(); + const MAX_IN_FLIGHT: usize = 6; + + loop { + while in_flight.len() < MAX_IN_FLIGHT { + let Some(raw) = iter.next() else { + break; + }; + in_flight.push(self.fetch_detailed_item(raw, requester.clone())); + } + + let Some(result) = in_flight.next().await else { + break; + }; + if let Some(item) = result { + out.push(item); + } + } + + out + } + + async fn fetch( + &self, + cache: VideoCache, + page: u8, + query: &str, + options: ServerOptions, + ) -> Result> { + let mut requester = requester_or_default(&options, "viralxxxporn", "fetch"); + let urls = self.build_listing_urls(page, query, &options); + let mut stale_items: Vec = vec![]; + + for url in urls { + if let Some((time, items)) = cache.get(&url) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.clone()); + } + if stale_items.is_empty() && !items.is_empty() { + stale_items = items.clone(); + } + } + + let html = match requester.get(&url, None).await { + Ok(text) => text, + Err(e) => { + report_provider_error_background( + "viralxxxporn", + "listing.request", + &format!("url={url}; error={e}"), + ); + continue; + } + }; + + let items = self.collect_video_items(html, requester.clone()).await; + if !items.is_empty() { + cache.remove(&url); + cache.insert(url, items.clone()); + return Ok(items); + } + } + + Ok(stale_items) + } +} + +#[async_trait] +impl Provider for ViralxxxpornProvider { + async fn get_videos( + &self, + cache: VideoCache, + _pool: DbPool, + _sort: String, + query: Option, + page: String, + _per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1); + let query = query.unwrap_or_default(); + + match self.fetch(cache, page, &query, options).await { + Ok(v) => v, + Err(e) => { + let mut chain = String::new(); + for (idx, cause) in e.iter().enumerate() { + let _ = writeln!(chain, "{}. {}", idx + 1, cause); + } + send_discord_error_report( + e.to_string(), + Some(chain), + Some("Viralxxxporn Provider"), + Some("Failed to fetch videos"), + file!(), + line!(), + module_path!(), + ) + .await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::ViralxxxpornProvider; + + #[test] + fn parses_listing_card() { + let provider = ViralxxxpornProvider::new(); + let html = r#" + + + 1080p12:344.2K 95% + + "#; + let items = provider.parse_listing_items(html); + assert_eq!(items.len(), 1); + assert_eq!(items[0].id, "12345"); + assert_eq!(items[0].title, "Test Title"); + assert_eq!(items[0].duration, 754); + assert_eq!(items[0].views, Some(4200)); + assert_eq!(items[0].rating, Some(95.0)); + } + + #[test] + fn extracts_media_urls() { + let provider = ViralxxxpornProvider::new(); + let html = r#" + + "#; + let urls = provider.extract_media_urls(html); + assert_eq!(urls.len(), 2); + assert!(urls.iter().any(|u| u.contains("video_720p.mp4"))); + assert!(urls.iter().any(|u| u.contains("master.m3u8"))); + } +} diff --git a/src/providers/xfree.rs b/src/providers/xfree.rs index c51d502..e886484 100644 --- a/src/providers/xfree.rs +++ b/src/providers/xfree.rs @@ -293,15 +293,35 @@ impl XfreeProvider { } fn extract_views(text: &str) -> Option { - let Ok(views_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s*(?:views?|view)\b") - else { - return None; - }; - let raw = views_re - .captures(text) - .and_then(|c| c.get(1)) - .map(|m| m.as_str().to_string())?; - parse_abbreviated_number(&raw) + let patterns = [ + r#"(?is)(?:class=["'][^"']*views[^"']*["'][^>]*>|data-views=["']?)([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#, + r#"(?is)(?:fa-eye|icon-eye|eye[^>]*>)[^0-9]{0,20}([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#, + r#"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb])\s*(?:views?|view)\b"#, + ]; + + for pattern in patterns { + let Ok(re) = Regex::new(pattern) else { + continue; + }; + let Some(raw) = re + .captures(text) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string()) + else { + continue; + }; + let Some(parsed) = parse_abbreviated_number(&raw) else { + continue; + }; + let has_suffix = raw + .chars() + .any(|c| matches!(c, 'k' | 'K' | 'm' | 'M' | 'b' | 'B')); + if has_suffix || parsed >= 100 { + return Some(parsed); + } + } + + None } fn extract_tags(text: &str) -> Vec { @@ -390,7 +410,7 @@ impl XfreeProvider { } let seg_start = full.start().saturating_sub(400); - let seg_end = (full.end() + 1600).min(html.len()); + let seg_end = (full.end() + 700).min(html.len()); let segment = html.get(seg_start..seg_end).unwrap_or(full.as_str()); let title_from_attr = title_attr_re @@ -748,4 +768,19 @@ mod tests { assert!(urls.iter().any(|u| u.contains("clip_720p.mp4"))); assert!(urls.iter().any(|u| u.contains("master.m3u8"))); } + + #[test] + fn does_not_take_views_from_plain_title_text() { + let provider = XfreeProvider::new(); + let html = r#" + + + 1:01 + 18 View Example + + "#; + let items = provider.parse_listing_items(html); + assert_eq!(items.len(), 1); + assert_eq!(items[0].views, None); + } }