viralxxxporn and xfree bugfix

This commit is contained in:
Simon
2026-03-05 19:49:30 +00:00
parent 63782f6a7c
commit 2f8951601b
3 changed files with 772 additions and 10 deletions

View File

@@ -36,6 +36,7 @@ pub mod porn00;
pub mod pornzog;
pub mod sxyprn;
pub mod tnaflix;
pub mod viralxxxporn;
pub mod xfree;
pub mod xxthots;
pub mod youjizz;
@@ -135,6 +136,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
"tnaflix",
Arc::new(tnaflix::TnaflixProvider::new()) as DynProvider,
);
m.insert(
"viralxxxporn",
Arc::new(viralxxxporn::ViralxxxpornProvider::new()) as DynProvider,
);
// m.insert("pornxp", Arc::new(pornxp::PornxpProvider::new()) as DynProvider);
m.insert(
"rule34gen",

View File

@@ -0,0 +1,722 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error_background, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{FuturesUnordered, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use std::collections::HashSet;
use std::fmt::Write;
use std::vec;
use url::form_urlencoded::Serializer;
// Provider-local error type generated by `error_chain`, with automatic
// conversions (`?`) from std I/O errors and `wreq` HTTP client errors.
error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
}
/// Scraper provider for viralxxxporn.com (listing pages + detail pages).
#[derive(Debug, Clone)]
pub struct ViralxxxpornProvider {
    // Site base URL without a trailing slash; all relative hrefs are
    // resolved against this in `absolute_url`.
    url: String,
}
/// Intermediate result of parsing one listing card, before the detail
/// page has been fetched to resolve playable media URLs.
#[derive(Debug, Clone)]
struct RawVideo {
    // Numeric id captured from the `/video/<id>/...` href.
    id: String,
    // HTML-decoded, whitespace-normalized display title.
    title: String,
    // Absolute URL of the video's detail page.
    detail_url: String,
    // Absolute thumbnail URL, or "" when none was found near the card.
    thumb: String,
    // Duration in seconds; 0 when no mm:ss / hh:mm:ss text was found.
    duration: u32,
    // Optional view count parsed from the card's stat line.
    views: Option<u32>,
    // Optional rating percentage (e.g. 95.0).
    rating: Option<f32>,
    // Optional lowercase quality marker from the card (e.g. "1080p", "hd").
    quality: Option<String>,
}
impl ViralxxxpornProvider {
/// Creates a provider pointed at the production site.
pub fn new() -> Self {
    let url = String::from("https://viralxxxporn.com");
    Self { url }
}
/// Builds the static channel descriptor served to clients: identity,
/// favicon, NSFW flag, cache hint, and two single-select option groups
/// ("sort" feed selector and "filter" time period).
/// `_clientversion` is currently unused.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    Channel {
        id: "viralxxxporn".to_string(),
        name: "ViralXXXPorn".to_string(),
        description: "Trending free porn and onlyfans leaks".to_string(),
        premium: false,
        // Google's favicon service avoids hosting the site's icon ourselves.
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=viralxxxporn.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![
            // Feed selector — ids map to URL paths in `build_listing_urls`.
            ChannelOption {
                id: "sort".to_string(),
                title: "Sort".to_string(),
                description: "Browse different video feeds".to_string(),
                systemImage: "list.number".to_string(),
                colorName: "blue".to_string(),
                options: vec![
                    FilterOption {
                        id: "latest".to_string(),
                        title: "Latest".to_string(),
                    },
                    FilterOption {
                        id: "top-rated".to_string(),
                        title: "Top Rated".to_string(),
                    },
                    FilterOption {
                        id: "most-viewed".to_string(),
                        title: "Most Viewed".to_string(),
                    },
                    FilterOption {
                        id: "premium".to_string(),
                        title: "Premium".to_string(),
                    },
                ],
                multiSelect: false,
            },
            // Time-period filter — only consulted by the "top-rated" feed.
            ChannelOption {
                id: "filter".to_string(),
                title: "Period".to_string(),
                description: "Time range for top-rated feed".to_string(),
                systemImage: "clock".to_string(),
                colorName: "green".to_string(),
                options: vec![
                    FilterOption {
                        id: "all-time".to_string(),
                        title: "All Time".to_string(),
                    },
                    FilterOption {
                        id: "month".to_string(),
                        title: "This Month".to_string(),
                    },
                    FilterOption {
                        id: "week".to_string(),
                        title: "This Week".to_string(),
                    },
                    FilterOption {
                        id: "today".to_string(),
                        title: "Today".to_string(),
                    },
                ],
                multiSelect: false,
            },
        ],
        nsfw: true,
        // Matches the 300 s freshness window used by `fetch`'s cache check.
        cacheDuration: Some(300),
    }
}
/// Collapses every run of whitespace into a single space and trims both ends.
fn normalize_ws(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
/// Decodes HTML entities (&amp;amp;, &amp;#39;, ...); on decode failure
/// the input is returned unchanged.
fn decode_html(s: &str) -> String {
    match decode(s.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => s.to_string(),
    }
}
/// Resolves a possibly-relative href against the provider's base URL.
/// Absolute URLs pass through; protocol-relative ones get "https:".
fn absolute_url(&self, raw: &str) -> String {
    if raw.starts_with("http://") || raw.starts_with("https://") {
        raw.to_string()
    } else if let Some(rest) = raw.strip_prefix("//") {
        format!("https://{rest}")
    } else if raw.starts_with('/') {
        format!("{}{raw}", self.url)
    } else {
        format!("{}/{}", self.url, raw.trim_start_matches('/'))
    }
}
/// Lowercases the query and turns every run of non-alphanumeric
/// characters into a single '-', trimming dashes at both ends
/// ("Hello, World!" -> "hello-world").
fn slugify_query(query: &str) -> String {
    let lowered = query.to_ascii_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch);
        } else if !slug.ends_with('-') {
            slug.push('-');
        }
    }
    slug.trim_matches('-').to_string()
}
/// Percent-encodes a search term the way a form field would
/// (spaces become '+'), by round-tripping through a q= pair.
fn encode_query_value(value: &str) -> String {
    let mut serializer = Serializer::new(String::new());
    serializer.append_pair("q", value);
    let pair = serializer.finish();
    let stripped = pair.strip_prefix("q=").map(str::to_string);
    stripped.unwrap_or(pair)
}
/// Expands a listing URL into candidate URLs for the requested page.
///
/// Page 1 is the URL itself. For later pages, path-style candidates are
/// produced (`/page/N/`, `/N/`, `?page=N`). A URL that already carries a
/// query string only gets the `&page=N` form: the previous code also
/// spliced path segments after the query string, producing malformed
/// candidates like `https://x/?s=q/page/2/` that could never succeed.
fn with_page(url: &str, page: u8) -> Vec<String> {
    if page <= 1 {
        return vec![url.to_string()];
    }
    if url.contains('?') {
        // Only extend the existing query string; anything path-style
        // appended after '?' would be an invalid URL.
        return vec![format!("{url}&page={page}")];
    }
    let base = url.trim_end_matches('/');
    vec![
        format!("{base}/page/{page}/"),
        format!("{base}/{page}/"),
        format!("{base}/?page={page}"),
    ]
}
/// Removes duplicate URLs in place while keeping first-seen order.
fn unique_urls(urls: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut deduped = urls;
    deduped.retain(|u| seen.insert(u.clone()));
    deduped
}
/// Builds the ordered list of candidate listing URLs for one page.
///
/// With a non-empty query, several URL shapes are tried (tag page,
/// search slug page, WordPress-style `?s=`, and `search?q=`) because the
/// site's exact routing is not known for certain. Without a query the
/// shape depends on the selected sort feed. Candidates are deduplicated
/// in first-seen order; `fetch` tries them until one yields items.
fn build_listing_urls(&self, page: u8, query: &str, options: &ServerOptions) -> Vec<String> {
    // Option ids come from `build_channel`; unknown values fall through
    // to the "latest" / "all-time" defaults below.
    let sort = options
        .sort
        .clone()
        .unwrap_or_else(|| "latest".to_string())
        .to_ascii_lowercase();
    let filter = options
        .filter
        .clone()
        .unwrap_or_else(|| "all-time".to_string())
        .to_ascii_lowercase();
    let query = query.trim();
    let slug = Self::slugify_query(query);
    let encoded = Self::encode_query_value(query);
    let mut urls = vec![];
    if !query.is_empty() {
        // Search mode: sort/filter are ignored, all search shapes are tried.
        urls.extend(Self::with_page(&format!("{}/tags/{slug}/", self.url), page));
        urls.extend(Self::with_page(
            &format!("{}/search/{}/", self.url, slug),
            page,
        ));
        urls.extend(Self::with_page(
            &format!("{}/?s={}", self.url, encoded),
            page,
        ));
        urls.extend(Self::with_page(
            &format!("{}/search?q={}", self.url, encoded),
            page,
        ));
        return Self::unique_urls(urls);
    }
    match sort.as_str() {
        "top-rated" => {
            // Try plain, query-param, and path-segment period variants.
            urls.extend(Self::with_page(&format!("{}/top-rated/", self.url), page));
            urls.extend(Self::with_page(
                &format!("{}/top-rated/?period={}", self.url, filter),
                page,
            ));
            urls.extend(Self::with_page(
                &format!("{}/top-rated/{}/", self.url, filter),
                page,
            ));
        }
        "most-viewed" => {
            urls.extend(Self::with_page(
                &format!("{}/most-popular/", self.url),
                page,
            ));
            urls.extend(Self::with_page(&format!("{}/most-viewed/", self.url), page));
        }
        "premium" => {
            urls.extend(Self::with_page(&format!("{}/premium/", self.url), page));
        }
        // "latest" and anything unrecognized: newest uploads, then homepage.
        _ => {
            urls.extend(Self::with_page(
                &format!("{}/latest-updates/", self.url),
                page,
            ));
            urls.extend(Self::with_page(&self.url, page));
        }
    }
    Self::unique_urls(urls)
}
/// Replaces every HTML tag with a single space; if the tag regex fails
/// to compile, the input is returned unchanged.
fn strip_tags(text: &str) -> String {
    match Regex::new(r"(?is)<[^>]+>") {
        Ok(tag_re) => tag_re.replace_all(text, " ").to_string(),
        Err(_) => text.to_string(),
    }
}
/// Normalizes a media URL scraped out of script/HTML text: strips
/// surrounding quotes/backslashes/whitespace, undoes common JS/JSON
/// string escapes, drops trailing punctuation from the surrounding
/// syntax, and upgrades protocol-relative URLs to https.
///
/// Fix: JSON unicode escapes are case-insensitive in the hex digits, so
/// `\u002f` is handled in addition to `\u002F`.
fn clean_media_url(raw: &str) -> String {
    let mut out = raw
        .trim_matches(|c: char| c == '"' || c == '\'' || c == '\\' || c.is_whitespace())
        .to_string();
    // Undo JS/JSON escapes; both hex-digit cases of the '/' escape.
    out = out
        .replace("\\u0026", "&")
        .replace("\\u002F", "/")
        .replace("\\u002f", "/")
        .replace("\\/", "/")
        .replace("&amp;", "&");
    // Trailing ',' ';' ')' ']' '}' are leftovers of the enclosing script.
    out = out
        .trim_end_matches(|c: char| matches!(c, ',' | ';' | ')' | ']' | '}'))
        .to_string();
    if out.starts_with("//") {
        return format!("https:{out}");
    }
    out
}
/// True only for absolute http(s) URLs that reference an MP4 or HLS
/// resource (case-insensitive).
fn is_media_url(url: &str) -> bool {
    let lower = url.to_ascii_lowercase();
    let is_http = lower.starts_with("http://") || lower.starts_with("https://");
    let is_video = lower.contains(".mp4") || lower.contains(".m3u8");
    is_http && is_video
}
/// Finds the first mm:ss or hh:mm:ss token in `text` and converts it to
/// seconds; `None` when nothing matches or parsing fails.
fn extract_duration_seconds(text: &str) -> Option<u32> {
    let duration_re = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b").ok()?;
    let token = duration_re.captures(text)?.get(1)?;
    parse_time_to_seconds(token.as_str()).map(|v| v as u32)
}
/// Finds a quality marker ("720p", "hd", "4k", ...) in `text` and
/// returns it lowercased.
fn extract_quality(text: &str) -> Option<String> {
    let q_re = Regex::new(r"(?i)\b((?:\d{3,4})p|hd|4k)\b").ok()?;
    let marker = q_re.captures(text)?.get(1)?;
    Some(marker.as_str().to_ascii_lowercase())
}
/// Finds the first percentage token (e.g. "95%") in `text` and returns
/// its numeric value.
fn extract_rating(text: &str) -> Option<f32> {
    let r_re = Regex::new(r"(?i)\b(\d{1,3})%").ok()?;
    let pct = r_re.captures(text)?.get(1)?;
    pct.as_str().parse::<f32>().ok()
}
/// Pulls an abbreviated view count ("4.2K", "1.3m", ...) out of card
/// text. The count is only trusted when it is immediately followed by a
/// percentage rating ("4.2K 95%"), which is how this site renders its
/// stat line — this guards against reading arbitrary numbers as views.
fn extract_views(text: &str) -> Option<u32> {
    let Ok(v_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s+\d{1,3}%") else {
        return None;
    };
    let raw = v_re
        .captures(text)
        .and_then(|c| c.get(1))
        .map(|m| m.as_str().to_string())?;
    // Project helper: "4.2K" -> 4200, "1.3m" -> 1_300_000, etc.
    parse_abbreviated_number(&raw)
}
/// Scrapes video cards out of a listing page.
///
/// Strategy: find every anchor whose href matches `/video/<id>/...`,
/// then inspect a window of HTML around the match for the thumbnail,
/// duration, rating, views and quality — those usually live in sibling
/// elements rather than inside the anchor itself. Duplicate ids (a card
/// often links twice: image + title) are emitted once.
///
/// NOTE(review): these regexes are recompiled on every call; consider
/// hoisting into `LazyLock` statics if listing parsing gets hot.
fn parse_listing_items(&self, html: &str) -> Vec<RawVideo> {
    if html.trim().is_empty() {
        return vec![];
    }
    let Ok(link_re) = Regex::new(
        r#"(?is)<a[^>]+href="(?P<href>/video/(?P<id>\d+)/[^"]+)"[^>]*>(?P<body>.*?)</a>"#,
    ) else {
        return vec![];
    };
    let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else {
        return vec![];
    };
    let Ok(thumb_re) = Regex::new(r#"(?is)(?:src|data-src|data-original|poster)="([^"]+)""#)
    else {
        return vec![];
    };
    let mut out = vec![];
    let mut seen = HashSet::new();
    for caps in link_re.captures_iter(html) {
        let Some(id) = caps.name("id").map(|m| m.as_str().to_string()) else {
            continue;
        };
        // First anchor per id wins; later duplicates are skipped.
        if !seen.insert(id.clone()) {
            continue;
        }
        let href = caps.name("href").map(|m| m.as_str()).unwrap_or_default();
        let body = caps.name("body").map(|m| m.as_str()).unwrap_or_default();
        let m0 = match caps.get(0) {
            Some(v) => v,
            None => continue,
        };
        // Byte window around the anchor: 600 B before, 1800 B after.
        let seg_start = m0.start().saturating_sub(600);
        let seg_end = (m0.end() + 1800).min(html.len());
        // `get` returns None if either bound splits a UTF-8 character;
        // in that case fall back to just the anchor body.
        let segment = html.get(seg_start..seg_end).unwrap_or(body);
        // Prefer the title="" attribute; fall back to the anchor's text.
        let title_from_attr = title_attr_re
            .captures(m0.as_str())
            .and_then(|c| c.get(1))
            .map(|m| m.as_str().to_string())
            .unwrap_or_default();
        let title_from_body = Self::strip_tags(body);
        let title = Self::normalize_ws(&Self::decode_html(if !title_from_attr.is_empty() {
            &title_from_attr
        } else {
            &title_from_body
        }));
        if title.is_empty() {
            continue;
        }
        // Thumbnail and stats are searched in the wider window, not just
        // the anchor, since markup varies between card layouts.
        let thumb = thumb_re
            .captures(segment)
            .and_then(|c| c.get(1))
            .map(|m| self.absolute_url(m.as_str()))
            .unwrap_or_default();
        let duration = Self::extract_duration_seconds(segment).unwrap_or(0);
        let rating = Self::extract_rating(segment);
        let views = Self::extract_views(segment);
        let quality = Self::extract_quality(segment);
        out.push(RawVideo {
            id,
            title,
            detail_url: self.absolute_url(href),
            thumb,
            duration,
            views,
            rating,
            quality,
        });
    }
    out
}
/// Extracts candidate .mp4/.m3u8 media URLs from a detail page.
///
/// Patterns cover, in order: JS-escaped URLs (`https:\/\/...`), plain
/// URLs, `<source src=...>` tags, and player-config `"file"`/`"src"`
/// JSON keys. Matches are cleaned, validated with `is_media_url`, and
/// deduplicated in first-seen order.
fn extract_media_urls(&self, html: &str) -> Vec<String> {
    let patterns = [
        r#"https?:\\?/\\?/[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#,
        r#"https?://[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#,
        r#"(?is)<source[^>]+src=["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#,
        r#"(?is)["']file["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#,
        r#"(?is)["']src["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#,
    ];
    let mut urls = vec![];
    let mut seen = HashSet::new();
    for pattern in patterns {
        let Ok(re) = Regex::new(pattern) else {
            continue;
        };
        for caps in re.captures_iter(html) {
            // Group 1 exists for the tag/JSON patterns; the bare-URL
            // patterns have no group, so use the whole match.
            let raw = caps
                .get(1)
                .map(|m| m.as_str())
                .unwrap_or_else(|| caps.get(0).map(|m| m.as_str()).unwrap_or(""));
            if raw.is_empty() {
                continue;
            }
            let url = Self::clean_media_url(raw);
            if !Self::is_media_url(&url) {
                continue;
            }
            if seen.insert(url.clone()) {
                urls.push(url);
            }
        }
    }
    urls
}
/// Collects unique tag names from `/tag/...` and `/tags/...` links on a
/// detail page, lowercased with dashes turned into spaces.
fn extract_tags_from_detail(html: &str) -> Vec<String> {
    let Ok(tag_re) = Regex::new(r#"(?is)href="/tags?/([^"?#/]+)/""#) else {
        return vec![];
    };
    let mut seen = HashSet::new();
    let mut tags = vec![];
    for caps in tag_re.captures_iter(html) {
        if let Some(m) = caps.get(1) {
            let tag = m.as_str().replace('-', " ").to_ascii_lowercase();
            if !tag.is_empty() && seen.insert(tag.clone()) {
                tags.push(tag);
            }
        }
    }
    tags
}
/// Derives a quality label for a media URL: the digits of an embedded
/// "NNNp" marker, "hls" for .m3u8 streams, or the listing-card quality /
/// "1080" as a fallback (with any 'p' characters stripped).
///
/// NOTE(review): stripping 'p' from the fallback leaves non-numeric
/// values like "hd" or "4k" as-is — confirm downstream consumers accept
/// both numeric strings and those labels.
fn quality_from_url(url: &str, fallback: Option<&String>) -> String {
    let Ok(q_re) = Regex::new(r"(?i)(\d{3,4})p") else {
        // Regex failure: fall back the same way the no-match path does.
        return fallback
            .cloned()
            .unwrap_or_else(|| "1080p".to_string())
            .replace('p', "");
    };
    if let Some(q) = q_re.captures(url).and_then(|c| c.get(1)) {
        return q.as_str().to_string();
    }
    if url.to_ascii_lowercase().contains(".m3u8") {
        return "hls".to_string();
    }
    fallback
        .cloned()
        .unwrap_or_else(|| "1080p".to_string())
        .replace('p', "")
}
/// Fetches a video's detail page and resolves it into a full `VideoItem`.
///
/// Returns `None` (after reporting the failure in the background) when
/// the detail request fails or no playable media URL is found.
/// Fix: removed a dead `if tags.is_empty() { tags = vec![]; }` no-op and
/// the now-unneeded `mut` on `tags`.
async fn fetch_detailed_item(
    &self,
    raw: RawVideo,
    mut requester: crate::util::requester::Requester,
) -> Option<VideoItem> {
    let detail_html = match requester.get(&raw.detail_url, None).await {
        Ok(text) => text,
        Err(e) => {
            report_provider_error_background(
                "viralxxxporn",
                "detail.request",
                &format!("url={}; error={e}", raw.detail_url),
            );
            return None;
        }
    };
    let media_urls = self.extract_media_urls(&detail_html);
    if media_urls.is_empty() {
        report_provider_error_background(
            "viralxxxporn",
            "detail.media",
            &format!("no_media_url_found; url={}", raw.detail_url),
        );
        return None;
    }
    // One format entry per discovered media URL.
    let mut formats = vec![];
    for media in &media_urls {
        let format_kind = if media.to_ascii_lowercase().contains(".m3u8") {
            "m3u8".to_string()
        } else {
            "mp4".to_string()
        };
        let quality = Self::quality_from_url(media, raw.quality.as_ref());
        formats.push(VideoFormat::new(media.clone(), quality, format_kind));
    }
    // Prefer a direct MP4 as the primary URL, else the first media URL.
    let url = media_urls
        .iter()
        .find(|u| u.to_ascii_lowercase().contains(".mp4"))
        .cloned()
        .unwrap_or_else(|| media_urls.first().cloned().unwrap_or_default());
    if url.is_empty() {
        return None;
    }
    // Tags may legitimately be empty; only attached when non-empty below.
    let tags = Self::extract_tags_from_detail(&detail_html);
    let mut item = VideoItem::new(
        raw.id,
        raw.title,
        url,
        "viralxxxporn".to_string(),
        raw.thumb,
        raw.duration,
    )
    .formats(formats)
    .preview(
        media_urls
            .first()
            .cloned()
            .unwrap_or_else(|| raw.detail_url.clone()),
    );
    if let Some(views) = raw.views {
        item = item.views(views);
    }
    if let Some(rating) = raw.rating {
        item = item.rating(rating);
    }
    if !tags.is_empty() {
        item = item.tags(tags);
    }
    Some(item)
}
/// Resolves listing cards into full `VideoItem`s by fetching each detail
/// page, keeping at most `MAX_IN_FLIGHT` requests running concurrently.
/// Cards that fail to resolve are dropped silently (errors were already
/// reported inside `fetch_detailed_item`); completion order is not
/// preserved.
async fn collect_video_items(
    &self,
    html: String,
    requester: crate::util::requester::Requester,
) -> Vec<VideoItem> {
    let raw_items = self.parse_listing_items(&html);
    if raw_items.is_empty() {
        return vec![];
    }
    let mut in_flight = FuturesUnordered::new();
    let mut out = vec![];
    let mut iter = raw_items.into_iter();
    // Cap on concurrent detail-page requests to the upstream site.
    const MAX_IN_FLIGHT: usize = 6;
    loop {
        // Top up the pool before awaiting the next completion.
        while in_flight.len() < MAX_IN_FLIGHT {
            let Some(raw) = iter.next() else {
                break;
            };
            in_flight.push(self.fetch_detailed_item(raw, requester.clone()));
        }
        // `None` means the pool is empty AND the iterator is exhausted.
        let Some(result) = in_flight.next().await else {
            break;
        };
        if let Some(item) = result {
            out.push(item);
        }
    }
    out
}
/// Fetches one page of listings, trying candidate URLs in order.
///
/// Cache policy: an entry younger than 300 s is returned immediately;
/// the first older non-empty entry is remembered as a stale fallback,
/// returned only if every live request fails or parses to nothing.
async fn fetch(
    &self,
    cache: VideoCache,
    page: u8,
    query: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let mut requester = requester_or_default(&options, "viralxxxporn", "fetch");
    let urls = self.build_listing_urls(page, query, &options);
    let mut stale_items: Vec<VideoItem> = vec![];
    for url in urls {
        if let Some((time, items)) = cache.get(&url) {
            // 300 s matches the channel's advertised cacheDuration.
            if time.elapsed().unwrap_or_default().as_secs() < 300 {
                return Ok(items.clone());
            }
            if stale_items.is_empty() && !items.is_empty() {
                stale_items = items.clone();
            }
        }
        let html = match requester.get(&url, None).await {
            Ok(text) => text,
            Err(e) => {
                // Report and move on to the next candidate URL.
                report_provider_error_background(
                    "viralxxxporn",
                    "listing.request",
                    &format!("url={url}; error={e}"),
                );
                continue;
            }
        };
        let items = self.collect_video_items(html, requester.clone()).await;
        if !items.is_empty() {
            // remove-then-insert — presumably required by VideoCache to
            // refresh the entry's timestamp; verify against its API.
            cache.remove(&url);
            cache.insert(url, items.clone());
            return Ok(items);
        }
    }
    Ok(stale_items)
}
}
#[async_trait]
impl Provider for ViralxxxpornProvider {
    /// Provider entry point: parses paging/query params, delegates to
    /// `fetch`, and on error reports to Discord and returns an empty list
    /// (the API surface never propagates provider errors to clients).
    /// `_pool`, `_sort` and `_per_page` are unused; sorting comes in via
    /// `options` instead.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        _sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Non-numeric or out-of-range page strings fall back to page 1.
        let page = page.parse::<u8>().unwrap_or(1);
        let query = query.unwrap_or_default();
        match self.fetch(cache, page, &query, options).await {
            Ok(v) => v,
            Err(e) => {
                // Flatten the error-chain causes into a numbered list.
                let mut chain = String::new();
                for (idx, cause) in e.iter().enumerate() {
                    let _ = writeln!(chain, "{}. {}", idx + 1, cause);
                }
                send_discord_error_report(
                    e.to_string(),
                    Some(chain),
                    Some("Viralxxxporn Provider"),
                    Some("Failed to fetch videos"),
                    file!(),
                    line!(),
                    module_path!(),
                )
                .await;
                vec![]
            }
        }
    }
    /// Returns the static channel descriptor for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::ViralxxxpornProvider;
    // Listing parse: 12:34 -> 754 s, "4.2K 95%" -> views 4200 / rating 95.
    #[test]
    fn parses_listing_card() {
        let provider = ViralxxxpornProvider::new();
        let html = r#"
        <a href="/video/12345/test-title/" title="Test Title">
        <img src="https://cdn.example.com/thumb.jpg"/>
        <span>1080p</span><span>12:34</span><span>4.2K 95%</span>
        </a>
        "#;
        let items = provider.parse_listing_items(html);
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].id, "12345");
        assert_eq!(items[0].title, "Test Title");
        assert_eq!(items[0].duration, 754);
        assert_eq!(items[0].views, Some(4200));
        assert_eq!(items[0].rating, Some(95.0));
    }
    // Media extraction must undo JS escapes (\/ and \u0026) and pick up
    // both the mp4 and the m3u8 URL exactly once each.
    #[test]
    fn extracts_media_urls() {
        let provider = ViralxxxpornProvider::new();
        let html = r#"
        <script>
        var src = "https:\/\/cdn.site.com\/video_720p.mp4?token=1\u0026x=2";
        var hls = "https://cdn.site.com/master.m3u8";
        </script>
        "#;
        let urls = provider.extract_media_urls(html);
        assert_eq!(urls.len(), 2);
        assert!(urls.iter().any(|u| u.contains("video_720p.mp4")));
        assert!(urls.iter().any(|u| u.contains("master.m3u8")));
    }
}

View File

@@ -293,15 +293,35 @@ impl XfreeProvider {
}
fn extract_views(text: &str) -> Option<u32> {
let Ok(views_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s*(?:views?|view)\b")
else {
return None;
};
let raw = views_re
.captures(text)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())?;
parse_abbreviated_number(&raw)
let patterns = [
r#"(?is)(?:class=["'][^"']*views[^"']*["'][^>]*>|data-views=["']?)([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#,
r#"(?is)(?:fa-eye|icon-eye|eye[^>]*></i>)[^0-9]{0,20}([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#,
r#"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb])\s*(?:views?|view)\b"#,
];
for pattern in patterns {
let Ok(re) = Regex::new(pattern) else {
continue;
};
let Some(raw) = re
.captures(text)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
else {
continue;
};
let Some(parsed) = parse_abbreviated_number(&raw) else {
continue;
};
let has_suffix = raw
.chars()
.any(|c| matches!(c, 'k' | 'K' | 'm' | 'M' | 'b' | 'B'));
if has_suffix || parsed >= 100 {
return Some(parsed);
}
}
None
}
fn extract_tags(text: &str) -> Vec<String> {
@@ -390,7 +410,7 @@ impl XfreeProvider {
}
let seg_start = full.start().saturating_sub(400);
let seg_end = (full.end() + 1600).min(html.len());
let seg_end = (full.end() + 700).min(html.len());
let segment = html.get(seg_start..seg_end).unwrap_or(full.as_str());
let title_from_attr = title_attr_re
@@ -748,4 +768,19 @@ mod tests {
assert!(urls.iter().any(|u| u.contains("clip_720p.mp4")));
assert!(urls.iter().any(|u| u.contains("master.m3u8")));
}
// Regression test: a title like "18 View Example" must not be misread as
// a view count — only structured view markup or suffixed counts should
// populate `views`, so this card parses with views == None.
#[test]
fn does_not_take_views_from_plain_title_text() {
    let provider = XfreeProvider::new();
    let html = r#"
    <a href="/video?id=777&title=18-view-example" title="18 View Example">
    <img src="https://thumbs.xfree.com/x/y/test.jpg" />
    <span>1:01</span>
    <span>18 View Example</span>
    </a>
    "#;
    let items = provider.parse_listing_items(html);
    assert_eq!(items.len(), 1);
    assert_eq!(items[0].views, None);
}
}