xhamster, xnxx, xvideos early build

This commit is contained in:
Simon
2026-05-21 13:52:32 +00:00
committed by ForgeCode
parent 8ca1df8f5a
commit 7db9465750
4 changed files with 1751 additions and 0 deletions

615
src/providers/xvideos.rs Normal file
View File

@@ -0,0 +1,615 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use chrono::{Datelike, Local, Months};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
/// Static metadata for this provider's channel: the group it is listed under
/// and the tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed", "search"],
};
/// Site origin (no trailing slash) that every relative path is joined onto.
const BASE_URL: &str = "https://www.xvideos.com";
/// Channel identifier; also used when reporting errors and as the prefix of
/// uploader ids built in `parse_listing`.
const CHANNEL_ID: &str = "xvideos";
/// `User-Agent` header value sent with every HTML request.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent with every HTML request.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's error types through `error_chain!`; the `Error` and
// `Result` names used by the helpers below come from this invocation.
error_chain! {
// `std::io::Error` values are wrapped as the `Io` kind.
foreign_links {
Io(std::io::Error);
}
// `Parse` carries a free-form message and displays as "parse error: <msg>".
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Known categories as `(slug, label)` pairs.
///
/// The slug has the form `{SiteName}-{ID}` as returned by the site's `/c`
/// listing and is appended to `{BASE_URL}/c/` to build an archive URL; the
/// label is the title shown in the channel's category filter.
const CATEGORIES: &[(&str, &str)] = &[
("AI-239", "AI"),
("Amateur-65", "Amateur"),
("Anal-12", "Anal"),
("Arab-159", "Arab"),
("Asian_Woman-32", "Asian"),
("ASMR-229", "ASMR"),
("Ass-14", "Ass"),
("bbw-51", "BBW"),
("Bi_Sexual-62", "Bi"),
("Big_Ass-24", "Big Ass"),
("Big_Cock-34", "Big Cock"),
("Big_Tits-23", "Big Tits"),
("Black_Woman-30", "Black"),
("Blonde-20", "Blonde"),
("Blowjob-15", "Blowjob"),
("Brunette-25", "Brunette"),
("Cam_Porn-58", "Cam Porn"),
("Creampie-40", "Creampie"),
("Cuckold-237", "Cuckold"),
("Cumshot-18", "Cumshot"),
("Femdom-235", "Femdom"),
("Fisting-165", "Fisting"),
("Fucked_Up_Family-81", "Step Family"),
("Gangbang-69", "Gangbang"),
("Gapes-167", "Gapes"),
("Indian-89", "Indian"),
("Interracial-27", "Interracial"),
("Latina-16", "Latina"),
("Lesbian-26", "Lesbian"),
("Lingerie-83", "Lingerie"),
("Mature-38", "Mature"),
("Milf-19", "MILF"),
("Oiled-22", "Oiled"),
("Redhead-31", "Redhead"),
("Solo_and_Masturbation-33", "Solo"),
("Squirting-56", "Squirting"),
("Stockings-28", "Stockings"),
("Teen-13", "Teen"),
];
/// The kind of listing page a request resolves to; `target_url` turns a
/// `Target` plus a page number into a concrete URL.
#[derive(Debug, Clone)]
enum Target {
/// Front page, then `/new/{n}` for later pages.
Latest,
/// The `/best/{year}-{month}` archive of the previous month.
Best,
/// Keyword search for the contained query (`/?k=...`).
Search(String),
/// An arbitrary listing whose base URL is stored; later pages append `/{n}`.
Archive(String),
}
/// Stateless provider that scrapes XVideos listing pages into `VideoItem`s.
#[derive(Debug, Clone)]
pub struct XvideosProvider;
impl XvideosProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel descriptor: static channel info plus the "sort" and
/// "categories" filter options. The client version is currently ignored.
fn build_channel(&self, _cv: ClientVersion) -> Channel {
    // "All" always comes first, followed by one entry per known category.
    let category_filters: Vec<FilterOption> = std::iter::once(FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    })
    .chain(CATEGORIES.iter().map(|(slug, label)| FilterOption {
        id: slug.to_string(),
        title: label.to_string(),
    }))
    .collect();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse XVideos ranking feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            FilterOption {
                id: "new".to_string(),
                title: "Latest".to_string(),
            },
            FilterOption {
                id: "best".to_string(),
                title: "Best of Month".to_string(),
            },
        ],
        multiSelect: false,
    };

    let category_option = ChannelOption {
        id: "categories".to_string(),
        title: "Categories".to_string(),
        description: "Browse an XVideos category archive.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "orange".to_string(),
        options: category_filters,
        multiSelect: false,
    };

    let category_labels = CATEGORIES
        .iter()
        .map(|(_, label)| label.to_string())
        .collect();

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "XVideos".to_string(),
        description:
            "XVideos — one of the world's largest free porn sites with latest, best-of-month, category, tag, and keyword search."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=xvideos.com".to_string(),
        status: "active".to_string(),
        categories: category_labels,
        options: vec![sort_option, category_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned
/// unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text with every whitespace run collapsed to a single
/// space and HTML entities decoded.
fn text_of(el: &ElementRef<'_>) -> String {
    // Splitting each text node into words and re-joining with one space gives
    // the same result as joining the nodes first and collapsing afterwards.
    let words: Vec<&str> = el.text().flat_map(str::split_whitespace).collect();
    let collapsed = words.join(" ");
    Self::decode_html(&collapsed)
}
/// Canonical comparison key for category names and slugs: surrounding
/// whitespace and leading `#` characters are dropped, `_`/`-` act as word
/// separators, words are joined by single spaces and ASCII-lowercased.
fn normalize_key(s: &str) -> String {
    let body = s.trim().trim_start_matches('#');
    let mut key = String::with_capacity(body.len());
    let words = body
        .split(|c: char| c == '_' || c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(&word.to_ascii_lowercase());
    }
    key
}
/// Turns an href into an absolute URL: absolute URLs pass through,
/// protocol-relative ones get `https:`, everything else is joined onto
/// `BASE_URL`.
fn normalize_url(path: &str) -> String {
    let href = path.trim();
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| href.starts_with(*scheme));

    if already_absolute {
        href.to_string()
    } else if href.starts_with("//") {
        format!("https:{href}")
    } else if href.starts_with('/') {
        format!("{BASE_URL}{href}")
    } else {
        format!("{BASE_URL}/{href}")
    }
}
/// Request headers for fetching an HTML page, with `referer` used as the
/// `Referer` value.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs: [(&str, &str); 4] = [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// URL of the "best" archive for the previous calendar month in local time
/// (the site's `/best` always redirects there). Falls back to the current
/// month if the date subtraction fails.
fn best_base_url() -> String {
    let today = Local::now();
    let archive_month = match today.checked_sub_months(Months::new(1)) {
        Some(earlier) => earlier,
        None => today,
    };
    let year = archive_month.year();
    let month = archive_month.month();
    format!("{BASE_URL}/best/{year}-{month:02}")
}
/// Builds the listing URL for `target` at the given 1-based `page`.
///
/// Page 0 and page 1 both map to the first page; later pages use the
/// zero-based index `page - 1` (page 2 is `/new/1`, page 3 is `/new/2`, ...).
fn target_url(target: &Target, page: u16) -> String {
    // `None` for the first page, otherwise the index the site expects.
    let page_index = (page > 1).then(|| page - 1);

    match (target, page_index) {
        (Target::Latest, None) => format!("{BASE_URL}/"),
        (Target::Latest, Some(idx)) => format!("{BASE_URL}/new/{idx}"),
        (Target::Best, None) => Self::best_base_url(),
        (Target::Best, Some(idx)) => {
            let base = Self::best_base_url();
            format!("{base}/{idx}")
        }
        (Target::Search(q), idx) => {
            let encoded: String =
                url::form_urlencoded::byte_serialize(q.trim().as_bytes()).collect();
            match idx {
                None => format!("{BASE_URL}/?k={encoded}"),
                Some(idx) => format!("{BASE_URL}/?k={encoded}&p={idx}"),
            }
        }
        (Target::Archive(base_url), idx) => {
            let base = base_url.trim_end_matches('/');
            match idx {
                None => base.to_string(),
                Some(idx) => format!("{base}/{idx}"),
            }
        }
    }
}
async fn fetch_html(
requester: &mut crate::util::requester::Requester,
url: &str,
) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
}
/// Converts a duration label into whole seconds.
///
/// Accepted shapes:
/// * colon separated: `MM:SS` and `HH:MM:SS`;
/// * unit suffixed: `21 min`, `1h20min`, `2h`, `45sec` (case-insensitive,
///   whitespace between the number and its unit is allowed).
///
/// Fields that are missing or not a number count as 0, so unrecognised text
/// yields 0. Arithmetic saturates at `u32::MAX` instead of overflowing.
fn parse_duration(text: &str) -> u32 {
    /// Value of the ASCII digit run that ends right before the first
    /// occurrence of `unit`, skipping whitespace in between; 0 if absent.
    fn amount_before(haystack: &str, unit: &str) -> u32 {
        let Some(unit_at) = haystack.find(unit) else {
            return 0;
        };
        let head = haystack[..unit_at].trim_end();
        // ASCII digits are single bytes, so counting bytes from the end
        // always leaves the slice start on a char boundary.
        let digit_count = head
            .bytes()
            .rev()
            .take_while(u8::is_ascii_digit)
            .count();
        head[head.len() - digit_count..].parse().unwrap_or(0)
    }

    fn to_seconds(hours: u32, minutes: u32, seconds: u32) -> u32 {
        hours
            .saturating_mul(3600)
            .saturating_add(minutes.saturating_mul(60))
            .saturating_add(seconds)
    }

    let text = text.trim();

    // Colon-separated formats MM:SS and HH:MM:SS.
    let fields: Vec<u32> = text
        .split(':')
        .map(|field| field.trim().parse().unwrap_or(0))
        .collect();
    match fields.as_slice() {
        [m, s] => return to_seconds(0, *m, *s),
        [h, m, s] => return to_seconds(*h, *m, *s),
        _ => {}
    }

    // Word-based: "1h20min", "30 min", "45sec", etc.
    let low = text.to_ascii_lowercase();
    to_seconds(
        amount_before(&low, "h"),
        amount_before(&low, "min"),
        amount_before(&low, "sec"),
    )
}
/// Parses a view counter such as "877.3k Views" or "1.2M Views": the
/// "Views"/"views"/"View" word and thousands commas are removed and the rest
/// is handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let mut digits = text.to_string();
    // Order matters: the plural forms are removed before the singular one.
    for noise in ["Views", "views", "View", ","] {
        digits = digits.replace(noise, "");
    }
    parse_abbreviated_number(digits.trim())
}
/// Extracts up to `limit` video entries from a listing page.
///
/// Every `div.thumb-block` card is inspected; a card is skipped when it has
/// no `/video.` link, no id after the `video.` prefix, no lazy-load thumbnail
/// (`img[data-src]`) or no non-blank title. Duration, preview clip, uploader
/// and view count are optional extras.
///
/// Returns an error only if one of the CSS selectors fails to compile.
/// A `limit` of 0 yields an empty list.
fn parse_listing(html: &str, limit: usize) -> Result<Vec<VideoItem>> {
    let mut items = Vec::new();
    if limit == 0 {
        return Ok(items);
    }

    let document = Html::parse_document(html);
    let card_sel = Self::selector("div.thumb-block")?;
    let img_sel = Self::selector("img[data-src]")?;
    let link_sel = Self::selector("a[href]")?;
    let title_sel = Self::selector("p.title a[title], a.title[title]")?;
    let uploader_name_sel = Self::selector("p.metadata a span.name")?;
    let uploader_link_sel = Self::selector("p.metadata a[href]")?;
    let dur_sel = Self::selector(".thumb-under span.duration")?;
    let metadata_sel = Self::selector("p.metadata")?;

    for card in document.select(&card_sel) {
        // First anchor whose href points at a video page.
        let Some(href) = card
            .select(&link_sel)
            .filter_map(|el| el.value().attr("href"))
            .find(|h| h.contains("/video."))
        else {
            continue;
        };

        // eid: path segment starting with "video.", e.g. "video.ohedfck8b21".
        let Some(eid) = href
            .split('/')
            .find_map(|segment| segment.strip_prefix("video."))
            .filter(|id| !id.is_empty())
        else {
            continue;
        };

        // Numeric id from the card's data-id attribute; fall back to eid.
        let video_id = card
            .value()
            .attr("data-id")
            .filter(|s| !s.is_empty())
            .unwrap_or(eid)
            .to_string();

        // The lazy-loaded <img> carries both the thumbnail (data-src) and
        // the optional preview clip (data-pvv), so look it up once.
        let Some(img) = card.select(&img_sel).next() else {
            continue;
        };
        let thumb = img.value().attr("data-src").unwrap_or_default();
        if thumb.is_empty() {
            continue;
        }
        let preview = img
            .value()
            .attr("data-pvv")
            .filter(|s| !s.is_empty())
            .map(str::to_string);

        // Title from the title attribute of the title link.
        let Some(title) = card
            .select(&title_sel)
            .next()
            .and_then(|el| el.value().attr("title").map(Self::decode_html))
            .filter(|t| !t.trim().is_empty())
        else {
            continue;
        };

        // Duration from span.duration inside .thumb-under; 0 when absent.
        let duration = card
            .select(&dur_sel)
            .next()
            .map(|el| Self::parse_duration(&Self::text_of(&el)))
            .unwrap_or(0);

        // Uploader name and profile URL. `normalize_url` never returns an
        // empty string, so no emptiness check is needed on the URL.
        let uploader_name = card
            .select(&uploader_name_sel)
            .next()
            .map(|el| Self::text_of(&el))
            .filter(|s| !s.is_empty());
        let uploader_url = card
            .select(&uploader_link_sel)
            .next()
            .and_then(|el| el.value().attr("href").map(Self::normalize_url));

        // Views: the token immediately before "views" in the metadata text.
        let views = card.select(&metadata_sel).next().and_then(|meta| {
            let text = Self::text_of(&meta);
            // ASCII lowercasing keeps byte offsets, so the index found in
            // the lowercase copy is valid for slicing `text`.
            let idx = text.to_ascii_lowercase().find("views")?;
            text[..idx]
                .split_whitespace()
                .next_back()
                .and_then(Self::parse_views)
        });

        let mut item = VideoItem::new(
            video_id,
            title.trim().to_string(),
            Self::normalize_url(href),
            CHANNEL_ID.to_string(),
            thumb.to_string(),
            duration,
        );
        if let Some(v) = views {
            item.views = Some(v);
        }
        if let Some(p) = preview {
            item.preview = Some(p);
        }
        if let Some(name) = uploader_name {
            item.uploader = Some(name);
        }
        if let Some(url) = uploader_url {
            // Last path segment of the profile URL identifies the uploader.
            let uploader_id = url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default()
                .to_string();
            if !uploader_id.is_empty() {
                item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
            }
            item.uploaderUrl = Some(url);
        }

        items.push(item);
        if items.len() >= limit {
            break;
        }
    }
    Ok(items)
}
/// Resolves a free-form category name to its archive URL by comparing the
/// normalized query against every known label and slug; `None` if nothing
/// matches.
fn lookup_category(query: &str) -> Option<String> {
    let wanted = Self::normalize_key(query);
    CATEGORIES
        .iter()
        .find(|(slug, label)| {
            Self::normalize_key(label) == wanted || Self::normalize_key(slug) == wanted
        })
        .map(|(slug, _)| format!("{BASE_URL}/c/{slug}"))
}
/// Maps a user query to a listing target.
///
/// `kind:value` prefixes are honoured for `tag`, `cat`/`category` and
/// `uploader`/`channel`/`profile` (leading `@` characters are ignored, the
/// kind is case-insensitive). Without a usable prefix, a query that names a
/// known category opens that category; anything else is a keyword search.
fn resolve_query_target(query: &str) -> Target {
    let trimmed = query.trim().trim_start_matches('@');

    let prefixed = trimmed
        .split_once(':')
        .map(|(kind, value)| (kind.trim().to_ascii_lowercase(), value.trim()))
        .filter(|(_, value)| !value.is_empty());

    if let Some((kind, value)) = prefixed {
        match kind.as_str() {
            "tag" => {
                let slug = value.replace(' ', "-").to_ascii_lowercase();
                return Target::Archive(format!("{BASE_URL}/tags/{slug}"));
            }
            "cat" | "category" => {
                // Prefer a known category; otherwise guess the slug.
                let url = Self::lookup_category(value).unwrap_or_else(|| {
                    let slug = value.replace(' ', "_");
                    format!("{BASE_URL}/c/{slug}")
                });
                return Target::Archive(url);
            }
            "uploader" | "channel" | "profile" => {
                let slug = value.replace(' ', "_").to_ascii_lowercase();
                return Target::Archive(format!("{BASE_URL}/{slug}"));
            }
            // Unknown prefix: treat the whole query as plain text below.
            _ => {}
        }
    }

    match Self::lookup_category(trimmed) {
        Some(url) => Target::Archive(url),
        None => Target::Search(trimmed.to_string()),
    }
}
/// Picks the target for a request without a query: a selected category
/// (anything other than "all" or empty) wins, otherwise the sort key chooses
/// between the "best" and "latest" feeds.
fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target {
    let selected = options
        .categories
        .as_deref()
        .filter(|cat| *cat != "all" && !cat.is_empty());
    if let Some(cat) = selected {
        return Target::Archive(format!("{BASE_URL}/c/{cat}"));
    }

    let sort_key = sort.trim().to_ascii_lowercase();
    if matches!(sort_key.as_str(), "best" | "top") {
        Target::Best
    } else {
        Target::Latest
    }
}
/// Loads one page of `target`, serving it from `cache` when an entry younger
/// than 300 seconds exists.
///
/// Request failures and empty bodies are reported through
/// `report_provider_error` and produce an empty list rather than an error;
/// only a listing-parse failure is returned as `Err`. Non-empty results are
/// stored in the cache under `"{url}#per={per_page}"`.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = Self::target_url(&target, page);
    let cache_key = format!("{url}#per={per_page}");

    match cache.get(&cache_key) {
        Some((stamp, hit)) if stamp.elapsed().unwrap_or_default().as_secs() < 300 => {
            return Ok(hit.clone());
        }
        _ => {}
    }

    let mut requester = requester_or_default(
        &options,
        CHANNEL_ID,
        "xvideos.fetch_target.missing_requester",
    );

    // The success path returns from inside the match; the remaining arms only
    // describe what went wrong so it can be reported once below.
    let (stage, detail) = match Self::fetch_html(&mut requester, &url).await {
        Ok(html) if !html.trim().is_empty() => {
            let items = Self::parse_listing(&html, per_page)?;
            if !items.is_empty() {
                cache.insert(cache_key, items.clone());
            }
            return Ok(items);
        }
        Ok(_) => ("fetch_target.empty", format!("url={url}")),
        Err(e) => ("fetch_target.request", format!("url={url}; error={e}")),
    };

    report_provider_error(CHANNEL_ID, stage, &detail).await;
    Ok(vec![])
}
}
#[async_trait]
impl Provider for XvideosProvider {
    /// Returns one page of videos.
    ///
    /// `page` falls back to 1 (and is at least 1); `per_page` falls back to 10
    /// and is clamped to 1..=60. A non-blank `query` selects the target,
    /// otherwise the category option and `sort` do. Errors are reported and
    /// turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().map_or(1, |n| n.max(1));
        let per_page = per_page.parse::<usize>().map_or(10, |n| n.clamp(1, 60));

        let search_text = query
            .as_deref()
            .map(str::trim)
            .filter(|q| !q.is_empty());
        let target = if let Some(q) = search_text {
            Self::resolve_query_target(q)
        } else {
            Self::resolve_option_target(&options, &sort)
        };

        let outcome = self
            .fetch_target(cache, target, page, per_page, options)
            .await;
        let e = match outcome {
            Ok(items) => return items,
            Err(e) => e,
        };

        report_provider_error(
            CHANNEL_ID,
            "get_videos",
            &format!("sort={sort}; page={page}; error={e}"),
        )
        .await;
        Vec::new()
    }

    /// Returns the channel descriptor for this provider; always `Some`.
    fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(cv);
        Some(channel)
    }
}