use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use chrono::{Datelike, Local, Months};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use wreq::Version;

// Static grouping/tag metadata exported for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "hd", "mixed", "search"],
    };

// Site origin; every relative path and listing URL in this file is built on top of it.
const BASE_URL: &str = "https://www.xvideos.com";
// Identifier used for the channel, for produced video items, for error reports,
// and as the prefix of uploader ids.
const CHANNEL_ID: &str = "xvideos";
// Value sent as the `User-Agent` header on HTML requests.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
// Value sent as the `Accept` header on HTML requests.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    foreign_links {
        // Lets `std::io::Error` convert into this module's `Error`.
        Io(std::io::Error);
    }
    errors {
        // Parse failure carrying a free-form message.
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}

// Slug format is `{SiteName}-{ID}` as returned by /c listing.
const CATEGORIES: &[(&str, &str)] = &[ ("AI-239", "AI"), ("Amateur-65", "Amateur"), ("Anal-12", "Anal"), ("Arab-159", "Arab"), ("Asian_Woman-32", "Asian"), ("ASMR-229", "ASMR"), ("Ass-14", "Ass"), ("bbw-51", "BBW"), ("Bi_Sexual-62", "Bi"), ("Big_Ass-24", "Big Ass"), ("Big_Cock-34", "Big Cock"), ("Big_Tits-23", "Big Tits"), ("Black_Woman-30", "Black"), ("Blonde-20", "Blonde"), ("Blowjob-15", "Blowjob"), ("Brunette-25", "Brunette"), ("Cam_Porn-58", "Cam Porn"), ("Creampie-40", "Creampie"), ("Cuckold-237", "Cuckold"), ("Cumshot-18", "Cumshot"), ("Femdom-235", "Femdom"), ("Fisting-165", "Fisting"), ("Fucked_Up_Family-81", "Step Family"), ("Gangbang-69", "Gangbang"), ("Gapes-167", "Gapes"), ("Indian-89", "Indian"), ("Interracial-27", "Interracial"), ("Latina-16", "Latina"), ("Lesbian-26", "Lesbian"), ("Lingerie-83", "Lingerie"), ("Mature-38", "Mature"), ("Milf-19", "MILF"), ("Oiled-22", "Oiled"), ("Redhead-31", "Redhead"), ("Solo_and_Masturbation-33", "Solo"), ("Squirting-56", "Squirting"), ("Stockings-28", "Stockings"), ("Teen-13", "Teen"), ]; #[derive(Debug, Clone)] enum Target { Latest, Best, Search(String), Archive(String), } #[derive(Debug, Clone)] pub struct XvideosProvider; impl XvideosProvider { pub fn new() -> Self { Self } fn build_channel(&self, _cv: ClientVersion) -> Channel { let mut cat_options: Vec = vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }]; for (slug, label) in CATEGORIES { cat_options.push(FilterOption { id: slug.to_string(), title: label.to_string(), }); } Channel { id: CHANNEL_ID.to_string(), name: "XVideos".to_string(), description: "XVideos — one of the world's largest free porn sites with latest, best-of-month, category, tag, and keyword search." 
.to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=xvideos.com".to_string(), status: "active".to_string(), categories: CATEGORIES.iter().map(|(_, label)| label.to_string()).collect(), options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse XVideos ranking feeds.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "new".to_string(), title: "Latest".to_string(), }, FilterOption { id: "best".to_string(), title: "Best of Month".to_string(), }, ], multiSelect: false, }, ChannelOption { id: "categories".to_string(), title: "Categories".to_string(), description: "Browse an XVideos category archive.".to_string(), systemImage: "square.grid.2x2".to_string(), colorName: "orange".to_string(), options: cat_options, multiSelect: false, }, ], nsfw: true, cacheDuration: Some(1800), } } fn selector(value: &str) -> Result { Selector::parse(value) .map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}"))) } fn decode_html(text: &str) -> String { decode(text.as_bytes()) .to_string() .unwrap_or_else(|_| text.to_string()) } fn text_of(el: &ElementRef<'_>) -> String { let raw: String = el.text().collect::>().join(" "); Self::decode_html(&raw.split_whitespace().collect::>().join(" ")) } fn normalize_key(s: &str) -> String { s.trim() .trim_start_matches('#') .replace(['_', '-'], " ") .split_whitespace() .collect::>() .join(" ") .to_ascii_lowercase() } fn normalize_url(path: &str) -> String { let path = path.trim(); if path.starts_with("http://") || path.starts_with("https://") { return path.to_string(); } if path.starts_with("//") { return format!("https:{path}"); } if path.starts_with('/') { return format!("{BASE_URL}{path}"); } format!("{BASE_URL}/{path}") } fn html_headers(referer: &str) -> Vec<(String, String)> { vec![ ("User-Agent".to_string(), FIREFOX_UA.to_string()), ("Accept".to_string(), HTML_ACCEPT.to_string()), 
("Accept-Language".to_string(), "en-US,en;q=0.5".to_string()), ("Referer".to_string(), referer.to_string()), ] } // /best always redirects to the previous calendar month's archive. fn best_base_url() -> String { let now = Local::now(); let prev = now .checked_sub_months(Months::new(1)) .unwrap_or(now); format!("{BASE_URL}/best/{}-{:02}", prev.year(), prev.month()) } fn target_url(target: &Target, page: u16) -> String { match target { Target::Latest => { if page <= 1 { format!("{BASE_URL}/") } else { // page 2 = /new/1, page 3 = /new/2, ... format!("{BASE_URL}/new/{}", page - 1) } } Target::Best => { let base = Self::best_base_url(); if page <= 1 { base } else { format!("{base}/{}", page - 1) } } Target::Search(q) => { let encoded: String = url::form_urlencoded::byte_serialize(q.trim().as_bytes()).collect(); if page <= 1 { format!("{BASE_URL}/?k={encoded}") } else { format!("{BASE_URL}/?k={encoded}&p={}", page - 1) } } Target::Archive(base_url) => { let base = base_url.trim_end_matches('/'); if page <= 1 { base.to_string() } else { format!("{base}/{}", page - 1) } } } } async fn fetch_html( requester: &mut crate::util::requester::Requester, url: &str, ) -> Result { requester .get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11)) .await .map_err(|e| Error::from(format!("request failed for {url}: {e}"))) } // Parses "21 min", "1h20min", "2h", "45sec", "MM:SS", "HH:MM:SS" fn parse_duration(text: &str) -> u32 { let text = text.trim(); // Colon-separated formats MM:SS and HH:MM:SS let parts: Vec<&str> = text.split(':').collect(); if parts.len() == 2 { let m: u32 = parts[0].trim().parse().unwrap_or(0); let s: u32 = parts[1].trim().parse().unwrap_or(0); return m * 60 + s; } if parts.len() == 3 { let h: u32 = parts[0].trim().parse().unwrap_or(0); let m: u32 = parts[1].trim().parse().unwrap_or(0); let s: u32 = parts[2].trim().parse().unwrap_or(0); return h * 3600 + m * 60 + s; } // Word-based: "1h20min", "30 min", "45sec", etc. 
let low = text.to_ascii_lowercase(); let h: u32 = low .find('h') .and_then(|i| low[..i].trim().parse().ok()) .unwrap_or(0); let m: u32 = low.find("min").and_then(|i| { let start = low[..i] .rfind(|c: char| !c.is_ascii_digit()) .map(|j| j + 1) .unwrap_or(0); low[start..i].trim().parse().ok() }).unwrap_or(0); let s: u32 = low.find("sec").and_then(|i| { let start = low[..i] .rfind(|c: char| !c.is_ascii_digit()) .map(|j| j + 1) .unwrap_or(0); low[start..i].trim().parse().ok() }).unwrap_or(0); h * 3600 + m * 60 + s } fn parse_views(text: &str) -> Option { // "877.3k Views", "1.2M Views" — strip suffix then parse let cleaned = text .replace("Views", "") .replace("views", "") .replace("View", "") .replace(',', ""); parse_abbreviated_number(cleaned.trim()) } fn parse_listing(html: &str, limit: usize) -> Result> { let document = Html::parse_document(html); let card_sel = Self::selector("div.thumb-block")?; let img_sel = Self::selector("img[data-src]")?; let link_sel = Self::selector("a[href]")?; let title_sel = Self::selector("p.title a[title], a.title[title]")?; let uploader_name_sel = Self::selector("p.metadata a span.name")?; let uploader_link_sel = Self::selector("p.metadata a[href]")?; let dur_sel = Self::selector(".thumb-under span.duration")?; let metadata_sel = Self::selector("p.metadata")?; let mut items = Vec::new(); 'card: for card in document.select(&card_sel) { // Find the anchor whose href contains /video. let video_link = card .select(&link_sel) .find(|el| { el.value() .attr("href") .map(|h| h.contains("/video.")) .unwrap_or(false) }); let Some(video_link) = video_link else { continue; }; let href = video_link.value().attr("href").unwrap_or_default(); let page_url = Self::normalize_url(href); if page_url.is_empty() { continue; } // eid: path segment starting with "video." e.g. 
"video.ohedfck8b21" let eid = href .split('/') .find(|s| s.starts_with("video.")) .and_then(|s| s.strip_prefix("video.")) .unwrap_or_default() .to_string(); if eid.is_empty() { continue; } // Numeric id from data-id attribute; fall back to eid let video_id = card .value() .attr("data-id") .filter(|s| !s.is_empty()) .unwrap_or(&eid) .to_string(); // Thumbnail (lazy-loaded, stored in data-src) let thumb = card .select(&img_sel) .next() .and_then(|el| el.value().attr("data-src")) .map(str::to_string) .unwrap_or_default(); if thumb.is_empty() { continue 'card; } // Preview video clip (data-pvv on the same img element) let preview = card .select(&img_sel) .next() .and_then(|el| el.value().attr("data-pvv")) .map(str::to_string) .filter(|s| !s.is_empty()); // Title from the title attribute on the link inside p.title let title = card .select(&title_sel) .next() .and_then(|el| el.value().attr("title").map(Self::decode_html)) .filter(|t| !t.trim().is_empty()); let Some(title) = title else { continue; }; // Duration from span.duration inside .thumb-under let duration = card .select(&dur_sel) .next() .map(|el| Self::parse_duration(&Self::text_of(&el))) .unwrap_or(0); // Uploader name and URL let uploader_name = card .select(&uploader_name_sel) .next() .map(|el| Self::text_of(&el)) .filter(|s| !s.is_empty()); let uploader_url = card .select(&uploader_link_sel) .next() .and_then(|el| el.value().attr("href").map(Self::normalize_url)) .filter(|u| !u.is_empty()); // Views: scan p.metadata text for "NNN Views" let views = card.select(&metadata_sel).next().and_then(|meta| { let text = Self::text_of(&meta); let low = text.to_ascii_lowercase(); low.find("views").and_then(|idx| { // grab the token immediately before "views" text[..idx] .split_whitespace() .last() .and_then(|w| Self::parse_views(w)) }) }); let mut item = VideoItem::new( video_id, title.trim().to_string(), page_url, CHANNEL_ID.to_string(), thumb, duration, ); if let Some(v) = views { item.views = Some(v); } if let Some(p) 
= preview { item.preview = Some(p); } if let Some(name) = uploader_name { item.uploader = Some(name); } if let Some(url) = uploader_url { let uploader_id = url .trim_end_matches('/') .rsplit('/') .next() .unwrap_or_default() .to_string(); if !uploader_id.is_empty() { item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}")); } item.uploaderUrl = Some(url); } items.push(item); if items.len() >= limit { break; } } Ok(items) } fn lookup_category(query: &str) -> Option { let normalized = Self::normalize_key(query); for (slug, label) in CATEGORIES { if Self::normalize_key(label) == normalized || Self::normalize_key(slug) == normalized { return Some(format!("{BASE_URL}/c/{slug}")); } } None } fn resolve_query_target(query: &str) -> Target { let trimmed = query.trim().trim_start_matches('@'); if let Some((kind, value)) = trimmed.split_once(':') { let value = value.trim(); if !value.is_empty() { match kind.trim().to_ascii_lowercase().as_str() { "tag" => { let slug = value.replace(' ', "-").to_ascii_lowercase(); return Target::Archive(format!("{BASE_URL}/tags/{slug}")); } "cat" | "category" => { if let Some(url) = Self::lookup_category(value) { return Target::Archive(url); } let slug = value.replace(' ', "_"); return Target::Archive(format!("{BASE_URL}/c/{slug}")); } "uploader" | "channel" | "profile" => { let slug = value.replace(' ', "_").to_ascii_lowercase(); return Target::Archive(format!("{BASE_URL}/{slug}")); } _ => {} } } } // Category name lookup if let Some(url) = Self::lookup_category(trimmed) { return Target::Archive(url); } Target::Search(trimmed.to_string()) } fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target { if let Some(cat) = options.categories.as_deref() { if cat != "all" && !cat.is_empty() { return Target::Archive(format!("{BASE_URL}/c/{cat}")); } } match sort.trim().to_ascii_lowercase().as_str() { "best" | "top" => Target::Best, _ => Target::Latest, } } async fn fetch_target( &self, cache: VideoCache, target: Target, page: u16, 
per_page: usize, options: ServerOptions, ) -> Result> { let url = Self::target_url(&target, page); let cache_key = format!("{url}#per={per_page}"); if let Some((ts, cached)) = cache.get(&cache_key) { if ts.elapsed().unwrap_or_default().as_secs() < 300 { return Ok(cached.clone()); } } let mut requester = requester_or_default( &options, CHANNEL_ID, "xvideos.fetch_target.missing_requester", ); let html = match Self::fetch_html(&mut requester, &url).await { Ok(v) => v, Err(e) => { report_provider_error( CHANNEL_ID, "fetch_target.request", &format!("url={url}; error={e}"), ) .await; return Ok(vec![]); } }; if html.trim().is_empty() { report_provider_error( CHANNEL_ID, "fetch_target.empty", &format!("url={url}"), ) .await; return Ok(vec![]); } let items = Self::parse_listing(&html, per_page)?; if !items.is_empty() { cache.insert(cache_key, items.clone()); } Ok(items) } } #[async_trait] impl Provider for XvideosProvider { async fn get_videos( &self, cache: VideoCache, _pool: DbPool, sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1).max(1); let per_page = per_page.parse::().unwrap_or(10).clamp(1, 60); let target = match query { Some(q) if !q.trim().is_empty() => Self::resolve_query_target(q.trim()), _ => Self::resolve_option_target(&options, &sort), }; match self .fetch_target(cache, target, page, per_page, options) .await { Ok(items) => items, Err(e) => { report_provider_error( CHANNEL_ID, "get_videos", &format!("sort={sort}; page={page}; error={e}"), ) .await; vec![] } } } fn get_channel(&self, cv: ClientVersion) -> Option { Some(self.build_channel(cv)) } }