From 2b26019a6624df6167153d4d4cceff9875647bf9 Mon Sep 17 00:00:00 2001
From: Simon
Date: Wed, 18 Mar 2026 22:54:51 +0000
Subject: [PATCH] vrporn

---
 src/providers/mod.rs    |    6 +
 src/providers/vrporn.rs | 1239 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 1245 insertions(+)
 create mode 100644 src/providers/vrporn.rs

diff --git a/src/providers/mod.rs b/src/providers/mod.rs
index 1d3c644..079904a 100644
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ -41,6 +41,7 @@ pub mod sxyprn;
 pub mod tnaflix;
 pub mod tokyomotion;
 pub mod viralxxxporn;
+pub mod vrporn;
 pub mod xfree;
 pub mod xxthots;
 pub mod yesporn;
@@ -177,6 +178,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
         "viralxxxporn",
         Arc::new(viralxxxporn::ViralxxxpornProvider::new()) as DynProvider,
     );
+    m.insert(
+        "vrporn",
+        Arc::new(vrporn::VrpornProvider::new()) as DynProvider,
+    );
     // m.insert("pornxp", Arc::new(pornxp::PornxpProvider::new()) as DynProvider);
     m.insert(
         "rule34gen",
@@ -365,6 +370,7 @@ fn channel_metadata_for(id: &str) -> Option<ProviderChannelMetadata> {
         "tnaflix" => Some(tnaflix::CHANNEL_METADATA),
         "tokyomotion" => Some(tokyomotion::CHANNEL_METADATA),
         "viralxxxporn" => Some(viralxxxporn::CHANNEL_METADATA),
+        "vrporn" => Some(vrporn::CHANNEL_METADATA),
         "rule34gen" => Some(rule34gen::CHANNEL_METADATA),
         "xxdbx" => Some(xxdbx::CHANNEL_METADATA),
         "xfree" => Some(xfree::CHANNEL_METADATA),
diff --git a/src/providers/vrporn.rs b/src/providers/vrporn.rs
new file mode 100644
index 0000000..902f08f
--- /dev/null
+++ b/src/providers/vrporn.rs
@@ -0,0 +1,1239 @@
+use crate::DbPool;
+use crate::api::ClientVersion;
+use crate::providers::{
+    Provider, report_provider_error, report_provider_error_background, requester_or_default,
+};
+use crate::status::*;
+use crate::util::cache::VideoCache;
+use crate::util::parse_abbreviated_number;
+use crate::util::requester::Requester;
+use crate::util::time::parse_time_to_seconds;
+use crate::videos::{ServerOptions, VideoEmbed, VideoFormat, VideoItem};
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use error_chain::error_chain;
+use futures::stream::{self, StreamExt};
+use htmlentity::entity::{ICodedDataTrait, decode};
+use regex::Regex;
+use scraper::{ElementRef, Html, Selector};
+use serde_json::Value;
+use std::collections::HashSet;
+use std::sync::{Arc, RwLock};
+use std::{thread, vec};
+use wreq::Version;
+
+/// Channel grouping metadata returned by `channel_metadata_for` for the `vrporn` id.
+pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
+    crate::providers::ProviderChannelMetadata {
+        group_id: "studio-network",
+        tags: &["vr", "studios", "premium"],
+    };
+
+error_chain! {
+    foreign_links {
+        Io(std::io::Error);
+        Json(serde_json::Error);
+    }
+    errors {
+        Parse(msg: String) {
+            description("parse error")
+            display("parse error: {}", msg)
+        }
+    }
+}
+
+const BASE_URL: &str = "https://vrporn.com";
+const CHANNEL_ID: &str = "vrporn";
+const FIREFOX_UA: &str =
+    "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
+const HTML_ACCEPT: &str =
+    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
+/// Upper bound on the pages crawled per catalog listing (studios, pornstars).
+const MAX_CATALOG_PAGES: u16 = 25;
+
+/// Shared, lock-protected list of filter options.
+type FilterCatalog = Arc<RwLock<Vec<FilterOption>>>;
+
+/// Provider for vrporn.com listings, search, and tag/pornstar/studio archives.
+#[derive(Debug, Clone)]
+pub struct VrpornProvider {
+    url: String,
+    categories: FilterCatalog,
+    tags: FilterCatalog,
+    stars: FilterCatalog,
+    sites: FilterCatalog,
+}
+
+/// Listing a request resolves to before its URL is built.
+#[derive(Debug, Clone)]
+enum Target {
+    Hot,
+    New,
+    Popular,
+    Search(String),
+    Archive(String),
+}
+
+impl VrpornProvider {
+    /// Creates the provider with placeholder catalogs and starts loading the real ones.
+    pub fn new() -> Self {
+        let provider = Self {
+            url: BASE_URL.to_string(),
+            categories: Self::placeholder_catalog(),
+            tags: Self::placeholder_catalog(),
+            stars: Self::placeholder_catalog(),
+            sites: Self::placeholder_catalog(),
+        };
+        provider.spawn_initial_load();
+        provider
+    }
+
+    /// Builds a catalog that holds only the "All" entry.
+    fn placeholder_catalog() -> FilterCatalog {
+        Arc::new(RwLock::new(vec![FilterOption {
+            id: "all".to_string(),
+            title: "All".to_string(),
+        }]))
+    }
+
+    /// Returns a copy of a catalog, or an empty list when its lock is poisoned.
+    fn snapshot(catalog: &FilterCatalog) -> Vec<FilterOption> {
+        catalog.read().map(|values| values.clone()).unwrap_or_default()
+    }
+
+    /// Loads the filter catalogs on a dedicated thread with its own current-thread runtime.
+    fn spawn_initial_load(&self) {
+        let provider = self.clone();
+        thread::spawn(move || {
+            let runtime = match tokio::runtime::Builder::new_current_thread()
+                .enable_all()
+                .build()
+            {
+                Ok(runtime) => runtime,
+                Err(error) => {
+                    report_provider_error_background(
+                        CHANNEL_ID,
+                        "spawn_initial_load.runtime_build",
+                        &error.to_string(),
+                    );
+                    return;
+                }
+            };
+
+            runtime.block_on(async move {
+                provider.refresh_filter_catalogs().await;
+            });
+        });
+    }
+
+    /// Describes the channel and its filter options from the current catalogs.
+    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
+        let categories = Self::snapshot(&self.categories);
+        let tags = Self::snapshot(&self.tags);
+        let stars = Self::snapshot(&self.stars);
+        let sites = Self::snapshot(&self.sites);
+
+        Channel {
+            id: CHANNEL_ID.to_string(),
+            name: "VRPorn".to_string(),
+            description:
+                "VRPorn.com browse, search, tag, pornstar, and studio archives with direct VR formats."
+                    .to_string(),
+            premium: false,
+            favicon: "https://www.google.com/s2/favicons?sz=64&domain=vrporn.com".to_string(),
+            status: "active".to_string(),
+            categories: categories.iter().map(|value| value.title.clone()).collect(),
+            options: vec![
+                ChannelOption {
+                    id: "sort".to_string(),
+                    title: "Sort".to_string(),
+                    description: "Browse VRPorn sections.".to_string(),
+                    systemImage: "list.number".to_string(),
+                    colorName: "blue".to_string(),
+                    options: vec![
+                        FilterOption {
+                            id: "hot".to_string(),
+                            title: "Hot Right Now".to_string(),
+                        },
+                        FilterOption {
+                            id: "new".to_string(),
+                            title: "New".to_string(),
+                        },
+                        FilterOption {
+                            id: "popular".to_string(),
+                            title: "Popular".to_string(),
+                        },
+                    ],
+                    multiSelect: false,
+                },
+                ChannelOption {
+                    id: "categories".to_string(),
+                    title: "Categories".to_string(),
+                    description: "Browse VRPorn category archives.".to_string(),
+                    systemImage: "square.grid.2x2".to_string(),
+                    colorName: "orange".to_string(),
+                    options: categories,
+                    multiSelect: false,
+                },
+                ChannelOption {
+                    id: "filter".to_string(),
+                    title: "Tags".to_string(),
+                    description: "Browse VRPorn tag archives.".to_string(),
+                    systemImage: "tag.fill".to_string(),
+                    colorName: "green".to_string(),
+                    options: tags,
+                    multiSelect: false,
+                },
+                ChannelOption {
+                    id: "stars".to_string(),
+                    title: "Pornstars".to_string(),
+                    description: "Browse VRPorn pornstar archives.".to_string(),
+                    systemImage: "star.fill".to_string(),
+                    colorName: "yellow".to_string(),
+                    options: stars,
+                    multiSelect: false,
+                },
+                ChannelOption {
+                    id: "sites".to_string(),
+                    title: "Studios".to_string(),
+                    description: "Browse VRPorn studio archives.".to_string(),
+                    systemImage: "building.2.fill".to_string(),
+                    colorName: "purple".to_string(),
+                    options: sites,
+                    multiSelect: false,
+                },
+            ],
+            nsfw: true,
+            cacheDuration: Some(1800),
+        }
+    }
+
+    /// Compiles a CSS selector, mapping parse failures into the provider error type.
+    fn selector(value: &str) -> Result<Selector> {
+        Selector::parse(value)
+            .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
+    }
+
+    /// Compiles a regular expression, mapping failures into the provider error type.
+    fn regex(value: &str) -> Result<Regex> {
+        Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
+    }
+
+    /// Decodes HTML entities, turns non-breaking spaces into spaces, and trims the result.
+    fn decode_text(text: &str) -> String {
+        decode(text.as_bytes())
+            .to_string()
+            .unwrap_or_else(|_| text.to_string())
+            .replace('\u{a0}', " ")
+            .trim()
+            .to_string()
+    }
+
+    /// Collapses every whitespace run into a single space.
+    fn collapse_whitespace(text: &str) -> String {
+        text.split_whitespace().collect::<Vec<_>>().join(" ")
+    }
+
+    /// Returns the element's text content, whitespace-collapsed and entity-decoded.
+    fn text_of(element: &ElementRef<'_>) -> String {
+        Self::decode_text(&Self::collapse_whitespace(
+            &element.text().collect::<Vec<_>>().join(" "),
+        ))
+    }
+
+    /// Returns the decoded `title` attribute, falling back to the element text when empty.
+    fn title_of(element: &ElementRef<'_>) -> String {
+        element
+            .value()
+            .attr("title")
+            .map(Self::decode_text)
+            .filter(|value| !value.is_empty())
+            .unwrap_or_else(|| Self::text_of(element))
+    }
+
+    /// Lowercases and whitespace-normalizes a title for case-insensitive comparison.
+    fn normalize_title(value: &str) -> String {
+        Self::collapse_whitespace(&Self::decode_text(value).to_ascii_lowercase())
+    }
+
+    /// Resolves absolute, protocol-relative, root-relative, and relative links to full URLs.
+    fn normalize_url(&self, value: &str) -> String {
+        if value.starts_with("http://") || value.starts_with("https://") {
+            return value.to_string();
+        }
+        if value.starts_with("//") {
+            return format!("https:{value}");
+        }
+        if value.starts_with('/') {
+            return format!("{}{}", self.url, value);
+        }
+        format!("{}/{}", self.url.trim_end_matches('/'), value)
+    }
+
+    /// Request headers sent with every HTML page fetch (Firefox user agent plus referer).
+    fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
+        vec![
+            ("User-Agent".to_string(), FIREFOX_UA.to_string()),
+            ("Accept".to_string(), HTML_ACCEPT.to_string()),
+            ("Referer".to_string(), referer.to_string()),
+        ]
+    }
+
+    /// Fetches `url` over HTTP/1.1 and returns the response text.
+    async fn fetch_html(
+        &self,
+        requester: &mut Requester,
+        url: &str,
+        referer: &str,
+    ) -> Result<String> {
+        requester
+            .get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11))
+            .await
+            .map_err(|error| Error::from(format!("request failed for {url}: {error}")))
+    }
+
+    /// Appends `item` unless its id or normalized title is already in the catalog.
+    fn push_unique(target: &FilterCatalog, item: FilterOption) {
+        if item.id.is_empty() || item.title.is_empty() {
+            return;
+        }
+        if let Ok(mut values) = target.write() {
+            let normalized = Self::normalize_title(&item.title);
+            let duplicate = values.iter().any(|value| {
+                value.id == item.id || Self::normalize_title(&value.title) == normalized
+            });
+            if !duplicate {
+                values.push(item);
+            }
+        }
+    }
+
+    /// Returns the highest `/page/N/` number linked from the document, or 1 when none is.
+    fn extract_last_page(document: &Html) -> u16 {
+        let (Ok(selector), Ok(regex)) = (
+            Self::selector("a[href]"),
+            Self::regex(r"/page/([0-9]+)/"),
+        ) else {
+            return 1;
+        };
+
+        document
+            .select(&selector)
+            .filter_map(|element| element.value().attr("href"))
+            .filter_map(|href| {
+                regex
+                    .captures(href)
+                    .and_then(|captures| captures.get(1))
+                    .and_then(|value| value.as_str().parse::<u16>().ok())
+            })
+            .max()
+            .unwrap_or(1)
+    }
+
+    /// Seeds tags and categories from the home page and the `/categories/` index.
+    ///
+    /// Both pages link to `/tag/` archives, so every hit is added to both catalogs.
+    async fn load_tags_and_categories(&self) -> Result<()> {
+        let mut requester = Requester::new();
+        let home_url = format!("{}/", self.url);
+        let home_html = self.fetch_html(&mut requester, &home_url, &home_url).await?;
+        {
+            // The document is scoped so it is dropped before the next `.await`
+            // (presumably because `Html` is not `Send` - confirm before flattening).
+            let home_document = Html::parse_document(&home_html);
+            let tag_selector = Self::selector("a[href^=\"/tag/\"]")?;
+
+            for element in home_document.select(&tag_selector) {
+                let href = element.value().attr("href").unwrap_or_default();
+                let title = Self::text_of(&element);
+                if href.is_empty() || title.is_empty() {
+                    continue;
+                }
+                let option = FilterOption {
+                    id: self.normalize_url(href),
+                    title,
+                };
+                Self::push_unique(&self.tags, option.clone());
+                Self::push_unique(&self.categories, option);
+            }
+        }
+
+        let categories_url = format!("{}/categories/", self.url);
+        let categories_html = self
+            .fetch_html(&mut requester, &categories_url, &home_url)
+            .await?;
+        {
+            let categories_document = Html::parse_document(&categories_html);
+            let card_selector = Self::selector("article.ui-category-card a[href*=\"/tag/\"]")?;
+
+            for element in categories_document.select(&card_selector) {
+                let href = element.value().attr("href").unwrap_or_default();
+                let title = Self::title_of(&element);
+                if href.is_empty() || title.is_empty() {
+                    continue;
+                }
+                let option = FilterOption {
+                    id: self.normalize_url(href),
+                    title,
+                };
+                Self::push_unique(&self.categories, option.clone());
+                Self::push_unique(&self.tags, option);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Crawls a paginated listing (at most `MAX_CATALOG_PAGES` pages), feeding each to `collect`.
+    async fn crawl_catalog(
+        &self,
+        listing: &str,
+        collect: fn(&Self, &Html) -> Result<()>,
+    ) -> Result<()> {
+        let mut requester = Requester::new();
+        let first_page_url = format!("{}/{listing}/", self.url);
+        let first_html = self
+            .fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url))
+            .await?;
+        let last_page = {
+            let first_document = Html::parse_document(&first_html);
+            let last_page = Self::extract_last_page(&first_document).clamp(1, MAX_CATALOG_PAGES);
+            collect(self, &first_document)?;
+            last_page
+        };
+
+        for page in 2..=last_page {
+            let url = format!("{}/{listing}/page/{page}/", self.url);
+            let html = self.fetch_html(&mut requester, &url, &first_page_url).await?;
+            let document = Html::parse_document(&html);
+            collect(self, &document)?;
+        }
+
+        Ok(())
+    }
+
+    /// Adds every link matching `selector` whose href contains `marker` to `catalog`.
+    fn collect_catalog_links(
+        &self,
+        document: &Html,
+        selector: &str,
+        marker: &str,
+        catalog: &FilterCatalog,
+    ) -> Result<()> {
+        let selector = Self::selector(selector)?;
+        for element in document.select(&selector) {
+            let href = element.value().attr("href").unwrap_or_default();
+            if !href.contains(marker) {
+                continue;
+            }
+            let title = Self::title_of(&element);
+            if title.is_empty() {
+                continue;
+            }
+            Self::push_unique(
+                catalog,
+                FilterOption {
+                    id: self.normalize_url(href),
+                    title,
+                },
+            );
+        }
+        Ok(())
+    }
+
+    /// Loads the studio catalog from the `/studios/` listing.
+    async fn load_studios(&self) -> Result<()> {
+        self.crawl_catalog("studios", Self::collect_studios_from_document).await
+    }
+
+    /// Adds the studio cards found in `document` to the sites catalog.
+    fn collect_studios_from_document(&self, document: &Html) -> Result<()> {
+        self.collect_catalog_links(
+            document,
+            "article.ui-studio-card a[href]",
+            "/studio/",
+            &self.sites,
+        )
+    }
+
+    /// Loads the pornstar catalog from the `/pornstars/` listing.
+    async fn load_pornstars(&self) -> Result<()> {
+        self.crawl_catalog("pornstars", Self::collect_pornstars_from_document).await
+    }
+
+    /// Adds the model cards found in `document` to the stars catalog.
+    fn collect_pornstars_from_document(&self, document: &Html) -> Result<()> {
+        self.collect_catalog_links(
+            document,
+            "article.ui-card-model a[href]",
+            "/pornstars/",
+            &self.stars,
+        )
+    }
+
+    /// True while any catalog holds at most one entry (the "All" placeholder) or is poisoned.
+    fn filters_need_refresh(&self) -> bool {
+        [&self.categories, &self.tags, &self.stars, &self.sites]
+            .into_iter()
+            .any(|catalog| catalog.read().map(|values| values.len()).unwrap_or_default() <= 1)
+    }
+
+    /// Reloads every catalog; each failure is reported and does not stop the others.
+    async fn refresh_filter_catalogs(&self) {
+        if let Err(error) = self.load_tags_and_categories().await {
+            report_provider_error_background(
+                CHANNEL_ID,
+                "refresh_filter_catalogs.tags_and_categories",
+                &error.to_string(),
+            );
+        }
+        if let Err(error) = self.load_studios().await {
+            report_provider_error_background(
+                CHANNEL_ID,
+                "refresh_filter_catalogs.studios",
+                &error.to_string(),
+            );
+        }
+        if let Err(error) = self.load_pornstars().await {
+            report_provider_error_background(
+                CHANNEL_ID,
+                "refresh_filter_catalogs.pornstars",
+                &error.to_string(),
+            );
+        }
+    }
+
+    /// Returns the id of the option whose normalized title equals the normalized query.
+    fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
+        let normalized_query = Self::normalize_title(query);
+        options
+            .iter()
+            .find(|value| {
+                value.id != "all" && Self::normalize_title(&value.title) == normalized_query
+            })
+            .map(|value| value.id.clone())
+    }
+
+    /// Maps a sort key to its listing; unknown keys fall back to `Hot`.
+    fn target_for_sort(sort: &str) -> Target {
+        match sort {
+            "new" | "latest" => Target::New,
+            "popular" | "all" | "most_viewed" => Target::Popular,
+            _ => Target::Hot,
+        }
+    }
+
+    /// Picks the first archive URL among the site/star/tag/category options, else maps `sort`.
+    fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
+        for candidate in [
+            options.sites.as_deref(),
+            options.stars.as_deref(),
+            options.filter.as_deref(),
+            options.categories.as_deref(),
+        ]
+        .into_iter()
+        .flatten()
+        {
+            if candidate.starts_with(&self.url) && candidate != "all" {
+                return Target::Archive(candidate.to_string());
+            }
+        }
+
+        Self::target_for_sort(sort)
+    }
+
+    /// Maps a free-text query to the archive with a matching title, or to a site search.
+    ///
+    /// Refreshes the catalogs first when any of them is still a placeholder.
+    async fn resolve_query_target(&self, query: &str, sort: &str) -> Target {
+        let query = query.trim();
+        if query.is_empty() {
+            return Self::target_for_sort(sort);
+        }
+
+        if self.filters_need_refresh() {
+            self.refresh_filter_catalogs().await;
+        }
+
+        for options in [&self.sites, &self.stars, &self.tags, &self.categories] {
+            if let Ok(values) = options.read() {
+                if let Some(url) = Self::match_filter(&values, query) {
+                    return Target::Archive(url);
+                }
+            }
+        }
+
+        Target::Search(query.to_string())
+    }
+
+    /// Percent-encodes one search term, leaving only RFC 3986 unreserved characters as-is.
+    fn encode_query_term(term: &str) -> String {
+        let mut encoded = String::with_capacity(term.len());
+        for byte in term.bytes() {
+            match byte {
+                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
+                    encoded.push(char::from(byte));
+                }
+                _ => encoded.push_str(&format!("%{byte:02X}")),
+            }
+        }
+        encoded
+    }
+
+    /// Builds the listing URL for `target`; pages below 2 use the unpaginated form.
+    fn build_url_for_target(&self, target: &Target, page: u16) -> String {
+        match target {
+            Target::Hot => {
+                if page <= 1 {
+                    format!("{}/", self.url)
+                } else {
+                    // Later pages use the `/all/` listing, i.e. the same URLs as `Popular`.
+                    // NOTE(review): confirm this fallback is intended.
+                    format!("{}/all/page/{page}/", self.url)
+                }
+            }
+            Target::New => {
+                if page <= 1 {
+                    format!("{}/new/", self.url)
+                } else {
+                    format!("{}/new/page/{page}/", self.url)
+                }
+            }
+            Target::Popular => {
+                if page <= 1 {
+                    format!("{}/all/", self.url)
+                } else {
+                    format!("{}/all/page/{page}/", self.url)
+                }
+            }
+            Target::Search(query) => {
+                // Percent-encode each term so reserved characters cannot alter the query string.
+                let query = query
+                    .split_whitespace()
+                    .map(Self::encode_query_term)
+                    .collect::<Vec<_>>()
+                    .join("+");
+                if page <= 1 {
+                    format!("{}/search/videos/?query={query}", self.url)
+                } else {
+                    format!("{}/search/videos/page/{page}/?query={query}", self.url)
+                }
+            }
+            Target::Archive(url) => {
+                let base = url.trim_end_matches('/');
+                if page <= 1 {
+                    format!("{base}/")
+                } else {
+                    format!("{base}/page/{page}/")
+                }
+            }
+        }
+    }
+
+    /// Parses the video cards of a listing page; cards without link, title, or id are skipped.
+    fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
+        let document = Html::parse_document(html);
+        let article_selector = Self::selector("article.ui-video-card")?;
+        let header_link_selector = Self::selector("header a[href]")?;
+        let title_selector = Self::selector("a.ui-video-card__name")?;
+        let thumb_selector = Self::selector("img.ui-video-card__cover")?;
+        let duration_selector = Self::selector("div.ui-time span")?;
+        let studio_selector = Self::selector("a.ui-video-card__studio-link[href]")?;
+        let info_text_selector = Self::selector(".ui-video-card__info-mini span")?;
+        let footer_text_selector = Self::selector(".ui-video-card__footer .ui-video-card__text")?;
+
+        let mut items = Vec::new();
+
+        for article in document.select(&article_selector) {
+            let header_link = article.select(&header_link_selector).next();
+            let title_link = article.select(&title_selector).next();
+            let href = title_link
+                .and_then(|link| link.value().attr("href"))
+                .or_else(|| header_link.and_then(|link| link.value().attr("href")))
+                .unwrap_or_default();
+            if href.is_empty() {
+                continue;
+            }
+
+            let title = title_link
+                .map(|element| Self::text_of(&element))
+                .filter(|value| !value.is_empty())
+                .or_else(|| {
+                    header_link.and_then(|element| {
+                        element
+                            .value()
+                            .attr("title")
+                            .map(Self::decode_text)
+                            .filter(|value| !value.is_empty())
+                    })
+                })
+                .unwrap_or_default();
+            if title.is_empty() {
+                continue;
+            }
+
+            let thumb = article
+                .select(&thumb_selector)
+                .next()
+                .and_then(|img| img.value().attr("src"))
+                .map(|value| self.normalize_url(value))
+                .unwrap_or_default();
+
+            let duration = article
+                .select(&duration_selector)
+                .next()
+                .map(|element| Self::text_of(&element))
+                .and_then(|value| parse_time_to_seconds(&value))
+                .unwrap_or(0)
+                .max(0) as u32;
+
+            let url = self.normalize_url(href);
+            let id = href
+                .trim_matches('/')
+                .split('/')
+                .next_back()
+                .unwrap_or_default()
+                .to_string();
+            if id.is_empty() {
+                continue;
+            }
+
+            let mut item = VideoItem::new(
+                id,
+                title,
+                url,
+                CHANNEL_ID.to_string(),
+                thumb,
+                duration,
+            );
+
+            if let Some(studio) = article.select(&studio_selector).next() {
+                let uploader = Self::title_of(&studio);
+                if !uploader.is_empty() {
+                    item.uploader = Some(uploader);
+                }
+                if let Some(href) = studio.value().attr("href") {
+                    item.uploaderUrl = Some(self.normalize_url(href));
+                }
+            }
+
+            if let Some(views) = article.select(&footer_text_selector).nth(1) {
+                item.views = parse_abbreviated_number(&Self::text_of(&views));
+            }
+
+            if item.uploader.is_none() {
+                item.uploader = article
+                    .select(&info_text_selector)
+                    .next()
+                    .map(|element| Self::text_of(&element));
+            }
+
+            items.push(item);
+        }
+
+        Ok(items)
+    }
+
+    /// Returns the first JSON-LD `VideoObject` on the page, top-level or inside `@graph`.
+    fn extract_json_ld_video(&self, html: &str) -> Result<Option<Value>> {
+        let script_regex = Self::regex(
+            r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#,
+        )?;
+        for captures in script_regex.captures_iter(html) {
+            let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else {
+                continue;
+            };
+            let parsed: Value = match serde_json::from_str(raw) {
+                Ok(value) => value,
+                Err(_) => continue,
+            };
+
+            if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
+                for item in graph {
+                    if item
+                        .get("@type")
+                        .and_then(Value::as_str)
+                        .is_some_and(|value| value == "VideoObject")
+                    {
+                        return Ok(Some(item.clone()));
+                    }
+                }
+            }
+
+            if parsed
+                .get("@type")
+                .and_then(Value::as_str)
+                .is_some_and(|value| value == "VideoObject")
+            {
+                return Ok(Some(parsed));
+            }
+        }
+
+        Ok(None)
+    }
+
+    /// Parses an RFC 3339 timestamp into Unix seconds, clamping negatives to zero.
+    fn parse_uploaded_at(value: &str) -> Option<u64> {
+        DateTime::parse_from_rfc3339(value)
+            .ok()
+            .map(|date| date.with_timezone(&Utc).timestamp().max(0) as u64)
+    }
+
+    /// Derives a quality label (`8K` .. `SD`, `ORIGINAL`, or `MP4`) from a media URL.
+    fn extract_media_quality(url: &str) -> String {
+        let lowercase = url.to_ascii_lowercase();
+        for label in ["8k", "6k", "5k", "4k", "hq", "hd", "sd"] {
+            // `{label}.mp4` also matches paths ending in `/{label}.mp4`.
+            if lowercase.contains(&format!("free_{label}"))
+                || lowercase.contains(&format!("_{label}_"))
+                || lowercase.contains(&format!("{label}.mp4"))
+            {
+                return label.to_ascii_uppercase();
+            }
+        }
+        if lowercase.contains("original") {
+            return "ORIGINAL".to_string();
+        }
+        "MP4".to_string()
+    }
+
+    /// Sort key for quality labels; lower ranks sort first and unknown labels sort last.
+    fn quality_rank(label: &str) -> usize {
+        match label.to_ascii_lowercase().as_str() {
+            "sd" => 0,
+            "hq" => 1,
+            "hd" => 2,
+            "4k" => 3,
+            "5k" => 4,
+            "6k" => 5,
+            "8k" => 6,
+            "original" => 7,
+            _ => 8,
+        }
+    }
+
+    /// Collects the MP4 URLs for `slug` found in the page, ordered by ascending quality rank.
+    ///
+    /// Preview/trailer clips and files outside `/videos/{slug}/` are skipped.
+    fn extract_formats(html: &str, referer: &str, slug: &str) -> Result<Vec<VideoFormat>> {
+        let regex = Self::regex(
+            r#"https://(?:cdns|mcdnds)\.vrporn\.com/[^"'\s]+\.mp4\?[^"'\s<]+"#,
+        )?;
+        let mut seen = HashSet::new();
+        let mut raw_formats = Vec::new();
+        let slug = slug.trim_matches('/').to_ascii_lowercase();
+        let slug_marker = format!("/videos/{slug}/");
+
+        for found in regex.find_iter(html).map(|value| value.as_str().to_string()) {
+            let lowercase = found.to_ascii_lowercase();
+            if lowercase.contains("shortvideo")
+                || lowercase.contains("preview")
+                || lowercase.contains("trailer")
+                || lowercase.contains("video-short")
+            {
+                continue;
+            }
+            if !slug.is_empty() && !lowercase.contains(&slug_marker) {
+                continue;
+            }
+            if seen.insert(found.clone()) {
+                let label = Self::extract_media_quality(&found);
+                raw_formats.push((Self::quality_rank(&label), label, found));
+            }
+        }
+
+        raw_formats.sort_by_key(|(rank, _, _)| *rank);
+
+        let formats = raw_formats
+            .into_iter()
+            .map(|(_, label, url)| {
+                let mut format =
+                    VideoFormat::new(url, label.clone(), "mp4".to_string()).format_id(label);
+                format.add_http_header("Referer".to_string(), referer.to_string());
+                format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string());
+                format
+            })
+            .collect::<Vec<_>>();
+
+        Ok(formats)
+    }
+
+    /// Returns the first preview/trailer clip URL that mentions `slug` (any clip if empty).
+    fn extract_preview(html: &str, slug: &str) -> Result<Option<String>> {
+        let regex = Self::regex(
+            r#"https://(?:cdn|mcdn|mcdnds)\.vrporn\.com/[^"'\s]+(?:shortvideo[^"'\s]*|preview[^"'\s]*|trailer[^"'\s]*)\.mp4(?:\?[^"'\s<]+)?"#,
+        )?;
+        let slug = slug.trim_matches('/').to_ascii_lowercase();
+        Ok(regex.find_iter(html).find_map(|value| {
+            let found = value.as_str().to_string();
+            if slug.is_empty() || found.to_ascii_lowercase().contains(&slug) {
+                Some(found)
+            } else {
+                None
+            }
+        }))
+    }
+
+    /// Appends `name` to `tags` unless it is empty or already present.
+    fn push_tag(tags: &mut Vec<String>, name: String) {
+        if !name.is_empty() && !tags.contains(&name) {
+            tags.push(name);
+        }
+    }
+
+    /// Enriches a listing item with data scraped from its detail page.
+    ///
+    /// Fills title, rating, views, studio, tags, JSON-LD metadata, preview clip, and formats.
+    fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
+        let document = Html::parse_document(html);
+        let title_selector = Self::selector("h1")?;
+        let sub_text_selector = Self::selector("span.ui-player-title__sub-text")?;
+        let studio_selector = Self::selector(".ui-detail-video a[href*=\"/studio/\"]")?;
+        let pornstar_selector = Self::selector("div.starrings a[href*=\"/pornstars/\"]")?;
+        let tag_selector = Self::selector("a[href*=\"/tag/\"]")?;
+        let verified_selector = Self::selector(".detail-video__verfy")?;
+
+        if let Some(title) = document.select(&title_selector).next() {
+            let title = Self::text_of(&title);
+            if !title.is_empty() {
+                item.title = title;
+            }
+        }
+
+        let sub_texts = document
+            .select(&sub_text_selector)
+            .map(|element| Self::text_of(&element))
+            .collect::<Vec<_>>();
+        if let Some(value) = sub_texts.first() {
+            let cleaned = value.trim_end_matches('%').trim();
+            // The numeric type is inferred from the `rating` field.
+            if let Ok(parsed) = cleaned.parse() {
+                item.rating = Some(parsed);
+            }
+        }
+        if let Some(value) = sub_texts.get(1) {
+            if item.views.is_none() {
+                item.views = parse_abbreviated_number(value);
+            }
+        }
+
+        if document.select(&verified_selector).next().is_some() {
+            item.verified = Some(true);
+        }
+
+        if let Some(studio) = document.select(&studio_selector).next() {
+            let title = Self::text_of(&studio);
+            if !title.is_empty() {
+                item.uploader = Some(title);
+            }
+            if let Some(href) = studio.value().attr("href") {
+                item.uploaderUrl = Some(self.normalize_url(href));
+            }
+        }
+
+        let mut tags = item.tags.take().unwrap_or_default();
+        let mut discovered_formats = Vec::new();
+        for element in document.select(&pornstar_selector) {
+            Self::push_tag(&mut tags, Self::text_of(&element));
+        }
+        for element in document.select(&tag_selector) {
+            Self::push_tag(&mut tags, Self::text_of(&element));
+        }
+
+        if let Some(json_ld) = self.extract_json_ld_video(html)? {
+            if let Some(title) = json_ld.get("name").and_then(Value::as_str) {
+                if !title.trim().is_empty() {
+                    item.title = Self::decode_text(title);
+                }
+            }
+
+            if item.thumb.is_empty() {
+                if let Some(thumb) = json_ld.get("thumbnailUrl").and_then(Value::as_str) {
+                    item.thumb = self.normalize_url(thumb);
+                }
+            }
+
+            if item.uploadedAt.is_none() {
+                if let Some(uploaded_at) = json_ld.get("uploadDate").and_then(Value::as_str) {
+                    item.uploadedAt = Self::parse_uploaded_at(uploaded_at);
+                }
+            }
+
+            if item.uploader.is_none() {
+                if let Some(author) = json_ld.get("author") {
+                    if let Some(name) = author.get("name").and_then(Value::as_str) {
+                        item.uploader = Some(Self::decode_text(name));
+                    }
+                    if let Some(url) = author.get("url").and_then(Value::as_str) {
+                        item.uploaderUrl = Some(self.normalize_url(url));
+                    }
+                }
+            }
+
+            if let Some(actors) = json_ld.get("actor").and_then(Value::as_array) {
+                for actor in actors {
+                    if let Some(name) = actor.get("name").and_then(Value::as_str) {
+                        Self::push_tag(&mut tags, Self::decode_text(name));
+                    }
+                }
+            }
+
+            if let Some(embed_url) = json_ld.get("embedUrl").and_then(Value::as_str) {
+                let source = self.normalize_url(embed_url);
+                // NOTE(review): the iframe attributes are assumed; align the markup with
+                // what the other providers emit.
+                item.embed = Some(VideoEmbed {
+                    html: format!(
+                        r#"<iframe src="{source}" frameborder="0" allowfullscreen></iframe>"#
+                    ),
+                    source,
+                });
+            }
+
+            if let Some(content_url) = json_ld.get("contentUrl").and_then(Value::as_str) {
+                let mut format = VideoFormat::new(
+                    content_url.to_string(),
+                    Self::extract_media_quality(content_url),
+                    "mp4".to_string(),
+                );
+                format.add_http_header("Referer".to_string(), item.url.clone());
+                format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string());
+                discovered_formats.push(format);
+            }
+        }
+
+        if let Some(preview) = Self::extract_preview(html, &item.id)? {
+            item.preview = Some(preview);
+        }
+
+        for format in Self::extract_formats(html, &item.url, &item.id)? {
+            if !discovered_formats.iter().any(|existing| existing.url == format.url) {
+                discovered_formats.push(format);
+            }
+        }
+        if !discovered_formats.is_empty() {
+            item.formats = Some(discovered_formats);
+        }
+
+        if !tags.is_empty() {
+            item.tags = Some(tags);
+        }
+
+        Ok(item)
+    }
+
+    /// Fetches the detail page for `item`; on any failure the listing item is returned as-is.
+    async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
+        let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video");
+        match self
+            .fetch_html(&mut requester, &item.url, &format!("{}/", self.url))
+            .await
+        {
+            Ok(html) => match self.apply_detail_video(item.clone(), &html) {
+                Ok(enriched) => enriched,
+                Err(error) => {
+                    report_provider_error_background(
+                        CHANNEL_ID,
+                        "apply_detail_video",
+                        &error.to_string(),
+                    );
+                    item
+                }
+            },
+            Err(error) => {
+                report_provider_error_background(CHANNEL_ID, "fetch_detail", &error.to_string());
+                item
+            }
+        }
+    }
+
+    /// Returns the videos listed at `url`, served from cache when younger than 15 minutes.
+    ///
+    /// With `enrich_details`, each item is completed from its detail page, four at a time.
+    async fn fetch_items_for_url(
+        &self,
+        cache: VideoCache,
+        url: String,
+        per_page_limit: usize,
+        enrich_details: bool,
+        options: &ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        if let Some((time, items)) = cache.get(&url) {
+            if time.elapsed().unwrap_or_default().as_secs() < 60 * 15 {
+                return Ok(items.clone());
+            }
+        }
+
+        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url");
+        let html = self
+            .fetch_html(&mut requester, &url, &format!("{}/", self.url))
+            .await?;
+        let items = self.parse_list_videos(&html)?;
+        if items.is_empty() {
+            return Ok(vec![]);
+        }
+
+        let limited = items
+            .into_iter()
+            .take(per_page_limit.max(1))
+            .collect::<Vec<_>>();
+
+        if !enrich_details {
+            cache.insert(url, limited.clone());
+            return Ok(limited);
+        }
+
+        // `buffered` (unlike `buffer_unordered`) yields results in listing order.
+        let enriched = stream::iter(limited.into_iter().map(|item| {
+            let provider = self.clone();
+            let options = options.clone();
+            async move { provider.enrich_video(item, &options).await }
+        }))
+        .buffered(4)
+        .collect::<Vec<_>>()
+        .await;
+
+        cache.insert(url, enriched.clone());
+        Ok(enriched)
+    }
+
+    /// Lists videos for the selected options and sort; details are fetched on page 1 only.
+    async fn get(
+        &self,
+        cache: VideoCache,
+        page: u16,
+        sort: &str,
+        per_page_limit: usize,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let target = self.resolve_option_target(&options, sort);
+        let url = self.build_url_for_target(&target, page);
+        self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options)
+            .await
+    }
+
+    /// Lists videos for a free-text query; details are fetched on page 1 only.
+    async fn query(
+        &self,
+        cache: VideoCache,
+        page: u16,
+        sort: &str,
+        query: &str,
+        per_page_limit: usize,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let target = self.resolve_query_target(query, sort).await;
+        let url = self.build_url_for_target(&target, page);
+        self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options)
+            .await
+    }
+}
+
+#[async_trait]
+impl Provider for VrpornProvider {
+    async fn get_videos(
+        &self,
+        cache: VideoCache,
+        pool: DbPool,
+        sort: String,
+        query: Option<String>,
+        page: String,
+        per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        let _ = pool;
+        let page = page.parse::<u16>().unwrap_or(1);
+        let per_page_limit = per_page.parse::<usize>().unwrap_or(30);
+
+        let result = match query {
+            Some(query) if !query.trim().is_empty() => {
+                self.query(cache, page, &sort, &query, per_page_limit, options)
+                    .await
+            }
+            _ => self.get(cache, page, &sort, per_page_limit, options).await,
+        };
+
+        match result {
+            Ok(videos) => videos,
+            Err(error) => {
+                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
+                vec![]
+            }
+        }
+    }
+
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        Some(self.build_channel(clientversion))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn provider() -> VrpornProvider {
+        VrpornProvider {
+            url: BASE_URL.to_string(),
+            categories: Arc::new(RwLock::new(vec![FilterOption {
+                id: "https://vrporn.com/tag/anal/".to_string(),
+                title: "Anal".to_string(),
+            }])),
+            tags: Arc::new(RwLock::new(vec![FilterOption {
+                id: "https://vrporn.com/tag/passthrough/".to_string(),
+                title: "Passthrough".to_string(),
+            }])),
+            stars: Arc::new(RwLock::new(vec![FilterOption {
+                id: "https://vrporn.com/pornstars/elena-vrq/".to_string(),
+                title: "Elena VRQ".to_string(),
+            }])),
+            sites: Arc::new(RwLock::new(vec![FilterOption {
+                id: "https://vrporn.com/studio/vrsun/".to_string(),
+                title: "VRSUN".to_string(),
+            }])),
+        }
+    }
+
+    #[test]
+    fn builds_search_page_two_url() {
+        let provider = provider();
+        let url = provider.build_url_for_target(&Target::Search("adriana chechik".to_string()), 2);
+        assert_eq!(
+            url,
+            "https://vrporn.com/search/videos/page/2/?query=adriana+chechik"
+        );
+    }
+
+    #[test]
+    fn encodes_reserved_characters_in_search_query() {
+        let provider = provider();
+        let url = provider.build_url_for_target(&Target::Search("a&b #1".to_string()), 1);
+        assert_eq!(url, "https://vrporn.com/search/videos/?query=a%26b+%231");
+    }
+
+    #[test]
+    fn builds_archive_page_two_url() {
+        let provider = provider();
+        let url = provider.build_url_for_target(
+            &Target::Archive("https://vrporn.com/tag/anal/".to_string()),
+            2,
+        );
+        assert_eq!(url, "https://vrporn.com/tag/anal/page/2/");
+    }
+
+    #[test]
+    fn extracts_formats_from_detail_html() {
+        let formats = VrpornProvider::extract_formats(
+            r#"
+            <source src="https://cdns.vrporn.com/videos/test/free_6k.mp4?token=abc">
+            <source src="https://cdns.vrporn.com/videos/test/free_4k.mp4?token=def">
+            "#,
+            "https://vrporn.com/test/",
+            "test",
+        )
+        .expect("formats should parse");
+
+        assert_eq!(formats.len(), 2);
+        assert!(formats[0].url.contains("free_4k.mp4"));
+        assert!(formats[1].url.contains("free_6k.mp4"));
+    }
+}