use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ Provider, report_provider_error, report_provider_error_background, requester_or_default, }; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::requester::Requester; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; use regex::Regex; use scraper::{ElementRef, Html, Selector}; use std::collections::HashMap; use std::sync::{Arc, RwLock}; use std::{thread, vec}; use wreq::Version; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "mainstream-tube", tags: &["tube", "mixed", "hd"], }; const BASE_URL: &str = "https://www.porntrex.com"; const CHANNEL_ID: &str = "porntrex"; const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; const HTML_ACCEPT: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; error_chain! { foreign_links { Io(std::io::Error); } errors { Parse(msg: String) { description("parse error") display("parse error: {}", msg) } } } #[derive(Debug, Clone)] pub struct PorntrexProvider { url: String, categories: Arc>>, tag_map: Arc>>, } #[derive(Debug, Clone)] enum Target { Latest, Popular, TopRated, Search(String), Archive { url: String, page_mode: PageMode, }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum PageMode { SitePaged, LocalSlice, } impl PorntrexProvider { pub fn new() -> Self { let provider = Self { url: BASE_URL.to_string(), categories: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), tag_map: Arc::new(RwLock::new(HashMap::new())), }; provider.spawn_initial_load(); provider } fn spawn_initial_load(&self) { let url = self.url.clone(); let categories = Arc::clone(&self.categories); let tag_map = Arc::clone(&self.tag_map); thread::spawn(move || { let runtime = match tokio::runtime::Builder::new_current_thread() .enable_all() .build() { Ok(runtime) => runtime, Err(error) => { report_provider_error_background( CHANNEL_ID, "spawn_initial_load.runtime_build", &error.to_string(), ); return; } }; runtime.block_on(async move { if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await { report_provider_error_background( CHANNEL_ID, "load_categories", &error.to_string(), ); } if let Err(error) = Self::load_tags(&url, Arc::clone(&tag_map)).await { report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string()); } }); }); } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { let categories = self .categories .read() .map(|value| value.clone()) .unwrap_or_default(); Channel { id: CHANNEL_ID.to_string(), name: "PornTrex".to_string(), description: "PornTrex videos with latest, most viewed, top rated, category, and tag-aware search routing." .to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=porntrex.com".to_string(), status: "active".to_string(), categories: categories.iter().map(|value| value.title.clone()).collect(), options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse PornTrex ranking feeds.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "new".to_string(), title: "Latest".to_string(), }, FilterOption { id: "popular".to_string(), title: "Most Viewed".to_string(), }, FilterOption { id: "rated".to_string(), title: "Top Rated".to_string(), }, ], multiSelect: false, }, ChannelOption { id: "categories".to_string(), title: "Categories".to_string(), description: "Browse a PornTrex category archive.".to_string(), systemImage: "square.grid.2x2".to_string(), colorName: "orange".to_string(), options: categories, multiSelect: false, }, ], nsfw: true, cacheDuration: Some(1800), } } fn selector(value: &str) -> Result { Selector::parse(value) .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) } fn regex(value: &str) -> Result { Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) } fn decode_html(text: &str) -> String { decode(text.as_bytes()) .to_string() .unwrap_or_else(|_| text.to_string()) } fn collapse_whitespace(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } fn text_of(element: &ElementRef<'_>) -> String { Self::decode_html(&Self::collapse_whitespace( &element.text().collect::>().join(" "), )) } fn normalize_title(title: &str) -> String { title .trim() .trim_start_matches('#') .replace(['_', '-'], " ") .split_whitespace() .collect::>() .join(" ") .to_ascii_lowercase() } fn normalize_url(&self, url: &str) -> String { let trimmed = url.trim(); if trimmed.is_empty() { return String::new(); } if trimmed.starts_with("http://") || trimmed.starts_with("https://") { return trimmed.to_string(); } if trimmed.starts_with("//") { return format!("https:{trimmed}"); } if trimmed.starts_with('/') { return format!("{}{}", self.url, trimmed); } format!("{}/{}", self.url, trimmed.trim_start_matches("./")) } fn html_headers(referer: &str) -> Vec<(String, String)> { vec![ ("User-Agent".to_string(), FIREFOX_UA.to_string()), ("Accept".to_string(), HTML_ACCEPT.to_string()), ("Referer".to_string(), referer.to_string()), ] } fn build_search_path(query: &str) -> String { query .split_whitespace() .map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string()) .collect::>() .join("-") } fn build_archive_page_url(archive_url: &str, page: u16) -> String { if page <= 1 { return archive_url.trim_end_matches('/').to_string() + "/"; } format!("{}/{page}/", archive_url.trim_end_matches('/')) } fn archive_target(url: String, page_mode: PageMode) -> Target { Target::Archive { url, page_mode } } fn build_target_url(&self, target: &Target, page: u16) -> String { match target { Target::Latest => { Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page) } Target::Popular => { Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page) } Target::TopRated => { Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page) } Target::Search(query) => Self::build_archive_page_url( &format!("{}/search/{}/", self.url, Self::build_search_path(query)), page, ), Target::Archive { url, page_mode } => match page_mode { PageMode::SitePaged => Self::build_archive_page_url(url, page), PageMode::LocalSlice => Self::build_archive_page_url(url, 1), }, } } async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result { requester .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11)) .await .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) } fn slug_remainder(href: &str, prefix: &str) -> Option { let trimmed = href.trim().trim_end_matches('/'); let remainder = trimmed.strip_prefix(prefix)?.trim_matches('/'); if remainder.is_empty() || remainder.contains('/') { return None; } Some(remainder.to_string()) } fn push_category(target: &Arc>>, item: FilterOption) { if item.id.is_empty() || item.title.is_empty() { return; } if let Ok(mut values) = target.write() { let normalized = Self::normalize_title(&item.title); if !values .iter() .any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized) { values.push(item); } } } fn insert_tag_mapping(target: &Arc>>, title: &str, href: &str) { let normalized_title = Self::normalize_title(title); if normalized_title.is_empty() || href.is_empty() { return; } if let Ok(mut values) = target.write() { values.insert(normalized_title, href.to_string()); } } async fn load_categories( base_url: &str, categories: Arc>>, ) -> Result<()> { let mut requester = Requester::new(); let page_url = format!("{base_url}/categories/"); let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?; let document = Html::parse_document(&html); let selector = Self::selector("a.item[href]")?; let prefix = format!("{base_url}/categories/"); for element in document.select(&selector) { let href = element.value().attr("href").unwrap_or_default(); let Some(_slug) = Self::slug_remainder(href, &prefix) else { continue; }; let title = element .value() .attr("title") .map(Self::decode_html) .filter(|value| !value.trim().is_empty()) .unwrap_or_else(|| Self::text_of(&element)); let title = title.trim().to_string(); if title.is_empty() { continue; } Self::push_category( &categories, FilterOption { id: format!("{}/", href.trim_end_matches('/')), title, }, ); } Ok(()) } async fn load_tags(base_url: &str, tag_map: Arc>>) -> Result<()> { let mut requester = Requester::new(); let page_url = format!("{base_url}/tags/"); let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?; let document = Html::parse_document(&html); let selector = Self::selector("div.list-tags a[href]")?; let prefix = format!("{base_url}/tags/"); for element in document.select(&selector) { let href = element.value().attr("href").unwrap_or_default(); let Some(slug) = Self::slug_remainder(href, &prefix) else { continue; }; let title = Self::text_of(&element); if title.is_empty() { continue; } let canonical = format!("{}/", href.trim_end_matches('/')); Self::insert_tag_mapping(&tag_map, &title, &canonical); Self::insert_tag_mapping(&tag_map, &slug, &canonical); } Ok(()) } fn parse_duration(text: &str) -> u32 { parse_time_to_seconds(text) .and_then(|value| u32::try_from(value).ok()) .unwrap_or(0) } fn parse_views(text: &str) -> Option { let cleaned = text .replace("views", "") .replace("view", "") .replace([',', ' '], ""); parse_abbreviated_number(cleaned.trim()) } fn parse_rating(text: &str) -> Option { let digits = text .chars() .filter(|value| value.is_ascii_digit() || *value == '.') .collect::(); digits.parse::().ok() } fn parse_list_videos(&self, html: &str) -> Result> { let document = Html::parse_document(html); let card_selector = Self::selector("div.video-preview-screen.video-item.thumb-item")?; let link_selector = Self::selector("a[href*=\"/video/\"]")?; let title_link_selector = Self::selector("p.inf a[href*=\"/video/\"], a[title][href*=\"/video/\"]")?; let image_selector = Self::selector("img.cover")?; let duration_selector = Self::selector("div.durations")?; let views_selector = Self::selector("div.viewsthumb")?; let rating_selector = Self::selector("ul.list-unstyled li.pull-right")?; let mut items = Vec::new(); for card in document.select(&card_selector) { let Some(link) = card.select(&link_selector).next() else { continue; }; let href = link.value().attr("href").unwrap_or_default(); let page_url = self.normalize_url(href); if page_url.is_empty() { continue; } let id = card .value() .attr("data-item-id") .map(str::to_string) .filter(|value| !value.is_empty()) .unwrap_or_else(|| { page_url .trim_end_matches('/') .split('/') .nth_back(1) .unwrap_or_default() .to_string() }); if id.is_empty() { continue; } let image = card.select(&image_selector).next(); let thumb = image .and_then(|value| value.value().attr("data-src").or_else(|| value.value().attr("src"))) .map(|value| self.normalize_url(value)) .unwrap_or_default(); let title = card .select(&title_link_selector) .next() .or_else(|| card.select(&link_selector).find(|value| value.value().attr("title").is_some())) .and_then(|value| value.value().attr("title").map(Self::decode_html).or_else(|| { let text = Self::text_of(&value); (!text.is_empty()).then_some(text) })) .filter(|value| !value.trim().is_empty()) .unwrap_or_else(|| { image .and_then(|value| value.value().attr("alt").map(Self::decode_html)) .unwrap_or_default() }); if title.is_empty() { continue; } let duration = card .select(&duration_selector) .next() .map(|value| Self::parse_duration(&Self::text_of(&value))) .unwrap_or(0); let views = card .select(&views_selector) .next() .and_then(|value| Self::parse_views(&Self::text_of(&value))); let rating = card .select(&rating_selector) .next() .and_then(|value| Self::parse_rating(&Self::text_of(&value))); let mut item = VideoItem::new( id, title.trim().to_string(), page_url, CHANNEL_ID.to_string(), thumb, duration, ); item.views = views; item.rating = rating; items.push(item); } Ok(items) } fn parse_format_urls(html: &str) -> Result> { let pairs = [ ("video_url", "video_url_text"), ("video_alt_url", "video_alt_url_text"), ("video_alt_url2", "video_alt_url2_text"), ("video_alt_url3", "video_alt_url3_text"), ]; let mut formats = Vec::new(); let mut seen = std::collections::HashSet::new(); for (url_key, label_key) in pairs { let url_re = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#))?; let label_re = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#))?; let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else { continue; }; let url = url_match.as_str().replace("\\/", "/"); if !seen.insert(url.clone()) { continue; } let label = label_re .captures(html) .and_then(|value| value.get(1)) .map(|value| value.as_str().trim().to_string()) .filter(|value| !value.is_empty()) .unwrap_or_else(|| "mp4".to_string()); let normalized_label = label.replace(" HD", "").replace(" FHD", "").trim().to_string(); let format = VideoFormat::new(url, normalized_label.clone(), "mp4".to_string()) .format_id(normalized_label.clone()) .format_note(label); formats.push(format); } Ok(formats) } fn parse_aspect_ratio(html: &str) -> Result> { let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#)?; let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#)?; let Some(width) = width_re .captures(html) .and_then(|value| value.get(1)) .and_then(|value| value.as_str().parse::().ok()) else { return Ok(None); }; let Some(height) = height_re .captures(html) .and_then(|value| value.get(1)) .and_then(|value| value.as_str().parse::().ok()) else { return Ok(None); }; Ok((height > 0.0).then_some(width / height)) } fn collect_tags(document: &Html, html: &str) -> Result> { let category_selector = Self::selector("div.items-holder.js-categories a[href*=\"/categories/\"]")?; let tag_selector = Self::selector("div.item a[href*=\"/tags/\"]")?; let mut values = Vec::new(); for element in document.select(&category_selector) { let value = Self::text_of(&element); if !value.is_empty() { values.push(value); } } for element in document.select(&tag_selector) { let value = Self::text_of(&element); if !value.is_empty() { values.push(value); } } for pattern in [r#"video_categories:\s*'([^']*)'"#, r#"video_tags:\s*'([^']*)'"#] { let re = Self::regex(pattern)?; if let Some(raw) = re.captures(html).and_then(|value| value.get(1)) { for entry in raw .as_str() .split(',') .map(str::trim) .map(Self::decode_html) .filter(|value| !value.is_empty()) { values.push(entry); } } } let mut unique = Vec::new(); for value in values { let normalized = Self::normalize_title(&value); if normalized.is_empty() || normalized == "-" { continue; } if !unique .iter() .any(|existing: &String| Self::normalize_title(existing) == normalized) { unique.push(value); } } Ok(unique) } fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result { let document = Html::parse_document(html); let title_selector = Self::selector("h1")?; let uploader_selector = Self::selector("div.info-block div.block-user div.username a[href*=\"/members/\"]")?; let stat_selector = Self::selector("div.info-block div.item span")?; if let Some(title) = document .select(&title_selector) .next() .map(|value| Self::text_of(&value)) .filter(|value| !value.is_empty()) { item.title = title; } if let Some(uploader) = document.select(&uploader_selector).next() { let uploader_name = Self::text_of(&uploader); let uploader_url = uploader .value() .attr("href") .map(|value| self.normalize_url(value)) .unwrap_or_default(); if !uploader_name.is_empty() { item.uploader = Some(uploader_name); } if !uploader_url.is_empty() { let uploader_id = uploader_url .trim_end_matches('/') .split('/') .next_back() .unwrap_or_default() .to_string(); item.uploaderUrl = Some(uploader_url); if !uploader_id.is_empty() { item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}")); } } } for stat in document.select(&stat_selector).map(|value| Self::text_of(&value)) { if item.views.is_none() { item.views = Self::parse_views(&stat); } if item.duration == 0 { let duration = Self::parse_duration(&stat); if duration > 0 { item.duration = duration; } } } let tags = Self::collect_tags(&document, html)?; if !tags.is_empty() { item.tags = Some(tags); } // let formats = Self::parse_format_urls(html)?; // if !formats.is_empty() { // item.formats = Some(formats); // } if item.aspectRatio.is_none() { item.aspectRatio = Self::parse_aspect_ratio(html)?; } Ok(item) } async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { let mut requester = requester_or_default(options, CHANNEL_ID, "porntrex.enrich_item.missing_requester"); match Self::fetch_html(&mut requester, &item.url, &item.url).await { Ok(html) => match self.apply_detail_video(item.clone(), &html) { Ok(value) => value, Err(error) => { report_provider_error_background( CHANNEL_ID, "enrich_item.apply_detail_video", &format!("url={}; error={error}", item.url), ); item } }, Err(error) => { report_provider_error_background( CHANNEL_ID, "enrich_item.fetch_html", &format!("url={}; error={error}", item.url), ); item } } } fn resolve_sort_target(sort: &str) -> Target { match sort.trim().to_ascii_lowercase().as_str() { "popular" | "viewed" | "most_viewed" => Target::Popular, "rated" | "rating" | "top" => Target::TopRated, _ => Target::Latest, } } fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target { if let Some(category) = options.categories.as_deref() { if category.starts_with(&self.url) && category != "all" { return Self::archive_target(category.to_string(), PageMode::SitePaged); } } Self::resolve_sort_target(sort) } fn lookup_category_target(&self, query: &str) -> Option { let normalized_query = Self::normalize_title(query); self.categories .read() .ok()? .iter() .find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query) .map(|value| value.id.clone()) } fn resolve_query_target(&self, query: &str) -> Target { let trimmed = query.trim().trim_start_matches('@'); if let Some((kind, raw_value)) = trimmed.split_once(':') { let value = raw_value.trim().trim_matches('/').replace(' ', "-"); if !value.is_empty() { match kind.trim().to_ascii_lowercase().as_str() { "tag" | "tags" => { return Self::archive_target( format!("{}/tags/{value}/", self.url), PageMode::LocalSlice, ); } "category" | "categories" => { return Self::archive_target( format!("{}/categories/{value}/", self.url), PageMode::SitePaged, ); } _ => {} } } } if let Some(category) = self.lookup_category_target(trimmed) { return Self::archive_target(category, PageMode::SitePaged); } let normalized = Self::normalize_title(trimmed); if let Some(target) = self .tag_map .read() .ok() .and_then(|value| value.get(&normalized).cloned()) { return Self::archive_target(target, PageMode::LocalSlice); } Target::Search(trimmed.to_string()) } fn catalogs_need_refresh(&self) -> bool { let categories_len = self .categories .read() .map(|value| value.len()) .unwrap_or_default(); let tag_count = self .tag_map .read() .map(|value| value.len()) .unwrap_or_default(); categories_len <= 1 || tag_count == 0 } async fn refresh_catalogs(&self) { if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await { report_provider_error_background( CHANNEL_ID, "refresh_catalogs.categories", &error.to_string(), ); } if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await { report_provider_error_background( CHANNEL_ID, "refresh_catalogs.tags", &error.to_string(), ); } } async fn fetch_target( &self, cache: VideoCache, target: Target, page: u16, per_page_limit: usize, options: ServerOptions, ) -> Result> { let page_mode = match &target { Target::Archive { page_mode, .. } => *page_mode, _ => PageMode::SitePaged, }; let source_url = self.build_target_url(&target, page); let cache_key = match page_mode { PageMode::SitePaged => source_url.clone(), PageMode::LocalSlice => format!("{source_url}#page={page}&per_page={per_page_limit}"), }; let old_items = match cache.get(&cache_key) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { return Ok(items.clone()); } items.clone() } None => vec![], }; let mut requester = requester_or_default(&options, CHANNEL_ID, "porntrex.fetch_target.missing_requester"); let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await { Ok(value) => value, Err(error) => { report_provider_error( CHANNEL_ID, "fetch_target.request", &format!("url={source_url}; error={error}"), ) .await; return Ok(old_items); } }; if html.trim().is_empty() { report_provider_error( CHANNEL_ID, "fetch_target.empty_response", &format!("url={source_url}"), ) .await; return Ok(old_items); } let items = self.parse_list_videos(&html)?; if items.is_empty() { return Ok(old_items); } let limited_items = match page_mode { PageMode::SitePaged => items .into_iter() .take(per_page_limit.max(1)) .collect::>(), PageMode::LocalSlice => { let start = page.saturating_sub(1) as usize * per_page_limit.max(1); items.into_iter() .skip(start) .take(per_page_limit.max(1)) .collect::>() } }; if limited_items.is_empty() { cache.insert(cache_key, vec![]); return Ok(vec![]); } let enriched = stream::iter(limited_items.into_iter().map(|item| { let provider = self.clone(); let options = options.clone(); async move { provider.enrich_item(item, &options).await } })) .buffer_unordered(4) .collect::>() .await; cache.remove(&cache_key); cache.insert(cache_key, enriched.clone()); Ok(enriched) } } #[async_trait] impl Provider for PorntrexProvider { async fn get_videos( &self, cache: VideoCache, _pool: DbPool, sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1).max(1); let per_page_limit = per_page.parse::().unwrap_or(10).clamp(1, 60); let target = match query { Some(query) if !query.trim().is_empty() => { let query = query.trim(); let mut target = self.resolve_query_target(query); if matches!(target, Target::Search(_)) && self.catalogs_need_refresh() { self.refresh_catalogs().await; target = self.resolve_query_target(query); } target } _ => self.resolve_option_target(&options, &sort), }; match self .fetch_target(cache, target, page, per_page_limit, options.clone()) .await { Ok(items) => items, Err(error) => { report_provider_error( CHANNEL_ID, "get_videos.fetch_target", &format!("sort={sort}; page={page}; error={error}"), ) .await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::*; #[test] fn builds_search_url() { let provider = PorntrexProvider::new(); assert_eq!( provider.build_target_url(&Target::Search("adriana chechik".to_string()), 2), "https://www.porntrex.com/search/adriana-chechik/2/" ); } #[test] fn resolves_tag_prefix() { let provider = PorntrexProvider::new(); match provider.resolve_query_target("tag:blowjob") { Target::Archive { url, page_mode } => { assert_eq!(url, "https://www.porntrex.com/tags/blowjob/"); assert_eq!(page_mode, PageMode::LocalSlice); } _ => panic!("expected archive target"), } } #[test] fn builds_local_slice_archive_url_without_numeric_page() { let provider = PorntrexProvider::new(); let target = PorntrexProvider::archive_target( "https://www.porntrex.com/tags/anal-creampie/".to_string(), PageMode::LocalSlice, ); assert_eq!( provider.build_target_url(&target, 3), "https://www.porntrex.com/tags/anal-creampie/" ); } }