From 61f39e23b7b31204082ae578cd6f30126aeda205 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 11 Apr 2026 21:43:36 +0000 Subject: [PATCH] porntrex --- build.rs | 5 + docs/provider-catalog.md | 1 + src/providers/porntrex.rs | 987 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 993 insertions(+) create mode 100644 src/providers/porntrex.rs diff --git a/build.rs b/build.rs index 12d246a..04cd09c 100644 --- a/build.rs +++ b/build.rs @@ -91,6 +91,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "yesporn", ty: "YespornProvider", }, + ProviderDef { + id: "porntrex", + module: "porntrex", + ty: "PorntrexProvider", + }, ProviderDef { id: "sxyprn", module: "sxyprn", diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 4c89471..827c90c 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -37,6 +37,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. | | `pornmz` | `mainstream-tube` | no | no | Mainstream archive. | | `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. | +| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. | | `redtube` | `mainstream-tube` | no | no | Mainstream archive. | | `rule34gen` | `ai` | no | no | AI group example. | | `rule34video` | `hentai-animation` | no | no | Hentai group example. | diff --git a/src/providers/porntrex.rs b/src/providers/porntrex.rs new file mode 100644 index 0000000..2d8f2cb --- /dev/null +++ b/src/providers/porntrex.rs @@ -0,0 +1,987 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; +use std::{thread, vec}; +use wreq::Version; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "mixed", "hd"], + }; + +const BASE_URL: &str = "https://www.porntrex.com"; +const CHANNEL_ID: &str = "porntrex"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +error_chain! { + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct PorntrexProvider { + url: String, + categories: Arc>>, + tag_map: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + Latest, + Popular, + TopRated, + Search(String), + Archive { + url: String, + page_mode: PageMode, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PageMode { + SitePaged, + LocalSlice, +} + +impl PorntrexProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tag_map: Arc::new(RwLock::new(HashMap::new())), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + let tag_map = Arc::clone(&self.tag_map); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await { + report_provider_error_background( + CHANNEL_ID, + "load_categories", + &error.to_string(), + ); + } + if let Err(error) = Self::load_tags(&url, Arc::clone(&tag_map)).await { + report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string()); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "PornTrex".to_string(), + description: + "PornTrex videos with latest, most viewed, top rated, category, and tag-aware search routing." + .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=porntrex.com".to_string(), + status: "active".to_string(), + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse PornTrex ranking feeds.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Browse a PornTrex category archive.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn text_of(element: &ElementRef<'_>) -> String { + Self::decode_html(&Self::collapse_whitespace( + &element.text().collect::>().join(" "), + )) + } + + fn normalize_title(title: &str) -> String { + title + .trim() + .trim_start_matches('#') + .replace(['_', '-'], " ") + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + fn normalize_url(&self, url: &str) -> String { + let trimmed = url.trim(); + if trimmed.is_empty() { + return String::new(); + } + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + return trimmed.to_string(); + } + if trimmed.starts_with("//") { + return format!("https:{trimmed}"); + } + if trimmed.starts_with('/') { + return format!("{}{}", self.url, trimmed); + } + format!("{}/{}", self.url, trimmed.trim_start_matches("./")) + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + fn build_search_path(query: &str) -> String { + query + .split_whitespace() + .map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string()) + .collect::>() + .join("-") + } + + fn build_archive_page_url(archive_url: &str, page: u16) -> String { + if page <= 1 { + return archive_url.trim_end_matches('/').to_string() + "/"; + } + format!("{}/{page}/", archive_url.trim_end_matches('/')) + } + + fn archive_target(url: String, page_mode: PageMode) -> Target { + Target::Archive { url, page_mode } + } + + fn build_target_url(&self, target: &Target, page: u16) -> String { + match target { + Target::Latest => { + Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page) + } + Target::Popular => { + Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page) + } + Target::TopRated => { + Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page) + } + Target::Search(query) => Self::build_archive_page_url( + &format!("{}/search/{}/", self.url, Self::build_search_path(query)), + page, + ), + Target::Archive { url, page_mode } => match page_mode { + PageMode::SitePaged => Self::build_archive_page_url(url, page), + PageMode::LocalSlice => Self::build_archive_page_url(url, 1), + }, + } + } + + async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result { + requester + .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + fn slug_remainder(href: &str, prefix: &str) -> Option { + let trimmed = href.trim().trim_end_matches('/'); + let remainder = trimmed.strip_prefix(prefix)?.trim_matches('/'); + if remainder.is_empty() || remainder.contains('/') { + return None; + } + Some(remainder.to_string()) + } + + fn push_category(target: &Arc>>, item: FilterOption) { + if item.id.is_empty() || item.title.is_empty() { + return; + } + + if let Ok(mut values) = target.write() { + let normalized = Self::normalize_title(&item.title); + if !values + .iter() + .any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized) + { + values.push(item); + } + } + } + + fn insert_tag_mapping(target: &Arc>>, title: &str, href: &str) { + let normalized_title = Self::normalize_title(title); + if normalized_title.is_empty() || href.is_empty() { + return; + } + + if let Ok(mut values) = target.write() { + values.insert(normalized_title, href.to_string()); + } + } + + async fn load_categories( + base_url: &str, + categories: Arc>>, + ) -> Result<()> { + let mut requester = Requester::new(); + let page_url = format!("{base_url}/categories/"); + let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("a.item[href]")?; + let prefix = format!("{base_url}/categories/"); + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default(); + let Some(_slug) = Self::slug_remainder(href, &prefix) else { + continue; + }; + + let title = element + .value() + .attr("title") + .map(Self::decode_html) + .filter(|value| !value.trim().is_empty()) + .unwrap_or_else(|| Self::text_of(&element)); + let title = title.trim().to_string(); + if title.is_empty() { + continue; + } + + Self::push_category( + &categories, + FilterOption { + id: format!("{}/", href.trim_end_matches('/')), + title, + }, + ); + } + + Ok(()) + } + + async fn load_tags(base_url: &str, tag_map: Arc>>) -> Result<()> { + let mut requester = Requester::new(); + let page_url = format!("{base_url}/tags/"); + let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("div.list-tags a[href]")?; + let prefix = format!("{base_url}/tags/"); + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default(); + let Some(slug) = Self::slug_remainder(href, &prefix) else { + continue; + }; + let title = Self::text_of(&element); + if title.is_empty() { + continue; + } + let canonical = format!("{}/", href.trim_end_matches('/')); + Self::insert_tag_mapping(&tag_map, &title, &canonical); + Self::insert_tag_mapping(&tag_map, &slug, &canonical); + } + + Ok(()) + } + + fn parse_duration(text: &str) -> u32 { + parse_time_to_seconds(text) + .and_then(|value| u32::try_from(value).ok()) + .unwrap_or(0) + } + + fn parse_views(text: &str) -> Option { + let cleaned = text + .replace("views", "") + .replace("view", "") + .replace([',', ' '], ""); + parse_abbreviated_number(cleaned.trim()) + } + + fn parse_rating(text: &str) -> Option { + let digits = text + .chars() + .filter(|value| value.is_ascii_digit() || *value == '.') + .collect::(); + digits.parse::().ok() + } + + fn parse_list_videos(&self, html: &str) -> Result> { + let document = Html::parse_document(html); + let card_selector = Self::selector("div.video-preview-screen.video-item.thumb-item")?; + let link_selector = Self::selector("a[href*=\"/video/\"]")?; + let title_link_selector = Self::selector("p.inf a[href*=\"/video/\"], a[title][href*=\"/video/\"]")?; + let image_selector = Self::selector("img.cover")?; + let duration_selector = Self::selector("div.durations")?; + let views_selector = Self::selector("div.viewsthumb")?; + let rating_selector = Self::selector("ul.list-unstyled li.pull-right")?; + + let mut items = Vec::new(); + + for card in document.select(&card_selector) { + let Some(link) = card.select(&link_selector).next() else { + continue; + }; + + let href = link.value().attr("href").unwrap_or_default(); + let page_url = self.normalize_url(href); + if page_url.is_empty() { + continue; + } + + let id = card + .value() + .attr("data-item-id") + .map(str::to_string) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| { + page_url + .trim_end_matches('/') + .split('/') + .nth_back(1) + .unwrap_or_default() + .to_string() + }); + if id.is_empty() { + continue; + } + + let image = card.select(&image_selector).next(); + let thumb = image + .and_then(|value| value.value().attr("data-src").or_else(|| value.value().attr("src"))) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let title = card + .select(&title_link_selector) + .next() + .or_else(|| card.select(&link_selector).find(|value| value.value().attr("title").is_some())) + .and_then(|value| value.value().attr("title").map(Self::decode_html).or_else(|| { + let text = Self::text_of(&value); + (!text.is_empty()).then_some(text) + })) + .filter(|value| !value.trim().is_empty()) + .unwrap_or_else(|| { + image + .and_then(|value| value.value().attr("alt").map(Self::decode_html)) + .unwrap_or_default() + }); + if title.is_empty() { + continue; + } + + let duration = card + .select(&duration_selector) + .next() + .map(|value| Self::parse_duration(&Self::text_of(&value))) + .unwrap_or(0); + let views = card + .select(&views_selector) + .next() + .and_then(|value| Self::parse_views(&Self::text_of(&value))); + let rating = card + .select(&rating_selector) + .next() + .and_then(|value| Self::parse_rating(&Self::text_of(&value))); + + let mut item = VideoItem::new( + id, + title.trim().to_string(), + page_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.views = views; + item.rating = rating; + items.push(item); + } + + Ok(items) + } + + fn parse_format_urls(html: &str) -> Result> { + let pairs = [ + ("video_url", "video_url_text"), + ("video_alt_url", "video_alt_url_text"), + ("video_alt_url2", "video_alt_url2_text"), + ("video_alt_url3", "video_alt_url3_text"), + ]; + let mut formats = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for (url_key, label_key) in pairs { + let url_re = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#))?; + let label_re = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#))?; + + let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else { + continue; + }; + let url = url_match.as_str().replace("\\/", "/"); + if !seen.insert(url.clone()) { + continue; + } + + let label = label_re + .captures(html) + .and_then(|value| value.get(1)) + .map(|value| value.as_str().trim().to_string()) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| "mp4".to_string()); + let normalized_label = label.replace(" HD", "").replace(" FHD", "").trim().to_string(); + let format = VideoFormat::new(url, normalized_label.clone(), "mp4".to_string()) + .format_id(normalized_label.clone()) + .format_note(label); + formats.push(format); + } + + Ok(formats) + } + + fn parse_aspect_ratio(html: &str) -> Result> { + let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#)?; + let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#)?; + let Some(width) = width_re + .captures(html) + .and_then(|value| value.get(1)) + .and_then(|value| value.as_str().parse::().ok()) + else { + return Ok(None); + }; + let Some(height) = height_re + .captures(html) + .and_then(|value| value.get(1)) + .and_then(|value| value.as_str().parse::().ok()) + else { + return Ok(None); + }; + + Ok((height > 0.0).then_some(width / height)) + } + + fn collect_tags(document: &Html, html: &str) -> Result> { + let category_selector = Self::selector("div.items-holder.js-categories a[href*=\"/categories/\"]")?; + let tag_selector = Self::selector("div.item a[href*=\"/tags/\"]")?; + let mut values = Vec::new(); + + for element in document.select(&category_selector) { + let value = Self::text_of(&element); + if !value.is_empty() { + values.push(value); + } + } + + for element in document.select(&tag_selector) { + let value = Self::text_of(&element); + if !value.is_empty() { + values.push(value); + } + } + + for pattern in [r#"video_categories:\s*'([^']*)'"#, r#"video_tags:\s*'([^']*)'"#] { + let re = Self::regex(pattern)?; + if let Some(raw) = re.captures(html).and_then(|value| value.get(1)) { + for entry in raw + .as_str() + .split(',') + .map(str::trim) + .map(Self::decode_html) + .filter(|value| !value.is_empty()) + { + values.push(entry); + } + } + } + + let mut unique = Vec::new(); + for value in values { + let normalized = Self::normalize_title(&value); + if normalized.is_empty() || normalized == "-" { + continue; + } + if !unique + .iter() + .any(|existing: &String| Self::normalize_title(existing) == normalized) + { + unique.push(value); + } + } + + Ok(unique) + } + + fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result { + let document = Html::parse_document(html); + let title_selector = Self::selector("h1")?; + let uploader_selector = + Self::selector("div.info-block div.block-user div.username a[href*=\"/members/\"]")?; + let stat_selector = Self::selector("div.info-block div.item span")?; + + if let Some(title) = document + .select(&title_selector) + .next() + .map(|value| Self::text_of(&value)) + .filter(|value| !value.is_empty()) + { + item.title = title; + } + + if let Some(uploader) = document.select(&uploader_selector).next() { + let uploader_name = Self::text_of(&uploader); + let uploader_url = uploader + .value() + .attr("href") + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + if !uploader_name.is_empty() { + item.uploader = Some(uploader_name); + } + if !uploader_url.is_empty() { + let uploader_id = uploader_url + .trim_end_matches('/') + .split('/') + .next_back() + .unwrap_or_default() + .to_string(); + item.uploaderUrl = Some(uploader_url); + if !uploader_id.is_empty() { + item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}")); + } + } + } + + for stat in document.select(&stat_selector).map(|value| Self::text_of(&value)) { + if item.views.is_none() { + item.views = Self::parse_views(&stat); + } + if item.duration == 0 { + let duration = Self::parse_duration(&stat); + if duration > 0 { + item.duration = duration; + } + } + } + + let tags = Self::collect_tags(&document, html)?; + if !tags.is_empty() { + item.tags = Some(tags); + } + + let formats = Self::parse_format_urls(html)?; + if !formats.is_empty() { + item.formats = Some(formats); + } + + if item.aspectRatio.is_none() { + item.aspectRatio = Self::parse_aspect_ratio(html)?; + } + + Ok(item) + } + + async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { + let mut requester = + requester_or_default(options, CHANNEL_ID, "porntrex.enrich_item.missing_requester"); + + match Self::fetch_html(&mut requester, &item.url, &item.url).await { + Ok(html) => match self.apply_detail_video(item.clone(), &html) { + Ok(value) => value, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "enrich_item.apply_detail_video", + &format!("url={}; error={error}", item.url), + ); + item + } + }, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "enrich_item.fetch_html", + &format!("url={}; error={error}", item.url), + ); + item + } + } + } + + fn resolve_sort_target(sort: &str) -> Target { + match sort.trim().to_ascii_lowercase().as_str() { + "popular" | "viewed" | "most_viewed" => Target::Popular, + "rated" | "rating" | "top" => Target::TopRated, + _ => Target::Latest, + } + } + + fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target { + if let Some(category) = options.categories.as_deref() { + if category.starts_with(&self.url) && category != "all" { + return Self::archive_target(category.to_string(), PageMode::SitePaged); + } + } + Self::resolve_sort_target(sort) + } + + fn lookup_category_target(&self, query: &str) -> Option { + let normalized_query = Self::normalize_title(query); + self.categories + .read() + .ok()? + .iter() + .find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query) + .map(|value| value.id.clone()) + } + + fn resolve_query_target(&self, query: &str) -> Target { + let trimmed = query.trim().trim_start_matches('@'); + if let Some((kind, raw_value)) = trimmed.split_once(':') { + let value = raw_value.trim().trim_matches('/').replace(' ', "-"); + if !value.is_empty() { + match kind.trim().to_ascii_lowercase().as_str() { + "tag" | "tags" => { + return Self::archive_target( + format!("{}/tags/{value}/", self.url), + PageMode::LocalSlice, + ); + } + "category" | "categories" => { + return Self::archive_target( + format!("{}/categories/{value}/", self.url), + PageMode::SitePaged, + ); + } + _ => {} + } + } + } + + if let Some(category) = self.lookup_category_target(trimmed) { + return Self::archive_target(category, PageMode::SitePaged); + } + + let normalized = Self::normalize_title(trimmed); + if let Some(target) = self + .tag_map + .read() + .ok() + .and_then(|value| value.get(&normalized).cloned()) + { + return Self::archive_target(target, PageMode::LocalSlice); + } + + Target::Search(trimmed.to_string()) + } + + fn catalogs_need_refresh(&self) -> bool { + let categories_len = self + .categories + .read() + .map(|value| value.len()) + .unwrap_or_default(); + let tag_count = self + .tag_map + .read() + .map(|value| value.len()) + .unwrap_or_default(); + categories_len <= 1 || tag_count == 0 + } + + async fn refresh_catalogs(&self) { + if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await { + report_provider_error_background( + CHANNEL_ID, + "refresh_catalogs.categories", + &error.to_string(), + ); + } + if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await { + report_provider_error_background( + CHANNEL_ID, + "refresh_catalogs.tags", + &error.to_string(), + ); + } + } + + async fn fetch_target( + &self, + cache: VideoCache, + target: Target, + page: u16, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let page_mode = match &target { + Target::Archive { page_mode, .. } => *page_mode, + _ => PageMode::SitePaged, + }; + let source_url = self.build_target_url(&target, page); + let cache_key = match page_mode { + PageMode::SitePaged => source_url.clone(), + PageMode::LocalSlice => format!("{source_url}#page={page}&per_page={per_page_limit}"), + }; + let old_items = match cache.get(&cache_key) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + return Ok(items.clone()); + } + items.clone() + } + None => vec![], + }; + + let mut requester = + requester_or_default(&options, CHANNEL_ID, "porntrex.fetch_target.missing_requester"); + let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await { + Ok(value) => value, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "fetch_target.request", + &format!("url={source_url}; error={error}"), + ) + .await; + return Ok(old_items); + } + }; + + if html.trim().is_empty() { + report_provider_error( + CHANNEL_ID, + "fetch_target.empty_response", + &format!("url={source_url}"), + ) + .await; + return Ok(old_items); + } + + let items = self.parse_list_videos(&html)?; + if items.is_empty() { + return Ok(old_items); + } + + let limited_items = match page_mode { + PageMode::SitePaged => items + .into_iter() + .take(per_page_limit.max(1)) + .collect::>(), + PageMode::LocalSlice => { + let start = page.saturating_sub(1) as usize * per_page_limit.max(1); + items.into_iter() + .skip(start) + .take(per_page_limit.max(1)) + .collect::>() + } + }; + + if limited_items.is_empty() { + cache.insert(cache_key, vec![]); + return Ok(vec![]); + } + + let enriched = stream::iter(limited_items.into_iter().map(|item| { + let provider = self.clone(); + let options = options.clone(); + async move { provider.enrich_item(item, &options).await } + })) + .buffer_unordered(4) + .collect::>() + .await; + + cache.remove(&cache_key); + cache.insert(cache_key, enriched.clone()); + Ok(enriched) + } +} + +#[async_trait] +impl Provider for PorntrexProvider { + async fn get_videos( + &self, + cache: VideoCache, + _pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + let per_page_limit = per_page.parse::().unwrap_or(10).clamp(1, 60); + + let target = match query { + Some(query) if !query.trim().is_empty() => { + let query = query.trim(); + let mut target = self.resolve_query_target(query); + if matches!(target, Target::Search(_)) && self.catalogs_need_refresh() { + self.refresh_catalogs().await; + target = self.resolve_query_target(query); + } + target + } + _ => self.resolve_option_target(&options, &sort), + }; + + match self + .fetch_target(cache, target, page, per_page_limit, options.clone()) + .await + { + Ok(items) => items, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "get_videos.fetch_target", + &format!("sort={sort}; page={page}; error={error}"), + ) + .await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_search_url() { + let provider = PorntrexProvider::new(); + assert_eq!( + provider.build_target_url(&Target::Search("adriana chechik".to_string()), 2), + "https://www.porntrex.com/search/adriana-chechik/2/" + ); + } + + #[test] + fn resolves_tag_prefix() { + let provider = PorntrexProvider::new(); + match provider.resolve_query_target("tag:blowjob") { + Target::Archive { url, page_mode } => { + assert_eq!(url, "https://www.porntrex.com/tags/blowjob/"); + assert_eq!(page_mode, PageMode::LocalSlice); + } + _ => panic!("expected archive target"), + } + } + + #[test] + fn builds_local_slice_archive_url_without_numeric_page() { + let provider = PorntrexProvider::new(); + let target = PorntrexProvider::archive_target( + "https://www.porntrex.com/tags/anal-creampie/".to_string(), + PageMode::LocalSlice, + ); + assert_eq!( + provider.build_target_url(&target, 3), + "https://www.porntrex.com/tags/anal-creampie/" + ); + } +}