porntrex
This commit is contained in:
5
build.rs
5
build.rs
@@ -91,6 +91,11 @@ const PROVIDERS: &[ProviderDef] = &[
|
||||
module: "yesporn",
|
||||
ty: "YespornProvider",
|
||||
},
|
||||
ProviderDef {
|
||||
id: "porntrex",
|
||||
module: "porntrex",
|
||||
ty: "PorntrexProvider",
|
||||
},
|
||||
ProviderDef {
|
||||
id: "sxyprn",
|
||||
module: "sxyprn",
|
||||
|
||||
@@ -37,6 +37,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
|
||||
| `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
|
||||
| `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
|
||||
| `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
|
||||
| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
|
||||
| `redtube` | `mainstream-tube` | no | no | Mainstream archive. |
|
||||
| `rule34gen` | `ai` | no | no | AI group example. |
|
||||
| `rule34video` | `hentai-animation` | no | no | Hentai group example. |
|
||||
|
||||
987
src/providers/porntrex.rs
Normal file
987
src/providers/porntrex.rs
Normal file
@@ -0,0 +1,987 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::requester::Requester;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use futures::stream::{self, StreamExt};
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{thread, vec};
|
||||
use wreq::Version;
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
crate::providers::ProviderChannelMetadata {
|
||||
group_id: "mainstream-tube",
|
||||
tags: &["tube", "mixed", "hd"],
|
||||
};
|
||||
|
||||
const BASE_URL: &str = "https://www.porntrex.com";
|
||||
const CHANNEL_ID: &str = "porntrex";
|
||||
const FIREFOX_UA: &str =
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
|
||||
const HTML_ACCEPT: &str =
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
||||
|
||||
error_chain! {
|
||||
foreign_links {
|
||||
Io(std::io::Error);
|
||||
}
|
||||
errors {
|
||||
Parse(msg: String) {
|
||||
description("parse error")
|
||||
display("parse error: {}", msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PorntrexProvider {
|
||||
url: String,
|
||||
categories: Arc<RwLock<Vec<FilterOption>>>,
|
||||
tag_map: Arc<RwLock<HashMap<String, String>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum Target {
|
||||
Latest,
|
||||
Popular,
|
||||
TopRated,
|
||||
Search(String),
|
||||
Archive {
|
||||
url: String,
|
||||
page_mode: PageMode,
|
||||
},
|
||||
}
|
||||
|
||||
/// How pagination is applied to an archive target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PageMode {
    // The site itself paginates via a `/{page}/` URL suffix.
    SitePaged,
    // One page is fetched and sliced locally per requested page.
    LocalSlice,
}
|
||||
|
||||
impl PorntrexProvider {
|
||||
pub fn new() -> Self {
|
||||
let provider = Self {
|
||||
url: BASE_URL.to_string(),
|
||||
categories: Arc::new(RwLock::new(vec![FilterOption {
|
||||
id: "all".to_string(),
|
||||
title: "All".to_string(),
|
||||
}])),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
};
|
||||
provider.spawn_initial_load();
|
||||
provider
|
||||
}
|
||||
|
||||
fn spawn_initial_load(&self) {
|
||||
let url = self.url.clone();
|
||||
let categories = Arc::clone(&self.categories);
|
||||
let tag_map = Arc::clone(&self.tag_map);
|
||||
|
||||
thread::spawn(move || {
|
||||
let runtime = match tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
{
|
||||
Ok(runtime) => runtime,
|
||||
Err(error) => {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"spawn_initial_load.runtime_build",
|
||||
&error.to_string(),
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
runtime.block_on(async move {
|
||||
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"load_categories",
|
||||
&error.to_string(),
|
||||
);
|
||||
}
|
||||
if let Err(error) = Self::load_tags(&url, Arc::clone(&tag_map)).await {
|
||||
report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||
let categories = self
|
||||
.categories
|
||||
.read()
|
||||
.map(|value| value.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
Channel {
|
||||
id: CHANNEL_ID.to_string(),
|
||||
name: "PornTrex".to_string(),
|
||||
description:
|
||||
"PornTrex videos with latest, most viewed, top rated, category, and tag-aware search routing."
|
||||
.to_string(),
|
||||
premium: false,
|
||||
favicon: "https://www.google.com/s2/favicons?sz=64&domain=porntrex.com".to_string(),
|
||||
status: "active".to_string(),
|
||||
categories: categories.iter().map(|value| value.title.clone()).collect(),
|
||||
options: vec![
|
||||
ChannelOption {
|
||||
id: "sort".to_string(),
|
||||
title: "Sort".to_string(),
|
||||
description: "Browse PornTrex ranking feeds.".to_string(),
|
||||
systemImage: "list.number".to_string(),
|
||||
colorName: "blue".to_string(),
|
||||
options: vec![
|
||||
FilterOption {
|
||||
id: "new".to_string(),
|
||||
title: "Latest".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "popular".to_string(),
|
||||
title: "Most Viewed".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "rated".to_string(),
|
||||
title: "Top Rated".to_string(),
|
||||
},
|
||||
],
|
||||
multiSelect: false,
|
||||
},
|
||||
ChannelOption {
|
||||
id: "categories".to_string(),
|
||||
title: "Categories".to_string(),
|
||||
description: "Browse a PornTrex category archive.".to_string(),
|
||||
systemImage: "square.grid.2x2".to_string(),
|
||||
colorName: "orange".to_string(),
|
||||
options: categories,
|
||||
multiSelect: false,
|
||||
},
|
||||
],
|
||||
nsfw: true,
|
||||
cacheDuration: Some(1800),
|
||||
}
|
||||
}
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
||||
}
|
||||
|
||||
fn regex(value: &str) -> Result<Regex> {
|
||||
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
||||
}
|
||||
|
||||
fn decode_html(text: &str) -> String {
|
||||
decode(text.as_bytes())
|
||||
.to_string()
|
||||
.unwrap_or_else(|_| text.to_string())
|
||||
}
|
||||
|
||||
/// Collapse every run of whitespace to a single space and trim the ends.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
||||
|
||||
fn text_of(element: &ElementRef<'_>) -> String {
|
||||
Self::decode_html(&Self::collapse_whitespace(
|
||||
&element.text().collect::<Vec<_>>().join(" "),
|
||||
))
|
||||
}
|
||||
|
||||
/// Canonical form of a category/tag title used for case- and
/// separator-insensitive comparisons: strips a leading `#`, turns `_`/`-`
/// into spaces, collapses whitespace, and lowercases.
fn normalize_title(title: &str) -> String {
    let cleaned = title
        .trim()
        .trim_start_matches('#')
        .replace(['_', '-'], " ");
    let mut words: Vec<&str> = Vec::new();
    for word in cleaned.split_whitespace() {
        words.push(word);
    }
    words.join(" ").to_ascii_lowercase()
}
|
||||
|
||||
fn normalize_url(&self, url: &str) -> String {
|
||||
let trimmed = url.trim();
|
||||
if trimmed.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
|
||||
return trimmed.to_string();
|
||||
}
|
||||
if trimmed.starts_with("//") {
|
||||
return format!("https:{trimmed}");
|
||||
}
|
||||
if trimmed.starts_with('/') {
|
||||
return format!("{}{}", self.url, trimmed);
|
||||
}
|
||||
format!("{}/{}", self.url, trimmed.trim_start_matches("./"))
|
||||
}
|
||||
|
||||
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
fn build_search_path(query: &str) -> String {
|
||||
query
|
||||
.split_whitespace()
|
||||
.map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("-")
|
||||
}
|
||||
|
||||
/// Archive URL for a given page: page 1 (or 0) is the bare archive with a
/// trailing slash; later pages append a numeric `/{page}/` segment.
fn build_archive_page_url(archive_url: &str, page: u16) -> String {
    let base = archive_url.trim_end_matches('/');
    match page {
        0 | 1 => format!("{base}/"),
        _ => format!("{base}/{page}/"),
    }
}
|
||||
|
||||
fn archive_target(url: String, page_mode: PageMode) -> Target {
|
||||
Target::Archive { url, page_mode }
|
||||
}
|
||||
|
||||
fn build_target_url(&self, target: &Target, page: u16) -> String {
|
||||
match target {
|
||||
Target::Latest => {
|
||||
Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page)
|
||||
}
|
||||
Target::Popular => {
|
||||
Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page)
|
||||
}
|
||||
Target::TopRated => {
|
||||
Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page)
|
||||
}
|
||||
Target::Search(query) => Self::build_archive_page_url(
|
||||
&format!("{}/search/{}/", self.url, Self::build_search_path(query)),
|
||||
page,
|
||||
),
|
||||
Target::Archive { url, page_mode } => match page_mode {
|
||||
PageMode::SitePaged => Self::build_archive_page_url(url, page),
|
||||
PageMode::LocalSlice => Self::build_archive_page_url(url, 1),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
|
||||
requester
|
||||
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
|
||||
.await
|
||||
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
||||
}
|
||||
|
||||
/// Extract the single path segment of `href` after `prefix`
/// (e.g. the tag slug of a `/tags/<slug>/` link).
/// Returns `None` for non-matching, empty, or multi-segment remainders.
fn slug_remainder(href: &str, prefix: &str) -> Option<String> {
    let candidate = href.trim().trim_end_matches('/');
    let rest = candidate.strip_prefix(prefix)?.trim_matches('/');
    (!rest.is_empty() && !rest.contains('/')).then(|| rest.to_string())
}
|
||||
|
||||
fn push_category(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
||||
if item.id.is_empty() || item.title.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Ok(mut values) = target.write() {
|
||||
let normalized = Self::normalize_title(&item.title);
|
||||
if !values
|
||||
.iter()
|
||||
.any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized)
|
||||
{
|
||||
values.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_tag_mapping(target: &Arc<RwLock<HashMap<String, String>>>, title: &str, href: &str) {
|
||||
let normalized_title = Self::normalize_title(title);
|
||||
if normalized_title.is_empty() || href.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Ok(mut values) = target.write() {
|
||||
values.insert(normalized_title, href.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
async fn load_categories(
|
||||
base_url: &str,
|
||||
categories: Arc<RwLock<Vec<FilterOption>>>,
|
||||
) -> Result<()> {
|
||||
let mut requester = Requester::new();
|
||||
let page_url = format!("{base_url}/categories/");
|
||||
let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?;
|
||||
let document = Html::parse_document(&html);
|
||||
let selector = Self::selector("a.item[href]")?;
|
||||
let prefix = format!("{base_url}/categories/");
|
||||
|
||||
for element in document.select(&selector) {
|
||||
let href = element.value().attr("href").unwrap_or_default();
|
||||
let Some(_slug) = Self::slug_remainder(href, &prefix) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let title = element
|
||||
.value()
|
||||
.attr("title")
|
||||
.map(Self::decode_html)
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.unwrap_or_else(|| Self::text_of(&element));
|
||||
let title = title.trim().to_string();
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
Self::push_category(
|
||||
&categories,
|
||||
FilterOption {
|
||||
id: format!("{}/", href.trim_end_matches('/')),
|
||||
title,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, String>>>) -> Result<()> {
|
||||
let mut requester = Requester::new();
|
||||
let page_url = format!("{base_url}/tags/");
|
||||
let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?;
|
||||
let document = Html::parse_document(&html);
|
||||
let selector = Self::selector("div.list-tags a[href]")?;
|
||||
let prefix = format!("{base_url}/tags/");
|
||||
|
||||
for element in document.select(&selector) {
|
||||
let href = element.value().attr("href").unwrap_or_default();
|
||||
let Some(slug) = Self::slug_remainder(href, &prefix) else {
|
||||
continue;
|
||||
};
|
||||
let title = Self::text_of(&element);
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let canonical = format!("{}/", href.trim_end_matches('/'));
|
||||
Self::insert_tag_mapping(&tag_map, &title, &canonical);
|
||||
Self::insert_tag_mapping(&tag_map, &slug, &canonical);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_duration(text: &str) -> u32 {
|
||||
parse_time_to_seconds(text)
|
||||
.and_then(|value| u32::try_from(value).ok())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn parse_views(text: &str) -> Option<u32> {
|
||||
let cleaned = text
|
||||
.replace("views", "")
|
||||
.replace("view", "")
|
||||
.replace([',', ' '], "");
|
||||
parse_abbreviated_number(cleaned.trim())
|
||||
}
|
||||
|
||||
/// Parse a rating label (typically "NN%") into a number.
///
/// Takes the FIRST numeric token in the text. The previous implementation
/// collected every ASCII digit/dot in the whole string, so text containing
/// two numbers (e.g. "4.5 (12 votes)") produced a corrupt value ("4.512").
fn parse_rating(text: &str) -> Option<f32> {
    text.split(|c: char| !(c.is_ascii_digit() || c == '.'))
        .find(|token| token.chars().any(|c| c.is_ascii_digit()))
        .and_then(|token| token.parse::<f32>().ok())
}
|
||||
|
||||
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_selector = Self::selector("div.video-preview-screen.video-item.thumb-item")?;
|
||||
let link_selector = Self::selector("a[href*=\"/video/\"]")?;
|
||||
let title_link_selector = Self::selector("p.inf a[href*=\"/video/\"], a[title][href*=\"/video/\"]")?;
|
||||
let image_selector = Self::selector("img.cover")?;
|
||||
let duration_selector = Self::selector("div.durations")?;
|
||||
let views_selector = Self::selector("div.viewsthumb")?;
|
||||
let rating_selector = Self::selector("ul.list-unstyled li.pull-right")?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
|
||||
for card in document.select(&card_selector) {
|
||||
let Some(link) = card.select(&link_selector).next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let href = link.value().attr("href").unwrap_or_default();
|
||||
let page_url = self.normalize_url(href);
|
||||
if page_url.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let id = card
|
||||
.value()
|
||||
.attr("data-item-id")
|
||||
.map(str::to_string)
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or_else(|| {
|
||||
page_url
|
||||
.trim_end_matches('/')
|
||||
.split('/')
|
||||
.nth_back(1)
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
});
|
||||
if id.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let image = card.select(&image_selector).next();
|
||||
let thumb = image
|
||||
.and_then(|value| value.value().attr("data-src").or_else(|| value.value().attr("src")))
|
||||
.map(|value| self.normalize_url(value))
|
||||
.unwrap_or_default();
|
||||
|
||||
let title = card
|
||||
.select(&title_link_selector)
|
||||
.next()
|
||||
.or_else(|| card.select(&link_selector).find(|value| value.value().attr("title").is_some()))
|
||||
.and_then(|value| value.value().attr("title").map(Self::decode_html).or_else(|| {
|
||||
let text = Self::text_of(&value);
|
||||
(!text.is_empty()).then_some(text)
|
||||
}))
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.unwrap_or_else(|| {
|
||||
image
|
||||
.and_then(|value| value.value().attr("alt").map(Self::decode_html))
|
||||
.unwrap_or_default()
|
||||
});
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let duration = card
|
||||
.select(&duration_selector)
|
||||
.next()
|
||||
.map(|value| Self::parse_duration(&Self::text_of(&value)))
|
||||
.unwrap_or(0);
|
||||
let views = card
|
||||
.select(&views_selector)
|
||||
.next()
|
||||
.and_then(|value| Self::parse_views(&Self::text_of(&value)));
|
||||
let rating = card
|
||||
.select(&rating_selector)
|
||||
.next()
|
||||
.and_then(|value| Self::parse_rating(&Self::text_of(&value)));
|
||||
|
||||
let mut item = VideoItem::new(
|
||||
id,
|
||||
title.trim().to_string(),
|
||||
page_url,
|
||||
CHANNEL_ID.to_string(),
|
||||
thumb,
|
||||
duration,
|
||||
);
|
||||
item.views = views;
|
||||
item.rating = rating;
|
||||
items.push(item);
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn parse_format_urls(html: &str) -> Result<Vec<VideoFormat>> {
|
||||
let pairs = [
|
||||
("video_url", "video_url_text"),
|
||||
("video_alt_url", "video_alt_url_text"),
|
||||
("video_alt_url2", "video_alt_url2_text"),
|
||||
("video_alt_url3", "video_alt_url3_text"),
|
||||
];
|
||||
let mut formats = Vec::new();
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
|
||||
for (url_key, label_key) in pairs {
|
||||
let url_re = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#))?;
|
||||
let label_re = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#))?;
|
||||
|
||||
let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else {
|
||||
continue;
|
||||
};
|
||||
let url = url_match.as_str().replace("\\/", "/");
|
||||
if !seen.insert(url.clone()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let label = label_re
|
||||
.captures(html)
|
||||
.and_then(|value| value.get(1))
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or_else(|| "mp4".to_string());
|
||||
let normalized_label = label.replace(" HD", "").replace(" FHD", "").trim().to_string();
|
||||
let format = VideoFormat::new(url, normalized_label.clone(), "mp4".to_string())
|
||||
.format_id(normalized_label.clone())
|
||||
.format_note(label);
|
||||
formats.push(format);
|
||||
}
|
||||
|
||||
Ok(formats)
|
||||
}
|
||||
|
||||
fn parse_aspect_ratio(html: &str) -> Result<Option<f32>> {
|
||||
let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#)?;
|
||||
let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#)?;
|
||||
let Some(width) = width_re
|
||||
.captures(html)
|
||||
.and_then(|value| value.get(1))
|
||||
.and_then(|value| value.as_str().parse::<f32>().ok())
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some(height) = height_re
|
||||
.captures(html)
|
||||
.and_then(|value| value.get(1))
|
||||
.and_then(|value| value.as_str().parse::<f32>().ok())
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok((height > 0.0).then_some(width / height))
|
||||
}
|
||||
|
||||
fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
|
||||
let category_selector = Self::selector("div.items-holder.js-categories a[href*=\"/categories/\"]")?;
|
||||
let tag_selector = Self::selector("div.item a[href*=\"/tags/\"]")?;
|
||||
let mut values = Vec::new();
|
||||
|
||||
for element in document.select(&category_selector) {
|
||||
let value = Self::text_of(&element);
|
||||
if !value.is_empty() {
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
for element in document.select(&tag_selector) {
|
||||
let value = Self::text_of(&element);
|
||||
if !value.is_empty() {
|
||||
values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
for pattern in [r#"video_categories:\s*'([^']*)'"#, r#"video_tags:\s*'([^']*)'"#] {
|
||||
let re = Self::regex(pattern)?;
|
||||
if let Some(raw) = re.captures(html).and_then(|value| value.get(1)) {
|
||||
for entry in raw
|
||||
.as_str()
|
||||
.split(',')
|
||||
.map(str::trim)
|
||||
.map(Self::decode_html)
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
values.push(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut unique = Vec::new();
|
||||
for value in values {
|
||||
let normalized = Self::normalize_title(&value);
|
||||
if normalized.is_empty() || normalized == "-" {
|
||||
continue;
|
||||
}
|
||||
if !unique
|
||||
.iter()
|
||||
.any(|existing: &String| Self::normalize_title(existing) == normalized)
|
||||
{
|
||||
unique.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(unique)
|
||||
}
|
||||
|
||||
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
||||
let document = Html::parse_document(html);
|
||||
let title_selector = Self::selector("h1")?;
|
||||
let uploader_selector =
|
||||
Self::selector("div.info-block div.block-user div.username a[href*=\"/members/\"]")?;
|
||||
let stat_selector = Self::selector("div.info-block div.item span")?;
|
||||
|
||||
if let Some(title) = document
|
||||
.select(&title_selector)
|
||||
.next()
|
||||
.map(|value| Self::text_of(&value))
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
item.title = title;
|
||||
}
|
||||
|
||||
if let Some(uploader) = document.select(&uploader_selector).next() {
|
||||
let uploader_name = Self::text_of(&uploader);
|
||||
let uploader_url = uploader
|
||||
.value()
|
||||
.attr("href")
|
||||
.map(|value| self.normalize_url(value))
|
||||
.unwrap_or_default();
|
||||
if !uploader_name.is_empty() {
|
||||
item.uploader = Some(uploader_name);
|
||||
}
|
||||
if !uploader_url.is_empty() {
|
||||
let uploader_id = uploader_url
|
||||
.trim_end_matches('/')
|
||||
.split('/')
|
||||
.next_back()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
item.uploaderUrl = Some(uploader_url);
|
||||
if !uploader_id.is_empty() {
|
||||
item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for stat in document.select(&stat_selector).map(|value| Self::text_of(&value)) {
|
||||
if item.views.is_none() {
|
||||
item.views = Self::parse_views(&stat);
|
||||
}
|
||||
if item.duration == 0 {
|
||||
let duration = Self::parse_duration(&stat);
|
||||
if duration > 0 {
|
||||
item.duration = duration;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tags = Self::collect_tags(&document, html)?;
|
||||
if !tags.is_empty() {
|
||||
item.tags = Some(tags);
|
||||
}
|
||||
|
||||
let formats = Self::parse_format_urls(html)?;
|
||||
if !formats.is_empty() {
|
||||
item.formats = Some(formats);
|
||||
}
|
||||
|
||||
if item.aspectRatio.is_none() {
|
||||
item.aspectRatio = Self::parse_aspect_ratio(html)?;
|
||||
}
|
||||
|
||||
Ok(item)
|
||||
}
|
||||
|
||||
async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
||||
let mut requester =
|
||||
requester_or_default(options, CHANNEL_ID, "porntrex.enrich_item.missing_requester");
|
||||
|
||||
match Self::fetch_html(&mut requester, &item.url, &item.url).await {
|
||||
Ok(html) => match self.apply_detail_video(item.clone(), &html) {
|
||||
Ok(value) => value,
|
||||
Err(error) => {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"enrich_item.apply_detail_video",
|
||||
&format!("url={}; error={error}", item.url),
|
||||
);
|
||||
item
|
||||
}
|
||||
},
|
||||
Err(error) => {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"enrich_item.fetch_html",
|
||||
&format!("url={}; error={error}", item.url),
|
||||
);
|
||||
item
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_sort_target(sort: &str) -> Target {
|
||||
match sort.trim().to_ascii_lowercase().as_str() {
|
||||
"popular" | "viewed" | "most_viewed" => Target::Popular,
|
||||
"rated" | "rating" | "top" => Target::TopRated,
|
||||
_ => Target::Latest,
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
|
||||
if let Some(category) = options.categories.as_deref() {
|
||||
if category.starts_with(&self.url) && category != "all" {
|
||||
return Self::archive_target(category.to_string(), PageMode::SitePaged);
|
||||
}
|
||||
}
|
||||
Self::resolve_sort_target(sort)
|
||||
}
|
||||
|
||||
fn lookup_category_target(&self, query: &str) -> Option<String> {
|
||||
let normalized_query = Self::normalize_title(query);
|
||||
self.categories
|
||||
.read()
|
||||
.ok()?
|
||||
.iter()
|
||||
.find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query)
|
||||
.map(|value| value.id.clone())
|
||||
}
|
||||
|
||||
fn resolve_query_target(&self, query: &str) -> Target {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() {
|
||||
match kind.trim().to_ascii_lowercase().as_str() {
|
||||
"tag" | "tags" => {
|
||||
return Self::archive_target(
|
||||
format!("{}/tags/{value}/", self.url),
|
||||
PageMode::LocalSlice,
|
||||
);
|
||||
}
|
||||
"category" | "categories" => {
|
||||
return Self::archive_target(
|
||||
format!("{}/categories/{value}/", self.url),
|
||||
PageMode::SitePaged,
|
||||
);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(category) = self.lookup_category_target(trimmed) {
|
||||
return Self::archive_target(category, PageMode::SitePaged);
|
||||
}
|
||||
|
||||
let normalized = Self::normalize_title(trimmed);
|
||||
if let Some(target) = self
|
||||
.tag_map
|
||||
.read()
|
||||
.ok()
|
||||
.and_then(|value| value.get(&normalized).cloned())
|
||||
{
|
||||
return Self::archive_target(target, PageMode::LocalSlice);
|
||||
}
|
||||
|
||||
Target::Search(trimmed.to_string())
|
||||
}
|
||||
|
||||
fn catalogs_need_refresh(&self) -> bool {
|
||||
let categories_len = self
|
||||
.categories
|
||||
.read()
|
||||
.map(|value| value.len())
|
||||
.unwrap_or_default();
|
||||
let tag_count = self
|
||||
.tag_map
|
||||
.read()
|
||||
.map(|value| value.len())
|
||||
.unwrap_or_default();
|
||||
categories_len <= 1 || tag_count == 0
|
||||
}
|
||||
|
||||
async fn refresh_catalogs(&self) {
|
||||
if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"refresh_catalogs.categories",
|
||||
&error.to_string(),
|
||||
);
|
||||
}
|
||||
if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"refresh_catalogs.tags",
|
||||
&error.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_target(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
target: Target,
|
||||
page: u16,
|
||||
per_page_limit: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let page_mode = match &target {
|
||||
Target::Archive { page_mode, .. } => *page_mode,
|
||||
_ => PageMode::SitePaged,
|
||||
};
|
||||
let source_url = self.build_target_url(&target, page);
|
||||
let cache_key = match page_mode {
|
||||
PageMode::SitePaged => source_url.clone(),
|
||||
PageMode::LocalSlice => format!("{source_url}#page={page}&per_page={per_page_limit}"),
|
||||
};
|
||||
let old_items = match cache.get(&cache_key) {
|
||||
Some((time, items)) => {
|
||||
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
||||
return Ok(items.clone());
|
||||
}
|
||||
items.clone()
|
||||
}
|
||||
None => vec![],
|
||||
};
|
||||
|
||||
let mut requester =
|
||||
requester_or_default(&options, CHANNEL_ID, "porntrex.fetch_target.missing_requester");
|
||||
let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await {
|
||||
Ok(value) => value,
|
||||
Err(error) => {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"fetch_target.request",
|
||||
&format!("url={source_url}; error={error}"),
|
||||
)
|
||||
.await;
|
||||
return Ok(old_items);
|
||||
}
|
||||
};
|
||||
|
||||
if html.trim().is_empty() {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"fetch_target.empty_response",
|
||||
&format!("url={source_url}"),
|
||||
)
|
||||
.await;
|
||||
return Ok(old_items);
|
||||
}
|
||||
|
||||
let items = self.parse_list_videos(&html)?;
|
||||
if items.is_empty() {
|
||||
return Ok(old_items);
|
||||
}
|
||||
|
||||
let limited_items = match page_mode {
|
||||
PageMode::SitePaged => items
|
||||
.into_iter()
|
||||
.take(per_page_limit.max(1))
|
||||
.collect::<Vec<_>>(),
|
||||
PageMode::LocalSlice => {
|
||||
let start = page.saturating_sub(1) as usize * per_page_limit.max(1);
|
||||
items.into_iter()
|
||||
.skip(start)
|
||||
.take(per_page_limit.max(1))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
};
|
||||
|
||||
if limited_items.is_empty() {
|
||||
cache.insert(cache_key, vec![]);
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let enriched = stream::iter(limited_items.into_iter().map(|item| {
|
||||
let provider = self.clone();
|
||||
let options = options.clone();
|
||||
async move { provider.enrich_item(item, &options).await }
|
||||
}))
|
||||
.buffer_unordered(4)
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
|
||||
cache.remove(&cache_key);
|
||||
cache.insert(cache_key, enriched.clone());
|
||||
Ok(enriched)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for PorntrexProvider {
|
||||
async fn get_videos(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
_pool: DbPool,
|
||||
sort: String,
|
||||
query: Option<String>,
|
||||
page: String,
|
||||
per_page: String,
|
||||
options: ServerOptions,
|
||||
) -> Vec<VideoItem> {
|
||||
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
||||
let per_page_limit = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
|
||||
|
||||
let target = match query {
|
||||
Some(query) if !query.trim().is_empty() => {
|
||||
let query = query.trim();
|
||||
let mut target = self.resolve_query_target(query);
|
||||
if matches!(target, Target::Search(_)) && self.catalogs_need_refresh() {
|
||||
self.refresh_catalogs().await;
|
||||
target = self.resolve_query_target(query);
|
||||
}
|
||||
target
|
||||
}
|
||||
_ => self.resolve_option_target(&options, &sort),
|
||||
};
|
||||
|
||||
match self
|
||||
.fetch_target(cache, target, page, per_page_limit, options.clone())
|
||||
.await
|
||||
{
|
||||
Ok(items) => items,
|
||||
Err(error) => {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"get_videos.fetch_target",
|
||||
&format!("sort={sort}; page={page}; error={error}"),
|
||||
)
|
||||
.await;
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
||||
Some(self.build_channel(clientversion))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn builds_search_url() {
|
||||
let provider = PorntrexProvider::new();
|
||||
assert_eq!(
|
||||
provider.build_target_url(&Target::Search("adriana chechik".to_string()), 2),
|
||||
"https://www.porntrex.com/search/adriana-chechik/2/"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolves_tag_prefix() {
|
||||
let provider = PorntrexProvider::new();
|
||||
match provider.resolve_query_target("tag:blowjob") {
|
||||
Target::Archive { url, page_mode } => {
|
||||
assert_eq!(url, "https://www.porntrex.com/tags/blowjob/");
|
||||
assert_eq!(page_mode, PageMode::LocalSlice);
|
||||
}
|
||||
_ => panic!("expected archive target"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn builds_local_slice_archive_url_without_numeric_page() {
|
||||
let provider = PorntrexProvider::new();
|
||||
let target = PorntrexProvider::archive_target(
|
||||
"https://www.porntrex.com/tags/anal-creampie/".to_string(),
|
||||
PageMode::LocalSlice,
|
||||
);
|
||||
assert_eq!(
|
||||
provider.build_target_url(&target, 3),
|
||||
"https://www.porntrex.com/tags/anal-creampie/"
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user