988 lines
34 KiB
Rust
988 lines
34 KiB
Rust
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::requester::Requester;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
use async_trait::async_trait;
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
|
|
use regex::Regex;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use std::collections::HashMap;
|
|
use std::sync::{Arc, RwLock};
|
|
use std::{thread, vec};
|
|
use wreq::Version;
|
|
|
|
/// Static metadata for this channel: the provider group it belongs to and
/// the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "mixed", "hd"],
    };
|
|
|
|
/// Root URL of the site; relative links are resolved against it.
const BASE_URL: &str = "https://www.porntrex.com";

/// Identifier used for this channel in video items and error reports.
const CHANNEL_ID: &str = "porntrex";

/// `User-Agent` header value sent with every HTML request.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";

/// `Accept` header value sent with every HTML request.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
|
|
|
// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    // Errors converted automatically from other crates.
    foreign_links {
        Io(std::io::Error);
    }

    // Provider-specific error kinds.
    errors {
        // A value could not be parsed; `msg` carries the details.
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct PorntrexProvider {
|
|
url: String,
|
|
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
tag_map: Arc<RwLock<HashMap<String, String>>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
enum Target {
|
|
Latest,
|
|
Popular,
|
|
TopRated,
|
|
Search(String),
|
|
Archive {
|
|
url: String,
|
|
page_mode: PageMode,
|
|
},
|
|
}
|
|
|
|
/// Pagination strategy for an archive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PageMode {
    /// The page number is appended to the archive URL and the site paginates.
    SitePaged,
    /// Only the first page is requested; the requested page is cut out of
    /// its results locally.
    LocalSlice,
}
|
|
|
|
impl PorntrexProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
categories: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let url = self.url.clone();
|
|
let categories = Arc::clone(&self.categories);
|
|
let tag_map = Arc::clone(&self.tag_map);
|
|
|
|
thread::spawn(move || {
|
|
let runtime = match tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
{
|
|
Ok(runtime) => runtime,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"spawn_initial_load.runtime_build",
|
|
&error.to_string(),
|
|
);
|
|
return;
|
|
}
|
|
};
|
|
|
|
runtime.block_on(async move {
|
|
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"load_categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_tags(&url, Arc::clone(&tag_map)).await {
|
|
report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
let categories = self
|
|
.categories
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
|
|
Channel {
|
|
id: CHANNEL_ID.to_string(),
|
|
name: "PornTrex".to_string(),
|
|
description:
|
|
"PornTrex videos with latest, most viewed, top rated, category, and tag-aware search routing."
|
|
.to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=porntrex.com".to_string(),
|
|
status: "active".to_string(),
|
|
categories: categories.iter().map(|value| value.title.clone()).collect(),
|
|
options: vec![
|
|
ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse PornTrex ranking feeds.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![
|
|
FilterOption {
|
|
id: "new".to_string(),
|
|
title: "Latest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "popular".to_string(),
|
|
title: "Most Viewed".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "rated".to_string(),
|
|
title: "Top Rated".to_string(),
|
|
},
|
|
],
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "categories".to_string(),
|
|
title: "Categories".to_string(),
|
|
description: "Browse a PornTrex category archive.".to_string(),
|
|
systemImage: "square.grid.2x2".to_string(),
|
|
colorName: "orange".to_string(),
|
|
options: categories,
|
|
multiSelect: false,
|
|
},
|
|
],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
|
}
|
|
|
|
fn decode_html(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
}
|
|
|
|
/// Trims `text` and replaces every run of whitespace with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
|
|
|
fn text_of(element: &ElementRef<'_>) -> String {
|
|
Self::decode_html(&Self::collapse_whitespace(
|
|
&element.text().collect::<Vec<_>>().join(" "),
|
|
))
|
|
}
|
|
|
|
/// Produces the comparison key for a title: surrounding whitespace and
/// leading `#` characters removed, `_` and `-` treated as spaces, whitespace
/// collapsed, ASCII letters lower-cased.
fn normalize_title(title: &str) -> String {
    let stripped = title.trim().trim_start_matches('#');
    let spaced = stripped.replace(['_', '-'], " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();

    let mut normalized = words.join(" ");
    normalized.make_ascii_lowercase();
    normalized
}
|
|
|
|
fn normalize_url(&self, url: &str) -> String {
|
|
let trimmed = url.trim();
|
|
if trimmed.is_empty() {
|
|
return String::new();
|
|
}
|
|
if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
|
|
return trimmed.to_string();
|
|
}
|
|
if trimmed.starts_with("//") {
|
|
return format!("https:{trimmed}");
|
|
}
|
|
if trimmed.starts_with('/') {
|
|
return format!("{}{}", self.url, trimmed);
|
|
}
|
|
format!("{}/{}", self.url, trimmed.trim_start_matches("./"))
|
|
}
|
|
|
|
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
|
("Referer".to_string(), referer.to_string()),
|
|
]
|
|
}
|
|
|
|
fn build_search_path(query: &str) -> String {
|
|
query
|
|
.split_whitespace()
|
|
.map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string())
|
|
.collect::<Vec<_>>()
|
|
.join("-")
|
|
}
|
|
|
|
/// Returns the URL of page `page` of an archive.
///
/// Pages 0 and 1 map to the archive URL itself (with exactly one trailing
/// slash); later pages append `/{page}/`.
fn build_archive_page_url(archive_url: &str, page: u16) -> String {
    let base = archive_url.trim_end_matches('/');
    match page {
        0 | 1 => format!("{base}/"),
        number => format!("{base}/{number}/"),
    }
}
|
|
|
|
fn archive_target(url: String, page_mode: PageMode) -> Target {
|
|
Target::Archive { url, page_mode }
|
|
}
|
|
|
|
fn build_target_url(&self, target: &Target, page: u16) -> String {
|
|
match target {
|
|
Target::Latest => {
|
|
Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page)
|
|
}
|
|
Target::Popular => {
|
|
Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page)
|
|
}
|
|
Target::TopRated => {
|
|
Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page)
|
|
}
|
|
Target::Search(query) => Self::build_archive_page_url(
|
|
&format!("{}/search/{}/", self.url, Self::build_search_path(query)),
|
|
page,
|
|
),
|
|
Target::Archive { url, page_mode } => match page_mode {
|
|
PageMode::SitePaged => Self::build_archive_page_url(url, page),
|
|
PageMode::LocalSlice => Self::build_archive_page_url(url, 1),
|
|
},
|
|
}
|
|
}
|
|
|
|
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
|
|
requester
|
|
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
|
|
.await
|
|
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
|
}
|
|
|
|
/// Extracts the single path segment that follows `prefix` in `href`.
///
/// Returns `None` when `href` does not start with `prefix`, when nothing
/// follows the prefix, or when more than one segment follows it.
fn slug_remainder(href: &str, prefix: &str) -> Option<String> {
    href.trim()
        .trim_end_matches('/')
        .strip_prefix(prefix)
        .map(|rest| rest.trim_matches('/'))
        .filter(|slug| !slug.is_empty() && !slug.contains('/'))
        .map(str::to_string)
}
|
|
|
|
fn push_category(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
|
if item.id.is_empty() || item.title.is_empty() {
|
|
return;
|
|
}
|
|
|
|
if let Ok(mut values) = target.write() {
|
|
let normalized = Self::normalize_title(&item.title);
|
|
if !values
|
|
.iter()
|
|
.any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized)
|
|
{
|
|
values.push(item);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn insert_tag_mapping(target: &Arc<RwLock<HashMap<String, String>>>, title: &str, href: &str) {
|
|
let normalized_title = Self::normalize_title(title);
|
|
if normalized_title.is_empty() || href.is_empty() {
|
|
return;
|
|
}
|
|
|
|
if let Ok(mut values) = target.write() {
|
|
values.insert(normalized_title, href.to_string());
|
|
}
|
|
}
|
|
|
|
async fn load_categories(
|
|
base_url: &str,
|
|
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let page_url = format!("{base_url}/categories/");
|
|
let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?;
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("a.item[href]")?;
|
|
let prefix = format!("{base_url}/categories/");
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let Some(_slug) = Self::slug_remainder(href, &prefix) else {
|
|
continue;
|
|
};
|
|
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_html)
|
|
.filter(|value| !value.trim().is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
let title = title.trim().to_string();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
Self::push_category(
|
|
&categories,
|
|
FilterOption {
|
|
id: format!("{}/", href.trim_end_matches('/')),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, String>>>) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let page_url = format!("{base_url}/tags/");
|
|
let html = Self::fetch_html(&mut requester, &page_url, &page_url).await?;
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("div.list-tags a[href]")?;
|
|
let prefix = format!("{base_url}/tags/");
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let Some(slug) = Self::slug_remainder(href, &prefix) else {
|
|
continue;
|
|
};
|
|
let title = Self::text_of(&element);
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
let canonical = format!("{}/", href.trim_end_matches('/'));
|
|
Self::insert_tag_mapping(&tag_map, &title, &canonical);
|
|
Self::insert_tag_mapping(&tag_map, &slug, &canonical);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn parse_duration(text: &str) -> u32 {
|
|
parse_time_to_seconds(text)
|
|
.and_then(|value| u32::try_from(value).ok())
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
fn parse_views(text: &str) -> Option<u32> {
|
|
let cleaned = text
|
|
.replace("views", "")
|
|
.replace("view", "")
|
|
.replace([',', ' '], "");
|
|
parse_abbreviated_number(cleaned.trim())
|
|
}
|
|
|
|
/// Parses a rating label by keeping only its ASCII digits and dots and
/// reading the result as an `f32`.
///
/// Returns `None` when what remains is not a valid number (for example when
/// it is empty or contains several dots).
fn parse_rating(text: &str) -> Option<f32> {
    let mut numeric = String::new();
    for symbol in text.chars() {
        if symbol == '.' || symbol.is_ascii_digit() {
            numeric.push(symbol);
        }
    }
    numeric.parse().ok()
}
|
|
|
|
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_document(html);
|
|
let card_selector = Self::selector("div.video-preview-screen.video-item.thumb-item")?;
|
|
let link_selector = Self::selector("a[href*=\"/video/\"]")?;
|
|
let title_link_selector = Self::selector("p.inf a[href*=\"/video/\"], a[title][href*=\"/video/\"]")?;
|
|
let image_selector = Self::selector("img.cover")?;
|
|
let duration_selector = Self::selector("div.durations")?;
|
|
let views_selector = Self::selector("div.viewsthumb")?;
|
|
let rating_selector = Self::selector("ul.list-unstyled li.pull-right")?;
|
|
|
|
let mut items = Vec::new();
|
|
|
|
for card in document.select(&card_selector) {
|
|
let Some(link) = card.select(&link_selector).next() else {
|
|
continue;
|
|
};
|
|
|
|
let href = link.value().attr("href").unwrap_or_default();
|
|
let page_url = self.normalize_url(href);
|
|
if page_url.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let id = card
|
|
.value()
|
|
.attr("data-item-id")
|
|
.map(str::to_string)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| {
|
|
page_url
|
|
.trim_end_matches('/')
|
|
.split('/')
|
|
.nth_back(1)
|
|
.unwrap_or_default()
|
|
.to_string()
|
|
});
|
|
if id.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let image = card.select(&image_selector).next();
|
|
let thumb = image
|
|
.and_then(|value| value.value().attr("data-src").or_else(|| value.value().attr("src")))
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_default();
|
|
|
|
let title = card
|
|
.select(&title_link_selector)
|
|
.next()
|
|
.or_else(|| card.select(&link_selector).find(|value| value.value().attr("title").is_some()))
|
|
.and_then(|value| value.value().attr("title").map(Self::decode_html).or_else(|| {
|
|
let text = Self::text_of(&value);
|
|
(!text.is_empty()).then_some(text)
|
|
}))
|
|
.filter(|value| !value.trim().is_empty())
|
|
.unwrap_or_else(|| {
|
|
image
|
|
.and_then(|value| value.value().attr("alt").map(Self::decode_html))
|
|
.unwrap_or_default()
|
|
});
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let duration = card
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|value| Self::parse_duration(&Self::text_of(&value)))
|
|
.unwrap_or(0);
|
|
let views = card
|
|
.select(&views_selector)
|
|
.next()
|
|
.and_then(|value| Self::parse_views(&Self::text_of(&value)));
|
|
let rating = card
|
|
.select(&rating_selector)
|
|
.next()
|
|
.and_then(|value| Self::parse_rating(&Self::text_of(&value)));
|
|
|
|
let mut item = VideoItem::new(
|
|
id,
|
|
title.trim().to_string(),
|
|
page_url,
|
|
CHANNEL_ID.to_string(),
|
|
thumb,
|
|
duration,
|
|
);
|
|
item.views = views;
|
|
item.rating = rating;
|
|
items.push(item);
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn parse_format_urls(html: &str) -> Result<Vec<VideoFormat>> {
|
|
let pairs = [
|
|
("video_url", "video_url_text"),
|
|
("video_alt_url", "video_alt_url_text"),
|
|
("video_alt_url2", "video_alt_url2_text"),
|
|
("video_alt_url3", "video_alt_url3_text"),
|
|
];
|
|
let mut formats = Vec::new();
|
|
let mut seen = std::collections::HashSet::new();
|
|
|
|
for (url_key, label_key) in pairs {
|
|
let url_re = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#))?;
|
|
let label_re = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#))?;
|
|
|
|
let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else {
|
|
continue;
|
|
};
|
|
let url = url_match.as_str().replace("\\/", "/");
|
|
if !seen.insert(url.clone()) {
|
|
continue;
|
|
}
|
|
|
|
let label = label_re
|
|
.captures(html)
|
|
.and_then(|value| value.get(1))
|
|
.map(|value| value.as_str().trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| "mp4".to_string());
|
|
let normalized_label = label.replace(" HD", "").replace(" FHD", "").trim().to_string();
|
|
let format = VideoFormat::new(url, normalized_label.clone(), "mp4".to_string())
|
|
.format_id(normalized_label.clone())
|
|
.format_note(label);
|
|
formats.push(format);
|
|
}
|
|
|
|
Ok(formats)
|
|
}
|
|
|
|
fn parse_aspect_ratio(html: &str) -> Result<Option<f32>> {
|
|
let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#)?;
|
|
let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#)?;
|
|
let Some(width) = width_re
|
|
.captures(html)
|
|
.and_then(|value| value.get(1))
|
|
.and_then(|value| value.as_str().parse::<f32>().ok())
|
|
else {
|
|
return Ok(None);
|
|
};
|
|
let Some(height) = height_re
|
|
.captures(html)
|
|
.and_then(|value| value.get(1))
|
|
.and_then(|value| value.as_str().parse::<f32>().ok())
|
|
else {
|
|
return Ok(None);
|
|
};
|
|
|
|
Ok((height > 0.0).then_some(width / height))
|
|
}
|
|
|
|
fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
|
|
let category_selector = Self::selector("div.items-holder.js-categories a[href*=\"/categories/\"]")?;
|
|
let tag_selector = Self::selector("div.item a[href*=\"/tags/\"]")?;
|
|
let mut values = Vec::new();
|
|
|
|
for element in document.select(&category_selector) {
|
|
let value = Self::text_of(&element);
|
|
if !value.is_empty() {
|
|
values.push(value);
|
|
}
|
|
}
|
|
|
|
for element in document.select(&tag_selector) {
|
|
let value = Self::text_of(&element);
|
|
if !value.is_empty() {
|
|
values.push(value);
|
|
}
|
|
}
|
|
|
|
for pattern in [r#"video_categories:\s*'([^']*)'"#, r#"video_tags:\s*'([^']*)'"#] {
|
|
let re = Self::regex(pattern)?;
|
|
if let Some(raw) = re.captures(html).and_then(|value| value.get(1)) {
|
|
for entry in raw
|
|
.as_str()
|
|
.split(',')
|
|
.map(str::trim)
|
|
.map(Self::decode_html)
|
|
.filter(|value| !value.is_empty())
|
|
{
|
|
values.push(entry);
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut unique = Vec::new();
|
|
for value in values {
|
|
let normalized = Self::normalize_title(&value);
|
|
if normalized.is_empty() || normalized == "-" {
|
|
continue;
|
|
}
|
|
if !unique
|
|
.iter()
|
|
.any(|existing: &String| Self::normalize_title(existing) == normalized)
|
|
{
|
|
unique.push(value);
|
|
}
|
|
}
|
|
|
|
Ok(unique)
|
|
}
|
|
|
|
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
|
let document = Html::parse_document(html);
|
|
let title_selector = Self::selector("h1")?;
|
|
let uploader_selector =
|
|
Self::selector("div.info-block div.block-user div.username a[href*=\"/members/\"]")?;
|
|
let stat_selector = Self::selector("div.info-block div.item span")?;
|
|
|
|
if let Some(title) = document
|
|
.select(&title_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.filter(|value| !value.is_empty())
|
|
{
|
|
item.title = title;
|
|
}
|
|
|
|
if let Some(uploader) = document.select(&uploader_selector).next() {
|
|
let uploader_name = Self::text_of(&uploader);
|
|
let uploader_url = uploader
|
|
.value()
|
|
.attr("href")
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_default();
|
|
if !uploader_name.is_empty() {
|
|
item.uploader = Some(uploader_name);
|
|
}
|
|
if !uploader_url.is_empty() {
|
|
let uploader_id = uploader_url
|
|
.trim_end_matches('/')
|
|
.split('/')
|
|
.next_back()
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
item.uploaderUrl = Some(uploader_url);
|
|
if !uploader_id.is_empty() {
|
|
item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
|
|
}
|
|
}
|
|
}
|
|
|
|
for stat in document.select(&stat_selector).map(|value| Self::text_of(&value)) {
|
|
if item.views.is_none() {
|
|
item.views = Self::parse_views(&stat);
|
|
}
|
|
if item.duration == 0 {
|
|
let duration = Self::parse_duration(&stat);
|
|
if duration > 0 {
|
|
item.duration = duration;
|
|
}
|
|
}
|
|
}
|
|
|
|
let tags = Self::collect_tags(&document, html)?;
|
|
if !tags.is_empty() {
|
|
item.tags = Some(tags);
|
|
}
|
|
|
|
// let formats = Self::parse_format_urls(html)?;
|
|
// if !formats.is_empty() {
|
|
// item.formats = Some(formats);
|
|
// }
|
|
|
|
if item.aspectRatio.is_none() {
|
|
item.aspectRatio = Self::parse_aspect_ratio(html)?;
|
|
}
|
|
|
|
Ok(item)
|
|
}
|
|
|
|
async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
|
let mut requester =
|
|
requester_or_default(options, CHANNEL_ID, "porntrex.enrich_item.missing_requester");
|
|
|
|
match Self::fetch_html(&mut requester, &item.url, &item.url).await {
|
|
Ok(html) => match self.apply_detail_video(item.clone(), &html) {
|
|
Ok(value) => value,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"enrich_item.apply_detail_video",
|
|
&format!("url={}; error={error}", item.url),
|
|
);
|
|
item
|
|
}
|
|
},
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"enrich_item.fetch_html",
|
|
&format!("url={}; error={error}", item.url),
|
|
);
|
|
item
|
|
}
|
|
}
|
|
}
|
|
|
|
fn resolve_sort_target(sort: &str) -> Target {
|
|
match sort.trim().to_ascii_lowercase().as_str() {
|
|
"popular" | "viewed" | "most_viewed" => Target::Popular,
|
|
"rated" | "rating" | "top" => Target::TopRated,
|
|
_ => Target::Latest,
|
|
}
|
|
}
|
|
|
|
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
|
|
if let Some(category) = options.categories.as_deref() {
|
|
if category.starts_with(&self.url) && category != "all" {
|
|
return Self::archive_target(category.to_string(), PageMode::SitePaged);
|
|
}
|
|
}
|
|
Self::resolve_sort_target(sort)
|
|
}
|
|
|
|
fn lookup_category_target(&self, query: &str) -> Option<String> {
|
|
let normalized_query = Self::normalize_title(query);
|
|
self.categories
|
|
.read()
|
|
.ok()?
|
|
.iter()
|
|
.find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query)
|
|
.map(|value| value.id.clone())
|
|
}
|
|
|
|
fn resolve_query_target(&self, query: &str) -> Target {
|
|
let trimmed = query.trim().trim_start_matches('@');
|
|
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
|
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
|
if !value.is_empty() {
|
|
match kind.trim().to_ascii_lowercase().as_str() {
|
|
"tag" | "tags" => {
|
|
return Self::archive_target(
|
|
format!("{}/tags/{value}/", self.url),
|
|
PageMode::LocalSlice,
|
|
);
|
|
}
|
|
"category" | "categories" => {
|
|
return Self::archive_target(
|
|
format!("{}/categories/{value}/", self.url),
|
|
PageMode::SitePaged,
|
|
);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(category) = self.lookup_category_target(trimmed) {
|
|
return Self::archive_target(category, PageMode::SitePaged);
|
|
}
|
|
|
|
let normalized = Self::normalize_title(trimmed);
|
|
if let Some(target) = self
|
|
.tag_map
|
|
.read()
|
|
.ok()
|
|
.and_then(|value| value.get(&normalized).cloned())
|
|
{
|
|
return Self::archive_target(target, PageMode::LocalSlice);
|
|
}
|
|
|
|
Target::Search(trimmed.to_string())
|
|
}
|
|
|
|
fn catalogs_need_refresh(&self) -> bool {
|
|
let categories_len = self
|
|
.categories
|
|
.read()
|
|
.map(|value| value.len())
|
|
.unwrap_or_default();
|
|
let tag_count = self
|
|
.tag_map
|
|
.read()
|
|
.map(|value| value.len())
|
|
.unwrap_or_default();
|
|
categories_len <= 1 || tag_count == 0
|
|
}
|
|
|
|
async fn refresh_catalogs(&self) {
|
|
if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_catalogs.categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_catalogs.tags",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
}
|
|
|
|
async fn fetch_target(
|
|
&self,
|
|
cache: VideoCache,
|
|
target: Target,
|
|
page: u16,
|
|
per_page_limit: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let page_mode = match &target {
|
|
Target::Archive { page_mode, .. } => *page_mode,
|
|
_ => PageMode::SitePaged,
|
|
};
|
|
let source_url = self.build_target_url(&target, page);
|
|
let cache_key = match page_mode {
|
|
PageMode::SitePaged => source_url.clone(),
|
|
PageMode::LocalSlice => format!("{source_url}#page={page}&per_page={per_page_limit}"),
|
|
};
|
|
let old_items = match cache.get(&cache_key) {
|
|
Some((time, items)) => {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
|
return Ok(items.clone());
|
|
}
|
|
items.clone()
|
|
}
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester =
|
|
requester_or_default(&options, CHANNEL_ID, "porntrex.fetch_target.missing_requester");
|
|
let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await {
|
|
Ok(value) => value,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"fetch_target.request",
|
|
&format!("url={source_url}; error={error}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
if html.trim().is_empty() {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"fetch_target.empty_response",
|
|
&format!("url={source_url}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let items = self.parse_list_videos(&html)?;
|
|
if items.is_empty() {
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let limited_items = match page_mode {
|
|
PageMode::SitePaged => items
|
|
.into_iter()
|
|
.take(per_page_limit.max(1))
|
|
.collect::<Vec<_>>(),
|
|
PageMode::LocalSlice => {
|
|
let start = page.saturating_sub(1) as usize * per_page_limit.max(1);
|
|
items.into_iter()
|
|
.skip(start)
|
|
.take(per_page_limit.max(1))
|
|
.collect::<Vec<_>>()
|
|
}
|
|
};
|
|
|
|
if limited_items.is_empty() {
|
|
cache.insert(cache_key, vec![]);
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let enriched = stream::iter(limited_items.into_iter().map(|item| {
|
|
let provider = self.clone();
|
|
let options = options.clone();
|
|
async move { provider.enrich_item(item, &options).await }
|
|
}))
|
|
.buffer_unordered(4)
|
|
.collect::<Vec<_>>()
|
|
.await;
|
|
|
|
cache.remove(&cache_key);
|
|
cache.insert(cache_key, enriched.clone());
|
|
Ok(enriched)
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for PorntrexProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
_pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
|
let per_page_limit = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
|
|
|
|
let target = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
let query = query.trim();
|
|
let mut target = self.resolve_query_target(query);
|
|
if matches!(target, Target::Search(_)) && self.catalogs_need_refresh() {
|
|
self.refresh_catalogs().await;
|
|
target = self.resolve_query_target(query);
|
|
}
|
|
target
|
|
}
|
|
_ => self.resolve_option_target(&options, &sort),
|
|
};
|
|
|
|
match self
|
|
.fetch_target(cache, target, page, per_page_limit, options.clone())
|
|
.await
|
|
{
|
|
Ok(items) => items,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"get_videos.fetch_target",
|
|
&format!("sort={sort}; page={page}; error={error}"),
|
|
)
|
|
.await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Search queries are slugified and the page number is appended.
    #[test]
    fn builds_search_url() {
        let provider = PorntrexProvider::new();
        let target = Target::Search("adriana chechik".to_string());

        let url = provider.build_target_url(&target, 2);

        assert_eq!(url, "https://www.porntrex.com/search/adriana-chechik/2/");
    }

    /// A `tag:` prefix resolves to a locally sliced tag archive.
    #[test]
    fn resolves_tag_prefix() {
        let provider = PorntrexProvider::new();

        let Target::Archive { url, page_mode } = provider.resolve_query_target("tag:blowjob")
        else {
            panic!("expected archive target");
        };

        assert_eq!(url, "https://www.porntrex.com/tags/blowjob/");
        assert_eq!(page_mode, PageMode::LocalSlice);
    }

    /// Locally sliced archives ignore the page number when building the URL.
    #[test]
    fn builds_local_slice_archive_url_without_numeric_page() {
        let provider = PorntrexProvider::new();
        let archive_url = "https://www.porntrex.com/tags/anal-creampie/";
        let target =
            PorntrexProvider::archive_target(archive_url.to_string(), PageMode::LocalSlice);

        let url = provider.build_target_url(&target, 3);

        assert_eq!(url, "https://www.porntrex.com/tags/anal-creampie/");
    }
}
|