1249 lines
44 KiB
Rust
1249 lines
44 KiB
Rust
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::requester::Requester;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
use async_trait::async_trait;
|
|
use chrono::{DateTime, Utc};
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use regex::Regex;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use std::sync::{Arc, RwLock};
|
|
use std::{thread, vec};
|
|
use url::Url;
|
|
use wreq::Version;
|
|
|
|
/// Static metadata describing this provider's channel: the provider group it
/// is listed under and the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "studios", "actors"],
    };
|
|
|
|
/// Root URL of the site; relative links are resolved against it.
const BASE_URL: &str = "https://pornmz.com";

/// Channel identifier, also used to label provider error reports.
const CHANNEL_ID: &str = "pornmz";

/// Desktop Firefox `User-Agent` header value sent with HTML requests.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";

/// `Accept` header value sent with HTML requests.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
|
|
|
// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    // I/O errors convert into `Error` automatically through `?`.
    foreign_links {
        Io(std::io::Error);
    }

    // Parse failure carrying a human-readable message.
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct PornmzProvider {
|
|
url: String,
|
|
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
tags: Arc<RwLock<Vec<FilterOption>>>,
|
|
uploaders: Arc<RwLock<Vec<FilterOption>>>,
|
|
}
|
|
|
|
/// What a listing request should fetch.
#[derive(Debug, Clone)]
enum Target {
    /// The site's front page listing.
    Home,
    /// A free-text search; the payload is the query string.
    Search(String),
    /// A specific archive page; the payload is its URL.
    Archive(String),
}
|
|
|
|
impl PornmzProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
categories: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
tags: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
uploaders: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let url = self.url.clone();
|
|
let categories = Arc::clone(&self.categories);
|
|
let tags = Arc::clone(&self.tags);
|
|
let uploaders = Arc::clone(&self.uploaders);
|
|
|
|
thread::spawn(move || {
|
|
let runtime = match tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
{
|
|
Ok(runtime) => runtime,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"spawn_initial_load.runtime_build",
|
|
&error.to_string(),
|
|
);
|
|
return;
|
|
}
|
|
};
|
|
|
|
runtime.block_on(async move {
|
|
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"load_categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
|
|
report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
|
|
}
|
|
if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"load_uploaders",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
let categories = self
|
|
.categories
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
|
|
let uploaders = self
|
|
.uploaders
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
|
|
Channel {
|
|
id: CHANNEL_ID.to_string(),
|
|
name: "Pornmz".to_string(),
|
|
description:
|
|
"Pornmz videos with latest, best, most-viewed, longest, random, category, tag, and actor archives."
|
|
.to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornmz.com".to_string(),
|
|
status: "active".to_string(),
|
|
categories: categories.iter().map(|value| value.title.clone()).collect(),
|
|
options: vec![
|
|
ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse Pornmz archives by ranking.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![
|
|
FilterOption {
|
|
id: "latest".to_string(),
|
|
title: "Newest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "popular".to_string(),
|
|
title: "Best".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "most-viewed".to_string(),
|
|
title: "Most Viewed".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "longest".to_string(),
|
|
title: "Longest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "random".to_string(),
|
|
title: "Random".to_string(),
|
|
},
|
|
],
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "categories".to_string(),
|
|
title: "Categories".to_string(),
|
|
description: "Browse a Pornmz category archive.".to_string(),
|
|
systemImage: "square.grid.2x2".to_string(),
|
|
colorName: "orange".to_string(),
|
|
options: categories,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "filter".to_string(),
|
|
title: "Tags".to_string(),
|
|
description: "Browse a Pornmz tag archive.".to_string(),
|
|
systemImage: "tag.fill".to_string(),
|
|
colorName: "green".to_string(),
|
|
options: tags,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "sites".to_string(),
|
|
title: "Actors".to_string(),
|
|
description: "Browse a Pornmz actor archive.".to_string(),
|
|
systemImage: "person.crop.square".to_string(),
|
|
colorName: "purple".to_string(),
|
|
options: uploaders,
|
|
multiSelect: false,
|
|
},
|
|
],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
|
}
|
|
|
|
fn decode_html(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
}
|
|
|
|
/// Replaces every run of whitespace with a single space and drops leading
/// and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
|
|
|
fn text_of(element: &ElementRef<'_>) -> String {
|
|
Self::decode_html(&Self::collapse_whitespace(
|
|
&element.text().collect::<Vec<_>>().join(" "),
|
|
))
|
|
}
|
|
|
|
/// Produces the comparison key for a title: surrounding whitespace and
/// leading `#` characters removed, inner whitespace collapsed to single
/// spaces, ASCII letters lower-cased.
fn normalize_title(title: &str) -> String {
    let without_hashes = title.trim().trim_start_matches('#');
    let mut normalized = String::with_capacity(without_hashes.len());
    for word in without_hashes.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
|
|
|
|
fn normalize_url(&self, url: &str) -> String {
|
|
if url.is_empty() {
|
|
return String::new();
|
|
}
|
|
if url.starts_with("http://") || url.starts_with("https://") {
|
|
return url.to_string();
|
|
}
|
|
if url.starts_with("//") {
|
|
return format!("https:{url}");
|
|
}
|
|
if url.starts_with('?') {
|
|
return format!("{}{url}", self.url);
|
|
}
|
|
if url.starts_with('/') {
|
|
return format!("{}{}", self.url, url);
|
|
}
|
|
format!("{}/{}", self.url, url.trim_start_matches("./"))
|
|
}
|
|
|
|
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
|
("Referer".to_string(), referer.to_string()),
|
|
]
|
|
}
|
|
|
|
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
|
|
requester
|
|
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
|
|
.await
|
|
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
|
}
|
|
|
|
fn discover_last_page(html: &str) -> u16 {
|
|
let Ok(re) = Self::regex(r#"/page/([0-9]+)"#) else {
|
|
return 1;
|
|
};
|
|
re.captures_iter(html)
|
|
.filter_map(|caps| caps.get(1).and_then(|value| value.as_str().parse::<u16>().ok()))
|
|
.max()
|
|
.unwrap_or(1)
|
|
}
|
|
|
|
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
|
if item.id.is_empty() || item.title.is_empty() {
|
|
return;
|
|
}
|
|
let normalized = Self::normalize_title(&item.title);
|
|
if normalized.is_empty() {
|
|
return;
|
|
}
|
|
|
|
if let Ok(mut values) = target.write() {
|
|
if values.iter().any(|value| {
|
|
value.id == item.id || Self::normalize_title(&value.title) == normalized
|
|
}) {
|
|
return;
|
|
}
|
|
values.push(item);
|
|
}
|
|
}
|
|
|
|
fn clean_filter_title(title: &str) -> String {
|
|
let mut parts = Self::decode_html(title)
|
|
.trim()
|
|
.trim_start_matches('#')
|
|
.split_whitespace()
|
|
.map(ToOwned::to_owned)
|
|
.collect::<Vec<_>>();
|
|
|
|
if parts.len() > 1
|
|
&& parts
|
|
.last()
|
|
.is_some_and(|value| value.chars().all(|ch| ch.is_ascii_digit()))
|
|
{
|
|
parts.pop();
|
|
}
|
|
|
|
parts.join(" ").trim().to_string()
|
|
}
|
|
|
|
/// Converts a dash-separated slug into a space-separated title.
///
/// Empty segments are skipped, all-digit segments are kept as they are, and
/// every other segment gets its first character upper-cased.
fn humanize_slug(slug: &str) -> String {
    let mut words: Vec<String> = Vec::new();
    for part in slug.split('-') {
        let mut rest = part.chars();
        // `None` means the segment is empty (e.g. between two dashes).
        let Some(first) = rest.next() else {
            continue;
        };
        if part.chars().all(|ch| ch.is_ascii_digit()) {
            words.push(part.to_string());
        } else {
            // Upper-casing one character may yield several characters.
            let mut word: String = first.to_uppercase().collect();
            word.push_str(rest.as_str());
            words.push(word);
        }
    }
    words.join(" ")
}
|
|
|
|
fn merge_tag(target: &mut Vec<String>, value: String) {
|
|
let normalized = Self::normalize_title(&value);
|
|
if normalized.is_empty() {
|
|
return;
|
|
}
|
|
if target
|
|
.iter()
|
|
.any(|existing| Self::normalize_title(existing) == normalized)
|
|
{
|
|
return;
|
|
}
|
|
target.push(value);
|
|
}
|
|
|
|
fn tags_from_card(card: &ElementRef<'_>) -> Vec<String> {
|
|
let mut tags = Vec::new();
|
|
for class_name in card.value().classes() {
|
|
let slug = class_name
|
|
.strip_prefix("tag-")
|
|
.or_else(|| class_name.strip_prefix("category-"))
|
|
.or_else(|| class_name.strip_prefix("actors-"));
|
|
let Some(slug) = slug else {
|
|
continue;
|
|
};
|
|
if slug.chars().all(|ch| ch.is_ascii_digit()) {
|
|
continue;
|
|
}
|
|
Self::merge_tag(&mut tags, Self::humanize_slug(slug));
|
|
}
|
|
tags
|
|
}
|
|
|
|
async fn load_categories(base_url: &str, categories: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let first_url = format!("{base_url}/categories");
|
|
let first_html = Self::fetch_html(&mut requester, &first_url, base_url).await?;
|
|
let max_pages = Self::discover_last_page(&first_html).max(1);
|
|
|
|
for page in 1..=max_pages {
|
|
let url = if page == 1 {
|
|
first_url.clone()
|
|
} else {
|
|
format!("{base_url}/categories/page/{page}")
|
|
};
|
|
let html = if page == 1 {
|
|
first_html.clone()
|
|
} else {
|
|
Self::fetch_html(&mut requester, &url, base_url).await?
|
|
};
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("a[href*=\"/pmvideo/c/\"]")?;
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let title = Self::clean_filter_title(&Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
let normalized = if href.starts_with("http") {
|
|
href.to_string()
|
|
} else {
|
|
format!(
|
|
"{base_url}/{}",
|
|
href.trim_start_matches('/').trim_end_matches('/')
|
|
)
|
|
};
|
|
Self::push_unique(
|
|
&categories,
|
|
FilterOption {
|
|
id: normalized,
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let html = Self::fetch_html(&mut requester, &format!("{base_url}/tags"), base_url).await?;
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("a[href*=\"/pmvideo/s/\"]")?;
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let title = Self::clean_filter_title(&Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
let normalized = if href.starts_with("http") {
|
|
href.to_string()
|
|
} else {
|
|
format!(
|
|
"{base_url}/{}",
|
|
href.trim_start_matches('/').trim_end_matches('/')
|
|
)
|
|
};
|
|
Self::push_unique(
|
|
&tags,
|
|
FilterOption {
|
|
id: normalized,
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn canonical_actor_url(base_url: &str, href: &str) -> String {
|
|
if let Ok(url) = Url::parse(href) {
|
|
if let Some((_, slug)) = url.query_pairs().find(|(key, _)| key == "actors") {
|
|
return format!("{base_url}/video/id=pmactor/{}", slug.trim());
|
|
}
|
|
}
|
|
if let Some(slug) = href.split("actors=").nth(1) {
|
|
return format!("{base_url}/video/id=pmactor/{}", slug.trim());
|
|
}
|
|
if href.starts_with("http://") || href.starts_with("https://") {
|
|
return href.to_string();
|
|
}
|
|
if href.starts_with('/') {
|
|
return format!("{base_url}{href}");
|
|
}
|
|
format!("{base_url}/{}", href.trim_start_matches("./"))
|
|
}
|
|
|
|
async fn load_uploaders(base_url: &str, uploaders: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let first_url = format!("{base_url}/actors");
|
|
let first_html = Self::fetch_html(&mut requester, &first_url, base_url).await?;
|
|
let max_pages = Self::discover_last_page(&first_html).max(1);
|
|
|
|
for page in 1..=max_pages {
|
|
let url = if page == 1 {
|
|
first_url.clone()
|
|
} else {
|
|
format!("{base_url}/actors/page/{page}")
|
|
};
|
|
let html = if page == 1 {
|
|
first_html.clone()
|
|
} else {
|
|
Self::fetch_html(&mut requester, &url, base_url).await?
|
|
};
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("article.thumb-block a[href*=\"actors=\"]")?;
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_html)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
Self::push_unique(
|
|
&uploaders,
|
|
FilterOption {
|
|
id: Self::canonical_actor_url(base_url, href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn search_filters_need_refresh(&self) -> bool {
|
|
let categories_len = self
|
|
.categories
|
|
.read()
|
|
.map(|values| values.len())
|
|
.unwrap_or_default();
|
|
let tags_len = self.tags.read().map(|values| values.len()).unwrap_or_default();
|
|
|
|
categories_len <= 1 || tags_len <= 1
|
|
}
|
|
|
|
async fn refresh_search_filters(&self) {
|
|
if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_search_filters.categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tags)).await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_search_filters.tags",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
}
|
|
|
|
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
|
|
let normalized_query = Self::normalize_title(query);
|
|
options
|
|
.iter()
|
|
.find(|value| {
|
|
value.id != "all" && Self::normalize_title(&value.title) == normalized_query
|
|
})
|
|
.map(|value| value.id.clone())
|
|
}
|
|
|
|
fn resolve_option_target(&self, options: &ServerOptions) -> Target {
|
|
if let Some(actor) = options.sites.as_deref() {
|
|
if actor.starts_with(&self.url) && actor != "all" {
|
|
return Target::Archive(actor.to_string());
|
|
}
|
|
}
|
|
if let Some(tag) = options.filter.as_deref() {
|
|
if tag.starts_with(&self.url) && tag != "all" {
|
|
return Target::Archive(tag.to_string());
|
|
}
|
|
}
|
|
if let Some(category) = options.categories.as_deref() {
|
|
if category.starts_with(&self.url) && category != "all" {
|
|
return Target::Archive(category.to_string());
|
|
}
|
|
}
|
|
Target::Home
|
|
}
|
|
|
|
fn resolve_query_target(&self, query: &str) -> Target {
|
|
if let Ok(uploaders) = self.uploaders.read() {
|
|
if let Some(value) = Self::match_filter(&uploaders, query) {
|
|
return Target::Archive(value);
|
|
}
|
|
}
|
|
if let Ok(tags) = self.tags.read() {
|
|
if let Some(value) = Self::match_filter(&tags, query) {
|
|
return Target::Archive(value);
|
|
}
|
|
}
|
|
if let Ok(categories) = self.categories.read() {
|
|
if let Some(value) = Self::match_filter(&categories, query) {
|
|
return Target::Archive(value);
|
|
}
|
|
}
|
|
Target::Search(query.to_string())
|
|
}
|
|
|
|
/// Builds a URL slug from a query: characters are lower-cased, ASCII letters
/// and digits are kept, every run of other characters becomes a single dash,
/// and leading/trailing dashes are removed.
fn slugify_query(query: &str) -> String {
    let mut slug = String::with_capacity(query.len());
    for ch in query.chars().flat_map(char::to_lowercase) {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch);
        } else if !slug.ends_with('-') {
            // Only dashes are ever pushed for separators, so checking the
            // tail is enough to avoid doubling them.
            slug.push('-');
        }
    }
    slug.trim_matches('-').to_string()
}
|
|
|
|
async fn guess_actor_archive(
|
|
&self,
|
|
query: &str,
|
|
options: &ServerOptions,
|
|
) -> Option<String> {
|
|
let slug = Self::slugify_query(query);
|
|
if slug.is_empty() {
|
|
return None;
|
|
}
|
|
let archive_url = format!("{}/video/id=pmactor/{}", self.url, slug);
|
|
let mut requester =
|
|
requester_or_default(options, CHANNEL_ID, "pornmz.guess_actor_archive.missing_requester");
|
|
let html = Self::fetch_html(&mut requester, &archive_url, &archive_url)
|
|
.await
|
|
.ok()?;
|
|
let document = Html::parse_document(&html);
|
|
let page_title = document
|
|
.select(&Self::selector("title").ok()?)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.unwrap_or_default();
|
|
let normalized_query = Self::normalize_title(query);
|
|
if !Self::normalize_title(&page_title).contains(&normalized_query) {
|
|
return None;
|
|
}
|
|
let items = self.parse_list_videos(&html).ok()?;
|
|
(!items.is_empty()).then_some(archive_url)
|
|
}
|
|
|
|
/// Maps a requested sort name (including a few aliases) to the value of the
/// site's `filter` query parameter; unknown names fall back to `"latest"`.
fn sort_filter(sort: &str) -> &'static str {
    if matches!(sort, "popular" | "best") {
        "popular"
    } else if matches!(sort, "most-viewed" | "viewed" | "trending") {
        "most-viewed"
    } else if matches!(sort, "longest" | "duration") {
        "longest"
    } else if sort == "random" {
        "random"
    } else {
        "latest"
    }
}
|
|
|
|
fn add_sort(base: &str, sort: &str) -> String {
|
|
let mut url = Url::parse(base).unwrap_or_else(|_| Url::parse(BASE_URL).unwrap());
|
|
url.query_pairs_mut()
|
|
.append_pair("filter", Self::sort_filter(sort));
|
|
url.to_string()
|
|
}
|
|
|
|
fn build_target_url(&self, target: &Target, page: u16, sort: &str) -> String {
|
|
let base = match target {
|
|
Target::Home => format!("{}/", self.url),
|
|
Target::Search(query) => {
|
|
let mut url = Url::parse(&format!("{}/", self.url)).unwrap();
|
|
url.query_pairs_mut().append_pair("s", query);
|
|
url.to_string()
|
|
}
|
|
Target::Archive(url) => url.clone(),
|
|
};
|
|
|
|
let with_sort = Self::add_sort(&base, sort);
|
|
let mut parsed = Url::parse(&with_sort).unwrap_or_else(|_| Url::parse(&base).unwrap());
|
|
if page > 1 {
|
|
let path = parsed.path().trim_end_matches('/');
|
|
let new_path = if path.is_empty() {
|
|
format!("/page/{page}")
|
|
} else {
|
|
format!("{path}/page/{page}")
|
|
};
|
|
parsed.set_path(&new_path);
|
|
}
|
|
parsed.to_string()
|
|
}
|
|
|
|
fn parse_duration(text: &str) -> u32 {
|
|
parse_time_to_seconds(text)
|
|
.and_then(|value| u32::try_from(value).ok())
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
fn parse_views(text: &str) -> Option<u32> {
|
|
parse_abbreviated_number(text.trim())
|
|
}
|
|
|
|
fn parse_iso8601_duration(text: &str) -> Option<u32> {
|
|
let re = Self::regex(r#"P(?:\d+D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?"#).ok()?;
|
|
let caps = re.captures(text)?;
|
|
let hours = caps
|
|
.get(1)
|
|
.and_then(|value| value.as_str().parse::<u32>().ok())
|
|
.unwrap_or(0);
|
|
let minutes = caps
|
|
.get(2)
|
|
.and_then(|value| value.as_str().parse::<u32>().ok())
|
|
.unwrap_or(0);
|
|
let seconds = caps
|
|
.get(3)
|
|
.and_then(|value| value.as_str().parse::<u32>().ok())
|
|
.unwrap_or(0);
|
|
Some(hours * 3600 + minutes * 60 + seconds)
|
|
}
|
|
|
|
fn parse_uploaded_at(text: &str) -> Option<u64> {
|
|
DateTime::parse_from_rfc3339(text)
|
|
.ok()
|
|
.map(|value| value.with_timezone(&Utc).timestamp() as u64)
|
|
}
|
|
|
|
fn media_extension(url: &str) -> String {
|
|
Url::parse(url)
|
|
.ok()
|
|
.and_then(|value| {
|
|
value
|
|
.path_segments()
|
|
.and_then(|mut segments| segments.next_back().map(str::to_string))
|
|
})
|
|
.and_then(|segment| segment.rsplit('.').next().map(str::to_string))
|
|
.filter(|ext| !ext.is_empty() && !ext.contains('/'))
|
|
.unwrap_or_else(|| "auto".to_string())
|
|
}
|
|
|
|
fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
|
|
for selector_text in [
|
|
"div.videos-list",
|
|
"div.posts",
|
|
"main.site-main",
|
|
"div.content-area",
|
|
] {
|
|
let selector = Self::selector(selector_text)?;
|
|
if let Some(element) = document.select(&selector).next() {
|
|
return Ok(Some(element));
|
|
}
|
|
}
|
|
Ok(None)
|
|
}
|
|
|
|
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_document(html);
|
|
let Some(container) = self.list_container(&document)? else {
|
|
return Ok(vec![]);
|
|
};
|
|
|
|
let card_selector = Self::selector("article.thumb-block")?;
|
|
let link_selector = Self::selector("a[href*=\"/video/id=\"]")?;
|
|
let thumb_selector = Self::selector("img.video-main-thumb")?;
|
|
let title_selector = Self::selector("span.title")?;
|
|
let duration_selector = Self::selector("span.duration")?;
|
|
let views_selector = Self::selector("span.views")?;
|
|
|
|
let mut items = Vec::new();
|
|
for card in container.select(&card_selector) {
|
|
let Some(link) = card.select(&link_selector).next() else {
|
|
continue;
|
|
};
|
|
let detail_url = self.normalize_url(link.value().attr("href").unwrap_or_default());
|
|
if detail_url.is_empty() {
|
|
continue;
|
|
}
|
|
let id = detail_url
|
|
.rsplit("id=")
|
|
.next()
|
|
.unwrap_or_default()
|
|
.trim()
|
|
.to_string();
|
|
if id.is_empty() || id.starts_with("pmactor/") {
|
|
continue;
|
|
}
|
|
|
|
let title = card
|
|
.select(&title_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.filter(|value| !value.is_empty())
|
|
.or_else(|| {
|
|
link.value()
|
|
.attr("title")
|
|
.map(Self::decode_html)
|
|
.filter(|value| !value.is_empty())
|
|
});
|
|
let Some(title) = title else {
|
|
continue;
|
|
};
|
|
|
|
let thumb = card
|
|
.select(&thumb_selector)
|
|
.next()
|
|
.and_then(|value| value.value().attr("src"))
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_else(|| {
|
|
self.normalize_url(card.value().attr("data-main-thumb").unwrap_or_default())
|
|
});
|
|
|
|
let preview = self.normalize_url(card.value().attr("data-trailer").unwrap_or_default());
|
|
let duration = card
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|value| Self::parse_duration(&Self::text_of(&value)))
|
|
.unwrap_or(0);
|
|
let views = card
|
|
.select(&views_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.and_then(|value| Self::parse_views(&value));
|
|
|
|
let mut item =
|
|
VideoItem::new(id, title, detail_url, CHANNEL_ID.to_string(), thumb, duration);
|
|
if let Some(views) = views {
|
|
item.views = Some(views);
|
|
}
|
|
if !preview.is_empty() {
|
|
item.preview = Some(preview);
|
|
}
|
|
let class_tags = Self::tags_from_card(&card);
|
|
if !class_tags.is_empty() {
|
|
item.tags = Some(class_tags);
|
|
}
|
|
items.push(item);
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn meta_itemprop(document: &Html, itemprop: &str) -> Option<String> {
|
|
let selector = Self::selector(&format!("meta[itemprop=\"{itemprop}\"]")).ok()?;
|
|
document
|
|
.select(&selector)
|
|
.next()
|
|
.and_then(|value| value.value().attr("content"))
|
|
.map(Self::decode_html)
|
|
}
|
|
|
|
fn label_entries(document: &Html) -> Result<Vec<(String, String, String)>> {
|
|
let selector = Self::selector("div.video-tags a.label[href]")?;
|
|
let icon_selector = Self::selector("i")?;
|
|
let mut values = Vec::new();
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default().to_string();
|
|
let title = Self::text_of(&element);
|
|
let kind = element
|
|
.select(&icon_selector)
|
|
.next()
|
|
.and_then(|value| value.value().attr("class"))
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
if !href.is_empty() && !title.is_empty() {
|
|
values.push((href, title, kind));
|
|
}
|
|
}
|
|
|
|
Ok(values)
|
|
}
|
|
|
|
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
|
let document = Html::parse_document(html);
|
|
|
|
if let Some(title) = document
|
|
.select(&Self::selector("div.video-infos h1, h1[itemprop=\"name\"]")?)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.filter(|value| !value.is_empty())
|
|
.or_else(|| Self::meta_itemprop(&document, "name").filter(|value| !value.is_empty()))
|
|
{
|
|
item.title = title;
|
|
}
|
|
|
|
if let Some(url) = Self::meta_itemprop(&document, "contentURL").filter(|value| !value.is_empty()) {
|
|
item.url = self.normalize_url(&url);
|
|
let ext = Self::media_extension(&item.url);
|
|
item.formats = Some(vec![VideoFormat::new(
|
|
item.url.clone(),
|
|
"auto".to_string(),
|
|
ext,
|
|
)]);
|
|
}
|
|
|
|
if let Some(thumb) = Self::meta_itemprop(&document, "thumbnailUrl").filter(|value| !value.is_empty()) {
|
|
item.thumb = self.normalize_url(&thumb);
|
|
}
|
|
|
|
if let Some(duration) = Self::meta_itemprop(&document, "duration")
|
|
.and_then(|value| Self::parse_iso8601_duration(&value))
|
|
{
|
|
item.duration = duration;
|
|
}
|
|
|
|
if let Some(uploaded_at) = Self::meta_itemprop(&document, "uploadDate")
|
|
.and_then(|value| Self::parse_uploaded_at(&value))
|
|
{
|
|
item.uploadedAt = Some(uploaded_at);
|
|
}
|
|
|
|
let views_selector = Self::selector("div.video-infos span.views")?;
|
|
if let Some(views) = document
|
|
.select(&views_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.and_then(|value| Self::parse_views(&value))
|
|
{
|
|
item.views = Some(views);
|
|
}
|
|
|
|
let mut tags = item.tags.take().unwrap_or_default();
|
|
let mut uploader = None;
|
|
let mut uploader_url = None;
|
|
for (href, title, kind) in Self::label_entries(&document)? {
|
|
let normalized_title = Self::normalize_title(&title);
|
|
if normalized_title.is_empty() {
|
|
continue;
|
|
}
|
|
Self::merge_tag(&mut tags, title.clone());
|
|
if uploader.is_none() && kind.contains("fa-star") {
|
|
uploader = Some(title);
|
|
uploader_url = Some(self.normalize_url(&href));
|
|
}
|
|
}
|
|
|
|
if let Some(uploader) = uploader {
|
|
item.uploader = Some(uploader);
|
|
}
|
|
if let Some(uploader_url) = uploader_url.filter(|value| !value.is_empty()) {
|
|
item.uploaderUrl = Some(uploader_url);
|
|
}
|
|
if !tags.is_empty() {
|
|
item.tags = Some(tags);
|
|
}
|
|
|
|
Ok(item)
|
|
}
|
|
|
|
async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
|
let mut requester =
|
|
requester_or_default(options, CHANNEL_ID, "pornmz.enrich_item.missing_requester");
|
|
match Self::fetch_html(&mut requester, &item.url, &item.url).await {
|
|
Ok(html) => match self.apply_detail_video(item.clone(), &html) {
|
|
Ok(value) => value,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"enrich_item.apply_detail_video",
|
|
&format!("url={}; error={error}", item.url),
|
|
);
|
|
item
|
|
}
|
|
},
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"enrich_item.fetch_html",
|
|
&format!("url={}; error={error}", item.url),
|
|
);
|
|
item
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn fetch_target(
|
|
&self,
|
|
cache: VideoCache,
|
|
target: Target,
|
|
page: u16,
|
|
sort: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let url = self.build_target_url(&target, page, sort);
|
|
let old_items = match cache.get(&url) {
|
|
Some((time, items)) => {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
|
return Ok(items.clone());
|
|
}
|
|
items.clone()
|
|
}
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester =
|
|
requester_or_default(&options, CHANNEL_ID, "pornmz.fetch_target.missing_requester");
|
|
let html = match Self::fetch_html(&mut requester, &url, BASE_URL).await {
|
|
Ok(value) => value,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"fetch_target.request",
|
|
&format!("url={url}; error={error}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
if html.trim().is_empty() {
|
|
report_provider_error(CHANNEL_ID, "fetch_target.empty_response", &format!("url={url}"))
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let items = self.parse_list_videos(&html)?;
|
|
if items.is_empty() {
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let enriched = stream::iter(items.into_iter().map(|item| {
|
|
let provider = self.clone();
|
|
let options = options.clone();
|
|
async move { provider.enrich_item(item, &options).await }
|
|
}))
|
|
.buffer_unordered(6)
|
|
.collect::<Vec<_>>()
|
|
.await;
|
|
|
|
cache.remove(&url);
|
|
cache.insert(url, enriched.clone());
|
|
Ok(enriched)
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for PornmzProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
_pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
_per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
|
|
|
let target = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
let query = query.trim();
|
|
let mut target = self.resolve_query_target(query);
|
|
if matches!(target, Target::Search(_)) {
|
|
if let Some(archive) = self.guess_actor_archive(query, &options).await {
|
|
target = Target::Archive(archive);
|
|
}
|
|
}
|
|
if matches!(target, Target::Search(_)) && self.search_filters_need_refresh() {
|
|
self.refresh_search_filters().await;
|
|
target = self.resolve_query_target(query);
|
|
if matches!(target, Target::Search(_)) {
|
|
if let Some(archive) = self.guess_actor_archive(query, &options).await {
|
|
target = Target::Archive(archive);
|
|
}
|
|
}
|
|
}
|
|
target
|
|
}
|
|
_ => self.resolve_option_target(&options),
|
|
};
|
|
|
|
match self
|
|
.fetch_target(cache, target, page, &sort, options.clone())
|
|
.await
|
|
{
|
|
Ok(items) => items,
|
|
Err(error) => {
|
|
report_provider_error(
|
|
CHANNEL_ID,
|
|
"get_videos.fetch_target",
|
|
&format!("sort={sort}; page={page}; error={error}"),
|
|
)
|
|
.await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns this provider's channel description; always `Some`.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single filter entry from an id/title pair.
    fn filter(id: &str, title: &str) -> FilterOption {
        FilterOption {
            id: id.to_string(),
            title: title.to_string(),
        }
    }

    /// Wraps the "All" entry, followed by any `extra` entries, in the shared
    /// `Arc<RwLock<..>>` container the provider uses for its filter lists.
    fn filters_with(extra: Vec<FilterOption>) -> Arc<RwLock<Vec<FilterOption>>> {
        let mut entries = vec![filter("all", "All")];
        entries.extend(extra);
        Arc::new(RwLock::new(entries))
    }

    /// Provider fixture pointing at `BASE_URL` whose category, tag and uploader
    /// lists each contain only the "All" entry.
    fn test_provider() -> PornmzProvider {
        PornmzProvider {
            url: BASE_URL.to_string(),
            categories: filters_with(Vec::new()),
            tags: filters_with(Vec::new()),
            uploaders: filters_with(Vec::new()),
        }
    }

    /// With categories and tags populated but uploaders holding only "All",
    /// the provider must not report that its search filters need a refresh.
    #[test]
    fn search_filters_ignore_missing_uploaders() {
        let categories = filters_with(vec![filter(
            "https://pornmz.com/pmvideo/c/brazzers",
            "Brazzers",
        )]);
        let tags = filters_with(vec![filter(
            "https://pornmz.com/pmvideo/s/blonde",
            "Blonde",
        )]);
        let provider = PornmzProvider {
            url: BASE_URL.to_string(),
            categories,
            tags,
            uploaders: filters_with(Vec::new()),
        };

        let needs_refresh = provider.search_filters_need_refresh();
        assert!(!needs_refresh);
    }

    /// A search target on page 2 with the "most-viewed" sort yields the expected URL.
    #[test]
    fn builds_search_page_url() {
        let provider = test_provider();
        let target = Target::Search(String::from("brazzers"));

        let built = provider.build_target_url(&target, 2, "most-viewed");

        assert_eq!(
            built,
            "https://pornmz.com/page/2?s=brazzers&filter=most-viewed"
        );
    }

    /// An `?actors=` query URL is rewritten to the `pmactor` path form.
    #[test]
    fn canonicalizes_actor_urls() {
        let canonical = PornmzProvider::canonical_actor_url(
            BASE_URL,
            "https://pornmz.com?actors=kayley-gunner",
        );

        assert_eq!(
            canonical,
            "https://pornmz.com/video/id=pmactor/kayley-gunner"
        );
    }

    /// A single listing card is parsed into one item carrying id, duration,
    /// view count, preview URL and a tag derived from the card's classes.
    #[test]
    fn parses_list_card() {
        let provider = test_provider();
        let html = r#"
            <div class="videos-list">
                <article class="thumb-block tag-blonde category-brazzers actors-kayley-gunner" data-main-thumb="https://pornmz.com/thumb.jpg" data-trailer="https://pornmz.com/preview.mp4">
                    <a href="https://pornmz.com/video/id=pm123" title="Sample Title">
                        <div class="post-thumbnail">
                            <img class="video-main-thumb" src="https://pornmz.com/thumb.jpg" />
                            <span class="duration">29:00</span>
                        </div>
                        <header class="entry-header">
                            <span class="title">Sample Title</span>
                            <div class="under-thumb"><span class="views"><i class="fa fa-eye"></i> 1.2K</span></div>
                        </header>
                    </a>
                </article>
            </div>
        "#;

        let items = provider.parse_list_videos(html).expect("items");
        assert_eq!(items.len(), 1);

        // The length check above guarantees the first element exists.
        let card = &items[0];
        assert_eq!(card.id, "pm123");
        assert_eq!(card.duration, 1740);
        assert_eq!(card.views, Some(1200));
        assert_eq!(
            card.preview.as_deref(),
            Some("https://pornmz.com/preview.mp4")
        );

        let has_blonde_tag = card
            .tags
            .as_ref()
            .is_some_and(|tags| tags.iter().any(|tag| tag == "Blonde"));
        assert!(has_blonde_tag);
    }

    /// Applying a detail page to a listing item overrides title and URL, and
    /// fills in format, duration, views, uploader, tags and upload date.
    #[test]
    fn applies_detail_media_and_labels() {
        let provider = test_provider();
        let listing_item = VideoItem::new(
            String::from("pm123"),
            String::from("Old title"),
            String::from("https://pornmz.com/video/id=pm123"),
            String::from(CHANNEL_ID),
            String::from("https://pornmz.com/thumb.jpg"),
            0,
        );
        let html = r#"
            <div class="entry-content">
                <meta itemprop="name" content="Real Title" />
                <meta itemprop="duration" content="P0DT0H29M0S" />
                <meta itemprop="thumbnailUrl" content="https://pornmz.com/thumb-real.jpg" />
                <meta itemprop="contentURL" content="https://cdn.example/master.m3u8" />
                <meta itemprop="uploadDate" content="2026-03-22T12:58:25+00:00" />
                <div class="video-infos"><span class="views"><i class="fa fa-eye"></i> 1.4K</span></div>
                <div class="video-tags">
                    <div class="tags-list">
                        <a class="label" href="https://pornmz.com/pmvideo/c/brazzers"><i class="fa fa-folder"></i> Brazzers</a>
                        <a class="label" href="https://pornmz.com/video/id=pmactor/kayley-gunner"><i class="fa fa-star"></i> Kayley Gunner</a>
                        <a class="label" href="https://pornmz.com/video/id=pmactor/mick-blue"><i class="fa fa-star"></i> Mick Blue</a>
                    </div>
                </div>
            </div>
        "#;

        let detailed = provider
            .apply_detail_video(listing_item, html)
            .expect("detail");

        assert_eq!(detailed.title, "Real Title");
        assert_eq!(detailed.url, "https://cdn.example/master.m3u8");

        // Inspect the first format through its serialized form.
        let leading_format = detailed
            .formats
            .as_ref()
            .and_then(|formats| formats.first())
            .expect("expected a parsed format");
        let leading_format_json =
            serde_json::to_value(leading_format).expect("format should serialize");
        let format_name = leading_format_json
            .get("format")
            .and_then(|field| field.as_str());
        assert_eq!(format_name, Some("m3u8"));

        assert_eq!(detailed.duration, 1740);
        assert_eq!(detailed.views, Some(1400));
        assert_eq!(detailed.uploader.as_deref(), Some("Kayley Gunner"));
        assert_eq!(
            detailed.uploaderUrl.as_deref(),
            Some("https://pornmz.com/video/id=pmactor/kayley-gunner")
        );

        let has_brazzers_tag = detailed
            .tags
            .as_ref()
            .is_some_and(|tags| tags.iter().any(|tag| tag == "Brazzers"));
        assert!(has_brazzers_tag);
        assert!(detailed.formats.is_some());
        assert!(detailed.uploadedAt.is_some());
    }

    /// The media extension is taken from the path even when a query string follows.
    #[test]
    fn derives_mp4_media_extension() {
        let extension = PornmzProvider::media_extension("https://cdn.example/video.mp4?token=1");

        assert_eq!(extension, "mp4");
    }
}
|