1262 lines
43 KiB
Rust
1262 lines
43 KiB
Rust
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::requester::Requester;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
use async_trait::async_trait;
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use regex::Regex;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use std::collections::HashMap;
|
|
use std::sync::{Arc, RwLock};
|
|
use std::{thread, vec};
|
|
|
|
/// Static channel metadata for this provider: the group it is listed under
/// and the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
|
crate::providers::ProviderChannelMetadata {
|
|
group_id: "fetish-kink",
|
|
tags: &["fetish", "bdsm", "niche"],
|
|
};
|
|
|
|
// Generates this module's `Error`, `ErrorKind` and `Result` types via `error_chain`.
error_chain! {
|
|
// `std::io::Error` converts into this module's `Error` automatically.
foreign_links {
|
|
Io(std::io::Error);
|
|
}
|
|
// Custom error kinds declared by this module.
errors {
|
|
// Parse failure carrying a free-form message.
Parse(msg: String) {
|
|
description("parse error")
|
|
display("parse error: {}", msg)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Site root (no trailing slash); every listing and option URL is built from it.
const BASE_URL: &str = "https://heavyfetish.com";
|
|
/// `User-Agent` value attached to the HTTP headers of every emitted video format.
const BROWSER_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
|
|
|
|
/// Provider for the HeavyFetish site.
///
/// Cloning is cheap with respect to the filter lists: each one sits behind an
/// `Arc<RwLock<..>>`, so clones share the same lists.
#[derive(Debug, Clone)]
|
|
pub struct HeavyfetishProvider {
|
|
// Site root used to build and validate URLs (initialised from `BASE_URL`).
url: String,
|
|
// Category filter options; starts with a single "All" entry.
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
// Tag filter options; starts with a single "All" entry.
tags: Arc<RwLock<Vec<FilterOption>>>,
|
|
// Model filter options; starts with a single "All" entry.
models: Arc<RwLock<Vec<FilterOption>>>,
|
|
// Uploader filter options; starts with a single "All" entry.
uploaders: Arc<RwLock<Vec<FilterOption>>>,
|
|
}
|
|
|
|
/// Kind of listing a request resolves to; it decides how `build_url_for_target`
/// forms the URL and which pagination parameter it uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
enum TargetKind {
|
|
// Site-wide `/fetish-videos/` listing; the page number goes into the path.
Archive,
|
|
// A listing URL paginated with the `from` query parameter.
StandardVideos,
|
|
// A member (uploader) URL paginated with the `from_videos` query parameter.
MemberVideos,
|
|
// Free-text search under `/search/<query>/`.
Search,
|
|
}
|
|
|
|
/// A resolved listing: what kind it is plus the value needed to build its URL.
#[derive(Debug, Clone)]
|
|
struct QueryTarget {
|
|
// Listing kind; selects the URL scheme in `build_url_for_target`.
kind: TargetKind,
|
|
// Listing URL for `StandardVideos`/`MemberVideos`, the raw query text for
// `Search`, and empty for `Archive`.
value: String,
|
|
}
|
|
|
|
impl HeavyfetishProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
categories: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
tags: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
models: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
uploaders: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let url = self.url.clone();
|
|
let categories = Arc::clone(&self.categories);
|
|
let tags = Arc::clone(&self.tags);
|
|
let models = Arc::clone(&self.models);
|
|
let uploaders = Arc::clone(&self.uploaders);
|
|
|
|
thread::spawn(move || {
|
|
let runtime = match tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
{
|
|
Ok(runtime) => runtime,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"spawn_initial_load.runtime_build",
|
|
&error.to_string(),
|
|
);
|
|
return;
|
|
}
|
|
};
|
|
|
|
runtime.block_on(async move {
|
|
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"load_categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"load_tags",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"load_models",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"load_uploaders",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
});
|
|
});
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
let categories = self
|
|
.categories
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
let tags = self
|
|
.tags
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
let models = self
|
|
.models
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
let uploaders = self
|
|
.uploaders
|
|
.read()
|
|
.map(|value| value.clone())
|
|
.unwrap_or_default();
|
|
|
|
Channel {
|
|
id: "heavyfetish".to_string(),
|
|
name: "HeavyFetish".to_string(),
|
|
description:
|
|
"HeavyFetish videos, categories, tags, models, and uploader archives.".to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=heavyfetish.com"
|
|
.to_string(),
|
|
status: "active".to_string(),
|
|
categories: categories.iter().map(|value| value.title.clone()).collect(),
|
|
options: vec![
|
|
ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse HeavyFetish by archive order.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![
|
|
FilterOption {
|
|
id: "new".to_string(),
|
|
title: "Latest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "popular".to_string(),
|
|
title: "Most Popular".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "rated".to_string(),
|
|
title: "Top Rated".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "longest".to_string(),
|
|
title: "Longest".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "commented".to_string(),
|
|
title: "Most Commented".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "recommended".to_string(),
|
|
title: "Most Favorited".to_string(),
|
|
},
|
|
],
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "categories".to_string(),
|
|
title: "Categories".to_string(),
|
|
description: "Browse a HeavyFetish category archive.".to_string(),
|
|
systemImage: "square.grid.2x2".to_string(),
|
|
colorName: "orange".to_string(),
|
|
options: categories,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "filter".to_string(),
|
|
title: "Tags".to_string(),
|
|
description: "Browse a HeavyFetish tag archive.".to_string(),
|
|
systemImage: "tag.fill".to_string(),
|
|
colorName: "green".to_string(),
|
|
options: tags,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "stars".to_string(),
|
|
title: "Models".to_string(),
|
|
description: "Browse a HeavyFetish model archive.".to_string(),
|
|
systemImage: "star.fill".to_string(),
|
|
colorName: "yellow".to_string(),
|
|
options: models,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "sites".to_string(),
|
|
title: "Uploaders".to_string(),
|
|
description: "Browse a HeavyFetish uploader archive.".to_string(),
|
|
systemImage: "person.crop.square".to_string(),
|
|
colorName: "purple".to_string(),
|
|
options: uploaders,
|
|
multiSelect: false,
|
|
},
|
|
],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
|
}
|
|
|
|
/// Trims `text` and replaces every run of whitespace inside it with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes this one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
|
|
|
fn decode_html(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
}
|
|
|
|
fn text_of(element: &ElementRef<'_>) -> String {
|
|
Self::decode_html(&Self::collapse_whitespace(
|
|
&element.text().collect::<Vec<_>>().join(" "),
|
|
))
|
|
}
|
|
|
|
/// Produces the comparison key for a filter title or a user query.
///
/// The text is trimmed, leading `#` characters are removed, inner whitespace
/// runs are collapsed to one space, and the result is lower-cased.
/// Lower-casing is Unicode-aware so that titles containing non-ASCII letters
/// (for example accented names) compare equal regardless of case; ASCII-only
/// folding would leave such letters untouched and make the match fail.
fn normalize_title(title: &str) -> String {
    title
        .trim()
        .trim_start_matches('#')
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}
|
|
|
|
fn normalize_url(&self, url: &str) -> String {
|
|
if url.is_empty() {
|
|
return String::new();
|
|
}
|
|
if url.starts_with("http://") || url.starts_with("https://") {
|
|
return url.to_string();
|
|
}
|
|
if url.starts_with("//") {
|
|
return format!("https:{url}");
|
|
}
|
|
if url.starts_with('/') {
|
|
return format!("{}{}", self.url, url);
|
|
}
|
|
format!("{}/{}", self.url, url.trim_start_matches("./"))
|
|
}
|
|
|
|
fn parse_duration(text: &str) -> u32 {
|
|
parse_time_to_seconds(text)
|
|
.and_then(|value| u32::try_from(value).ok())
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
fn parse_views(text: &str) -> Option<u32> {
|
|
let cleaned = text
|
|
.replace("views", "")
|
|
.replace("view", "")
|
|
.replace(' ', "")
|
|
.trim()
|
|
.to_string();
|
|
parse_abbreviated_number(&cleaned)
|
|
}
|
|
|
|
/// Parses a percentage label such as `"85%"` into a float.
///
/// Surrounding whitespace and any trailing `%` characters are ignored;
/// `None` is returned when what is left is not a valid `f32`.
fn parse_percent(text: &str) -> Option<f32> {
    let without_sign = text.trim().trim_end_matches('%');
    match without_sign.trim().parse::<f32>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
|
|
|
|
/// Maps a client sort id (or one of its aliases) to the site's `sort_by` value.
///
/// Returns `None` for ids without a mapping, in which case no `sort_by`
/// parameter is added to the URL.
fn sort_param(sort: &str) -> Option<&'static str> {
    // (accepted ids, value sent as `sort_by`)
    const SORT_ALIASES: [(&[&str], &str); 5] = [
        (&["popular", "viewed", "trending"], "video_viewed"),
        (&["rated", "rating", "top"], "rating"),
        (&["longest", "duration"], "duration"),
        (&["commented", "comments"], "most_commented"),
        (&["recommended", "favorited", "favourited"], "most_favourited"),
    ];

    SORT_ALIASES
        .iter()
        .find(|(ids, _)| ids.iter().any(|id| *id == sort))
        .map(|(_, param)| *param)
}
|
|
|
|
/// Appends `key=value` to `url`, using `&` when the URL already contains a
/// `?` and `?` otherwise. Neither `key` nor `value` is escaped.
fn append_query_param(url: &str, key: &str, value: &str) -> String {
    let mut extended = String::with_capacity(url.len() + key.len() + value.len() + 2);
    extended.push_str(url);
    extended.push(if url.contains('?') { '&' } else { '?' });
    extended.push_str(key);
    extended.push('=');
    extended.push_str(value);
    extended
}
|
|
|
|
fn encode_search_query(query: &str) -> String {
|
|
let mut serializer = url::form_urlencoded::Serializer::new(String::new());
|
|
serializer.append_pair("q", query);
|
|
serializer
|
|
.finish()
|
|
.strip_prefix("q=")
|
|
.unwrap_or_default()
|
|
.to_string()
|
|
}
|
|
|
|
/// Adds `item` to the shared list unless an entry with the same id exists.
///
/// Items with an empty id or title are ignored, and nothing is added when the
/// write lock cannot be acquired.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    let incomplete = item.id.is_empty() || item.title.is_empty();
    if incomplete {
        return;
    }
    let Ok(mut entries) = target.write() else {
        return;
    };
    let already_listed = entries.iter().any(|existing| existing.id == item.id);
    if !already_listed {
        entries.push(item);
    }
}
|
|
|
|
async fn fetch_html(url: &str) -> Result<String> {
|
|
let mut requester = Requester::new();
|
|
requester
|
|
.get(url, None)
|
|
.await
|
|
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
|
}
|
|
|
|
async fn load_categories(
|
|
base_url: &str,
|
|
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
) -> Result<()> {
|
|
let html = Self::fetch_html(&format!("{base_url}/categories/")).await?;
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("#list_categories_categories_list_items a.item[href]")?;
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default().to_string();
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(ToOwned::to_owned)
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
let normalized = href.trim_end_matches('/').to_string();
|
|
|
|
if !normalized.starts_with(&format!("{base_url}/categories/")) || title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
Self::push_unique(
|
|
&categories,
|
|
FilterOption {
|
|
id: format!("{normalized}/"),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
|
let html = Self::fetch_html(&format!("{base_url}/tags/")).await?;
|
|
let document = Html::parse_document(&html);
|
|
let selector = Self::selector("a[href]")?;
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
|
|
if !href.starts_with(&format!("{base_url}/tags/")) {
|
|
continue;
|
|
}
|
|
|
|
let remainder = href
|
|
.strip_prefix(&format!("{base_url}/tags/"))
|
|
.unwrap_or_default();
|
|
if remainder.is_empty() || remainder.contains('/') {
|
|
continue;
|
|
}
|
|
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(ToOwned::to_owned)
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
Self::push_unique(
|
|
&tags,
|
|
FilterOption {
|
|
id: format!("{href}/"),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_models(base_url: &str, models: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
|
|
let selector = Self::selector("#list_models_models_list_items a.item[href]")?;
|
|
|
|
for page in 1..=5 {
|
|
let url = if page == 1 {
|
|
format!("{base_url}/fetish-models/")
|
|
} else {
|
|
format!("{base_url}/fetish-models/{page}/")
|
|
};
|
|
let html = Self::fetch_html(&url).await?;
|
|
let document = Html::parse_document(&html);
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(ToOwned::to_owned)
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
|
|
if !href.starts_with(&format!("{base_url}/fetish-models/")) || title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
Self::push_unique(
|
|
&models,
|
|
FilterOption {
|
|
id: format!("{href}/"),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_uploaders(
|
|
base_url: &str,
|
|
uploaders: Arc<RwLock<Vec<FilterOption>>>,
|
|
) -> Result<()> {
|
|
let selector = Self::selector("#list_members_members_items a[href]")?;
|
|
let title_selector = Self::selector("strong.title")?;
|
|
|
|
for page in 1..=5 {
|
|
let mut url = format!("{base_url}/members/");
|
|
if page > 1 {
|
|
url = Self::append_query_param(&url, "from_members", &page.to_string());
|
|
}
|
|
|
|
let html = Self::fetch_html(&url).await?;
|
|
let document = Html::parse_document(&html);
|
|
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
|
|
if !href.starts_with(&format!("{base_url}/members/")) {
|
|
continue;
|
|
}
|
|
|
|
let remainder = href
|
|
.strip_prefix(&format!("{base_url}/members/"))
|
|
.unwrap_or_default();
|
|
if remainder.is_empty() || remainder.contains('/') {
|
|
continue;
|
|
}
|
|
if !remainder.chars().all(|value| value.is_ascii_digit()) {
|
|
continue;
|
|
}
|
|
|
|
let title = element
|
|
.select(&title_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
Self::push_unique(
|
|
&uploaders,
|
|
FilterOption {
|
|
id: format!("{href}/"),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn resolve_option_target(&self, options: &ServerOptions) -> QueryTarget {
|
|
if let Some(uploader) = options.sites.as_deref() {
|
|
if uploader.starts_with(&self.url) && uploader != "all" {
|
|
return QueryTarget {
|
|
kind: TargetKind::MemberVideos,
|
|
value: uploader.to_string(),
|
|
};
|
|
}
|
|
}
|
|
|
|
if let Some(model) = options.stars.as_deref() {
|
|
if model.starts_with(&self.url) && model != "all" {
|
|
return QueryTarget {
|
|
kind: TargetKind::StandardVideos,
|
|
value: model.to_string(),
|
|
};
|
|
}
|
|
}
|
|
|
|
if let Some(tag) = options.filter.as_deref() {
|
|
if tag.starts_with(&self.url) && tag != "all" {
|
|
return QueryTarget {
|
|
kind: TargetKind::StandardVideos,
|
|
value: tag.to_string(),
|
|
};
|
|
}
|
|
}
|
|
|
|
if let Some(category) = options.categories.as_deref() {
|
|
if category.starts_with(&self.url) && category != "all" {
|
|
return QueryTarget {
|
|
kind: TargetKind::StandardVideos,
|
|
value: category.to_string(),
|
|
};
|
|
}
|
|
}
|
|
|
|
QueryTarget {
|
|
kind: TargetKind::Archive,
|
|
value: String::new(),
|
|
}
|
|
}
|
|
|
|
fn match_filter(options: &[FilterOption], query: &str, kind: TargetKind) -> Option<QueryTarget> {
|
|
let normalized_query = Self::normalize_title(query);
|
|
options
|
|
.iter()
|
|
.find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query)
|
|
.map(|value| QueryTarget {
|
|
kind,
|
|
value: value.id.clone(),
|
|
})
|
|
}
|
|
|
|
fn resolve_query_target(&self, query: &str) -> QueryTarget {
|
|
if let Ok(uploaders) = self.uploaders.read() {
|
|
if let Some(target) = Self::match_filter(&uploaders, query, TargetKind::MemberVideos) {
|
|
return target;
|
|
}
|
|
}
|
|
|
|
if let Ok(models) = self.models.read() {
|
|
if let Some(target) = Self::match_filter(&models, query, TargetKind::StandardVideos) {
|
|
return target;
|
|
}
|
|
}
|
|
|
|
if let Ok(tags) = self.tags.read() {
|
|
if let Some(target) = Self::match_filter(&tags, query, TargetKind::StandardVideos) {
|
|
return target;
|
|
}
|
|
}
|
|
|
|
if let Ok(categories) = self.categories.read() {
|
|
if let Some(target) = Self::match_filter(&categories, query, TargetKind::StandardVideos)
|
|
{
|
|
return target;
|
|
}
|
|
}
|
|
|
|
QueryTarget {
|
|
kind: TargetKind::Search,
|
|
value: query.to_string(),
|
|
}
|
|
}
|
|
|
|
fn build_url_for_target(&self, target: &QueryTarget, page: u16, sort: &str) -> String {
|
|
let sort_by = Self::sort_param(sort);
|
|
|
|
match target.kind {
|
|
TargetKind::Archive => {
|
|
let mut url = if page > 1 {
|
|
format!("{}/fetish-videos/{page}/", self.url)
|
|
} else {
|
|
format!("{}/fetish-videos/", self.url)
|
|
};
|
|
if let Some(sort_by) = sort_by {
|
|
url = Self::append_query_param(&url, "sort_by", sort_by);
|
|
}
|
|
url
|
|
}
|
|
TargetKind::StandardVideos => {
|
|
let mut url = target.value.clone();
|
|
if page > 1 {
|
|
url = Self::append_query_param(&url, "from", &page.to_string());
|
|
}
|
|
if let Some(sort_by) = sort_by {
|
|
url = Self::append_query_param(&url, "sort_by", sort_by);
|
|
}
|
|
url
|
|
}
|
|
TargetKind::MemberVideos => {
|
|
let mut url = target.value.clone();
|
|
if page > 1 {
|
|
url = Self::append_query_param(&url, "from_videos", &page.to_string());
|
|
}
|
|
if let Some(sort_by) = sort_by {
|
|
url = Self::append_query_param(&url, "sort_by", sort_by);
|
|
}
|
|
url
|
|
}
|
|
TargetKind::Search => {
|
|
let encoded_query = Self::encode_search_query(&target.value);
|
|
let mut url = format!("{}/search/{encoded_query}/", self.url);
|
|
if page > 1 {
|
|
url = Self::append_query_param(&url, "from_videos", &page.to_string());
|
|
url = Self::append_query_param(&url, "from_albums", &page.to_string());
|
|
}
|
|
if let Some(sort_by) = sort_by {
|
|
url = Self::append_query_param(&url, "sort_by", sort_by);
|
|
}
|
|
url
|
|
}
|
|
}
|
|
}
|
|
|
|
fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
|
|
for selector_text in [
|
|
"#list_videos_fetish_videos_list_items",
|
|
"#list_videos_videos_list_search_result_items",
|
|
"#list_videos_common_videos_list_items",
|
|
"#list_videos_uploaded_videos_items",
|
|
] {
|
|
let selector = Self::selector(selector_text)?;
|
|
if let Some(element) = document.select(&selector).next() {
|
|
return Ok(Some(element));
|
|
}
|
|
}
|
|
Ok(None)
|
|
}
|
|
|
|
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_document(html);
|
|
let Some(container) = self.list_container(&document)? else {
|
|
return Ok(vec![]);
|
|
};
|
|
|
|
let card_selector = Self::selector("div.item.hf-video-item")?;
|
|
let link_selector = Self::selector("a[href*=\"/videos/\"]")?;
|
|
let image_selector = Self::selector("img.thumb")?;
|
|
let title_selector = Self::selector("strong.title")?;
|
|
let duration_selector = Self::selector("div.duration")?;
|
|
let rating_selector = Self::selector("div.rating")?;
|
|
let views_selector = Self::selector("div.views")?;
|
|
|
|
let mut items = Vec::new();
|
|
|
|
for card in container.select(&card_selector) {
|
|
let Some(link) = card.select(&link_selector).next() else {
|
|
continue;
|
|
};
|
|
|
|
let href = link.value().attr("href").unwrap_or_default();
|
|
let page_url = self.normalize_url(href);
|
|
let id = page_url
|
|
.trim_end_matches('/')
|
|
.split('/')
|
|
.nth_back(1)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
|
|
if id.is_empty() || page_url.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let image = card.select(&image_selector).next();
|
|
let thumb = image
|
|
.and_then(|value| {
|
|
value
|
|
.value()
|
|
.attr("data-webp")
|
|
.or_else(|| value.value().attr("data-original"))
|
|
.or_else(|| value.value().attr("src"))
|
|
})
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_default();
|
|
|
|
let preview = image
|
|
.and_then(|value| value.value().attr("data-preview"))
|
|
.map(|value| self.normalize_url(value));
|
|
|
|
let title = card
|
|
.select(&title_selector)
|
|
.next()
|
|
.map(|value| Self::text_of(&value))
|
|
.filter(|value| !value.is_empty())
|
|
.or_else(|| {
|
|
link.value()
|
|
.attr("title")
|
|
.map(Self::decode_html)
|
|
.filter(|value| !value.is_empty())
|
|
});
|
|
|
|
let Some(title) = title else {
|
|
continue;
|
|
};
|
|
|
|
let duration = card
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|value| Self::parse_duration(&Self::text_of(&value)))
|
|
.unwrap_or(0);
|
|
|
|
let rating = card
|
|
.select(&rating_selector)
|
|
.next()
|
|
.and_then(|value| Self::parse_percent(&Self::text_of(&value)));
|
|
|
|
let views = card
|
|
.select(&views_selector)
|
|
.next()
|
|
.and_then(|value| Self::parse_views(&Self::text_of(&value)));
|
|
|
|
let mut item = VideoItem::new(
|
|
id,
|
|
title,
|
|
page_url,
|
|
"heavyfetish".to_string(),
|
|
thumb,
|
|
duration,
|
|
);
|
|
if let Some(preview) = preview {
|
|
item = item.preview(preview);
|
|
}
|
|
if let Some(rating) = rating {
|
|
item = item.rating(rating);
|
|
}
|
|
if let Some(views) = views {
|
|
item = item.views(views);
|
|
}
|
|
|
|
items.push(item);
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn extract_js_value(block: &str, regex: &Regex) -> Option<String> {
|
|
regex
|
|
.captures(block)
|
|
.and_then(|captures| captures.get(1))
|
|
.map(|value| value.as_str().replace("\\/", "/").replace("\\'", "'"))
|
|
}
|
|
|
|
/// Guesses a quality label from a media URL.
///
/// Known labels are checked from highest to lowest and the first one
/// contained in the URL is returned; `"480p"` is the fallback.
fn quality_from_url(url: &str) -> String {
    const KNOWN_QUALITIES: [&str; 7] = ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"];

    KNOWN_QUALITIES
        .iter()
        .find(|label| url.contains(**label))
        .copied()
        .unwrap_or("480p")
        .to_string()
}
|
|
|
|
fn build_formats(&self, html: &str, page_url: &str) -> Result<Vec<VideoFormat>> {
|
|
let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
|
|
let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
|
|
let flashvars = flashvars_regex
|
|
.captures(html)
|
|
.and_then(|value| value.get(1))
|
|
.map(|value| value.as_str().to_string())
|
|
.unwrap_or_default();
|
|
|
|
let mut seen = HashMap::<String, String>::new();
|
|
for key in ["video_alt_url2", "video_alt_url", "video_url"] {
|
|
let url_regex = value_regex(key)?;
|
|
let text_regex = value_regex(&format!("{key}_text"))?;
|
|
if let Some(url) = Self::extract_js_value(&flashvars, &url_regex) {
|
|
let normalized = self.normalize_url(&url);
|
|
let quality = Self::extract_js_value(&flashvars, &text_regex)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::quality_from_url(&normalized));
|
|
seen.entry(quality).or_insert(normalized);
|
|
}
|
|
}
|
|
|
|
let document = Html::parse_document(html);
|
|
let download_selector = Self::selector("#download_popup a[href*=\"/get_file/\"]")?;
|
|
for element in document.select(&download_selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let normalized = self.normalize_url(href);
|
|
if normalized.is_empty() {
|
|
continue;
|
|
}
|
|
let quality = Self::quality_from_url(&normalized);
|
|
seen.entry(quality).or_insert(normalized);
|
|
}
|
|
|
|
let order = ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"];
|
|
let mut formats = Vec::new();
|
|
|
|
for quality in order {
|
|
let Some(url) = seen.get(quality) else {
|
|
continue;
|
|
};
|
|
let format = VideoFormat::new(url.clone(), quality.to_string(), "mp4".to_string())
|
|
.format_id(quality.to_string())
|
|
.http_header("Referer".to_string(), page_url.to_string())
|
|
.http_header("User-Agent".to_string(), BROWSER_UA.to_string());
|
|
formats.push(format);
|
|
}
|
|
|
|
Ok(formats)
|
|
}
|
|
|
|
fn apply_detail_video(
|
|
&self,
|
|
mut item: VideoItem,
|
|
html: &str,
|
|
page_url: &str,
|
|
) -> Result<VideoItem> {
|
|
let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
|
|
let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
|
|
let flashvars = flashvars_regex
|
|
.captures(html)
|
|
.and_then(|value| value.get(1))
|
|
.map(|value| value.as_str().to_string())
|
|
.unwrap_or_default();
|
|
|
|
let title_regex = value_regex("video_title")?;
|
|
let category_regex = value_regex("video_categories")?;
|
|
let tag_regex = value_regex("video_tags")?;
|
|
let model_regex = value_regex("video_models")?;
|
|
let preview_regex = value_regex("preview_url")?;
|
|
let width_regex = value_regex("player_width")?;
|
|
let height_regex = value_regex("player_height")?;
|
|
|
|
let document = Html::parse_document(html);
|
|
let uploader_selector =
|
|
Self::selector(".block-details .block-user .username a[href*=\"/members/\"]")?;
|
|
let info_span_selector = Self::selector(".block-details .info .item span")?;
|
|
let category_selector = Self::selector(".block-details .info a[href*=\"/categories/\"]")?;
|
|
let tag_selector = Self::selector(".block-details .info a[href*=\"/tags/\"]")?;
|
|
let model_selector =
|
|
Self::selector(".block-details .info a[href*=\"/fetish-models/\"]")?;
|
|
|
|
if let Some(title) = Self::extract_js_value(&flashvars, &title_regex) {
|
|
if !title.is_empty() {
|
|
item.title = title;
|
|
}
|
|
}
|
|
|
|
let formats = self.build_formats(html, page_url)?;
|
|
if !formats.is_empty() {
|
|
item = item.formats(formats);
|
|
}
|
|
|
|
let uploader_link = document.select(&uploader_selector).next();
|
|
let uploader = uploader_link
|
|
.as_ref()
|
|
.map(|value| Self::text_of(value))
|
|
.filter(|value| !value.is_empty());
|
|
let uploader_url = uploader_link
|
|
.and_then(|value| value.value().attr("href"))
|
|
.map(|value| self.normalize_url(value));
|
|
|
|
if let (Some(name), Some(url)) = (&uploader, &uploader_url) {
|
|
Self::push_unique(
|
|
&self.uploaders,
|
|
FilterOption {
|
|
id: url.clone(),
|
|
title: name.clone(),
|
|
},
|
|
);
|
|
}
|
|
|
|
for span in document.select(&info_span_selector) {
|
|
let text = Self::text_of(&span);
|
|
if let Some(value) = text.strip_prefix("Views:") {
|
|
if let Some(views) = Self::parse_views(value) {
|
|
item = item.views(views);
|
|
}
|
|
} else if let Some(value) = text.strip_prefix("Duration:") {
|
|
let duration = Self::parse_duration(value.trim());
|
|
if duration > 0 {
|
|
item.duration = duration;
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut tags = item.tags.clone().unwrap_or_default();
|
|
|
|
for element in document.select(&category_selector) {
|
|
let title = Self::text_of(&element);
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
tags.push(title.clone());
|
|
Self::push_unique(
|
|
&self.categories,
|
|
FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
for element in document.select(&tag_selector) {
|
|
let title = Self::text_of(&element);
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
tags.push(title.clone());
|
|
Self::push_unique(
|
|
&self.tags,
|
|
FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
for element in document.select(&model_selector) {
|
|
let title = Self::text_of(&element);
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
tags.push(title.clone());
|
|
Self::push_unique(
|
|
&self.models,
|
|
FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
|
|
for raw_group in [category_regex, tag_regex, model_regex] {
|
|
if let Some(values) = Self::extract_js_value(&flashvars, &raw_group) {
|
|
for value in values.split(',') {
|
|
let clean = Self::collapse_whitespace(value.trim());
|
|
if !clean.is_empty() {
|
|
tags.push(clean);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
tags.sort();
|
|
tags.dedup();
|
|
if !tags.is_empty() {
|
|
item = item.tags(tags);
|
|
}
|
|
|
|
if let Some(uploader) = uploader {
|
|
item = item.uploader(uploader);
|
|
}
|
|
if let Some(uploader_url) = uploader_url {
|
|
item = item.uploader_url(uploader_url);
|
|
}
|
|
if item.thumb.is_empty() {
|
|
if let Some(preview) = Self::extract_js_value(&flashvars, &preview_regex) {
|
|
item.thumb = self.normalize_url(&preview);
|
|
}
|
|
}
|
|
if item.aspectRatio.is_none() {
|
|
let width = Self::extract_js_value(&flashvars, &width_regex)
|
|
.and_then(|value| value.parse::<f32>().ok());
|
|
let height = Self::extract_js_value(&flashvars, &height_regex)
|
|
.and_then(|value| value.parse::<f32>().ok());
|
|
if let (Some(width), Some(height)) = (width, height) {
|
|
if height > 0.0 {
|
|
item = item.aspect_ratio(width / height);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(item)
|
|
}
|
|
|
|
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
|
let page_url = item.url.clone();
|
|
let original = item.clone();
|
|
let mut requester =
|
|
requester_or_default(options, "heavyfetish", "enrich_video.requester_missing");
|
|
|
|
let html = match requester.get(&page_url, None).await {
|
|
Ok(html) => html,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"enrich_video.request",
|
|
&format!("url={page_url}; error={error}"),
|
|
);
|
|
return original;
|
|
}
|
|
};
|
|
|
|
match self.apply_detail_video(item, &html, &page_url) {
|
|
Ok(item) => item,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
"heavyfetish",
|
|
"enrich_video.parse",
|
|
&format!("url={page_url}; error={error}"),
|
|
);
|
|
original
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn fetch_items_for_url(
|
|
&self,
|
|
cache: VideoCache,
|
|
url: String,
|
|
per_page_limit: usize,
|
|
options: &ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
if let Some((time, items)) = cache.get(&url) {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 300 {
|
|
return Ok(items.iter().take(per_page_limit).cloned().collect());
|
|
}
|
|
}
|
|
|
|
let mut requester =
|
|
requester_or_default(options, "heavyfetish", "fetch_items_for_url.requester_missing");
|
|
let html = requester
|
|
.get(&url, None)
|
|
.await
|
|
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))?;
|
|
|
|
let list_items = self.parse_list_videos(&html)?;
|
|
if list_items.is_empty() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let limited_items = list_items
|
|
.into_iter()
|
|
.take(per_page_limit.max(1))
|
|
.collect::<Vec<_>>();
|
|
|
|
let items = stream::iter(limited_items.into_iter().map(|item| {
|
|
let provider = self.clone();
|
|
let options = options.clone();
|
|
async move { provider.enrich_video(item, &options).await }
|
|
}))
|
|
.buffer_unordered(4)
|
|
.collect::<Vec<_>>()
|
|
.await;
|
|
|
|
if !items.is_empty() {
|
|
cache.insert(url, items.clone());
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
async fn get(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
sort: &str,
|
|
per_page_limit: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target = self.resolve_option_target(&options);
|
|
let url = self.build_url_for_target(&target, page, sort);
|
|
self.fetch_items_for_url(cache, url, per_page_limit, &options)
|
|
.await
|
|
}
|
|
|
|
async fn query(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
sort: &str,
|
|
query: &str,
|
|
per_page_limit: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target = self.resolve_query_target(query);
|
|
let url = self.build_url_for_target(&target, page, sort);
|
|
self.fetch_items_for_url(cache, url, per_page_limit, &options)
|
|
.await
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for HeavyfetishProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let page = page.parse::<u16>().unwrap_or(1);
|
|
let per_page_limit = per_page.parse::<usize>().unwrap_or(30);
|
|
|
|
let result = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
self.query(cache, page, &sort, &query, per_page_limit, options)
|
|
.await
|
|
}
|
|
_ => self.get(cache, page, &sort, per_page_limit, options).await,
|
|
};
|
|
|
|
match result {
|
|
Ok(videos) => videos,
|
|
Err(error) => {
|
|
report_provider_error("heavyfetish", "get_videos", &error.to_string()).await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh shared list holding only the "All" placeholder.
    fn all_only() -> Arc<RwLock<Vec<FilterOption>>> {
        Arc::new(RwLock::new(vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }]))
    }

    /// Builds a provider directly, without the background load that `new` starts.
    fn provider() -> HeavyfetishProvider {
        HeavyfetishProvider {
            url: BASE_URL.to_string(),
            categories: all_only(),
            tags: all_only(),
            models: all_only(),
            uploaders: all_only(),
        }
    }

    #[test]
    fn builds_search_page_two_url() {
        let target = QueryTarget {
            kind: TargetKind::Search,
            value: "adriana chechik".to_string(),
        };

        let url = provider().build_url_for_target(&target, 2, "rated");

        assert_eq!(
            url,
            "https://heavyfetish.com/search/adriana+chechik/?from_videos=2&from_albums=2&sort_by=rating"
        );
    }

    #[test]
    fn preserves_list_thumb_when_detail_has_preview_image() {
        let page_url = "https://heavyfetish.com/videos/120660/example/";
        let list_item = VideoItem::new(
            "120660".to_string(),
            "Example".to_string(),
            page_url.to_string(),
            "heavyfetish".to_string(),
            "https://heavyfetish.com/list-thumb.jpg".to_string(),
            0,
        )
        .preview("https://heavyfetish.com/list-preview.mp4".to_string());

        let detail_html = r#"
            <script>
            var flashvars = {
                video_title: 'Example',
                preview_url: 'https://heavyfetish.com/detail-preview.jpg',
                player_width: '882',
                player_height: '500.42553191489'
            };
            </script>
        "#;

        let enriched = provider()
            .apply_detail_video(list_item, detail_html, page_url)
            .expect("detail parsing should succeed");

        // The list thumbnail and preview must survive detail enrichment.
        assert_eq!(enriched.thumb, "https://heavyfetish.com/list-thumb.jpg");
        assert_eq!(
            enriched.preview.as_deref(),
            Some("https://heavyfetish.com/list-preview.mp4")
        );
    }
}
|