Files
hottub/src/providers/heavyfetish.rs
2026-03-18 12:13:28 +00:00

1262 lines
43 KiB
Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::{thread, vec};
/// Static channel metadata for this provider: it is placed in the
/// `fetish-kink` group and carries the tags `fetish`, `bdsm` and `niche`.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "fetish-kink",
tags: &["fetish", "bdsm", "niche"],
};
// Error definitions for this provider, declared through `error_chain!`.
// The `Result` and `Error` names used throughout this file presumably come
// from this macro's expansion.
error_chain! {
// `std::io::Error` is linked in as the `Io` variant.
foreign_links {
Io(std::io::Error);
}
// `Parse` carries a message and displays as "parse error: <msg>".
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Root URL of the site. `new` stores it as the provider's base URL, which is
/// then used as the prefix for site-relative links and listing URLs.
const BASE_URL: &str = "https://heavyfetish.com";
/// Browser-style `User-Agent` value attached as a header to every video
/// format produced by `build_formats`.
const BROWSER_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
/// Provider for the HeavyFetish site.
///
/// The four filter lists are shared behind `Arc<RwLock<..>>`, so clones of the
/// provider see the same lists. They start with a single "All" entry and are
/// extended by the background load and by detail-page parsing.
#[derive(Debug, Clone)]
pub struct HeavyfetishProvider {
/// Base URL of the site, without a trailing slash.
url: String,
/// Known category filters (id is the category archive URL).
categories: Arc<RwLock<Vec<FilterOption>>>,
/// Known tag filters (id is the tag archive URL).
tags: Arc<RwLock<Vec<FilterOption>>>,
/// Known model filters (id is the model archive URL).
models: Arc<RwLock<Vec<FilterOption>>>,
/// Known uploader filters (id is the member page URL).
uploaders: Arc<RwLock<Vec<FilterOption>>>,
}
/// Kind of listing a request resolves to; `build_url_for_target` uses it to
/// choose the URL layout and the pagination parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TargetKind {
/// The general `/fetish-videos/` archive; the page number is a path segment.
Archive,
/// A category, tag or model archive URL; paginated with `from`.
StandardVideos,
/// An uploader (member) page URL; paginated with `from_videos`.
MemberVideos,
/// A free-text search under `/search/<query>/`; paginated with
/// `from_videos` and `from_albums`.
Search,
}
/// A resolved listing target: what kind of listing to request and its value.
#[derive(Debug, Clone)]
struct QueryTarget {
/// Which URL layout applies.
kind: TargetKind,
/// Archive/member URL for `StandardVideos` and `MemberVideos`, the raw
/// search text for `Search`, and an empty string for `Archive`.
value: String,
}
impl HeavyfetishProvider {
/// Creates the provider with every filter list seeded with a single "All"
/// entry, then starts the background load of the real filter options.
pub fn new() -> Self {
    // Each list gets its own lock and its own placeholder entry.
    let seeded = || {
        Arc::new(RwLock::new(vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }]))
    };
    let instance = Self {
        url: BASE_URL.to_string(),
        categories: seeded(),
        tags: seeded(),
        models: seeded(),
        uploaders: seeded(),
    };
    instance.spawn_initial_load();
    instance
}
fn spawn_initial_load(&self) {
let url = self.url.clone();
let categories = Arc::clone(&self.categories);
let tags = Arc::clone(&self.tags);
let models = Arc::clone(&self.models);
let uploaders = Arc::clone(&self.uploaders);
thread::spawn(move || {
let runtime = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(runtime) => runtime,
Err(error) => {
report_provider_error_background(
"heavyfetish",
"spawn_initial_load.runtime_build",
&error.to_string(),
);
return;
}
};
runtime.block_on(async move {
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
report_provider_error_background(
"heavyfetish",
"load_categories",
&error.to_string(),
);
}
if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
report_provider_error_background(
"heavyfetish",
"load_tags",
&error.to_string(),
);
}
if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await {
report_provider_error_background(
"heavyfetish",
"load_models",
&error.to_string(),
);
}
if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
report_provider_error_background(
"heavyfetish",
"load_uploaders",
&error.to_string(),
);
}
});
});
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let categories = self
.categories
.read()
.map(|value| value.clone())
.unwrap_or_default();
let tags = self
.tags
.read()
.map(|value| value.clone())
.unwrap_or_default();
let models = self
.models
.read()
.map(|value| value.clone())
.unwrap_or_default();
let uploaders = self
.uploaders
.read()
.map(|value| value.clone())
.unwrap_or_default();
Channel {
id: "heavyfetish".to_string(),
name: "HeavyFetish".to_string(),
description:
"HeavyFetish videos, categories, tags, models, and uploader archives.".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=heavyfetish.com"
.to_string(),
status: "active".to_string(),
categories: categories.iter().map(|value| value.title.clone()).collect(),
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse HeavyFetish by archive order.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Latest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Popular".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "longest".to_string(),
title: "Longest".to_string(),
},
FilterOption {
id: "commented".to_string(),
title: "Most Commented".to_string(),
},
FilterOption {
id: "recommended".to_string(),
title: "Most Favorited".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Categories".to_string(),
description: "Browse a HeavyFetish category archive.".to_string(),
systemImage: "square.grid.2x2".to_string(),
colorName: "orange".to_string(),
options: categories,
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Tags".to_string(),
description: "Browse a HeavyFetish tag archive.".to_string(),
systemImage: "tag.fill".to_string(),
colorName: "green".to_string(),
options: tags,
multiSelect: false,
},
ChannelOption {
id: "stars".to_string(),
title: "Models".to_string(),
description: "Browse a HeavyFetish model archive.".to_string(),
systemImage: "star.fill".to_string(),
colorName: "yellow".to_string(),
options: models,
multiSelect: false,
},
ChannelOption {
id: "sites".to_string(),
title: "Uploaders".to_string(),
description: "Browse a HeavyFetish uploader archive.".to_string(),
systemImage: "person.crop.square".to_string(),
colorName: "purple".to_string(),
options: uploaders,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Trims `text` and replaces every run of whitespace with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Decodes HTML entities in `text`; if decoding to a string fails, the input
/// is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the visible text of `element`: its text nodes joined with spaces,
/// whitespace-collapsed and entity-decoded.
fn text_of(element: &ElementRef<'_>) -> String {
    let fragments: Vec<&str> = element.text().collect();
    let joined = fragments.join(" ");
    let collapsed = Self::collapse_whitespace(&joined);
    Self::decode_html(&collapsed)
}
/// Produces the comparison form of a title: surrounding whitespace and
/// leading `#` characters removed, inner whitespace collapsed to single
/// spaces, ASCII letters lower-cased.
fn normalize_title(title: &str) -> String {
    let without_hashes = title.trim().trim_start_matches('#');
    let mut normalized = String::with_capacity(without_hashes.len());
    for word in without_hashes.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
/// Turns a link found on the site into an absolute URL.
///
/// Empty input stays empty, absolute `http(s)` URLs are returned as is,
/// protocol-relative URLs get `https:`, root-relative paths are appended to
/// the base URL, and anything else is treated as relative to the site root
/// (leading `./` removed).
fn normalize_url(&self, url: &str) -> String {
    match url {
        "" => String::new(),
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            absolute.to_string()
        }
        protocol_relative if protocol_relative.starts_with("//") => {
            format!("https:{protocol_relative}")
        }
        rooted if rooted.starts_with('/') => format!("{}{}", self.url, rooted),
        relative => format!("{}/{}", self.url, relative.trim_start_matches("./")),
    }
}
/// Parses a duration text into seconds; returns 0 when the text cannot be
/// parsed or the value does not fit into `u32`.
fn parse_duration(text: &str) -> u32 {
    match parse_time_to_seconds(text) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Parses a view counter text such as the content of a `div.views` element.
///
/// The lower-case words "views"/"view" and all spaces are removed before the
/// rest is handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let without_label = text.replace("views", "").replace("view", "");
    let compact: String = without_label.chars().filter(|ch| *ch != ' ').collect();
    let cleaned = compact.trim().to_string();
    parse_abbreviated_number(&cleaned)
}
/// Parses a percentage text such as `"85%"` into its numeric value.
/// Returns `None` when the remaining text is not a number.
fn parse_percent(text: &str) -> Option<f32> {
    let number = text.trim().trim_end_matches('%').trim();
    match number.parse::<f32>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Maps a client sort id (or one of its aliases) to the site's `sort_by`
/// value. Returns `None` for anything else, which leaves the site's default
/// order in place.
fn sort_param(sort: &str) -> Option<&'static str> {
    // (accepted aliases, site parameter)
    const ALIASES: &[(&[&str], &str)] = &[
        (&["popular", "viewed", "trending"], "video_viewed"),
        (&["rated", "rating", "top"], "rating"),
        (&["longest", "duration"], "duration"),
        (&["commented", "comments"], "most_commented"),
        (&["recommended", "favorited", "favourited"], "most_favourited"),
    ];
    ALIASES
        .iter()
        .find(|(names, _)| names.contains(&sort))
        .map(|(_, param)| *param)
}
/// Appends `key=value` to `url`, using `?` for the first parameter and `&`
/// when the URL already contains a `?`. Neither key nor value is escaped.
fn append_query_param(url: &str, key: &str, value: &str) -> String {
    let mut extended = String::with_capacity(url.len() + key.len() + value.len() + 2);
    extended.push_str(url);
    extended.push(if url.contains('?') { '&' } else { '?' });
    extended.push_str(key);
    extended.push('=');
    extended.push_str(value);
    extended
}
/// Encodes a search text with `application/x-www-form-urlencoded` rules
/// (a space becomes `+`) for use in the search URL.
fn encode_search_query(query: &str) -> String {
    // Same byte-level encoding the form serializer applies to a pair's value.
    url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>()
}
/// Adds `item` to the shared list unless its id or title is empty or an
/// entry with the same id is already present. Does nothing when the lock
/// cannot be acquired for writing.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    if item.id.is_empty() || item.title.is_empty() {
        return;
    }
    let Ok(mut list) = target.write() else {
        return;
    };
    let already_known = list.iter().any(|existing| existing.id == item.id);
    if !already_known {
        list.push(item);
    }
}
async fn fetch_html(url: &str) -> Result<String> {
let mut requester = Requester::new();
requester
.get(url, None)
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Loads the category index page and adds every category link to
/// `categories`.
///
/// Only absolute links below `{base_url}/categories/` are accepted. The title
/// comes from the link's `title` attribute, falling back to its text. Ids are
/// stored with exactly one trailing slash.
///
/// # Errors
/// Fails when the page cannot be fetched or the selector cannot be parsed.
async fn load_categories(
    base_url: &str,
    categories: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    let index_url = format!("{base_url}/categories/");
    let html = Self::fetch_html(&index_url).await?;
    let document = Html::parse_document(&html);
    let item_selector = Self::selector("#list_categories_categories_list_items a.item[href]")?;

    for anchor in document.select(&item_selector) {
        let link = anchor
            .value()
            .attr("href")
            .unwrap_or_default()
            .trim_end_matches('/');
        // The index URL doubles as the required prefix of a category link.
        if !link.starts_with(&index_url) {
            continue;
        }
        let label = match anchor.value().attr("title").map(str::trim) {
            Some(attribute) if !attribute.is_empty() => attribute.to_owned(),
            _ => Self::text_of(&anchor),
        };
        if label.is_empty() {
            continue;
        }
        Self::push_unique(
            &categories,
            FilterOption {
                id: format!("{link}/"),
                title: label,
            },
        );
    }
    Ok(())
}
/// Loads the tag index page and adds every tag link to `tags`.
///
/// A link qualifies when it is an absolute URL of the form
/// `{base_url}/tags/<slug>` with a non-empty, single-segment slug. The title
/// comes from the link's `title` attribute, falling back to its text.
///
/// # Errors
/// Fails when the page cannot be fetched or the selector cannot be parsed.
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let index_url = format!("{base_url}/tags/");
    let html = Self::fetch_html(&index_url).await?;
    let document = Html::parse_document(&html);
    let anchors = Self::selector("a[href]")?;

    for anchor in document.select(&anchors) {
        let link = anchor
            .value()
            .attr("href")
            .unwrap_or_default()
            .trim_end_matches('/');
        let Some(slug) = link.strip_prefix(index_url.as_str()) else {
            continue;
        };
        if slug.is_empty() || slug.contains('/') {
            continue;
        }
        let label = match anchor.value().attr("title").map(str::trim) {
            Some(attribute) if !attribute.is_empty() => attribute.to_owned(),
            _ => Self::text_of(&anchor),
        };
        if label.is_empty() {
            continue;
        }
        Self::push_unique(
            &tags,
            FilterOption {
                id: format!("{link}/"),
                title: label,
            },
        );
    }
    Ok(())
}
/// Loads the first five pages of the model index and adds every model link
/// to `models`.
///
/// Only absolute links below `{base_url}/fetish-models/` with a non-empty
/// title are accepted.
///
/// # Errors
/// Fails when the selector cannot be parsed or a page cannot be fetched; a
/// failed page stops the loading of the remaining pages.
async fn load_models(base_url: &str, models: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let item_selector = Self::selector("#list_models_models_list_items a.item[href]")?;
    let prefix = format!("{base_url}/fetish-models/");

    for page in 1..=5 {
        // Page one is the index itself; later pages add the number as a path segment.
        let page_url = match page {
            1 => prefix.clone(),
            _ => format!("{prefix}{page}/"),
        };
        let html = Self::fetch_html(&page_url).await?;
        let document = Html::parse_document(&html);

        for anchor in document.select(&item_selector) {
            let link = anchor
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            if !link.starts_with(&prefix) {
                continue;
            }
            let label = match anchor.value().attr("title").map(str::trim) {
                Some(attribute) if !attribute.is_empty() => attribute.to_owned(),
                _ => Self::text_of(&anchor),
            };
            if label.is_empty() {
                continue;
            }
            Self::push_unique(
                &models,
                FilterOption {
                    id: format!("{link}/"),
                    title: label,
                },
            );
        }
    }
    Ok(())
}
/// Loads the first five pages of the member index and adds every member link
/// to `uploaders`.
///
/// A link qualifies when it is an absolute URL of the form
/// `{base_url}/members/<digits>`. The title is the text of the link's
/// `strong.title` child, falling back to the text of the link itself.
///
/// # Errors
/// Fails when a selector cannot be parsed or a page cannot be fetched; a
/// failed page stops the loading of the remaining pages.
async fn load_uploaders(
    base_url: &str,
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    let anchors = Self::selector("#list_members_members_items a[href]")?;
    let name_selector = Self::selector("strong.title")?;
    let prefix = format!("{base_url}/members/");

    for page in 1..=5 {
        let page_url = if page > 1 {
            Self::append_query_param(&prefix, "from_members", &page.to_string())
        } else {
            prefix.clone()
        };
        let html = Self::fetch_html(&page_url).await?;
        let document = Html::parse_document(&html);

        for anchor in document.select(&anchors) {
            let link = anchor
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            let Some(member_id) = link.strip_prefix(prefix.as_str()) else {
                continue;
            };
            // Member pages are addressed by a purely numeric, single path segment.
            let is_member_page = !member_id.is_empty()
                && !member_id.contains('/')
                && member_id.chars().all(|ch| ch.is_ascii_digit());
            if !is_member_page {
                continue;
            }
            let name = anchor
                .select(&name_selector)
                .next()
                .map(|node| Self::text_of(&node))
                .filter(|text| !text.is_empty());
            let label = match name {
                Some(text) => text,
                None => Self::text_of(&anchor),
            };
            if label.is_empty() {
                continue;
            }
            Self::push_unique(
                &uploaders,
                FilterOption {
                    id: format!("{link}/"),
                    title: label,
                },
            );
        }
    }
    Ok(())
}
/// Chooses the listing target from the selected filter options.
///
/// The first option whose value is a URL on this site wins, checked in the
/// order uploader (`sites`), model (`stars`), tag (`filter`), category
/// (`categories`). Without such a selection the general archive is used.
fn resolve_option_target(&self, options: &ServerOptions) -> QueryTarget {
    let candidates = [
        (options.sites.as_deref(), TargetKind::MemberVideos),
        (options.stars.as_deref(), TargetKind::StandardVideos),
        (options.filter.as_deref(), TargetKind::StandardVideos),
        (options.categories.as_deref(), TargetKind::StandardVideos),
    ];
    for (selection, kind) in candidates {
        let Some(value) = selection else {
            continue;
        };
        if value.starts_with(&self.url) && value != "all" {
            return QueryTarget {
                kind,
                value: value.to_string(),
            };
        }
    }
    QueryTarget {
        kind: TargetKind::Archive,
        value: String::new(),
    }
}
/// Looks for a filter option whose normalized title equals the normalized
/// `query`, ignoring the "all" placeholder. The first match becomes a target
/// of the given `kind` with the option's id as value.
fn match_filter(options: &[FilterOption], query: &str, kind: TargetKind) -> Option<QueryTarget> {
    let wanted = Self::normalize_title(query);
    for option in options {
        if option.id == "all" {
            continue;
        }
        if Self::normalize_title(&option.title) == wanted {
            return Some(QueryTarget {
                kind,
                value: option.id.clone(),
            });
        }
    }
    None
}
/// Chooses the listing target for a free-text query.
///
/// If the query equals the title of a known uploader, model, tag or category
/// (checked in that order), that archive is used; otherwise the query is
/// sent to the site search. A list whose lock cannot be read is skipped.
fn resolve_query_target(&self, query: &str) -> QueryTarget {
    let sources = [
        (&self.uploaders, TargetKind::MemberVideos),
        (&self.models, TargetKind::StandardVideos),
        (&self.tags, TargetKind::StandardVideos),
        (&self.categories, TargetKind::StandardVideos),
    ];
    for (list, kind) in sources {
        let Ok(known) = list.read() else {
            continue;
        };
        if let Some(target) = Self::match_filter(&known, query, kind) {
            return target;
        }
    }
    QueryTarget {
        kind: TargetKind::Search,
        value: query.to_string(),
    }
}
/// Builds the listing URL for `target`, page `page` and sort id `sort`.
///
/// Pages above one are addressed by a path segment (archive) or by the
/// pagination parameter(s) of the target kind. A recognised sort id adds
/// `sort_by=...` as the last parameter.
fn build_url_for_target(&self, target: &QueryTarget, page: u16, sort: &str) -> String {
    // Adds every pagination key with the page number, but only past page one.
    let paged = |base: String, keys: &[&str]| -> String {
        if page <= 1 {
            return base;
        }
        let number = page.to_string();
        keys.iter()
            .fold(base, |url, key| Self::append_query_param(&url, key, &number))
    };

    let unsorted = match target.kind {
        TargetKind::Archive if page > 1 => format!("{}/fetish-videos/{page}/", self.url),
        TargetKind::Archive => format!("{}/fetish-videos/", self.url),
        TargetKind::StandardVideos => paged(target.value.clone(), &["from"]),
        TargetKind::MemberVideos => paged(target.value.clone(), &["from_videos"]),
        TargetKind::Search => {
            let encoded_query = Self::encode_search_query(&target.value);
            paged(
                format!("{}/search/{encoded_query}/", self.url),
                &["from_videos", "from_albums"],
            )
        }
    };

    match Self::sort_param(sort) {
        Some(sort_by) => Self::append_query_param(&unsorted, "sort_by", sort_by),
        None => unsorted,
    }
}
/// Finds the element that holds the video cards of a listing page.
///
/// The known container ids are tried in a fixed order and the first one
/// present in the document is returned; `Ok(None)` means none was found.
fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
    const CONTAINER_IDS: [&str; 4] = [
        "#list_videos_fetish_videos_list_items",
        "#list_videos_videos_list_search_result_items",
        "#list_videos_common_videos_list_items",
        "#list_videos_uploaded_videos_items",
    ];
    for container_id in CONTAINER_IDS {
        let selector = Self::selector(container_id)?;
        let found = document.select(&selector).next();
        if found.is_some() {
            return Ok(found);
        }
    }
    Ok(None)
}
/// Parses the video cards of a listing page into `VideoItem`s.
///
/// Cards without a video link, without an id or without a title are
/// skipped. A page without a known list container yields an empty list.
///
/// # Errors
/// Fails only when one of the CSS selectors cannot be parsed.
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let container = match self.list_container(&document)? {
        Some(found) => found,
        None => return Ok(Vec::new()),
    };

    let cards = Self::selector("div.item.hf-video-item")?;
    let video_link = Self::selector("a[href*=\"/videos/\"]")?;
    let thumb_image = Self::selector("img.thumb")?;
    let title_node = Self::selector("strong.title")?;
    let duration_node = Self::selector("div.duration")?;
    let rating_node = Self::selector("div.rating")?;
    let views_node = Self::selector("div.views")?;

    let mut videos = Vec::new();
    for card in container.select(&cards) {
        let Some(anchor) = card.select(&video_link).next() else {
            continue;
        };
        let page_url = self.normalize_url(anchor.value().attr("href").unwrap_or_default());
        // The id is the second-to-last path segment of the page URL,
        // e.g. `120660` in `.../videos/120660/example/`.
        let id = page_url
            .trim_end_matches('/')
            .split('/')
            .nth_back(1)
            .unwrap_or_default()
            .to_string();
        if id.is_empty() || page_url.is_empty() {
            continue;
        }

        let image = card.select(&thumb_image).next();
        // Preferred thumbnail source: data-webp, then data-original, then src.
        let thumb = match image {
            Some(img) => {
                let attributes = img.value();
                attributes
                    .attr("data-webp")
                    .or_else(|| attributes.attr("data-original"))
                    .or_else(|| attributes.attr("src"))
                    .map(|source| self.normalize_url(source))
                    .unwrap_or_default()
            }
            None => String::new(),
        };
        let preview = image
            .and_then(|img| img.value().attr("data-preview"))
            .map(|source| self.normalize_url(source));

        // Title: the card's heading, else the link's `title` attribute.
        let heading = card
            .select(&title_node)
            .next()
            .map(|node| Self::text_of(&node))
            .filter(|text| !text.is_empty());
        let title = match heading {
            Some(text) => text,
            None => {
                let from_attribute = anchor
                    .value()
                    .attr("title")
                    .map(Self::decode_html)
                    .filter(|text| !text.is_empty());
                match from_attribute {
                    Some(text) => text,
                    None => continue,
                }
            }
        };

        let duration = match card.select(&duration_node).next() {
            Some(node) => Self::parse_duration(&Self::text_of(&node)),
            None => 0,
        };
        let rating = card
            .select(&rating_node)
            .next()
            .and_then(|node| Self::parse_percent(&Self::text_of(&node)));
        let views = card
            .select(&views_node)
            .next()
            .and_then(|node| Self::parse_views(&Self::text_of(&node)));

        let video = VideoItem::new(
            id,
            title,
            page_url,
            "heavyfetish".to_string(),
            thumb,
            duration,
        );
        let video = match preview {
            Some(preview) => video.preview(preview),
            None => video,
        };
        let video = match rating {
            Some(rating) => video.rating(rating),
            None => video,
        };
        let video = match views {
            Some(views) => video.views(views),
            None => video,
        };
        videos.push(video);
    }
    Ok(videos)
}
/// Returns the first capture group of `regex` in `block` with the JavaScript
/// escapes `\/` and `\'` resolved, or `None` when the regex does not match.
fn extract_js_value(block: &str, regex: &Regex) -> Option<String> {
    let captures = regex.captures(block)?;
    let raw = captures.get(1)?.as_str();
    Some(raw.replace("\\/", "/").replace("\\'", "'"))
}
/// Derives a quality label from a media URL: the highest known label
/// contained in the URL, or `"480p"` when none is present.
fn quality_from_url(url: &str) -> String {
    // Ordered best first so the highest matching label wins.
    const KNOWN: &[&str] = &["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"];
    KNOWN
        .iter()
        .find(|label| url.contains(**label))
        .copied()
        .unwrap_or("480p")
        .to_string()
}
/// Collects the downloadable formats of a video detail page.
///
/// Sources, in priority order (the first URL seen for a quality wins):
/// 1. the `video_alt_url2`, `video_alt_url` and `video_url` entries of the
///    page's `flashvars` object, labelled by their `*_text` companion;
/// 2. the `/get_file/` links inside `#download_popup`, labelled from the URL.
///
/// A flashvars label is reduced to the known quality it contains (so a label
/// such as "720p HD" counts as `720p`); a label without a known quality
/// falls back to the quality derived from the URL. Entries with an empty URL
/// are ignored. The result is ordered from highest to lowest quality, and
/// every format carries a `Referer` (the page URL) and a browser
/// `User-Agent` header.
///
/// # Errors
/// Fails only when a regex or selector cannot be compiled.
fn build_formats(&self, html: &str, page_url: &str) -> Result<Vec<VideoFormat>> {
    // Known quality labels, best first; this is also the output order.
    const QUALITY_ORDER: [&str; 7] = ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"];

    let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
    // Matches `key: '...'` with a single-quoted value that may contain `\'`.
    let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
    let flashvars = flashvars_regex
        .captures(html)
        .and_then(|value| value.get(1))
        .map(|value| value.as_str().to_string())
        .unwrap_or_default();

    // quality label -> media URL
    let mut seen = HashMap::<String, String>::new();
    for key in ["video_alt_url2", "video_alt_url", "video_url"] {
        let url_regex = value_regex(key)?;
        let text_regex = value_regex(&format!("{key}_text"))?;
        let Some(url) = Self::extract_js_value(&flashvars, &url_regex) else {
            continue;
        };
        let normalized = self.normalize_url(&url);
        // An empty URL is not a usable format and must not block a real
        // download link of the same quality below.
        if normalized.is_empty() {
            continue;
        }
        // Only labels from QUALITY_ORDER are emitted, so map the site's
        // label onto one of them instead of storing it verbatim.
        let quality = Self::extract_js_value(&flashvars, &text_regex)
            .and_then(|label| {
                QUALITY_ORDER
                    .iter()
                    .find(|known| label.contains(**known))
                    .map(|known| (*known).to_string())
            })
            .unwrap_or_else(|| Self::quality_from_url(&normalized));
        seen.entry(quality).or_insert(normalized);
    }

    let document = Html::parse_document(html);
    let download_selector = Self::selector("#download_popup a[href*=\"/get_file/\"]")?;
    for element in document.select(&download_selector) {
        let href = element.value().attr("href").unwrap_or_default();
        let normalized = self.normalize_url(href);
        if normalized.is_empty() {
            continue;
        }
        let quality = Self::quality_from_url(&normalized);
        seen.entry(quality).or_insert(normalized);
    }

    let mut formats = Vec::new();
    for quality in QUALITY_ORDER {
        let Some(url) = seen.get(quality) else {
            continue;
        };
        let format = VideoFormat::new(url.clone(), quality.to_string(), "mp4".to_string())
            .format_id(quality.to_string())
            .http_header("Referer".to_string(), page_url.to_string())
            .http_header("User-Agent".to_string(), BROWSER_UA.to_string());
        formats.push(format);
    }
    Ok(formats)
}
/// Enriches a list-page `VideoItem` with data from the video's detail page.
///
/// Reads the page's `flashvars` JavaScript object and the `.block-details`
/// markup to set the title, formats, uploader, views, duration and tags, and
/// to fill in the thumbnail and aspect ratio when they are still missing.
/// As a side effect, the uploader, categories, tags and models found on the
/// page are added to the provider's shared filter lists.
///
/// # Errors
/// Fails when a regex or selector cannot be compiled or when `build_formats`
/// fails.
fn apply_detail_video(
&self,
mut item: VideoItem,
html: &str,
page_url: &str,
) -> Result<VideoItem> {
// Captures the body of `var flashvars = { ... };`.
let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
// Matches `key: '...'` with a single-quoted value that may contain `\'`.
let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
// An absent flashvars block is treated as an empty one.
let flashvars = flashvars_regex
.captures(html)
.and_then(|value| value.get(1))
.map(|value| value.as_str().to_string())
.unwrap_or_default();
let title_regex = value_regex("video_title")?;
let category_regex = value_regex("video_categories")?;
let tag_regex = value_regex("video_tags")?;
let model_regex = value_regex("video_models")?;
let preview_regex = value_regex("preview_url")?;
let width_regex = value_regex("player_width")?;
let height_regex = value_regex("player_height")?;
let document = Html::parse_document(html);
let uploader_selector =
Self::selector(".block-details .block-user .username a[href*=\"/members/\"]")?;
let info_span_selector = Self::selector(".block-details .info .item span")?;
let category_selector = Self::selector(".block-details .info a[href*=\"/categories/\"]")?;
let tag_selector = Self::selector(".block-details .info a[href*=\"/tags/\"]")?;
let model_selector =
Self::selector(".block-details .info a[href*=\"/fetish-models/\"]")?;
// A non-empty flashvars title replaces the list title.
if let Some(title) = Self::extract_js_value(&flashvars, &title_regex) {
if !title.is_empty() {
item.title = title;
}
}
// Formats are only set when at least one was found.
let formats = self.build_formats(html, page_url)?;
if !formats.is_empty() {
item = item.formats(formats);
}
let uploader_link = document.select(&uploader_selector).next();
let uploader = uploader_link
.as_ref()
.map(|value| Self::text_of(value))
.filter(|value| !value.is_empty());
let uploader_url = uploader_link
.and_then(|value| value.value().attr("href"))
.map(|value| self.normalize_url(value));
// Remember the uploader as a filter option when both name and URL are known.
if let (Some(name), Some(url)) = (&uploader, &uploader_url) {
Self::push_unique(
&self.uploaders,
FilterOption {
id: url.clone(),
title: name.clone(),
},
);
}
// The info block holds "Views: ..." and "Duration: ..." spans.
for span in document.select(&info_span_selector) {
let text = Self::text_of(&span);
if let Some(value) = text.strip_prefix("Views:") {
if let Some(views) = Self::parse_views(value) {
item = item.views(views);
}
} else if let Some(value) = text.strip_prefix("Duration:") {
let duration = Self::parse_duration(value.trim());
// A duration that parses to 0 does not overwrite the list value.
if duration > 0 {
item.duration = duration;
}
}
}
// Start from the tags the item already has, then add category, tag and
// model names from the page; each is also recorded as a filter option.
let mut tags = item.tags.clone().unwrap_or_default();
for element in document.select(&category_selector) {
let title = Self::text_of(&element);
let href = element.value().attr("href").unwrap_or_default();
if title.is_empty() {
continue;
}
tags.push(title.clone());
Self::push_unique(
&self.categories,
FilterOption {
id: self.normalize_url(href),
title,
},
);
}
for element in document.select(&tag_selector) {
let title = Self::text_of(&element);
let href = element.value().attr("href").unwrap_or_default();
if title.is_empty() {
continue;
}
tags.push(title.clone());
Self::push_unique(
&self.tags,
FilterOption {
id: self.normalize_url(href),
title,
},
);
}
for element in document.select(&model_selector) {
let title = Self::text_of(&element);
let href = element.value().attr("href").unwrap_or_default();
if title.is_empty() {
continue;
}
tags.push(title.clone());
Self::push_unique(
&self.models,
FilterOption {
id: self.normalize_url(href),
title,
},
);
}
// The flashvars lists are comma-separated names; they only add tags.
for raw_group in [category_regex, tag_regex, model_regex] {
if let Some(values) = Self::extract_js_value(&flashvars, &raw_group) {
for value in values.split(',') {
let clean = Self::collapse_whitespace(value.trim());
if !clean.is_empty() {
tags.push(clean);
}
}
}
}
// Sorting first makes `dedup` remove every exact duplicate.
tags.sort();
tags.dedup();
if !tags.is_empty() {
item = item.tags(tags);
}
if let Some(uploader) = uploader {
item = item.uploader(uploader);
}
if let Some(uploader_url) = uploader_url {
item = item.uploader_url(uploader_url);
}
// The list thumbnail wins; the flashvars preview is only a fallback.
if item.thumb.is_empty() {
if let Some(preview) = Self::extract_js_value(&flashvars, &preview_regex) {
item.thumb = self.normalize_url(&preview);
}
}
// Derive the aspect ratio from the player size unless it is already set.
if item.aspectRatio.is_none() {
let width = Self::extract_js_value(&flashvars, &width_regex)
.and_then(|value| value.parse::<f32>().ok());
let height = Self::extract_js_value(&flashvars, &height_regex)
.and_then(|value| value.parse::<f32>().ok());
if let (Some(width), Some(height)) = (width, height) {
// Guard against a zero or negative height before dividing.
if height > 0.0 {
item = item.aspect_ratio(width / height);
}
}
}
Ok(item)
}
/// Fetches the detail page of `item` and merges its data into the item.
///
/// This never fails: when the request or the parsing goes wrong, the problem
/// is reported through `report_provider_error_background` and the unchanged
/// list item is returned.
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
    let page_url = item.url.clone();
    let fallback = item.clone();
    let mut client =
        requester_or_default(options, "heavyfetish", "enrich_video.requester_missing");

    // On success return right away; otherwise remember which stage failed.
    let (stage, detail) = match client.get(&page_url, None).await {
        Ok(html) => match self.apply_detail_video(item, &html, &page_url) {
            Ok(enriched) => return enriched,
            Err(error) => ("enrich_video.parse", error.to_string()),
        },
        Err(error) => ("enrich_video.request", error.to_string()),
    };
    report_provider_error_background(
        "heavyfetish",
        stage,
        &format!("url={page_url}; error={detail}"),
    );
    fallback
}
/// Returns the enriched videos listed at `url`, using `cache` when possible.
///
/// A cache entry younger than 300 seconds is served directly. Otherwise the
/// listing page is fetched and parsed, at most `per_page_limit` items (but
/// always at least one) are enriched from their detail pages with up to four
/// requests in flight, and a non-empty result is cached under `url`.
///
/// The enriched items keep the order of the listing page, so a sorted
/// listing stays sorted.
///
/// # Errors
/// Fails when the listing request fails or the listing cannot be parsed.
/// A failure while enriching a single video is not an error here; that video
/// is returned with its list data only.
async fn fetch_items_for_url(
    &self,
    cache: VideoCache,
    url: String,
    per_page_limit: usize,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    // One effective limit for both the cached and the freshly fetched path.
    let limit = per_page_limit.max(1);

    // NOTE(review): the cache is keyed by URL only, while the cached list was
    // cut to the limit of the request that filled it; a later request with a
    // larger limit receives the shorter list until the entry expires.
    if let Some((time, items)) = cache.get(&url) {
        if time.elapsed().unwrap_or_default().as_secs() < 300 {
            return Ok(items.iter().take(limit).cloned().collect());
        }
    }

    let mut requester =
        requester_or_default(options, "heavyfetish", "fetch_items_for_url.requester_missing");
    let html = requester
        .get(&url, None)
        .await
        .map_err(|error| Error::from(format!("request failed for {url}: {error}")))?;
    let list_items = self.parse_list_videos(&html)?;
    if list_items.is_empty() {
        return Ok(vec![]);
    }

    // `buffered` (unlike `buffer_unordered`) yields results in input order,
    // so the site's sort order survives the concurrent enrichment.
    let items = stream::iter(list_items.into_iter().take(limit).map(|item| {
        let provider = self.clone();
        let options = options.clone();
        async move { provider.enrich_video(item, &options).await }
    }))
    .buffered(4)
    .collect::<Vec<_>>()
    .await;

    if !items.is_empty() {
        cache.insert(url, items.clone());
    }
    Ok(items)
}
/// Lists videos for the selected filter options (no search text).
///
/// # Errors
/// Propagates the errors of `fetch_items_for_url`.
async fn get(
    &self,
    cache: VideoCache,
    page: u16,
    sort: &str,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let listing_url = {
        let target = self.resolve_option_target(&options);
        self.build_url_for_target(&target, page, sort)
    };
    self.fetch_items_for_url(cache, listing_url, per_page_limit, &options)
        .await
}
/// Lists videos for a search text, which is first matched against the known
/// filter titles and otherwise sent to the site search.
///
/// # Errors
/// Propagates the errors of `fetch_items_for_url`.
async fn query(
    &self,
    cache: VideoCache,
    page: u16,
    sort: &str,
    query: &str,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let listing_url = {
        let target = self.resolve_query_target(query);
        self.build_url_for_target(&target, page, sort)
    };
    self.fetch_items_for_url(cache, listing_url, per_page_limit, &options)
        .await
}
}
#[async_trait]
impl Provider for HeavyfetishProvider {
    /// Entry point for video listings.
    ///
    /// `page` and `per_page` arrive as text; unparsable values fall back to
    /// page 1 and 30 items. A non-blank `query` triggers a search, otherwise
    /// the selected filter options decide the listing. Errors are reported
    /// and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // The database pool is not needed by this provider.
        let _ = pool;
        let page_number = page.parse::<u16>().unwrap_or(1);
        let limit = per_page.parse::<usize>().unwrap_or(30);

        let search_text = query.filter(|text| !text.trim().is_empty());
        let outcome = match search_text {
            Some(text) => {
                self.query(cache, page_number, &sort, &text, limit, options)
                    .await
            }
            None => self.get(cache, page_number, &sort, limit, options).await,
        };

        match outcome {
            Ok(videos) => videos,
            Err(error) => {
                report_provider_error("heavyfetish", "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }

    /// Returns the channel description; always `Some` for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
// Unit tests for URL building and detail-page enrichment. They run without
// network access.
#[cfg(test)]
mod tests {
use super::*;
// Builds a provider directly, bypassing `new()` so that no background
// loading thread is started.
fn provider() -> HeavyfetishProvider {
HeavyfetishProvider {
url: BASE_URL.to_string(),
categories: Arc::new(RwLock::new(vec![FilterOption {
id: "all".to_string(),
title: "All".to_string(),
}])),
tags: Arc::new(RwLock::new(vec![FilterOption {
id: "all".to_string(),
title: "All".to_string(),
}])),
models: Arc::new(RwLock::new(vec![FilterOption {
id: "all".to_string(),
title: "All".to_string(),
}])),
uploaders: Arc::new(RwLock::new(vec![FilterOption {
id: "all".to_string(),
title: "All".to_string(),
}])),
}
}
// A search on page two carries both pagination parameters, followed by the
// sort parameter; the space in the query is encoded as `+`.
#[test]
fn builds_search_page_two_url() {
let provider = provider();
let url = provider.build_url_for_target(
&QueryTarget {
kind: TargetKind::Search,
value: "adriana chechik".to_string(),
},
2,
"rated",
);
assert_eq!(
url,
"https://heavyfetish.com/search/adriana+chechik/?from_videos=2&from_albums=2&sort_by=rating"
);
}
// The thumbnail and preview taken from the list page must not be replaced
// by the `preview_url` found in the detail page's flashvars.
#[test]
fn preserves_list_thumb_when_detail_has_preview_image() {
let provider = provider();
let item = VideoItem::new(
"120660".to_string(),
"Example".to_string(),
"https://heavyfetish.com/videos/120660/example/".to_string(),
"heavyfetish".to_string(),
"https://heavyfetish.com/list-thumb.jpg".to_string(),
0,
)
.preview("https://heavyfetish.com/list-preview.mp4".to_string());
// Minimal detail page: only a flashvars block, no `.block-details` markup.
let html = r#"
<script>
var flashvars = {
video_title: 'Example',
preview_url: 'https://heavyfetish.com/detail-preview.jpg',
player_width: '882',
player_height: '500.42553191489'
};
</script>
"#;
let enriched = provider
.apply_detail_video(item, html, "https://heavyfetish.com/videos/120660/example/")
.expect("detail parsing should succeed");
assert_eq!(enriched.thumb, "https://heavyfetish.com/list-thumb.jpg");
assert_eq!(
enriched.preview.as_deref(),
Some("https://heavyfetish.com/list-preview.mp4")
);
}
}