Files
hottub/src/providers/porn4fans.rs
2026-03-21 19:29:30 +00:00

651 lines
21 KiB
Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashSet;
/// Channel metadata constant for this provider: a `group_id` plus a list of
/// descriptive `tags`.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "onlyfans",
tags: &["creator", "premium", "clips"],
};
// Declares this module's error types through the `error_chain!` macro.
// `std::io::Error` and `wreq::Error` are registered as foreign links under the
// `Io` and `HttpRequest` names respectively.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// Provider that builds listing/search requests against the porn4fans site and
/// turns the returned HTML into `VideoItem`s.
#[derive(Debug, Clone)]
pub struct Porn4fansProvider {
/// Site root (no trailing slash); used as the prefix for request URLs,
/// `Referer` headers and relative-link normalization.
url: String,
}
/// Data scraped from one video card on a listing page, before the video page
/// itself is fetched for enrichment.
#[derive(Debug, Clone)]
struct Porn4fansCard {
/// Numeric id captured from the `/video/<id>/` segment of the card link.
id: String,
/// Entity-decoded value of the link's `title` attribute.
title: String,
/// Absolute URL of the video page.
page_url: String,
/// Thumbnail URL, or an empty string when only a `data:image/` placeholder was found.
thumb: String,
/// Duration as returned by `parse_time_to_seconds`, `0` when it could not be parsed.
duration: u32,
/// View count; `None` when missing or zero.
views: Option<u32>,
/// Number that precedes the `%` sign in the card's like counter, if present.
rating: Option<f32>,
}
impl Porn4fansProvider {
/// Creates a provider that targets the `https://www.porn4fans.com` site root.
pub fn new() -> Self {
    let url = String::from("https://www.porn4fans.com");
    Self { url }
}
/// Builds the fixed channel descriptor advertised for this provider.
///
/// The client version is accepted for interface symmetry but not consulted.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let favicon =
        String::from("https://www.google.com/s2/favicons?sz=64&domain=www.porn4fans.com");
    Channel {
        id: String::from("porn4fans"),
        name: String::from("Porn4Fans"),
        description: String::from("OnlyFans porn videos."),
        premium: false,
        favicon,
        status: String::from("active"),
        categories: Vec::new(),
        options: Vec::new(),
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Maps the client-facing sort name onto the site's `sort_by` query value.
///
/// Only `"popular"` is special-cased; every other input sorts by post date.
fn sort_by(sort: &str) -> &'static str {
    if sort == "popular" {
        "video_viewed"
    } else {
        "post_date"
    }
}
/// Builds the async-block URL for page `page` of the "latest updates" listing,
/// ordered according to `sort` (see `sort_by`).
fn build_latest_url(&self, page: u32, sort: &str) -> String {
    let sort_key = Self::sort_by(sort);
    format!(
        "{}/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={}&from={page}",
        self.url, sort_key
    )
}
/// Returns the request headers for the latest-updates listing: a single
/// `Referer` pointing at the listing page itself.
fn build_latest_headers(&self) -> Vec<(String, String)> {
    let referer = format!("{}/latest-updates/", self.url);
    vec![("Referer".to_string(), referer)]
}
/// Splits `query` on whitespace and glues the words back together with
/// `separator`, dropping leading, trailing and repeated whitespace.
fn build_search_path_query(query: &str, separator: &str) -> String {
    let mut joined = String::with_capacity(query.len());
    for (index, word) in query.split_whitespace().enumerate() {
        if index > 0 {
            joined.push_str(separator);
        }
        joined.push_str(word);
    }
    joined
}
/// Builds the async-block URL for page `page` of the search results for `query`.
///
/// The query appears twice: dash-joined in the path and plus-joined in the
/// `q` parameter.
fn build_search_url(&self, query: &str, page: u32, sort: &str) -> String {
    let sort_key = Self::sort_by(sort);
    let path_query = Self::build_search_path_query(query, "-");
    let query_param = Self::build_search_path_query(query, "+");
    format!(
        "{}/search/{path_query}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={query_param}&sort_by={}&from_videos={page}",
        self.url, sort_key
    )
}
/// Returns the request headers for a search: a single `Referer` pointing at
/// the dash-joined search page for `query`.
fn build_search_headers(&self, query: &str) -> Vec<(String, String)> {
    let path_query = Self::build_search_path_query(query, "-");
    let referer = format!("{}/search/{path_query}/", self.url);
    vec![("Referer".to_string(), referer)]
}
/// Fetches one page of the latest-updates listing.
///
/// A cache entry younger than five minutes is returned as-is. Otherwise the
/// listing is requested again; if the request fails, the body is blank, or no
/// videos can be parsed, the stale cache entry (possibly empty) is returned
/// instead. Failures are reported through `report_provider_error` and never
/// surface as `Err` from this function.
async fn get(
    &self,
    cache: VideoCache,
    page: u32,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let video_url = self.build_latest_url(page, sort);

    // Fresh entries short-circuit; stale ones are kept as the fallback result.
    let mut old_items: Vec<VideoItem> = Vec::new();
    if let Some((time, items)) = cache.get(&video_url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
        old_items = items.clone();
    }

    let mut requester =
        requester_or_default(&options, "porn4fans", "porn4fans.get.missing_requester");
    let response = requester
        .get_with_headers(&video_url, self.build_latest_headers(), None)
        .await;
    let text = match response {
        Ok(body) => body,
        Err(e) => {
            report_provider_error(
                "porn4fans",
                "get.request",
                &format!("url={video_url}; error={e}"),
            )
            .await;
            return Ok(old_items);
        }
    };

    if text.trim().is_empty() {
        report_provider_error(
            "porn4fans",
            "get.empty_response",
            &format!("url={video_url}"),
        )
        .await;
        return Ok(old_items);
    }

    let video_items = self.get_video_items_from_html(text, requester).await;
    if video_items.is_empty() {
        return Ok(old_items);
    }

    // Replace whatever was cached for this URL with the fresh result.
    cache.remove(&video_url);
    cache.insert(video_url.clone(), video_items.clone());
    Ok(video_items)
}
/// Fetches one page of search results for `query`.
///
/// A cache entry younger than five minutes is returned as-is. Otherwise the
/// search is requested again; if the request fails, the body is blank, or no
/// videos can be parsed, the stale cache entry (possibly empty) is returned
/// instead. Failures are reported through `report_provider_error` and never
/// surface as `Err` from this function.
async fn query(
    &self,
    cache: VideoCache,
    page: u32,
    query: &str,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let video_url = self.build_search_url(query, page, sort);

    // Fresh entries short-circuit; stale ones are kept as the fallback result.
    let mut old_items: Vec<VideoItem> = Vec::new();
    if let Some((time, items)) = cache.get(&video_url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
        old_items = items.clone();
    }

    let mut requester =
        requester_or_default(&options, "porn4fans", "porn4fans.query.missing_requester");
    let response = requester
        .get_with_headers(&video_url, self.build_search_headers(query), None)
        .await;
    let text = match response {
        Ok(body) => body,
        Err(e) => {
            report_provider_error(
                "porn4fans",
                "query.request",
                &format!("url={video_url}; error={e}"),
            )
            .await;
            return Ok(old_items);
        }
    };

    if text.trim().is_empty() {
        report_provider_error(
            "porn4fans",
            "query.empty_response",
            &format!("url={video_url}"),
        )
        .await;
        return Ok(old_items);
    }

    let video_items = self.get_video_items_from_html(text, requester).await;
    if video_items.is_empty() {
        return Ok(old_items);
    }

    // Replace whatever was cached for this URL with the fresh result.
    cache.remove(&video_url);
    cache.insert(video_url.clone(), video_items.clone());
    Ok(video_items)
}
/// Returns the slice of `text` that follows the first occurrence of `start`
/// and stops at the next `end` (or at the next `start`, or at the end of the
/// text, whichever comes first). `None` when `start` does not occur.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let mut pieces = text.split(start);
    // The first piece is whatever precedes `start`; it is not wanted.
    pieces.next()?;
    let after_start = pieces.next()?;
    after_start.split(end).next()
}
/// Tries each attribute prefix in `attrs` (e.g. `src="`) in order and returns
/// the first trimmed, non-empty value found in `segment`, read up to the next
/// double quote.
fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option<String> {
    for attr in attrs {
        let Some(raw) = Self::extract_between(segment, attr, "\"") else {
            continue;
        };
        let value = raw.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
/// Turns a link found in the markup into an absolute URL.
///
/// * `http://` / `https://` links are returned unchanged.
/// * Protocol-relative links (`//host/...`) get an `https:` scheme.
/// * Root-relative links (`/path`) are appended to the site root.
/// * Anything else is treated as relative to the site root, after removing
///   leading `./` segments.
fn normalize_url(&self, url: &str) -> String {
    let is_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme));

    if is_absolute {
        url.to_string()
    } else if url.starts_with("//") {
        format!("https:{url}")
    } else if url.starts_with('/') {
        format!("{}{}", self.url, url)
    } else {
        format!("{}/{}", self.url, url.trim_start_matches("./"))
    }
}
/// Picks the thumbnail URL for a card from the first populated image
/// attribute in `segment`.
///
/// Attribute prefixes are probed in this order: `data-original="`,
/// `data-webp="`, `srcset="`, `src="`, `poster="`.
///
/// Returns an empty string when no attribute yields a value or when the
/// chosen value is an inline `data:image/` placeholder; otherwise the value
/// is made absolute with `normalize_url`.
fn extract_thumb_url(&self, segment: &str) -> String {
    let Some(thumb_raw) = Self::first_non_empty_attr(
        segment,
        &[
            "data-original=\"",
            "data-webp=\"",
            "srcset=\"",
            "src=\"",
            "poster=\"",
        ],
    ) else {
        // Nothing to normalize: feeding "" to `normalize_url` would produce
        // the bare site root, which is not a usable thumbnail.
        return String::new();
    };

    if thumb_raw.starts_with("data:image/") {
        return String::new();
    }
    self.normalize_url(&thumb_raw)
}
/// Undoes the two escapes seen in embedded player data: JSON-style `\/`
/// becomes `/`, and `&amp;` becomes `&` (applied in that order, one pass each).
fn decode_escaped_text(text: &str) -> String {
    let slashes_restored = text.replace("\\/", "/");
    slashes_restored.replace("&amp;", "&")
}
/// Decodes HTML entities in `text` and collapses every run of whitespace into
/// a single space, with no leading or trailing whitespace.
///
/// If entity decoding cannot be turned into a string, the input text is used
/// undecoded.
fn decode_html_text(text: &str) -> String {
    let decoded = decode(text.as_bytes())
        .to_string()
        .unwrap_or_else(|_| text.to_string());
    // Joining whitespace-split words already leaves nothing to trim at the ends.
    let words: Vec<&str> = decoded.split_whitespace().collect();
    words.join(" ")
}
fn strip_tags(text: &str) -> String {
Regex::new(r"(?is)<[^>]+>")
.ok()
.map(|regex| regex.replace_all(text, "").to_string())
.unwrap_or_else(|| text.to_string())
}
/// Appends the trimmed `value` to `values` unless it is empty or already
/// present (compared ASCII-case-insensitively).
fn push_unique_tag(values: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    if candidate.is_empty() {
        return;
    }

    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !already_present {
        values.push(candidate.to_string());
    }
}
fn extract_views(text: &str) -> Option<u32> {
Regex::new(r"(?i)<svg[^>]+icon-eye[^>]*>.*?</svg>\s*<span>([^<]+)</span>")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| parse_abbreviated_number(m.as_str().trim()))
}
fn extract_rating(text: &str) -> Option<f32> {
Regex::new(r"(?i)<svg[^>]+icon-like[^>]*>.*?</svg>\s*<span>([^<%]+)%</span>")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| m.as_str().trim().parse::<f32>().ok())
}
fn extract_direct_video_url_from_page(text: &str) -> Option<String> {
let decoded = Self::decode_escaped_text(text);
for key in ["video_url", "video_alt_url", "contentUrl"] {
let pattern = format!(
r#"(?is)(?:^|[{{\s,])["']?{}["']?\s*[:=]\s*["'](?P<url>https?://[^"'<>]+?\.mp4)"#,
regex::escape(key)
);
let regex = Regex::new(&pattern).ok()?;
if let Some(url) = regex
.captures(&decoded)
.and_then(|captures| captures.name("url"))
.map(|value| value.as_str().to_string())
{
return Some(url);
}
}
None
}
/// Collects the cleaned text of every element matching the CSS `selector`,
/// in document order, skipping blanks and case-insensitive duplicates.
///
/// An unparsable selector yields an empty list.
fn collect_texts(document: &Html, selector: &str) -> Vec<String> {
    let mut values = Vec::new();
    if let Ok(parsed) = Selector::parse(selector) {
        for element in document.select(&parsed) {
            let fragments: Vec<_> = element.text().collect();
            let raw_text = fragments.join(" ");
            let without_tags = Self::strip_tags(&raw_text);
            Self::push_unique_tag(&mut values, Self::decode_html_text(&without_tags));
        }
    }
    values
}
/// Parses a video page and returns `(models, categories)`.
///
/// Models come from `/models/` links inside `.player-models-list`. Categories
/// are the `/categories/` links inside `.categories-row`, followed by any
/// `/tags/` links inside `.tags-row` that are not already listed.
fn extract_page_models_and_categories(text: &str) -> (Vec<String>, Vec<String>) {
    let document = Html::parse_document(text);

    let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]");
    let mut categories =
        Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]");
    let tags = Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]");
    tags.into_iter()
        .for_each(|tag| Self::push_unique_tag(&mut categories, tag));

    (models, categories)
}
/// Extracts one `Porn4fansCard` per distinct video id from listing markup.
///
/// Cards are recognised as `<a class="item-link" href=".../video/<id>/..."
/// title="...">...</a>` anchors. A repeated id keeps only its first
/// occurrence. Blank input (or a pattern that fails to compile) yields an
/// empty list.
fn parse_video_cards_from_html(&self, html: &str) -> Vec<Porn4fansCard> {
    let mut cards = Vec::new();
    if html.trim().is_empty() {
        return cards;
    }

    let link_re = match Regex::new(
        r#"(?is)<a[^>]+class="item-link"[^>]+href="(?P<href>[^"]+/video/(?P<id>\d+)/[^"]+)"[^>]+title="(?P<title>[^"]+)"[^>]*>(?P<body>.*?)</a>"#,
    ) {
        Ok(regex) => regex,
        Err(_) => return cards,
    };

    let mut seen_ids = HashSet::new();
    for captures in link_re.captures_iter(html) {
        let Some(id_match) = captures.name("id") else {
            continue;
        };
        let id = id_match.as_str().to_string();
        if !seen_ids.insert(id.clone()) {
            continue;
        }

        let page_url = match captures.name("href") {
            Some(href) => self.normalize_url(href.as_str()),
            None => String::new(),
        };

        let title_raw = captures.name("title").map_or("", |m| m.as_str());
        let title = decode(title_raw.as_bytes())
            .to_string()
            .unwrap_or_else(|_| title_raw.to_string());

        // Everything between the anchor's opening and closing tags.
        let body = captures.name("body").map_or("", |m| m.as_str());

        let duration_raw = Self::extract_between(body, "<div class=\"duration\">", "<")
            .unwrap_or_default()
            .trim()
            .to_string();
        let duration = parse_time_to_seconds(&duration_raw).unwrap_or(0) as u32;

        cards.push(Porn4fansCard {
            id,
            title,
            page_url,
            thumb: self.extract_thumb_url(body),
            duration,
            // A zero count is treated the same as a missing one.
            views: Self::extract_views(body).filter(|count| *count > 0),
            rating: Self::extract_rating(body),
        });
    }
    cards
}
/// Fetches the card's video page and converts the card into a `VideoItem`.
///
/// From the page it takes the direct `.mp4` URL (falling back to the page URL
/// when none is found), the first listed model as uploader, and the
/// categories/tags as tags. If the page request fails, the item is built from
/// the listing data alone, with the page URL as its URL and no tags.
async fn enrich_video_card(
    &self,
    card: Porn4fansCard,
    mut requester: crate::util::requester::Requester,
) -> VideoItem {
    let referer = vec![("Referer".to_string(), format!("{}/", self.url))];
    let page_html = requester
        .get_with_headers(&card.page_url, referer, None)
        .await
        .ok();

    let (direct_url, models, categories) = if let Some(text) = page_html {
        let url = Self::extract_direct_video_url_from_page(&text)
            .unwrap_or_else(|| card.page_url.clone());
        let (models, categories) = Self::extract_page_models_and_categories(&text);
        (url, models, categories)
    } else {
        (card.page_url.clone(), Vec::new(), Vec::new())
    };

    let mut item = VideoItem::new(
        card.id,
        card.title,
        direct_url,
        "porn4fans".to_string(),
        card.thumb,
        card.duration,
    );
    if let Some(views) = card.views {
        item = item.views(views);
    }
    if let Some(rating) = card.rating {
        item = item.rating(rating);
    }
    // Only the first model is used as the uploader.
    if let Some(model) = models.into_iter().next() {
        item = item.uploader(model);
    }
    item.tags(categories)
}
/// Parses listing `html` into cards and enriches all of them concurrently,
/// each with its own clone of `requester`. Results keep the card order.
async fn get_video_items_from_html(
    &self,
    html: String,
    requester: crate::util::requester::Requester,
) -> Vec<VideoItem> {
    let mut pending = Vec::new();
    for card in self.parse_video_cards_from_html(&html) {
        pending.push(self.enrich_video_card(card, requester.clone()));
    }
    join_all(pending).await
}
}
#[async_trait]
impl Provider for Porn4fansProvider {
/// Provider entry point: returns the videos for the requested page.
///
/// A non-blank `query` runs a search; otherwise the latest-updates listing is
/// used. `page` falls back to `1` when it is not a valid `u32`. `pool` and
/// `per_page` are ignored. Any error is reported and turned into an empty
/// list.
async fn get_videos(
    &self,
    cache: VideoCache,
    pool: DbPool,
    sort: String,
    query: Option<String>,
    page: String,
    per_page: String,
    options: ServerOptions,
) -> Vec<VideoItem> {
    let _ = pool;
    let _ = per_page;
    let page = page.parse::<u32>().unwrap_or(1);

    let search_term = query.filter(|term| !term.trim().is_empty());
    let videos = if let Some(term) = search_term {
        self.query(cache, page, &term, &sort, options).await
    } else {
        self.get(cache, page, &sort, options).await
    };

    match videos {
        Ok(found) => found,
        Err(e) => {
            report_provider_error(
                "porn4fans",
                "get_videos",
                &format!("page={page}; error={e}"),
            )
            .await;
            Vec::new()
        }
    }
}
/// Returns this provider's channel descriptor; always `Some`.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
}
#[cfg(test)]
mod tests {
use super::Porn4fansProvider;
// The latest-updates URL must carry the custom block id, the mapped sort key
// and the page number.
#[test]
fn builds_latest_url_with_custom_block_id() {
    let expected = "https://www.porn4fans.com/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2";
    let actual = Porn4fansProvider::new().build_latest_url(2, "new");
    assert_eq!(actual, expected);
}
// The search URL must use the dash-joined query in the path, the plus-joined
// query in `q`, the mapped sort key and the page number.
#[test]
fn builds_search_url_with_custom_block_id() {
    let expected = "https://www.porn4fans.com/search/big-black-cock/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+black+cock&sort_by=video_viewed&from_videos=3";
    let actual = Porn4fansProvider::new().build_search_url("big black cock", 3, "popular");
    assert_eq!(actual, expected);
}
// Parses a single search-result card and checks every extracted field.
#[test]
fn parses_porn4fans_search_markup() {
let provider = Porn4fansProvider::new();
// Fixture: one card whose <img> `src` is an inline data: placeholder, so the
// thumbnail has to come from `data-original`.
let html = r##"
<div class="thumbs second grid-1" id="custom_list_videos_videos_list_search_result_items">
<div class="item">
<a class="item-link" href="https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" title="Horny Police Officer Melztube Gets Banged By BBC">
<div class="img-wrap">
<div class="duration">23:47</div>
<picture>
<source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" type="image/webp">
<img class="thumb lazy-load" src="data:image/gif;base64,AAAA" data-original="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-preview="https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10194/10194_preview_high.mp4/" alt="Horny Police Officer Melztube Gets Banged By BBC" />
</picture>
</div>
<div class="video-text">Horny Police Officer Melztube Gets Banged By BBC</div>
<ul class="video-items">
<li class="video-item">
<svg class="svg-icon icon-eye"><use xlink:href="#icon-eye"></use></svg>
<span>14K</span>
</li>
<li class="video-item rating">
<svg class="svg-icon icon-like"><use xlink:href="#icon-like"></use></svg>
<span>66%</span>
</li>
<li class="video-item">
<span>2 weeks ago</span>
</li>
</ul>
</a>
</div>
</div>
"##;
let items = provider.parse_video_cards_from_html(html);
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "10194");
assert_eq!(
items[0].page_url,
"https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/"
);
assert_eq!(
items[0].thumb,
"https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg"
);
// 23:47 -> 23 * 60 + 47 seconds.
assert_eq!(items[0].duration, 1427);
// "14K" is expanded by the abbreviated-number parser.
assert_eq!(items[0].views, Some(14_000));
assert_eq!(items[0].rating, Some(66.0));
}
// `video_url` must win over `video_alt_url`, with `\/` escapes undone and the
// result cut at `.mp4` (no trailing slash).
#[test]
fn extracts_direct_video_url_from_video_page() {
    let html = r#"
<script>
var flashvars = {
video_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951.mp4\/',
video_alt_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951_720p.mp4\/'
};
</script>
"#;
    let expected = "https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10951/10951.mp4";
    let extracted = Porn4fansProvider::extract_direct_video_url_from_page(html);
    assert_eq!(extracted.as_deref(), Some(expected));
}
// Models come from the player list; categories are followed by tags, in
// document order.
#[test]
fn extracts_models_and_categories_from_video_page() {
    let html = r#"
<div class="player-models-list">
<div class="player-model-item">
<a href="/models/piper-rockelle/"><span class="player-model-name">Piper Rockelle</span></a>
</div>
</div>
<ul class="categories-row">
<li class="visible"><a href="/categories/striptease/">Striptease</a></li>
<li class="visible"><a href="/categories/teen/">Teen</a></li>
</ul>
<ul class="tags-row">
<li class="visible"><a href="/tags/bathroom/">Bathroom</a></li>
</ul>
"#;
    let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(html);
    assert_eq!(models, ["Piper Rockelle"]);
    assert_eq!(categories, ["Striptease", "Teen", "Bathroom"]);
}
}