use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{Provider, report_provider_error, requester_or_default}; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use futures::future::join_all; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use scraper::{Html, Selector}; use std::collections::HashSet; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "onlyfans", tags: &["creator", "premium", "clips"], }; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } } #[derive(Debug, Clone)] pub struct Porn4fansProvider { url: String, } #[derive(Debug, Clone)] struct Porn4fansCard { id: String, title: String, page_url: String, thumb: String, duration: u32, views: Option, rating: Option, } impl Porn4fansProvider { pub fn new() -> Self { Self { url: "https://www.porn4fans.com".to_string(), } } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { id: "porn4fans".to_string(), name: "Porn4Fans".to_string(), description: "OnlyFans porn videos.".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=www.porn4fans.com" .to_string(), status: "active".to_string(), categories: vec![], options: vec![], nsfw: true, cacheDuration: Some(1800), } } fn sort_by(sort: &str) -> &'static str { match sort { "popular" => "video_viewed", _ => "post_date", } } fn build_latest_url(&self, page: u32, sort: &str) -> String { format!( "{}/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={}&from={page}", self.url, Self::sort_by(sort) ) } fn build_latest_headers(&self) -> Vec<(String, String)> { vec![( "Referer".to_string(), format!("{}/latest-updates/", self.url), )] } fn build_search_path_query(query: &str, separator: &str) -> String { query.split_whitespace().collect::>().join(separator) } fn build_search_url(&self, query: &str, page: u32, sort: &str) -> String { let query_param = Self::build_search_path_query(query, "+"); let path_query = Self::build_search_path_query(query, "-"); format!( "{}/search/{path_query}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={query_param}&sort_by={}&from_videos={page}", self.url, Self::sort_by(sort) ) } fn build_search_headers(&self, query: &str) -> Vec<(String, String)> { let path_query = Self::build_search_path_query(query, "-"); vec![( "Referer".to_string(), format!("{}/search/{path_query}/", self.url), )] } async fn get( &self, cache: VideoCache, page: u32, sort: &str, options: ServerOptions, ) -> Result> { let video_url = self.build_latest_url(page, sort); let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { return Ok(items.clone()); } items.clone() } None => vec![], }; let mut requester = requester_or_default(&options, "porn4fans", "porn4fans.get.missing_requester"); let text = match requester .get_with_headers(&video_url, self.build_latest_headers(), None) .await { Ok(text) => text, Err(e) => { report_provider_error( "porn4fans", "get.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; if text.trim().is_empty() { report_provider_error( "porn4fans", "get.empty_response", &format!("url={video_url}"), ) .await; return Ok(old_items); } let video_items = self.get_video_items_from_html(text, requester).await; if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); return Ok(video_items); } Ok(old_items) } async fn query( &self, cache: VideoCache, page: u32, query: &str, sort: &str, options: ServerOptions, ) -> Result> { let video_url = self.build_search_url(query, page, sort); let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { return Ok(items.clone()); } items.clone() } None => vec![], }; let mut requester = requester_or_default(&options, "porn4fans", "porn4fans.query.missing_requester"); let text = match requester .get_with_headers(&video_url, self.build_search_headers(query), None) .await { Ok(text) => text, Err(e) => { report_provider_error( "porn4fans", "query.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; if text.trim().is_empty() { report_provider_error( "porn4fans", "query.empty_response", &format!("url={video_url}"), ) .await; return Ok(old_items); } let video_items = self.get_video_items_from_html(text, requester).await; if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); return Ok(video_items); } Ok(old_items) } fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> { text.split(start).nth(1)?.split(end).next() } fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option { attrs.iter().find_map(|attr| { Self::extract_between(segment, attr, "\"") .map(str::trim) .filter(|value| !value.is_empty()) .map(ToString::to_string) }) } fn normalize_url(&self, url: &str) -> String { if url.starts_with("http://") || url.starts_with("https://") { return url.to_string(); } if url.starts_with("//") { return format!("https:{url}"); } if url.starts_with('/') { return format!("{}{}", self.url, url); } format!("{}/{}", self.url, url.trim_start_matches("./")) } fn extract_thumb_url(&self, segment: &str) -> String { let thumb_raw = Self::first_non_empty_attr( segment, &[ "data-original=\"", "data-webp=\"", "srcset=\"", "src=\"", "poster=\"", ], ) .unwrap_or_default(); if thumb_raw.starts_with("data:image/") { return String::new(); } self.normalize_url(&thumb_raw) } fn decode_escaped_text(text: &str) -> String { text.replace("\\/", "/").replace("&", "&") } fn decode_html_text(text: &str) -> String { decode(text.as_bytes()) .to_string() .unwrap_or_else(|_| text.to_string()) .split_whitespace() .collect::>() .join(" ") .trim() .to_string() } fn strip_tags(text: &str) -> String { Regex::new(r"(?is)<[^>]+>") .ok() .map(|regex| regex.replace_all(text, "").to_string()) .unwrap_or_else(|| text.to_string()) } fn push_unique_tag(values: &mut Vec, value: String) { let value = value.trim().to_string(); if value.is_empty() || values .iter() .any(|existing| existing.eq_ignore_ascii_case(&value)) { return; } values.push(value); } fn extract_views(text: &str) -> Option { Regex::new(r"(?i)]+icon-eye[^>]*>.*?\s*([^<]+)") .ok() .and_then(|re| re.captures(text)) .and_then(|caps| caps.get(1)) .and_then(|m| parse_abbreviated_number(m.as_str().trim())) } fn extract_rating(text: &str) -> Option { Regex::new(r"(?i)]+icon-like[^>]*>.*?\s*([^<%]+)%") .ok() .and_then(|re| re.captures(text)) .and_then(|caps| caps.get(1)) .and_then(|m| m.as_str().trim().parse::().ok()) } fn extract_direct_video_url_from_page(text: &str) -> Option { let decoded = Self::decode_escaped_text(text); for key in ["video_url", "video_alt_url", "contentUrl"] { let pattern = format!( r#"(?is)(?:^|[{{\s,])["']?{}["']?\s*[:=]\s*["'](?Phttps?://[^"'<>]+?\.mp4)"#, regex::escape(key) ); let regex = Regex::new(&pattern).ok()?; if let Some(url) = regex .captures(&decoded) .and_then(|captures| captures.name("url")) .map(|value| value.as_str().to_string()) { return Some(url); } } None } fn collect_texts(document: &Html, selector: &str) -> Vec { let Ok(selector) = Selector::parse(selector) else { return vec![]; }; let mut values = Vec::new(); for element in document.select(&selector) { let raw_text = element.text().collect::>().join(" "); let cleaned = Self::decode_html_text(&Self::strip_tags(&raw_text)); Self::push_unique_tag(&mut values, cleaned); } values } fn extract_page_models_and_categories(text: &str) -> (Vec, Vec) { let document = Html::parse_document(text); let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]"); let mut categories = Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]"); for value in Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]") { Self::push_unique_tag(&mut categories, value); } (models, categories) } fn parse_video_cards_from_html(&self, html: &str) -> Vec { if html.trim().is_empty() { return vec![]; } let Ok(link_re) = Regex::new( r#"(?is)]+class="item-link"[^>]+href="(?P[^"]+/video/(?P\d+)/[^"]+)"[^>]+title="(?P[^"]+)"[^>]*>(?P<body>.*?)</a>"#, ) else { return vec![]; }; let mut items = Vec::new(); let mut seen = HashSet::new(); for captures in link_re.captures_iter(html) { let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else { continue; }; if !seen.insert(id.clone()) { continue; } let href = captures .name("href") .map(|m| self.normalize_url(m.as_str())) .unwrap_or_default(); let title_raw = captures .name("title") .map(|m| m.as_str()) .unwrap_or_default(); let title = decode(title_raw.as_bytes()) .to_string() .unwrap_or_else(|_| title_raw.to_string()); let body = captures .name("body") .map(|m| m.as_str()) .unwrap_or_default(); let thumb = self.extract_thumb_url(body); let duration_raw = Self::extract_between(body, "<div class=\"duration\">", "<") .unwrap_or_default() .trim() .to_string(); let duration = parse_time_to_seconds(&duration_raw).unwrap_or(0) as u32; let views = Self::extract_views(body).unwrap_or(0); let rating = Self::extract_rating(body); items.push(Porn4fansCard { id, title, page_url: href, thumb, duration, views: (views > 0).then_some(views), rating, }); } items } async fn enrich_video_card( &self, card: Porn4fansCard, mut requester: crate::util::requester::Requester, ) -> VideoItem { let direct_url = requester .get_with_headers( &card.page_url, vec![("Referer".to_string(), format!("{}/", self.url))], None, ) .await .ok(); let (direct_url, models, categories) = match direct_url { Some(text) => { let url = Self::extract_direct_video_url_from_page(&text) .unwrap_or_else(|| card.page_url.clone()); let (models, categories) = Self::extract_page_models_and_categories(&text); (url, models, categories) } None => (card.page_url.clone(), vec![], vec![]), }; let mut item = VideoItem::new( card.id, card.title, direct_url, "porn4fans".to_string(), card.thumb, card.duration, ); if let Some(views) = card.views { item = item.views(views); } if let Some(rating) = card.rating { item = item.rating(rating); } if let Some(model) = models.first() { item = item.uploader(model.clone()); } item = item.tags(categories); item } async fn get_video_items_from_html( &self, html: String, requester: crate::util::requester::Requester, ) -> Vec<VideoItem> { let cards = self.parse_video_cards_from_html(&html); let futures = cards .into_iter() .map(|card| self.enrich_video_card(card, requester.clone())); join_all(futures).await } } #[async_trait] impl Provider for Porn4fansProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option<String>, page: String, per_page: String, options: ServerOptions, ) -> Vec<VideoItem> { let _ = pool; let _ = per_page; let page = page.parse::<u32>().unwrap_or(1); let videos = match query { Some(query) if !query.trim().is_empty() => { self.query(cache, page, &query, &sort, options).await } _ => self.get(cache, page, &sort, options).await, }; match videos { Ok(videos) => videos, Err(e) => { report_provider_error( "porn4fans", "get_videos", &format!("page={page}; error={e}"), ) .await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::Porn4fansProvider; #[test] fn builds_latest_url_with_custom_block_id() { let provider = Porn4fansProvider::new(); assert_eq!( provider.build_latest_url(2, "new"), "https://www.porn4fans.com/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2" ); } #[test] fn builds_search_url_with_custom_block_id() { let provider = Porn4fansProvider::new(); assert_eq!( provider.build_search_url("big black cock", 3, "popular"), "https://www.porn4fans.com/search/big-black-cock/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+black+cock&sort_by=video_viewed&from_videos=3" ); } #[test] fn parses_porn4fans_search_markup() { let provider = Porn4fansProvider::new(); let html = r##" <div class="thumbs second grid-1" id="custom_list_videos_videos_list_search_result_items"> <div class="item"> <a class="item-link" href="https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" title="Horny Police Officer Melztube Gets Banged By BBC"> <div class="img-wrap"> <div class="duration">23:47</div> <picture> <source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" type="image/webp"> <img class="thumb lazy-load" src="data:image/gif;base64,AAAA" data-original="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-preview="https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10194/10194_preview_high.mp4/" alt="Horny Police Officer Melztube Gets Banged By BBC" /> </picture> </div> <div class="video-text">Horny Police Officer Melztube Gets Banged By BBC</div> <ul class="video-items"> <li class="video-item"> <svg class="svg-icon icon-eye"><use xlink:href="#icon-eye"></use></svg> <span>14K</span> </li> <li class="video-item rating"> <svg class="svg-icon icon-like"><use xlink:href="#icon-like"></use></svg> <span>66%</span> </li> <li class="video-item"> <span>2 weeks ago</span> </li> </ul> </a> </div> </div> "##; let items = provider.parse_video_cards_from_html(html); assert_eq!(items.len(), 1); assert_eq!(items[0].id, "10194"); assert_eq!( items[0].page_url, "https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" ); assert_eq!( items[0].thumb, "https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" ); assert_eq!(items[0].duration, 1427); assert_eq!(items[0].views, Some(14_000)); assert_eq!(items[0].rating, Some(66.0)); } #[test] fn extracts_direct_video_url_from_video_page() { let html = r#" <script> var flashvars = { video_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951.mp4\/', video_alt_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951_720p.mp4\/' }; </script> "#; assert_eq!( Porn4fansProvider::extract_direct_video_url_from_page(html).as_deref(), Some( "https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10951/10951.mp4" ) ); } #[test] fn extracts_models_and_categories_from_video_page() { let html = r#" <div class="player-models-list"> <div class="player-model-item"> <a href="/models/piper-rockelle/"><span class="player-model-name">Piper Rockelle</span></a> </div> </div> <ul class="categories-row"> <li class="visible"><a href="/categories/striptease/">Striptease</a></li> <li class="visible"><a href="/categories/teen/">Teen</a></li> </ul> <ul class="tags-row"> <li class="visible"><a href="/tags/bathroom/">Bathroom</a></li> </ul> "#; let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(html); assert_eq!(models, vec!["Piper Rockelle".to_string()]); assert_eq!( categories, vec![ "Striptease".to_string(), "Teen".to_string(), "Bathroom".to_string() ] ); } }