diff --git a/src/providers/mod.rs b/src/providers/mod.rs
index 72024d8..3bc53c8 100644
--- a/src/providers/mod.rs
+++ b/src/providers/mod.rs
@@ -33,6 +33,7 @@ pub mod missav;
 pub mod omgxxx;
 pub mod paradisehill;
 pub mod porn00;
+pub mod porn4fans;
 pub mod pornzog;
 pub mod sxyprn;
 pub mod tnaflix;
@@ -123,6 +124,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
         "paradisehill",
         Arc::new(paradisehill::ParadisehillProvider::new()) as DynProvider,
     );
+    m.insert(
+        "porn4fans",
+        Arc::new(porn4fans::Porn4fansProvider::new()) as DynProvider,
+    );
     m.insert(
         "pornzog",
         Arc::new(pornzog::PornzogProvider::new()) as DynProvider,
diff --git a/src/providers/porn4fans.rs b/src/providers/porn4fans.rs
new file mode 100644
--- /dev/null
+++ b/src/providers/porn4fans.rs
@@ -0,0 +1,521 @@
+//! Scraping provider for porn4fans.com.
+//!
+//! Listings are requested from the site's `mode=async&function=get_block` endpoints and the
+//! returned HTML is parsed with regular expressions into [`VideoItem`]s.
+
+use crate::DbPool;
+use crate::api::ClientVersion;
+use crate::providers::{Provider, report_provider_error, requester_or_default};
+use crate::status::*;
+use crate::util::cache::VideoCache;
+use crate::util::parse_abbreviated_number;
+use crate::util::time::parse_time_to_seconds;
+use crate::videos::{ServerOptions, VideoItem};
+use async_trait::async_trait;
+use error_chain::error_chain;
+use htmlentity::entity::{ICodedDataTrait, decode};
+use regex::Regex;
+use std::collections::HashSet;
+use std::sync::LazyLock;
+
+error_chain! {
+    foreign_links {
+        Io(std::io::Error);
+        HttpRequest(wreq::Error);
+    }
+}
+
+/// Cached listings younger than this many seconds are returned without a new request.
+const CACHE_TTL_SECS: u64 = 60 * 5;
+
+/// Matches one listing card: the `item-link` anchor with its `href`, numeric `id`, `title`
+/// and inner `body`. `None` only if the pattern fails to compile.
+static LINK_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
+    Regex::new(
+        r#"(?is)<a[^>]+class="item-link"[^>]+href="(?P<href>[^"]+/video/(?P<id>\d+)/[^"]+)"[^>]+title="(?P<title>[^"]+)"[^>]*>(?P<body>.*?)</a>"#,
+    )
+    .ok()
+});
+
+/// Captures the `<span>` text that follows the `icon-eye` SVG (the view counter).
+static VIEWS_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
+    Regex::new(r"(?i)<svg[^>]+icon-eye[^>]*>.*?</svg>\s*<span>([^<]+)</span>").ok()
+});
+
+/// Captures the number before `%` in the `<span>` that follows the `icon-like` SVG.
+static RATING_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
+    Regex::new(r"(?i)<svg[^>]+icon-like[^>]*>.*?</svg>\s*<span>([^<%]+)%</span>").ok()
+});
+
+/// Labels passed to the error reporter by one fetch path (`get` or `query`).
+struct FetchLabels {
+    missing_requester: &'static str,
+    request: &'static str,
+    empty_response: &'static str,
+}
+
+/// Provider that lists and searches videos on porn4fans.com.
+#[derive(Debug, Clone)]
+pub struct Porn4fansProvider {
+    url: String,
+}
+
+impl Porn4fansProvider {
+    /// Creates a provider pointing at the public site.
+    pub fn new() -> Self {
+        Self {
+            url: "https://www.porn4fans.com".to_string(),
+        }
+    }
+
+    /// Describes this provider as a [`Channel`]; the client version is currently unused.
+    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
+        Channel {
+            id: "porn4fans".to_string(),
+            name: "Porn4Fans".to_string(),
+            description: "OnlyFans porn videos.".to_string(),
+            premium: false,
+            favicon: "https://www.google.com/s2/favicons?sz=64&domain=www.porn4fans.com"
+                .to_string(),
+            status: "active".to_string(),
+            categories: vec![],
+            options: vec![],
+            nsfw: true,
+            cacheDuration: Some(1800),
+        }
+    }
+
+    /// Maps the requested sort key onto the site's `sort_by` query value.
+    ///
+    /// Unrecognised keys fall back to `post_date`.
+    fn sort_by(sort: &str) -> &'static str {
+        match sort {
+            "popular" => "video_viewed",
+            _ => "post_date",
+        }
+    }
+
+    /// Builds the async block URL for page `page` of the latest-updates listing.
+    fn build_latest_url(&self, page: u32, sort: &str) -> String {
+        format!(
+            "{}/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={}&from={page}",
+            self.url,
+            Self::sort_by(sort)
+        )
+    }
+
+    /// Headers sent with latest-updates requests (a `Referer` for the listing page).
+    fn build_latest_headers(&self) -> Vec<(String, String)> {
+        vec![(
+            "Referer".to_string(),
+            format!("{}/latest-updates/", self.url),
+        )]
+    }
+
+    /// Percent-encodes every byte of `token` outside the RFC 3986 unreserved set.
+    fn encode_component(token: &str) -> String {
+        let mut encoded = String::with_capacity(token.len());
+        for byte in token.bytes() {
+            match byte {
+                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
+                    encoded.push(char::from(byte));
+                }
+                _ => encoded.push_str(&format!("%{byte:02X}")),
+            }
+        }
+        encoded
+    }
+
+    /// Splits `query` on whitespace and joins the percent-encoded words with `separator`.
+    ///
+    /// Encoding keeps characters such as `&`, `#` or `/` in user input from altering the
+    /// structure of the URL the words are embedded in.
+    fn build_search_path_query(query: &str, separator: &str) -> String {
+        query
+            .split_whitespace()
+            .map(Self::encode_component)
+            .collect::<Vec<_>>()
+            .join(separator)
+    }
+
+    /// Builds the async block URL for page `page` of the search results for `query`.
+    fn build_search_url(&self, query: &str, page: u32, sort: &str) -> String {
+        let query_param = Self::build_search_path_query(query, "+");
+        let path_query = Self::build_search_path_query(query, "-");
+        format!(
+            "{}/search/{path_query}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={query_param}&sort_by={}&from_videos={page}",
+            self.url,
+            Self::sort_by(sort)
+        )
+    }
+
+    /// Headers sent with search requests (a `Referer` for the search page of `query`).
+    fn build_search_headers(&self, query: &str) -> Vec<(String, String)> {
+        let path_query = Self::build_search_path_query(query, "-");
+        vec![(
+            "Referer".to_string(),
+            format!("{}/search/{path_query}/", self.url),
+        )]
+    }
+
+    /// Returns the listing at `video_url`, served from `cache` while the entry is fresh.
+    ///
+    /// A failed request, an empty response, or a response without parsable items is not an
+    /// error: the stale cached items (or an empty list) are returned instead. The first two
+    /// cases are reported through `report_provider_error`.
+    async fn fetch_listing(
+        &self,
+        cache: VideoCache,
+        video_url: String,
+        headers: Vec<(String, String)>,
+        options: ServerOptions,
+        labels: FetchLabels,
+    ) -> Result<Vec<VideoItem>> {
+        let old_items = match cache.get(&video_url) {
+            Some((time, items)) => {
+                if time.elapsed().unwrap_or_default().as_secs() < CACHE_TTL_SECS {
+                    return Ok(items.clone());
+                }
+                items.clone()
+            }
+            None => vec![],
+        };
+
+        let mut requester = requester_or_default(&options, "porn4fans", labels.missing_requester);
+        let text = match requester.get_with_headers(&video_url, headers, None).await {
+            Ok(text) => text,
+            Err(e) => {
+                report_provider_error(
+                    "porn4fans",
+                    labels.request,
+                    &format!("url={video_url}; error={e}"),
+                )
+                .await;
+                return Ok(old_items);
+            }
+        };
+
+        if text.trim().is_empty() {
+            report_provider_error(
+                "porn4fans",
+                labels.empty_response,
+                &format!("url={video_url}"),
+            )
+            .await;
+            return Ok(old_items);
+        }
+
+        let video_items = self.get_video_items_from_html(text);
+        if video_items.is_empty() {
+            return Ok(old_items);
+        }
+
+        cache.remove(&video_url);
+        cache.insert(video_url, video_items.clone());
+        Ok(video_items)
+    }
+
+    /// Fetches page `page` of the latest-updates listing.
+    async fn get(
+        &self,
+        cache: VideoCache,
+        page: u32,
+        sort: &str,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let labels = FetchLabels {
+            missing_requester: "porn4fans.get.missing_requester",
+            request: "get.request",
+            empty_response: "get.empty_response",
+        };
+        let video_url = self.build_latest_url(page, sort);
+        let headers = self.build_latest_headers();
+        self.fetch_listing(cache, video_url, headers, options, labels)
+            .await
+    }
+
+    /// Fetches page `page` of the search results for `query`.
+    async fn query(
+        &self,
+        cache: VideoCache,
+        page: u32,
+        query: &str,
+        sort: &str,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let labels = FetchLabels {
+            missing_requester: "porn4fans.query.missing_requester",
+            request: "query.request",
+            empty_response: "query.empty_response",
+        };
+        let video_url = self.build_search_url(query, page, sort);
+        let headers = self.build_search_headers(query);
+        self.fetch_listing(cache, video_url, headers, options, labels)
+            .await
+    }
+
+    /// Returns the text between the first `start` and the next `end`, if `start` occurs.
+    fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
+        text.split(start).nth(1)?.split(end).next()
+    }
+
+    /// Returns the first non-empty value among `attrs`, each given as a `name="` prefix.
+    fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option<String> {
+        attrs.iter().find_map(|attr| {
+            Self::extract_between(segment, attr, "\"")
+                .map(str::trim)
+                .filter(|value| !value.is_empty())
+                .map(ToString::to_string)
+        })
+    }
+
+    /// Turns protocol-relative, root-relative and relative URLs into absolute ones.
+    fn normalize_url(&self, url: &str) -> String {
+        if url.starts_with("http://") || url.starts_with("https://") {
+            return url.to_string();
+        }
+        if url.starts_with("//") {
+            return format!("https:{url}");
+        }
+        if url.starts_with('/') {
+            return format!("{}{}", self.url, url);
+        }
+        format!("{}/{}", self.url, url.trim_start_matches("./"))
+    }
+
+    /// Picks a card's thumbnail from `data-original`, `data-webp`, `srcset`, `src`, `poster`.
+    ///
+    /// Returns an empty string when no attribute is present or only an inline `data:image/`
+    /// placeholder is found, so a missing thumbnail never turns into the site root URL.
+    fn extract_thumb_url(&self, segment: &str) -> String {
+        let thumb_raw = Self::first_non_empty_attr(
+            segment,
+            &[
+                "data-original=\"",
+                "data-webp=\"",
+                "srcset=\"",
+                "src=\"",
+                "poster=\"",
+            ],
+        )
+        .unwrap_or_default();
+
+        if thumb_raw.is_empty() || thumb_raw.starts_with("data:image/") {
+            return String::new();
+        }
+
+        self.normalize_url(&thumb_raw)
+    }
+
+    /// Parses the abbreviated view counter (e.g. `14K`) of a listing card.
+    fn extract_views(text: &str) -> Option<u32> {
+        VIEWS_RE
+            .as_ref()
+            .and_then(|re| re.captures(text))
+            .and_then(|caps| caps.get(1))
+            .and_then(|m| parse_abbreviated_number(m.as_str().trim()))
+    }
+
+    /// Parses the like percentage (e.g. `66%`) of a listing card.
+    fn extract_rating(text: &str) -> Option<f32> {
+        RATING_RE
+            .as_ref()
+            .and_then(|re| re.captures(text))
+            .and_then(|caps| caps.get(1))
+            .and_then(|m| m.as_str().trim().parse::<f32>().ok())
+    }
+
+    /// Extracts every distinct video card from a listing HTML fragment.
+    ///
+    /// Cards are de-duplicated by video id; the first occurrence wins.
+    fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
+        if html.trim().is_empty() {
+            return vec![];
+        }
+
+        let Some(link_re) = LINK_RE.as_ref() else {
+            return vec![];
+        };
+
+        let mut items = Vec::new();
+        let mut seen = HashSet::new();
+
+        for captures in link_re.captures_iter(&html) {
+            let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else {
+                continue;
+            };
+            if !seen.insert(id.clone()) {
+                continue;
+            }
+
+            let href = captures
+                .name("href")
+                .map(|m| self.normalize_url(m.as_str()))
+                .unwrap_or_default();
+            let title_raw = captures
+                .name("title")
+                .map(|m| m.as_str())
+                .unwrap_or_default();
+            let title = decode(title_raw.as_bytes())
+                .to_string()
+                .unwrap_or_else(|_| title_raw.to_string());
+            let body = captures
+                .name("body")
+                .map(|m| m.as_str())
+                .unwrap_or_default();
+            let thumb = self.extract_thumb_url(body);
+            let duration_raw = Self::extract_between(body, "<div class=\"duration\">", "<")
+                .unwrap_or_default()
+                .trim()
+                .to_string();
+            let duration = parse_time_to_seconds(&duration_raw).unwrap_or(0) as u32;
+            let views = Self::extract_views(body).unwrap_or(0);
+            let rating = Self::extract_rating(body);
+
+            let mut item =
+                VideoItem::new(id, title, href, "porn4fans".to_string(), thumb, duration);
+            if views > 0 {
+                item = item.views(views);
+            }
+            if let Some(rating) = rating {
+                item = item.rating(rating);
+            }
+            items.push(item);
+        }
+
+        items
+    }
+}
+
+#[async_trait]
+impl Provider for Porn4fansProvider {
+    /// Returns search results when `query` is non-blank, otherwise the latest listing.
+    ///
+    /// An unparsable `page` defaults to 1; `pool` and `per_page` are unused.
+    async fn get_videos(
+        &self,
+        cache: VideoCache,
+        pool: DbPool,
+        sort: String,
+        query: Option<String>,
+        page: String,
+        per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        let _ = pool;
+        let _ = per_page;
+        let page = page.parse::<u32>().unwrap_or(1);
+
+        let videos = match query {
+            Some(query) if !query.trim().is_empty() => {
+                self.query(cache, page, &query, &sort, options).await
+            }
+            _ => self.get(cache, page, &sort, options).await,
+        };
+
+        match videos {
+            Ok(videos) => videos,
+            Err(e) => {
+                report_provider_error(
+                    "porn4fans",
+                    "get_videos",
+                    &format!("page={page}; error={e}"),
+                )
+                .await;
+                vec![]
+            }
+        }
+    }
+
+    /// Returns the channel description for this provider.
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        Some(self.build_channel(clientversion))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Porn4fansProvider;
+
+    #[test]
+    fn builds_latest_url_with_custom_block_id() {
+        let provider = Porn4fansProvider::new();
+        assert_eq!(
+            provider.build_latest_url(2, "new"),
+            "https://www.porn4fans.com/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2"
+        );
+    }
+
+    #[test]
+    fn builds_search_url_with_custom_block_id() {
+        let provider = Porn4fansProvider::new();
+        assert_eq!(
+            provider.build_search_url("big black cock", 3, "popular"),
+            "https://www.porn4fans.com/search/big-black-cock/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+black+cock&sort_by=video_viewed&from_videos=3"
+        );
+    }
+
+    #[test]
+    fn percent_encodes_reserved_characters_in_search_url() {
+        let provider = Porn4fansProvider::new();
+        assert_eq!(
+            provider.build_search_url("rock & roll", 1, "new"),
+            "https://www.porn4fans.com/search/rock-%26-roll/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=rock+%26+roll&sort_by=post_date&from_videos=1"
+        );
+    }
+
+    #[test]
+    fn missing_thumbnail_yields_empty_string() {
+        let provider = Porn4fansProvider::new();
+        assert_eq!(provider.extract_thumb_url("<img alt=\"no image\" />"), "");
+    }
+
+    #[test]
+    fn parses_porn4fans_search_markup() {
+        let provider = Porn4fansProvider::new();
+        let html = r##"
+        <div class="thumbs second grid-1" id="custom_list_videos_videos_list_search_result_items">
+            <div class="item">
+                <a class="item-link" href="https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" title="Horny Police Officer Melztube Gets Banged By BBC">
+                    <div class="img-wrap">
+                        <div class="duration">23:47</div>
+                        <picture>
+                            <source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" type="image/webp">
+                            <img class="thumb lazy-load" src="data:image/gif;base64,AAAA" data-original="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" alt="Horny Police Officer Melztube Gets Banged By BBC" />
+                        </picture>
+                    </div>
+                    <div class="video-text">Horny Police Officer Melztube Gets Banged By BBC</div>
+                    <ul class="video-items">
+                        <li class="video-item">
+                            <svg class="svg-icon icon-eye"><use xlink:href="#icon-eye"></use></svg>
+                            <span>14K</span>
+                        </li>
+                        <li class="video-item rating">
+                            <svg class="svg-icon icon-like"><use xlink:href="#icon-like"></use></svg>
+                            <span>66%</span>
+                        </li>
+                        <li class="video-item">
+                            <span>2 weeks ago</span>
+                        </li>
+                    </ul>
+                </a>
+            </div>
+        </div>
+        "##;
+
+        let items = provider.get_video_items_from_html(html.to_string());
+        assert_eq!(items.len(), 1);
+        assert_eq!(items[0].id, "10194");
+        assert_eq!(
+            items[0].url,
+            "https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/"
+        );
+        assert_eq!(
+            items[0].thumb,
+            "https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg"
+        );
+        assert_eq!(items[0].duration, 1427);
+        assert_eq!(items[0].views, Some(14_000));
+        assert_eq!(items[0].rating, Some(66.0));
+    }
+}