// Listing metadata from the code viewer: 651 lines, 21 KiB, Rust.
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{Provider, report_provider_error, requester_or_default};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoItem};
|
|
use async_trait::async_trait;
|
|
use error_chain::error_chain;
|
|
use futures::future::join_all;
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use regex::Regex;
|
|
use scraper::{Html, Selector};
|
|
use std::collections::HashSet;
|
|
|
|
/// Static metadata for this provider's channel: the group it is listed under and the
/// tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        tags: &["creator", "premium", "clips"],
        group_id: "onlyfans",
    };
|
|
|
|
// Module-local `Error`/`Result` types generated by `error_chain`. The foreign links let
// `?` convert HTTP-client and I/O failures into this module's error type.
error_chain! {
    foreign_links {
        HttpRequest(wreq::Error);
        Io(std::io::Error);
    }
}
|
|
|
|
/// Provider that scrapes video listings and video pages from a single site.
#[derive(Clone, Debug)]
pub struct Porn4fansProvider {
    /// Site root (scheme and host, no trailing slash) that every request URL is built on.
    url: String,
}
|
|
|
|
/// One video entry as parsed from a listing page, before its video page has been fetched.
#[derive(Clone, Debug)]
struct Porn4fansCard {
    /// Numeric video id taken from the `/video/<id>/` part of the link.
    id: String,
    /// Link title with HTML entities decoded.
    title: String,
    /// Absolute URL of the video's own page.
    page_url: String,
    /// Absolute thumbnail URL, or an empty string when none was found.
    thumb: String,
    /// Duration in seconds; `0` when the listing did not carry a parsable duration.
    duration: u32,
    /// View count, when the listing showed a non-zero one.
    views: Option<u32>,
    /// Like percentage as shown on the listing (e.g. `66.0` for "66%").
    rating: Option<f32>,
}
|
|
|
|
impl Porn4fansProvider {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
url: "https://www.porn4fans.com".to_string(),
|
|
}
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
Channel {
|
|
id: "porn4fans".to_string(),
|
|
name: "Porn4Fans".to_string(),
|
|
description: "OnlyFans porn videos.".to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=www.porn4fans.com"
|
|
.to_string(),
|
|
status: "active".to_string(),
|
|
categories: vec![],
|
|
options: vec![],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
/// Maps the caller's sort name onto the value sent as the `sort_by` query parameter.
///
/// Only the exact string `"popular"` selects `video_viewed`; every other value,
/// including the empty string, falls back to `post_date`.
fn sort_by(sort: &str) -> &'static str {
    if sort == "popular" {
        "video_viewed"
    } else {
        "post_date"
    }
}
|
|
|
|
fn build_latest_url(&self, page: u32, sort: &str) -> String {
|
|
format!(
|
|
"{}/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={}&from={page}",
|
|
self.url,
|
|
Self::sort_by(sort)
|
|
)
|
|
}
|
|
|
|
fn build_latest_headers(&self) -> Vec<(String, String)> {
|
|
vec![(
|
|
"Referer".to_string(),
|
|
format!("{}/latest-updates/", self.url),
|
|
)]
|
|
}
|
|
|
|
/// Splits `query` on whitespace and joins the words with `separator`.
///
/// Leading, trailing and repeated whitespace never produces empty words, so a blank
/// query yields an empty string.
fn build_search_path_query(query: &str, separator: &str) -> String {
    let mut joined = String::with_capacity(query.len());
    for (index, word) in query.split_whitespace().enumerate() {
        if index > 0 {
            joined.push_str(separator);
        }
        joined.push_str(word);
    }
    joined
}
|
|
|
|
fn build_search_url(&self, query: &str, page: u32, sort: &str) -> String {
|
|
let query_param = Self::build_search_path_query(query, "+");
|
|
let path_query = Self::build_search_path_query(query, "-");
|
|
format!(
|
|
"{}/search/{path_query}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={query_param}&sort_by={}&from_videos={page}",
|
|
self.url,
|
|
Self::sort_by(sort)
|
|
)
|
|
}
|
|
|
|
fn build_search_headers(&self, query: &str) -> Vec<(String, String)> {
|
|
let path_query = Self::build_search_path_query(query, "-");
|
|
vec![(
|
|
"Referer".to_string(),
|
|
format!("{}/search/{path_query}/", self.url),
|
|
)]
|
|
}
|
|
|
|
async fn get(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u32,
|
|
sort: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let video_url = self.build_latest_url(page, sort);
|
|
let old_items = match cache.get(&video_url) {
|
|
Some((time, items)) => {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
|
return Ok(items.clone());
|
|
}
|
|
items.clone()
|
|
}
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester =
|
|
requester_or_default(&options, "porn4fans", "porn4fans.get.missing_requester");
|
|
let text = match requester
|
|
.get_with_headers(&video_url, self.build_latest_headers(), None)
|
|
.await
|
|
{
|
|
Ok(text) => text,
|
|
Err(e) => {
|
|
report_provider_error(
|
|
"porn4fans",
|
|
"get.request",
|
|
&format!("url={video_url}; error={e}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
if text.trim().is_empty() {
|
|
report_provider_error(
|
|
"porn4fans",
|
|
"get.empty_response",
|
|
&format!("url={video_url}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let video_items = self.get_video_items_from_html(text, requester).await;
|
|
if !video_items.is_empty() {
|
|
cache.remove(&video_url);
|
|
cache.insert(video_url.clone(), video_items.clone());
|
|
return Ok(video_items);
|
|
}
|
|
|
|
Ok(old_items)
|
|
}
|
|
|
|
async fn query(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u32,
|
|
query: &str,
|
|
sort: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let video_url = self.build_search_url(query, page, sort);
|
|
let old_items = match cache.get(&video_url) {
|
|
Some((time, items)) => {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
|
return Ok(items.clone());
|
|
}
|
|
items.clone()
|
|
}
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester =
|
|
requester_or_default(&options, "porn4fans", "porn4fans.query.missing_requester");
|
|
let text = match requester
|
|
.get_with_headers(&video_url, self.build_search_headers(query), None)
|
|
.await
|
|
{
|
|
Ok(text) => text,
|
|
Err(e) => {
|
|
report_provider_error(
|
|
"porn4fans",
|
|
"query.request",
|
|
&format!("url={video_url}; error={e}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
};
|
|
|
|
if text.trim().is_empty() {
|
|
report_provider_error(
|
|
"porn4fans",
|
|
"query.empty_response",
|
|
&format!("url={video_url}"),
|
|
)
|
|
.await;
|
|
return Ok(old_items);
|
|
}
|
|
|
|
let video_items = self.get_video_items_from_html(text, requester).await;
|
|
if !video_items.is_empty() {
|
|
cache.remove(&video_url);
|
|
cache.insert(video_url.clone(), video_items.clone());
|
|
return Ok(video_items);
|
|
}
|
|
|
|
Ok(old_items)
|
|
}
|
|
|
|
/// Returns the text that follows the first occurrence of `start`, cut at the next
/// `start` or at the first `end`, whichever comes first.
///
/// Yields `None` when `start` does not occur. When `end` is missing, everything up to
/// the next `start` (or the end of `text`) is returned.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let mut pieces = text.split(start);
    // The first piece is whatever precedes `start`; only the one after it matters.
    pieces.next();
    let tail = pieces.next()?;
    tail.split(end).next()
}
|
|
|
|
fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option<String> {
|
|
attrs.iter().find_map(|attr| {
|
|
Self::extract_between(segment, attr, "\"")
|
|
.map(str::trim)
|
|
.filter(|value| !value.is_empty())
|
|
.map(ToString::to_string)
|
|
})
|
|
}
|
|
|
|
fn normalize_url(&self, url: &str) -> String {
|
|
if url.starts_with("http://") || url.starts_with("https://") {
|
|
return url.to_string();
|
|
}
|
|
if url.starts_with("//") {
|
|
return format!("https:{url}");
|
|
}
|
|
if url.starts_with('/') {
|
|
return format!("{}{}", self.url, url);
|
|
}
|
|
format!("{}/{}", self.url, url.trim_start_matches("./"))
|
|
}
|
|
|
|
fn extract_thumb_url(&self, segment: &str) -> String {
|
|
let thumb_raw = Self::first_non_empty_attr(
|
|
segment,
|
|
&[
|
|
"data-original=\"",
|
|
"data-webp=\"",
|
|
"srcset=\"",
|
|
"src=\"",
|
|
"poster=\"",
|
|
],
|
|
)
|
|
.unwrap_or_default();
|
|
|
|
if thumb_raw.starts_with("data:image/") {
|
|
return String::new();
|
|
}
|
|
|
|
self.normalize_url(&thumb_raw)
|
|
}
|
|
|
|
/// Undoes the two escapes that wrap URLs embedded in page scripts: JSON-style `\/`
/// becomes `/`, and the HTML entity `&amp;` becomes `&`.
///
/// The second replacement previously mapped `&` onto itself and therefore did nothing,
/// leaving `&amp;` inside extracted URLs.
fn decode_escaped_text(text: &str) -> String {
    text.replace("\\/", "/").replace("&amp;", "&")
}
|
|
|
|
fn decode_html_text(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
.split_whitespace()
|
|
.collect::<Vec<_>>()
|
|
.join(" ")
|
|
.trim()
|
|
.to_string()
|
|
}
|
|
|
|
fn strip_tags(text: &str) -> String {
|
|
Regex::new(r"(?is)<[^>]+>")
|
|
.ok()
|
|
.map(|regex| regex.replace_all(text, "").to_string())
|
|
.unwrap_or_else(|| text.to_string())
|
|
}
|
|
|
|
/// Appends the trimmed `value` to `values` unless it is blank or already present.
///
/// Presence is checked ignoring ASCII case, so `"teen"` is not added after `"Teen"`.
fn push_unique_tag(values: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    if candidate.is_empty() {
        return;
    }

    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !already_present {
        values.push(candidate.to_owned());
    }
}
|
|
|
|
fn extract_views(text: &str) -> Option<u32> {
|
|
Regex::new(r"(?i)<svg[^>]+icon-eye[^>]*>.*?</svg>\s*<span>([^<]+)</span>")
|
|
.ok()
|
|
.and_then(|re| re.captures(text))
|
|
.and_then(|caps| caps.get(1))
|
|
.and_then(|m| parse_abbreviated_number(m.as_str().trim()))
|
|
}
|
|
|
|
fn extract_rating(text: &str) -> Option<f32> {
|
|
Regex::new(r"(?i)<svg[^>]+icon-like[^>]*>.*?</svg>\s*<span>([^<%]+)%</span>")
|
|
.ok()
|
|
.and_then(|re| re.captures(text))
|
|
.and_then(|caps| caps.get(1))
|
|
.and_then(|m| m.as_str().trim().parse::<f32>().ok())
|
|
}
|
|
|
|
fn extract_direct_video_url_from_page(text: &str) -> Option<String> {
|
|
let decoded = Self::decode_escaped_text(text);
|
|
|
|
for key in ["video_url", "video_alt_url", "contentUrl"] {
|
|
let pattern = format!(
|
|
r#"(?is)(?:^|[{{\s,])["']?{}["']?\s*[:=]\s*["'](?P<url>https?://[^"'<>]+?\.mp4)"#,
|
|
regex::escape(key)
|
|
);
|
|
let regex = Regex::new(&pattern).ok()?;
|
|
if let Some(url) = regex
|
|
.captures(&decoded)
|
|
.and_then(|captures| captures.name("url"))
|
|
.map(|value| value.as_str().to_string())
|
|
{
|
|
return Some(url);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
fn collect_texts(document: &Html, selector: &str) -> Vec<String> {
|
|
let Ok(selector) = Selector::parse(selector) else {
|
|
return vec![];
|
|
};
|
|
let mut values = Vec::new();
|
|
for element in document.select(&selector) {
|
|
let raw_text = element.text().collect::<Vec<_>>().join(" ");
|
|
let cleaned = Self::decode_html_text(&Self::strip_tags(&raw_text));
|
|
Self::push_unique_tag(&mut values, cleaned);
|
|
}
|
|
|
|
values
|
|
}
|
|
|
|
fn extract_page_models_and_categories(text: &str) -> (Vec<String>, Vec<String>) {
|
|
let document = Html::parse_document(text);
|
|
|
|
let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]");
|
|
|
|
let mut categories =
|
|
Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]");
|
|
for value in Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]") {
|
|
Self::push_unique_tag(&mut categories, value);
|
|
}
|
|
|
|
(models, categories)
|
|
}
|
|
|
|
fn parse_video_cards_from_html(&self, html: &str) -> Vec<Porn4fansCard> {
|
|
if html.trim().is_empty() {
|
|
return vec![];
|
|
}
|
|
|
|
let Ok(link_re) = Regex::new(
|
|
r#"(?is)<a[^>]+class="item-link"[^>]+href="(?P<href>[^"]+/video/(?P<id>\d+)/[^"]+)"[^>]+title="(?P<title>[^"]+)"[^>]*>(?P<body>.*?)</a>"#,
|
|
) else {
|
|
return vec![];
|
|
};
|
|
|
|
let mut items = Vec::new();
|
|
let mut seen = HashSet::new();
|
|
|
|
for captures in link_re.captures_iter(html) {
|
|
let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else {
|
|
continue;
|
|
};
|
|
if !seen.insert(id.clone()) {
|
|
continue;
|
|
}
|
|
|
|
let href = captures
|
|
.name("href")
|
|
.map(|m| self.normalize_url(m.as_str()))
|
|
.unwrap_or_default();
|
|
let title_raw = captures
|
|
.name("title")
|
|
.map(|m| m.as_str())
|
|
.unwrap_or_default();
|
|
let title = decode(title_raw.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| title_raw.to_string());
|
|
let body = captures
|
|
.name("body")
|
|
.map(|m| m.as_str())
|
|
.unwrap_or_default();
|
|
let thumb = self.extract_thumb_url(body);
|
|
let duration_raw = Self::extract_between(body, "<div class=\"duration\">", "<")
|
|
.unwrap_or_default()
|
|
.trim()
|
|
.to_string();
|
|
let duration = parse_time_to_seconds(&duration_raw).unwrap_or(0) as u32;
|
|
let views = Self::extract_views(body).unwrap_or(0);
|
|
let rating = Self::extract_rating(body);
|
|
|
|
items.push(Porn4fansCard {
|
|
id,
|
|
title,
|
|
page_url: href,
|
|
thumb,
|
|
duration,
|
|
views: (views > 0).then_some(views),
|
|
rating,
|
|
});
|
|
}
|
|
|
|
items
|
|
}
|
|
|
|
async fn enrich_video_card(
|
|
&self,
|
|
card: Porn4fansCard,
|
|
mut requester: crate::util::requester::Requester,
|
|
) -> VideoItem {
|
|
let direct_url = requester
|
|
.get_with_headers(
|
|
&card.page_url,
|
|
vec![("Referer".to_string(), format!("{}/", self.url))],
|
|
None,
|
|
)
|
|
.await
|
|
.ok();
|
|
|
|
let (direct_url, models, categories) = match direct_url {
|
|
Some(text) => {
|
|
let url = Self::extract_direct_video_url_from_page(&text)
|
|
.unwrap_or_else(|| card.page_url.clone());
|
|
let (models, categories) = Self::extract_page_models_and_categories(&text);
|
|
(url, models, categories)
|
|
}
|
|
None => (card.page_url.clone(), vec![], vec![]),
|
|
};
|
|
|
|
let mut item = VideoItem::new(
|
|
card.id,
|
|
card.title,
|
|
direct_url,
|
|
"porn4fans".to_string(),
|
|
card.thumb,
|
|
card.duration,
|
|
);
|
|
if let Some(views) = card.views {
|
|
item = item.views(views);
|
|
}
|
|
if let Some(rating) = card.rating {
|
|
item = item.rating(rating);
|
|
}
|
|
if let Some(model) = models.first() {
|
|
item = item.uploader(model.clone());
|
|
}
|
|
item = item.tags(categories);
|
|
item
|
|
}
|
|
|
|
async fn get_video_items_from_html(
|
|
&self,
|
|
html: String,
|
|
requester: crate::util::requester::Requester,
|
|
) -> Vec<VideoItem> {
|
|
let cards = self.parse_video_cards_from_html(&html);
|
|
let futures = cards
|
|
.into_iter()
|
|
.map(|card| self.enrich_video_card(card, requester.clone()));
|
|
|
|
join_all(futures).await
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for Porn4fansProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let _ = per_page;
|
|
let page = page.parse::<u32>().unwrap_or(1);
|
|
|
|
let videos = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
self.query(cache, page, &query, &sort, options).await
|
|
}
|
|
_ => self.get(cache, page, &sort, options).await,
|
|
};
|
|
|
|
match videos {
|
|
Ok(videos) => videos,
|
|
Err(e) => {
|
|
report_provider_error(
|
|
"porn4fans",
|
|
"get_videos",
|
|
&format!("page={page}; error={e}"),
|
|
)
|
|
.await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::Porn4fansProvider;

    /// The latest-updates URL carries the custom block id, the mapped sort key and the page.
    #[test]
    fn builds_latest_url_with_custom_block_id() {
        let url = Porn4fansProvider::new().build_latest_url(2, "new");

        assert_eq!(
            url,
            "https://www.porn4fans.com/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2"
        );
    }

    /// The search URL carries the query dash-joined in the path and plus-joined in `q`.
    #[test]
    fn builds_search_url_with_custom_block_id() {
        let url = Porn4fansProvider::new().build_search_url("big black cock", 3, "popular");

        assert_eq!(
            url,
            "https://www.porn4fans.com/search/big-black-cock/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+black+cock&sort_by=video_viewed&from_videos=3"
        );
    }

    /// A single search-result card yields its id, URLs, duration, views and rating.
    #[test]
    fn parses_porn4fans_search_markup() {
        let markup = r##"
            <div class="thumbs second grid-1" id="custom_list_videos_videos_list_search_result_items">
                <div class="item">
                    <a class="item-link" href="https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" title="Horny Police Officer Melztube Gets Banged By BBC">
                        <div class="img-wrap">
                            <div class="duration">23:47</div>
                            <picture>
                                <source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" type="image/webp">
                                <img class="thumb lazy-load" src="data:image/gif;base64,AAAA" data-original="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-preview="https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10194/10194_preview_high.mp4/" alt="Horny Police Officer Melztube Gets Banged By BBC" />
                            </picture>
                        </div>
                        <div class="video-text">Horny Police Officer Melztube Gets Banged By BBC</div>
                        <ul class="video-items">
                            <li class="video-item">
                                <svg class="svg-icon icon-eye"><use xlink:href="#icon-eye"></use></svg>
                                <span>14K</span>
                            </li>
                            <li class="video-item rating">
                                <svg class="svg-icon icon-like"><use xlink:href="#icon-like"></use></svg>
                                <span>66%</span>
                            </li>
                            <li class="video-item">
                                <span>2 weeks ago</span>
                            </li>
                        </ul>
                    </a>
                </div>
            </div>
        "##;

        let cards = Porn4fansProvider::new().parse_video_cards_from_html(markup);
        assert_eq!(cards.len(), 1);

        let card = &cards[0];
        assert_eq!(card.id, "10194");
        assert_eq!(
            card.page_url,
            "https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/"
        );
        assert_eq!(
            card.thumb,
            "https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg"
        );
        assert_eq!(card.duration, 1427);
        assert_eq!(card.views, Some(14_000));
        assert_eq!(card.rating, Some(66.0));
    }

    /// The primary `video_url` wins over the alternative one, and the result stops at `.mp4`.
    #[test]
    fn extracts_direct_video_url_from_video_page() {
        let page = r#"
            <script>
                var flashvars = {
                    video_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951.mp4\/',
                    video_alt_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951_720p.mp4\/'
                };
            </script>
        "#;

        let found = Porn4fansProvider::extract_direct_video_url_from_page(page);

        assert_eq!(
            found.as_deref(),
            Some(
                "https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10951/10951.mp4"
            )
        );
    }

    /// Models come from the player list; categories are followed by tags.
    #[test]
    fn extracts_models_and_categories_from_video_page() {
        let page = r#"
            <div class="player-models-list">
                <div class="player-model-item">
                    <a href="/models/piper-rockelle/"><span class="player-model-name">Piper Rockelle</span></a>
                </div>
            </div>
            <ul class="categories-row">
                <li class="visible"><a href="/categories/striptease/">Striptease</a></li>
                <li class="visible"><a href="/categories/teen/">Teen</a></li>
            </ul>
            <ul class="tags-row">
                <li class="visible"><a href="/tags/bathroom/">Bathroom</a></li>
            </ul>
        "#;

        let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(page);

        assert_eq!(models, vec!["Piper Rockelle".to_string()]);
        assert_eq!(
            categories,
            vec![
                "Striptease".to_string(),
                "Teen".to_string(),
                "Bathroom".to_string()
            ]
        );
    }
}
|