// Source: hottub/src/providers/yesporn.rs (snapshot 2026-03-30 06:59:38 +00:00; 1517 lines, 54 KiB, Rust).
//
// Note: the hosting UI flagged this file as containing ambiguous Unicode characters, i.e.
// characters that might be confused with other characters. If that is intentional, the
// warning can safely be ignored.
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use wreq::Version;
/// Static channel metadata for this provider: the group it belongs to and its descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "mixed", "hd"],
};
// Declares this module's error types through `error_chain`; the rest of the file uses the
// resulting `Error` and `Result` names.
error_chain! {
// I/O errors are wrapped as the `Io` variant.
foreign_links {
Io(std::io::Error);
}
// `Parse` carries a free-form message and renders as "parse error: <msg>".
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Site root (no trailing slash) that every request URL is built from.
const BASE_URL: &str = "https://yesporn.vip";
/// Channel identifier used for the channel descriptor, video items, and error reports.
const CHANNEL_ID: &str = "yesporn";
/// `User-Agent` header value sent by `html_headers`.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent by `html_headers`.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// Provider for the YesPorn site.
///
/// The four filter catalogs are shared behind `Arc<RwLock<..>>`, so clones of the provider
/// see the same lists. `new` seeds each with a single "All" entry and starts a background
/// load that appends the scraped entries.
#[derive(Debug, Clone)]
pub struct YespornProvider {
/// Site root used to build request URLs and to absolutize relative links.
url: String,
/// Category filter options; each `id` is the category archive URL.
categories: Arc<RwLock<Vec<FilterOption>>>,
/// Tag filter options; each `id` is the tag archive URL.
tags: Arc<RwLock<Vec<FilterOption>>>,
/// Model filter options; each `id` is the model archive URL.
models: Arc<RwLock<Vec<FilterOption>>>,
/// Uploader filter options (channels and members); each `id` is the archive URL.
uploaders: Arc<RwLock<Vec<FilterOption>>>,
}
/// What a listing request should fetch.
#[derive(Debug, Clone)]
enum Target {
/// The site's most-recent-videos listing.
Home,
/// Free-text search; carries the query string.
Search(String),
/// A category, tag, model, or channel archive; carries the archive URL.
CommonArchive(String),
/// A member's uploaded-videos archive; carries the member URL.
MemberArchive(String),
}
impl YespornProvider {
/// Creates a provider rooted at `BASE_URL` and starts the background catalog load.
///
/// Every filter catalog begins with only the catch-all "All" entry; scraped entries are
/// appended later by `spawn_initial_load`.
pub fn new() -> Self {
    // Each call yields an independent catalog holding just the "All" placeholder.
    let seeded = || {
        Arc::new(RwLock::new(vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }]))
    };
    let provider = Self {
        url: BASE_URL.to_string(),
        categories: seeded(),
        tags: seeded(),
        models: seeded(),
        uploaders: seeded(),
    };
    provider.spawn_initial_load();
    provider
}
fn spawn_initial_load(&self) {
let url = self.url.clone();
let categories = Arc::clone(&self.categories);
let tags = Arc::clone(&self.tags);
let models = Arc::clone(&self.models);
let uploaders = Arc::clone(&self.uploaders);
thread::spawn(move || {
let runtime = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(runtime) => runtime,
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"spawn_initial_load.runtime_build",
&error.to_string(),
);
return;
}
};
runtime.block_on(async move {
if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
report_provider_error_background(
CHANNEL_ID,
"load_uploaders",
&error.to_string(),
);
}
if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await {
report_provider_error_background(CHANNEL_ID, "load_models", &error.to_string());
}
if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
report_provider_error_background(
CHANNEL_ID,
"load_categories",
&error.to_string(),
);
}
if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
}
});
});
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let categories = self
.categories
.read()
.map(|value| value.clone())
.unwrap_or_default();
let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
let models = self.models.read().map(|value| value.clone()).unwrap_or_default();
let uploaders = self
.uploaders
.read()
.map(|value| value.clone())
.unwrap_or_default();
Channel {
id: CHANNEL_ID.to_string(),
name: "YesPorn".to_string(),
description:
"YesPorn videos with latest, popular, rated, category, tag, model, and uploader archives."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=yesporn.vip".to_string(),
status: "active".to_string(),
categories: categories.iter().map(|value| value.title.clone()).collect(),
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse YesPorn archives by ranking.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Latest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "longest".to_string(),
title: "Longest".to_string(),
},
FilterOption {
id: "commented".to_string(),
title: "Most Commented".to_string(),
},
FilterOption {
id: "recommended".to_string(),
title: "Most Favorited".to_string(),
},
FilterOption {
id: "random".to_string(),
title: "Random".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Categories".to_string(),
description: "Browse a YesPorn category archive.".to_string(),
systemImage: "square.grid.2x2".to_string(),
colorName: "orange".to_string(),
options: categories,
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Tags".to_string(),
description: "Browse a YesPorn tag archive.".to_string(),
systemImage: "tag.fill".to_string(),
colorName: "green".to_string(),
options: tags,
multiSelect: false,
},
ChannelOption {
id: "stars".to_string(),
title: "Models".to_string(),
description: "Browse a YesPorn model archive.".to_string(),
systemImage: "star.fill".to_string(),
colorName: "yellow".to_string(),
options: models,
multiSelect: false,
},
ChannelOption {
id: "sites".to_string(),
title: "Uploaders".to_string(),
description: "Browse a YesPorn channel or community uploader archive."
.to_string(),
systemImage: "person.crop.square".to_string(),
colorName: "purple".to_string(),
options: uploaders,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Trims `text` and replaces every run of whitespace with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Returns the element's text: text nodes joined with spaces, whitespace collapsed, then
/// HTML entities decoded.
fn text_of(element: &ElementRef<'_>) -> String {
    let joined = element.text().collect::<Vec<_>>().join(" ");
    let collapsed = Self::collapse_whitespace(&joined);
    Self::decode_html(&collapsed)
}
/// Produces the comparison key for a title: surrounding whitespace and leading `#`
/// characters removed, inner whitespace collapsed to single spaces, ASCII-lowercased.
fn normalize_title(title: &str) -> String {
    let without_hashes = title.trim().trim_start_matches('#');
    let mut normalized = String::with_capacity(without_hashes.len());
    for word in without_hashes.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
/// Cleans a scraped filter title for display.
///
/// Decodes HTML entities, strips leading `#` characters, collapses whitespace, and — when
/// the title has more than one word — drops a trailing word made only of ASCII digits.
fn clean_filter_title(title: &str) -> String {
    let decoded = Self::decode_html(title);
    let mut words: Vec<&str> = decoded
        .trim()
        .trim_start_matches('#')
        .split_whitespace()
        .collect();
    let has_trailing_number = words.len() > 1
        && words
            .last()
            .is_some_and(|word| word.chars().all(|ch| ch.is_ascii_digit()));
    if has_trailing_number {
        words.pop();
    }
    words.join(" ")
}
/// Turns a link found in a page into an absolute URL.
///
/// Empty input stays empty, absolute `http(s)` URLs pass through, `//host/...` gets an
/// `https:` scheme, and root-relative or relative paths are resolved against `self.url`.
fn normalize_url(&self, url: &str) -> String {
    match url {
        "" => String::new(),
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            absolute.to_string()
        }
        scheme_relative if scheme_relative.starts_with("//") => {
            format!("https:{scheme_relative}")
        }
        rooted if rooted.starts_with('/') => format!("{}{}", self.url, rooted),
        relative => format!("{}/{}", self.url, relative.trim_start_matches("./")),
    }
}
/// Builds the request headers for HTML fetches: `User-Agent`, `Accept`, and the given
/// `Referer`, in that order.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ]
    .iter()
    .map(|&(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Maps a client-facing sort id (or one of its aliases) to the site's `sort_by` value.
/// Unknown ids fall back to `post_date`.
fn sort_by(sort: &str) -> &'static str {
    const ALIASES: [(&[&str], &str); 6] = [
        (&["popular", "viewed", "trending"], "video_viewed"),
        (&["rated", "rating", "top"], "rating"),
        (&["longest", "duration"], "duration"),
        (&["commented", "comments"], "most_commented"),
        (&["recommended", "favorited", "favourited"], "most_favourited"),
        (&["random"], "rand()"),
    ];
    for &(names, site_key) in ALIASES.iter() {
        if names.iter().any(|name| *name == sort) {
            return site_key;
        }
    }
    "post_date"
}
/// Joins the whitespace-separated terms of `query` with `separator`.
///
/// NOTE(review): terms are inserted verbatim, with no percent-encoding — confirm callers
/// never pass characters that are significant in a URL.
fn build_search_path_query(query: &str, separator: &str) -> String {
    let mut joined = String::new();
    for (index, term) in query.split_whitespace().enumerate() {
        if index > 0 {
            joined.push_str(separator);
        }
        joined.push_str(term);
    }
    joined
}
/// Builds the async block URL for the most-recent-videos listing at `page`, ordered by the
/// site key that `sort` maps to.
fn build_home_url(&self, page: u16, sort: &str) -> String {
    let base = &self.url;
    let sort_key = Self::sort_by(sort);
    format!(
        "{base}/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by={sort_key}&from={page}"
    )
}
/// Builds the async block URL for search results.
///
/// The query terms appear twice: dash-joined in the path and plus-joined in the `q`
/// parameter.
fn build_search_url(&self, query: &str, page: u16, sort: &str) -> String {
    let base = &self.url;
    let path_query = Self::build_search_path_query(query, "-");
    let query_param = Self::build_search_path_query(query, "+");
    let sort_key = Self::sort_by(sort);
    format!(
        "{base}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={query_param}&category_ids=&sort_by={sort_key}&from_videos={page}"
    )
}
/// Builds the async block URL for a category/tag/model/channel archive page.
/// Trailing slashes on `archive_url` are dropped before the query string is appended.
fn build_common_archive_url(&self, archive_url: &str, page: u16, sort: &str) -> String {
    let archive_root = archive_url.trim_end_matches('/');
    let sort_key = Self::sort_by(sort);
    format!(
        "{archive_root}/?mode=async&function=get_block&block_id=list_videos_common_videos_list&sort_by={sort_key}&from={page}"
    )
}
/// Builds the async block URL for a member's uploaded-videos page.
/// Trailing slashes on `member_url` are dropped before the query string is appended.
fn build_member_archive_url(&self, member_url: &str, page: u16, sort: &str) -> String {
    let member_root = member_url.trim_end_matches('/');
    let sort_key = Self::sort_by(sort);
    format!(
        "{member_root}/?mode=async&function=get_block&block_id=list_videos_uploaded_videos&sort_by={sort_key}&from_videos={page}"
    )
}
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Fetches the listing HTML for an archive target.
///
/// The archive page itself is always requested first. For `page <= 1` its body is returned
/// as-is; for later pages the async block URL is built from the URI reported by that first
/// response and fetched with that URI as referer.
///
/// NOTE(review): on the `page <= 1` path `sort` is never used, so the first page comes back
/// in whatever order the plain archive page uses — confirm this is intended.
///
/// # Errors
/// Fails for `Target::Home`/`Target::Search`, on a request failure, on a non-success
/// status, or when the body cannot be read.
async fn fetch_archive_html(
&self,
requester: &mut Requester,
target: &Target,
page: u16,
sort: &str,
) -> Result<String> {
// Only the two archive variants carry a URL to fetch.
let archive_url = match target {
Target::CommonArchive(url) | Target::MemberArchive(url) => url,
_ => {
return Err(Error::from(
"fetch_archive_html called for non-archive target".to_string(),
));
}
};
let response = requester
.get_raw_with_headers(archive_url, Self::html_headers(archive_url))
.await
.map_err(|error| {
Error::from(format!("archive page request failed for {archive_url}: {error}"))
})?;
if !response.status().is_success() {
return Err(Error::from(format!(
"archive page request failed for {archive_url}: status {}",
response.status()
)));
}
// Captured before `text()` is called on the response.
// NOTE(review): presumably the final URI after any redirects — confirm against the requester.
let canonical_url = response.uri().to_string();
let body = response
.text()
.await
.map_err(|error| Error::from(format!("archive page body read failed: {error}")))?;
if page <= 1 {
return Ok(body);
}
let async_url = match target {
Target::CommonArchive(_) => self.build_common_archive_url(&canonical_url, page, sort),
Target::MemberArchive(_) => self.build_member_archive_url(&canonical_url, page, sort),
// Non-archive targets already returned an error above.
_ => unreachable!(),
};
Self::fetch_html(requester, &async_url, &canonical_url).await
}
/// Appends `item` to the shared catalog unless it is blank or already present.
///
/// An item counts as present when an existing entry has the same `id` or the same
/// normalized title. Nothing is added if the write lock is poisoned.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    if item.id.is_empty() || item.title.is_empty() {
        return;
    }
    let Ok(mut entries) = target.write() else {
        return;
    };
    let wanted = Self::normalize_title(&item.title);
    let already_known = entries.iter().any(|existing| {
        existing.id == item.id || Self::normalize_title(&existing.title) == wanted
    });
    if !already_known {
        entries.push(item);
    }
}
/// Scrapes the first four pages of the category listing into `categories`.
///
/// Only links of the form `{base_url}/categories/<slug>` are accepted. The stored id is
/// that URL with a trailing slash; the title comes from the `title` attribute, falling back
/// to the link text.
///
/// # Errors
/// Stops at the first page whose request fails; entries added before that are kept.
async fn load_categories(
    base_url: &str,
    categories: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    let mut requester = Requester::new();
    // Serves as both the request referer and the prefix every accepted link must carry.
    let listing_url = format!("{base_url}/categories/");
    for page in 1..=4 {
        let url = format!(
            "{base_url}/categories/?mode=async&function=get_block&block_id=list_categories_categories_list&sort_by=avg_videos_rating&from={page}"
        );
        let html = Self::fetch_html(&mut requester, &url, &listing_url).await?;
        let document = Html::parse_document(&html);
        let selector = Self::selector("#list_categories_categories_list_items a[href]")?;
        for element in document.select(&selector) {
            let href = element
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            let Some(slug) = href.strip_prefix(listing_url.as_str()) else {
                continue;
            };
            if slug.is_empty() || slug.contains('/') {
                continue;
            }
            let raw_title = element
                .value()
                .attr("title")
                .map(str::trim)
                .filter(|value| !value.is_empty())
                .map(ToOwned::to_owned)
                .unwrap_or_else(|| Self::text_of(&element));
            let title = Self::clean_filter_title(&raw_title);
            if title.is_empty() {
                continue;
            }
            Self::push_unique(
                &categories,
                FilterOption {
                    id: format!("{href}/"),
                    title,
                },
            );
        }
    }
    Ok(())
}
/// Scrapes the tag index page into `tags`.
///
/// Only links of the form `{base_url}/tags/<slug>` are accepted; titles come from the link
/// text and entries whose cleaned title is empty or longer than 80 bytes are skipped.
///
/// # Errors
/// Fails if the tag page cannot be fetched.
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let mut requester = Requester::new();
    // Serves as the request URL, its referer, and the prefix every accepted link must carry.
    let listing_url = format!("{base_url}/tags/");
    let html = Self::fetch_html(&mut requester, &listing_url, &listing_url).await?;
    let document = Html::parse_document(&html);
    let selector = Self::selector("#list_tags_tags_list a[href]")?;
    for element in document.select(&selector) {
        let href = element
            .value()
            .attr("href")
            .unwrap_or_default()
            .trim_end_matches('/');
        let Some(slug) = href.strip_prefix(listing_url.as_str()) else {
            continue;
        };
        if slug.is_empty() || slug.contains('/') {
            continue;
        }
        let title = Self::clean_filter_title(&Self::text_of(&element));
        if title.is_empty() || title.len() > 80 {
            continue;
        }
        Self::push_unique(
            &tags,
            FilterOption {
                id: format!("{href}/"),
                title,
            },
        );
    }
    Ok(())
}
/// Scrapes the first five pages of the model listing into `models`.
///
/// Only links of the form `{base_url}/models/<slug>` are accepted. The stored id is that
/// URL with a trailing slash; the title comes from the `title` attribute, falling back to
/// the link text.
///
/// # Errors
/// Stops at the first page whose request fails; entries added before that are kept.
async fn load_models(base_url: &str, models: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let mut requester = Requester::new();
    // Serves as both the request referer and the prefix every accepted link must carry.
    let listing_url = format!("{base_url}/models/");
    for page in 1..=5 {
        let url = format!(
            "{base_url}/models/?mode=async&function=get_block&block_id=list_models_models_list&sort_by=avg_videos_rating&from={page}"
        );
        let html = Self::fetch_html(&mut requester, &url, &listing_url).await?;
        let document = Html::parse_document(&html);
        let selector = Self::selector("#list_models_models_list_items a[href]")?;
        for element in document.select(&selector) {
            let href = element
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            let Some(slug) = href.strip_prefix(listing_url.as_str()) else {
                continue;
            };
            if slug.is_empty() || slug.contains('/') {
                continue;
            }
            let raw_title = element
                .value()
                .attr("title")
                .map(str::trim)
                .filter(|value| !value.is_empty())
                .map(ToOwned::to_owned)
                .unwrap_or_else(|| Self::text_of(&element));
            let title = Self::clean_filter_title(&raw_title);
            if title.is_empty() {
                continue;
            }
            Self::push_unique(
                &models,
                FilterOption {
                    id: format!("{href}/"),
                    title,
                },
            );
        }
    }
    Ok(())
}
/// Fills `uploaders` from three sources, in order: channel links in the home page
/// navigation, four pages of the channel listing, and three pages of the member listing.
///
/// Channel links must look like `{base_url}/channels/<slug>`; member links must look like
/// `{base_url}/members/<digits>`. The stored id is the link URL with a trailing slash.
///
/// # Errors
/// Stops at the first request that fails; entries added before that are kept.
async fn load_uploaders(
    base_url: &str,
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    let mut requester = Requester::new();

    // Shared scraping step: parse `html`, walk the links matching `selector_text`, and push
    // each accepted one. Parsing happens inside the closure, so the parsed document is
    // dropped before the next request is awaited.
    let harvest = |html: &str,
                   selector_text: &str,
                   prefix: &str,
                   prefer_title_attr: bool,
                   numeric_only: bool|
     -> Result<()> {
        let document = Html::parse_document(html);
        let selector = Self::selector(selector_text)?;
        for element in document.select(&selector) {
            let href = element
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            let Some(slug) = href.strip_prefix(prefix) else {
                continue;
            };
            if slug.is_empty() || slug.contains('/') {
                continue;
            }
            if numeric_only && !slug.chars().all(|ch| ch.is_ascii_digit()) {
                continue;
            }
            let raw_title = if prefer_title_attr {
                element
                    .value()
                    .attr("title")
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(|| Self::text_of(&element))
            } else {
                Self::text_of(&element)
            };
            let title = Self::clean_filter_title(&raw_title);
            if title.is_empty() {
                continue;
            }
            Self::push_unique(
                &uploaders,
                FilterOption {
                    id: format!("{href}/"),
                    title,
                },
            );
        }
        Ok(())
    };

    let home_url = format!("{base_url}/");
    let channels_url = format!("{base_url}/channels/");
    let members_url = format!("{base_url}/members/");

    // 1) Channels linked from the home page navigation (title taken from the link text).
    let home_html = Self::fetch_html(&mut requester, &home_url, &home_url).await?;
    harvest(&home_html, "a.nav-link[href]", &channels_url, false, false)?;

    // 2) Channel listing pages.
    for page in 1..=4 {
        let url = format!(
            "{base_url}/channels/?mode=async&function=get_block&block_id=list_dvds_channels_list&sort_by=avg_videos_rating&from={page}"
        );
        let html = Self::fetch_html(&mut requester, &url, &channels_url).await?;
        harvest(
            &html,
            "#list_dvds_channels_list_items a[href]",
            &channels_url,
            true,
            false,
        )?;
    }

    // 3) Member listing pages; only purely numeric member ids are accepted.
    for page in 1..=3 {
        let url = format!(
            "{base_url}/members/?mode=async&function=get_block&block_id=list_members_members&sort_by=activity&from_members={page}"
        );
        let html = Self::fetch_html(&mut requester, &url, &members_url).await?;
        harvest(
            &html,
            "#list_members_members_items a[href]",
            &members_url,
            true,
            true,
        )?;
    }
    Ok(())
}
/// Returns the id of the first option whose normalized title equals the normalized
/// `query`, ignoring the "all" placeholder entry.
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
    let wanted = Self::normalize_title(query);
    for option in options {
        if option.id == "all" {
            continue;
        }
        if Self::normalize_title(&option.title) == wanted {
            return Some(option.id.clone());
        }
    }
    None
}
/// Reports whether any catalog still holds at most one entry (i.e. nothing beyond the
/// seeded "All" placeholder). A poisoned lock counts as length zero.
fn filters_need_refresh(&self) -> bool {
    let lengths = [
        &self.uploaders,
        &self.models,
        &self.tags,
        &self.categories,
    ]
    .map(|catalog| {
        catalog
            .read()
            .map(|entries| entries.len())
            .unwrap_or_default()
    });
    lengths.iter().any(|&len| len <= 1)
}
async fn refresh_filter_catalogs(&self) {
if let Err(error) = Self::load_uploaders(&self.url, Arc::clone(&self.uploaders)).await {
report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.uploaders", &error.to_string());
}
if let Err(error) = Self::load_models(&self.url, Arc::clone(&self.models)).await {
report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.models", &error.to_string());
}
if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await {
report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.categories", &error.to_string());
}
if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tags)).await {
report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.tags", &error.to_string());
}
}
/// Picks the listing target from the client-selected filter options.
///
/// Options are checked in priority order — uploader (`sites`), model (`stars`), tag
/// (`filter`), category (`categories`) — and the first one holding a URL on this site wins.
/// Uploader URLs containing `/members/` become member archives; everything else is a common
/// archive. With no usable option the home listing is used.
///
/// Option values come from the client and are later fetched as-is, so a value is accepted
/// only when it is `self.url` followed by a `/`. A bare prefix test would also accept
/// look-alike hosts such as `https://yesporn.vip.example.org/`. The placeholder value
/// `"all"` never matches.
fn resolve_option_target(&self, options: &ServerOptions) -> Target {
    let is_site_url = |value: &str| {
        value
            .strip_prefix(self.url.as_str())
            .is_some_and(|rest| rest.starts_with('/'))
    };
    if let Some(uploader) = options.sites.as_deref() {
        if is_site_url(uploader) {
            if uploader.contains("/members/") {
                return Target::MemberArchive(uploader.to_string());
            }
            return Target::CommonArchive(uploader.to_string());
        }
    }
    if let Some(model) = options.stars.as_deref() {
        if is_site_url(model) {
            return Target::CommonArchive(model.to_string());
        }
    }
    if let Some(tag) = options.filter.as_deref() {
        if is_site_url(tag) {
            return Target::CommonArchive(tag.to_string());
        }
    }
    if let Some(category) = options.categories.as_deref() {
        if is_site_url(category) {
            return Target::CommonArchive(category.to_string());
        }
    }
    Target::Home
}
/// Maps a free-text query to a listing target.
///
/// If the query matches a known uploader, model, tag, or category title (checked in that
/// order) the matching archive is used; otherwise it becomes a site search. Catalogs whose
/// lock is poisoned are skipped.
fn resolve_query_target(&self, query: &str) -> Target {
    let uploader_hit = self
        .uploaders
        .read()
        .ok()
        .and_then(|entries| Self::match_filter(&entries, query));
    if let Some(url) = uploader_hit {
        return if url.contains("/members/") {
            Target::MemberArchive(url)
        } else {
            Target::CommonArchive(url)
        };
    }
    for catalog in [&self.models, &self.tags, &self.categories] {
        let hit = catalog
            .read()
            .ok()
            .and_then(|entries| Self::match_filter(&entries, query));
        if let Some(url) = hit {
            return Target::CommonArchive(url);
        }
    }
    Target::Search(query.to_string())
}
/// Returns `(request_url, referer)` for the given target, page, and sort order.
///
/// The referer is the human-facing page the listing belongs to: the site root, the search
/// page, or the archive URL itself.
fn build_target_request(&self, target: &Target, page: u16, sort: &str) -> (String, String) {
    let request_url = match target {
        Target::Home => self.build_home_url(page, sort),
        Target::Search(query) => self.build_search_url(query, page, sort),
        Target::CommonArchive(archive_url) => {
            self.build_common_archive_url(archive_url, page, sort)
        }
        Target::MemberArchive(member_url) => {
            self.build_member_archive_url(member_url, page, sort)
        }
    };
    let referer = match target {
        Target::Home => format!("{}/", self.url),
        Target::Search(query) => {
            let path_query = Self::build_search_path_query(query, "-");
            format!("{}/search/{path_query}/", self.url)
        }
        Target::CommonArchive(url) | Target::MemberArchive(url) => url.clone(),
    };
    (request_url, referer)
}
/// Parses a duration string into whole seconds; returns 0 when the text cannot be parsed
/// or the value does not fit in `u32`.
fn parse_duration(text: &str) -> u32 {
    match parse_time_to_seconds(text) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Parses a view count from trimmed `text` by delegating to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let trimmed = text.trim();
    parse_abbreviated_number(trimmed)
}
/// Parses a percentage such as `"87%"` into its numeric value; surrounding whitespace and
/// trailing `%` signs are ignored. Returns `None` when the rest is not a number.
fn parse_percent(text: &str) -> Option<f32> {
    let without_sign = text.trim().trim_end_matches('%');
    match without_sign.trim().parse::<f32>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Finds the element that wraps the video cards, trying each known listing container id in
/// order and returning the first one present in `document`.
///
/// Returns `Ok(None)` when none of the containers exists.
fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
    const CONTAINER_IDS: [&str; 6] = [
        "#list_videos_most_recent_videos_items",
        "#list_videos_videos_list_search_result_items",
        "#list_videos_common_videos_list_items",
        "#list_videos_uploaded_videos_items",
        "#list_videos_related_videos_items",
        "#list_videos_recommended_videos_items",
    ];
    for container_id in CONTAINER_IDS {
        let selector = Self::selector(container_id)?;
        let first_match = document.select(&selector).next();
        if first_match.is_some() {
            return Ok(first_match);
        }
    }
    Ok(None)
}
/// Parses a listing page (or async listing fragment) into video items.
///
/// Returns an empty list when no known listing container is found. Cards without a video
/// link, a usable id, or a title are skipped.
///
/// # Errors
/// Fails only if one of the selectors used here cannot be parsed.
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
let Some(container) = self.list_container(&document)? else {
return Ok(vec![]);
};
let card_selector = Self::selector("div.thumb.thumb_rel.item")?;
let link_selector = Self::selector("a[href*=\"/video/\"]")?;
let image_selector = Self::selector("img")?;
let title_selector = Self::selector("div.title")?;
let meta_selector = Self::selector("div.thumb-bottom.thumb-bottom-videos div.thumb-item")?;
let duration_selector = Self::selector("div.item-bottom div.time")?;
let mut items = Vec::new();
for card in container.select(&card_selector) {
let Some(link) = card.select(&link_selector).next() else {
continue;
};
let href = link.value().attr("href").unwrap_or_default();
let page_url = self.normalize_url(href);
if page_url.is_empty() {
continue;
}
// The id is the second-to-last path segment of the page URL (trailing slash ignored).
let id = page_url
.trim_end_matches('/')
.split('/')
.nth_back(1)
.unwrap_or_default()
.to_string();
if id.is_empty() {
continue;
}
let image = card.select(&image_selector).next();
// Thumbnail source preference: `data-webp`, then `data-original`, then `src`.
let thumb = image
.and_then(|value| {
value
.value()
.attr("data-webp")
.or_else(|| value.value().attr("data-original"))
.or_else(|| value.value().attr("src"))
})
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let preview = image
.and_then(|value| value.value().attr("data-preview"))
.map(|value| self.normalize_url(value));
// Title comes from the card's title element, falling back to the link's `title` attribute.
let title = card
.select(&title_selector)
.next()
.map(|value| Self::text_of(&value))
.filter(|value| !value.is_empty())
.or_else(|| {
link.value()
.attr("title")
.map(Self::decode_html)
.filter(|value| !value.is_empty())
});
let Some(title) = title else {
continue;
};
let duration = card
.select(&duration_selector)
.next()
.map(|value| Self::parse_duration(&Self::text_of(&value)))
.unwrap_or(0);
// Meta cells are read positionally: first is parsed as views, second as a rating percent.
let meta_values = card
.select(&meta_selector)
.map(|value| Self::text_of(&value))
.collect::<Vec<_>>();
let views = meta_values.first().and_then(|value| Self::parse_views(value));
let rating = meta_values.get(1).and_then(|value| Self::parse_percent(value));
let mut item = VideoItem::new(
id,
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
// A preview clip is attached as a "preview" format whose Referer is the video page URL.
if let Some(preview) = preview {
let mut format =
VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
}
if let Some(views) = views {
item = item.views(views);
}
if let Some(rating) = rating {
item = item.rating(rating);
}
items.push(item);
}
Ok(items)
}
/// Extracts the `"uploadDate"` value embedded in the page and converts it to a Unix
/// timestamp in seconds.
///
/// The value is parsed as RFC 3339 first, then as a plain `YYYY-MM-DD` date (midnight UTC).
/// Returns `None` when the field is missing, unparseable, or lies before the Unix epoch —
/// a negative timestamp is rejected instead of being cast into a huge `u64`.
fn parse_upload_date_timestamp(html: &str) -> Option<u64> {
    let iso_re = Self::regex(r#""uploadDate"\s*:\s*"([^"]+)""#).ok()?;
    let iso = iso_re.captures(html)?.get(1)?.as_str();
    let seconds = DateTime::parse_from_rfc3339(iso)
        .map(|value| value.with_timezone(&Utc).timestamp())
        .ok()
        .or_else(|| {
            NaiveDate::parse_from_str(iso, "%Y-%m-%d")
                .ok()
                .and_then(|value| value.and_hms_opt(0, 0, 0))
                .map(|value| value.and_utc().timestamp())
        })?;
    // Checked conversion: pre-1970 dates yield `None` rather than wrapping.
    u64::try_from(seconds).ok()
}
/// Reads the counters shown next to the title on a video page.
///
/// Each counter's text is tried, in order, as a duration, a view count, and an
/// `MM/DD/YYYY` upload date; the first still-unset field that parses claims that counter.
///
/// Returns `(views, uploaded_at, duration)`, where `uploaded_at` is a Unix timestamp in
/// seconds at midnight UTC. Dates before the Unix epoch are dropped instead of being cast
/// into a huge `u64`.
///
/// # Errors
/// Fails only if the counter selector cannot be parsed.
fn parse_count_values(document: &Html) -> Result<(Option<u32>, Option<u64>, Option<u32>)> {
    let selector = Self::selector("div.title-holder div.count-item")?;
    let mut duration = None;
    let mut views = None;
    let mut uploaded_at = None;
    for element in document.select(&selector) {
        let text = Self::text_of(&element);
        if duration.is_none() {
            let parsed = Self::parse_duration(&text);
            // Zero means "not a duration", so the text falls through to the next check.
            if parsed > 0 {
                duration = Some(parsed);
                continue;
            }
        }
        if views.is_none() {
            if let Some(parsed) = Self::parse_views(&text) {
                views = Some(parsed);
                continue;
            }
        }
        if uploaded_at.is_none() {
            if let Ok(date) = NaiveDate::parse_from_str(text.trim(), "%m/%d/%Y") {
                uploaded_at = date
                    .and_hms_opt(0, 0, 0)
                    .and_then(|midnight| u64::try_from(midnight.and_utc().timestamp()).ok());
            }
        }
    }
    Ok((views, uploaded_at, duration))
}
/// Extracts `(url, label)` pairs for the player's video sources from the page script.
///
/// Looks for `video_url`, `video_alt_url`, `video_alt_url2`, and `video_alt_url3` together
/// with their `*_text` labels. Escaped slashes are unescaped, a leading `function/0/` is
/// removed, and a missing or blank label becomes `"MP4"`.
fn parse_format_urls(html: &str) -> Vec<(String, String)> {
    const SOURCE_KEYS: [(&str, &str); 4] = [
        ("video_url", "video_url_text"),
        ("video_alt_url", "video_alt_url_text"),
        ("video_alt_url2", "video_alt_url2_text"),
        ("video_alt_url3", "video_alt_url3_text"),
    ];
    let mut sources = Vec::new();
    for (url_key, label_key) in SOURCE_KEYS {
        let Ok(url_re) = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#)) else {
            continue;
        };
        let Ok(label_re) = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#)) else {
            continue;
        };
        let Some(raw_url) = url_re.captures(html).and_then(|caps| caps.get(1)) else {
            continue;
        };
        let unescaped = raw_url.as_str().replace("\\/", "/");
        let url = match unescaped.strip_prefix("function/0/") {
            Some(stripped) => stripped.to_string(),
            None => unescaped,
        };
        let label = label_re
            .captures(html)
            .and_then(|caps| caps.get(1))
            .map(|found| found.as_str().trim().to_string())
            .filter(|text| !text.is_empty())
            .unwrap_or_else(|| "MP4".to_string());
        sources.push((url, label));
    }
    sources
}
/// Gathers tag labels for a video page.
///
/// Labels come from the tag links in the tags row and from the comma-separated
/// `video_categories`, `video_tags`, and `video_models` script values. Duplicates (by
/// normalized title) are removed, keeping the first spelling seen.
///
/// # Errors
/// Fails only if a selector or pattern used here cannot be compiled.
fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
    let tag_selector = Self::selector("div.tags-row-wrapper a[href*=\"/tags/\"]")?;
    let mut candidates: Vec<String> = document
        .select(&tag_selector)
        .map(|element| Self::text_of(&element))
        .filter(|label| !label.is_empty())
        .collect();
    for pattern in [
        r#"video_categories:\s*'([^']*)'"#,
        r#"video_tags:\s*'([^']*)'"#,
        r#"video_models:\s*'([^']*)'"#,
    ] {
        let re = Self::regex(pattern)?;
        let Some(list) = re.captures(html).and_then(|caps| caps.get(1)) else {
            continue;
        };
        candidates.extend(
            list.as_str()
                .split(',')
                .map(str::trim)
                .map(Self::decode_html)
                .filter(|entry| !entry.is_empty()),
        );
    }
    // `seen_keys[i]` is the normalized form of `unique[i]`.
    let mut seen_keys: Vec<String> = Vec::new();
    let mut unique = Vec::new();
    for label in candidates {
        let key = Self::normalize_title(&label);
        if key.is_empty() || seen_keys.contains(&key) {
            continue;
        }
        seen_keys.push(key);
        unique.push(label);
    }
    Ok(unique)
}
fn parse_aspect_ratio(html: &str) -> Option<f32> {
let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#).ok()?;
let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#).ok()?;
let width = width_re
.captures(html)?
.get(1)?
.as_str()
.parse::<f32>()
.ok()?;
let height = height_re
.captures(html)?
.get(1)?
.as_str()
.parse::<f32>()
.ok()?;
(height > 0.0).then_some(width / height)
}
/// Enriches a listing item with data from its detail page HTML.
///
/// Overwrites the title when the page has one, sets uploader name/URL and tags, and fills
/// in views, upload date, duration, aspect ratio, and thumbnail only where the item does
/// not already have a value.
///
/// # Errors
/// Fails only if a selector or pattern used here (or in the helpers it calls) cannot be
/// compiled.
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
let document = Html::parse_document(html);
let title_selector = Self::selector("h1.title")?;
let member_selector = Self::selector("a.sub-btn[href*=\"/members/\"]")?;
let member_name_selector = Self::selector("em")?;
let channel_selector = Self::selector("a.btn.gold[href*=\"/channels/\"]")?;
let model_selector = Self::selector("a.btn.gold[href*=\"/models/\"]")?;
if let Some(title) = document
.select(&title_selector)
.next()
.map(|value| Self::text_of(&value))
.filter(|value| !value.is_empty())
{
item.title = title;
}
// Uploader: a member link takes precedence; a channel link is used only when no member link exists.
if let Some(member) = document.select(&member_selector).next() {
// Prefer the nested `<em>` text as the member name, falling back to the whole link text.
let uploader = member
.select(&member_name_selector)
.next()
.map(|value| Self::text_of(&value))
.filter(|value| !value.is_empty())
.unwrap_or_else(|| Self::text_of(&member));
let uploader_url = member
.value()
.attr("href")
.map(|value| self.normalize_url(value))
.unwrap_or_default();
if !uploader.is_empty() {
item.uploader = Some(uploader);
}
if !uploader_url.is_empty() {
item.uploaderUrl = Some(uploader_url);
}
} else if let Some(channel) = document.select(&channel_selector).next() {
let uploader = Self::text_of(&channel);
let uploader_url = channel
.value()
.attr("href")
.map(|value| self.normalize_url(value))
.unwrap_or_default();
if !uploader.is_empty() {
item.uploader = Some(uploader);
}
if !uploader_url.is_empty() {
item.uploaderUrl = Some(uploader_url);
}
}
// Tags: page tags first, then the first channel and first model name if not already listed
// (compared by normalized title).
let mut tags = Self::collect_tags(&document, html)?;
if let Some(channel) = document.select(&channel_selector).next() {
let value = Self::text_of(&channel);
if !value.is_empty()
&& !tags
.iter()
.any(|existing| Self::normalize_title(existing) == Self::normalize_title(&value))
{
tags.push(value);
}
}
if let Some(model) = document.select(&model_selector).next() {
let value = Self::text_of(&model);
if !value.is_empty()
&& !tags
.iter()
.any(|existing| Self::normalize_title(existing) == Self::normalize_title(&value))
{
tags.push(value);
}
}
if !tags.is_empty() {
item.tags = Some(tags);
}
// Values already set on the item (from the listing) win over detail-page values.
let (views, uploaded_at, duration) = Self::parse_count_values(&document)?;
if item.views.is_none() {
if let Some(views) = views {
item.views = Some(views);
}
}
if item.uploadedAt.is_none() {
// Counter date first, then the embedded `uploadDate` field.
item.uploadedAt = uploaded_at.or_else(|| Self::parse_upload_date_timestamp(html));
}
if item.duration == 0 {
if let Some(duration) = duration {
item.duration = duration;
}
}
if item.aspectRatio.is_none() {
item.aspectRatio = Self::parse_aspect_ratio(html);
}
// The embedded `thumbnailUrl` is used only when the listing gave no thumbnail.
if let Some(poster) = Self::regex(r#""thumbnailUrl"\s*:\s*"([^"]+)""#)
.ok()
.and_then(|re| re.captures(html))
.and_then(|caps| caps.get(1))
.map(|value| Self::decode_html(value.as_str()))
{
if item.thumb.is_empty() {
item.thumb = poster;
}
}
// NOTE(review): the parsed format URLs are discarded, so no playable formats are attached
// here — confirm whether this call is a placeholder or should feed `item`.
let _ = Self::parse_format_urls(html);
Ok(item)
}
/// Fetches the item's detail page and merges its data into the item.
///
/// This is best-effort: if the page cannot be fetched or parsed, the failure is reported in
/// the background and the original item is returned unchanged.
async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
    let mut requester =
        requester_or_default(options, CHANNEL_ID, "yesporn.enrich_item.missing_requester");
    let html = match Self::fetch_html(&mut requester, &item.url, &item.url).await {
        Ok(html) => html,
        Err(error) => {
            report_provider_error_background(
                CHANNEL_ID,
                "enrich_item.fetch_html",
                &format!("url={}; error={error}", item.url),
            );
            return item;
        }
    };
    // A copy goes into the parser so the untouched original can be returned on failure.
    match self.apply_detail_video(item.clone(), &html) {
        Ok(enriched) => enriched,
        Err(error) => {
            report_provider_error_background(
                CHANNEL_ID,
                "enrich_item.apply_detail_video",
                &format!("url={}; error={error}", item.url),
            );
            item
        }
    }
}
/// Fetches one listing page for `target`, enriches every card from its detail
/// page, and stores the result in `cache` under the request URL.
///
/// A cache entry younger than five minutes is returned as-is. When the request
/// fails, the body is blank, or the page yields no cards, the previously cached
/// (possibly stale) items are returned instead; that is an empty list when
/// nothing was cached. Request failures and blank bodies are reported through
/// `report_provider_error`.
///
/// The returned items keep the order in which they appeared on the listing page.
///
/// # Errors
///
/// Returns an error only when `parse_list_videos` fails on the fetched HTML.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let (url, referer) = self.build_target_request(&target, page, sort);

    // Fresh cache hits short-circuit; stale entries are kept as the fallback
    // for every failure path below.
    let old_items = match cache.get(&url) {
        Some((time, items)) => {
            if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                return Ok(items.clone());
            }
            items.clone()
        }
        None => vec![],
    };

    let mut requester =
        requester_or_default(&options, CHANNEL_ID, "yesporn.fetch_target.missing_requester");

    // Archive targets go through `fetch_archive_html`; everything else is a
    // plain HTML request with the computed referer.
    let html = match &target {
        Target::CommonArchive(_) | Target::MemberArchive(_) => {
            match self.fetch_archive_html(&mut requester, &target, page, sort).await {
                Ok(value) => value,
                Err(error) => {
                    report_provider_error(
                        CHANNEL_ID,
                        "fetch_target.archive_request",
                        &format!("url={url}; referer={referer}; error={error}"),
                    )
                    .await;
                    return Ok(old_items);
                }
            }
        }
        _ => match Self::fetch_html(&mut requester, &url, &referer).await {
            Ok(value) => value,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "fetch_target.request",
                    &format!("url={url}; error={error}"),
                )
                .await;
                return Ok(old_items);
            }
        },
    };

    if html.trim().is_empty() {
        report_provider_error(CHANNEL_ID, "fetch_target.empty_response", &format!("url={url}"))
            .await;
        return Ok(old_items);
    }

    let items = self.parse_list_videos(&html)?;
    if items.is_empty() {
        return Ok(old_items);
    }

    // Enrich up to four items concurrently. `buffered` (unlike `buffer_unordered`)
    // yields results in input order, so the listing order from the page is kept.
    let enriched = stream::iter(items.into_iter().map(|item| {
        let provider = self.clone();
        let options = options.clone();
        async move { provider.enrich_item(item, &options).await }
    }))
    .buffered(4)
    .collect::<Vec<_>>()
    .await;

    cache.remove(&url);
    cache.insert(url, enriched.clone());
    Ok(enriched)
}
}
#[async_trait]
impl Provider for YespornProvider {
    /// Returns the videos for the requested listing, or an empty list when
    /// `fetch_target` fails (the failure is reported via `report_provider_error`).
    ///
    /// `page` is parsed as a `u16`; unparsable values and `0` fall back to page 1.
    /// A non-blank `query` is trimmed and resolved with `resolve_query_target`;
    /// otherwise the target is derived from `options`. `_pool` and `_per_page`
    /// are not used by this provider.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let target = match query {
            Some(query) if !query.trim().is_empty() => {
                let query = query.trim();
                let mut target = self.resolve_query_target(query);
                // The query only resolved to a plain search and the filter
                // catalogs report that they need a refresh: reload them and
                // resolve the query once more.
                if matches!(target, Target::Search(_)) && self.filters_need_refresh() {
                    self.refresh_filter_catalogs().await;
                    target = self.resolve_query_target(query);
                }
                target
            }
            _ => self.resolve_option_target(&options),
        };
        // `options` is not read again after this call, so it is moved instead
        // of cloned.
        match self
            .fetch_target(cache, target, page, &sort, options)
            .await
        {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.fetch_target",
                    &format!("sort={sort}; page={page}; error={error}"),
                )
                .await;
                vec![]
            }
        }
    }

    /// Builds this provider's channel description for the given client version.
    /// Always returns `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Page 2 of the home listing with the "popular" sort produces the async
    /// block URL with `sort_by=video_viewed` and `from=2`.
    #[test]
    fn builds_home_url() {
        let expected = "https://yesporn.vip/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by=video_viewed&from=2";
        let provider = YespornProvider::new();
        let actual = provider.build_home_url(2, "popular");
        assert_eq!(actual, expected);
    }

    /// A two-word search on page 3 with the "rated" sort produces a slugged
    /// path, a `+`-joined `q` parameter, `sort_by=rating` and `from_videos=3`.
    #[test]
    fn builds_search_url() {
        let expected = "https://yesporn.vip/search/adriana-chechik/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=adriana+chechik&category_ids=&sort_by=rating&from_videos=3";
        let provider = YespornProvider::new();
        let actual = provider.build_search_url("adriana chechik", 3, "rated");
        assert_eq!(actual, expected);
    }

    /// A single listing card is parsed into one item with id, title, thumb,
    /// duration, views, rating and preview taken from the card markup.
    #[test]
    fn parses_yesporn_cards() {
        let listing_html = r#"
<section id="list_videos_most_recent_videos">
<div class="thumbs" id="list_videos_most_recent_videos_items">
<div class="thumb thumb_rel item">
<a href="https://yesporn.vip/video/66943/fuckin-taxes-andi-avalon/" title="Fuckin Taxes Andi Avalon">
<div class="img-holder">
<img data-original="https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66943/390x218/1.jpg" data-webp="https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66943/480x270/1.jpg" data-preview="https://yesporn.vip/get_file/6/x/66000/66943/66943_preview.mp4/" />
<div class="item-bottom"><div class="qualtiy">HD</div><div class="time">12:34</div></div>
</div>
<div class="title">Fuckin Taxes Andi Avalon</div>
<div class="thumb-bottom thumb-bottom-videos">
<div class="thumb-item">11K</div>
<div class="thumb-item">95%</div>
<div class="thumb-item thumb-item-date">today</div>
</div>
</a>
</div>
</div>
</section>
"#;
        let provider = YespornProvider::new();
        let cards = provider.parse_list_videos(listing_html).expect("parses cards");
        assert_eq!(cards.len(), 1);

        let card = &cards[0];
        assert_eq!(card.id, "66943");
        assert_eq!(card.title, "Fuckin Taxes Andi Avalon");
        assert_eq!(
            card.thumb,
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66943/480x270/1.jpg"
        );
        assert_eq!(card.duration, 754);
        assert_eq!(card.views, Some(11_000));
        assert_eq!(card.rating, Some(95.0));
        assert_eq!(
            card.preview.as_deref(),
            Some("https://yesporn.vip/get_file/6/x/66000/66943/66943_preview.mp4/")
        );
    }

    /// Detail-page markup fills in title, uploader, views, duration, tags,
    /// aspect ratio and upload date on a placeholder item.
    #[test]
    fn applies_detail_video_data() {
        let detail_url = "https://yesporn.vip/video/66898/the-world-s-greatest-art-class-a-nude-model-you-can-creampie-6noc5d/".to_string();
        let listing_thumb =
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66898/480x270/1.jpg"
                .to_string();
        let seed = VideoItem::new(
            "66898".to_string(),
            "placeholder".to_string(),
            detail_url,
            CHANNEL_ID.to_string(),
            listing_thumb,
            0,
        );
        let detail_html = r#"
<script>
var flashvars = {
video_categories: 'Creampie',
video_tags: 'creampie',
video_models: 'Theodora Day',
video_url: 'function/0/https:\/\/yesporn.vip\/get_file\/6\/abc\/66000\/66898\/66898.mp4\/',
video_url_text: '480p',
video_alt_url: 'function/0/https:\/\/yesporn.vip\/get_file\/6\/def\/66000\/66898\/66898_720p.mp4\/',
video_alt_url_text: '720p',
player_width: '882',
player_height: '496.9014084507'
};
</script>
<script type="application/ld+json">
{"@type":"VideoObject","thumbnailUrl":"https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66898/preview.jpg","uploadDate":"2026-03-18T00:00:00+00:00"}
</script>
<div class="title-holder">
<h1 class="title">The Worlds Greatest Art Class: A Nude Model You Can Creampie!</h1>
<div class="count-item">32:45</div>
<div class="count-item">2.6K</div>
<div class="count-item">03/18/2026</div>
</div>
<div class="top-options flex">
<div class="block flex">
<a href="https://yesporn.vip/members/15878/" class="sub-btn"><em>nonameforthis</em></a>
<a class="btn gold" href="https://yesporn.vip/channels/team-skeet-0nsaan/">TEAM SKEET</a>
<a class="btn gold" href="https://yesporn.vip/models/theodora-day-4kpl52/">Theodora Day</a>
</div>
</div>
<div class="tags-row-wrapper">
<a href="https://yesporn.vip/tags/creampie-j0u5ti/">creampie</a>
</div>
"#;
        let provider = YespornProvider::new();
        let detailed = provider
            .apply_detail_video(seed, detail_html)
            .expect("applies detail video");

        assert_eq!(
            detailed.title,
            "The Worlds Greatest Art Class: A Nude Model You Can Creampie!"
        );
        assert_eq!(detailed.uploader.as_deref(), Some("nonameforthis"));
        assert_eq!(
            detailed.uploaderUrl.as_deref(),
            Some("https://yesporn.vip/members/15878/")
        );
        assert_eq!(detailed.views, Some(2600));
        assert_eq!(detailed.duration, 1965);

        // At least one collected tag must normalize to "creampie".
        let has_creampie_tag = detailed
            .tags
            .iter()
            .flatten()
            .any(|tag| YespornProvider::normalize_title(tag) == "creampie");
        assert!(has_creampie_tag);

        assert!(detailed.aspectRatio.is_some());
        assert!(detailed.uploadedAt.is_some());
    }
}