From f88b789f25b97aaaca3a72fb39df83ed95ba0698 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 18 Mar 2026 21:48:05 +0000 Subject: [PATCH] yesporn --- src/providers/mod.rs | 6 + src/providers/yesporn.rs | 1513 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 1519 insertions(+) create mode 100644 src/providers/yesporn.rs diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 568ebd0..1d3c644 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -43,6 +43,7 @@ pub mod tokyomotion; pub mod viralxxxporn; pub mod xfree; pub mod xxthots; +pub mod yesporn; pub mod youjizz; // pub mod pornxp; pub mod chaturbate; @@ -123,6 +124,10 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| "xxthots", Arc::new(xxthots::XxthotsProvider::new()) as DynProvider, ); + m.insert( + "yesporn", + Arc::new(yesporn::YespornProvider::new()) as DynProvider, + ); m.insert( "sxyprn", Arc::new(sxyprn::SxyprnProvider::new()) as DynProvider, @@ -346,6 +351,7 @@ fn channel_metadata_for(id: &str) -> Option { "homoxxx" => Some(homoxxx::CHANNEL_METADATA), "missav" => Some(missav::CHANNEL_METADATA), "xxthots" => Some(xxthots::CHANNEL_METADATA), + "yesporn" => Some(yesporn::CHANNEL_METADATA), "sxyprn" => Some(sxyprn::CHANNEL_METADATA), "porn00" => Some(porn00::CHANNEL_METADATA), "youjizz" => Some(youjizz::CHANNEL_METADATA), diff --git a/src/providers/yesporn.rs b/src/providers/yesporn.rs new file mode 100644 index 0000000..19ef9d7 --- /dev/null +++ b/src/providers/yesporn.rs @@ -0,0 +1,1513 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; +use chrono::{DateTime, NaiveDate, Utc}; +use 
error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::sync::{Arc, RwLock}; +use std::{thread, vec}; +use wreq::Version; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "mixed", "hd"], + }; + +error_chain! { + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +const BASE_URL: &str = "https://yesporn.vip"; +const CHANNEL_ID: &str = "yesporn"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +#[derive(Debug, Clone)] +pub struct YespornProvider { + url: String, + categories: Arc>>, + tags: Arc>>, + models: Arc>>, + uploaders: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + Home, + Search(String), + CommonArchive(String), + MemberArchive(String), +} + +impl YespornProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tags: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + models: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + uploaders: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + let tags = Arc::clone(&self.tags); + let models = Arc::clone(&self.models); + 
let uploaders = Arc::clone(&self.uploaders); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await { + report_provider_error_background( + CHANNEL_ID, + "load_uploaders", + &error.to_string(), + ); + } + if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await { + report_provider_error_background(CHANNEL_ID, "load_models", &error.to_string()); + } + if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await { + report_provider_error_background( + CHANNEL_ID, + "load_categories", + &error.to_string(), + ); + } + if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await { + report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string()); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default(); + let models = self.models.read().map(|value| value.clone()).unwrap_or_default(); + let uploaders = self + .uploaders + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "YesPorn".to_string(), + description: + "YesPorn videos with latest, popular, rated, category, tag, model, and uploader archives." 
+ .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=yesporn.vip".to_string(), + status: "active".to_string(), + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse YesPorn archives by ranking.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "longest".to_string(), + title: "Longest".to_string(), + }, + FilterOption { + id: "commented".to_string(), + title: "Most Commented".to_string(), + }, + FilterOption { + id: "recommended".to_string(), + title: "Most Favorited".to_string(), + }, + FilterOption { + id: "random".to_string(), + title: "Random".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Browse a YesPorn category archive.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Tags".to_string(), + description: "Browse a YesPorn tag archive.".to_string(), + systemImage: "tag.fill".to_string(), + colorName: "green".to_string(), + options: tags, + multiSelect: false, + }, + ChannelOption { + id: "stars".to_string(), + title: "Models".to_string(), + description: "Browse a YesPorn model archive.".to_string(), + systemImage: "star.fill".to_string(), + colorName: "yellow".to_string(), + options: models, + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Uploaders".to_string(), + description: "Browse 
a YesPorn channel or community uploader archive." + .to_string(), + systemImage: "person.crop.square".to_string(), + colorName: "purple".to_string(), + options: uploaders, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn text_of(element: &ElementRef<'_>) -> String { + Self::decode_html(&Self::collapse_whitespace( + &element.text().collect::>().join(" "), + )) + } + + fn normalize_title(title: &str) -> String { + title + .trim() + .trim_start_matches('#') + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + fn clean_filter_title(title: &str) -> String { + let mut parts = Self::decode_html(title) + .trim() + .trim_start_matches('#') + .split_whitespace() + .map(ToOwned::to_owned) + .collect::>(); + + if parts.len() > 1 + && parts + .last() + .is_some_and(|value| value.chars().all(|ch| ch.is_ascii_digit())) + { + parts.pop(); + } + + parts.join(" ").trim().to_string() + } + + fn normalize_url(&self, url: &str) -> String { + if url.is_empty() { + return String::new(); + } + if url.starts_with("http://") || url.starts_with("https://") { + return url.to_string(); + } + if url.starts_with("//") { + return format!("https:{url}"); + } + if url.starts_with('/') { + return format!("{}{}", self.url, url); + } + format!("{}/{}", self.url, url.trim_start_matches("./")) + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + 
("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + fn sort_by(sort: &str) -> &'static str { + match sort { + "popular" | "viewed" | "trending" => "video_viewed", + "rated" | "rating" | "top" => "rating", + "longest" | "duration" => "duration", + "commented" | "comments" => "most_commented", + "recommended" | "favorited" | "favourited" => "most_favourited", + "random" => "rand()", + _ => "post_date", + } + } + + fn build_search_path_query(query: &str, separator: &str) -> String { + query.split_whitespace().collect::>().join(separator) + } + + fn build_home_url(&self, page: u16, sort: &str) -> String { + format!( + "{}/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by={}&from={page}", + self.url, + Self::sort_by(sort) + ) + } + + fn build_search_url(&self, query: &str, page: u16, sort: &str) -> String { + let path_query = Self::build_search_path_query(query, "-"); + let query_param = Self::build_search_path_query(query, "+"); + format!( + "{}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={query_param}&category_ids=&sort_by={}&from_videos={page}", + self.url, + Self::sort_by(sort) + ) + } + + fn build_common_archive_url(&self, archive_url: &str, page: u16, sort: &str) -> String { + format!( + "{}/?mode=async&function=get_block&block_id=list_videos_common_videos_list&sort_by={}&from={page}", + archive_url.trim_end_matches('/'), + Self::sort_by(sort) + ) + } + + fn build_member_archive_url(&self, member_url: &str, page: u16, sort: &str) -> String { + format!( + "{}/?mode=async&function=get_block&block_id=list_videos_uploaded_videos&sort_by={}&from_videos={page}", + member_url.trim_end_matches('/'), + Self::sort_by(sort) + ) + } + + async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result { + requester + .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11)) + .await + .map_err(|error| 
Error::from(format!("request failed for {url}: {error}"))) + } + + async fn fetch_archive_html( + &self, + requester: &mut Requester, + target: &Target, + page: u16, + sort: &str, + ) -> Result { + let archive_url = match target { + Target::CommonArchive(url) | Target::MemberArchive(url) => url, + _ => { + return Err(Error::from( + "fetch_archive_html called for non-archive target".to_string(), + )); + } + }; + + let response = requester + .get_raw_with_headers(archive_url, Self::html_headers(archive_url)) + .await + .map_err(|error| { + Error::from(format!("archive page request failed for {archive_url}: {error}")) + })?; + if !response.status().is_success() { + return Err(Error::from(format!( + "archive page request failed for {archive_url}: status {}", + response.status() + ))); + } + + let canonical_url = response.uri().to_string(); + let body = response + .text() + .await + .map_err(|error| Error::from(format!("archive page body read failed: {error}")))?; + + if page <= 1 { + return Ok(body); + } + + let async_url = match target { + Target::CommonArchive(_) => self.build_common_archive_url(&canonical_url, page, sort), + Target::MemberArchive(_) => self.build_member_archive_url(&canonical_url, page, sort), + _ => unreachable!(), + }; + + Self::fetch_html(requester, &async_url, &canonical_url).await + } + + fn push_unique(target: &Arc>>, item: FilterOption) { + if item.id.is_empty() || item.title.is_empty() { + return; + } + + if let Ok(mut values) = target.write() { + let normalized = Self::normalize_title(&item.title); + if !values + .iter() + .any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized) + { + values.push(item); + } + } + } + + async fn load_categories( + base_url: &str, + categories: Arc>>, + ) -> Result<()> { + let mut requester = Requester::new(); + for page in 1..=4 { + let url = format!( + 
"{base_url}/categories/?mode=async&function=get_block&block_id=list_categories_categories_list&sort_by=avg_videos_rating&from={page}" + ); + let html = Self::fetch_html(&mut requester, &url, &format!("{base_url}/categories/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_categories_categories_list_items a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/categories/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/categories/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + let title = Self::clean_filter_title(&title); + if title.is_empty() { + continue; + } + + Self::push_unique( + &categories, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + Ok(()) + } + + async fn load_tags(base_url: &str, tags: Arc>>) -> Result<()> { + let mut requester = Requester::new(); + let html = Self::fetch_html(&mut requester, &format!("{base_url}/tags/"), &format!("{base_url}/tags/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_tags_tags_list a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/tags/")) { + continue; + } + + let remainder = href.strip_prefix(&format!("{base_url}/tags/")).unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = Self::clean_filter_title(&Self::text_of(&element)); + if title.is_empty() || title.len() > 80 { + continue; + } + + Self::push_unique( + &tags, + 
FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + + Ok(()) + } + + async fn load_models(base_url: &str, models: Arc>>) -> Result<()> { + let mut requester = Requester::new(); + for page in 1..=5 { + let url = format!( + "{base_url}/models/?mode=async&function=get_block&block_id=list_models_models_list&sort_by=avg_videos_rating&from={page}" + ); + let html = Self::fetch_html(&mut requester, &url, &format!("{base_url}/models/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_models_models_list_items a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/models/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/models/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + let title = Self::clean_filter_title(&title); + if title.is_empty() { + continue; + } + + Self::push_unique( + &models, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + Ok(()) + } + + async fn load_uploaders( + base_url: &str, + uploaders: Arc>>, + ) -> Result<()> { + let mut requester = Requester::new(); + + { + let home_html = + Self::fetch_html(&mut requester, &format!("{base_url}/"), &format!("{base_url}/")) + .await?; + let home_document = Html::parse_document(&home_html); + let nav_selector = Self::selector("a.nav-link[href]")?; + + for element in home_document.select(&nav_selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/channels/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/channels/")) + 
.unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = Self::clean_filter_title(&Self::text_of(&element)); + if title.is_empty() { + continue; + } + + Self::push_unique( + &uploaders, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + for page in 1..=4 { + let url = format!( + "{base_url}/channels/?mode=async&function=get_block&block_id=list_dvds_channels_list&sort_by=avg_videos_rating&from={page}" + ); + let html = Self::fetch_html(&mut requester, &url, &format!("{base_url}/channels/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_dvds_channels_list_items a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/channels/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/channels/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + let title = Self::clean_filter_title(&title); + if title.is_empty() { + continue; + } + + Self::push_unique( + &uploaders, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + for page in 1..=3 { + let url = format!( + "{base_url}/members/?mode=async&function=get_block&block_id=list_members_members&sort_by=activity&from_members={page}" + ); + let html = Self::fetch_html(&mut requester, &url, &format!("{base_url}/members/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_members_members_items a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if 
!href.starts_with(&format!("{base_url}/members/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/members/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') || !remainder.chars().all(|value| value.is_ascii_digit()) { + continue; + } + + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + let title = Self::clean_filter_title(&title); + if title.is_empty() { + continue; + } + + Self::push_unique( + &uploaders, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + Ok(()) + } + + fn match_filter(options: &[FilterOption], query: &str) -> Option { + let normalized_query = Self::normalize_title(query); + options + .iter() + .find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query) + .map(|value| value.id.clone()) + } + + fn filters_need_refresh(&self) -> bool { + let uploaders_len = self.uploaders.read().map(|values| values.len()).unwrap_or_default(); + let models_len = self.models.read().map(|values| values.len()).unwrap_or_default(); + let tags_len = self.tags.read().map(|values| values.len()).unwrap_or_default(); + let categories_len = self + .categories + .read() + .map(|values| values.len()) + .unwrap_or_default(); + + uploaders_len <= 1 || models_len <= 1 || tags_len <= 1 || categories_len <= 1 + } + + async fn refresh_filter_catalogs(&self) { + if let Err(error) = Self::load_uploaders(&self.url, Arc::clone(&self.uploaders)).await { + report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.uploaders", &error.to_string()); + } + if let Err(error) = Self::load_models(&self.url, Arc::clone(&self.models)).await { + report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.models", &error.to_string()); + } + if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await { + 
report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.categories", &error.to_string()); + } + if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tags)).await { + report_provider_error_background(CHANNEL_ID, "refresh_filter_catalogs.tags", &error.to_string()); + } + } + + fn resolve_option_target(&self, options: &ServerOptions) -> Target { + if let Some(uploaders) = options.sites.as_deref() { + if uploaders.starts_with(&self.url) && uploaders != "all" { + if uploaders.contains("/members/") { + return Target::MemberArchive(uploaders.to_string()); + } + return Target::CommonArchive(uploaders.to_string()); + } + } + + if let Some(model) = options.stars.as_deref() { + if model.starts_with(&self.url) && model != "all" { + return Target::CommonArchive(model.to_string()); + } + } + + if let Some(tag) = options.filter.as_deref() { + if tag.starts_with(&self.url) && tag != "all" { + return Target::CommonArchive(tag.to_string()); + } + } + + if let Some(category) = options.categories.as_deref() { + if category.starts_with(&self.url) && category != "all" { + return Target::CommonArchive(category.to_string()); + } + } + + Target::Home + } + + fn resolve_query_target(&self, query: &str) -> Target { + if let Ok(uploaders) = self.uploaders.read() { + if let Some(value) = Self::match_filter(&uploaders, query) { + if value.contains("/members/") { + return Target::MemberArchive(value); + } + return Target::CommonArchive(value); + } + } + + if let Ok(models) = self.models.read() { + if let Some(value) = Self::match_filter(&models, query) { + return Target::CommonArchive(value); + } + } + + if let Ok(tags) = self.tags.read() { + if let Some(value) = Self::match_filter(&tags, query) { + return Target::CommonArchive(value); + } + } + + if let Ok(categories) = self.categories.read() { + if let Some(value) = Self::match_filter(&categories, query) { + return Target::CommonArchive(value); + } + } + + Target::Search(query.to_string()) + } + + fn 
build_target_request(&self, target: &Target, page: u16, sort: &str) -> (String, String) { + match target { + Target::Home => { + let referer = format!("{}/", self.url); + (self.build_home_url(page, sort), referer) + } + Target::Search(query) => { + let path_query = Self::build_search_path_query(query, "-"); + let referer = format!("{}/search/{path_query}/", self.url); + (self.build_search_url(query, page, sort), referer) + } + Target::CommonArchive(archive_url) => { + let referer = archive_url.clone(); + ( + self.build_common_archive_url(archive_url, page, sort), + referer, + ) + } + Target::MemberArchive(member_url) => { + let referer = member_url.clone(); + ( + self.build_member_archive_url(member_url, page, sort), + referer, + ) + } + } + } + + fn parse_duration(text: &str) -> u32 { + parse_time_to_seconds(text) + .and_then(|value| u32::try_from(value).ok()) + .unwrap_or(0) + } + + fn parse_views(text: &str) -> Option { + parse_abbreviated_number(text.trim()) + } + + fn parse_percent(text: &str) -> Option { + text.trim().trim_end_matches('%').trim().parse::().ok() + } + + fn list_container<'a>(&self, document: &'a Html) -> Result>> { + for selector_text in [ + "#list_videos_most_recent_videos_items", + "#list_videos_videos_list_search_result_items", + "#list_videos_common_videos_list_items", + "#list_videos_uploaded_videos_items", + "#list_videos_related_videos_items", + "#list_videos_recommended_videos_items", + ] { + let selector = Self::selector(selector_text)?; + if let Some(element) = document.select(&selector).next() { + return Ok(Some(element)); + } + } + Ok(None) + } + + fn parse_list_videos(&self, html: &str) -> Result> { + let document = Html::parse_document(html); + let Some(container) = self.list_container(&document)? 
else { + return Ok(vec![]); + }; + + let card_selector = Self::selector("div.thumb.thumb_rel.item")?; + let link_selector = Self::selector("a[href*=\"/video/\"]")?; + let image_selector = Self::selector("img")?; + let title_selector = Self::selector("div.title")?; + let meta_selector = Self::selector("div.thumb-bottom.thumb-bottom-videos div.thumb-item")?; + let duration_selector = Self::selector("div.item-bottom div.time")?; + + let mut items = Vec::new(); + + for card in container.select(&card_selector) { + let Some(link) = card.select(&link_selector).next() else { + continue; + }; + + let href = link.value().attr("href").unwrap_or_default(); + let page_url = self.normalize_url(href); + if page_url.is_empty() { + continue; + } + + let id = page_url + .trim_end_matches('/') + .split('/') + .nth_back(1) + .unwrap_or_default() + .to_string(); + if id.is_empty() { + continue; + } + + let image = card.select(&image_selector).next(); + let thumb = image + .and_then(|value| { + value + .value() + .attr("data-webp") + .or_else(|| value.value().attr("data-original")) + .or_else(|| value.value().attr("src")) + }) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let preview = image + .and_then(|value| value.value().attr("data-preview")) + .map(|value| self.normalize_url(value)); + + let title = card + .select(&title_selector) + .next() + .map(|value| Self::text_of(&value)) + .filter(|value| !value.is_empty()) + .or_else(|| { + link.value() + .attr("title") + .map(Self::decode_html) + .filter(|value| !value.is_empty()) + }); + let Some(title) = title else { + continue; + }; + + let duration = card + .select(&duration_selector) + .next() + .map(|value| Self::parse_duration(&Self::text_of(&value))) + .unwrap_or(0); + + let meta_values = card + .select(&meta_selector) + .map(|value| Self::text_of(&value)) + .collect::>(); + let views = meta_values.first().and_then(|value| Self::parse_views(value)); + let rating = meta_values.get(1).and_then(|value| 
Self::parse_percent(value)); + + let mut item = VideoItem::new( + id, + title, + page_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + if let Some(preview) = preview { + item = item.preview(preview); + } + if let Some(views) = views { + item = item.views(views); + } + if let Some(rating) = rating { + item = item.rating(rating); + } + + items.push(item); + } + + Ok(items) + } + + fn parse_upload_date_timestamp(html: &str) -> Option { + let iso_re = Self::regex(r#""uploadDate"\s*:\s*"([^"]+)""#).ok()?; + let iso = iso_re.captures(html)?.get(1)?.as_str(); + DateTime::parse_from_rfc3339(iso) + .map(|value| value.with_timezone(&Utc).timestamp() as u64) + .ok() + .or_else(|| { + NaiveDate::parse_from_str(iso, "%Y-%m-%d") + .ok() + .and_then(|value| value.and_hms_opt(0, 0, 0)) + .map(|value| value.and_utc().timestamp() as u64) + }) + } + + fn parse_count_values(document: &Html) -> Result<(Option, Option, Option)> { + let selector = Self::selector("div.title-holder div.count-item")?; + let mut duration = None; + let mut views = None; + let mut uploaded_at = None; + + for element in document.select(&selector) { + let text = Self::text_of(&element); + if duration.is_none() { + let parsed = Self::parse_duration(&text); + if parsed > 0 { + duration = Some(parsed); + continue; + } + } + + if views.is_none() { + if let Some(parsed) = Self::parse_views(&text) { + views = Some(parsed); + continue; + } + } + + if uploaded_at.is_none() { + if let Ok(date) = NaiveDate::parse_from_str(text.trim(), "%m/%d/%Y") { + uploaded_at = date + .and_hms_opt(0, 0, 0) + .map(|value| value.and_utc().timestamp() as u64); + } + } + } + + Ok((views, uploaded_at, duration)) + } + + fn parse_format_urls(html: &str) -> Vec<(String, String)> { + let keys = [ + ("video_url", "video_url_text"), + ("video_alt_url", "video_alt_url_text"), + ("video_alt_url2", "video_alt_url2_text"), + ("video_alt_url3", "video_alt_url3_text"), + ]; + let mut formats = Vec::new(); + + for (url_key, label_key) in keys 
{
            let url_re = match Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#)) {
                Ok(value) => value,
                Err(_) => continue,
            };
            let label_re = match Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#)) {
                Ok(value) => value,
                Err(_) => continue,
            };

            let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else {
                continue;
            };

            let mut url = url_match.as_str().replace("\\/", "/");
            if let Some(stripped) = url.strip_prefix("function/0/") {
                url = stripped.to_string();
            }

            let label = label_re
                .captures(html)
                .and_then(|value| value.get(1))
                .map(|value| value.as_str().trim().to_string())
                .filter(|value| !value.is_empty())
                .unwrap_or_else(|| "MP4".to_string());

            formats.push((url, label));
        }

        formats
    }

    /// Gathers tag-like labels for a video detail page.
    ///
    /// Sources, in order: the text of anchors under `div.tags-row-wrapper` whose `href`
    /// contains `/tags/`, then the comma-separated `video_categories`, `video_tags` and
    /// `video_models` values found in the raw page text. Entries are de-duplicated by
    /// their `normalize_title` form; the first spelling encountered is the one kept.
    ///
    /// # Errors
    /// Fails when a selector or one of the patterns cannot be built.
    fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
        let tag_selector = Self::selector("div.tags-row-wrapper a[href*=\"/tags/\"]")?;
        let mut tags: Vec<String> = document
            .select(&tag_selector)
            .map(|element| Self::text_of(&element))
            .filter(|value| !value.is_empty())
            .collect();

        for pattern in [
            r#"video_categories:\s*'([^']*)'"#,
            r#"video_tags:\s*'([^']*)'"#,
            r#"video_models:\s*'([^']*)'"#,
        ] {
            let re = Self::regex(pattern)?;
            if let Some(value) = re.captures(html).and_then(|caps| caps.get(1)) {
                tags.extend(
                    value
                        .as_str()
                        .split(',')
                        .map(str::trim)
                        .map(Self::decode_html)
                        .filter(|entry| !entry.is_empty()),
                );
            }
        }

        let mut unique: Vec<String> = Vec::with_capacity(tags.len());
        for value in tags {
            let normalized = Self::normalize_title(&value);
            if normalized.is_empty() {
                continue;
            }
            let already_present = unique
                .iter()
                .any(|existing| Self::normalize_title(existing) == normalized);
            if !already_present {
                unique.push(value);
            }
        }

        Ok(unique)
    }

    /// Derives the player aspect ratio (width / height) from the `player_width` and
    /// `player_height` values embedded in the page text.
    ///
    /// Returns `None` when either value is missing, cannot be parsed as a number, or is
    /// not strictly positive (a zero or negative dimension has no meaningful ratio).
    fn parse_aspect_ratio(html: &str) -> Option<f32> {
        // Both dimensions use the same `key: '<number>'` shape, so share the extraction.
        let dimension = |key: &str| -> Option<f32> {
            let re = Self::regex(&format!(r#"{key}:\s*'([0-9.]+)'"#)).ok()?;
            re.captures(html)?.get(1)?.as_str().parse::<f32>().ok()
        };

        let width = dimension("player_width")?;
        let height = dimension("player_height")?;

        (width > 0.0 && height > 0.0).then_some(width / height)
    }

    /// Fills in `item` from the HTML of its detail page and returns the updated item.
    ///
    /// The title, uploader and tags found on the page replace what the item carried;
    /// views, upload time, duration, aspect ratio and thumbnail are only filled in when
    /// the item does not already have a value for them.
    ///
    /// # Errors
    /// Fails when a selector cannot be built, or when `collect_tags` or
    /// `parse_count_values` fail.
    fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
        let document = Html::parse_document(html);
        let title_selector = Self::selector("h1.title")?;
        let member_selector = Self::selector("a.sub-btn[href*=\"/members/\"]")?;
        let member_name_selector = Self::selector("em")?;
        let channel_selector = Self::selector("a.btn.gold[href*=\"/channels/\"]")?;
        let model_selector = Self::selector("a.btn.gold[href*=\"/models/\"]")?;

        if let Some(title) = document
            .select(&title_selector)
            .next()
            .map(|value| Self::text_of(&value))
            .filter(|value| !value.is_empty())
        {
            item.title = title;
        }

        // Prefer the member link as uploader (its <em> text if present, otherwise the
        // whole link text); fall back to the channel link when there is no member link.
        let uploader_source = match document.select(&member_selector).next() {
            Some(member) => {
                let name = member
                    .select(&member_name_selector)
                    .next()
                    .map(|value| Self::text_of(&value))
                    .filter(|value| !value.is_empty())
                    .unwrap_or_else(|| Self::text_of(&member));
                Some((name, member))
            }
            None => document
                .select(&channel_selector)
                .next()
                .map(|channel| (Self::text_of(&channel), channel)),
        };
        if let Some((uploader, element)) = uploader_source {
            if !uploader.is_empty() {
                item.uploader = Some(uploader);
            }
            if let Some(uploader_url) = element
                .value()
                .attr("href")
                .map(|value| self.normalize_url(value))
                .filter(|value| !value.is_empty())
            {
                item.uploaderUrl = Some(uploader_url);
            }
        }

        // The first channel and the first model link are also exposed as tags unless an
        // equivalent tag (by normalized title) is already present.
        let mut tags = Self::collect_tags(&document, html)?;
        for selector in [&channel_selector, &model_selector] {
            let Some(element) = document.select(selector).next() else {
                continue;
            };
            let value = Self::text_of(&element);
            if value.is_empty() {
                continue;
            }
            let normalized = Self::normalize_title(&value);
            let already_present = tags
                .iter()
                .any(|existing| Self::normalize_title(existing) == normalized);
            if !already_present {
                tags.push(value);
            }
        }
        if !tags.is_empty() {
            item.tags = Some(tags);
        }

        let (views, uploaded_at, duration) = Self::parse_count_values(&document)?;
        if item.views.is_none() {
            item.views = views;
        }
        if item.uploadedAt.is_none() {
            item.uploadedAt = uploaded_at.or_else(|| Self::parse_upload_date_timestamp(html));
        }
        if item.duration == 0 {
            if let Some(duration) = duration {
                item.duration = duration;
            }
        }

        if item.aspectRatio.is_none() {
            item.aspectRatio = Self::parse_aspect_ratio(html);
        }

        // Only look for a poster when the item has no thumbnail yet.
        if item.thumb.is_empty() {
            if let Some(poster) = Self::regex(r#""thumbnailUrl"\s*:\s*"([^"]+)""#)
                .ok()
                .and_then(|re| re.captures(html))
                .and_then(|caps| caps.get(1))
                .map(|value| Self::decode_html(value.as_str()))
            {
                item.thumb = poster;
            }
        }

        // The parsed format URLs are discarded: nothing is attached to the item here.
        // NOTE(review): presumably kept so the helper stays referenced — confirm intent.
        let _ = Self::parse_format_urls(html);

        Ok(item)
    }

    /// Fetches the detail page at `item.url` and merges its data into `item`.
    ///
    /// Best effort: when the request or the parsing fails, the problem is reported in
    /// the background and the item is returned unchanged.
    async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
        let mut requester =
            requester_or_default(options, CHANNEL_ID, "yesporn.enrich_item.missing_requester");

        let html = match Self::fetch_html(&mut requester, &item.url, &item.url).await {
            Ok(html) => html,
            Err(error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "enrich_item.fetch_html",
                    &format!("url={}; error={error}", item.url),
                );
                return item;
            }
        };

        // A copy goes into the parser so the original survives as the fallback.
        match self.apply_detail_video(item.clone(), &html) {
            Ok(value) => value,
            Err(error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "enrich_item.apply_detail_video",
                    &format!("url={}; error={error}", item.url),
                );
                item
            }
        }
    }

    /// Loads one listing page for `target`, enriches each entry from its detail page and
    /// stores the result in `cache` under the request URL.
    ///
    /// A cache entry younger than five minutes is returned as is. When the request
    /// fails, the response is blank, or the listing yields no items, the previously
    /// cached items (possibly none) are returned instead.
    ///
    /// # Errors
    /// Fails when `parse_list_videos` fails.
    async fn fetch_target(
        &self,
        cache: VideoCache,
        target: Target,
        page: u16,
        sort: &str,
        options: ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let (url, referer) = self.build_target_request(&target, page, sort);
        let old_items = match cache.get(&url) {
            Some((time, items)) => {
                if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                    return Ok(items.clone());
                }
                items.clone()
            }
            None => vec![],
        };

        let mut requester =
            requester_or_default(&options, CHANNEL_ID, "yesporn.fetch_target.missing_requester");

        // Archive targets go through their own fetch routine; everything else is a
        // plain page request. Failures carry the report context and message along.
        let fetched = match &target {
            Target::CommonArchive(_) | Target::MemberArchive(_) => self
                .fetch_archive_html(&mut requester, &target, page, sort)
                .await
                .map_err(|error| {
                    (
                        "fetch_target.archive_request",
                        format!("url={url}; referer={referer}; error={error}"),
                    )
                }),
            Target::Home | Target::Search(_) => {
                Self::fetch_html(&mut requester, &url, &referer)
                    .await
                    .map_err(|error| {
                        ("fetch_target.request", format!("url={url}; error={error}"))
                    })
            }
        };
        let html = match fetched {
            Ok(value) => value,
            Err((context, message)) => {
                report_provider_error(CHANNEL_ID, context, &message).await;
                return Ok(old_items);
            }
        };

        if html.trim().is_empty() {
            report_provider_error(CHANNEL_ID, "fetch_target.empty_response", &format!("url={url}"))
                .await;
            return Ok(old_items);
        }

        let items = self.parse_list_videos(&html)?;
        if items.is_empty() {
            return Ok(old_items);
        }

        // `buffered` rather than `buffer_unordered`: up to four detail requests still run
        // at once, but results come back in listing order, so the requested sort order
        // is preserved in both the response and the cache.
        let enriched = stream::iter(items.into_iter().map(|item| {
            let provider = self.clone();
            let options = options.clone();
            async move { provider.enrich_item(item, &options).await }
        }))
        .buffered(4)
        .collect::<Vec<_>>()
        .await;

        cache.remove(&url);
        cache.insert(url, enriched.clone());
        Ok(enriched)
    }
}

#[async_trait]
impl Provider for YespornProvider {
    /// Returns the videos for the given query (or, without a query, for the selected
    /// options) on the requested page.
    ///
    /// `page` falls back to 1 when it is not a valid number or is zero. Any error from
    /// `fetch_target` is reported and an empty list is returned.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);

        let target = match query {
            Some(query) if !query.trim().is_empty() => {
                let query = query.trim();
                let mut target = self.resolve_query_target(query);
                // A query that only resolved to a plain search is resolved again after
                // the filter catalogs have been refreshed, if they are due for it.
                if matches!(target, Target::Search(_)) && self.filters_need_refresh() {
                    self.refresh_filter_catalogs().await;
                    target = self.resolve_query_target(query);
                }
                target
            }
            _ => self.resolve_option_target(&options),
        };

        // `options` is not needed afterwards, so it is moved rather than cloned.
        match self.fetch_target(cache, target, page, &sort, options).await {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.fetch_target",
                    &format!("sort={sort}; page={page}; error={error}"),
                )
                .await;
                vec![]
            }
        }
    }

    /// Returns the channel description built by `build_channel` for `clientversion`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn builds_home_url() {
        let provider = YespornProvider::new();
        assert_eq!(
            provider.build_home_url(2, "popular"),
            "https://yesporn.vip/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by=video_viewed&from=2"
        );
    }

    #[test]
    fn builds_search_url() {
        let provider = YespornProvider::new();
        assert_eq!(
            provider.build_search_url("adriana chechik", 3, "rated"),
            "https://yesporn.vip/search/adriana-chechik/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=adriana+chechik&category_ids=&sort_by=rating&from_videos=3"
        );
    }

    #[test]
    fn parses_yesporn_cards() {
        let provider = YespornProvider::new();
        // NOTE(review): as written this fixture holds no card markup, so the assertions
        // below cannot pass — confirm the HTML was not lost and restore it.
        let html = r#"


        "#;

        let items = provider.parse_list_videos(html).expect("parses cards");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].id, "66943");
        assert_eq!(items[0].title, "Fuckin Taxes Andi Avalon");
        assert_eq!(
            items[0].thumb,
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66943/480x270/1.jpg"
        );
        assert_eq!(items[0].duration, 754);
        assert_eq!(items[0].views, Some(11_000));
        assert_eq!(items[0].rating, Some(95.0));
        assert_eq!(
            items[0].preview.as_deref(),
            Some("https://yesporn.vip/get_file/6/x/66000/66943/66943_preview.mp4/")
        );
    }

    #[test]
    fn applies_detail_video_data() {
        let provider = YespornProvider::new();
        let item = VideoItem::new(
            "66898".to_string(),
            "placeholder".to_string(),
            "https://yesporn.vip/video/66898/the-world-s-greatest-art-class-a-nude-model-you-can-creampie-6noc5d/".to_string(),
            CHANNEL_ID.to_string(),
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66898/480x270/1.jpg"
                .to_string(),
            0,
        );
        // NOTE(review): only the text content of this fixture is present; the element
        // markup the selectors rely on appears to be missing — confirm and restore it.
        let html = r#"



            The World’s Greatest Art Class: A Nude Model You Can Creampie!

            32:45
            2.6K
            03/18/2026


            creampie
        "#;

        let item = provider
            .apply_detail_video(item, html)
            .expect("applies detail video");
        assert_eq!(
            item.title,
            "The World’s Greatest Art Class: A Nude Model You Can Creampie!"
        );
        assert_eq!(item.uploader.as_deref(), Some("nonameforthis"));
        assert_eq!(
            item.uploaderUrl.as_deref(),
            Some("https://yesporn.vip/members/15878/")
        );
        assert_eq!(item.views, Some(2600));
        assert_eq!(item.duration, 1965);
        assert!(item.tags.as_ref().is_some_and(|values| values
            .iter()
            .any(|value| YespornProvider::normalize_title(value) == "creampie")));
        assert!(item.aspectRatio.is_some());
        assert!(item.uploadedAt.is_some());
    }
}