use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
    Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use wreq::Version;

/// Registry metadata describing this provider's channel grouping.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "mixed", "hd"],
    };

error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}

const BASE_URL: &str = "https://yesporn.vip";
const CHANNEL_ID: &str = "yesporn";
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

/// Scraping provider for yesporn.vip.
///
/// The filter catalogs (categories, tags, models, uploaders) are populated in
/// the background by [`YespornProvider::spawn_initial_load`] and shared behind
/// `RwLock`s so that `build_channel` can snapshot them at any time.
#[derive(Debug, Clone)]
pub struct YespornProvider {
    /// Base site URL (normally `BASE_URL`).
    url: String,
    categories: Arc<RwLock<Vec<FilterOption>>>,
    tags: Arc<RwLock<Vec<FilterOption>>>,
    models: Arc<RwLock<Vec<FilterOption>>>,
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
}

/// Kind of listing a request resolves to.
#[derive(Debug, Clone)]
enum Target {
    /// Front-page "most recent" listing.
    Home,
    /// Free-text search; payload is the query string.
    Search(String),
    /// Category / tag / model / channel archive; payload is the archive URL.
    CommonArchive(String),
    /// Member (uploader) archive; payload is the member URL.
    MemberArchive(String),
}

impl YespornProvider {
    /// Create the provider with placeholder "All" filter entries and kick off
    /// background catalog loading.
    pub fn new() -> Self {
        let provider = Self {
            url: BASE_URL.to_string(),
            categories: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
            tags: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
            models: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
            uploaders: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
        };
        provider.spawn_initial_load();
        provider
    }

    /// Spawn a dedicated OS thread with its own single-threaded Tokio runtime
    /// to populate the filter catalogs without blocking the caller. Failures
    /// are reported via `report_provider_error_background` and swallowed.
    fn spawn_initial_load(&self) {
        let url = self.url.clone();
        let categories = Arc::clone(&self.categories);
        let tags = Arc::clone(&self.tags);
        let models = Arc::clone(&self.models);
        let uploaders = Arc::clone(&self.uploaders);
        thread::spawn(move || {
            let runtime = match tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
            {
                Ok(runtime) => runtime,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "spawn_initial_load.runtime_build",
                        &error.to_string(),
                    );
                    return;
                }
            };
            runtime.block_on(async move {
                if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "load_uploaders",
                        &error.to_string(),
                    );
                }
                if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await {
                    report_provider_error_background(CHANNEL_ID, "load_models", &error.to_string());
                }
                if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "load_categories",
                        &error.to_string(),
                    );
                }
                if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
                    report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
                }
            });
        });
    }

    /// Assemble the channel descriptor, snapshotting the current filter
    /// catalogs (empty snapshot if a lock is poisoned).
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        let categories = self
            .categories
            .read()
            .map(|value| value.clone())
            .unwrap_or_default();
        let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
        let models = self.models.read().map(|value| value.clone()).unwrap_or_default();
        let uploaders = self
            .uploaders
            .read()
            .map(|value| value.clone())
            .unwrap_or_default();
        Channel {
            id: CHANNEL_ID.to_string(),
            name: "YesPorn".to_string(),
            description: "YesPorn videos with latest, popular, rated, category, tag, model, and uploader archives."
                .to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=yesporn.vip".to_string(),
            status: "active".to_string(),
            categories: categories.iter().map(|value| value.title.clone()).collect(),
            options: vec![
                ChannelOption {
                    id: "sort".to_string(),
                    title: "Sort".to_string(),
                    description: "Browse YesPorn archives by ranking.".to_string(),
                    systemImage: "list.number".to_string(),
                    colorName: "blue".to_string(),
                    options: vec![
                        FilterOption {
                            id: "new".to_string(),
                            title: "Latest".to_string(),
                        },
                        FilterOption {
                            id: "popular".to_string(),
                            title: "Most Viewed".to_string(),
                        },
                        FilterOption {
                            id: "rated".to_string(),
                            title: "Top Rated".to_string(),
                        },
                        FilterOption {
                            id: "longest".to_string(),
                            title: "Longest".to_string(),
                        },
                        FilterOption {
                            id: "commented".to_string(),
                            title: "Most Commented".to_string(),
                        },
                        FilterOption {
                            id: "recommended".to_string(),
                            title: "Most Favorited".to_string(),
                        },
                        FilterOption {
                            id: "random".to_string(),
                            title: "Random".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
                ChannelOption {
                    id: "categories".to_string(),
                    title: "Categories".to_string(),
                    description: "Browse a YesPorn category archive.".to_string(),
                    systemImage: "square.grid.2x2".to_string(),
                    colorName: "orange".to_string(),
                    options: categories,
                    multiSelect: false,
                },
                ChannelOption {
                    id: "filter".to_string(),
                    title: "Tags".to_string(),
                    description: "Browse a YesPorn tag archive.".to_string(),
                    systemImage: "tag.fill".to_string(),
                    colorName: "green".to_string(),
                    options: tags,
                    multiSelect: false,
                },
                ChannelOption {
                    id: "stars".to_string(),
                    title: "Models".to_string(),
                    description: "Browse a YesPorn model archive.".to_string(),
                    systemImage: "star.fill".to_string(),
                    colorName: "yellow".to_string(),
                    options: models,
                    multiSelect: false,
                },
                ChannelOption {
                    id: "sites".to_string(),
                    title: "Uploaders".to_string(),
                    description: "Browse a YesPorn channel or community uploader archive."
                        .to_string(),
                    systemImage: "person.crop.square".to_string(),
                    colorName: "purple".to_string(),
                    options: uploaders,
                    multiSelect: false,
                },
            ],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }

    /// Parse a CSS selector, mapping failures into this module's error type.
    fn selector(value: &str) -> Result<Selector> {
        Selector::parse(value)
            .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
    }

    /// Compile a regex, mapping failures into this module's error type.
    fn regex(value: &str) -> Result<Regex> {
        Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
    }

    /// Decode HTML entities; falls back to the input verbatim on failure.
    fn decode_html(text: &str) -> String {
        decode(text.as_bytes())
            .to_string()
            .unwrap_or_else(|_| text.to_string())
    }

    /// Collapse all runs of whitespace to single spaces.
    fn collapse_whitespace(text: &str) -> String {
        text.split_whitespace().collect::<Vec<_>>().join(" ")
    }

    /// Concatenated, entity-decoded, whitespace-collapsed text of an element.
    fn text_of(element: &ElementRef<'_>) -> String {
        Self::decode_html(&Self::collapse_whitespace(
            &element.text().collect::<Vec<_>>().join(" "),
        ))
    }

    /// Canonical lowercase form of a title used for duplicate/equality checks.
    fn normalize_title(title: &str) -> String {
        title
            .trim()
            .trim_start_matches('#')
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ")
            .to_ascii_lowercase()
    }

    /// Clean a filter label: decode entities, drop a leading `#`, and strip a
    /// trailing all-digit token (site labels append item counts).
    fn clean_filter_title(title: &str) -> String {
        let mut parts = Self::decode_html(title)
            .trim()
            .trim_start_matches('#')
            .split_whitespace()
            .map(ToOwned::to_owned)
            .collect::<Vec<_>>();
        if parts.len() > 1
            && parts
                .last()
                .is_some_and(|value| value.chars().all(|ch| ch.is_ascii_digit()))
        {
            parts.pop();
        }
        parts.join(" ").trim().to_string()
    }

    /// Resolve a possibly-relative URL against the provider base URL.
    fn normalize_url(&self, url: &str) -> String {
        if url.is_empty() {
            return String::new();
        }
        if url.starts_with("http://") || url.starts_with("https://") {
            return url.to_string();
        }
        if url.starts_with("//") {
            return format!("https:{url}");
        }
        if url.starts_with('/') {
            return format!("{}{}", self.url, url);
        }
        format!("{}/{}", self.url, url.trim_start_matches("./"))
    }

    /// Standard browser-like headers for HTML page requests.
    fn html_headers(referer: &str) -> Vec<(String, String)> {
        vec![
            ("User-Agent".to_string(), FIREFOX_UA.to_string()),
            ("Accept".to_string(), HTML_ACCEPT.to_string()),
            ("Referer".to_string(), referer.to_string()),
        ]
    }

    /// Map a client sort keyword to the site's `sort_by` parameter value.
    fn sort_by(sort: &str) -> &'static str {
        match sort {
            "popular" | "viewed" | "trending" => "video_viewed",
            "rated" | "rating" | "top" => "rating",
            "longest" | "duration" => "duration",
            "commented" | "comments" => "most_commented",
            "recommended" | "favorited" | "favourited" => "most_favourited",
            "random" => "rand()",
            _ => "post_date",
        }
    }

    /// Join whitespace-separated query words with `separator` (path vs query form).
    fn build_search_path_query(query: &str, separator: &str) -> String {
        query.split_whitespace().collect::<Vec<_>>().join(separator)
    }

    /// Async-block URL for the front-page "most recent" listing.
    fn build_home_url(&self, page: u16, sort: &str) -> String {
        format!(
            "{}/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by={}&from={page}",
            self.url,
            Self::sort_by(sort)
        )
    }

    /// Async-block URL for a search-results listing.
    fn build_search_url(&self, query: &str, page: u16, sort: &str) -> String {
        let path_query = Self::build_search_path_query(query, "-");
        let query_param = Self::build_search_path_query(query, "+");
        format!(
            "{}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={query_param}&category_ids=&sort_by={}&from_videos={page}",
            self.url,
            Self::sort_by(sort)
        )
    }

    /// Async-block URL for a category/tag/model/channel archive page.
    fn build_common_archive_url(&self, archive_url: &str, page: u16, sort: &str) -> String {
        format!(
            "{}/?mode=async&function=get_block&block_id=list_videos_common_videos_list&sort_by={}&from={page}",
            archive_url.trim_end_matches('/'),
            Self::sort_by(sort)
        )
    }

    /// Async-block URL for a member (uploader) archive page.
    fn build_member_archive_url(&self, member_url: &str, page: u16, sort: &str) -> String {
        format!(
            "{}/?mode=async&function=get_block&block_id=list_videos_uploaded_videos&sort_by={}&from_videos={page}",
            member_url.trim_end_matches('/'),
            Self::sort_by(sort)
        )
    }

    /// GET an HTML page with browser-like headers over HTTP/1.1.
    async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
        requester
            .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
            .await
            .map_err(|error| Error::from(format!("request failed for {url}: {error}")))
    }

    /// Fetch an archive listing. The first request hits the archive URL itself
    /// (following redirects to learn the canonical URL); for pages beyond the
    /// first, a follow-up async-block request is issued against the canonical
    /// URL so pagination works after redirects.
    async fn fetch_archive_html(
        &self,
        requester: &mut Requester,
        target: &Target,
        page: u16,
        sort: &str,
    ) -> Result<String> {
        let archive_url = match target {
            Target::CommonArchive(url) | Target::MemberArchive(url) => url,
            _ => {
                return Err(Error::from(
                    "fetch_archive_html called for non-archive target".to_string(),
                ));
            }
        };
        let response = requester
            .get_raw_with_headers(archive_url, Self::html_headers(archive_url))
            .await
            .map_err(|error| {
                Error::from(format!("archive page request failed for {archive_url}: {error}"))
            })?;
        if !response.status().is_success() {
            return Err(Error::from(format!(
                "archive page request failed for {archive_url}: status {}",
                response.status()
            )));
        }
        let canonical_url = response.uri().to_string();
        let body = response
            .text()
            .await
            .map_err(|error| Error::from(format!("archive page body read failed: {error}")))?;
        if page <= 1 {
            return Ok(body);
        }
        let async_url = match target {
            Target::CommonArchive(_) => self.build_common_archive_url(&canonical_url, page, sort),
            Target::MemberArchive(_) => self.build_member_archive_url(&canonical_url, page, sort),
            _ => unreachable!(),
        };
        Self::fetch_html(requester, &async_url, &canonical_url).await
    }

    /// Append a filter option unless its id or normalized title already exists.
    fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
        if item.id.is_empty() || item.title.is_empty() {
            return;
        }
        if let Ok(mut values) = target.write() {
            let normalized = Self::normalize_title(&item.title);
            if !values.iter().any(|value| {
                value.id == item.id || Self::normalize_title(&value.title) == normalized
            }) {
                values.push(item);
            }
        }
    }

    /// Scrape the top-rated category archive pages into `categories`.
    async fn load_categories(
        base_url: &str,
        categories: Arc<RwLock<Vec<FilterOption>>>,
    ) -> Result<()> {
        let mut requester = Requester::new();
        for page in 1..=4 {
            let url = format!(
                "{base_url}/categories/?mode=async&function=get_block&block_id=list_categories_categories_list&sort_by=avg_videos_rating&from={page}"
            );
            let html =
                Self::fetch_html(&mut requester, &url, &format!("{base_url}/categories/")).await?;
            let document = Html::parse_document(&html);
            let selector = Self::selector("#list_categories_categories_list_items a[href]")?;
            for element in document.select(&selector) {
                let href = element
                    .value()
                    .attr("href")
                    .unwrap_or_default()
                    .trim_end_matches('/');
                if !href.starts_with(&format!("{base_url}/categories/")) {
                    continue;
                }
                // Only direct children of /categories/ (no nested paths).
                let remainder = href
                    .strip_prefix(&format!("{base_url}/categories/"))
                    .unwrap_or_default();
                if remainder.is_empty() || remainder.contains('/') {
                    continue;
                }
                let title = element
                    .value()
                    .attr("title")
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(|| Self::text_of(&element));
                let title = Self::clean_filter_title(&title);
                if title.is_empty() {
                    continue;
                }
                Self::push_unique(
                    &categories,
                    FilterOption {
                        id: format!("{href}/"),
                        title,
                    },
                );
            }
        }
        Ok(())
    }

    /// Scrape the tags index page into `tags` (labels capped at 80 chars).
    async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
        let mut requester = Requester::new();
        let html = Self::fetch_html(
            &mut requester,
            &format!("{base_url}/tags/"),
            &format!("{base_url}/tags/"),
        )
        .await?;
        let document = Html::parse_document(&html);
        let selector = Self::selector("#list_tags_tags_list a[href]")?;
        for element in document.select(&selector) {
            let href = element
                .value()
                .attr("href")
                .unwrap_or_default()
                .trim_end_matches('/');
            if !href.starts_with(&format!("{base_url}/tags/")) {
                continue;
            }
            let remainder = href
                .strip_prefix(&format!("{base_url}/tags/"))
                .unwrap_or_default();
            if remainder.is_empty() || remainder.contains('/') {
                continue;
            }
            let title = Self::clean_filter_title(&Self::text_of(&element));
            if title.is_empty() || title.len() > 80 {
                continue;
            }
            Self::push_unique(
                &tags,
                FilterOption {
                    id: format!("{href}/"),
                    title,
                },
            );
        }
        Ok(())
    }

    /// Scrape the top-rated model archive pages into `models`.
    async fn load_models(base_url: &str, models: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
        let mut requester = Requester::new();
        for page in 1..=5 {
            let url = format!(
                "{base_url}/models/?mode=async&function=get_block&block_id=list_models_models_list&sort_by=avg_videos_rating&from={page}"
            );
            let html =
                Self::fetch_html(&mut requester, &url, &format!("{base_url}/models/")).await?;
            let document = Html::parse_document(&html);
            let selector = Self::selector("#list_models_models_list_items a[href]")?;
            for element in document.select(&selector) {
                let href = element
                    .value()
                    .attr("href")
                    .unwrap_or_default()
                    .trim_end_matches('/');
                if !href.starts_with(&format!("{base_url}/models/")) {
                    continue;
                }
                let remainder = href
                    .strip_prefix(&format!("{base_url}/models/"))
                    .unwrap_or_default();
                if remainder.is_empty() || remainder.contains('/') {
                    continue;
                }
                let title = element
                    .value()
                    .attr("title")
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(|| Self::text_of(&element));
                let title = Self::clean_filter_title(&title);
                if title.is_empty() {
                    continue;
                }
                Self::push_unique(
                    &models,
                    FilterOption {
                        id: format!("{href}/"),
                        title,
                    },
                );
            }
        }
        Ok(())
    }

    /// Populate `uploaders` from three sources: home-page nav channel links,
    /// the channels archive, and the members archive (numeric member ids only).
    async fn load_uploaders(
        base_url: &str,
        uploaders: Arc<RwLock<Vec<FilterOption>>>,
    ) -> Result<()> {
        let mut requester = Requester::new();
        {
            let home_html =
                Self::fetch_html(&mut requester, &format!("{base_url}/"), &format!("{base_url}/"))
                    .await?;
            let home_document = Html::parse_document(&home_html);
            let nav_selector = Self::selector("a.nav-link[href]")?;
            for element in home_document.select(&nav_selector) {
                let href = element
                    .value()
                    .attr("href")
                    .unwrap_or_default()
                    .trim_end_matches('/');
                if !href.starts_with(&format!("{base_url}/channels/")) {
                    continue;
                }
                let remainder = href
                    .strip_prefix(&format!("{base_url}/channels/"))
                    .unwrap_or_default();
                if remainder.is_empty() || remainder.contains('/') {
                    continue;
                }
                let title = Self::clean_filter_title(&Self::text_of(&element));
                if title.is_empty() {
                    continue;
                }
                Self::push_unique(
                    &uploaders,
                    FilterOption {
                        id: format!("{href}/"),
                        title,
                    },
                );
            }
        }
        for page in 1..=4 {
            let url = format!(
                "{base_url}/channels/?mode=async&function=get_block&block_id=list_dvds_channels_list&sort_by=avg_videos_rating&from={page}"
            );
            let html =
                Self::fetch_html(&mut requester, &url, &format!("{base_url}/channels/")).await?;
            let document = Html::parse_document(&html);
            let selector = Self::selector("#list_dvds_channels_list_items a[href]")?;
            for element in document.select(&selector) {
                let href = element
                    .value()
                    .attr("href")
                    .unwrap_or_default()
                    .trim_end_matches('/');
                if !href.starts_with(&format!("{base_url}/channels/")) {
                    continue;
                }
                let remainder = href
                    .strip_prefix(&format!("{base_url}/channels/"))
                    .unwrap_or_default();
                if remainder.is_empty() || remainder.contains('/') {
                    continue;
                }
                let title = element
                    .value()
                    .attr("title")
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(|| Self::text_of(&element));
                let title = Self::clean_filter_title(&title);
                if title.is_empty() {
                    continue;
                }
                Self::push_unique(
                    &uploaders,
                    FilterOption {
                        id: format!("{href}/"),
                        title,
                    },
                );
            }
        }
        for page in 1..=3 {
            let url = format!(
                "{base_url}/members/?mode=async&function=get_block&block_id=list_members_members&sort_by=activity&from_members={page}"
            );
            let html =
                Self::fetch_html(&mut requester, &url, &format!("{base_url}/members/")).await?;
            let document = Html::parse_document(&html);
            let selector = Self::selector("#list_members_members_items a[href]")?;
            for element in document.select(&selector) {
                let href = element
                    .value()
                    .attr("href")
                    .unwrap_or_default()
                    .trim_end_matches('/');
                if !href.starts_with(&format!("{base_url}/members/")) {
                    continue;
                }
                // Member pages are keyed by a purely numeric id.
                let remainder = href
                    .strip_prefix(&format!("{base_url}/members/"))
                    .unwrap_or_default();
                if remainder.is_empty()
                    || remainder.contains('/')
                    || !remainder.chars().all(|value| value.is_ascii_digit())
                {
                    continue;
                }
                let title = element
                    .value()
                    .attr("title")
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .map(ToOwned::to_owned)
                    .unwrap_or_else(|| Self::text_of(&element));
                let title = Self::clean_filter_title(&title);
                if title.is_empty() {
                    continue;
                }
                Self::push_unique(
                    &uploaders,
                    FilterOption {
                        id: format!("{href}/"),
                        title,
                    },
                );
            }
        }
        Ok(())
    }

    /// Find a non-"all" option whose normalized title equals the query; returns its id.
    fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
        let normalized_query = Self::normalize_title(query);
        options
            .iter()
            .find(|value| {
                value.id != "all" && Self::normalize_title(&value.title) == normalized_query
            })
            .map(|value| value.id.clone())
    }

    /// True when any catalog still only holds the placeholder "All" entry.
    fn filters_need_refresh(&self) -> bool {
        let uploaders_len = self
            .uploaders
            .read()
            .map(|values| values.len())
            .unwrap_or_default();
        let models_len = self.models.read().map(|values| values.len()).unwrap_or_default();
        let tags_len = self.tags.read().map(|values| values.len()).unwrap_or_default();
        let categories_len = self
            .categories
            .read()
            .map(|values| values.len())
            .unwrap_or_default();
        uploaders_len <= 1 || models_len <= 1 || tags_len <= 1 || categories_len <= 1
    }

    /// Re-run all catalog loads inline, reporting (but not propagating) errors.
    async fn refresh_filter_catalogs(&self) {
        if let Err(error) = Self::load_uploaders(&self.url, Arc::clone(&self.uploaders)).await {
            report_provider_error_background(
                CHANNEL_ID,
                "refresh_filter_catalogs.uploaders",
                &error.to_string(),
            );
        }
        if let Err(error) = Self::load_models(&self.url, Arc::clone(&self.models)).await {
            report_provider_error_background(
                CHANNEL_ID,
                "refresh_filter_catalogs.models",
                &error.to_string(),
            );
        }
        if let Err(error) = Self::load_categories(&self.url, Arc::clone(&self.categories)).await {
            report_provider_error_background(
                CHANNEL_ID,
                "refresh_filter_catalogs.categories",
                &error.to_string(),
            );
        }
        if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tags)).await {
            report_provider_error_background(
                CHANNEL_ID,
                "refresh_filter_catalogs.tags",
                &error.to_string(),
            );
        }
    }

    /// Resolve selected channel options to a target, in priority order:
    /// uploaders, models, tags, categories; otherwise the home listing.
    fn resolve_option_target(&self, options: &ServerOptions) -> Target {
        if let Some(uploaders) = options.sites.as_deref() {
            if uploaders.starts_with(&self.url) && uploaders != "all" {
                if uploaders.contains("/members/") {
                    return Target::MemberArchive(uploaders.to_string());
                }
                return Target::CommonArchive(uploaders.to_string());
            }
        }
        if let Some(model) = options.stars.as_deref() {
            if model.starts_with(&self.url) && model != "all" {
                return Target::CommonArchive(model.to_string());
            }
        }
        if let Some(tag) = options.filter.as_deref() {
            if tag.starts_with(&self.url) && tag != "all" {
                return Target::CommonArchive(tag.to_string());
            }
        }
        if let Some(category) = options.categories.as_deref() {
            if category.starts_with(&self.url) && category != "all" {
                return Target::CommonArchive(category.to_string());
            }
        }
        Target::Home
    }

    /// Resolve a free-text query: prefer an exact catalog match (uploaders,
    /// models, tags, categories in that order) over a plain search.
    fn resolve_query_target(&self, query: &str) -> Target {
        if let Ok(uploaders) = self.uploaders.read() {
            if let Some(value) = Self::match_filter(&uploaders, query) {
                if value.contains("/members/") {
                    return Target::MemberArchive(value);
                }
                return Target::CommonArchive(value);
            }
        }
        if let Ok(models) = self.models.read() {
            if let Some(value) = Self::match_filter(&models, query) {
                return Target::CommonArchive(value);
            }
        }
        if let Ok(tags) = self.tags.read() {
            if let Some(value) = Self::match_filter(&tags, query) {
                return Target::CommonArchive(value);
            }
        }
        if let Ok(categories) = self.categories.read() {
            if let Some(value) = Self::match_filter(&categories, query) {
                return Target::CommonArchive(value);
            }
        }
        Target::Search(query.to_string())
    }

    /// Build the (request URL, referer) pair for a target and page.
    fn build_target_request(&self, target: &Target, page: u16, sort: &str) -> (String, String) {
        match target {
            Target::Home => {
                let referer = format!("{}/", self.url);
                (self.build_home_url(page, sort), referer)
            }
            Target::Search(query) => {
                let path_query = Self::build_search_path_query(query, "-");
                let referer = format!("{}/search/{path_query}/", self.url);
                (self.build_search_url(query, page, sort), referer)
            }
            Target::CommonArchive(archive_url) => {
                let referer = archive_url.clone();
                (
                    self.build_common_archive_url(archive_url, page, sort),
                    referer,
                )
            }
            Target::MemberArchive(member_url) => {
                let referer = member_url.clone();
                (
                    self.build_member_archive_url(member_url, page, sort),
                    referer,
                )
            }
        }
    }

    /// Parse "HH:MM:SS"-style text into seconds; 0 on failure.
    fn parse_duration(text: &str) -> u32 {
        parse_time_to_seconds(text)
            .and_then(|value| u32::try_from(value).ok())
            .unwrap_or(0)
    }

    /// Parse abbreviated view counts like "11K".
    fn parse_views(text: &str) -> Option<u64> {
        parse_abbreviated_number(text.trim())
    }

    /// Parse a "95%"-style rating value.
    fn parse_percent(text: &str) -> Option<f32> {
        text.trim().trim_end_matches('%').trim().parse::<f32>().ok()
    }

    /// Locate the first known video-list container in a listing document.
    fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
        for selector_text in [
            "#list_videos_most_recent_videos_items",
            "#list_videos_videos_list_search_result_items",
            "#list_videos_common_videos_list_items",
            "#list_videos_uploaded_videos_items",
            "#list_videos_related_videos_items",
            "#list_videos_recommended_videos_items",
        ] {
            let selector = Self::selector(selector_text)?;
            if let Some(element) = document.select(&selector).next() {
                return Ok(Some(element));
            }
        }
        Ok(None)
    }

    /// Parse a listing page into video items (empty when no container found).
    fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
        let document = Html::parse_document(html);
        let Some(container) = self.list_container(&document)? else {
            return Ok(vec![]);
        };
        let card_selector = Self::selector("div.thumb.thumb_rel.item")?;
        let link_selector = Self::selector("a[href*=\"/video/\"]")?;
        let image_selector = Self::selector("img")?;
        let title_selector = Self::selector("div.title")?;
        let meta_selector = Self::selector("div.thumb-bottom.thumb-bottom-videos div.thumb-item")?;
        let duration_selector = Self::selector("div.item-bottom div.time")?;
        let mut items = Vec::new();
        for card in container.select(&card_selector) {
            let Some(link) = card.select(&link_selector).next() else {
                continue;
            };
            let href = link.value().attr("href").unwrap_or_default();
            let page_url = self.normalize_url(href);
            if page_url.is_empty() {
                continue;
            }
            // URL shape is .../video/<id>/<slug>/ — the id is the second
            // path segment from the end once the trailing slash is removed.
            let id = page_url
                .trim_end_matches('/')
                .split('/')
                .nth_back(1)
                .unwrap_or_default()
                .to_string();
            if id.is_empty() {
                continue;
            }
            let image = card.select(&image_selector).next();
            let thumb = image
                .and_then(|value| {
                    value
                        .value()
                        .attr("data-webp")
                        .or_else(|| value.value().attr("data-original"))
                        .or_else(|| value.value().attr("src"))
                })
                .map(|value| self.normalize_url(value))
                .unwrap_or_default();
            let preview = image
                .and_then(|value| value.value().attr("data-preview"))
                .map(|value| self.normalize_url(value));
            let title = card
                .select(&title_selector)
                .next()
                .map(|value| Self::text_of(&value))
                .filter(|value| !value.is_empty())
                .or_else(|| {
                    link.value()
                        .attr("title")
                        .map(Self::decode_html)
                        .filter(|value| !value.is_empty())
                });
            let Some(title) = title else {
                continue;
            };
            let duration = card
                .select(&duration_selector)
                .next()
                .map(|value| Self::parse_duration(&Self::text_of(&value)))
                .unwrap_or(0);
            let meta_values = card
                .select(&meta_selector)
                .map(|value| Self::text_of(&value))
                .collect::<Vec<_>>();
            let views = meta_values.first().and_then(|value| Self::parse_views(value));
            let rating = meta_values.get(1).and_then(|value| Self::parse_percent(value));
            let mut item = VideoItem::new(
                id,
                title,
                page_url,
                CHANNEL_ID.to_string(),
                thumb,
                duration,
            );
            if let Some(preview) = preview {
                let mut format =
                    VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
                format.add_http_header("Referer".to_string(), item.url.clone());
                item = item.preview(preview).formats(vec![format]);
            }
            if let Some(views) = views {
                item = item.views(views);
            }
            if let Some(rating) = rating {
                item = item.rating(rating);
            }
            items.push(item);
        }
        Ok(items)
    }

    /// Extract the JSON-LD `uploadDate` as a Unix timestamp, accepting either
    /// RFC 3339 or a plain `YYYY-MM-DD` date.
    fn parse_upload_date_timestamp(html: &str) -> Option<u64> {
        let iso_re = Self::regex(r#""uploadDate"\s*:\s*"([^"]+)""#).ok()?;
        let iso = iso_re.captures(html)?.get(1)?.as_str();
        DateTime::parse_from_rfc3339(iso)
            .map(|value| value.with_timezone(&Utc).timestamp() as u64)
            .ok()
            .or_else(|| {
                NaiveDate::parse_from_str(iso, "%Y-%m-%d")
                    .ok()
                    .and_then(|value| value.and_hms_opt(0, 0, 0))
                    .map(|value| value.and_utc().timestamp() as u64)
            })
    }

    /// Parse the detail page's count strip into `(views, uploaded_at, duration)`.
    /// Each count-item is classified by what it parses as, in that order.
    fn parse_count_values(document: &Html) -> Result<(Option<u64>, Option<u64>, Option<u32>)> {
        let selector = Self::selector("div.title-holder div.count-item")?;
        let mut duration = None;
        let mut views = None;
        let mut uploaded_at = None;
        for element in document.select(&selector) {
            let text = Self::text_of(&element);
            if duration.is_none() {
                let parsed = Self::parse_duration(&text);
                if parsed > 0 {
                    duration = Some(parsed);
                    continue;
                }
            }
            if views.is_none() {
                if let Some(parsed) = Self::parse_views(&text) {
                    views = Some(parsed);
                    continue;
                }
            }
            if uploaded_at.is_none() {
                if let Ok(date) = NaiveDate::parse_from_str(text.trim(), "%m/%d/%Y") {
                    uploaded_at = date
                        .and_hms_opt(0, 0, 0)
                        .map(|value| value.and_utc().timestamp() as u64);
                }
            }
        }
        Ok((views, uploaded_at, duration))
    }

    /// Extract player `(url, label)` pairs from the inline flashvars-style
    /// script, unescaping `\/` and stripping the `function/0/` wrapper.
    fn parse_format_urls(html: &str) -> Vec<(String, String)> {
        let keys = [
            ("video_url", "video_url_text"),
            ("video_alt_url", "video_alt_url_text"),
            ("video_alt_url2", "video_alt_url2_text"),
            ("video_alt_url3", "video_alt_url3_text"),
        ];
        let mut formats = Vec::new();
        for (url_key, label_key) in keys {
            let url_re = match Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#)) {
                Ok(value) => value,
                Err(_) => continue,
            };
            let label_re = match Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#)) {
                Ok(value) => value,
                Err(_) => continue,
            };
            let Some(url_match) = url_re.captures(html).and_then(|value| value.get(1)) else {
                continue;
            };
            let mut url = url_match.as_str().replace("\\/", "/");
            if let Some(stripped) = url.strip_prefix("function/0/") {
                url = stripped.to_string();
            }
            let label = label_re
                .captures(html)
                .and_then(|value| value.get(1))
                .map(|value| value.as_str().trim().to_string())
                .filter(|value| !value.is_empty())
                .unwrap_or_else(|| "MP4".to_string());
            formats.push((url, label));
        }
        formats
    }

    /// Gather tag names from the tag row plus the script's category/tag/model
    /// CSV fields, deduplicated by normalized title (first occurrence wins).
    fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
        let mut tags = Vec::new();
        let tag_selector = Self::selector("div.tags-row-wrapper a[href*=\"/tags/\"]")?;
        for element in document.select(&tag_selector) {
            let value = Self::text_of(&element);
            if !value.is_empty() {
                tags.push(value);
            }
        }
        for pattern in [
            r#"video_categories:\s*'([^']*)'"#,
            r#"video_tags:\s*'([^']*)'"#,
            r#"video_models:\s*'([^']*)'"#,
        ] {
            let re = Self::regex(pattern)?;
            if let Some(value) = re.captures(html).and_then(|caps| caps.get(1)) {
                for entry in value
                    .as_str()
                    .split(',')
                    .map(str::trim)
                    .map(Self::decode_html)
                    .filter(|entry| !entry.is_empty())
                {
                    tags.push(entry);
                }
            }
        }
        let mut unique = Vec::new();
        for value in tags {
            let normalized = Self::normalize_title(&value);
            if !normalized.is_empty()
                && !unique
                    .iter()
                    .any(|existing: &String| Self::normalize_title(existing) == normalized)
            {
                unique.push(value);
            }
        }
        Ok(unique)
    }

    /// Derive width/height aspect ratio from the player config, if present.
    fn parse_aspect_ratio(html: &str) -> Option<f32> {
        let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#).ok()?;
        let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#).ok()?;
        let width = width_re
            .captures(html)?
            .get(1)?
            .as_str()
            .parse::<f32>()
            .ok()?;
        let height = height_re
            .captures(html)?
            .get(1)?
            .as_str()
            .parse::<f32>()
            .ok()?;
        (height > 0.0).then_some(width / height)
    }

    /// Enrich a listing item with detail-page data: title, uploader, tags,
    /// counts, upload date, aspect ratio, and a poster fallback. Fields that
    /// the listing already filled are not overwritten.
    fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
        let document = Html::parse_document(html);
        let title_selector = Self::selector("h1.title")?;
        let member_selector = Self::selector("a.sub-btn[href*=\"/members/\"]")?;
        let member_name_selector = Self::selector("em")?;
        let channel_selector = Self::selector("a.btn.gold[href*=\"/channels/\"]")?;
        let model_selector = Self::selector("a.btn.gold[href*=\"/models/\"]")?;
        if let Some(title) = document
            .select(&title_selector)
            .next()
            .map(|value| Self::text_of(&value))
            .filter(|value| !value.is_empty())
        {
            item.title = title;
        }
        // Prefer a member uploader; fall back to a channel link.
        if let Some(member) = document.select(&member_selector).next() {
            let uploader = member
                .select(&member_name_selector)
                .next()
                .map(|value| Self::text_of(&value))
                .filter(|value| !value.is_empty())
                .unwrap_or_else(|| Self::text_of(&member));
            let uploader_url = member
                .value()
                .attr("href")
                .map(|value| self.normalize_url(value))
                .unwrap_or_default();
            if !uploader.is_empty() {
                item.uploader = Some(uploader);
            }
            if !uploader_url.is_empty() {
                item.uploaderUrl = Some(uploader_url);
            }
        } else if let Some(channel) = document.select(&channel_selector).next() {
            let uploader = Self::text_of(&channel);
            let uploader_url = channel
                .value()
                .attr("href")
                .map(|value| self.normalize_url(value))
                .unwrap_or_default();
            if !uploader.is_empty() {
                item.uploader = Some(uploader);
            }
            if !uploader_url.is_empty() {
                item.uploaderUrl = Some(uploader_url);
            }
        }
        let mut tags = Self::collect_tags(&document, html)?;
        if let Some(channel) = document.select(&channel_selector).next() {
            let value = Self::text_of(&channel);
            if !value.is_empty()
                && !tags
                    .iter()
                    .any(|existing| Self::normalize_title(existing) == Self::normalize_title(&value))
            {
                tags.push(value);
            }
        }
        if let Some(model) = document.select(&model_selector).next() {
            let value = Self::text_of(&model);
            if !value.is_empty()
                && !tags
                    .iter()
                    .any(|existing| Self::normalize_title(existing) == Self::normalize_title(&value))
            {
                tags.push(value);
            }
        }
        if !tags.is_empty() {
            item.tags = Some(tags);
        }
        let (views, uploaded_at, duration) = Self::parse_count_values(&document)?;
        if item.views.is_none() {
            if let Some(views) = views {
                item.views = Some(views);
            }
        }
        if item.uploadedAt.is_none() {
            item.uploadedAt = uploaded_at.or_else(|| Self::parse_upload_date_timestamp(html));
        }
        if item.duration == 0 {
            if let Some(duration) = duration {
                item.duration = duration;
            }
        }
        if item.aspectRatio.is_none() {
            item.aspectRatio = Self::parse_aspect_ratio(html);
        }
        if let Some(poster) = Self::regex(r#""thumbnailUrl"\s*:\s*"([^"]+)""#)
            .ok()
            .and_then(|re| re.captures(html))
            .and_then(|caps| caps.get(1))
            .map(|value| Self::decode_html(value.as_str()))
        {
            if item.thumb.is_empty() {
                item.thumb = poster;
            }
        }
        // NOTE(review): the parsed player formats are deliberately discarded
        // here in the original code — confirm whether they should be attached
        // to the item instead of only the listing preview format.
        let _ = Self::parse_format_urls(html);
        Ok(item)
    }

    /// Fetch an item's detail page and merge its data; on any failure the
    /// original listing item is returned unchanged (best-effort enrichment).
    async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
        let mut requester =
            requester_or_default(options, CHANNEL_ID, "yesporn.enrich_item.missing_requester");
        match Self::fetch_html(&mut requester, &item.url, &item.url).await {
            Ok(html) => match self.apply_detail_video(item.clone(), &html) {
                Ok(value) => value,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "enrich_item.apply_detail_video",
                        &format!("url={}; error={error}", item.url),
                    );
                    item
                }
            },
            Err(error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "enrich_item.fetch_html",
                    &format!("url={}; error={error}", item.url),
                );
                item
            }
        }
    }

    /// Fetch, parse, enrich, and cache a target listing. Cached results newer
    /// than five minutes are returned directly; on any fetch/parse failure the
    /// stale cached items (if any) are returned instead of an error.
    async fn fetch_target(
        &self,
        cache: VideoCache,
        target: Target,
        page: u16,
        sort: &str,
        options: ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let (url, referer) = self.build_target_request(&target, page, sort);
        let old_items = match cache.get(&url) {
            Some((time, items)) => {
                if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                    return Ok(items.clone());
                }
                items.clone()
            }
            None => vec![],
        };
        let mut requester =
            requester_or_default(&options, CHANNEL_ID, "yesporn.fetch_target.missing_requester");
        let html = match &target {
            Target::CommonArchive(_) | Target::MemberArchive(_) => {
                match self.fetch_archive_html(&mut requester, &target, page, sort).await {
                    Ok(value) => value,
                    Err(error) => {
                        report_provider_error(
                            CHANNEL_ID,
                            "fetch_target.archive_request",
                            &format!("url={url}; referer={referer}; error={error}"),
                        )
                        .await;
                        return Ok(old_items);
                    }
                }
            }
            _ => match Self::fetch_html(&mut requester, &url, &referer).await {
                Ok(value) => value,
                Err(error) => {
                    report_provider_error(
                        CHANNEL_ID,
                        "fetch_target.request",
                        &format!("url={url}; error={error}"),
                    )
                    .await;
                    return Ok(old_items);
                }
            },
        };
        if html.trim().is_empty() {
            report_provider_error(CHANNEL_ID, "fetch_target.empty_response", &format!("url={url}"))
                .await;
            return Ok(old_items);
        }
        let items = self.parse_list_videos(&html)?;
        if items.is_empty() {
            return Ok(old_items);
        }
        // Enrich detail pages with bounded concurrency.
        let enriched = stream::iter(items.into_iter().map(|item| {
            let provider = self.clone();
            let options = options.clone();
            async move { provider.enrich_item(item, &options).await }
        }))
        .buffer_unordered(4)
        .collect::<Vec<_>>()
        .await;
        cache.remove(&url);
        cache.insert(url, enriched.clone());
        Ok(enriched)
    }
}

#[async_trait]
impl Provider for YespornProvider {
    /// Resolve the request to a target (query match or selected options),
    /// refreshing the filter catalogs once if they are still unpopulated,
    /// then fetch the listing; errors are reported and yield an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let target = match query {
            Some(query) if !query.trim().is_empty() => {
                let query = query.trim();
                let mut target = self.resolve_query_target(query);
                if matches!(target, Target::Search(_)) && self.filters_need_refresh() {
                    self.refresh_filter_catalogs().await;
                    target = self.resolve_query_target(query);
                }
                target
            }
            _ => self.resolve_option_target(&options),
        };
        match self
            .fetch_target(cache, target, page, &sort, options.clone())
            .await
        {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.fetch_target",
                    &format!("sort={sort}; page={page}; error={error}"),
                )
                .await;
                vec![]
            }
        }
    }

    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn builds_home_url() {
        let provider = YespornProvider::new();
        assert_eq!(
            provider.build_home_url(2, "popular"),
            "https://yesporn.vip/?mode=async&function=get_block&block_id=list_videos_most_recent_videos&sort_by=video_viewed&from=2"
        );
    }

    #[test]
    fn builds_search_url() {
        let provider = YespornProvider::new();
        assert_eq!(
            provider.build_search_url("adriana chechik", 3, "rated"),
            "https://yesporn.vip/search/adriana-chechik/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=adriana+chechik&category_ids=&sort_by=rating&from_videos=3"
        );
    }

    #[test]
    fn parses_yesporn_cards() {
        let provider = YespornProvider::new();
        // TODO(review): the HTML card fixture was lost when this file was
        // mangled (markup stripped) — restore it from version control; the
        // assertions below document the expected parse of that fixture.
        let html = r#"
"#;
        let items = provider.parse_list_videos(html).expect("parses cards");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].id, "66943");
        assert_eq!(items[0].title, "Fuckin Taxes Andi Avalon");
        assert_eq!(
            items[0].thumb,
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66943/480x270/1.jpg"
        );
        assert_eq!(items[0].duration, 754);
        assert_eq!(items[0].views, Some(11_000));
        assert_eq!(items[0].rating, Some(95.0));
        assert_eq!(
            items[0].preview.as_deref(),
            Some("https://yesporn.vip/get_file/6/x/66000/66943/66943_preview.mp4/")
        );
    }

    #[test]
    fn applies_detail_video_data() {
        let provider = YespornProvider::new();
        let item = VideoItem::new(
            "66898".to_string(),
            "placeholder".to_string(),
            "https://yesporn.vip/video/66898/the-world-s-greatest-art-class-a-nude-model-you-can-creampie-6noc5d/".to_string(),
            CHANNEL_ID.to_string(),
            "https://yesnn.b-cdn.net/contents/videos_screenshots/66000/66898/480x270/1.jpg"
                .to_string(),
            0,
        );
        let html = r#"

The World’s Greatest Art Class: A Nude Model You Can Creampie!

32:45
2.6K
03/18/2026
"#; let item = provider .apply_detail_video(item, html) .expect("applies detail video"); assert_eq!( item.title, "The World’s Greatest Art Class: A Nude Model You Can Creampie!" ); assert_eq!(item.uploader.as_deref(), Some("nonameforthis")); assert_eq!( item.uploaderUrl.as_deref(), Some("https://yesporn.vip/members/15878/") ); assert_eq!(item.views, Some(2600)); assert_eq!(item.duration, 1965); assert!(item.tags.as_ref().is_some_and(|values| values .iter() .any(|value| YespornProvider::normalize_title(value) == "creampie"))); assert!(item.aspectRatio.is_some()); assert!(item.uploadedAt.is_some()); } }