diff --git a/build.rs b/build.rs index fa67ae9..e0a26ef 100644 --- a/build.rs +++ b/build.rs @@ -256,6 +256,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "hsex", ty: "HsexProvider", }, + ProviderDef { + id: "blowjobspro", + module: "blowjobspro", + ty: "BlowjobsproProvider", + }, ProviderDef { id: "erome", module: "erome", diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 3385683..6c43b9f 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -9,6 +9,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `all` | `meta-search` | no | no | Aggregates all compiled providers. | | `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. | | `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. | +| `blowjobspro` | `mainstream-tube` | no | no | KVS-style HTML provider with async search pagination and category shortcut routing. | | `chaturbate` | `live-cams` | no | no | Live cam channel. | | `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:`). | | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. 
| diff --git a/src/providers/blowjobspro.rs b/src/providers/blowjobspro.rs new file mode 100644 index 0000000..cc0f5ba --- /dev/null +++ b/src/providers/blowjobspro.rs @@ -0,0 +1,531 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use regex::Regex; +use scraper::{Html, Selector}; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; +use std::thread; +use wreq::Version; + /* Catalog metadata for this provider: its group id and descriptive tags. */ +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "kvs", "blowjob"], + }; + /* Site root, channel id, and the User-Agent and Accept values that html_headers attaches to requests. */ +const BASE_URL: &str = "https://blowjobs.pro"; +const CHANNEL_ID: &str = "blowjobspro"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + /* error_chain declaration for this module: an Io foreign link for std::io::Error and a Parse error kind carrying a message. */ +error_chain! 
{ + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + /* Provider state: the base URL plus two Arc-shared collections that load_categories overwrites. NOTE(review): the type arguments of these fields are not visible in this rendering of the patch; confirm them against the real file. */ +#[derive(Debug, Clone)] +pub struct BlowjobsproProvider { + url: String, + categories: Arc>>, + category_map: Arc>>, +} + /* The listing a request resolves to: one of three sorted feeds, a keyword search, or a category archive URL. */ +#[derive(Debug, Clone)] +enum Target { + Latest, + MostViewed, + TopRated, + Search { query: String }, + Category { url: String }, +} + +impl BlowjobsproProvider { + /* Creates the provider with a single All category and an empty category map, then starts the background category load before returning. */ pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + category_map: Arc::new(RwLock::new(HashMap::new())), + }; + provider.spawn_initial_load(); + provider + } + + /* Spawns a thread that builds a current-thread Tokio runtime and blocks on load_categories; a runtime build failure or a load failure is passed to report_provider_error_background. */ fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + let category_map = Arc::clone(&self.category_map); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_categories(&url, categories, category_map).await { + report_provider_error_background(CHANNEL_ID, "load_categories", &error.to_string()); + } + }); + }); + } + + /* Builds the Channel descriptor from a clone of the shared category list (the default value if the lock read returns Err), exposing a sort option and a categories option. */ fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "Blowjobs.pro".to_string(), + description: "Blowjobs.pro KVS listings with latest, most viewed, top rated, search, and category shortcuts." 
+ .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=blowjobs.pro".to_string(), + status: "active".to_string(), + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse feed ordering.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Jump directly to category archives.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + /* Parses a CSS selector; a parse failure becomes an Error whose message includes the selector text. */ fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + /* Compiles a regex; a failure becomes an Error whose message includes the pattern. */ fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + /* Decodes HTML entities in text; if converting the decoded data to a String fails, the input is returned unchanged. */ fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + /* Joins the whitespace-separated words of text with single spaces. NOTE(review): no caller of this helper is visible in this view. */ fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + /* Lookup key for a category title: trims, strips leading # characters, replaces _ and - with spaces, collapses whitespace, and ASCII-lowercases. */ fn normalize_title(title: &str) -> String { + title + .trim() + .trim_start_matches('#') + .replace(['_', '-'], " ") + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + /* Makes a URL absolute: blank input yields an empty String, http and https URLs pass through trimmed, scheme-relative URLs get an https: prefix, and everything else is joined onto self.url (leading ./ sequences are removed from relative paths). */ fn normalize_url(&self, url: &str) -> String { + let trimmed = url.trim(); + if trimmed.is_empty() { + return String::new(); + } + if trimmed.starts_with("http://") || 
trimmed.starts_with("https://") { + return trimmed.to_string(); + } + if trimmed.starts_with("//") { + return format!("https:{trimmed}"); + } + if trimmed.starts_with('/') { + return format!("{}{}", self.url, trimmed); + } + format!("{}/{}", self.url, trimmed.trim_start_matches("./")) + } + + /* Header list for HTML requests: User-Agent, Accept, and the given Referer. */ fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + /* Search slug: each whitespace-separated word is percent-encoded with the NON_ALPHANUMERIC set and the words are joined with a hyphen. */ fn build_search_path(query: &str) -> String { + query + .split_whitespace() + .map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string()) + .collect::>() + .join("-") + } + + /* Pages 0 and 1 map to the archive URL with exactly one trailing slash; later pages append the page number as an extra path segment. */ fn build_archive_page_url(archive_url: &str, page: u16) -> String { + if page <= 1 { + return archive_url.trim_end_matches('/').to_string() + "/"; + } + format!("{}/{page}/", archive_url.trim_end_matches('/')) + } + + /* Maps a Target and page to the URL to request. The Search arm does not use page; fetch_listing_html sends search pages after the first to the async URL instead. */ fn build_target_url(&self, target: &Target, page: u16) -> String { + match target { + Target::Latest => { + Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page) + } + Target::MostViewed => { + Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page) + } + Target::TopRated => { + Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page) + } + Target::Category { url } => Self::build_archive_page_url(url, page), + Target::Search { query } => { + let normalized = Self::build_search_path(query); + format!("{}/search/{normalized}/", self.url) + } + } + } + + /* Resolution order: a category value other than blank or all that resolves wins; otherwise a non-blank query becomes a Category when it names one (with or without the category: prefix) and a Search when it does not; otherwise the sort key picks MostViewed (popular), TopRated (rated), or Latest. */ fn target_from_request( + &self, + sort: &str, + query: Option<&str>, + category: Option<&str>, + ) -> Target { + let category_value = category.unwrap_or("").trim(); + if !category_value.is_empty() && !category_value.eq_ignore_ascii_case("all") { + if let Some(url) = self.resolve_category(category_value) { + return Target::Category { url }; + } + } + + if let Some(raw_query) = query { + let trimmed = raw_query.trim(); + if !trimmed.is_empty() { + if let 
Some(value) = trimmed.strip_prefix("category:") + && let Some(url) = self.resolve_category(value) + { + return Target::Category { url }; + } + if let Some(url) = self.resolve_category(trimmed) { + return Target::Category { url }; + } + return Target::Search { + query: trimmed.to_string(), + }; + } + } + + match sort { + "popular" => Target::MostViewed, + "rated" => Target::TopRated, + _ => Target::Latest, + } + } + + /* Returns the stored URL for a category title after normalize_title; None when the normalized key is empty, the lock read fails, or no entry exists. */ fn resolve_category(&self, value: &str) -> Option { + let normalized = Self::normalize_title(value); + if normalized.is_empty() { + return None; + } + + self.category_map + .read() + .ok() + .and_then(|map| map.get(&normalized).cloned()) + } + + /* Issues a GET for url with the html_headers set, requesting HTTP 1.1, through the requester returned by requester_or_default; any failure is wrapped with the URL in the message. */ async fn fetch_html( + &self, + options: &ServerOptions, + url: &str, + referer: &str, + ) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html"); + requester + .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + /* Builds the mode=async get_block URL for search pages after the first; None for page 0 or 1 or a blank query. NOTE(review): the paging key is written as one percent-encoded parameter name, from_videos%2Bfrom_albums; confirm the site accepts that form rather than two separate parameters. */ fn search_page_from_async(&self, query: &str, page: u16) -> Option { + if page <= 1 { + return None; + } + + let query_value = query.trim(); + if query_value.is_empty() { + return None; + } + + let from = page; + let encoded_query = utf8_percent_encode(query_value, NON_ALPHANUMERIC).to_string(); + Some(format!( + "{}/search/{}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={encoded_query}&category_ids=&sort_by=&from_videos%2Bfrom_albums={from}", + self.url, + Self::build_search_path(query_value) + )) + } + + /* Fetches listing HTML for target: search pages after the first use the async URL with the search page as Referer; all other requests use build_target_url with the site root as Referer. */ async fn fetch_listing_html( + &self, + options: &ServerOptions, + target: &Target, + page: u16, + ) -> Result { + if let Target::Search { query } = target + && page > 1 + && let Some(async_url) = self.search_page_from_async(query, page) + { + return self + .fetch_html(options, &async_url, &format!("{}/search/{}/", self.url, Self::build_search_path(query))) + .await; + } + + let page_url = 
self.build_target_url(target, page); + self.fetch_html(options, &page_url, &self.url).await + } + + /* Splits the listing HTML into per-item segments and pulls link, title, thumbnail, duration, views, and tags out of each with regexes. Segments without a matching video link or with an empty title are skipped. NOTE(review): all seven regexes are compiled on every call, and several pattern literals plus the split delimiter look truncated in this rendering of the patch; verify them against the real file. */ fn parse_listing_html(&self, html: &str) -> Result> { + let href_re = Self::regex(r#"href="([^"]+/videos/\d+/[^"]*)""#)?; + let title_re = Self::regex(r#"title="([^"]+)""#)?; + let thumb_re = Self::regex(r#"(?:data-original|src)="([^"]+/contents/videos_screenshots/[^"]+)""#)?; + let duration_re = Self::regex(r#"
\s*([^<]+)\s*
"#)?; + let views_re = Self::regex(r#"
\s*([^<]+)\s*
"#)?; + let tag_re = Self::regex(r#"]*>\s*([^<]+)\s*"#)?; + let id_re = Self::regex(r"/videos/(\d+)/")?; + let mut videos = Vec::new(); + for segment in html.split("
").skip(1) { + let Some(href_caps) = href_re.captures(segment) else { + continue; + }; + let Some(raw_url) = href_caps.get(1).map(|m| m.as_str()) else { + continue; + }; + let url = self.normalize_url(raw_url); + /* the id falls back to the whole URL when no numeric id is found in it */ let id = id_re + .captures(&url) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) + .unwrap_or_else(|| url.clone()); + + let title = title_re + .captures(segment) + .and_then(|caps| caps.get(1).map(|m| Self::decode_html(m.as_str()))) + .unwrap_or_default(); + if title.is_empty() { + continue; + } + + let thumb = thumb_re + .captures(segment) + .and_then(|caps| caps.get(1).map(|m| self.normalize_url(m.as_str()))) + .unwrap_or_default(); + + /* NOTE(review): duration and views are narrowed with unchecked as u32 casts; confirm the helper return ranges fit. */ let duration = duration_re + .captures(segment) + .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string())) + .and_then(|value| parse_time_to_seconds(&value)) + .unwrap_or(0) + .max(0) as u32; + + let views = views_re + .captures(segment) + .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string())) + .and_then(|value| parse_abbreviated_number(&value)) + .map(|value| value as u32); + + let tags = tag_re + .captures_iter(segment) + .filter_map(|caps| { + caps.get(1).map(|m| { + Self::decode_html(m.as_str()) + .split_whitespace() + .collect::>() + .join(" ") + .trim() + .to_string() + }) + }) + .filter(|value| !value.is_empty()) + .collect::>(); + + let mut item = VideoItem::new( + id, + title, + url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.tags = Some(tags); + item.views = views; + videos.push(item); + } + + Ok(videos) + } + + /* Downloads the categories page under url, reads the title attribute and href of every link matched by the selector below, and replaces the shared option list and title-to-URL map. Links without an href or with an empty title are skipped, and a failed lock write is silently ignored. NOTE(review): href is stored as found, without normalize_url, and is later used as the archive base URL for Category targets; confirm the site emits absolute URLs. */ async fn load_categories( + url: &str, + categories: Arc>>, + category_map: Arc>>, + ) -> Result<()> { + let mut requester = crate::util::requester::Requester::new(); + let category_url = format!("{url}/categories/"); + let html = requester + .get_with_headers( + &category_url, + Self::html_headers(url), + Some(Version::HTTP_11), + ) + .await + .map_err(|error| Error::from(format!("category fetch failed: {error}")))?; + + let doc = 
Html::parse_document(&html); + let link_selector = Self::selector("#list_categories_categories_list_items a.item-link")?; + + let mut options = vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }]; + let mut map = HashMap::new(); + + for link in doc.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let raw_title = link + .value() + .attr("title") + .unwrap_or("") + .to_string(); + let title = Self::decode_html(&raw_title).trim().to_string(); + if title.is_empty() { + continue; + } + + let normalized = Self::normalize_title(&title); + if normalized.is_empty() { + continue; + } + + options.push(FilterOption { + id: normalized.clone(), + title: title.clone(), + }); + map.insert(normalized, href.to_string()); + } + + if let Ok(mut guard) = categories.write() { + *guard = options; + } + if let Ok(mut guard) = category_map.write() { + *guard = map; + } + + Ok(()) + } +} + /* Provider implementation. get_videos resolves the request to a Target, fetches one listing page, and parses it; a fetch or parse error is reported through report_provider_error and yields an empty list. */ +#[async_trait] +impl Provider for BlowjobsproProvider { + async fn get_videos( + &self, + _cache: VideoCache, + _pool: DbPool, + sort: String, + query: Option, + page: String, + _per_page: String, + options: ServerOptions, + ) -> Vec { + /* an unparseable page falls back to 1 and the value is clamped to at least 1 */ let page_number = page.parse::().unwrap_or(1).max(1); + let target = self.target_from_request( + sort.as_str(), + query.as_deref(), + options.categories.as_deref(), + ); + + let html = match self.fetch_listing_html(&options, &target, page_number).await { + Ok(html) => html, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos.fetch_listing_html", &error.to_string()) + .await; + return vec![]; + } + }; + + match self.parse_listing_html(&html) { + Ok(videos) => videos, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos.parse_listing_html", &error.to_string()) + .await; + vec![] + } + } + } + + /* Always returns Some, wrapping the descriptor from build_channel. */ fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +}