blowjobspro launch

This commit is contained in:
Simon
2026-05-05 18:41:05 +00:00
committed by ForgeCode
parent 3da09dc561
commit c4292c2ffc
3 changed files with 537 additions and 0 deletions

View File

@@ -256,6 +256,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "hsex",
ty: "HsexProvider",
},
ProviderDef {
id: "blowjobspro",
module: "blowjobspro",
ty: "BlowjobsproProvider",
},
ProviderDef {
id: "erome",
module: "erome",

View File

@@ -9,6 +9,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `all` | `meta-search` | no | no | Aggregates all compiled providers. |
| `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. |
| `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. |
| `blowjobspro` | `mainstream-tube` | no | no | KVS-style HTML provider with async search pagination and category shortcut routing. |
| `chaturbate` | `live-cams` | no | no | Live cam channel. |
| `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:<name>`). |
| `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. |

View File

@@ -0,0 +1,531 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use wreq::Version;
/// Registry metadata placing this provider in the mainstream-tube group,
/// with tags used for channel discovery/filtering.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "kvs", "blowjob"],
    };
/// Site origin; all scraped hrefs are normalized against this.
const BASE_URL: &str = "https://blowjobs.pro";
/// Stable channel identifier used in video items and error reports.
const CHANNEL_ID: &str = "blowjobspro";
/// Desktop Firefox user agent sent with every request to look browser-like.
const FIREFOX_UA: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// Browser-typical Accept header for HTML page fetches.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Module-local `Error`/`Result` types: `Io` wraps std I/O errors; `Parse`
// carries a message for selector/regex/HTML extraction failures.
error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
/// HTML scraper for the blowjobs.pro KVS-style site.
#[derive(Debug, Clone)]
pub struct BlowjobsproProvider {
    // Site origin (seeded from BASE_URL); single source for URL building.
    url: String,
    // Channel category filter options; written by the background loader
    // thread, read when building the channel descriptor.
    categories: Arc<RwLock<Vec<FilterOption>>>,
    // Normalized category title -> category archive URL, filled by the
    // background loader and consulted by `resolve_category`.
    category_map: Arc<RwLock<HashMap<String, String>>>,
}
/// Listing target resolved from the request's sort/query/category inputs.
#[derive(Debug, Clone)]
enum Target {
    // /latest-updates/ feed (default sort).
    Latest,
    // /most-popular/ feed ("popular" sort).
    MostViewed,
    // /top-rated/ feed ("rated" sort).
    TopRated,
    // Free-text search against /search/<slug>/.
    Search { query: String },
    // Direct category archive URL resolved via the category map.
    Category { url: String },
}
impl BlowjobsproProvider {
    /// Builds the provider with a seeded "All" category option and kicks off
    /// the background category load so filters populate shortly after startup.
    pub fn new() -> Self {
        let provider = Self {
            url: BASE_URL.to_string(),
            // Placeholder so the category filter is usable before the
            // background fetch completes.
            categories: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
            category_map: Arc::new(RwLock::new(HashMap::new())),
        };
        provider.spawn_initial_load();
        provider
    }

    /// Fetches categories on a dedicated OS thread running its own
    /// single-threaded Tokio runtime, keeping `new()` synchronous and
    /// non-blocking. Failures are reported in the background, never surfaced.
    fn spawn_initial_load(&self) {
        let url = self.url.clone();
        let categories = Arc::clone(&self.categories);
        let category_map = Arc::clone(&self.category_map);
        thread::spawn(move || {
            let runtime = match tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
            {
                Ok(runtime) => runtime,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "spawn_initial_load.runtime_build",
                        &error.to_string(),
                    );
                    return;
                }
            };
            runtime.block_on(async move {
                if let Err(error) = Self::load_categories(&url, categories, category_map).await {
                    report_provider_error_background(CHANNEL_ID, "load_categories", &error.to_string());
                }
            });
        });
    }

    /// Assembles the channel descriptor with whatever categories have been
    /// loaded so far (empty list if the lock is poisoned).
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        let categories = self
            .categories
            .read()
            .map(|value| value.clone())
            .unwrap_or_default();
        Channel {
            id: CHANNEL_ID.to_string(),
            name: "Blowjobs.pro".to_string(),
            description: "Blowjobs.pro KVS listings with latest, most viewed, top rated, search, and category shortcuts."
                .to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=blowjobs.pro".to_string(),
            status: "active".to_string(),
            categories: categories.iter().map(|value| value.title.clone()).collect(),
            options: vec![
                // Sort option ids map to the `sort` match in
                // `target_from_request` ("new" / "popular" / "rated").
                ChannelOption {
                    id: "sort".to_string(),
                    title: "Sort".to_string(),
                    description: "Browse feed ordering.".to_string(),
                    systemImage: "list.number".to_string(),
                    colorName: "blue".to_string(),
                    options: vec![
                        FilterOption {
                            id: "new".to_string(),
                            title: "Latest".to_string(),
                        },
                        FilterOption {
                            id: "popular".to_string(),
                            title: "Most Viewed".to_string(),
                        },
                        FilterOption {
                            id: "rated".to_string(),
                            title: "Top Rated".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
                ChannelOption {
                    id: "categories".to_string(),
                    title: "Categories".to_string(),
                    description: "Jump directly to category archives.".to_string(),
                    systemImage: "square.grid.2x2".to_string(),
                    colorName: "orange".to_string(),
                    options: categories,
                    multiSelect: false,
                },
            ],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }

    /// Parses a CSS selector, mapping scraper's error into this module's
    /// `Error` with the offending selector in the message.
    fn selector(value: &str) -> Result<Selector> {
        Selector::parse(value)
            .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
    }

    /// Compiles a regex, mapping failures into this module's `Error`.
    fn regex(value: &str) -> Result<Regex> {
        Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
    }

    /// Decodes HTML entities; if decoding fails the input is returned as-is.
    fn decode_html(text: &str) -> String {
        decode(text.as_bytes())
            .to_string()
            .unwrap_or_else(|_| text.to_string())
    }

    /// Collapses every run of whitespace to a single space.
    // NOTE(review): not referenced in the portion of this file shown here —
    // confirm the remainder of the file uses it before removing.
    fn collapse_whitespace(text: &str) -> String {
        text.split_whitespace().collect::<Vec<_>>().join(" ")
    }

    /// Canonicalizes a category title for map keys: trims, strips a leading
    /// `#`, treats `_`/`-` as spaces, collapses whitespace, and lowercases.
    fn normalize_title(title: &str) -> String {
        title
            .trim()
            .trim_start_matches('#')
            .replace(['_', '-'], " ")
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ")
            .to_ascii_lowercase()
    }

    /// Resolves a scraped href against the site origin: absolute URLs pass
    /// through, protocol-relative ones gain `https:`, root-relative and bare
    /// (including `./`-prefixed) paths are joined onto `self.url`.
    fn normalize_url(&self, url: &str) -> String {
        let trimmed = url.trim();
        if trimmed.is_empty() {
            return String::new();
        }
        if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
            return trimmed.to_string();
        }
        if trimmed.starts_with("//") {
            return format!("https:{trimmed}");
        }
        if trimmed.starts_with('/') {
            return format!("{}{}", self.url, trimmed);
        }
        format!("{}/{}", self.url, trimmed.trim_start_matches("./"))
    }

    /// Browser-like header set (UA, Accept, Referer) sent with every fetch.
    fn html_headers(referer: &str) -> Vec<(String, String)> {
        vec![
            ("User-Agent".to_string(), FIREFOX_UA.to_string()),
            ("Accept".to_string(), HTML_ACCEPT.to_string()),
            ("Referer".to_string(), referer.to_string()),
        ]
    }

    /// Builds the search path slug: percent-encodes each word and joins with
    /// `-`, matching the site's /search/<slug>/ URL shape.
    fn build_search_path(query: &str) -> String {
        query
            .split_whitespace()
            .map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string())
            .collect::<Vec<_>>()
            .join("-")
    }

    /// Appends pagination to an archive URL: page 1 is the bare archive with a
    /// trailing slash, later pages append `/<page>/`.
    fn build_archive_page_url(archive_url: &str, page: u16) -> String {
        if page <= 1 {
            return archive_url.trim_end_matches('/').to_string() + "/";
        }
        format!("{}/{page}/", archive_url.trim_end_matches('/'))
    }

    /// Maps a `Target` plus page number to the concrete listing URL. Search
    /// URLs here are always page 1; deeper search pages go through the async
    /// endpoint (see `search_page_from_async`).
    fn build_target_url(&self, target: &Target, page: u16) -> String {
        match target {
            Target::Latest => {
                Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page)
            }
            Target::MostViewed => {
                Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page)
            }
            Target::TopRated => {
                Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page)
            }
            Target::Category { url } => Self::build_archive_page_url(url, page),
            Target::Search { query } => {
                let normalized = Self::build_search_path(query);
                format!("{}/search/{normalized}/", self.url)
            }
        }
    }

    /// Resolves request parameters to a `Target` with this precedence:
    /// explicit non-"all" category, then a query carrying a `category:`
    /// prefix, then a query that happens to match a category title, then
    /// free-text search, and finally the sort feed.
    fn target_from_request(
        &self,
        sort: &str,
        query: Option<&str>,
        category: Option<&str>,
    ) -> Target {
        let category_value = category.unwrap_or("").trim();
        if !category_value.is_empty() && !category_value.eq_ignore_ascii_case("all") {
            if let Some(url) = self.resolve_category(category_value) {
                return Target::Category { url };
            }
        }
        if let Some(raw_query) = query {
            let trimmed = raw_query.trim();
            if !trimmed.is_empty() {
                if let Some(value) = trimmed.strip_prefix("category:")
                    && let Some(url) = self.resolve_category(value)
                {
                    return Target::Category { url };
                }
                if let Some(url) = self.resolve_category(trimmed) {
                    return Target::Category { url };
                }
                return Target::Search {
                    query: trimmed.to_string(),
                };
            }
        }
        match sort {
            "popular" => Target::MostViewed,
            "rated" => Target::TopRated,
            _ => Target::Latest,
        }
    }

    /// Looks up a category archive URL by normalized title; `None` when the
    /// title is empty, unknown, or the map lock is poisoned.
    fn resolve_category(&self, value: &str) -> Option<String> {
        let normalized = Self::normalize_title(value);
        if normalized.is_empty() {
            return None;
        }
        self.category_map
            .read()
            .ok()
            .and_then(|map| map.get(&normalized).cloned())
    }

    /// Fetches a URL as HTML via the request-scoped requester, with
    /// browser-like headers and HTTP/1.1 forced.
    async fn fetch_html(
        &self,
        options: &ServerOptions,
        url: &str,
        referer: &str,
    ) -> Result<String> {
        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
        requester
            .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
            .await
            .map_err(|error| Error::from(format!("request failed for {url}: {error}")))
    }

    /// Builds the KVS `mode=async` block URL used for search pages beyond the
    /// first; `None` for page 1 or an empty query. The `from` parameter is
    /// passed the page number directly.
    fn search_page_from_async(&self, query: &str, page: u16) -> Option<String> {
        if page <= 1 {
            return None;
        }
        let query_value = query.trim();
        if query_value.is_empty() {
            return None;
        }
        let from = page;
        let encoded_query = utf8_percent_encode(query_value, NON_ALPHANUMERIC).to_string();
        Some(format!(
            "{}/search/{}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={encoded_query}&category_ids=&sort_by=&from_videos%2Bfrom_albums={from}",
            self.url,
            Self::build_search_path(query_value)
        ))
    }

    /// Fetches the listing HTML for a target/page. Search pagination past
    /// page 1 goes through the async block endpoint with the page-1 search URL
    /// as Referer; everything else uses the plain target URL.
    async fn fetch_listing_html(
        &self,
        options: &ServerOptions,
        target: &Target,
        page: u16,
    ) -> Result<String> {
        if let Target::Search { query } = target
            && page > 1
            && let Some(async_url) = self.search_page_from_async(query, page)
        {
            return self
                .fetch_html(options, &async_url, &format!("{}/search/{}/", self.url, Self::build_search_path(query)))
                .await;
        }
        let page_url = self.build_target_url(target, page);
        self.fetch_html(options, &page_url, &self.url).await
    }

    /// Extracts video items from listing HTML by splitting on item divs and
    /// regex-matching fields within each segment. Segments without a video
    /// href or a non-empty title are skipped.
    fn parse_listing_html(&self, html: &str) -> Result<Vec<VideoItem>> {
        // Video detail link: any href containing /videos/<id>/.
        let href_re = Self::regex(r#"href="([^"]+/videos/\d+/[^"]*)""#)?;
        // First title attribute in the segment.
        let title_re = Self::regex(r#"title="([^"]+)""#)?;
        // Thumbnail from either lazy-load (data-original) or plain src.
        let thumb_re = Self::regex(r#"(?:data-original|src)="([^"]+/contents/videos_screenshots/[^"]+)""#)?;
        let duration_re = Self::regex(r#"<div class="duration">\s*([^<]+)\s*</div>"#)?;
        let views_re = Self::regex(r#"<div class="views">\s*([^<]+)\s*</div>"#)?;
        // Category anchors become the item's tags.
        let tag_re = Self::regex(r#"<a href="[^"]*/categories/[^"]*"[^>]*>\s*([^<]+)\s*</a>"#)?;
        let id_re = Self::regex(r"/videos/(\d+)/")?;
        let mut videos = Vec::new();
        for segment in html.split("<div class=\"item\">").skip(1) {
            let Some(href_caps) = href_re.captures(segment) else {
                continue;
            };
            let Some(raw_url) = href_caps.get(1).map(|m| m.as_str()) else {
                continue;
            };
            let url = self.normalize_url(raw_url);
            // Numeric id from the URL; falls back to the full URL as the id.
            let id = id_re
                .captures(&url)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
                .unwrap_or_else(|| url.clone());
            let title = title_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| Self::decode_html(m.as_str())))
                .unwrap_or_default();
            if title.is_empty() {
                continue;
            }
            let thumb = thumb_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| self.normalize_url(m.as_str())))
                .unwrap_or_default();
            // Missing/unparseable durations become 0 seconds.
            let duration = duration_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
                .and_then(|value| parse_time_to_seconds(&value))
                .unwrap_or(0)
                .max(0) as u32;
            // NOTE(review): `as u32` truncates counts above u32::MAX — confirm
            // `parse_abbreviated_number`'s range makes this acceptable.
            let views = views_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
                .and_then(|value| parse_abbreviated_number(&value))
                .map(|value| value as u32);
            let tags = tag_re
                .captures_iter(segment)
                .filter_map(|caps| {
                    caps.get(1).map(|m| {
                        Self::decode_html(m.as_str())
                            .split_whitespace()
                            .collect::<Vec<_>>()
                            .join(" ")
                            .trim()
                            .to_string()
                    })
                })
                .filter(|value| !value.is_empty())
                .collect::<Vec<_>>();
            let mut item = VideoItem::new(
                id,
                title,
                url,
                CHANNEL_ID.to_string(),
                thumb,
                duration,
            );
            item.tags = Some(tags);
            item.views = views;
            videos.push(item);
        }
        Ok(videos)
    }

    /// Fetches `/categories/` and atomically replaces the shared category
    /// option list and title->URL map. Uses a freshly constructed `Requester`
    /// since no `ServerOptions` exist on the startup thread.
    async fn load_categories(
        url: &str,
        categories: Arc<RwLock<Vec<FilterOption>>>,
        category_map: Arc<RwLock<HashMap<String, String>>>,
    ) -> Result<()> {
        let mut requester = crate::util::requester::Requester::new();
        let category_url = format!("{url}/categories/");
        let html = requester
            .get_with_headers(
                &category_url,
                Self::html_headers(url),
                Some(Version::HTTP_11),
            )
            .await
            .map_err(|error| Error::from(format!("category fetch failed: {error}")))?;
        let doc = Html::parse_document(&html);
        let link_selector = Self::selector("#list_categories_categories_list_items a.item-link")?;
        // Keep "All" as the first option, mirroring the constructor's seed.
        let mut options = vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }];
        let mut map = HashMap::new();
        for link in doc.select(&link_selector) {
            let Some(href) = link.value().attr("href") else {
                continue;
            };
            let raw_title = link
                .value()
                .attr("title")
                .unwrap_or("")
                .to_string();
            let title = Self::decode_html(&raw_title).trim().to_string();
            if title.is_empty() {
                continue;
            }
            let normalized = Self::normalize_title(&title);
            if normalized.is_empty() {
                continue;
            }
            options.push(FilterOption {
                id: normalized.clone(),
                title: title.clone(),
            });
            map.insert(normalized, href.to_string());
        }
        // Replace (not merge) shared state; a poisoned lock silently skips.
        if let Ok(mut guard) = categories.write() {
            *guard = options;
        }
        if let Ok(mut guard) = category_map.write() {
            *guard = map;
        }
        Ok(())
    }
}
#[async_trait]
impl Provider for BlowjobsproProvider {
    /// Resolves the request into a browse/search/category target, fetches the
    /// corresponding listing page, and parses it into video items. Fetch or
    /// parse failures are reported through the provider error channel and
    /// yield an empty result instead of an error.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Non-numeric page strings fall back to 1; zero is clamped up to 1.
        let requested_page = page.parse::<u16>().map_or(1, |value| value.max(1));
        let listing_target = self.target_from_request(
            sort.as_str(),
            query.as_deref(),
            options.categories.as_deref(),
        );
        let listing_html = match self
            .fetch_listing_html(&options, &listing_target, requested_page)
            .await
        {
            Ok(body) => body,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos.fetch_listing_html", &error.to_string())
                    .await;
                return Vec::new();
            }
        };
        match self.parse_listing_html(&listing_html) {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos.parse_listing_html", &error.to_string())
                    .await;
                Vec::new()
            }
        }
    }

    /// The channel descriptor is always available; categories may still be
    /// loading in the background when this is first called.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}