blowjobspro launch
build.rs (+5)

@@ -256,6 +256,11 @@ const PROVIDERS: &[ProviderDef] = &[
         module: "hsex",
         ty: "HsexProvider",
     },
+    ProviderDef {
+        id: "blowjobspro",
+        module: "blowjobspro",
+        ty: "BlowjobsproProvider",
+    },
     ProviderDef {
         id: "erome",
         module: "erome",
@@ -9,6 +9,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
 | `all` | `meta-search` | no | no | Aggregates all compiled providers. |
 | `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. |
 | `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. |
+| `blowjobspro` | `mainstream-tube` | no | no | KVS-style HTML provider with async search pagination and category shortcut routing. |
 | `chaturbate` | `live-cams` | no | no | Live cam channel. |
 | `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:<name>`). |
 | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. |
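The new `blowjobspro` row above refers to the usual KVS listing layout: archive feeds paginate as `/<archive>/<page>/` and searches live under `/search/<slug>/`. A minimal standalone sketch of those URL shapes (illustrative only; it mirrors `build_archive_page_url` and `build_search_path` in the new provider below, except that the real code also percent-encodes each search term):

// Standalone sketch, not part of the crate.
fn archive_page_url(archive_url: &str, page: u16) -> String {
    // Page 1 is the bare archive URL; later pages append "/<page>/".
    if page <= 1 {
        format!("{}/", archive_url.trim_end_matches('/'))
    } else {
        format!("{}/{page}/", archive_url.trim_end_matches('/'))
    }
}

fn search_path(query: &str) -> String {
    // Whitespace-separated terms become a dash-joined slug.
    query.split_whitespace().collect::<Vec<_>>().join("-")
}

fn main() {
    assert_eq!(
        archive_page_url("https://blowjobs.pro/latest-updates", 3),
        "https://blowjobs.pro/latest-updates/3/"
    );
    assert_eq!(search_path("deep throat"), "deep-throat");
}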
src/providers/blowjobspro.rs (new file, +531)

@@ -0,0 +1,531 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
    Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use wreq::Version;

pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "kvs", "blowjob"],
    };

const BASE_URL: &str = "https://blowjobs.pro";
const CHANNEL_ID: &str = "blowjobspro";
const FIREFOX_UA: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}

#[derive(Debug, Clone)]
pub struct BlowjobsproProvider {
    url: String,
    categories: Arc<RwLock<Vec<FilterOption>>>,
    category_map: Arc<RwLock<HashMap<String, String>>>,
}

#[derive(Debug, Clone)]
enum Target {
    Latest,
    MostViewed,
    TopRated,
    Search { query: String },
    Category { url: String },
}

impl BlowjobsproProvider {
    pub fn new() -> Self {
        let provider = Self {
            url: BASE_URL.to_string(),
            categories: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
            }])),
            category_map: Arc::new(RwLock::new(HashMap::new())),
        };
        provider.spawn_initial_load();
        provider
    }

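    /// Kick off the one-shot category scrape on a dedicated OS thread with its
    /// own current-thread Tokio runtime, so `new()` stays synchronous and any
    /// failure is reported in the background instead of blocking construction.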
    fn spawn_initial_load(&self) {
        let url = self.url.clone();
        let categories = Arc::clone(&self.categories);
        let category_map = Arc::clone(&self.category_map);

        thread::spawn(move || {
            let runtime = match tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
            {
                Ok(runtime) => runtime,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "spawn_initial_load.runtime_build",
                        &error.to_string(),
                    );
                    return;
                }
            };

            runtime.block_on(async move {
                if let Err(error) = Self::load_categories(&url, categories, category_map).await {
                    report_provider_error_background(CHANNEL_ID, "load_categories", &error.to_string());
                }
            });
        });
    }

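    /// Assemble the channel descriptor: static metadata plus a "Sort" option
    /// (new / popular / rated) and a "Categories" option populated from the
    /// background category load (falls back to just "All" until that finishes).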
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        let categories = self
            .categories
            .read()
            .map(|value| value.clone())
            .unwrap_or_default();

        Channel {
            id: CHANNEL_ID.to_string(),
            name: "Blowjobs.pro".to_string(),
            description: "Blowjobs.pro KVS listings with latest, most viewed, top rated, search, and category shortcuts."
                .to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=blowjobs.pro".to_string(),
            status: "active".to_string(),
            categories: categories.iter().map(|value| value.title.clone()).collect(),
            options: vec![
                ChannelOption {
                    id: "sort".to_string(),
                    title: "Sort".to_string(),
                    description: "Browse feed ordering.".to_string(),
                    systemImage: "list.number".to_string(),
                    colorName: "blue".to_string(),
                    options: vec![
                        FilterOption {
                            id: "new".to_string(),
                            title: "Latest".to_string(),
                        },
                        FilterOption {
                            id: "popular".to_string(),
                            title: "Most Viewed".to_string(),
                        },
                        FilterOption {
                            id: "rated".to_string(),
                            title: "Top Rated".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
                ChannelOption {
                    id: "categories".to_string(),
                    title: "Categories".to_string(),
                    description: "Jump directly to category archives.".to_string(),
                    systemImage: "square.grid.2x2".to_string(),
                    colorName: "orange".to_string(),
                    options: categories,
                    multiSelect: false,
                },
            ],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }

    fn selector(value: &str) -> Result<Selector> {
        Selector::parse(value)
            .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
    }

    fn regex(value: &str) -> Result<Regex> {
        Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
    }

    fn decode_html(text: &str) -> String {
        decode(text.as_bytes())
            .to_string()
            .unwrap_or_else(|_| text.to_string())
    }

    fn collapse_whitespace(text: &str) -> String {
        text.split_whitespace().collect::<Vec<_>>().join(" ")
    }

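    /// Normalization used for category matching: trim, drop a leading `#`,
    /// treat `_`/`-` as spaces, collapse whitespace, and lowercase.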
    fn normalize_title(title: &str) -> String {
        title
            .trim()
            .trim_start_matches('#')
            .replace(['_', '-'], " ")
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ")
            .to_ascii_lowercase()
    }

    fn normalize_url(&self, url: &str) -> String {
        let trimmed = url.trim();
        if trimmed.is_empty() {
            return String::new();
        }
        if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
            return trimmed.to_string();
        }
        if trimmed.starts_with("//") {
            return format!("https:{trimmed}");
        }
        if trimmed.starts_with('/') {
            return format!("{}{}", self.url, trimmed);
        }
        format!("{}/{}", self.url, trimmed.trim_start_matches("./"))
    }

    fn html_headers(referer: &str) -> Vec<(String, String)> {
        vec![
            ("User-Agent".to_string(), FIREFOX_UA.to_string()),
            ("Accept".to_string(), HTML_ACCEPT.to_string()),
            ("Referer".to_string(), referer.to_string()),
        ]
    }

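    /// Join whitespace-separated search terms into the dash-separated,
    /// percent-encoded slug that KVS search URLs expect.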
    fn build_search_path(query: &str) -> String {
        query
            .split_whitespace()
            .map(|part| utf8_percent_encode(part, NON_ALPHANUMERIC).to_string())
            .collect::<Vec<_>>()
            .join("-")
    }

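    /// Page 1 is the bare archive URL with a trailing slash; later pages
    /// append `/<page>/`.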
    fn build_archive_page_url(archive_url: &str, page: u16) -> String {
        if page <= 1 {
            return archive_url.trim_end_matches('/').to_string() + "/";
        }
        format!("{}/{page}/", archive_url.trim_end_matches('/'))
    }

    fn build_target_url(&self, target: &Target, page: u16) -> String {
        match target {
            Target::Latest => {
                Self::build_archive_page_url(&format!("{}/latest-updates/", self.url), page)
            }
            Target::MostViewed => {
                Self::build_archive_page_url(&format!("{}/most-popular/", self.url), page)
            }
            Target::TopRated => {
                Self::build_archive_page_url(&format!("{}/top-rated/", self.url), page)
            }
            Target::Category { url } => Self::build_archive_page_url(url, page),
            Target::Search { query } => {
                let normalized = Self::build_search_path(query);
                format!("{}/search/{normalized}/", self.url)
            }
        }
    }

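    /// Resolve the browse target in priority order: explicit category option,
    /// `category:<name>` query prefix, a query that matches a known category,
    /// free-text search, then the sort selection (popular / rated / latest).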
    fn target_from_request(
        &self,
        sort: &str,
        query: Option<&str>,
        category: Option<&str>,
    ) -> Target {
        let category_value = category.unwrap_or("").trim();
        if !category_value.is_empty() && !category_value.eq_ignore_ascii_case("all") {
            if let Some(url) = self.resolve_category(category_value) {
                return Target::Category { url };
            }
        }

        if let Some(raw_query) = query {
            let trimmed = raw_query.trim();
            if !trimmed.is_empty() {
                if let Some(value) = trimmed.strip_prefix("category:")
                    && let Some(url) = self.resolve_category(value)
                {
                    return Target::Category { url };
                }
                if let Some(url) = self.resolve_category(trimmed) {
                    return Target::Category { url };
                }
                return Target::Search {
                    query: trimmed.to_string(),
                };
            }
        }

        match sort {
            "popular" => Target::MostViewed,
            "rated" => Target::TopRated,
            _ => Target::Latest,
        }
    }

    fn resolve_category(&self, value: &str) -> Option<String> {
        let normalized = Self::normalize_title(value);
        if normalized.is_empty() {
            return None;
        }

        self.category_map
            .read()
            .ok()
            .and_then(|map| map.get(&normalized).cloned())
    }

    async fn fetch_html(
        &self,
        options: &ServerOptions,
        url: &str,
        referer: &str,
    ) -> Result<String> {
        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
        requester
            .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
            .await
            .map_err(|error| Error::from(format!("request failed for {url}: {error}")))
    }

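    /// Search results past page 1 are served by the KVS async block endpoint
    /// (`mode=async&function=get_block`); page 1 uses the plain search URL.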
    fn search_page_from_async(&self, query: &str, page: u16) -> Option<String> {
        if page <= 1 {
            return None;
        }

        let query_value = query.trim();
        if query_value.is_empty() {
            return None;
        }

        let from = page;
        let encoded_query = utf8_percent_encode(query_value, NON_ALPHANUMERIC).to_string();
        Some(format!(
            "{}/search/{}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={encoded_query}&category_ids=&sort_by=&from_videos%2Bfrom_albums={from}",
            self.url,
            Self::build_search_path(query_value)
        ))
    }

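    /// Fetch one listing page, switching to the async search endpoint when
    /// paginating search results.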
    async fn fetch_listing_html(
        &self,
        options: &ServerOptions,
        target: &Target,
        page: u16,
    ) -> Result<String> {
        if let Target::Search { query } = target
            && page > 1
            && let Some(async_url) = self.search_page_from_async(query, page)
        {
            return self
                .fetch_html(
                    options,
                    &async_url,
                    &format!("{}/search/{}/", self.url, Self::build_search_path(query)),
                )
                .await;
        }

        let page_url = self.build_target_url(target, page);
        self.fetch_html(options, &page_url, &self.url).await
    }

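    /// Regex-scrape each `<div class="item">` segment of a listing page into a
    /// `VideoItem`: URL/id, title, screenshot thumb, duration, views, and
    /// category tags.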
    fn parse_listing_html(&self, html: &str) -> Result<Vec<VideoItem>> {
        let href_re = Self::regex(r#"href="([^"]+/videos/\d+/[^"]*)""#)?;
        let title_re = Self::regex(r#"title="([^"]+)""#)?;
        let thumb_re =
            Self::regex(r#"(?:data-original|src)="([^"]+/contents/videos_screenshots/[^"]+)""#)?;
        let duration_re = Self::regex(r#"<div class="duration">\s*([^<]+)\s*</div>"#)?;
        let views_re = Self::regex(r#"<div class="views">\s*([^<]+)\s*</div>"#)?;
        let tag_re = Self::regex(r#"<a href="[^"]*/categories/[^"]*"[^>]*>\s*([^<]+)\s*</a>"#)?;
        let id_re = Self::regex(r"/videos/(\d+)/")?;
        let mut videos = Vec::new();
        for segment in html.split("<div class=\"item\">").skip(1) {
            let Some(href_caps) = href_re.captures(segment) else {
                continue;
            };
            let Some(raw_url) = href_caps.get(1).map(|m| m.as_str()) else {
                continue;
            };
            let url = self.normalize_url(raw_url);
            let id = id_re
                .captures(&url)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
                .unwrap_or_else(|| url.clone());

            let title = title_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| Self::decode_html(m.as_str())))
                .unwrap_or_default();
            if title.is_empty() {
                continue;
            }

            let thumb = thumb_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| self.normalize_url(m.as_str())))
                .unwrap_or_default();

            let duration = duration_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
                .and_then(|value| parse_time_to_seconds(&value))
                .unwrap_or(0)
                .max(0) as u32;

            let views = views_re
                .captures(segment)
                .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
                .and_then(|value| parse_abbreviated_number(&value))
                .map(|value| value as u32);

            let tags = tag_re
                .captures_iter(segment)
                .filter_map(|caps| {
                    caps.get(1).map(|m| {
                        Self::decode_html(m.as_str())
                            .split_whitespace()
                            .collect::<Vec<_>>()
                            .join(" ")
                            .trim()
                            .to_string()
                    })
                })
                .filter(|value| !value.is_empty())
                .collect::<Vec<_>>();

            let mut item = VideoItem::new(
                id,
                title,
                url,
                CHANNEL_ID.to_string(),
                thumb,
                duration,
            );
            item.tags = Some(tags);
            item.views = views;
            videos.push(item);
        }

        Ok(videos)
    }

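    /// Scrape `/categories/` once and publish the results: a `FilterOption`
    /// list for the channel UI and a normalized-title -> category-URL map used
    /// by `resolve_category`.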
    async fn load_categories(
        url: &str,
        categories: Arc<RwLock<Vec<FilterOption>>>,
        category_map: Arc<RwLock<HashMap<String, String>>>,
    ) -> Result<()> {
        let mut requester = crate::util::requester::Requester::new();
        let category_url = format!("{url}/categories/");
        let html = requester
            .get_with_headers(
                &category_url,
                Self::html_headers(url),
                Some(Version::HTTP_11),
            )
            .await
            .map_err(|error| Error::from(format!("category fetch failed: {error}")))?;

        let doc = Html::parse_document(&html);
        let link_selector = Self::selector("#list_categories_categories_list_items a.item-link")?;

        let mut options = vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }];
        let mut map = HashMap::new();

        for link in doc.select(&link_selector) {
            let Some(href) = link.value().attr("href") else {
                continue;
            };
            let raw_title = link
                .value()
                .attr("title")
                .unwrap_or("")
                .to_string();
            let title = Self::decode_html(&raw_title).trim().to_string();
            if title.is_empty() {
                continue;
            }

            let normalized = Self::normalize_title(&title);
            if normalized.is_empty() {
                continue;
            }

            options.push(FilterOption {
                id: normalized.clone(),
                title: title.clone(),
            });
            map.insert(normalized, href.to_string());
        }

        if let Ok(mut guard) = categories.write() {
            *guard = options;
        }
        if let Ok(mut guard) = category_map.write() {
            *guard = map;
        }

        Ok(())
    }
}

#[async_trait]
impl Provider for BlowjobsproProvider {
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u16>().unwrap_or(1).max(1);
        let target = self.target_from_request(
            sort.as_str(),
            query.as_deref(),
            options.categories.as_deref(),
        );

        let html = match self.fetch_listing_html(&options, &target, page_number).await {
            Ok(html) => html,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos.fetch_listing_html", &error.to_string())
                    .await;
                return vec![];
            }
        };

        match self.parse_listing_html(&html) {
            Ok(videos) => videos,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos.parse_listing_html", &error.to_string())
                    .await;
                vec![]
            }
        }
    }

    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}