pimpbunny and more fixes

This commit is contained in:
Simon
2026-05-18 16:52:25 +00:00
committed by ForgeCode
parent dc70e2c9a6
commit ee47bbe74d
13 changed files with 1457 additions and 311 deletions

View File

@@ -20,7 +20,7 @@ ntex-files = "3"
serde = "1.0.228"
serde_json = "1.0.145"
tokio = { version = "1.49", features = ["full"] }
wreq = { version = "5", features = ["cookies", "multipart", "json"] }
wreq = { version = "5.3.0", features = ["cookies", "multipart", "json"] }
wreq-util = "2"
percent-encoding = "2.3.2"
capitalize = "0.3.4"
@@ -37,6 +37,8 @@ lru = "0.18.0"
rand = "0.10.0"
chrono = "0.4.44"
md5 = "0.8.0"
chromiumoxide = { version = "0.7", features = ["tokio-runtime"] }
playwright = "0.0.20"
[lints.rust]
warnings = "warn"

View File

@@ -296,6 +296,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "archivebate1",
ty: "ArchivebateProvider",
},
ProviderDef {
id: "thaiporntv",
module: "thaiporntv",
ty: "ThaipornTvProvider",
},
];
fn main() {

View File

@@ -48,6 +48,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `sextb` | `jav` | no | no | JAV family provider. |
| `shooshtime` | `onlyfans` | no | yes | Redirect proxy plus dedicated media route. |
| `spankbang` | `mainstream-tube` | no | yes | Best template for redirect proxy plus anti-bot fetches. |
| `thaiporntv` | `mainstream-tube` | no | yes | Decodes `data-enc` attribute for proxied HLS playback. |
| `supjav` | `jav` | no | no | JAV/HLS and uploader-id examples. |
| `sxyprn` | `mainstream-tube` | no | yes | Redirect proxy helper usage. |
| `tnaflix` | `mainstream-tube` | no | no | Mainstream tube provider. |

View File

@@ -1,4 +1,4 @@
Implement a new Hottub provider for `archivebate1` at `https://archivebate1.com`.
Implement a new Hottub provider for `thaiporntv` at `https://www.thaiporntv.com`.
You are working inside the Hottub Rust server. Your job is to add a functioning provider module that can survive handoff to another model with minimal guesswork. Do not stop at code generation. Carry the work through code, validation, and documentation updates.

784
src/providers/thaiporntv.rs Normal file
View File

@@ -0,0 +1,784 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use base64::{engine::general_purpose, Engine};
use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
use std::time::Duration as StdDuration;
use std::{thread, vec};
use tokio::time::timeout;
use url::Url;
use wreq::Version;
/// Static channel metadata for this provider: the group it is listed under
/// and the descriptive tags attached to the channel.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["thai", "asian", "amateur"],
};
// Declares the `Error` / `Result` types used throughout this file.
// `std::io::Error` is linked in as the `Io` variant, and `Parse` carries a
// free-form message rendered as "parse error: <msg>".
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Site root; every listing, search and tag URL is built from it.
const BASE_URL: &str = "https://www.thaiporntv.com";
/// Identifier used for the channel, for video items and in error reports.
const CHANNEL_ID: &str = "thaiporntv";
/// `User-Agent` header value sent with every HTML request.
const USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";
/// `Accept` header value sent with every HTML request.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// Provider that scrapes listing, search and tag pages of the site.
#[derive(Debug, Clone)]
pub struct ThaipornTvProvider {
// Site root that request URLs are built from.
url: String,
// Tag filter options; shared so the background loader can append to them.
tags: Arc<RwLock<Vec<FilterOption>>>,
}
/// Listing order for the un-filtered archive; each variant maps to one
/// path segment in `build_archive_url`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArchiveMode {
Latest,
Popular,
Commented,
Rated,
Longest,
}
/// What a request resolves to: a sorted archive listing, a free-text search,
/// or a single tag page.
#[derive(Debug, Clone)]
enum Target {
Archive(ArchiveMode),
Search {
// Trimmed search text; percent-encoded when the URL is built.
query: String,
},
Tag {
// Tag path segment; percent-encoded when the URL is built.
slug: String,
},
}
impl ThaipornTvProvider {
/// Creates a provider rooted at `BASE_URL`, seeded with the single "All"
/// tag option, and starts the background tag load before returning.
pub fn new() -> Self {
    let all_option = FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    };
    let instance = Self {
        url: BASE_URL.to_string(),
        tags: Arc::new(RwLock::new(vec![all_option])),
    };
    instance.spawn_initial_load();
    instance
}
fn spawn_initial_load(&self) {
let url = self.url.clone();
let tags = Arc::clone(&self.tags);
thread::spawn(move || {
let runtime = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(runtime) => runtime,
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"spawn_initial_load.runtime_build",
&error.to_string(),
);
return;
}
};
runtime.block_on(async move {
if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await {
report_provider_error_background(
CHANNEL_ID,
"load_tags",
&error.to_string(),
);
}
});
});
}
/// Builds the channel descriptor: static metadata, the five sort choices,
/// and the currently loaded tags (used both as `categories` titles and as
/// the options of the "filter" selector).
///
/// If the tag lock cannot be read, an empty tag list is used.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let tags = self.tags.read().map(|guard| guard.clone()).unwrap_or_default();
    let categories = tags.iter().map(|tag| tag.title.clone()).collect();

    let sort_choices = [
        ("new", "Most Recent"),
        ("popular", "Most Viewed"),
        ("commented", "Most Commented"),
        ("rated", "Top Rated"),
        ("longest", "Longest"),
    ]
    .iter()
    .map(|&(id, title)| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    })
    .collect::<Vec<_>>();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Sort the videos".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: sort_choices,
        multiSelect: false,
    };
    let tag_option = ChannelOption {
        id: "filter".to_string(),
        title: "Tags".to_string(),
        description: "Video tags.".to_string(),
        systemImage: "tag.fill".to_string(),
        colorName: "green".to_string(),
        options: tags,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "THAIPornTV".to_string(),
        description: "Free Thai Porn Videos & Asian Sex Tube.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=thaiporntv.com".to_string(),
        status: "active".to_string(),
        categories,
        options: vec![sort_option, tag_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
fn collapse_whitespace(text: &str) -> String {
text.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Decodes HTML entities in `text`, replaces non-breaking spaces with
/// regular spaces and trims the result. If decoding fails the input text
/// is used as-is before the same clean-up.
fn decode_html_entities(text: &str) -> String {
    let decoded = match decode(text.as_bytes()).to_string() {
        Ok(value) => value,
        Err(_) => text.to_string(),
    };
    let without_nbsp = decoded.replace('\u{a0}', " ");
    without_nbsp.trim().to_string()
}
/// Resolves `value` against the provider's site root.
///
/// * `http://` / `https://` values are returned unchanged.
/// * Protocol-relative values (`//host/path`) get an `https:` scheme
///   instead of being glued onto the site root as if they were a path.
/// * Everything else is joined to the root with exactly one `/`.
fn absolute_url(&self, value: &str) -> String {
    if value.starts_with("http://") || value.starts_with("https://") {
        return value.to_string();
    }
    if let Some(rest) = value.strip_prefix("//") {
        return format!("https://{rest}");
    }
    format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        value.trim_start_matches('/')
    )
}
/// Header set sent with HTML page requests: the given referer plus the
/// fixed user-agent, accept and accept-language values.
fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
    [
        ("Referer", referer),
        ("User-Agent", USER_AGENT),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ]
    .iter()
    .map(|&(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
async fn fetch_html(
&self,
requester: &mut Requester,
url: &str,
referer: &str,
) -> Result<String> {
requester
.get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Appends `item` to the shared option list unless its id or title is
/// empty, or an existing option already has the same id or title.
/// A poisoned lock makes this a no-op.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    if item.id.is_empty() || item.title.is_empty() {
        return;
    }
    let Ok(mut values) = target.write() else {
        return;
    };
    let already_present = values
        .iter()
        .any(|existing| existing.id == item.id || existing.title == item.title);
    if !already_present {
        values.push(item);
    }
}
/// Downloads `{base_url}/tags/` and appends every tag link found there to
/// the shared `tags` list (deduplicated by `push_unique`).
///
/// The option id is the link's raw `href`; the title is the link text with
/// entities decoded, whitespace collapsed, and a trailing numeric count
/// (e.g. "Amateur 123") removed.
///
/// # Errors
/// Fails when the page cannot be fetched or when the selector / pattern
/// cannot be compiled.
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let mut requester = Requester::new();
    let provider = Self {
        url: base_url.to_string(),
        tags: Arc::clone(&tags),
    };
    let html = provider
        .fetch_html(
            &mut requester,
            &format!("{}/tags/", base_url),
            &format!("{}/", base_url),
        )
        .await?;
    let document = Html::parse_document(&html);
    let selector = Self::selector("a[href*='/tags/']")?;
    // Compiled once, outside the loop; strips the video count that follows the tag name.
    let count_suffix = Self::regex(r"^(.+?)\s+\d+$")?;
    for element in document.select(&selector) {
        let Some(href) = element.value().attr("href") else {
            continue;
        };
        // Anchor text may carry layout whitespace between the name and the count.
        let raw_title = Self::collapse_whitespace(&Self::decode_html_entities(
            &element.text().collect::<String>(),
        ));
        let title = count_suffix
            .captures(&raw_title)
            .and_then(|captures| captures.get(1))
            .map_or(raw_title.as_str(), |name| name.as_str())
            .to_string();
        if title.is_empty() {
            continue;
        }
        Self::push_unique(
            &tags,
            FilterOption {
                id: href.to_string(),
                title,
            },
        );
    }
    Ok(())
}
/// Maps a sort option id to its archive mode; anything unrecognised
/// (including "new") selects `ArchiveMode::Latest`.
fn archive_from_sort(sort: &str) -> ArchiveMode {
    const KNOWN_MODES: [(&str, ArchiveMode); 4] = [
        ("popular", ArchiveMode::Popular),
        ("commented", ArchiveMode::Commented),
        ("rated", ArchiveMode::Rated),
        ("longest", ArchiveMode::Longest),
    ];
    KNOWN_MODES
        .iter()
        .find(|(id, _)| *id == sort)
        .map_or(ArchiveMode::Latest, |(_, mode)| *mode)
}
/// Picks the target for a non-search request: the tag named by
/// `options.filter` when it matches a known tag, otherwise the archive
/// listing selected by `sort`.
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
    options
        .filter
        .as_deref()
        .and_then(|value| self.find_tag_target_in_options(value))
        .unwrap_or_else(|| Target::Archive(Self::archive_from_sort(sort)))
}
/// Picks the target for a search request: a tag page when the query names
/// a known tag, otherwise a free-text search for the trimmed query.
fn resolve_query_target(&self, query: &str) -> Target {
    match self.find_tag_target_in_options(query) {
        Some(tag_target) => tag_target,
        None => Target::Search {
            query: query.trim().to_string(),
        },
    }
}
/// Looks `value` up among the loaded tags and converts the first match
/// into a target.
///
/// A tag matches when its id equals `value` ignoring ASCII case, or when
/// its trimmed, lower-cased title equals the trimmed, lower-cased `value`.
/// Returns `None` when the lock is poisoned, nothing matches, or the
/// matching option's id is not a tag link.
fn find_tag_target_in_options(
    &self,
    value: &str,
) -> Option<Target> {
    let wanted_title = value.trim().to_lowercase();
    let known_tags = self.tags.read().ok()?;
    for item in known_tags.iter() {
        let id_matches = item.id.eq_ignore_ascii_case(value);
        if id_matches || item.title.trim().to_lowercase() == wanted_title {
            // Only the first match is considered, even if it yields no target.
            return self.target_from_filter_id(&item.id);
        }
    }
    None
}
fn target_from_filter_id(&self, id: &str) -> Option<Target> {
if id.contains("/tags/") {
let url = Url::parse(&self.absolute_url(id)).ok()?;
let path_segments = url.path_segments()?;
let slug = path_segments.last()?.trim_end_matches('/').to_string();
return Some(Target::Tag { slug });
}
None
}
/// Builds the page URL for `target`, delegating to the archive, search or
/// tag URL builder that matches the variant.
fn build_url_for_target(&self, target: &Target, page: u32) -> String {
    match target {
        Target::Tag { slug } => self.build_tag_url(slug, page),
        Target::Search { query } => self.build_search_url(query, page),
        Target::Archive(mode) => self.build_archive_url(*mode, page),
    }
}
/// Builds the archive listing URL for `mode`. Pages 0 and 1 use the bare
/// section URL; later pages append `page/{n}/`.
fn build_archive_url(&self, mode: ArchiveMode, page: u32) -> String {
    let section = match mode {
        ArchiveMode::Latest => "recent",
        ArchiveMode::Popular => "popular",
        ArchiveMode::Commented => "commented",
        ArchiveMode::Rated => "rated",
        ArchiveMode::Longest => "longest",
    };
    let root = &self.url;
    match page {
        0 | 1 => format!("{root}/{section}/"),
        _ => format!("{root}/{section}/page/{page}/"),
    }
}
/// Builds the search URL for `query` (percent-encoded). Pages 0 and 1 omit
/// the `page` parameter.
fn build_search_url(&self, query: &str, page: u32) -> String {
    let encoded_query = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
    let root = &self.url;
    match page {
        0 | 1 => format!("{root}/search/?q={encoded_query}"),
        _ => format!("{root}/search/?q={encoded_query}&page={page}"),
    }
}
fn build_tag_url(&self, slug: &str, page: u32) -> String {
let encoded_slug = utf8_percent_encode(slug, NON_ALPHANUMERIC).to_string();
if page <= 1 {
format!("{}/tags/{}/", self.url, encoded_slug)
} else {
format!("{}/tags/{}/page/{}/", self.url, encoded_slug, page)
}
}
/// Decodes a player `data-enc` attribute into a list of video formats.
///
/// Steps, as implemented below:
/// 1. Map the URL-safe base64 alphabet (`-`, `_`) to the standard one and
///    normalise padding to a multiple of four characters using `=`.
/// 2. Base64-decode; the first byte is the XOR key for all remaining bytes.
/// 3. Parse the XOR-decoded bytes as UTF-8 JSON, which must be an array.
/// 4. Every array entry with string fields `u` (URL) and `q` (quality)
///    becomes a format; entries missing either field are skipped.
///
/// Each format is tagged as `application/x-mpegURL` and given a `Referer`
/// header of `BASE_URL`.
///
/// # Errors
/// Fails on invalid base64, an empty payload, invalid UTF-8, invalid JSON,
/// or JSON that is not an array.
fn decode_data_enc(encoded_data: &str) -> Result<Vec<VideoFormat>> {
    // Existing padding is dropped first so it can be re-added uniformly.
    let cleaned_data = encoded_data
        .trim()
        .trim_end_matches('=')
        .replace('-', "+")
        .replace('_', "/");
    let padded_len = (cleaned_data.len() + 3) & !3;
    // Fill character must be '=': padding with spaces makes the decoder reject the input.
    let padded_data = format!("{:=<pad$}", cleaned_data, pad = padded_len);
    let decoded_bytes = general_purpose::STANDARD
        .decode(&padded_data)
        .map_err(|e| Error::from(format!("Base64 decode failed: {e}")))?;
    let Some((&key, payload)) = decoded_bytes.split_first() else {
        return Err(Error::from("Decoded bytes are empty"));
    };
    let decrypted_bytes: Vec<u8> = payload.iter().map(|&byte| byte ^ key).collect();
    let json_str = String::from_utf8(decrypted_bytes)
        .map_err(|e| Error::from(format!("UTF-8 decode failed: {e}")))?;
    let formats_json: serde_json::Value = serde_json::from_str(&json_str)
        .map_err(|e| Error::from(format!("JSON parse failed: {e}")))?;
    let Some(formats_array) = formats_json.as_array() else {
        return Err(Error::from("JSON is not an array"));
    };
    let mut formats = Vec::with_capacity(formats_array.len());
    for format_val in formats_array {
        let Some(u) = format_val.get("u").and_then(|v| v.as_str()) else {
            continue;
        };
        let Some(q) = format_val.get("q").and_then(|v| v.as_str()) else {
            continue;
        };
        let mut format = VideoFormat::new(
            u.to_string(),
            q.to_string(),
            "application/x-mpegURL".to_string(), // Assuming m3u8
        );
        // The stream host is sent the site as referer.
        format.add_http_header("Referer".to_string(), BASE_URL.to_string());
        formats.push(format);
    }
    Ok(formats)
}
/// Converts one listing card into a `VideoItem`.
///
/// Returns `None` when the card has no `/videos/` link or the link does
/// not match the id pattern. Title falls back from the link's `title`
/// attribute to the thumbnail's `alt` text; duration, views, upload date
/// and tags are optional and default to empty/zero when absent.
///
/// `_proxy_base_url` is currently unused.
fn parse_card(
    &self,
    card: ElementRef<'_>,
    _proxy_base_url: &str,
) -> Option<VideoItem> {
    let link_selector = Self::selector("a[href*='/videos/']").ok()?;
    let thumb_selector = Self::selector("img").ok()?;
    let duration_selector = Self::selector("div.duration").ok()?;
    let views_selector = Self::selector("div.views").ok()?;
    let uploaded_at_selector = Self::selector("div.date").ok()?;
    let tag_selector = Self::selector("a[href*='/tags/']").ok()?;
    // NOTE(review): `[^/-]+` rejects slugs that contain hyphens before the
    // numeric id — confirm against real listing URLs.
    let id_pattern = Self::regex(r"/videos/\d{4}/[^/-]+-(\d+)/$").ok()?;

    let link = card.select(&link_selector).next()?;
    let href = link.value().attr("href")?.to_string();
    let id = id_pattern.captures(&href)?.get(1)?.as_str().to_string();

    let image = card.select(&thumb_selector).next();
    let title = link
        .value()
        .attr("title")
        .or_else(|| image.and_then(|e| e.value().attr("alt")))
        .map(Self::decode_html_entities)
        .unwrap_or_default();
    let thumb = image
        .and_then(|e| e.value().attr("src"))
        .map(|s| self.absolute_url(s))
        .unwrap_or_default();

    let duration_text = card.select(&duration_selector).next()
        .map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
        .unwrap_or_default();
    let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;

    let views = card.select(&views_selector).next()
        .map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
        .and_then(|s| s.strip_suffix(" views").map(|s| parse_abbreviated_number(s)))
        .flatten();

    let uploaded_at_text = card.select(&uploaded_at_selector).next()
        .map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
        .unwrap_or_default();
    let uploaded_at = NaiveDate::parse_from_str(&uploaded_at_text, "%d %b %Y")
        .ok()
        .and_then(|date| {
            date.and_hms_opt(0, 0, 0)
                .map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc).timestamp() as u64)
        });

    let tags: Vec<String> = card.select(&tag_selector)
        .filter_map(|e| e.value().attr("href"))
        .filter_map(|link_href| {
            let url = Url::parse(&self.absolute_url(link_href)).ok()?;
            // A trailing slash leaves an empty final segment, so take the
            // last non-empty one; the bare `/tags/` index is not a tag.
            let slug = url.path_segments()?.rev().find(|segment| !segment.is_empty())?;
            (slug != "tags").then(|| Self::decode_html_entities(slug))
        })
        .collect();

    let mut item = VideoItem::new(
        id,
        title,
        self.absolute_url(&href),
        CHANNEL_ID.to_string(),
        thumb,
        duration,
    );
    if let Some(views) = views { item = item.views(views); }
    if let Some(uploaded_at) = uploaded_at { item = item.uploaded_at(uploaded_at); }
    if !tags.is_empty() { item = item.tags(tags); }
    Some(item)
}
/// Parses a listing page and returns one item per `div.video-list-item`
/// card that `parse_card` accepts, in document order.
fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(&html);
    let card_selector = Self::selector("div.video-list-item")?;
    let items = document
        .select(&card_selector)
        .filter_map(|card| self.parse_card(card, proxy_base_url))
        .collect();
    Ok(items)
}
async fn enrich_video(&self, mut item: VideoItem, options: &ServerOptions) -> Result<VideoItem> {
let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video");
let detail_fetch = timeout(
StdDuration::from_secs(6),
self.fetch_html(&mut requester, &item.url, &item.url),
)
.await
.map_err(|_| Error::from(format!("detail request timed out for {}", item.url)))??;
let document = Html::parse_document(&detail_fetch);
let video_element_selector = Self::selector("video[data-enc]")?;
if let Some(video_element) = document.select(&video_element_selector).next() {
if let Some(data_enc) = video_element.value().attr("data-enc") {
match Self::decode_data_enc(data_enc) {
Ok(formats) => {
item.formats = Some(formats);
},
Err(e) => {
report_provider_error_background(CHANNEL_ID, "decode_data_enc", &format!("url={}; error={}", item.url, e));
}
}
}
}
Ok(item)
}
/// Returns the items of one listing page, using `cache` when the entry is
/// younger than five minutes.
///
/// On a cache miss the page is fetched (10 second timeout), parsed, and
/// truncated to `per_page_limit` (at least one item). When
/// `enrich_details` is set, up to four detail pages are fetched
/// concurrently; items whose enrichment fails are reported and dropped.
/// Results keep the listing's order, so the site's sort order is preserved.
///
/// Non-empty results are written back to the cache under `url`.
///
/// # Errors
/// Fails when the listing request times out or fails, or when the card
/// selector cannot be compiled.
async fn fetch_items_for_url(
    &self,
    cache: VideoCache,
    url: String,
    per_page_limit: usize,
    enrich_details: bool,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    if let Some((time, items)) = cache.get(&url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
    }
    let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url");
    let html = timeout(
        StdDuration::from_secs(10),
        self.fetch_html(&mut requester, &url, &url),
    )
    .await
    .map_err(|_| Error::from(format!("list request timed out for {url}")))??;
    let list_items = self.get_video_items_from_html(
        html,
        options.public_url_base.as_deref().unwrap_or_default(),
    )?;
    if list_items.is_empty() {
        return Ok(vec![]);
    }
    let limited_items = list_items
        .into_iter()
        .take(per_page_limit.max(1))
        .collect::<Vec<_>>();
    if !enrich_details {
        cache.insert(url, limited_items.clone());
        return Ok(limited_items);
    }
    let items = stream::iter(limited_items.into_iter().map(|item| {
        let provider = self.clone();
        let options = options.clone();
        async move { provider.enrich_video(item, &options).await }
    }))
    // `buffered` (not `buffer_unordered`) yields results in input order,
    // still running four requests at a time.
    .buffered(4)
    .collect::<Vec<_>>()
    .await;
    let mut final_items = Vec::with_capacity(items.len());
    for item_result in items {
        match item_result {
            Ok(item) => final_items.push(item),
            Err(e) => report_provider_error_background(
                CHANNEL_ID,
                "enrich_video_stream",
                &e.to_string(),
            ),
        }
    }
    if !final_items.is_empty() {
        cache.insert(url, final_items.clone());
    }
    Ok(final_items)
}
/// Lists videos for a non-search request: resolves the tag/sort target,
/// builds its URL and fetches it. Detail enrichment is only requested for
/// pages 0 and 1.
async fn get(
    &self,
    cache: VideoCache,
    page: u32,
    sort: &str,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let enrich_details = page <= 1;
    let listing_url = {
        let target = self.resolve_option_target(&options, sort);
        self.build_url_for_target(&target, page)
    };
    self.fetch_items_for_url(cache, listing_url, per_page_limit, enrich_details, &options)
        .await
}
/// Lists videos for a search request: resolves the query to a tag page or
/// a free-text search, builds its URL and fetches it. Detail enrichment is
/// only requested for pages 0 and 1.
async fn query(
    &self,
    cache: VideoCache,
    page: u32,
    query: &str,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let enrich_details = page <= 1;
    let listing_url = {
        let target = self.resolve_query_target(query);
        self.build_url_for_target(&target, page)
    };
    self.fetch_items_for_url(cache, listing_url, per_page_limit, enrich_details, &options)
        .await
}
}
#[async_trait]
impl Provider for ThaipornTvProvider {
    /// Entry point used by the server: runs a search when `query` is
    /// non-blank, otherwise a sorted/tagged listing.
    ///
    /// `page` and `per_page` are parsed leniently (defaults 1 and 30).
    /// Any failure is reported and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let page: u32 = page.parse().unwrap_or(1);
        let per_page_limit: usize = per_page.parse().unwrap_or(30);
        let search_text = query.filter(|text| !text.trim().is_empty());
        let result = if let Some(search_text) = search_text {
            self.query(cache, page, &search_text, per_page_limit, options)
                .await
        } else {
            self.get(cache, page, &sort, per_page_limit, options).await
        };
        match result {
            Ok(videos) => videos,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }

    /// Returns the channel descriptor for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::util::cache::VideoCache;
    use crate::util::requester::Requester;

    /// Fresh provider instance for each test.
    fn provider() -> ThaipornTvProvider {
        ThaipornTvProvider::new()
    }

    /// Options shared by the live (ignored) tests.
    fn live_options() -> ServerOptions {
        ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("http://127.0.0.1:18080".to_string()),
            requester: Some(Requester::new()),
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: Some("new".to_string()),
            sexuality: None,
        }
    }

    #[test]
    fn builds_archive_urls() {
        let provider = provider();
        let cases = [
            (ArchiveMode::Latest, 1, "https://www.thaiporntv.com/recent/"),
            (ArchiveMode::Latest, 2, "https://www.thaiporntv.com/recent/page/2/"),
            (ArchiveMode::Popular, 1, "https://www.thaiporntv.com/popular/"),
            (ArchiveMode::Popular, 3, "https://www.thaiporntv.com/popular/page/3/"),
        ];
        for (mode, page, expected) in cases {
            assert_eq!(provider.build_archive_url(mode, page), expected);
        }
    }

    #[test]
    fn builds_search_urls() {
        let provider = provider();
        let cases = [
            (1, "https://www.thaiporntv.com/search/?q=thai%20student"),
            (2, "https://www.thaiporntv.com/search/?q=thai%20student&page=2"),
        ];
        for (page, expected) in cases {
            assert_eq!(provider.build_search_url("thai student", page), expected);
        }
    }

    #[test]
    fn builds_tag_urls() {
        let provider = provider();
        let cases = [
            ("blowjob", 1, "https://www.thaiporntv.com/tags/blowjob/"),
            ("thai-massage", 2, "https://www.thaiporntv.com/tags/thai-massage/page/2/"),
        ];
        for (slug, page, expected) in cases {
            assert_eq!(provider.build_tag_url(slug, page), expected);
        }
    }

    #[test]
    fn decodes_data_enc_correctly() {
        let encoded = "C1BwKX4pMSljf397eDEkJHxuaSV_bmhjfWJveCV_ZHskZjh-MyQ6PT4zVD8zO3slZjh-MyknKXopMSk_Mzt7KScpYykxbWpneG52Vg";
        let formats = ThaipornTvProvider::decode_data_enc(encoded).unwrap();
        assert_eq!(formats.len(), 1);
        let first = &formats[0];
        assert_eq!(first.url, "https://web.techvids.top/m3u8/1658_480p.m3u8");
        assert_eq!(first.quality, "480p");
        assert_eq!(first.http_headers.get("Referer").unwrap(), "https://www.thaiporntv.com");
    }

    // Hits the live site; run explicitly with `--ignored`.
    #[tokio::test]
    #[ignore]
    async fn fetches_and_parses_archive() {
        let provider = provider();
        let videos = provider
            .get(VideoCache::new(), 1, "new", 10, live_options())
            .await
            .unwrap();
        assert!(!videos.is_empty());
        // Further assertions on video content
    }

    // Hits the live site; run explicitly with `--ignored`.
    #[tokio::test]
    #[ignore]
    async fn fetches_and_parses_search() {
        let provider = provider();
        let videos = provider
            .query(VideoCache::new(), 1, "thai student", 10, live_options())
            .await
            .unwrap();
        assert!(!videos.is_empty());
        // Further assertions on video content
    }
}

View File

@@ -14,6 +14,7 @@ use crate::proxies::vjav::VjavProxy;
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
use crate::proxies::vidara::VidaraProxy;
use crate::proxies::lulustream::LulustreamProxy;
use crate::proxies::thaiporntv::ThaipornTvProxy;
pub mod archivebate;
pub mod clapdat;
@@ -33,6 +34,7 @@ pub mod pornhubthumb;
pub mod shooshtime;
pub mod spankbang;
pub mod sxyprn;
pub mod thaiporntv;
pub mod vidara;
pub mod vjav;
@@ -53,6 +55,7 @@ pub enum AnyProxy {
Vjav(VjavProxy),
Vidara(VidaraProxy),
Clapdat(ClapdatProxy),
ThaipornTv(ThaipornTvProxy),
}
pub trait Proxy {
@@ -77,6 +80,7 @@ impl Proxy for AnyProxy {
AnyProxy::Vjav(p) => p.get_video_url(url, requester).await,
AnyProxy::Vidara(p) => p.get_video_url(url, requester).await,
AnyProxy::Clapdat(p) => p.get_video_url(url, requester).await,
AnyProxy::ThaipornTv(p) => p.get_video_url(url, requester).await,
}
}
}

View File

@@ -1,33 +1,18 @@
use crate::util::browser;
use crate::util::requester::Requester;
use ntex::web;
use regex::Regex;
use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}};
use std::time::{SystemTime, UNIX_EPOCH};
use url::Url;
use wreq::Version;
#[derive(Debug, Clone)]
pub struct PimpbunnyProxy {}
impl PimpbunnyProxy {
const FIREFOX_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &'static str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// Site-wide fallback key only used when the page omits a license_code.
const LICENSE_CODE: &'static str = "$576262819011919";
pub fn new() -> Self {
PimpbunnyProxy {}
}
fn js_now_millis() -> u128 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis())
.unwrap_or(0)
}
fn normalize_detail_url(url: &str) -> Option<String> {
let normalized = if url.starts_with("http://") || url.starts_with("https://") {
url.to_string()
@@ -38,7 +23,7 @@ impl PimpbunnyProxy {
}
fn is_allowed_detail_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else {
let Ok(url) = Url::parse(url) else {
return false;
};
if url.scheme() != "https" {
@@ -51,253 +36,182 @@ impl PimpbunnyProxy {
&& !url.path().starts_with("/contents/videos_screenshots/")
}
fn root_referer() -> &'static str {
"https://pimpbunny.com/"
}
fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
vec![
("Referer".to_string(), referer.to_string()),
("User-Agent".to_string(), Self::FIREFOX_USER_AGENT.to_string()),
("Accept".to_string(), Self::HTML_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
]
}
fn headers_with_cookies(
requester: &Requester,
request_url: &str,
referer: &str,
) -> Vec<(String, String)> {
let mut headers = Self::html_headers_with_referer(referer);
if let Some(cookie) = requester.cookie_header_for_url(request_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
}
async fn warm_root_session(requester: &mut Requester) {
let _ = requester
.get_with_headers(
Self::root_referer(),
Self::html_headers_with_referer(Self::root_referer()),
Some(Version::HTTP_11),
)
.await;
}
// ── Key generation (kt_player.js IIFE "function c", lines 541-586) ────────
//
// Derives the 32-char shuffle key from the page's license_code value.
//
// Steps:
// a) Build digit-string f from license_code[1:]; replace every 0 with 1.
// b) Split f at midpoint j = len/2; compute f_str = str(4 * |k l|).
// c) For g in 0..=j and h in 1..=4:
// n = digit(license_code[g+h]) + digit(f_str[g]);
// if n >= 10 { n -= 10 }
// push n onto key.
fn generate_key(license_code: &str) -> String {
let d: Vec<char> = license_code.chars().collect();
// (a) Build digit string, replacing 0 → 1
let mut f = String::new();
for ch in d.iter().skip(1) {
match ch.to_digit(10) {
Some(0) => f.push('1'),
Some(v) => f.push(char::from_digit(v, 10).unwrap()),
None => f.push('1'),
}
}
// (b) Split and compute intermediate
let j = f.len() / 2;
let k: i64 = f[..=j].parse().unwrap_or(0);
let l: i64 = f[j..].parse().unwrap_or(0);
let f_str = ((k - l).unsigned_abs() as i64 * 4).to_string();
// (c) Build 32-char key
let mut key = String::new();
for g in 0..=j {
for h in 1..=4usize {
let nd = d.get(g + h).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64;
let nf = f_str.chars().nth(g).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64;
let mut n = nd + nf;
if n >= 10 { n -= 10; }
key.push(char::from_digit(n as u32, 10).unwrap());
}
}
key
}
// ── Hash permutation (kt_player.js IIFE "function b", lines 521-539) ──────
//
// Reverse Fisher-Yates over the first 32 chars of the hash, driven by
// the cumulative digit-sum suffix of `key`.
//
// for k from 31 downto 0:
// l = k + Σ key_digit[m] for m in k..len(key)
// l %= 32
// swap hash[k] and hash[l]
fn transform_hash(hash_str: &str, key: &str) -> String {
let mut chars: Vec<char> = hash_str.chars().collect();
let tail_start = chars.len().min(32);
let (head, _) = chars.split_at(tail_start);
let mut h: Vec<char> = head.to_vec();
let tail: String = chars.drain(tail_start..).collect();
let key_digits: Vec<usize> = key
/// Generate 32-char numeric key from a pimpbunny license_code (e.g. '$576262819011919').
///
/// Stage 1 of the KVS player URL decoder, extracted from kt_player.js:
/// - d_raw = license_code (keeps '$' at index 0 for the d[g+h] lookup)
/// - d_mod = strip '$', replace every '0' digit with '1'
/// - split d_mod at midpoint j = floor(len/2) to get k and l
/// - f_str = 4 * |k - l| as a string
/// - for g in 0..=j, h in 1..=4: n = d_raw[g+h] + f_str[g]; if n >= 10 then n -= 10
fn generate_key(license_code: &str) -> Option<String> {
let d_raw = license_code;
// Strip '$' and replace '0' with '1'
let d_mod: String = license_code
.get(1..)?
.chars()
.filter_map(|c| c.to_digit(10))
.map(|d| d as usize)
.map(|c| if c == '0' { '1' } else { c })
.collect();
let len = h.len();
for k in (0..len).rev() {
let mut l = k;
for m in k..key_digits.len() {
l += key_digits[m];
let j = d_mod.len() / 2;
let k: i64 = d_mod.get(..j + 1)?.parse().ok()?;
let l: i64 = d_mod.get(j..)?.parse().ok()?;
let f_str = (4 * (k - l).abs()).to_string();
let threshold = 10i64;
let d_bytes = d_raw.as_bytes();
let f_bytes = f_str.as_bytes();
let mut key = String::with_capacity(32);
for g in 0..=j {
for h in 1..=4usize {
let d_digit = d_bytes.get(g + h).copied().unwrap_or(b'0') as i64 - b'0' as i64;
let f_digit = f_bytes.get(g).copied().unwrap_or(b'0') as i64 - b'0' as i64;
let mut n = d_digit + f_digit;
if n >= threshold {
n -= threshold;
}
l %= len;
h.swap(k, l);
key.push((b'0' + n as u8) as char);
}
}
Some(key)
}
let mut result: String = h.into_iter().collect();
result.push_str(&tail);
result
/// Shuffle (permute) a 32-char hex hash using the key.
///
/// Stage 2: for k from 31 down to 0, compute l = (k + sum of key[k..]) % 32, swap h[k] and h[l].
fn shuffle_hash(hash: &str, key: &str) -> String {
let mut h: Vec<char> = hash.chars().collect();
let n = h.len();
let key_bytes = key.as_bytes();
for k in (0..n).rev() {
let mut l = k as i64;
for m in k..key_bytes.len() {
l += (key_bytes[m] - b'0') as i64;
}
l %= n as i64;
h.swap(k, l as usize);
}
h.into_iter().collect()
}
// ── URL decoder ────────────────────────────────────────────────────────────
//
// Strips the `function/0/` prefix, un-shuffles the hash at path segment
// index 5, then returns the bare URL (without ?rnd append separately).
fn decode_function0_url(input: &str, license_code: &str) -> Option<String> {
if !input.starts_with("function/") {
return Some(input.to_string());
/// Decode a KVS-encoded 'function/N/https://…' video URL into a usable URL.
///
/// Stage 3: strip the function/N/ prefix, shuffle the 32-char hash in segment 6,
/// then append ?rnd=<epoch_ms>.
fn decode_encoded_url(encoded: &str, license_code: &str) -> Option<String> {
let after_prefix = encoded.strip_prefix("function/")?;
// Split '0/https://host/path/hash_segment/…' into parts by '/'
let parts: Vec<&str> = after_prefix.split('/').collect();
// Layout: [0]version [1]'https:' [2]'' [3]host [4]'get_file' [5]segment_no [6]hash_segment …
if parts.len() < 7 {
return None;
}
// Strip "function/<N>/"
let raw = input.splitn(3, '/').nth(2)?;
let mut parts: Vec<&str> = raw.split('/').collect();
// Expected segments after splitting on '/':
// 0:"https:" 1:"" 2:"host" 3:"get_file" 4:N 5:HASH …
if parts.len() < 6 {
let hash_segment = parts[6];
if hash_segment.len() < 32 {
return None;
}
let key = Self::generate_key(license_code);
let unscrambled = Self::transform_hash(parts[5], &key);
let key = Self::generate_key(license_code)?;
let shuffled = Self::shuffle_hash(&hash_segment[..32], &key);
let new_hash_segment = format!("{}{}", shuffled, &hash_segment[32..]);
// Rebuild we need owned strings only for the one replaced segment
let mut owned: Vec<String> = parts.iter().map(|s| s.to_string()).collect();
owned[5] = unscrambled;
let mut new_parts: Vec<String> = parts.iter().map(|s| s.to_string()).collect();
new_parts[6] = new_hash_segment;
new_parts.remove(0); // remove version number
Some(owned.join("/"))
let rnd = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis();
let url = new_parts.join("/");
Some(format!("{}?rnd={}", url, rnd))
}
fn append_rnd(url: &str) -> String {
let rnd = Self::js_now_millis();
if url.contains("?rnd=") {
url.to_string()
} else if url.contains('?') {
format!("{url}&rnd={rnd}")
} else {
format!("{url}?rnd={rnd}")
}
/// Pulls the `license_code` value out of the player configuration embedded in `html`.
///
/// The pattern accepts `license_code: '<value>'` where the value is a `$`
/// followed by exactly fifteen ASCII digits. Returns `None` when the pattern
/// cannot be compiled or nothing in `html` matches.
fn extract_license_code(html: &str) -> Option<String> {
    let pattern = Regex::new(r"license_code:\s*'(\$[0-9]{15})'").ok()?;
    pattern
        .captures(html)
        .and_then(|caps| caps.get(1))
        .map(|code| code.as_str().to_string())
}
// ── Page parser ────────────────────────────────────────────────────────────
//
// 1. Extracts license_code: any config key whose name contains "code"
// (not at position 0) and whose value is exactly 16 chars matching
// the IIFE's own detection logic.
// 2. Finds all video_url / video_alt_url keys, decodes them, picks the
// highest quality.
fn extract_video_from_player_js(text: &str) -> Option<String> {
// Extract license_code from the player config.
// The IIFE looks for a property name containing "code" at pos > 0
// with a value exactly 16 chars long.
let license_code = Regex::new(
r#"[a-z_$][a-z0-9_$]*code[a-z0-9_$]*\s*:\s*'([^']{16})'"#,
)
.ok()
.and_then(|re| re.captures(text))
.and_then(|cap| cap.get(1))
.map(|m| m.as_str().to_string())
.unwrap_or_else(|| Self::LICENSE_CODE.to_string());
let pair_regex = Regex::new(
r#"(?P<key>video_url\d*|video_alt_url\d*(?:_text)?):\s*'(?P<value>[^']+)'"#,
)
.ok()?;
let mut data: HashMap<String, String> = HashMap::new();
for cap in pair_regex.captures_iter(text) {
data.insert(cap["key"].to_string(), cap["value"].to_string());
}
let mut sources: Vec<(u32, String)> = Vec::new();
for (key, value) in &data {
if !(key.starts_with("video_url") || key.starts_with("video_alt_url")) {
continue;
}
if key.ends_with("_text") {
continue;
}
let quality_key = format!("{key}_text");
let quality = data
.get(&quality_key)
.and_then(|v| v.replace('p', "").parse::<u32>().ok())
.unwrap_or(0);
let decoded = match Self::decode_function0_url(value, &license_code) {
Some(v) => v,
None => continue,
/// Extract all 'function/…' video URLs from the KVS player config in the page HTML.
/// Returns them in document order (lowest to highest quality for pimpbunny).
fn extract_video_urls(html: &str) -> Vec<String> {
let Ok(re) = Regex::new(r"video(?:_alt)?_url\d*:\s*'(function/[^']+)'") else {
return vec![];
};
sources.push((quality, Self::append_rnd(&decoded)));
re.captures_iter(html)
.map(|cap| cap[1].to_string())
.collect()
}
sources.sort_by(|a, b| b.0.cmp(&a.0));
sources.first().map(|(_, u)| u.clone())
/// Probes `url` with a two-byte ranged GET and reports whether the response
/// status suggests the server is willing to serve the file.
///
/// A status below 400, or exactly 416, counts as accessible. Every other
/// status (all remaining 4xx and all 5xx) and any request error counts as
/// not accessible.
async fn url_is_accessible(url: &str, requester: &mut Requester) -> bool {
    // Requesting only bytes 0-1 keeps the response body tiny.
    let range_header = vec![("Range".to_string(), "bytes=0-1".to_string())];
    let Ok(response) = requester.get_raw_with_headers(url, range_header).await else {
        return false;
    };
    let code = response.status().as_u16();
    // 416 (Range Not Satisfiable) is accepted as well: the range was refused,
    // which is taken to mean the request itself was not denied.
    code == 416 || code < 400
}
// Fallback for standard JSON-LD if they ever use it again
fn extract_stream_url_from_html(text: &str) -> Option<String> {
Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
.ok()?
.captures(text)
.and_then(|captures| captures.get(1))
.map(|value| value.as_str().trim().to_string())
/// Fetches `detail_url`, decodes the encoded video URLs found in the page and
/// returns the first decoded URL that the server accepts.
///
/// Candidates are tried in reverse document order (the last URL in the config
/// first). Returns `None` when the page cannot be fetched, no license code or
/// encoded URL is present, or no decoded candidate passes the accessibility probe.
async fn try_decode(detail_url: &str, requester: &mut Requester) -> Option<String> {
    let html = requester.get(detail_url, None).await.ok()?;
    let license_code = Self::extract_license_code(&html)?;
    let candidates = Self::extract_video_urls(&html);

    // An empty candidate list simply falls through to `None`.
    for candidate in candidates.iter().rev() {
        if let Some(decoded) = Self::decode_encoded_url(candidate, &license_code) {
            if Self::url_is_accessible(&decoded, requester).await {
                return Some(decoded);
            }
        }
    }
    None
}
}
impl crate::proxies::Proxy for PimpbunnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
async fn get_video_url(
&self,
url: String,
requester: web::types::State<crate::util::requester::Requester>,
) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
let mut requester = requester.get_ref().clone();
let mut req = requester.get_ref().clone();
Self::warm_root_session(&mut requester).await;
if let Some(video_url) = Self::try_decode(&detail_url, &mut req).await {
return video_url;
}
let headers = Self::headers_with_cookies(&requester, &detail_url, &detail_url);
let text = match requester
.get_with_headers(&detail_url, headers, Some(Version::HTTP_2))
// Fall back to browser-based extraction when the decoder fails or the decoded
// URL is rejected (access denied).
browser::wait_for_src(&detail_url, "//video[@src]")
.await
{
Ok(text) => text,
Err(_) => return String::new(),
};
Self::extract_video_from_player_js(&text)
.or_else(|| Self::extract_stream_url_from_html(&text))
.unwrap_or_default()
}
}
@@ -307,87 +221,103 @@ mod tests {
use super::*;
#[test]
fn test_generate_key() {
// Deterministic: same license_code always produces the same 32-char key
let key = PimpbunnyProxy::generate_key(PimpbunnyProxy::LICENSE_CODE);
assert_eq!(key.len(), 32, "key must be 32 digits");
assert!(key.chars().all(|c| c.is_ascii_digit()), "key must be all digits");
fn test_normalize_adds_https_scheme() {
let url =
PimpbunnyProxy::normalize_detail_url("pimpbunny.com/videos/some-video/").unwrap();
assert!(url.starts_with("https://"));
}
#[test]
fn test_decode_function0_url() {
// Example taken from live player config, verified against browser output
let input = "function/0/https://pimpbunny.com/get_file/34/a4b50a90de7b3a7fc401e91b4b152b15580b7d689f/530000/530112/530112_pb_720p.mp4/";
let expected = "https://pimpbunny.com/get_file/34/b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f/530000/530112/530112_pb_720p.mp4/";
let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE)
.expect("decode failed");
assert_eq!(decoded, expected);
}
#[test]
fn test_decode_canonical_example() {
// The original known-good example used to verify the algorithm
let input = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/";
let expected = "https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/";
let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE)
.expect("decode failed");
assert_eq!(decoded, expected);
}
#[test]
fn test_append_rnd() {
let url = "https://example.com/video.mp4";
let result = PimpbunnyProxy::append_rnd(url);
assert!(result.starts_with(url));
assert!(result.contains("?rnd="));
}
#[test]
fn test_append_rnd_existing_query() {
let url = "https://example.com/video.mp4?foo=bar";
let result = PimpbunnyProxy::append_rnd(url);
assert!(result.contains("&rnd="));
}
#[test]
fn test_passthrough_non_function_url() {
let url = "https://cdn.example.com/video.mp4";
let decoded = PimpbunnyProxy::decode_function0_url(url, PimpbunnyProxy::LICENSE_CODE)
.expect("passthrough failed");
assert_eq!(decoded, url);
}
#[test]
fn test_extract_highest_quality() {
let html = r#"
<script>
var t123456789abcdef = {
license_code: '$576262819011919',
video_url: 'function/0/https://pimpbunny.com/get_file/34/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/530000/530112/530112_pb_360p.mp4/',
video_url_text: '360p',
video_alt_url: 'function/0/https://pimpbunny.com/get_file/34/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/530000/530112/530112_pb_480p.mp4/',
video_alt_url_text: '480p',
video_alt_url2: 'function/0/https://pimpbunny.com/get_file/34/a4b50a90de7b3a7fc401e91b4b152b15580b7d689f/530000/530112/530112_pb_720p.mp4/',
video_alt_url2_text: '720p'
};
</script>
"#;
let result = PimpbunnyProxy::extract_video_from_player_js(html)
.expect("no url extracted");
assert!(result.contains("530112_pb_720p.mp4"), "did not choose highest quality");
assert!(
result.contains("b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f"),
"did not decode scrambled token"
fn test_normalize_passes_through_valid_https() {
let url = "https://pimpbunny.com/videos/some-video/";
assert_eq!(
PimpbunnyProxy::normalize_detail_url(url).unwrap(),
url
);
assert!(result.contains("?rnd="), "missing rnd parameter");
}
#[test]
fn test_normalize_rejects_screenshots_path() {
    // A screenshot asset URL is not a video detail page.
    let screenshot =
        "https://pimpbunny.com/contents/videos_screenshots/473000/473894/preview.jpg";
    let normalized = PimpbunnyProxy::normalize_detail_url(screenshot);
    assert!(normalized.is_none());
}
#[test]
fn test_normalize_rejects_other_host() {
    // A detail-like path on a foreign host must not be accepted.
    let foreign = "https://evil.com/videos/x/";
    let normalized = PimpbunnyProxy::normalize_detail_url(foreign);
    assert!(normalized.is_none());
}
#[test]
fn test_normalize_rejects_http() {
    // Plain-http URLs are refused even on the right host.
    let insecure = "http://pimpbunny.com/videos/some-video/";
    let normalized = PimpbunnyProxy::normalize_detail_url(insecure);
    assert!(normalized.is_none());
}
#[test]
fn test_www_subdomain_is_allowed() {
    // The `www.` host variant is accepted.
    let normalized =
        PimpbunnyProxy::normalize_detail_url("https://www.pimpbunny.com/videos/some-video/");
    assert!(normalized.is_some());
}
#[test]
fn test_generate_key() {
    // Known license code / key pair used as a regression fixture.
    let license = "$576262819011919";
    let derived = PimpbunnyProxy::generate_key(license).unwrap();
    assert_eq!(derived, "68732171060626281736958625345345");
    assert_eq!(derived.len(), 32);
}
#[test]
fn test_shuffle_hash() {
    // Fixture: 32-char hash and the key derived in `test_generate_key`.
    let scrambled = "7e4df9f504c7ec5b02b0101ed28edfa4";
    let key = "68732171060626281736958625345345";
    let shuffled = PimpbunnyProxy::shuffle_hash(scrambled, key);
    assert_eq!(shuffled, "fde01fe04b42c7e72d0d0a8c95b1e45f");
}
#[test]
fn test_decode_encoded_url() {
    let license = "$576262819011919";
    let encoded = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/";
    // Only the prefix is pinned: the decoder appends a time-based `?rnd=` value.
    let expected_prefix = "https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/";

    let decoded = PimpbunnyProxy::decode_encoded_url(encoded, license).unwrap();

    assert!(decoded.starts_with(expected_prefix));
    assert!(decoded.contains("?rnd="));
}
#[test]
fn test_extract_license_code() {
    // Snippet of a player config containing the license code next to another key.
    let html = r#"license_code: '$576262819011919', lrc: '77028724',"#;
    let extracted = PimpbunnyProxy::extract_license_code(html).unwrap();
    assert_eq!(extracted, "$576262819011919");
}
#[test]
fn test_extract_video_urls() {
    // Three encoded `function/…` entries plus one plain URL that must be ignored.
    let html = r#"
video_url: 'function/0/https://pimpbunny.com/get_file/34/abc/530000/530112/530112_pb_360p.mp4/',
video_alt_url: 'function/0/https://pimpbunny.com/get_file/34/def/530000/530112/530112_pb_480p.mp4/',
video_alt_url2: 'function/0/https://pimpbunny.com/get_file/34/ghi/530000/530112/530112_pb_720p.mp4/',
video_alt_url5: 'https://pimpbunny.com/?upgrade=true',
"#;
    let extracted = PimpbunnyProxy::extract_video_urls(html);
    assert_eq!(extracted.len(), 3);
    let (first, last) = (&extracted[0], &extracted[2]);
    assert!(first.starts_with("function/0/"));
    assert!(last.contains("720p"));
}
/// Live end-to-end check of the browser-based extraction path.
///
/// Ignored by default because it needs network access and a locally installed
/// Chrome; run it explicitly with `cargo test -- --ignored`.
#[tokio::test]
#[ignore = "requires network access and a local Chrome install"]
async fn test_live_extract_charlotte_sins() {
    let url = "https://pimpbunny.com/videos/charlotte-sins-gets-drilled-by-jax-slayher/";
    let src = browser::wait_for_src(url, "//video[@src]").await;
    println!("video src: {:?}", src);
    let src = src.expect("expected a video URL but got None");
    assert!(src.starts_with("http"), "expected http URL, got: {src}");
}
}

111
src/proxies/thaiporntv.rs Normal file
View File

@@ -0,0 +1,111 @@
use ntex::web;
use crate::util::requester::Requester;
use crate::videos::VideoFormat;
use crate::providers::report_provider_error_background;
const CHANNEL_ID: &str = "thaiporntv";
const BASE_URL: &str = "https://www.thaiporntv.com";
/// Proxy for thaiporntv detail pages; holds no fields, so instances carry no state.
#[derive(Debug, Clone)]
pub struct ThaipornTvProxy {}
impl ThaipornTvProxy {
pub fn new() -> Self {
ThaipornTvProxy {}
}
/// Decodes the value of a `data-enc` attribute into a list of video formats.
///
/// Pipeline: URL-safe base64 alphabet is mapped back to the standard one and
/// `=`-padded to a multiple of four, the result is base64-decoded, every byte
/// after the first is XORed with the first byte (the key), and the plaintext
/// is parsed as a JSON array of objects with string fields `u` (URL) and `q`
/// (quality label).
///
/// Returns `None`, after reporting a provider error, when any stage fails.
/// Array entries lacking a string `u` or `q` are skipped. Each returned format
/// uses the `application/x-mpegURL` type and carries a `Referer` header set
/// to `BASE_URL`.
fn decode_data_enc(encoded_data: &str) -> Option<Vec<VideoFormat>> {
    let cleaned_data = encoded_data.replace('-', "+").replace('_', "/");

    // Base64 padding must be '='. The default fill character of `{:<pad$}` is
    // a space, which made every payload whose length is not a multiple of 4
    // fail to decode.
    let padded_len = (cleaned_data.len() + 3) & !3;
    let padded_data = format!("{:=<pad$}", cleaned_data, pad = padded_len);

    let decoded_bytes = match base64::decode(&padded_data) {
        Ok(bytes) => bytes,
        Err(e) => {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.base64", &format!("error={e}"));
            return None;
        }
    };

    // The first byte is the XOR key; the remainder is the encrypted payload.
    let Some((&key, payload)) = decoded_bytes.split_first() else {
        report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.empty", "decoded bytes are empty");
        return None;
    };
    let decrypted_bytes: Vec<u8> = payload.iter().map(|&b| b ^ key).collect();

    let json_str = match String::from_utf8(decrypted_bytes) {
        Ok(s) => s,
        Err(e) => {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.utf8", &format!("error={e}"));
            return None;
        }
    };

    let formats_json: serde_json::Value = match serde_json::from_str(&json_str) {
        Ok(value) => value,
        Err(e) => {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.json_parse", &format!("error={e}"));
            return None;
        }
    };

    let Some(formats_array) = formats_json.as_array() else {
        report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.not_array", "JSON is not an array");
        return None;
    };

    let mut formats = Vec::with_capacity(formats_array.len());
    for format_val in formats_array {
        let Some(u) = format_val.get("u").and_then(|v| v.as_str()) else { continue; };
        let Some(q) = format_val.get("q").and_then(|v| v.as_str()) else { continue; };

        let mut format = VideoFormat::new(
            u.to_string(),
            q.to_string(),
            "application/x-mpegURL".to_string(),
        );
        // Attach the site as Referer on the format.
        format.add_http_header("Referer".to_string(), BASE_URL.to_string());
        formats.push(format);
    }

    Some(formats)
}
pub async fn get_video_url(
&self,
url_path: String, // Expects something like videos/2019/thai-student-have-sex-at-taxi-1658/
requester: web::types::State<Requester>,
) -> String {
let mut requester = requester.get_ref().clone();
let video_detail_url = format!("{}/{}", BASE_URL, url_path.trim_start_matches('/'));
let html = match requester.get(&video_detail_url, None).await {
Ok(text) => text,
Err(e) => {
report_provider_error_background(CHANNEL_ID, "proxy.get_video_url.fetch_html", &format!("url={}; error={}", video_detail_url, e));
return String::new();
}
};
let document = scraper::Html::parse_document(&html);
let video_element_selector = scraper::Selector::parse("video[data-enc]").unwrap();
if let Some(video_element) = document.select(&video_element_selector).next() {
if let Some(data_enc) = video_element.value().attr("data-enc") {
if let Some(formats) = Self::decode_data_enc(data_enc) {
// Return the URL of the best quality format, or the first one if quality is not a factor.
// For m3u8, we typically just return the m3u8 playlist URL.
if let Some(format) = formats.first() {
return format.url.clone();
}
}
}
}
String::new()
}
}

View File

@@ -15,6 +15,7 @@ use crate::proxies::sxyprn::SxyprnProxy;
use crate::proxies::vjav::VjavProxy;
use crate::proxies::vidara::VidaraProxy;
use crate::proxies::lulustream::LulustreamProxy;
use crate::proxies::thaiporntv::ThaipornTvProxy;
use crate::proxies::*;
use crate::util::requester::Requester;
@@ -125,10 +126,15 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::get().to(crate::proxies::porndishthumb::get_image)),
);
cfg.service(
web::resource("/pornhub-thumb/{endpoint}*")
web::resource("/proxy/pornhub-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pornhubthumb::get_image))
.route(web::get().to(crate::proxies::pornhubthumb::get_image)),
);
cfg.service(
web::resource("/proxy/thaiporntv/{tail:.*}")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
);
}
async fn proxy2redirect(
@@ -163,6 +169,7 @@ fn get_proxy(proxy: &str) -> Option<AnyProxy> {
"porndish" => Some(AnyProxy::Porndish(PorndishProxy::new())),
"spankbang" => Some(AnyProxy::Spankbang(SpankbangProxy::new())),
"lulustream" => Some(AnyProxy::Lulustream(LulustreamProxy::new())),
"thaiporntv" => Some(AnyProxy::ThaipornTv(ThaipornTvProxy::new())),
_ => None,
}
}

41
src/util/browser.rs Normal file
View File

@@ -0,0 +1,41 @@
use crate::util::{playwright, webdriver};
use std::time::Duration;
use tokio::sync::mpsc;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Races chromiumoxide (webdriver) and Playwright against each other.
/// Navigates to `url` and returns the `src` attribute of the first element
/// matched by `xpath` — whichever backend resolves it first wins.
///
/// Delegates to [`wait_for_attribute`] with the attribute fixed to `"src"`
/// and the timeout fixed to `DEFAULT_TIMEOUT` (30 seconds). Returns `None`
/// when neither backend produces a value.
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
}
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
///
/// The webdriver and Playwright backends each run in their own tokio task and
/// report through a shared channel. The first value received is returned;
/// `None` is returned once both tasks have finished without sending anything.
///
/// Both tasks are aborted before returning so the slower backend does not keep
/// its headless browser running until its own timeout expires. Aborting a task
/// that has already finished is a no-op.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // Buffer of 1: the first send wins; a later send fails harmlessly once the
    // receiver has been dropped.
    let (tx, mut rx) = mpsc::channel::<String>(1);

    let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string());
    let tx_webdriver = tx.clone();
    let webdriver_task = tokio::spawn(async move {
        if let Some(src) = webdriver::wait_for_attribute(&u, &x, &a, wait_timeout).await {
            let _ = tx_webdriver.send(src).await;
        }
    });

    let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string());
    let playwright_task = tokio::spawn(async move {
        if let Some(src) = playwright::wait_for_attribute(&u, &x, &a, wait_timeout).await {
            let _ = tx.send(src).await;
        }
    });

    // Yields the first value sent, or `None` after both senders are dropped.
    let winner = rx.recv().await;

    // Stop whichever backend is still working; dropping its future releases
    // the browser it owns instead of leaving it alive in the background.
    webdriver_task.abort();
    playwright_task.abort();

    winner
}

View File

@@ -9,7 +9,10 @@ pub mod hoster_proxy;
pub mod proxy;
pub mod requester;
pub mod time;
pub mod browser;
pub mod dean_edwards;
pub mod playwright;
pub mod webdriver;
pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
let s = s.trim();

128
src/util/playwright.rs Normal file
View File

@@ -0,0 +1,128 @@
use playwright::Playwright;
use playwright::api::Page;
use std::path::Path;
use std::time::Duration;
use tokio::time::{sleep, timeout};
const POLL_INTERVAL: Duration = Duration::from_millis(500);
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Command-line flags handed to the Chrome launcher in `open_page`.
/// Headless mode is not listed here; it is requested separately via the
/// launcher's `headless(true)` call.
const CHROME_ARGS: &[&str] = &[
"--no-sandbox",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-renderer-backgrounding",
"--disable-ipc-flooding-protection",
"--disable-hang-monitor",
"--disable-gpu",
"--disable-software-rasterizer",
"--disable-accelerated-2d-canvas",
"--disable-webgl",
"--disable-3d-apis",
"--blink-settings=imagesEnabled=false",
"--mute-audio",
"--disable-background-networking",
"--disable-client-side-phishing-detection",
"--disable-component-update",
"--disable-domain-reliability",
"--disable-sync",
"--metrics-recording-only",
"--safebrowsing-disable-auto-update",
"--disable-extensions",
"--disable-translate",
"--disable-default-apps",
"--disable-features=site-per-process",
"--password-store=basic",
"--use-mock-keychain",
];
/// Launches a headless Chrome browser via Playwright, navigates to `url`, and
/// polls until the element matched by `xpath` has a non-empty `src` attribute.
///
/// Delegates to [`wait_for_attribute`] with the attribute fixed to `"src"`
/// and the timeout fixed to `DEFAULT_TIMEOUT` (30 seconds). Returns `None`
/// when the page cannot be opened or the timeout elapses first.
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
}
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
///
/// Opens `url`, then evaluates an XPath lookup every `POLL_INTERVAL` until the
/// matched element yields a non-empty value for `attribute` (read via
/// `getAttribute`, falling back to the same-named DOM property). Returns `None`
/// when the page cannot be opened or `wait_timeout` elapses. The timeout
/// bounds only the polling phase, not browser launch or navigation.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // `_pw` has to stay bound until we return — dropping it kills the driver process.
    let (_pw, page) = open_page(url).await?;

    // JSON-encode both values so they are embedded as quoted JS string literals.
    let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
    let attr_js = serde_json::to_string(attribute).unwrap_or_default();
    let js = format!(
        r#"() => {{
var r = document.evaluate({xpath_js}, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
var el = r.singleNodeValue;
if (!el) return '';
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
}}"#
    );

    // Runs until a non-empty value appears; the surrounding timeout is the only exit otherwise.
    let poll_until_found = async {
        loop {
            match page.evaluate::<(), String>(&js, ()).await {
                Ok(value) if !value.is_empty() => break value,
                Ok(_) => {}
                Err(e) => eprintln!("[playwright] evaluate error: {e}"),
            }
            sleep(POLL_INTERVAL).await;
        }
    };

    timeout(wait_timeout, poll_until_found).await.ok()
}
// ── internals ─────────────────────────────────────────────────────────────────
/// Starts the Playwright driver, launches headless Chrome from
/// `/usr/bin/google-chrome` with `CHROME_ARGS`, opens a new page in a fresh
/// context and navigates it to `url`.
///
/// Returns the driver handle together with the page so the caller can keep
/// the driver alive. Any failing step is logged to stderr and yields `None`.
async fn open_page(url: &str) -> Option<(Playwright, Page)> {
    let pw = match Playwright::initialize().await {
        Ok(driver) => driver,
        Err(e) => {
            eprintln!("[playwright] init error: {e}");
            return None;
        }
    };

    let args: Vec<String> = CHROME_ARGS.iter().map(|s| s.to_string()).collect();
    let launched = pw
        .chromium()
        .launcher()
        .executable(Path::new("/usr/bin/google-chrome"))
        .headless(true)
        .args(&args)
        .launch()
        .await;
    let browser = match launched {
        Ok(browser) => browser,
        Err(e) => {
            eprintln!("[playwright] launch error: {e}");
            return None;
        }
    };

    let context = match browser.context_builder().build().await {
        Ok(context) => context,
        Err(e) => {
            eprintln!("[playwright] context error: {e}");
            return None;
        }
    };

    let page = match context.new_page().await {
        Ok(page) => page,
        Err(e) => {
            eprintln!("[playwright] new_page error: {e}");
            return None;
        }
    };

    if let Err(e) = page.goto_builder(url).goto().await {
        eprintln!("[playwright] goto error: {e}");
        return None;
    }

    Some((pw, page))
}

130
src/util/webdriver.rs Normal file
View File

@@ -0,0 +1,130 @@
use chromiumoxide::{Browser, BrowserConfig};
use futures::StreamExt;
use std::time::Duration;
use tokio::time::{sleep, timeout};
const POLL_INTERVAL: Duration = Duration::from_millis(500);
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Launches a headless Chrome browser, navigates to `url`, and polls until the
/// element matched by `xpath` has a non-empty `src` attribute, then returns it.
///
/// Equivalent to Selenium's:
/// `WebDriverWait(driver, 30).until(lambda d: d.find_element(By.XPATH, xpath).get_attribute("src"))`
///
/// Delegates to [`wait_for_attribute`] with the attribute fixed to `"src"`
/// and the timeout fixed to `DEFAULT_TIMEOUT` (30 seconds). Returns `None`
/// when the page cannot be opened or the timeout elapses first.
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
}
/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout.
///
/// Opens `url`, then evaluates an XPath lookup every `POLL_INTERVAL` until the
/// matched element yields a non-empty value for `attribute` (read via
/// `getAttribute`, falling back to the same-named DOM property). Returns `None`
/// when the page cannot be opened or `wait_timeout` elapses. The timeout
/// bounds only the polling phase, not browser launch or navigation.
///
/// Before returning, the browser is asked to close and the child process is
/// waited on so it does not linger as a zombie; if it fails to close or does
/// not exit within a short grace period it is killed.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // Upper bound on how long to wait for Chrome to exit after a close request.
    const SHUTDOWN_GRACE: Duration = Duration::from_secs(5);

    let (mut browser, page) = open_page(url).await?;

    // Embed xpath and attribute as JSON strings so they are safely quoted inside
    // the JS template — avoids any injection from untrusted caller values.
    let xpath_js = serde_json::to_string(xpath).unwrap_or_default();
    let attr_js = serde_json::to_string(attribute).unwrap_or_default();
    let js = format!(
        r#"(function() {{
var r = document.evaluate({xpath_js}, document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
var el = r.singleNodeValue;
if (!el) return '';
return el.getAttribute({attr_js}) || el[{attr_js}] || '';
}})()"#
    );

    let result = timeout(wait_timeout, async {
        loop {
            match page.evaluate(js.as_str()).await {
                Ok(val) => {
                    if let Ok(s) = val.into_value::<String>() {
                        if !s.is_empty() {
                            return Some(s);
                        }
                    }
                }
                Err(e) => eprintln!("[webdriver] evaluate error: {e}"),
            }
            sleep(POLL_INTERVAL).await;
        }
    })
    .await
    .ok()
    .flatten();

    // Closing only *requests* shutdown; the spawned process still has to be
    // waited on, otherwise each call leaves a zombie Chrome behind.
    let close_result = browser.close().await;
    let exited = match close_result {
        Ok(_) => matches!(timeout(SHUTDOWN_GRACE, browser.wait()).await, Ok(Ok(_))),
        Err(e) => {
            eprintln!("[webdriver] close error: {e}");
            false
        }
    };
    if !exited {
        // Graceful shutdown failed or stalled: force-kill the child instead.
        let _ = browser.kill().await;
    }

    result
}
// ── internals ─────────────────────────────────────────────────────────────────
/// Launches headless Chrome through chromiumoxide with a fixed set of flags,
/// spawns a task that drives the browser's event handler, and opens `url` in
/// a new page.
///
/// Returns the browser handle together with the page. Any failing step is
/// logged to stderr and yields `None`.
async fn open_page(
    url: &str,
) -> Option<(Browser, chromiumoxide::Page)> {
    // Flags are applied to the builder in exactly this order.
    const LAUNCH_FLAGS: &[&str] = &[
        // ── headless & sandbox ────────────────────────────────────────
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--no-first-run",
        "--no-default-browser-check",
        // ── JS timer / scheduler: prevent Chrome from throttling
        //    background timers and renderers (critical for player init) ──
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-ipc-flooding-protection",
        "--disable-hang-monitor",
        // ── skip rendering work we don't need ────────────────────────
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--disable-accelerated-2d-canvas",
        "--disable-webgl",
        "--disable-3d-apis",
        "--blink-settings=imagesEnabled=false",
        "--mute-audio",
        // ── cut background network / update noise ────────────────────
        "--disable-background-networking",
        "--disable-client-side-phishing-detection",
        "--disable-component-update",
        "--disable-domain-reliability",
        "--disable-sync",
        "--metrics-recording-only",
        "--safebrowsing-disable-auto-update",
        // ── misc overhead ─────────────────────────────────────────────
        "--disable-extensions",
        "--disable-translate",
        "--disable-default-apps",
        "--disable-features=site-per-process",
        "--password-store=basic",
        "--use-mock-keychain",
    ];

    let builder = LAUNCH_FLAGS
        .iter()
        .fold(BrowserConfig::builder(), |builder, flag| builder.arg(*flag));
    let config = match builder.build() {
        Ok(config) => config,
        Err(e) => {
            eprintln!("[webdriver] BrowserConfig error: {e}");
            return None;
        }
    };

    let (browser, mut handler) = match Browser::launch(config).await {
        Ok(launched) => launched,
        Err(e) => {
            eprintln!("[webdriver] Browser launch error: {e}");
            return None;
        }
    };

    // Keep polling the handler until its stream ends.
    tokio::spawn(async move {
        while handler.next().await.is_some() {}
    });

    match browser.new_page(url).await {
        Ok(page) => Some((browser, page)),
        Err(e) => {
            eprintln!("[webdriver] new_page error: {e}");
            None
        }
    }
}