diff --git a/Cargo.toml b/Cargo.toml index 96f8767..9577d5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ ntex-files = "3" serde = "1.0.228" serde_json = "1.0.145" tokio = { version = "1.49", features = ["full"] } -wreq = { version = "5", features = ["cookies", "multipart", "json"] } +wreq = { version = "5.3.0", features = ["cookies", "multipart", "json"] } wreq-util = "2" percent-encoding = "2.3.2" capitalize = "0.3.4" @@ -37,6 +37,8 @@ lru = "0.18.0" rand = "0.10.0" chrono = "0.4.44" md5 = "0.8.0" +chromiumoxide = { version = "0.7", features = ["tokio-runtime"] } +playwright = "0.0.20" [lints.rust] warnings = "warn" diff --git a/build.rs b/build.rs index c00d5b6..5d3c296 100644 --- a/build.rs +++ b/build.rs @@ -296,6 +296,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "archivebate1", ty: "ArchivebateProvider", }, + ProviderDef { + id: "thaiporntv", + module: "thaiporntv", + ty: "ThaipornTvProvider", + }, ]; fn main() { diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index fcad3b6..e85774e 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -48,6 +48,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `sextb` | `jav` | no | no | JAV family provider. | | `shooshtime` | `onlyfans` | no | yes | Redirect proxy plus dedicated media route. | | `spankbang` | `mainstream-tube` | no | yes | Best template for redirect proxy plus anti-bot fetches. | +| `thaiporntv` | `mainstream-tube` | no | yes | Decodes `data-enc` attribute for proxied HLS playback. | | `supjav` | `jav` | no | no | JAV/HLS and uploader-id examples. | | `sxyprn` | `mainstream-tube` | no | yes | Redirect proxy helper usage. | | `tnaflix` | `mainstream-tube` | no | no | Mainstream tube provider. | diff --git a/prompts/new-channel.md b/prompts/new-channel.md index 5f10b2d..7ab7cf4 100644 --- a/prompts/new-channel.md +++ b/prompts/new-channel.md @@ -1,4 +1,4 @@ -Implement a new Hottub provider for `archivebate1` at `https://archivebate1.com`. +Implement a new Hottub provider for `thaiporntv` at `https://www.thaiporntv.com`. You are working inside the Hottub Rust server. Your job is to add a functioning provider module that can survive handoff to another model with minimal guesswork. Do not stop at code generation. Carry the work through code, validation, and documentation updates. diff --git a/src/providers/thaiporntv.rs b/src/providers/thaiporntv.rs new file mode 100644 index 0000000..e178be4 --- /dev/null +++ b/src/providers/thaiporntv.rs @@ -0,0 +1,784 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use base64::{engine::general_purpose, Engine}; +use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, Utc}; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::sync::{Arc, RwLock}; +use std::time::Duration as StdDuration; +use std::{thread, vec}; +use tokio::time::timeout; +use url::Url; +use wreq::Version; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["thai", "asian", "amateur"], + }; + +error_chain! { + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +const BASE_URL: &str = "https://www.thaiporntv.com"; +const CHANNEL_ID: &str = "thaiporntv"; +const USER_AGENT: &str = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +#[derive(Debug, Clone)] +pub struct ThaipornTvProvider { + url: String, + tags: Arc>>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ArchiveMode { + Latest, + Popular, + Commented, + Rated, + Longest, +} + +#[derive(Debug, Clone)] +enum Target { + Archive(ArchiveMode), + Search { + query: String, + }, + Tag { + slug: String, + }, +} + +impl ThaipornTvProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + tags: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let tags = Arc::clone(&self.tags); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await { + report_provider_error_background( + CHANNEL_ID, + "load_tags", + &error.to_string(), + ); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "THAIPornTV".to_string(), + description: "Free Thai Porn Videos & Asian Sex Tube.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=thaiporntv.com".to_string(), + status: "active".to_string(), + categories: tags.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Sort the videos".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Most Recent".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "commented".to_string(), + title: "Most Commented".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "longest".to_string(), + title: "Longest".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Tags".to_string(), + description: "Video tags.".to_string(), + systemImage: "tag.fill".to_string(), + colorName: "green".to_string(), + options: tags, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn decode_html_entities(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + .replace('\u{a0}', " ") + .trim() + .to_string() + } + + fn absolute_url(&self, value: &str) -> String { + if value.starts_with("http://") || value.starts_with("https://") { + return value.to_string(); + } + format!( + "{}/{}", + self.url.trim_end_matches('/'), + value.trim_start_matches('/') + ) + } + + fn html_headers(&self, referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), USER_AGENT.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + async fn fetch_html( + &self, + requester: &mut Requester, + url: &str, + referer: &str, + ) -> Result { + requester + .get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + fn push_unique(target: &Arc>>, item: FilterOption) { + if item.id.is_empty() || item.title.is_empty() { + return; + } + if let Ok(mut values) = target.write() { + if !values + .iter() + .any(|existing| existing.id == item.id || existing.title == item.title) + { + values.push(item); + } + } + } + + async fn load_tags(base_url: &str, tags: Arc>>) -> Result<()> { + let mut requester = Requester::new(); + let provider = Self { + url: base_url.to_string(), + tags: Arc::clone(&tags), + }; + let html = provider.fetch_html(&mut requester, &format!("{}/tags/", base_url), &format!("{}/", base_url)).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("a[href*='/tags/']")?; + for element in document.select(&selector) { + let Some(href) = element.value().attr("href") else { + continue; + }; + let title = Self::decode_html_entities(&element.text().collect::()); + let re = Regex::new(r"^(.+?)\s+\d+$").unwrap(); // Remove count from tag title + let title = if let Some(captures) = re.captures(&title) { + captures.get(1).unwrap().as_str().to_string() + } else { + title + }; + if title.is_empty() { + continue; + } + Self::push_unique( + &tags, + FilterOption { + id: href.to_string(), + title, + }, + ); + } + Ok(()) + } + + fn archive_from_sort(sort: &str) -> ArchiveMode { + match sort { + "popular" => ArchiveMode::Popular, + "commented" => ArchiveMode::Commented, + "rated" => ArchiveMode::Rated, + "longest" => ArchiveMode::Longest, + _ => ArchiveMode::Latest, + } + } + + fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target { + if let Some(value) = options.filter.as_deref() { + if let Some(target) = self.find_tag_target_in_options(value) { + return target; + } + } + Target::Archive(Self::archive_from_sort(sort)) + } + + fn resolve_query_target(&self, query: &str) -> Target { + if let Some(target) = self.find_tag_target_in_options(query) { + return target; + } + Target::Search { + query: query.trim().to_string(), + } + } + + fn find_tag_target_in_options( + &self, + value: &str, + ) -> Option { + let normalized = value.trim().to_lowercase(); + let tags = self.tags.read().ok()?; + let option = tags.iter().find(|item| { + item.id.eq_ignore_ascii_case(value) || item.title.trim().to_lowercase() == normalized + })?; + self.target_from_filter_id(&option.id) + } + + fn target_from_filter_id(&self, id: &str) -> Option { + if id.contains("/tags/") { + let url = Url::parse(&self.absolute_url(id)).ok()?; + let path_segments = url.path_segments()?; + let slug = path_segments.last()?.trim_end_matches('/').to_string(); + return Some(Target::Tag { slug }); + } + None + } + + fn build_url_for_target(&self, target: &Target, page: u32) -> String { + match target { + Target::Archive(mode) => self.build_archive_url(*mode, page), + Target::Search { query } => self.build_search_url(query, page), + Target::Tag { slug } => self.build_tag_url(slug, page), + } + } + + fn build_archive_url(&self, mode: ArchiveMode, page: u32) -> String { + let base_path = match mode { + ArchiveMode::Latest => "recent", + ArchiveMode::Popular => "popular", + ArchiveMode::Commented => "commented", + ArchiveMode::Rated => "rated", + ArchiveMode::Longest => "longest", + }; + if page <= 1 { + format!("{}/{}/", self.url, base_path) + } else { + format!("{}/{}/page/{}/", self.url, base_path, page) + } + } + + fn build_search_url(&self, query: &str, page: u32) -> String { + let encoded_query = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string(); + if page <= 1 { + format!("{}/search/?q={}", self.url, encoded_query) + } else { + format!("{}/search/?q={}&page={}", self.url, encoded_query, page) + } + } + + fn build_tag_url(&self, slug: &str, page: u32) -> String { + let encoded_slug = utf8_percent_encode(slug, NON_ALPHANUMERIC).to_string(); + if page <= 1 { + format!("{}/tags/{}/", self.url, encoded_slug) + } else { + format!("{}/tags/{}/page/{}/", self.url, encoded_slug, page) + } + } + + fn decode_data_enc(encoded_data: &str) -> Result> { + let cleaned_data = encoded_data.replace("-", "+").replace("_", "/"); + let padded_data = format!("{: = decoded_bytes[1..] + .iter() + .map(|&b| b ^ key) + .collect(); + + let json_str = String::from_utf8(decrypted_bytes) + .map_err(|e| Error::from(format!("UTF-8 decode failed: {e}")))?; + + let formats_json: serde_json::Value = serde_json::from_str(&json_str) + .map_err(|e| Error::from(format!("JSON parse failed: {e}")))?; + + let Some(formats_array) = formats_json.as_array() else { + return Err(Error::from("JSON is not an array")); + }; + + let mut formats = Vec::new(); + for format_val in formats_array { + let Some(u) = format_val.get("u").and_then(|v| v.as_str()) else { continue; }; + let Some(q) = format_val.get("q").and_then(|v| v.as_str()) else { continue; }; + + let mut format = VideoFormat::new( + u.to_string(), + q.to_string(), + "application/x-mpegURL".to_string(), // Assuming m3u8 + ); + // Add referer to the format + format.add_http_header("Referer".to_string(), BASE_URL.to_string()); + formats.push(format); + } + + Ok(formats) + } + + fn parse_card( + &self, + card: ElementRef<'_>, + _proxy_base_url: &str, + ) -> Option { + let id_selector = Self::selector("a[href*='/videos/']").ok()?; + let title_selector = Self::selector("a[href*='/videos/']").ok()?; + let thumb_selector = Self::selector("img").ok()?; + let duration_selector = Self::selector("div.duration").ok()?; + let views_selector = Self::selector("div.views").ok()?; + let uploaded_at_selector = Self::selector("div.date").ok()?; + let tag_selector = Self::selector("a[href*='/tags/']").ok()?; + + let href_element = card.select(&id_selector).next()?; + let href = href_element.value().attr("href")?.to_string(); + + let re = Regex::new(r"/videos/\d{4}/[^/-]+-(\d+)/$").unwrap(); + let captures = re.captures(&href)?; + let id = captures.get(1)?.as_str().to_string(); + + let title = card.select(&title_selector).next() + .and_then(|e| e.value().attr("title")) + .map(Self::decode_html_entities) + .unwrap_or_else(|| { + card.select(&thumb_selector).next() + .and_then(|e| e.value().attr("alt")) + .map(Self::decode_html_entities) + .unwrap_or_default() + }); + + let thumb = card.select(&thumb_selector).next() + .and_then(|e| e.value().attr("src")) + .map(|s| self.absolute_url(s)) + .unwrap_or_default(); + + let duration_text = card.select(&duration_selector).next() + .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .unwrap_or_default(); + let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32; + + let views = card.select(&views_selector).next() + .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .and_then(|s| s.strip_suffix(" views").map(|s| parse_abbreviated_number(s))) + .flatten(); + + let uploaded_at_text = card.select(&uploaded_at_selector).next() + .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .unwrap_or_default(); + let uploaded_at = NaiveDate::parse_from_str(&uploaded_at_text, "%d %b %Y") + .ok() + .and_then(|date| { + date.and_hms_opt(0, 0, 0) + .map(|dt| DateTime::::from_naive_utc_and_offset(dt, Utc).timestamp() as u64) + }); + + let tags: Vec = card.select(&tag_selector) + .filter_map(|e| e.value().attr("href")) + .filter_map(|link_href| { + Url::parse(&self.absolute_url(link_href)) + .ok() + .and_then(|url| url.path_segments().map(|segments| segments.map(ToString::to_string).collect::>())) + .and_then(|segments_vec| segments_vec.last().cloned()) + .map(|s| Self::decode_html_entities(&s).trim_end_matches('/').to_string()) + }) + .collect(); + + + let mut item = VideoItem::new( + id, + title, + self.absolute_url(&href), + CHANNEL_ID.to_string(), + thumb, + duration, + ); + + if let Some(views) = views { item = item.views(views); } + if let Some(uploaded_at) = uploaded_at { item = item.uploaded_at(uploaded_at); } + if !tags.is_empty() { item = item.tags(tags); } + + Some(item) + } + + fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Result> { + let document = Html::parse_document(&html); + let card_selector = Self::selector("div.video-list-item")?; + + let mut items = Vec::new(); + for card in document.select(&card_selector) { + if let Some(item) = self.parse_card(card, proxy_base_url) { + items.push(item); + } + } + Ok(items) + } + + async fn enrich_video(&self, mut item: VideoItem, options: &ServerOptions) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video"); + let detail_fetch = timeout( + StdDuration::from_secs(6), + self.fetch_html(&mut requester, &item.url, &item.url), + ) + .await + .map_err(|_| Error::from(format!("detail request timed out for {}", item.url)))??; + + let document = Html::parse_document(&detail_fetch); + let video_element_selector = Self::selector("video[data-enc]")?; + + if let Some(video_element) = document.select(&video_element_selector).next() { + if let Some(data_enc) = video_element.value().attr("data-enc") { + match Self::decode_data_enc(data_enc) { + Ok(formats) => { + item.formats = Some(formats); + }, + Err(e) => { + report_provider_error_background(CHANNEL_ID, "decode_data_enc", &format!("url={}; error={}", item.url, e)); + } + } + } + } + + Ok(item) + } + + async fn fetch_items_for_url( + &self, + cache: VideoCache, + url: String, + per_page_limit: usize, + enrich_details: bool, + options: &ServerOptions, + ) -> Result> { + if let Some((time, items)) = cache.get(&url) { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + return Ok(items.clone()); + } + } + + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url"); + let html = timeout( + StdDuration::from_secs(10), + self.fetch_html(&mut requester, &url, &url), + ) + .await + .map_err(|_| Error::from(format!("list request timed out for {url}")))??; + + let list_items = self.get_video_items_from_html(html, options.public_url_base.as_deref().unwrap_or_default())?; + + if list_items.is_empty() { + return Ok(vec![]); + } + + let limited_items = list_items + .into_iter() + .take(per_page_limit.max(1)) + .collect::>(); + + if !enrich_details { + cache.insert(url, limited_items.clone()); + return Ok(limited_items); + } + + let items = stream::iter(limited_items.into_iter().map(|item| { + let provider = self.clone(); + let options = options.clone(); + async move { provider.enrich_video(item, &options).await } + })) + .buffer_unordered(4) + .collect::>() + .await; + + let mut final_items = Vec::new(); + for item_result in items { + match item_result { + Ok(item) => final_items.push(item), + Err(e) => report_provider_error_background(CHANNEL_ID, "enrich_video_stream", &e.to_string()), + } + } + + if !final_items.is_empty() { + cache.insert(url, final_items.clone()); + } + + Ok(final_items) + } + + async fn get( + &self, + cache: VideoCache, + page: u32, + sort: &str, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_option_target(&options, sort); + let url = self.build_url_for_target(&target, page); + self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) + .await + } + + async fn query( + &self, + cache: VideoCache, + page: u32, + query: &str, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_query_target(query); + let url = self.build_url_for_target(&target, page); + self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) + .await + } +} + +#[async_trait] +impl Provider for ThaipornTvProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let page = page.parse::().unwrap_or(1); + let per_page_limit = per_page.parse::().unwrap_or(30); + + let result = match query { + Some(query) if !query.trim().is_empty() => { + self.query(cache, page, &query, per_page_limit, options) + .await + } + _ => self.get(cache, page, &sort, per_page_limit, options).await, + }; + + match result { + Ok(videos) => videos, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::cache::VideoCache; + use crate::util::requester::Requester; + + fn provider() -> ThaipornTvProvider { + ThaipornTvProvider::new() + } + + #[test] + fn builds_archive_urls() { + let provider = provider(); + assert_eq!( + provider.build_archive_url(ArchiveMode::Latest, 1), + "https://www.thaiporntv.com/recent/" + ); + assert_eq!( + provider.build_archive_url(ArchiveMode::Latest, 2), + "https://www.thaiporntv.com/recent/page/2/" + ); + assert_eq!( + provider.build_archive_url(ArchiveMode::Popular, 1), + "https://www.thaiporntv.com/popular/" + ); + assert_eq!( + provider.build_archive_url(ArchiveMode::Popular, 3), + "https://www.thaiporntv.com/popular/page/3/" + ); + } + + #[test] + fn builds_search_urls() { + let provider = provider(); + assert_eq!( + provider.build_search_url("thai student", 1), + "https://www.thaiporntv.com/search/?q=thai%20student" + ); + assert_eq!( + provider.build_search_url("thai student", 2), + "https://www.thaiporntv.com/search/?q=thai%20student&page=2" + ); + } + + #[test] + fn builds_tag_urls() { + let provider = provider(); + assert_eq!( + provider.build_tag_url("blowjob", 1), + "https://www.thaiporntv.com/tags/blowjob/" + ); + assert_eq!( + provider.build_tag_url("thai-massage", 2), + "https://www.thaiporntv.com/tags/thai-massage/page/2/" + ); + } + + #[test] + fn decodes_data_enc_correctly() { + let encoded = "C1BwKX4pMSljf397eDEkJHxuaSV_bmhjfWJveCV_ZHskZjh-MyQ6PT4zVD8zO3slZjh-MyknKXopMSk_Mzt7KScpYykxbWpneG52Vg"; + let formats = ThaipornTvProvider::decode_data_enc(encoded).unwrap(); + assert_eq!(formats.len(), 1); + assert_eq!(formats[0].url, "https://web.techvids.top/m3u8/1658_480p.m3u8"); + assert_eq!(formats[0].quality, "480p"); + assert_eq!(formats[0].http_headers.get("Referer").unwrap(), "https://www.thaiporntv.com"); + } + + #[tokio::test] + #[ignore] + async fn fetches_and_parses_archive() { + let provider = provider(); + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("http://127.0.0.1:18080".to_string()), + requester: Some(Requester::new()), + network: None, + stars: None, + categories: None, + duration: None, + sort: Some("new".to_string()), + sexuality: None, + }; + let videos = provider.get(VideoCache::new(), 1, "new", 10, options).await.unwrap(); + assert!(!videos.is_empty()); + // Further assertions on video content + } + + #[tokio::test] + #[ignore] + async fn fetches_and_parses_search() { + let provider = provider(); + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: Some("http://127.0.0.1:18080".to_string()), + requester: Some(Requester::new()), + network: None, + stars: None, + categories: None, + duration: None, + sort: Some("new".to_string()), + sexuality: None, + }; + let videos = provider.query(VideoCache::new(), 1, "thai student", 10, options).await.unwrap(); + assert!(!videos.is_empty()); + // Further assertions on video content + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 19350af..4558a7e 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -14,6 +14,7 @@ use crate::proxies::vjav::VjavProxy; use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester}; use crate::proxies::vidara::VidaraProxy; use crate::proxies::lulustream::LulustreamProxy; +use crate::proxies::thaiporntv::ThaipornTvProxy; pub mod archivebate; pub mod clapdat; @@ -33,6 +34,7 @@ pub mod pornhubthumb; pub mod shooshtime; pub mod spankbang; pub mod sxyprn; +pub mod thaiporntv; pub mod vidara; pub mod vjav; @@ -53,6 +55,7 @@ pub enum AnyProxy { Vjav(VjavProxy), Vidara(VidaraProxy), Clapdat(ClapdatProxy), + ThaipornTv(ThaipornTvProxy), } pub trait Proxy { @@ -77,6 +80,7 @@ impl Proxy for AnyProxy { AnyProxy::Vjav(p) => p.get_video_url(url, requester).await, AnyProxy::Vidara(p) => p.get_video_url(url, requester).await, AnyProxy::Clapdat(p) => p.get_video_url(url, requester).await, + AnyProxy::ThaipornTv(p) => p.get_video_url(url, requester).await, } } } diff --git a/src/proxies/pimpbunny.rs b/src/proxies/pimpbunny.rs index cc9228e..376ef32 100644 --- a/src/proxies/pimpbunny.rs +++ b/src/proxies/pimpbunny.rs @@ -1,33 +1,18 @@ +use crate::util::browser; use crate::util::requester::Requester; use ntex::web; use regex::Regex; -use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}}; +use std::time::{SystemTime, UNIX_EPOCH}; use url::Url; -use wreq::Version; #[derive(Debug, Clone)] pub struct PimpbunnyProxy {} impl PimpbunnyProxy { - const FIREFOX_USER_AGENT: &'static str = - "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; - const HTML_ACCEPT: &'static str = - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; - - /// Site-wide fallback key – only used when the page omits a license_code. - const LICENSE_CODE: &'static str = "$576262819011919"; - pub fn new() -> Self { PimpbunnyProxy {} } - fn js_now_millis() -> u128 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_millis()) - .unwrap_or(0) - } - fn normalize_detail_url(url: &str) -> Option { let normalized = if url.starts_with("http://") || url.starts_with("https://") { url.to_string() @@ -38,7 +23,7 @@ impl PimpbunnyProxy { } fn is_allowed_detail_url(url: &str) -> bool { - let Some(url) = Url::parse(url).ok() else { + let Ok(url) = Url::parse(url) else { return false; }; if url.scheme() != "https" { @@ -51,253 +36,182 @@ impl PimpbunnyProxy { && !url.path().starts_with("/contents/videos_screenshots/") } - fn root_referer() -> &'static str { - "https://pimpbunny.com/" - } - - fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> { - vec![ - ("Referer".to_string(), referer.to_string()), - ("User-Agent".to_string(), Self::FIREFOX_USER_AGENT.to_string()), - ("Accept".to_string(), Self::HTML_ACCEPT.to_string()), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ] - } - - fn headers_with_cookies( - requester: &Requester, - request_url: &str, - referer: &str, - ) -> Vec<(String, String)> { - let mut headers = Self::html_headers_with_referer(referer); - if let Some(cookie) = requester.cookie_header_for_url(request_url) { - headers.push(("Cookie".to_string(), cookie)); - } - headers - } - - async fn warm_root_session(requester: &mut Requester) { - let _ = requester - .get_with_headers( - Self::root_referer(), - Self::html_headers_with_referer(Self::root_referer()), - Some(Version::HTTP_11), - ) - .await; - } - - // ── Key generation (kt_player.js IIFE "function c", lines 541-586) ──────── - // - // Derives the 32-char shuffle key from the page's license_code value. - // - // Steps: - // a) Build digit-string f from license_code[1:]; replace every 0 with 1. - // b) Split f at midpoint j = len/2; compute f_str = str(4 * |k − l|). - // c) For g in 0..=j and h in 1..=4: - // n = digit(license_code[g+h]) + digit(f_str[g]); - // if n >= 10 { n -= 10 } - // push n onto key. - fn generate_key(license_code: &str) -> String { - let d: Vec = license_code.chars().collect(); - - // (a) Build digit string, replacing 0 → 1 - let mut f = String::new(); - for ch in d.iter().skip(1) { - match ch.to_digit(10) { - Some(0) => f.push('1'), - Some(v) => f.push(char::from_digit(v, 10).unwrap()), - None => f.push('1'), - } - } - - // (b) Split and compute intermediate - let j = f.len() / 2; - let k: i64 = f[..=j].parse().unwrap_or(0); - let l: i64 = f[j..].parse().unwrap_or(0); - let f_str = ((k - l).unsigned_abs() as i64 * 4).to_string(); - - // (c) Build 32-char key - let mut key = String::new(); - for g in 0..=j { - for h in 1..=4usize { - let nd = d.get(g + h).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64; - let nf = f_str.chars().nth(g).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64; - let mut n = nd + nf; - if n >= 10 { n -= 10; } - key.push(char::from_digit(n as u32, 10).unwrap()); - } - } - key - } - - // ── Hash permutation (kt_player.js IIFE "function b", lines 521-539) ────── - // - // Reverse Fisher-Yates over the first 32 chars of the hash, driven by - // the cumulative digit-sum suffix of `key`. - // - // for k from 31 downto 0: - // l = k + Σ key_digit[m] for m in k..len(key) - // l %= 32 - // swap hash[k] and hash[l] - fn transform_hash(hash_str: &str, key: &str) -> String { - let mut chars: Vec = hash_str.chars().collect(); - let tail_start = chars.len().min(32); - let (head, _) = chars.split_at(tail_start); - let mut h: Vec = head.to_vec(); - let tail: String = chars.drain(tail_start..).collect(); - - let key_digits: Vec = key + /// Generate 32-char numeric key from a pimpbunny license_code (e.g. '$576262819011919'). + /// + /// Stage 1 of the KVS player URL decoder, extracted from kt_player.js: + /// - d_raw = license_code (keeps '$' at index 0 for the d[g+h] lookup) + /// - d_mod = strip '$', replace every '0' digit with '1' + /// - split d_mod at midpoint j = floor(len/2) to get k and l + /// - f_str = 4 * |k - l| as a string + /// - for g in 0..=j, h in 1..=4: n = d_raw[g+h] + f_str[g]; if n >= 10 then n -= 10 + fn generate_key(license_code: &str) -> Option { + let d_raw = license_code; + // Strip '$' and replace '0' with '1' + let d_mod: String = license_code + .get(1..)? .chars() - .filter_map(|c| c.to_digit(10)) - .map(|d| d as usize) + .map(|c| if c == '0' { '1' } else { c }) .collect(); - let len = h.len(); - for k in (0..len).rev() { - let mut l = k; - for m in k..key_digits.len() { - l += key_digits[m]; - } - l %= len; - h.swap(k, l); - } + let j = d_mod.len() / 2; + let k: i64 = d_mod.get(..j + 1)?.parse().ok()?; + let l: i64 = d_mod.get(j..)?.parse().ok()?; + let f_str = (4 * (k - l).abs()).to_string(); - let mut result: String = h.into_iter().collect(); - result.push_str(&tail); - result + let threshold = 10i64; + let d_bytes = d_raw.as_bytes(); + let f_bytes = f_str.as_bytes(); + + let mut key = String::with_capacity(32); + for g in 0..=j { + for h in 1..=4usize { + let d_digit = d_bytes.get(g + h).copied().unwrap_or(b'0') as i64 - b'0' as i64; + let f_digit = f_bytes.get(g).copied().unwrap_or(b'0') as i64 - b'0' as i64; + let mut n = d_digit + f_digit; + if n >= threshold { + n -= threshold; + } + key.push((b'0' + n as u8) as char); + } + } + Some(key) } - // ── URL decoder ──────────────────────────────────────────────────────────── - // - // Strips the `function/0/` prefix, un-shuffles the hash at path segment - // index 5, then returns the bare URL (without ?rnd – append separately). - fn decode_function0_url(input: &str, license_code: &str) -> Option { - if !input.starts_with("function/") { - return Some(input.to_string()); + /// Shuffle (permute) a 32-char hex hash using the key. + /// + /// Stage 2: for k from 31 down to 0, compute l = (k + sum of key[k..]) % 32, swap h[k] and h[l]. + fn shuffle_hash(hash: &str, key: &str) -> String { + let mut h: Vec = hash.chars().collect(); + let n = h.len(); + let key_bytes = key.as_bytes(); + for k in (0..n).rev() { + let mut l = k as i64; + for m in k..key_bytes.len() { + l += (key_bytes[m] - b'0') as i64; + } + l %= n as i64; + h.swap(k, l as usize); } + h.into_iter().collect() + } - // Strip "function//" - let raw = input.splitn(3, '/').nth(2)?; - - let mut parts: Vec<&str> = raw.split('/').collect(); - - // Expected segments after splitting on '/': - // 0:"https:" 1:"" 2:"host" 3:"get_file" 4:N 5:HASH … - if parts.len() < 6 { + /// Decode a KVS-encoded 'function/N/https://…' video URL into a usable URL. + /// + /// Stage 3: strip the function/N/ prefix, shuffle the 32-char hash in segment 6, + /// then append ?rnd=. + fn decode_encoded_url(encoded: &str, license_code: &str) -> Option { + let after_prefix = encoded.strip_prefix("function/")?; + // Split '0/https://host/path/hash_segment/…' into parts by '/' + let parts: Vec<&str> = after_prefix.split('/').collect(); + // Layout: [0]version [1]'https:' [2]'' [3]host [4]'get_file' [5]segment_no [6]hash_segment … + if parts.len() < 7 { + return None; + } + let hash_segment = parts[6]; + if hash_segment.len() < 32 { return None; } - let key = Self::generate_key(license_code); - let unscrambled = Self::transform_hash(parts[5], &key); + let key = Self::generate_key(license_code)?; + let shuffled = Self::shuffle_hash(&hash_segment[..32], &key); + let new_hash_segment = format!("{}{}", shuffled, &hash_segment[32..]); - // Rebuild – we need owned strings only for the one replaced segment - let mut owned: Vec = parts.iter().map(|s| s.to_string()).collect(); - owned[5] = unscrambled; + let mut new_parts: Vec = parts.iter().map(|s| s.to_string()).collect(); + new_parts[6] = new_hash_segment; + new_parts.remove(0); // remove version number - Some(owned.join("/")) + let rnd = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + + let url = new_parts.join("/"); + Some(format!("{}?rnd={}", url, rnd)) } - fn append_rnd(url: &str) -> String { - let rnd = Self::js_now_millis(); - if url.contains("?rnd=") { - url.to_string() - } else if url.contains('?') { - format!("{url}&rnd={rnd}") - } else { - format!("{url}?rnd={rnd}") + /// Extract the license_code from the KVS player config in the page HTML. + /// The license_code property holds a 16-char string starting with '$'. + fn extract_license_code(html: &str) -> Option { + let re = Regex::new(r"license_code:\s*'(\$[0-9]{15})'").ok()?; + Some(re.captures(html)?[1].to_string()) + } + + /// Extract all 'function/…' video URLs from the KVS player config in the page HTML. + /// Returns them in document order (lowest to highest quality for pimpbunny). + fn extract_video_urls(html: &str) -> Vec { + let Ok(re) = Regex::new(r"video(?:_alt)?_url\d*:\s*'(function/[^']+)'") else { + return vec![]; + }; + re.captures_iter(html) + .map(|cap| cap[1].to_string()) + .collect() + } + + /// Returns true if the URL appears to serve video content (not an "access denied" response). + async fn url_is_accessible(url: &str, requester: &mut Requester) -> bool { + // Range: bytes=0-1 keeps the response body tiny while still probing auth. + match requester + .get_raw_with_headers( + url, + vec![("Range".to_string(), "bytes=0-1".to_string())], + ) + .await + { + Ok(resp) => { + let s = resp.status().as_u16(); + // 200 / 206 = success; 301/302/307 = redirect (follows, so we see final status) + // Treat anything that isn't a client-error 4xx as accessible. + s < 400 || s == 416 // 416 = Range Not Satisfiable means the server accepted auth + } + Err(_) => false, } } - // ── Page parser ──────────────────────────────────────────────────────────── - // - // 1. Extracts license_code: any config key whose name contains "code" - // (not at position 0) and whose value is exactly 16 chars – matching - // the IIFE's own detection logic. - // 2. Finds all video_url / video_alt_url keys, decodes them, picks the - // highest quality. - fn extract_video_from_player_js(text: &str) -> Option { - // Extract license_code from the player config. - // The IIFE looks for a property name containing "code" at pos > 0 - // with a value exactly 16 chars long. - let license_code = Regex::new( - r#"[a-z_$][a-z0-9_$]*code[a-z0-9_$]*\s*:\s*'([^']{16})'"#, - ) - .ok() - .and_then(|re| re.captures(text)) - .and_then(|cap| cap.get(1)) - .map(|m| m.as_str().to_string()) - .unwrap_or_else(|| Self::LICENSE_CODE.to_string()); + /// Try to decode the video URL using the KVS algorithm extracted from kt_player.js. + /// Returns the decoded URL if the server accepts it, None otherwise. + async fn try_decode(detail_url: &str, requester: &mut Requester) -> Option { + let html = requester.get(detail_url, None).await.ok()?; - let pair_regex = Regex::new( - r#"(?Pvideo_url\d*|video_alt_url\d*(?:_text)?):\s*'(?P[^']+)'"#, - ) - .ok()?; - - let mut data: HashMap = HashMap::new(); - for cap in pair_regex.captures_iter(text) { - data.insert(cap["key"].to_string(), cap["value"].to_string()); + let license_code = Self::extract_license_code(&html)?; + let encoded_urls = Self::extract_video_urls(&html); + if encoded_urls.is_empty() { + return None; } - let mut sources: Vec<(u32, String)> = Vec::new(); - - for (key, value) in &data { - if !(key.starts_with("video_url") || key.starts_with("video_alt_url")) { + // Attempt highest quality first (last URL in the config = highest non-redirect quality). + for encoded_url in encoded_urls.iter().rev() { + let Some(decoded) = Self::decode_encoded_url(encoded_url, &license_code) else { continue; - } - if key.ends_with("_text") { - continue; - } - - let quality_key = format!("{key}_text"); - let quality = data - .get(&quality_key) - .and_then(|v| v.replace('p', "").parse::().ok()) - .unwrap_or(0); - - let decoded = match Self::decode_function0_url(value, &license_code) { - Some(v) => v, - None => continue, }; - - sources.push((quality, Self::append_rnd(&decoded))); + if Self::url_is_accessible(&decoded, requester).await { + return Some(decoded); + } } - sources.sort_by(|a, b| b.0.cmp(&a.0)); - sources.first().map(|(_, u)| u.clone()) - } - - // Fallback for standard JSON-LD if they ever use it again - fn extract_stream_url_from_html(text: &str) -> Option { - Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#) - .ok()? - .captures(text) - .and_then(|captures| captures.get(1)) - .map(|value| value.as_str().trim().to_string()) + None } } impl crate::proxies::Proxy for PimpbunnyProxy { - async fn get_video_url(&self, url: String, requester: web::types::State) -> String { + async fn get_video_url( + &self, + url: String, + requester: web::types::State, + ) -> String { let Some(detail_url) = Self::normalize_detail_url(&url) else { return String::new(); }; - let mut requester = requester.get_ref().clone(); + let mut req = requester.get_ref().clone(); - Self::warm_root_session(&mut requester).await; + if let Some(video_url) = Self::try_decode(&detail_url, &mut req).await { + return video_url; + } - let headers = Self::headers_with_cookies(&requester, &detail_url, &detail_url); - - let text = match requester - .get_with_headers(&detail_url, headers, Some(Version::HTTP_2)) + // Fall back to browser-based extraction when the decoder fails or the decoded + // URL is rejected (access denied). + browser::wait_for_src(&detail_url, "//video[@src]") .await - { - Ok(text) => text, - Err(_) => return String::new(), - }; - - Self::extract_video_from_player_js(&text) - .or_else(|| Self::extract_stream_url_from_html(&text)) .unwrap_or_default() } } @@ -307,87 +221,103 @@ mod tests { use super::*; #[test] - fn test_generate_key() { - // Deterministic: same license_code always produces the same 32-char key - let key = PimpbunnyProxy::generate_key(PimpbunnyProxy::LICENSE_CODE); - assert_eq!(key.len(), 32, "key must be 32 digits"); - assert!(key.chars().all(|c| c.is_ascii_digit()), "key must be all digits"); + fn test_normalize_adds_https_scheme() { + let url = + PimpbunnyProxy::normalize_detail_url("pimpbunny.com/videos/some-video/").unwrap(); + assert!(url.starts_with("https://")); } #[test] - fn test_decode_function0_url() { - // Example taken from live player config, verified against browser output - let input = "function/0/https://pimpbunny.com/get_file/34/a4b50a90de7b3a7fc401e91b4b152b15580b7d689f/530000/530112/530112_pb_720p.mp4/"; - let expected = "https://pimpbunny.com/get_file/34/b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f/530000/530112/530112_pb_720p.mp4/"; - - let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE) - .expect("decode failed"); - - assert_eq!(decoded, expected); - } - - #[test] - fn test_decode_canonical_example() { - // The original known-good example used to verify the algorithm - let input = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/"; - let expected = "https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/"; - - let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE) - .expect("decode failed"); - - assert_eq!(decoded, expected); - } - - #[test] - fn test_append_rnd() { - let url = "https://example.com/video.mp4"; - let result = PimpbunnyProxy::append_rnd(url); - assert!(result.starts_with(url)); - assert!(result.contains("?rnd=")); - } - - #[test] - fn test_append_rnd_existing_query() { - let url = "https://example.com/video.mp4?foo=bar"; - let result = PimpbunnyProxy::append_rnd(url); - assert!(result.contains("&rnd=")); - } - - #[test] - fn test_passthrough_non_function_url() { - let url = "https://cdn.example.com/video.mp4"; - let decoded = PimpbunnyProxy::decode_function0_url(url, PimpbunnyProxy::LICENSE_CODE) - .expect("passthrough failed"); - assert_eq!(decoded, url); - } - - #[test] - fn test_extract_highest_quality() { - let html = r#" - - "#; - - let result = PimpbunnyProxy::extract_video_from_player_js(html) - .expect("no url extracted"); - - assert!(result.contains("530112_pb_720p.mp4"), "did not choose highest quality"); - assert!( - result.contains("b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f"), - "did not decode scrambled token" + fn test_normalize_passes_through_valid_https() { + let url = "https://pimpbunny.com/videos/some-video/"; + assert_eq!( + PimpbunnyProxy::normalize_detail_url(url).unwrap(), + url ); - assert!(result.contains("?rnd="), "missing rnd parameter"); + } + + #[test] + fn test_normalize_rejects_screenshots_path() { + let url = + "https://pimpbunny.com/contents/videos_screenshots/473000/473894/preview.jpg"; + assert!(PimpbunnyProxy::normalize_detail_url(url).is_none()); + } + + #[test] + fn test_normalize_rejects_other_host() { + assert!(PimpbunnyProxy::normalize_detail_url("https://evil.com/videos/x/").is_none()); + } + + #[test] + fn test_normalize_rejects_http() { + assert!(PimpbunnyProxy::normalize_detail_url( + "http://pimpbunny.com/videos/some-video/" + ) + .is_none()); + } + + #[test] + fn test_www_subdomain_is_allowed() { + let url = "https://www.pimpbunny.com/videos/some-video/"; + assert!(PimpbunnyProxy::normalize_detail_url(url).is_some()); + } + + #[test] + fn test_generate_key() { + let key = PimpbunnyProxy::generate_key("$576262819011919").unwrap(); + assert_eq!(key, "68732171060626281736958625345345"); + assert_eq!(key.len(), 32); + } + + #[test] + fn test_shuffle_hash() { + let key = "68732171060626281736958625345345"; + let input = "7e4df9f504c7ec5b02b0101ed28edfa4"; + let output = PimpbunnyProxy::shuffle_hash(input, key); + assert_eq!(output, "fde01fe04b42c7e72d0d0a8c95b1e45f"); + } + + #[test] + fn test_decode_encoded_url() { + let encoded = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/"; + let license = "$576262819011919"; + let decoded = PimpbunnyProxy::decode_encoded_url(encoded, license).unwrap(); + assert!(decoded.starts_with( + "https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/" + )); + assert!(decoded.contains("?rnd=")); + } + + #[test] + fn test_extract_license_code() { + let html = r#"license_code: '$576262819011919', lrc: '77028724',"#; + assert_eq!( + PimpbunnyProxy::extract_license_code(html).unwrap(), + "$576262819011919" + ); + } + + #[test] + fn test_extract_video_urls() { + let html = r#" + video_url: 'function/0/https://pimpbunny.com/get_file/34/abc/530000/530112/530112_pb_360p.mp4/', + video_alt_url: 'function/0/https://pimpbunny.com/get_file/34/def/530000/530112/530112_pb_480p.mp4/', + video_alt_url2: 'function/0/https://pimpbunny.com/get_file/34/ghi/530000/530112/530112_pb_720p.mp4/', + video_alt_url5: 'https://pimpbunny.com/?upgrade=true', + "#; + let urls = PimpbunnyProxy::extract_video_urls(html); + assert_eq!(urls.len(), 3); + assert!(urls[0].starts_with("function/0/")); + assert!(urls[2].contains("720p")); + } + + #[tokio::test] + async fn test_live_extract_charlotte_sins() { + let url = "https://pimpbunny.com/videos/charlotte-sins-gets-drilled-by-jax-slayher/"; + let src = browser::wait_for_src(url, "//video[@src]").await; + println!("video src: {:?}", src); + assert!(src.is_some(), "expected a video URL but got None"); + let src = src.unwrap(); + assert!(src.starts_with("http"), "expected http URL, got: {src}"); } } diff --git a/src/proxies/thaiporntv.rs b/src/proxies/thaiporntv.rs new file mode 100644 index 0000000..4c6fd02 --- /dev/null +++ b/src/proxies/thaiporntv.rs @@ -0,0 +1,111 @@ +use ntex::web; +use crate::util::requester::Requester; +use crate::videos::VideoFormat; +use crate::providers::report_provider_error_background; + +const CHANNEL_ID: &str = "thaiporntv"; +const BASE_URL: &str = "https://www.thaiporntv.com"; + +#[derive(Debug, Clone)] +pub struct ThaipornTvProxy {} + +impl ThaipornTvProxy { + pub fn new() -> Self { + ThaipornTvProxy {} + } + + fn decode_data_enc(encoded_data: &str) -> Option> { + let cleaned_data = encoded_data.replace("-", "+").replace("_", "/"); + let padded_data = format!("{: bytes, + Err(e) => { + report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.base64", &format!("error={e}")); + return None; + } + }; + + if decoded_bytes.is_empty() { + report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.empty", "decoded bytes are empty"); + return None; + } + + let key = decoded_bytes[0]; + let decrypted_bytes: Vec = decoded_bytes[1..] + .iter() + .map(|&b| b ^ key) + .collect(); + + let json_str = match String::from_utf8(decrypted_bytes) { + Ok(s) => s, + Err(e) => { + report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.utf8", &format!("error={e}")); + return None; + } + }; + + let formats_json: serde_json::Value = match serde_json::from_str(&json_str) { + Ok(value) => value, + Err(e) => { + report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.json_parse", &format!("error={e}")); + return None; + } + }; + + let Some(formats_array) = formats_json.as_array() else { + report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.not_array", "JSON is not an array"); + return None; + }; + + let mut formats = Vec::new(); + for format_val in formats_array { + let Some(u) = format_val.get("u").and_then(|v| v.as_str()) else { continue; }; + let Some(q) = format_val.get("q").and_then(|v| v.as_str()) else { continue; }; + + let mut format = VideoFormat::new( + u.to_string(), + q.to_string(), + "application/x-mpegURL".to_string(), + ); + // Add referer to the format + format.add_http_header("Referer".to_string(), BASE_URL.to_string()); + formats.push(format); + } + + Some(formats) + } + + pub async fn get_video_url( + &self, + url_path: String, // Expects something like videos/2019/thai-student-have-sex-at-taxi-1658/ + requester: web::types::State, + ) -> String { + let mut requester = requester.get_ref().clone(); + let video_detail_url = format!("{}/{}", BASE_URL, url_path.trim_start_matches('/')); + + let html = match requester.get(&video_detail_url, None).await { + Ok(text) => text, + Err(e) => { + report_provider_error_background(CHANNEL_ID, "proxy.get_video_url.fetch_html", &format!("url={}; error={}", video_detail_url, e)); + return String::new(); + } + }; + + let document = scraper::Html::parse_document(&html); + let video_element_selector = scraper::Selector::parse("video[data-enc]").unwrap(); + + if let Some(video_element) = document.select(&video_element_selector).next() { + if let Some(data_enc) = video_element.value().attr("data-enc") { + if let Some(formats) = Self::decode_data_enc(data_enc) { + // Return the URL of the best quality format, or the first one if quality is not a factor. + // For m3u8, we typically just return the m3u8 playlist URL. + if let Some(format) = formats.first() { + return format.url.clone(); + } + } + } + } + String::new() + } +} diff --git a/src/proxy.rs b/src/proxy.rs index 6d11f77..0b4ce11 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -15,6 +15,7 @@ use crate::proxies::sxyprn::SxyprnProxy; use crate::proxies::vjav::VjavProxy; use crate::proxies::vidara::VidaraProxy; use crate::proxies::lulustream::LulustreamProxy; +use crate::proxies::thaiporntv::ThaipornTvProxy; use crate::proxies::*; use crate::util::requester::Requester; @@ -125,11 +126,16 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::get().to(crate::proxies::porndishthumb::get_image)), ); cfg.service( - web::resource("/pornhub-thumb/{endpoint}*") + web::resource("/proxy/pornhub-thumb/{endpoint}*") .route(web::post().to(crate::proxies::pornhubthumb::get_image)) .route(web::get().to(crate::proxies::pornhubthumb::get_image)), ); -} + cfg.service( + web::resource("/proxy/thaiporntv/{tail:.*}") + .route(web::post().to(proxy2redirect)) + .route(web::get().to(proxy2redirect)), + ); + } async fn proxy2redirect( req: HttpRequest, @@ -163,6 +169,7 @@ fn get_proxy(proxy: &str) -> Option { "porndish" => Some(AnyProxy::Porndish(PorndishProxy::new())), "spankbang" => Some(AnyProxy::Spankbang(SpankbangProxy::new())), "lulustream" => Some(AnyProxy::Lulustream(LulustreamProxy::new())), + "thaiporntv" => Some(AnyProxy::ThaipornTv(ThaipornTvProxy::new())), _ => None, } } diff --git a/src/util/browser.rs b/src/util/browser.rs new file mode 100644 index 0000000..33a9158 --- /dev/null +++ b/src/util/browser.rs @@ -0,0 +1,41 @@ +use crate::util::{playwright, webdriver}; +use std::time::Duration; +use tokio::sync::mpsc; + +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); + +/// Races chromiumoxide (webdriver) and Playwright against each other. +/// Navigates to `url` and returns the `src` attribute of the first element +/// matched by `xpath` — whichever backend resolves it first wins. +pub async fn wait_for_src(url: &str, xpath: &str) -> Option { + wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await +} + +/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout. +pub async fn wait_for_attribute( + url: &str, + xpath: &str, + attribute: &str, + wait_timeout: Duration, +) -> Option { + // Buffer of 1: the first send wins; the losing task's send is silently + // dropped when the receiver is gone. + let (tx, mut rx) = mpsc::channel::(1); + + let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string()); + let tx1 = tx.clone(); + tokio::spawn(async move { + if let Some(src) = webdriver::wait_for_attribute(&u, &x, &a, wait_timeout).await { + let _ = tx1.send(src).await; + } + }); + + let (u, x, a) = (url.to_string(), xpath.to_string(), attribute.to_string()); + tokio::spawn(async move { + if let Some(src) = playwright::wait_for_attribute(&u, &x, &a, wait_timeout).await { + let _ = tx.send(src).await; + } + }); + + rx.recv().await +} diff --git a/src/util/mod.rs b/src/util/mod.rs index b7fcc82..67cf1a8 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -9,7 +9,10 @@ pub mod hoster_proxy; pub mod proxy; pub mod requester; pub mod time; +pub mod browser; pub mod dean_edwards; +pub mod playwright; +pub mod webdriver; pub fn parse_abbreviated_number(s: &str) -> Option { let s = s.trim(); diff --git a/src/util/playwright.rs b/src/util/playwright.rs new file mode 100644 index 0000000..e961b9e --- /dev/null +++ b/src/util/playwright.rs @@ -0,0 +1,128 @@ +use playwright::Playwright; +use playwright::api::Page; +use std::path::Path; +use std::time::Duration; +use tokio::time::{sleep, timeout}; + +const POLL_INTERVAL: Duration = Duration::from_millis(500); +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); + +const CHROME_ARGS: &[&str] = &[ + "--no-sandbox", + "--disable-dev-shm-usage", + "--no-first-run", + "--no-default-browser-check", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-renderer-backgrounding", + "--disable-ipc-flooding-protection", + "--disable-hang-monitor", + "--disable-gpu", + "--disable-software-rasterizer", + "--disable-accelerated-2d-canvas", + "--disable-webgl", + "--disable-3d-apis", + "--blink-settings=imagesEnabled=false", + "--mute-audio", + "--disable-background-networking", + "--disable-client-side-phishing-detection", + "--disable-component-update", + "--disable-domain-reliability", + "--disable-sync", + "--metrics-recording-only", + "--safebrowsing-disable-auto-update", + "--disable-extensions", + "--disable-translate", + "--disable-default-apps", + "--disable-features=site-per-process", + "--password-store=basic", + "--use-mock-keychain", +]; + +/// Launches a headless Chrome browser via Playwright, navigates to `url`, and +/// polls until the element matched by `xpath` has a non-empty `src` attribute. +pub async fn wait_for_src(url: &str, xpath: &str) -> Option { + wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await +} + +/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout. +pub async fn wait_for_attribute( + url: &str, + xpath: &str, + attribute: &str, + wait_timeout: Duration, +) -> Option { + // _pw must be kept alive — dropping it kills the driver process. + let (_pw, page) = open_page(url).await?; + + let xpath_js = serde_json::to_string(xpath).unwrap_or_default(); + let attr_js = serde_json::to_string(attribute).unwrap_or_default(); + let js = format!( + r#"() => {{ + var r = document.evaluate({xpath_js}, document, null, + XPathResult.FIRST_ORDERED_NODE_TYPE, null); + var el = r.singleNodeValue; + if (!el) return ''; + return el.getAttribute({attr_js}) || el[{attr_js}] || ''; + }}"# + ); + + let result = timeout(wait_timeout, async { + loop { + match page.evaluate::<(), String>(&js, ()).await { + Ok(s) if !s.is_empty() => return Some(s), + Ok(_) => {} + Err(e) => eprintln!("[playwright] evaluate error: {e}"), + } + sleep(POLL_INTERVAL).await; + } + }) + .await + .ok() + .flatten(); + + result +} + +// ── internals ───────────────────────────────────────────────────────────────── + +async fn open_page(url: &str) -> Option<(Playwright, Page)> { + let pw = Playwright::initialize() + .await + .map_err(|e| eprintln!("[playwright] init error: {e}")) + .ok()?; + + let args: Vec = CHROME_ARGS.iter().map(|s| s.to_string()).collect(); + + let browser = pw + .chromium() + .launcher() + .executable(Path::new("/usr/bin/google-chrome")) + .headless(true) + .args(&args) + .launch() + .await + .map_err(|e| eprintln!("[playwright] launch error: {e}")) + .ok()?; + + let context = browser + .context_builder() + .build() + .await + .map_err(|e| eprintln!("[playwright] context error: {e}")) + .ok()?; + + let page = context + .new_page() + .await + .map_err(|e| eprintln!("[playwright] new_page error: {e}")) + .ok()?; + + page.goto_builder(url) + .goto() + .await + .map_err(|e| eprintln!("[playwright] goto error: {e}")) + .ok()?; + + Some((pw, page)) +} diff --git a/src/util/webdriver.rs b/src/util/webdriver.rs new file mode 100644 index 0000000..e5e6c99 --- /dev/null +++ b/src/util/webdriver.rs @@ -0,0 +1,130 @@ +use chromiumoxide::{Browser, BrowserConfig}; +use futures::StreamExt; +use std::time::Duration; +use tokio::time::{sleep, timeout}; + +const POLL_INTERVAL: Duration = Duration::from_millis(500); +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); + +/// Launches a headless Chrome browser, navigates to `url`, and polls until the +/// element matched by `xpath` has a non-empty `src` attribute, then returns it. +/// +/// Equivalent to Selenium's: +/// `WebDriverWait(driver, 30).until(lambda d: d.find_element(By.XPATH, xpath).get_attribute("src"))` +pub async fn wait_for_src(url: &str, xpath: &str) -> Option { + wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await +} + +/// Like [`wait_for_src`] but lets the caller choose the attribute name and timeout. +pub async fn wait_for_attribute( + url: &str, + xpath: &str, + attribute: &str, + wait_timeout: Duration, +) -> Option { + let (mut browser, page) = open_page(url).await?; + + // Embed xpath and attribute as JSON strings so they are safely quoted inside + // the JS template — avoids any injection from untrusted caller values. + let xpath_js = serde_json::to_string(xpath).unwrap_or_default(); + let attr_js = serde_json::to_string(attribute).unwrap_or_default(); + let js = format!( + r#"(function() {{ + var r = document.evaluate({xpath_js}, document, null, + XPathResult.FIRST_ORDERED_NODE_TYPE, null); + var el = r.singleNodeValue; + if (!el) return ''; + return el.getAttribute({attr_js}) || el[{attr_js}] || ''; + }})()"# + ); + + let result = timeout(wait_timeout, async { + loop { + match page.evaluate(js.as_str()).await { + Ok(val) => { + if let Ok(s) = val.into_value::() { + if !s.is_empty() { + return Some(s); + } + } + } + Err(e) => eprintln!("[webdriver] evaluate error: {e}"), + } + sleep(POLL_INTERVAL).await; + } + }) + .await + .ok() + .flatten(); + + let _ = browser.close().await; + result +} + +// ── internals ───────────────────────────────────────────────────────────────── + +async fn open_page( + url: &str, +) -> Option<(Browser, chromiumoxide::Page)> { + let config = BrowserConfig::builder() + // ── headless & sandbox ──────────────────────────────────────── + .arg("--headless=new") + .arg("--no-sandbox") + .arg("--disable-dev-shm-usage") + .arg("--no-first-run") + .arg("--no-default-browser-check") + // ── JS timer / scheduler: prevent Chrome from throttling + // background timers and renderers (critical for player init) ── + .arg("--disable-background-timer-throttling") + .arg("--disable-backgrounding-occluded-windows") + .arg("--disable-renderer-backgrounding") + .arg("--disable-ipc-flooding-protection") + .arg("--disable-hang-monitor") + // ── skip rendering work we don't need ──────────────────────── + .arg("--disable-gpu") + .arg("--disable-software-rasterizer") + .arg("--disable-accelerated-2d-canvas") + .arg("--disable-webgl") + .arg("--disable-3d-apis") + .arg("--blink-settings=imagesEnabled=false") + .arg("--mute-audio") + // ── cut background network / update noise ──────────────────── + .arg("--disable-background-networking") + .arg("--disable-client-side-phishing-detection") + .arg("--disable-component-update") + .arg("--disable-domain-reliability") + .arg("--disable-sync") + .arg("--metrics-recording-only") + .arg("--safebrowsing-disable-auto-update") + // ── misc overhead ───────────────────────────────────────────── + .arg("--disable-extensions") + .arg("--disable-translate") + .arg("--disable-default-apps") + .arg("--disable-features=site-per-process") + .arg("--password-store=basic") + .arg("--use-mock-keychain") + .build() + .map_err(|e| eprintln!("[webdriver] BrowserConfig error: {e}")) + .ok()?; + + let (browser, mut handler) = Browser::launch(config) + .await + .map_err(|e| eprintln!("[webdriver] Browser launch error: {e}")) + .ok()?; + + tokio::spawn(async move { + loop { + if handler.next().await.is_none() { + break; + } + } + }); + + let page = browser + .new_page(url) + .await + .map_err(|e| eprintln!("[webdriver] new_page error: {e}")) + .ok()?; + + Some((browser, page)) +}