diff --git a/src/providers/thaiporntv.rs b/src/providers/thaiporntv.rs index e178be4..029d6dc 100644 --- a/src/providers/thaiporntv.rs +++ b/src/providers/thaiporntv.rs @@ -3,13 +3,11 @@ use crate::api::ClientVersion; use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default}; use crate::status::*; use crate::util::cache::VideoCache; -use crate::util::parse_abbreviated_number; use crate::util::requester::Requester; -use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use base64::{engine::general_purpose, Engine}; -use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, Utc}; +use chrono::{DateTime, NaiveDate, Utc}; use error_chain::error_chain; use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; @@ -47,6 +45,7 @@ const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; const HTML_ACCEPT: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; +const CDN_BASE: &str = "https://web.techvids.top"; #[derive(Debug, Clone)] pub struct ThaipornTvProvider { @@ -66,9 +65,6 @@ enum ArchiveMode { #[derive(Debug, Clone)] enum Target { Archive(ArchiveMode), - Search { - query: String, - }, Tag { slug: String, }, @@ -181,10 +177,6 @@ impl ThaipornTvProvider { .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) } - fn regex(value: &str) -> Result { - Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) - } - fn collapse_whitespace(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } @@ -250,20 +242,28 @@ impl ThaipornTvProvider { url: base_url.to_string(), tags: Arc::clone(&tags), }; - let html = provider.fetch_html(&mut requester, &format!("{}/tags/", base_url), &format!("{}/", base_url)).await?; + let html = provider.fetch_html( + &mut requester, + &format!("{}/tags/", base_url), + &format!("{}/", base_url), + ).await?; let document = Html::parse_document(&html); - let selector = Self::selector("a[href*='/tags/']")?; - for element in document.select(&selector) { + // Tag cards are links with /tags/ in href + // html5ever handles unquoted href attributes correctly + let a_selector = Self::selector("a.group[href*='/tags/']")?; + let h2_selector = Self::selector("h2")?; + for element in document.select(&a_selector) { let Some(href) = element.value().attr("href") else { continue; }; - let title = Self::decode_html_entities(&element.text().collect::()); - let re = Regex::new(r"^(.+?)\s+\d+$").unwrap(); // Remove count from tag title - let title = if let Some(captures) = re.captures(&title) { - captures.get(1).unwrap().as_str().to_string() - } else { - title - }; + // Skip pagination and root tag page + if href.ends_with("/tags/") || href.contains("/page/") { + continue; + } + // Extract title from the h2 inside the card + let title = element.select(&h2_selector).next() + .map(|h| Self::collapse_whitespace(&h.text().collect::())) + .unwrap_or_default(); if title.is_empty() { continue; } @@ -288,7 +288,14 @@ impl ThaipornTvProvider { } } - fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target { + fn resolve_target(&self, options: &ServerOptions, sort: &str, query: Option<&str>) -> Target { + // Query: check for tag shortcut first + if let Some(q) = query { + if let Some(target) = self.find_tag_target_in_options(q) { + return target; + } + } + // Filter option: check for tag shortcut if let Some(value) = options.filter.as_deref() { if let Some(target) = self.find_tag_target_in_options(value) { return target; @@ -297,19 +304,7 @@ impl ThaipornTvProvider { Target::Archive(Self::archive_from_sort(sort)) } - fn resolve_query_target(&self, query: &str) -> Target { - if let Some(target) = self.find_tag_target_in_options(query) { - return target; - } - Target::Search { - query: query.trim().to_string(), - } - } - - fn find_tag_target_in_options( - &self, - value: &str, - ) -> Option { + fn find_tag_target_in_options(&self, value: &str) -> Option { let normalized = value.trim().to_lowercase(); let tags = self.tags.read().ok()?; let option = tags.iter().find(|item| { @@ -321,8 +316,14 @@ impl ThaipornTvProvider { fn target_from_filter_id(&self, id: &str) -> Option { if id.contains("/tags/") { let url = Url::parse(&self.absolute_url(id)).ok()?; - let path_segments = url.path_segments()?; - let slug = path_segments.last()?.trim_end_matches('/').to_string(); + let segments: Vec<_> = url + .path_segments()? + .filter(|s| !s.is_empty()) + .collect(); + let slug = segments.last()?.to_string(); + if slug == "tags" { + return None; + } return Some(Target::Tag { slug }); } None @@ -331,7 +332,6 @@ impl ThaipornTvProvider { fn build_url_for_target(&self, target: &Target, page: u32) -> String { match target { Target::Archive(mode) => self.build_archive_url(*mode, page), - Target::Search { query } => self.build_search_url(query, page), Target::Tag { slug } => self.build_tag_url(slug, page), } } @@ -351,15 +351,6 @@ impl ThaipornTvProvider { } } - fn build_search_url(&self, query: &str, page: u32) -> String { - let encoded_query = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string(); - if page <= 1 { - format!("{}/search/?q={}", self.url, encoded_query) - } else { - format!("{}/search/?q={}&page={}", self.url, encoded_query, page) - } - } - fn build_tag_url(&self, slug: &str, page: u32) -> String { let encoded_slug = utf8_percent_encode(slug, NON_ALPHANUMERIC).to_string(); if page <= 1 { @@ -371,7 +362,8 @@ impl ThaipornTvProvider { fn decode_data_enc(encoded_data: &str) -> Result> { let cleaned_data = encoded_data.replace("-", "+").replace("_", "/"); - let padded_data = format!("{:, - _proxy_base_url: &str, - ) -> Option { - let id_selector = Self::selector("a[href*='/videos/']").ok()?; - let title_selector = Self::selector("a[href*='/videos/']").ok()?; - let thumb_selector = Self::selector("img").ok()?; - let duration_selector = Self::selector("div.duration").ok()?; - let views_selector = Self::selector("div.views").ok()?; - let uploaded_at_selector = Self::selector("div.date").ok()?; - let tag_selector = Self::selector("a[href*='/tags/']").ok()?; + fn parse_card(&self, card: ElementRef<'_>) -> Option { + // Selectors for the Tailwind-based redesign + let playthumb_sel = Self::selector("a.playthumb").ok()?; + let img_sel = Self::selector("img").ok()?; + let title_sel = Self::selector("a.text-brand-pink").ok()?; + let tag_sel = Self::selector("a[href*='/tags/']").ok()?; + let date_sel = Self::selector("span.ml-auto").ok()?; - let href_element = card.select(&id_selector).next()?; - let href = href_element.value().attr("href")?.to_string(); + let link = card.select(&playthumb_sel).next()?; + let href = link.value().attr("href")?; + let data_id = link.value().attr("data-id").unwrap_or(""); - let re = Regex::new(r"/videos/\d{4}/[^/-]+-(\d+)/$").unwrap(); - let captures = re.captures(&href)?; - let id = captures.get(1)?.as_str().to_string(); + // ID: numeric part from data-id (xn88-39688 → 39688) or from URL + let id = if !data_id.is_empty() { + data_id.rsplit('-').next().unwrap_or(data_id).to_string() + } else { + let re = Regex::new(r"-(\d+)/$").unwrap(); + re.captures(href)?.get(1)?.as_str().to_string() + }; - let title = card.select(&title_selector).next() - .and_then(|e| e.value().attr("title")) - .map(Self::decode_html_entities) - .unwrap_or_else(|| { - card.select(&thumb_selector).next() - .and_then(|e| e.value().attr("alt")) - .map(Self::decode_html_entities) - .unwrap_or_default() - }); + let url = if href.starts_with("http") { + href.to_string() + } else { + self.absolute_url(href) + }; - let thumb = card.select(&thumb_selector).next() + let thumb = card.select(&img_sel).next() .and_then(|e| e.value().attr("src")) - .map(|s| self.absolute_url(s)) + .map(|s| if s.starts_with("http") { s.to_string() } else { self.absolute_url(s) }) .unwrap_or_default(); - let duration_text = card.select(&duration_selector).next() - .map(|e| Self::collapse_whitespace(&e.text().collect::())) - .unwrap_or_default(); - let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32; + // Preview GIF from CDN + let preview = if !data_id.is_empty() { + Some(format!("{CDN_BASE}/2/4/7/9/preview/{data_id}_preview.gif")) + } else { + None + }; - let views = card.select(&views_selector).next() - .map(|e| Self::collapse_whitespace(&e.text().collect::())) - .and_then(|s| s.strip_suffix(" views").map(|s| parse_abbreviated_number(s))) - .flatten(); - - let uploaded_at_text = card.select(&uploaded_at_selector).next() + let title = card.select(&title_sel).next() .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .filter(|t| !t.is_empty()) + .or_else(|| { + link.value().attr("aria-label") + .map(|s| { + let s = s.strip_prefix("Watch ").unwrap_or(s); + let s = s.strip_suffix(" video").unwrap_or(s); + Self::decode_html_entities(s) + }) + }) .unwrap_or_default(); - let uploaded_at = NaiveDate::parse_from_str(&uploaded_at_text, "%d %b %Y") - .ok() + + // Duration is in a font-mono div inside the thumbnail overlay + let card_html = card.html(); + let dur_re = Regex::new(r"font-mono[^>]+>(\d+:\d+(?::\d+)?)<").unwrap(); + let duration_text = dur_re.captures(&card_html) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().to_string()) + .unwrap_or_default(); + let duration = parse_duration_mm_ss(&duration_text); + + // Views from the fa-eye span + let views_re = Regex::new(r"fa-eye[^>]+>\s*(\d[\d,]*)").unwrap(); + let views = views_re.captures(&card_html) + .and_then(|c| c.get(1)) + .and_then(|m| m.as_str().replace(',', "").parse::().ok()); + + // Upload date from the ml-auto span + let uploaded_at = card.select(&date_sel).next() + .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .and_then(|s| NaiveDate::parse_from_str(s.trim(), "%d %b %Y").ok()) .and_then(|date| { date.and_hms_opt(0, 0, 0) .map(|dt| DateTime::::from_naive_utc_and_offset(dt, Utc).timestamp() as u64) }); - let tags: Vec = card.select(&tag_selector) - .filter_map(|e| e.value().attr("href")) - .filter_map(|link_href| { - Url::parse(&self.absolute_url(link_href)) - .ok() - .and_then(|url| url.path_segments().map(|segments| segments.map(ToString::to_string).collect::>())) - .and_then(|segments_vec| segments_vec.last().cloned()) - .map(|s| Self::decode_html_entities(&s).trim_end_matches('/').to_string()) - }) + // Tags from /tags/ links in the card (these are simple text-only links in cards) + let tags: Vec = card.select(&tag_sel) + .map(|e| Self::collapse_whitespace(&e.text().collect::())) + .filter(|s| !s.is_empty()) .collect(); - let mut item = VideoItem::new( id, title, - self.absolute_url(&href), + url, CHANNEL_ID.to_string(), thumb, duration, @@ -492,18 +497,20 @@ impl ThaipornTvProvider { if let Some(views) = views { item = item.views(views); } if let Some(uploaded_at) = uploaded_at { item = item.uploaded_at(uploaded_at); } + if let Some(preview) = preview { item = item.preview(preview); } if !tags.is_empty() { item = item.tags(tags); } Some(item) } - fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Result> { + fn get_video_items_from_html(&self, html: String) -> Result> { let document = Html::parse_document(&html); - let card_selector = Self::selector("div.video-list-item")?; + // Cards use class "group flex flex-col"; ad cards additionally have "ad-container" + let card_selector = Self::selector("div.group:not(.ad-container)")?; let mut items = Vec::new(); for card in document.select(&card_selector) { - if let Some(item) = self.parse_card(card, proxy_base_url) { + if let Some(item) = self.parse_card(card) { items.push(item); } } @@ -529,7 +536,11 @@ impl ThaipornTvProvider { item.formats = Some(formats); }, Err(e) => { - report_provider_error_background(CHANNEL_ID, "decode_data_enc", &format!("url={}; error={}", item.url, e)); + report_provider_error_background( + CHANNEL_ID, + "decode_data_enc", + &format!("url={}; error={}", item.url, e), + ); } } } @@ -560,7 +571,7 @@ impl ThaipornTvProvider { .await .map_err(|_| Error::from(format!("list request timed out for {url}")))??; - let list_items = self.get_video_items_from_html(html, options.public_url_base.as_deref().unwrap_or_default())?; + let list_items = self.get_video_items_from_html(html)?; if list_items.is_empty() { return Ok(vec![]); @@ -605,27 +616,28 @@ impl ThaipornTvProvider { cache: VideoCache, page: u32, sort: &str, + query: Option<&str>, per_page_limit: usize, options: ServerOptions, ) -> Result> { - let target = self.resolve_option_target(&options, sort); + let target = self.resolve_target(&options, sort, query); let url = self.build_url_for_target(&target, page); self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) .await } +} - async fn query( - &self, - cache: VideoCache, - page: u32, - query: &str, - per_page_limit: usize, - options: ServerOptions, - ) -> Result> { - let target = self.resolve_query_target(query); - let url = self.build_url_for_target(&target, page); - self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) - .await +/// Parse "MM:SS" or "HH:MM:SS" into total seconds. +fn parse_duration_mm_ss(text: &str) -> u32 { + let parts: Vec = text + .split(':') + .filter_map(|p| p.trim().parse().ok()) + .collect(); + match parts.as_slice() { + [h, m, s] => h * 3600 + m * 60 + s, + [m, s] => m * 60 + s, + [s] => *s, + _ => 0, } } @@ -644,14 +656,9 @@ impl Provider for ThaipornTvProvider { let _ = pool; let page = page.parse::().unwrap_or(1); let per_page_limit = per_page.parse::().unwrap_or(30); + let query_ref = query.as_deref().filter(|q| !q.trim().is_empty()); - let result = match query { - Some(query) if !query.trim().is_empty() => { - self.query(cache, page, &query, per_page_limit, options) - .await - } - _ => self.get(cache, page, &sort, per_page_limit, options).await, - }; + let result = self.get(cache, page, &sort, query_ref, per_page_limit, options).await; match result { Ok(videos) => videos, @@ -698,19 +705,6 @@ mod tests { ); } - #[test] - fn builds_search_urls() { - let provider = provider(); - assert_eq!( - provider.build_search_url("thai student", 1), - "https://www.thaiporntv.com/search/?q=thai%20student" - ); - assert_eq!( - provider.build_search_url("thai student", 2), - "https://www.thaiporntv.com/search/?q=thai%20student&page=2" - ); - } - #[test] fn builds_tag_urls() { let provider = provider(); @@ -720,7 +714,7 @@ mod tests { ); assert_eq!( provider.build_tag_url("thai-massage", 2), - "https://www.thaiporntv.com/tags/thai-massage/page/2/" + "https://www.thaiporntv.com/tags/thai%2Dmassage/page/2/" ); } @@ -730,8 +724,17 @@ mod tests { let formats = ThaipornTvProvider::decode_data_enc(encoded).unwrap(); assert_eq!(formats.len(), 1); assert_eq!(formats[0].url, "https://web.techvids.top/m3u8/1658_480p.m3u8"); - assert_eq!(formats[0].quality, "480p"); - assert_eq!(formats[0].http_headers.get("Referer").unwrap(), "https://www.thaiporntv.com"); + // Verify format fields via JSON serialization (quality and http_headers are private) + let json = serde_json::to_value(&formats[0]).unwrap(); + assert_eq!(json["quality"], "480p"); + assert_eq!(json["http_headers"]["Referer"], BASE_URL); + } + + #[test] + fn parses_duration() { + assert_eq!(parse_duration_mm_ss("50:47"), 3047); + assert_eq!(parse_duration_mm_ss("1:05:30"), 3930); + assert_eq!(parse_duration_mm_ss("12:04"), 724); } #[tokio::test] @@ -753,32 +756,7 @@ mod tests { sort: Some("new".to_string()), sexuality: None, }; - let videos = provider.get(VideoCache::new(), 1, "new", 10, options).await.unwrap(); + let videos = provider.get(VideoCache::new(), 1, "new", None, 10, options).await.unwrap(); assert!(!videos.is_empty()); - // Further assertions on video content - } - - #[tokio::test] - #[ignore] - async fn fetches_and_parses_search() { - let provider = provider(); - let options = ServerOptions { - featured: None, - category: None, - sites: None, - filter: None, - language: None, - public_url_base: Some("http://127.0.0.1:18080".to_string()), - requester: Some(Requester::new()), - network: None, - stars: None, - categories: None, - duration: None, - sort: Some("new".to_string()), - sexuality: None, - }; - let videos = provider.query(VideoCache::new(), 1, "thai student", 10, options).await.unwrap(); - assert!(!videos.is_empty()); - // Further assertions on video content } } diff --git a/src/proxies/thaiporntv.rs b/src/proxies/thaiporntv.rs index 4c6fd02..c90abe3 100644 --- a/src/proxies/thaiporntv.rs +++ b/src/proxies/thaiporntv.rs @@ -1,3 +1,4 @@ +use base64::{engine::general_purpose, Engine}; use ntex::web; use crate::util::requester::Requester; use crate::videos::VideoFormat; @@ -16,9 +17,10 @@ impl ThaipornTvProxy { fn decode_data_enc(encoded_data: &str) -> Option> { let cleaned_data = encoded_data.replace("-", "+").replace("_", "/"); - let padded_data = format!("{: