From 751fc7765aa5eac0211e2f5ef602aad99b676ab4 Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 18 Jun 2026 10:10:26 +0000 Subject: [PATCH] tikporn --- build.rs | 5 + src/providers/archivebate.rs | 221 +-------- src/providers/tikporn.rs | 892 +++++++++++++++++++++++++++++++++++ 3 files changed, 903 insertions(+), 215 deletions(-) create mode 100644 src/providers/tikporn.rs diff --git a/build.rs b/build.rs index 8edb141..7183d4b 100644 --- a/build.rs +++ b/build.rs @@ -41,6 +41,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "pornhub_shorties", ty: "PornhubShortiesProvider", }, + ProviderDef { + id: "tikporn", + module: "tikporn", + ty: "TikpornProvider", + }, ProviderDef { id: "youporn", module: "youporn", diff --git a/src/providers/archivebate.rs b/src/providers/archivebate.rs index ee22a2d..76c468e 100644 --- a/src/providers/archivebate.rs +++ b/src/providers/archivebate.rs @@ -64,12 +64,10 @@ const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[ ("tiktok", "dGlrdG9r"), ]; -const GENDER_SHORTCUTS: &[(&str, &str)] = &[ - ("female", "ZmVtYWxl"), - ("couple", "Y291cGxl"), - ("male", "bWFsZQ=="), - ("trans", "dHJhbnM="), -]; +// NOTE: archivebate.com also exposes `/gender/` routes in its nav, but the +// Livewire `filter.platform` component always responds with a redirect to the home +// page for gender values (server-side bug on the site), so gender browsing returns +// no videos. Gender shortcuts are intentionally not exposed here. 
#[derive(Debug, Clone)] pub struct ArchivebateProvider { @@ -85,10 +83,6 @@ enum Target { id: String, title: String, }, - Gender { - id: String, - title: String, - }, Profile { username: String, platform: Option, @@ -120,12 +114,6 @@ struct LivewireInitialData { server_memo_json: String, } -#[derive(Debug, Clone)] -struct ResolvedMixdropMedia { - media_url: String, - embed_url: String, -} - impl ArchivebateProvider { pub fn new() -> Self { let provider = Self { @@ -191,13 +179,6 @@ impl ArchivebateProvider { title: "All".to_string(), }]; - for (title, id) in GENDER_SHORTCUTS { - filters.push(FilterOption { - id: format!("gender:{id}"), - title: format!("Gender: {}", Self::title_case(title)), - }); - } - for (title, id) in PLATFORM_SHORTCUTS { filters.push(FilterOption { id: format!("platform:{id}"), @@ -223,7 +204,7 @@ impl ArchivebateProvider { Channel { id: CHANNEL_ID.to_string(), name: "ArchiveBate".to_string(), - description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and yt-dlp compatible watch URLs.".to_string(), + description: "ArchiveBate cam recording feeds with platform and profile shortcuts and yt-dlp compatible watch URLs.".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), status: "work in progress".to_string(), @@ -244,7 +225,7 @@ impl ArchivebateProvider { ChannelOption { id: "filter".to_string(), title: "Shortcuts".to_string(), - description: "Direct platform and gender archive routes.".to_string(), + description: "Direct platform archive routes.".to_string(), systemImage: "tag.fill".to_string(), colorName: "green".to_string(), options: filters, @@ -365,13 +346,6 @@ impl ArchivebateProvider { format!("{}/platform/{id}?page={page}", self.url) } } - Target::Gender { id, .. } => { - if page <= 1 { - format!("{}/gender/{id}", self.url) - } else { - format!("{}/gender/{id}?page={page}", self.url) - } - } Target::Profile { username, .. 
} => { if page <= 1 { format!("{}/profile/{username}", self.url) @@ -435,15 +409,6 @@ impl ArchivebateProvider { } } - if let Some(id) = trimmed.strip_prefix("gender:") { - if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { - return Some(Target::Gender { - id: id.to_string(), - title: Self::title_case(name), - }); - } - } - let normalized = Self::normalize_title(trimmed); if let Some((name, id)) = PLATFORM_SHORTCUTS .iter() @@ -455,16 +420,6 @@ impl ArchivebateProvider { }); } - if let Some((name, id)) = GENDER_SHORTCUTS - .iter() - .find(|(name, _)| Self::normalize_title(name) == normalized) - { - return Some(Target::Gender { - id: (*id).to_string(), - title: Self::title_case(name), - }); - } - if let Some(option) = self.find_uploader_option(trimmed) { if let Some(username) = option.id.strip_prefix("profile:") { return Some(Target::Profile { @@ -605,7 +560,6 @@ impl ArchivebateProvider { fn extra_tags_for_target(target: &Target) -> Vec { match target { Target::Platform { title, .. } => vec![title.clone()], - Target::Gender { title, .. } => vec![title.clone()], Target::Profile { platform, gender, .. 
} => { let mut tags = Vec::new(); if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { @@ -746,104 +700,6 @@ impl ArchivebateProvider { Ok(items) } - fn parse_mixin_packed_eval(html: &str) -> Option { - let eval_regex = Regex::new( - r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P.*?)',\s*(?P[0-9]+),\s*(?P[0-9]+),\s*'(?P.*?)'\.split\('\|'\)"#, - ) - .ok()?; - let captures = eval_regex.captures(html)?; - let payload_raw = captures.name("payload")?.as_str(); - let radix = captures.name("radix")?.as_str().parse::().ok()?; - let count = captures.name("count")?.as_str().parse::().ok()?; - if !(2..=36).contains(&radix) { - return None; - } - - let payload = Self::unescape_js_single_quoted(payload_raw); - let tokens_raw = captures.name("tokens")?.as_str(); - let tokens = tokens_raw.split('|').collect::>(); - let mut unpacked = payload; - - for index in (0..count).rev() { - let Some(token) = tokens.get(index) else { - continue; - }; - if token.is_empty() { - continue; - } - let key = Self::to_radix(index, radix); - let pattern = format!(r"\b{}\b", regex::escape(&key)); - let re = Regex::new(&pattern).ok()?; - unpacked = re.replace_all(&unpacked, *token).into_owned(); - } - - Some(unpacked) - } - - fn unescape_js_single_quoted(value: &str) -> String { - let mut output = String::with_capacity(value.len()); - let mut chars = value.chars(); - while let Some(character) = chars.next() { - if character != '\\' { - output.push(character); - continue; - } - let Some(next) = chars.next() else { - break; - }; - match next { - '\\' => output.push('\\'), - '\'' => output.push('\''), - '"' => output.push('"'), - 'n' => output.push('\n'), - 'r' => output.push('\r'), - 't' => output.push('\t'), - _ => output.push(next), - } - } - output - } - - fn to_radix(mut value: usize, radix: u32) -> String { - if value == 0 { - return "0".to_string(); - } - let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz"; - let mut out = Vec::new(); - while value > 0 { - let 
digit = value % radix as usize; - out.push(alphabet[digit] as char); - value /= radix as usize; - } - out.iter().rev().collect() - } - - fn extract_mixdrop_media_url(html: &str) -> Option { - let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; - if let Some(url) = direct_regex - .captures(html) - .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) - { - return Some(Self::normalize_possible_protocol_relative(&url)); - } - - let unpacked = Self::parse_mixin_packed_eval(html)?; - let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?; - unpacked_regex - .captures(&unpacked) - .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) - .map(|value| Self::normalize_possible_protocol_relative(&value)) - } - - fn normalize_possible_protocol_relative(value: &str) -> String { - let trimmed = value.trim(); - if trimmed.starts_with("//") { - format!("https:{trimmed}") - } else { - trimmed.to_string() - } - } - fn host_from_url(url: &str) -> Option { let parsed = url::Url::parse(url).ok()?; parsed.host_str().map(|value| value.to_ascii_lowercase()) @@ -931,25 +787,6 @@ impl ArchivebateProvider { format } - fn first_video_source_from_html(html: &str) -> Option { - let document = Html::parse_document(html); - let source_selector = Selector::parse("video source[src]").ok()?; - let video_src_selector = Selector::parse("video[src]").ok()?; - - if let Some(value) = document - .select(&source_selector) - .next() - .and_then(|node| node.value().attr("src")) - { - return Some(value.to_string()); - } - document - .select(&video_src_selector) - .next() - .and_then(|node| node.value().attr("src")) - .map(|value| value.to_string()) - } - fn first_iframe_source_from_html(html: &str) -> Option { let document = Html::parse_document(html); let iframe_selector = Selector::parse("iframe[src]").ok()?; @@ -960,32 +797,6 @@ impl ArchivebateProvider { .map(|value| value.to_string()) } - async fn 
resolve_mixdrop_media_from_iframe( - &self, - iframe_url: &str, - referer: &str, - options: &ServerOptions, - ) -> Option { - let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media"); - let response = requester - .get_raw_with_headers_timeout( - iframe_url, - self.html_headers(referer), - Some(StdDuration::from_secs(6)), - ) - .await - .ok()?; - if !response.status().is_success() { - return None; - } - let iframe_html = response.text().await.ok()?; - let media_url = Self::extract_mixdrop_media_url(&iframe_html)?; - Some(ResolvedMixdropMedia { - media_url, - embed_url: iframe_url.to_string(), - }) - } - async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { let page_url = item.url.clone(); let format_url = Self::proxied_video(options, &page_url); @@ -1405,23 +1216,3 @@ impl Provider for ArchivebateProvider { Some(self.build_channel(clientversion)) } } - -#[cfg(test)] -mod tests { - use super::ArchivebateProvider; - - #[test] - fn extracts_mixdrop_wurl_from_packed_eval() { - let html = r#" - -"#; - let actual = ArchivebateProvider::extract_mixdrop_media_url(html) - .expect("expected mixdrop media url"); - assert_eq!( - actual, - "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168" - ); - } -} diff --git a/src/providers/tikporn.rs b/src/providers/tikporn.rs new file mode 100644 index 0000000..0066aa3 --- /dev/null +++ b/src/providers/tikporn.rs @@ -0,0 +1,892 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; + +use async_trait::async_trait; +use chrono::{DateTime, NaiveDateTime, Utc}; +use error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; +use regex::Regex; +use 
serde::Deserialize; +use std::sync::{Arc, RwLock}; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "tiktok", + tags: &["shorts", "vertical", "tiktok"], + }; + +const BASE_URL: &str = "https://tik.porn"; +const API_BASE: &str = "https://apiv2.tik.porn"; +const CHANNEL_ID: &str = "tikporn"; +const BROWSER_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36"; +// apiv2 listing endpoints return ~10 rows per call regardless of `limit`; step the +// offset by this so pages stay contiguous (no skipped or duplicated rows). +const FEED_PAGE_SIZE: u32 = 10; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + Json(serde_json::Error); + Regex(regex::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct TikpornProvider { + url: String, + build_id: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + /// Default browse feed, served from the apiv2 JSON API (paginates by offset). + Feed { kind: FeedKind }, + /// Keyword search, served from the Next.js `/?s=` SSR payload. + Search { query: String }, + /// Tag/action/model archive, served from the Next.js SSR payload. + Archive { path: String }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FeedKind { + /// Newest uploads first (`/getrecentvideos`); deeper pages fall back to popular + /// because the recent endpoint is a live feed and does not honor `offset`. + Newest, + /// Most popular shorts (`/videos/popular`); honors `offset` for clean paging. 
+ Popular, +} + +// ---- apiv2 listing rows (https://apiv2.tik.porn/{getrecentvideos,videos/popular}) ---- + +#[derive(Debug, Deserialize, Default)] +struct ApiResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Default)] +struct ApiVideo { + #[serde(default)] + video_id: u64, + #[serde(default, deserialize_with = "de_u32")] + duration: u32, + #[serde(default, deserialize_with = "de_u32")] + view_count: u32, + #[serde(default)] + published: String, + #[serde(default)] + video_date: String, + #[serde(default)] + action_name: String, + #[serde(default)] + producer_name: Option, + #[serde(default)] + producer_slug: Option, + #[serde(default)] + pornstars: Vec, + #[serde(default)] + tags: Vec, + #[serde(default)] + thumbnail_url: String, + #[serde(default)] + medium_thumb: String, + #[serde(default)] + small_thumb: String, + #[serde(default)] + poster_url: String, + #[serde(default)] + hls_url: Option, + #[serde(default)] + mp4_url: Option, + #[serde(default)] + video_text: ApiVideoText, +} + +#[derive(Debug, Deserialize, Default)] +struct ApiEntity { + #[serde(default)] + name: String, + #[serde(default)] + slug: String, +} + +#[derive(Debug, Deserialize, Default)] +struct ApiVideoText { + #[serde(default)] + meta_title: ApiLocalizedText, +} + +#[derive(Debug, Deserialize, Default)] +struct ApiLocalizedText { + #[serde(default)] + default: ApiTextValue, +} + +#[derive(Debug, Deserialize, Default)] +struct ApiTextValue { + #[serde(default)] + text: String, +} + +/// Tik.Porn returns sentinel `-1`/`null` values and occasional stringly-typed numbers +/// in integer fields. A single such value would otherwise abort the whole list parse, +/// so coerce anything unexpected to `0`. +fn de_u32<'de, D>(deserializer: D) -> std::result::Result +where + D: serde::Deserializer<'de>, +{ + Ok(match serde_json::Value::deserialize(deserializer)? 
{ + serde_json::Value::Number(number) => number + .as_u64() + .or_else(|| number.as_i64().filter(|value| *value >= 0).map(|v| v as u64)) + .map(|value| value.min(u64::from(u32::MAX)) as u32) + .unwrap_or(0), + serde_json::Value::String(text) => text.trim().parse().unwrap_or(0), + _ => 0, + }) +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct NextData { + #[serde(default, rename = "buildId")] + build_id: String, + #[serde(default)] + props: NextProps, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct NextProps { + #[serde(default, rename = "pageProps")] + page_props: PageProps, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct PageProps { + #[serde(default)] + videos: Option, + #[serde(default, rename = "initialVideoResults")] + initial_video_results: Option, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct VideoList { + #[serde(default)] + data: Vec, + #[serde(default)] + pagination: Pagination, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Pagination { + #[serde(default, rename = "hasMore")] + has_more: bool, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct TikVideo { + #[serde(default)] + id: u64, + #[serde(default, rename = "basePath")] + base_path: String, + #[serde(default)] + texts: Texts, + #[serde(default)] + action: Option, + #[serde(default)] + producer: Option, + #[serde(default)] + pornstars: Vec, + #[serde(default)] + user: Option, + #[serde(default)] + creator: Vec, + #[serde(default)] + tags: Vec, + #[serde(default)] + source: Option, + #[serde(default)] + sources: Vec, + #[serde(default)] + poster: String, + #[serde(default)] + thumbnails: Thumbnails, + #[serde(default, deserialize_with = "de_u32")] + duration: u32, + #[serde(default, deserialize_with = "de_u32")] + likes: u32, + #[serde(default, deserialize_with = "de_u32")] + views: u32, + #[serde(default)] + metadata: Metadata, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Texts { + #[serde(default)] + video: 
TextValue, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct TextValue { + #[serde(default)] + text: String, + #[serde(default)] + parsed_text: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Entity { + #[serde(default)] + id: u64, + #[serde(default)] + name: String, + #[serde(default)] + slug: String, + #[serde(default, rename = "basePath")] + base_path: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Source { + #[serde(default)] + src: String, + #[serde(default, rename = "type")] + mime_type: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Thumbnails { + #[serde(default)] + sm: String, + #[serde(default)] + md: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct Metadata { + #[serde(default)] + title: String, + #[serde(default)] + thumbnail: String, + #[serde(default, rename = "uploadDate")] + upload_date: String, +} + +impl TikpornProvider { + pub fn new() -> Self { + Self { + url: BASE_URL.to_string(), + build_id: Arc::new(RwLock::new(None)), + } + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "Tik Porn".to_string(), + description: "Tik.Porn short-form vertical videos with search, tag/action/model shortcuts, and direct CDN formats.".to_string(), + premium: false, + favicon: "https://tik.porn/favicon.ico".to_string(), + status: "active".to_string(), + categories: vec![ + "Shorts".to_string(), + "Vertical".to_string(), + "TikTok-style".to_string(), + ], + options: vec![ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Newest uploads or most popular shorts. 
Search and tag:/action:/model: shortcuts are also supported.".to_string(), + systemImage: "sparkles.tv".to_string(), + colorName: "pink".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Newest".to_string(), + }, + FilterOption { + id: "trending".to_string(), + title: "Trending".to_string(), + }, + ], + multiSelect: false, + }], + nsfw: true, + cacheDuration: Some(900), + } + } + + fn resolve_target(query: Option<&str>, sort: &str) -> Target { + let Some(raw_query) = query.map(str::trim).filter(|value| !value.is_empty()) else { + return Target::Feed { + kind: Self::feed_kind(sort), + }; + }; + + if let Some(path) = Self::prefixed_archive(raw_query) { + return Target::Archive { path }; + } + + Target::Search { + query: raw_query.to_string(), + } + } + + fn feed_kind(sort: &str) -> FeedKind { + // The server defaults `sort` to "date" when the client omits it. + match sort.trim().to_ascii_lowercase().as_str() { + "trending" | "popular" | "hot" | "best" | "views" | "most-viewed" => FeedKind::Popular, + _ => FeedKind::Newest, + } + } + + fn prefixed_archive(query: &str) -> Option { + let (prefix, value) = query.split_once(':')?; + let value = value.trim(); + if value.is_empty() { + return None; + } + + match prefix.trim().to_ascii_lowercase().as_str() { + "tag" => Some(format!("/tag/{}", Self::slugify(value))), + "action" | "category" | "cat" => Some(format!("/action/{}", Self::slugify(value))), + "model" | "pornstar" | "creator" | "uploader" | "studio" | "site" => { + Some(format!("/{}", Self::slugify(value))) + } + _ => None, + } + } + + fn slugify(value: &str) -> String { + value + .trim() + .trim_matches('/') + .to_ascii_lowercase() + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() { + ch + } else { + '-' + } + }) + .collect::() + .split('-') + .filter(|part| !part.is_empty()) + .collect::>() + .join("-") + } + + fn page_url(&self, target: &Target, page: u8) -> String { + match target { + // Feed targets are served from the 
apiv2 API, not the SSR page URL. + Target::Feed { .. } => format!("{}/", self.url), + Target::Search { query } => { + let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string(); + if page <= 1 { + format!("{}/?s={encoded}", self.url) + } else { + format!("{}/?s={encoded}&page={page}", self.url) + } + } + Target::Archive { path } => { + let path = path.trim_start_matches('/'); + if page <= 1 { + format!("{}/{path}", self.url) + } else { + format!("{}/{path}?page={page}", self.url) + } + } + } + } + + async fn fetch_page_props( + &self, + requester: &mut crate::util::requester::Requester, + target: &Target, + page: u8, + ) -> Result { + if let Target::Search { query } = target { + if let Some(build_id) = self.current_build_id() { + let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string(); + let data_url = if page <= 1 { + format!("{}/_next/data/{build_id}/search.json?s={encoded}", self.url) + } else { + format!( + "{}/_next/data/{build_id}/search.json?s={encoded}&page={page}", + self.url + ) + }; + if let Ok(body) = Self::fetch_text(requester, &data_url, &self.url).await { + if let Ok(next) = serde_json::from_str::(&body) { + return Ok(next.page_props); + } + } + } + } + + let page_url = self.page_url(target, page); + let body = Self::fetch_text(requester, &page_url, &self.url).await?; + let next = Self::extract_next_data(&body)?; + if !next.build_id.is_empty() { + self.set_build_id(next.build_id); + } + Ok(next.props.page_props) + } + + async fn fetch_text( + requester: &mut crate::util::requester::Requester, + url: &str, + referer: &str, + ) -> Result { + requester + .get_with_headers( + url, + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), BROWSER_UA.to_string()), + ( + "Accept".to_string(), + "text/html,application/json;q=0.9,*/*;q=0.8".to_string(), + ), + ], + None, + ) + .await + .map_err(|error| Error::from(ErrorKind::Parse(error.to_string()))) + } + + fn extract_next_data(html: &str) -> Result { + 
let regex = Regex::new( + r#""#, + )?; + let captures = regex.captures(html).ok_or_else(|| { + Error::from(ErrorKind::Parse( + "__NEXT_DATA__ script not found".to_string(), + )) + })?; + let raw = captures.get(1).map(|value| value.as_str()).unwrap_or(""); + serde_json::from_str::(raw).map_err(Error::from) + } + + fn current_build_id(&self) -> Option { + self.build_id.read().ok().and_then(|value| value.clone()) + } + + fn set_build_id(&self, build_id: String) { + if let Ok(mut stored) = self.build_id.write() { + *stored = Some(build_id); + } + } + + fn collect_videos(target: &Target, props: PageProps) -> (Vec, bool) { + match target { + // Feed targets never reach the SSR collector. + Target::Feed { .. } => (Vec::new(), false), + Target::Search { .. } => props + .initial_video_results + .map(|list| (Self::dedupe(list.data), list.pagination.has_more)) + .unwrap_or_default(), + Target::Archive { .. } => props + .videos + .map(|list| (Self::dedupe(list.data), list.pagination.has_more)) + .unwrap_or_default(), + } + } + + fn dedupe(items: Vec) -> Vec { + let mut seen = std::collections::HashSet::new(); + items + .into_iter() + .filter(|item| item.id != 0 && seen.insert(item.id)) + .collect() + } + + fn video_to_item(video: TikVideo) -> Option { + if video.id == 0 { + return None; + } + + let page_path = if video.base_path.trim().is_empty() { + format!("/video/{}", video.id) + } else { + video.base_path.clone() + }; + let page_url = Self::absolute_url(&page_path); + let title = Self::best_title(&video); + let thumb = Self::best_thumb(&video); + if title.is_empty() || thumb.is_empty() { + return None; + } + + let mut item = VideoItem::new( + video.id.to_string(), + title, + page_url.clone(), + CHANNEL_ID.to_string(), + thumb, + video.duration, + ); + + if video.views > 0 { + item.views = Some(video.views); + } + if video.likes > 0 { + item.rating = Some(100.0); + } + if let Some(uploaded_at) = Self::parse_uploaded_at(&video.metadata.upload_date) { + item.uploadedAt = 
Some(uploaded_at); + } + + let uploader = Self::best_uploader(&video); + if let Some(entity) = uploader { + item.uploader = Some(entity.name.clone()); + item.uploaderUrl = Some(Self::absolute_url(&entity.base_path)); + item.uploaderId = Some(format!( + "{CHANNEL_ID}:{}:{}", + Self::entity_kind(&entity.base_path), + if entity.id > 0 { + entity.id.to_string() + } else { + entity.slug.clone() + } + )); + } + + let mut tags = Vec::new(); + if let Some(action) = &video.action { + if !action.name.trim().is_empty() { + tags.push(action.name.trim().to_string()); + } + } + for entity in video.tags.iter().chain(video.pornstars.iter()) { + if !entity.name.trim().is_empty() { + tags.push(entity.name.trim().to_string()); + } + } + tags.sort(); + tags.dedup(); + if !tags.is_empty() { + item.tags = Some(tags); + } + + let formats = Self::formats(&video, &page_url); + if !formats.is_empty() { + item.formats = Some(formats); + } + + if let Some(source) = &video.source { + if source.mime_type.contains("video/") && !source.src.is_empty() { + item.preview = Some(source.src.clone()); + } + } + + Some(item) + } + + fn best_title(video: &TikVideo) -> String { + let raw = if !video.metadata.title.trim().is_empty() { + video.metadata.title.as_str() + } else if !video.texts.video.parsed_text.trim().is_empty() { + video.texts.video.parsed_text.as_str() + } else { + video.texts.video.text.as_str() + }; + + let title = decode(raw.as_bytes()) + .to_string() + .unwrap_or_else(|_| raw.to_string()) + .replace(" | Tik.Porn", "") + .replace("{{", "") + .replace("}}", ""); + title.split_whitespace().collect::>().join(" ") + } + + fn best_thumb(video: &TikVideo) -> String { + if !video.metadata.thumbnail.trim().is_empty() { + return video.metadata.thumbnail.clone(); + } + if !video.thumbnails.md.trim().is_empty() { + return video.thumbnails.md.clone(); + } + if !video.thumbnails.sm.trim().is_empty() { + return video.thumbnails.sm.clone(); + } + video.poster.clone() + } + + fn best_uploader(video: 
&TikVideo) -> Option { + video + .creator + .iter() + .chain(video.pornstars.iter()) + .chain(video.producer.iter()) + .chain(video.user.iter()) + .find(|entity| !entity.name.trim().is_empty() && !entity.base_path.trim().is_empty()) + .cloned() + } + + fn entity_kind(path: &str) -> &'static str { + if path.starts_with("/tag/") { + "tag" + } else if path.starts_with("/action/") { + "action" + } else { + "profile" + } + } + + fn formats(video: &TikVideo, referer: &str) -> Vec { + let mut formats = Vec::new(); + for source in video.sources.iter().chain(video.source.iter()) { + let url = source.src.trim(); + if url.is_empty() { + continue; + } + let mut format = if source.mime_type.contains("mpegURL") || url.contains(".m3u8") { + VideoFormat::m3u8(url.to_string(), "hls".to_string(), "m3u8".to_string()) + } else if source.mime_type.contains("mp4") || url.contains(".mp4") { + VideoFormat::new(url.to_string(), "direct".to_string(), "mp4".to_string()) + } else { + continue; + }; + format.add_http_header("Referer".to_string(), referer.to_string()); + format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string()); + formats.push(format); + } + formats + } + + fn parse_uploaded_at(value: &str) -> Option { + DateTime::parse_from_rfc3339(value) + .ok() + .map(|date| date.with_timezone(&Utc).timestamp()) + .and_then(|timestamp| u64::try_from(timestamp).ok()) + } + + fn absolute_url(value: &str) -> String { + if value.starts_with("http://") || value.starts_with("https://") { + value.to_string() + } else { + format!("{BASE_URL}/{}", value.trim_start_matches('/')) + } + } + + /// Search/archive targets are served by the Next.js SSR `__NEXT_DATA__` payload. 
+    ///
+    /// Rows that cannot be turned into an item (no id, title or thumbnail) are
+    /// dropped.
+    ///
+    /// # Errors
+    /// Propagates any failure to fetch or parse the page props.
+    async fn fetch_ssr(
+        &self,
+        requester: &mut crate::util::requester::Requester,
+        target: &Target,
+        page: u8,
+    ) -> Result<Vec<VideoItem>> {
+        let props = self.fetch_page_props(requester, target, page).await?;
+        let (videos, _has_more) = Self::collect_videos(target, props);
+        Ok(videos.into_iter().filter_map(Self::video_to_item).collect())
+    }
+
+    /// The default browse feed is served by the apiv2 JSON API, which returns fully
+    /// hydrated rows (signed HLS/MP4, thumbnails, title, tags, pornstars).
+    ///
+    /// `page` is 1-based; `0` is treated as page 1 so the offset arithmetic can
+    /// never underflow, whoever the caller is.
+    ///
+    /// # Errors
+    /// Fails when the request fails or the body is not a valid listing response.
+    async fn fetch_feed(
+        &self,
+        requester: &mut crate::util::requester::Requester,
+        kind: FeedKind,
+        page: u8,
+    ) -> Result<Vec<VideoItem>> {
+        // `/getrecentvideos` is a live feed that ignores `offset`, so only page 1 uses
+        // it; deeper "newest" pages continue through the popular feed, which paginates.
+        let (path, offset) = match (kind, page.max(1)) {
+            (FeedKind::Newest, 1) => ("getrecentvideos", 0),
+            (FeedKind::Newest, p) => ("videos/popular", u32::from(p - 2) * FEED_PAGE_SIZE),
+            (FeedKind::Popular, p) => ("videos/popular", u32::from(p - 1) * FEED_PAGE_SIZE),
+        };
+        let url = format!("{API_BASE}/{path}?limit={FEED_PAGE_SIZE}&offset={offset}");
+        let body = Self::fetch_text(requester, &url, &format!("{}/", self.url)).await?;
+        let response = serde_json::from_str::<ApiResponse>(&body)?;
+        Ok(response
+            .data
+            .into_iter()
+            .filter_map(Self::api_video_to_item)
+            .collect())
+    }
+
+    /// Converts an apiv2 listing row into a [`VideoItem`].
+    ///
+    /// Returns `None` when the row has no id, no usable title (meta title, then
+    /// action name) or no thumbnail.
+    fn api_video_to_item(video: ApiVideo) -> Option<VideoItem> {
+        if video.video_id == 0 {
+            return None;
+        }
+
+        let page_url = format!("{BASE_URL}/video/{}", video.video_id);
+        let title = {
+            let cleaned = Self::clean_title(&video.video_text.meta_title.default.text);
+            if cleaned.is_empty() {
+                video.action_name.trim().to_string()
+            } else {
+                cleaned
+            }
+        };
+        // First non-blank thumbnail, preferring medium over small over the rest.
+        let thumb = [
+            &video.medium_thumb,
+            &video.small_thumb,
+            &video.thumbnail_url,
+            &video.poster_url,
+        ]
+        .into_iter()
+        .map(|value| value.trim())
+        .find(|value| !value.is_empty())
+        .unwrap_or("")
+        .to_string();
+        if title.is_empty() || thumb.is_empty() {
+            return None;
+        }
+
+        let mut item = VideoItem::new(
+            video.video_id.to_string(),
+            title,
+            page_url,
+            CHANNEL_ID.to_string(),
+            thumb,
+            video.duration,
+        );
+
+        if video.view_count > 0 {
+            item.views = Some(video.view_count);
+        }
+
+        // `published` wins; `video_date` is only consulted when it is blank.
+        let uploaded = if video.published.trim().is_empty() {
+            video.video_date.as_str()
+        } else {
+            video.published.as_str()
+        };
+        if let Some(timestamp) = Self::parse_api_date(uploaded) {
+            item.uploadedAt = Some(timestamp);
+        }
+
+        // Uploader: prefer the studio/producer, then the first named pornstar.
+        let uploader = match (video.producer_name.as_deref(), video.producer_slug.as_deref()) {
+            (Some(name), Some(slug)) if !name.trim().is_empty() && !slug.trim().is_empty() => {
+                Some((name.trim().to_string(), slug.trim().to_string()))
+            }
+            _ => video
+                .pornstars
+                .iter()
+                .find(|entity| !entity.name.trim().is_empty() && !entity.slug.trim().is_empty())
+                .map(|entity| (entity.name.trim().to_string(), entity.slug.trim().to_string())),
+        };
+        if let Some((name, slug)) = uploader {
+            item.uploader = Some(name);
+            item.uploaderUrl = Some(format!("{BASE_URL}/{slug}"));
+            item.uploaderId = Some(format!("{CHANNEL_ID}:profile:{slug}"));
+        }
+
+        let mut tags = Vec::new();
+        if !video.action_name.trim().is_empty() {
+            tags.push(video.action_name.trim().to_string());
+        }
+        for entity in video.tags.iter().chain(video.pornstars.iter()) {
+            if !entity.name.trim().is_empty() {
+                tags.push(entity.name.trim().to_string());
+            }
+        }
+        tags.sort();
+        tags.dedup();
+        if !tags.is_empty() {
+            item.tags = Some(tags);
+        }
+
+        // Both formats need the same request headers to be playable.
+        let referer = format!("{BASE_URL}/");
+        let with_headers = |mut format: VideoFormat| {
+            format.add_http_header("Referer".to_string(), referer.clone());
+            format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
+            format
+        };
+        let non_blank = |value: &Option<String>| {
+            value
+                .as_deref()
+                .map(str::trim)
+                .filter(|value| !value.is_empty())
+                .map(str::to_string)
+        };
+
+        let mut formats = Vec::new();
+        if let Some(hls) = non_blank(&video.hls_url) {
+            formats.push(with_headers(VideoFormat::m3u8(
+                hls,
+                "hls".to_string(),
+                "m3u8".to_string(),
+            )));
+        }
+        if let Some(mp4) = non_blank(&video.mp4_url) {
+            formats.push(with_headers(VideoFormat::new(
+                mp4,
+                "direct".to_string(),
+                "mp4".to_string(),
+            )));
+        }
+        if !formats.is_empty() {
+            item.formats = Some(formats);
+        }
+
+        Some(item)
+    }
+
+    /// Decodes HTML entities, strips the ` | Tik.Porn` suffix and collapses all
+    /// whitespace runs to single spaces.
+    fn clean_title(raw: &str) -> String {
+        let trimmed = raw.trim();
+        let decoded = decode(trimmed.as_bytes())
+            .to_string()
+            .unwrap_or_else(|_| trimmed.to_string());
+        decoded
+            .replace(" | Tik.Porn", "")
+            .split_whitespace()
+            .collect::<Vec<_>>()
+            .join(" ")
+    }
+
+    /// Parses an apiv2 timestamp into Unix seconds.
+    ///
+    /// Accepts `YYYY-MM-DD HH:MM:SS` (read as UTC) and, as a fallback, RFC 3339.
+    /// Returns `None` for anything else or for instants before the epoch.
+    fn parse_api_date(value: &str) -> Option<u64> {
+        let trimmed = value.trim();
+        NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S")
+            .ok()
+            .map(|naive| naive.and_utc().timestamp())
+            .or_else(|| {
+                DateTime::parse_from_rfc3339(trimmed)
+                    .ok()
+                    .map(|date| date.with_timezone(&Utc).timestamp())
+            })
+            .and_then(|timestamp| u64::try_from(timestamp).ok())
+    }
+}
+
+#[async_trait]
+impl Provider for TikpornProvider {
+    /// Resolves the request to a feed, search or archive target and returns its
+    /// videos; any failure is reported and yields an empty list.
+    async fn get_videos(
+        &self,
+        _cache: VideoCache,
+        _pool: DbPool,
+        sort: String,
+        query: Option<String>,
+        page: String,
+        _per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        // Parse wide, then clamp into 1..=255: a page number above `u8::MAX` must
+        // not silently wrap back to page 1 and replay the first page.
+        let page = page
+            .trim()
+            .parse::<u64>()
+            .map(|number| u8::try_from(number.max(1)).unwrap_or(u8::MAX))
+            .unwrap_or(1);
+        let target = Self::resolve_target(query.as_deref(), &sort);
+        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
+        let result = match &target {
+            Target::Feed { kind } => self.fetch_feed(&mut requester, *kind, page).await,
+            other => self.fetch_ssr(&mut requester, other, page).await,
+        };
+        match result {
+            Ok(items) => items,
+            Err(error) => {
+                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
+                Vec::new()
+            }
+        }
+    }
+
+    /// Returns the static channel descriptor for this provider.
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        Some(self.build_channel(clientversion))
+    }
+}