This commit is contained in:
Simon
2026-06-18 10:10:26 +00:00
parent c19e3ebbea
commit 751fc7765a
3 changed files with 903 additions and 215 deletions

View File

@@ -41,6 +41,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "pornhub_shorties", module: "pornhub_shorties",
ty: "PornhubShortiesProvider", ty: "PornhubShortiesProvider",
}, },
ProviderDef {
id: "tikporn",
module: "tikporn",
ty: "TikpornProvider",
},
ProviderDef { ProviderDef {
id: "youporn", id: "youporn",
module: "youporn", module: "youporn",

View File

@@ -64,12 +64,10 @@ const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[
("tiktok", "dGlrdG9r"), ("tiktok", "dGlrdG9r"),
]; ];
const GENDER_SHORTCUTS: &[(&str, &str)] = &[ // NOTE: archivebate.com also exposes `/gender/<base64>` routes in its nav, but the
("female", "ZmVtYWxl"), // Livewire `filter.platform` component always responds with a redirect to the home
("couple", "Y291cGxl"), // page for gender values (server-side bug on the site), so gender browsing returns
("male", "bWFsZQ=="), // no videos. Gender shortcuts are intentionally not exposed here.
("trans", "dHJhbnM="),
];
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ArchivebateProvider { pub struct ArchivebateProvider {
@@ -85,10 +83,6 @@ enum Target {
id: String, id: String,
title: String, title: String,
}, },
Gender {
id: String,
title: String,
},
Profile { Profile {
username: String, username: String,
platform: Option<String>, platform: Option<String>,
@@ -120,12 +114,6 @@ struct LivewireInitialData {
server_memo_json: String, server_memo_json: String,
} }
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
media_url: String,
embed_url: String,
}
impl ArchivebateProvider { impl ArchivebateProvider {
pub fn new() -> Self { pub fn new() -> Self {
let provider = Self { let provider = Self {
@@ -191,13 +179,6 @@ impl ArchivebateProvider {
title: "All".to_string(), title: "All".to_string(),
}]; }];
for (title, id) in GENDER_SHORTCUTS {
filters.push(FilterOption {
id: format!("gender:{id}"),
title: format!("Gender: {}", Self::title_case(title)),
});
}
for (title, id) in PLATFORM_SHORTCUTS { for (title, id) in PLATFORM_SHORTCUTS {
filters.push(FilterOption { filters.push(FilterOption {
id: format!("platform:{id}"), id: format!("platform:{id}"),
@@ -223,7 +204,7 @@ impl ArchivebateProvider {
Channel { Channel {
id: CHANNEL_ID.to_string(), id: CHANNEL_ID.to_string(),
name: "ArchiveBate".to_string(), name: "ArchiveBate".to_string(),
description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and yt-dlp compatible watch URLs.".to_string(), description: "ArchiveBate cam recording feeds with platform and profile shortcuts and yt-dlp compatible watch URLs.".to_string(),
premium: false, premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(),
status: "work in progress".to_string(), status: "work in progress".to_string(),
@@ -244,7 +225,7 @@ impl ArchivebateProvider {
ChannelOption { ChannelOption {
id: "filter".to_string(), id: "filter".to_string(),
title: "Shortcuts".to_string(), title: "Shortcuts".to_string(),
description: "Direct platform and gender archive routes.".to_string(), description: "Direct platform archive routes.".to_string(),
systemImage: "tag.fill".to_string(), systemImage: "tag.fill".to_string(),
colorName: "green".to_string(), colorName: "green".to_string(),
options: filters, options: filters,
@@ -365,13 +346,6 @@ impl ArchivebateProvider {
format!("{}/platform/{id}?page={page}", self.url) format!("{}/platform/{id}?page={page}", self.url)
} }
} }
Target::Gender { id, .. } => {
if page <= 1 {
format!("{}/gender/{id}", self.url)
} else {
format!("{}/gender/{id}?page={page}", self.url)
}
}
Target::Profile { username, .. } => { Target::Profile { username, .. } => {
if page <= 1 { if page <= 1 {
format!("{}/profile/{username}", self.url) format!("{}/profile/{username}", self.url)
@@ -435,15 +409,6 @@ impl ArchivebateProvider {
} }
} }
if let Some(id) = trimmed.strip_prefix("gender:") {
if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) {
return Some(Target::Gender {
id: id.to_string(),
title: Self::title_case(name),
});
}
}
let normalized = Self::normalize_title(trimmed); let normalized = Self::normalize_title(trimmed);
if let Some((name, id)) = PLATFORM_SHORTCUTS if let Some((name, id)) = PLATFORM_SHORTCUTS
.iter() .iter()
@@ -455,16 +420,6 @@ impl ArchivebateProvider {
}); });
} }
if let Some((name, id)) = GENDER_SHORTCUTS
.iter()
.find(|(name, _)| Self::normalize_title(name) == normalized)
{
return Some(Target::Gender {
id: (*id).to_string(),
title: Self::title_case(name),
});
}
if let Some(option) = self.find_uploader_option(trimmed) { if let Some(option) = self.find_uploader_option(trimmed) {
if let Some(username) = option.id.strip_prefix("profile:") { if let Some(username) = option.id.strip_prefix("profile:") {
return Some(Target::Profile { return Some(Target::Profile {
@@ -605,7 +560,6 @@ impl ArchivebateProvider {
fn extra_tags_for_target(target: &Target) -> Vec<String> { fn extra_tags_for_target(target: &Target) -> Vec<String> {
match target { match target {
Target::Platform { title, .. } => vec![title.clone()], Target::Platform { title, .. } => vec![title.clone()],
Target::Gender { title, .. } => vec![title.clone()],
Target::Profile { platform, gender, .. } => { Target::Profile { platform, gender, .. } => {
let mut tags = Vec::new(); let mut tags = Vec::new();
if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) {
@@ -746,104 +700,6 @@ impl ArchivebateProvider {
Ok(items) Ok(items)
} }
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
let eval_regex = Regex::new(
r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
)
.ok()?;
let captures = eval_regex.captures(html)?;
let payload_raw = captures.name("payload")?.as_str();
let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
if !(2..=36).contains(&radix) {
return None;
}
let payload = Self::unescape_js_single_quoted(payload_raw);
let tokens_raw = captures.name("tokens")?.as_str();
let tokens = tokens_raw.split('|').collect::<Vec<_>>();
let mut unpacked = payload;
for index in (0..count).rev() {
let Some(token) = tokens.get(index) else {
continue;
};
if token.is_empty() {
continue;
}
let key = Self::to_radix(index, radix);
let pattern = format!(r"\b{}\b", regex::escape(&key));
let re = Regex::new(&pattern).ok()?;
unpacked = re.replace_all(&unpacked, *token).into_owned();
}
Some(unpacked)
}
fn unescape_js_single_quoted(value: &str) -> String {
let mut output = String::with_capacity(value.len());
let mut chars = value.chars();
while let Some(character) = chars.next() {
if character != '\\' {
output.push(character);
continue;
}
let Some(next) = chars.next() else {
break;
};
match next {
'\\' => output.push('\\'),
'\'' => output.push('\''),
'"' => output.push('"'),
'n' => output.push('\n'),
'r' => output.push('\r'),
't' => output.push('\t'),
_ => output.push(next),
}
}
output
}
fn to_radix(mut value: usize, radix: u32) -> String {
if value == 0 {
return "0".to_string();
}
let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz";
let mut out = Vec::new();
while value > 0 {
let digit = value % radix as usize;
out.push(alphabet[digit] as char);
value /= radix as usize;
}
out.iter().rev().collect()
}
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
if let Some(url) = direct_regex
.captures(html)
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
{
return Some(Self::normalize_possible_protocol_relative(&url));
}
let unpacked = Self::parse_mixin_packed_eval(html)?;
let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
unpacked_regex
.captures(&unpacked)
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
.map(|value| Self::normalize_possible_protocol_relative(&value))
}
fn normalize_possible_protocol_relative(value: &str) -> String {
let trimmed = value.trim();
if trimmed.starts_with("//") {
format!("https:{trimmed}")
} else {
trimmed.to_string()
}
}
fn host_from_url(url: &str) -> Option<String> { fn host_from_url(url: &str) -> Option<String> {
let parsed = url::Url::parse(url).ok()?; let parsed = url::Url::parse(url).ok()?;
parsed.host_str().map(|value| value.to_ascii_lowercase()) parsed.host_str().map(|value| value.to_ascii_lowercase())
@@ -931,25 +787,6 @@ impl ArchivebateProvider {
format format
} }
fn first_video_source_from_html(html: &str) -> Option<String> {
let document = Html::parse_document(html);
let source_selector = Selector::parse("video source[src]").ok()?;
let video_src_selector = Selector::parse("video[src]").ok()?;
if let Some(value) = document
.select(&source_selector)
.next()
.and_then(|node| node.value().attr("src"))
{
return Some(value.to_string());
}
document
.select(&video_src_selector)
.next()
.and_then(|node| node.value().attr("src"))
.map(|value| value.to_string())
}
fn first_iframe_source_from_html(html: &str) -> Option<String> { fn first_iframe_source_from_html(html: &str) -> Option<String> {
let document = Html::parse_document(html); let document = Html::parse_document(html);
let iframe_selector = Selector::parse("iframe[src]").ok()?; let iframe_selector = Selector::parse("iframe[src]").ok()?;
@@ -960,32 +797,6 @@ impl ArchivebateProvider {
.map(|value| value.to_string()) .map(|value| value.to_string())
} }
async fn resolve_mixdrop_media_from_iframe(
&self,
iframe_url: &str,
referer: &str,
options: &ServerOptions,
) -> Option<ResolvedMixdropMedia> {
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
let response = requester
.get_raw_with_headers_timeout(
iframe_url,
self.html_headers(referer),
Some(StdDuration::from_secs(6)),
)
.await
.ok()?;
if !response.status().is_success() {
return None;
}
let iframe_html = response.text().await.ok()?;
let media_url = Self::extract_mixdrop_media_url(&iframe_html)?;
Some(ResolvedMixdropMedia {
media_url,
embed_url: iframe_url.to_string(),
})
}
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
let page_url = item.url.clone(); let page_url = item.url.clone();
let format_url = Self::proxied_video(options, &page_url); let format_url = Self::proxied_video(options, &page_url);
@@ -1405,23 +1216,3 @@ impl Provider for ArchivebateProvider {
Some(self.build_channel(clientversion)) Some(self.build_channel(clientversion))
} }
} }
#[cfg(test)]
mod tests {
use super::ArchivebateProvider;
#[test]
fn extracts_mixdrop_wurl_from_packed_eval() {
let html = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//3.4.5/6/7.8?9=a&b=c";',13,13,'|MDCore|wurl|o230m5y6z|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;
let actual = ArchivebateProvider::extract_mixdrop_media_url(html)
.expect("expected mixdrop media url");
assert_eq!(
actual,
"https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
);
}
}

892
src/providers/tikporn.rs Normal file
View File

@@ -0,0 +1,892 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDateTime, Utc};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use serde::Deserialize;
use std::sync::{Arc, RwLock};
/// Static channel metadata for this provider: the group it is filed under and
/// the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["shorts", "vertical", "tiktok"],
};
/// Site origin: the provider's base URL and the prefix for absolute video and
/// uploader URLs.
const BASE_URL: &str = "https://tik.porn";
/// Origin of the JSON listing API queried by `fetch_feed`.
const API_BASE: &str = "https://apiv2.tik.porn";
/// Channel identifier stamped on items, uploader ids and error reports.
const CHANNEL_ID: &str = "tikporn";
/// User-Agent sent by `fetch_text` and attached to every emitted video format.
const BROWSER_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36";
// apiv2 listing endpoints return ~10 rows per call regardless of `limit`; step the
// offset by this so pages stay contiguous (no skipped or duplicated rows).
const FEED_PAGE_SIZE: u32 = 10;
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `foreign_links` wraps I/O, HTTP, JSON and regex errors so they can be
// propagated with `?`; `Parse` carries a free-form message for failures raised
// by this provider itself.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
Regex(regex::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for tik.porn. Browse feeds come from the apiv2 JSON API; search and
/// archive pages come from the site's Next.js payload.
#[derive(Debug, Clone)]
pub struct TikpornProvider {
/// Base site URL; set from `BASE_URL` in `new`.
url: String,
/// Last Next.js `buildId` seen in a parsed page; `None` until one has been stored.
/// Held behind `Arc<RwLock<..>>`, so clones of the provider share the same slot.
build_id: Arc<RwLock<Option<String>>>,
}
/// What a request resolves to; produced by `resolve_target` from the query and sort.
#[derive(Debug, Clone)]
enum Target {
/// Default browse feed, served from the apiv2 JSON API (paginates by offset).
Feed { kind: FeedKind },
/// Keyword search, served from the Next.js `/?s=` SSR payload.
Search { query: String },
/// Tag/action/model archive, served from the Next.js SSR payload.
Archive { path: String },
}
/// Which apiv2 listing backs a `Target::Feed`; chosen from the sort id by `feed_kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FeedKind {
/// Newest uploads first (`/getrecentvideos`); deeper pages fall back to popular
/// because the recent endpoint is a live feed and does not honor `offset`.
Newest,
/// Most popular shorts (`/videos/popular`); honors `offset` for clean paging.
Popular,
}
// ---- apiv2 listing rows (https://apiv2.tik.porn/{getrecentvideos,videos/popular}) ----
/// Envelope of an apiv2 listing response; only the `data` array is read.
#[derive(Debug, Deserialize, Default)]
struct ApiResponse {
/// Listing rows; empty when the key is absent.
#[serde(default)]
data: Vec<ApiVideo>,
}
/// One row of an apiv2 listing, converted to a `VideoItem` by `api_video_to_item`.
/// Every field is defaulted, so a missing key does not fail the parse.
#[derive(Debug, Deserialize, Default)]
struct ApiVideo {
/// Video id; rows with `0` (the default for a missing id) are dropped.
#[serde(default)]
video_id: u64,
/// Passed through as the item's duration; unexpected values coerce to `0` via `de_u32`.
#[serde(default, deserialize_with = "de_u32")]
duration: u32,
/// View count; only reported on the item when greater than zero.
#[serde(default, deserialize_with = "de_u32")]
view_count: u32,
/// Preferred upload timestamp, parsed as `%Y-%m-%d %H:%M:%S` by `parse_api_date`.
#[serde(default)]
published: String,
/// Fallback upload timestamp used when `published` is blank.
#[serde(default)]
video_date: String,
/// Action name; used as a tag and as the fallback title.
#[serde(default)]
action_name: String,
/// Producer display name; becomes the uploader when paired with a non-blank slug.
#[serde(default)]
producer_name: Option<String>,
/// Producer slug; used to build the uploader URL and id.
#[serde(default)]
producer_slug: Option<String>,
/// Performers; contribute tags, and the first fully named one is the fallback uploader.
#[serde(default)]
pornstars: Vec<ApiEntity>,
/// Tag entities; their names become item tags.
#[serde(default)]
tags: Vec<ApiEntity>,
/// Thumbnail candidate (third choice).
#[serde(default)]
thumbnail_url: String,
/// Thumbnail candidate (first choice).
#[serde(default)]
medium_thumb: String,
/// Thumbnail candidate (second choice).
#[serde(default)]
small_thumb: String,
/// Thumbnail candidate (last choice).
#[serde(default)]
poster_url: String,
/// HLS playlist URL, emitted as an `m3u8` format when non-blank.
#[serde(default)]
hls_url: Option<String>,
/// MP4 URL, emitted as a direct `mp4` format when non-blank.
#[serde(default)]
mp4_url: Option<String>,
/// Nested text block that holds the title.
#[serde(default)]
video_text: ApiVideoText,
}
/// Name/slug pair used for the `tags` and `pornstars` arrays of an `ApiVideo`.
#[derive(Debug, Deserialize, Default)]
struct ApiEntity {
/// Display name; used as a tag and, for performers, as the uploader name.
#[serde(default)]
name: String,
/// URL slug; used to build uploader URLs and ids.
#[serde(default)]
slug: String,
}
/// `video_text` object of an apiv2 row; only `meta_title` is read.
#[derive(Debug, Deserialize, Default)]
struct ApiVideoText {
/// Title container; its `default.text` is the title source.
#[serde(default)]
meta_title: ApiLocalizedText,
}
/// Text container of which only the `default` entry is read.
// NOTE(review): the name suggests sibling per-locale entries exist — confirm against the API.
#[derive(Debug, Deserialize, Default)]
struct ApiLocalizedText {
/// The `default` entry.
#[serde(default)]
default: ApiTextValue,
}
/// Leaf text value of an apiv2 text block.
#[derive(Debug, Deserialize, Default)]
struct ApiTextValue {
/// Raw title text; passed through `clean_title` before use.
#[serde(default)]
text: String,
}
/// Lenient `u32` deserializer for counters and durations.
///
/// The upstream JSON occasionally carries `-1`, `null`, or numbers wrapped in
/// strings where an integer is expected. Rather than failing the whole listing,
/// this accepts any JSON value and maps it as follows:
///
/// * non-negative integer: the value, saturated at `u32::MAX`;
/// * string: its trimmed contents parsed as `u32`, or `0` if that fails;
/// * anything else (negative numbers, floats, `null`, arrays, objects): `0`.
fn de_u32<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw = serde_json::Value::deserialize(deserializer)?;
    let coerced = match raw {
        serde_json::Value::Number(number) => {
            let non_negative = match number.as_u64() {
                Some(value) => Some(value),
                None => number
                    .as_i64()
                    .and_then(|value| u64::try_from(value).ok()),
            };
            match non_negative {
                Some(value) => u32::try_from(value).unwrap_or(u32::MAX),
                None => 0,
            }
        }
        serde_json::Value::String(text) => text.trim().parse::<u32>().unwrap_or(0),
        _ => 0,
    };
    Ok(coerced)
}
/// Subset of the `__NEXT_DATA__` JSON document embedded in an SSR page.
#[derive(Debug, Deserialize, Clone, Default)]
struct NextData {
/// Next.js build id; stored by `fetch_page_props` when non-empty.
#[serde(default, rename = "buildId")]
build_id: String,
/// Wrapper around the page props.
#[serde(default)]
props: NextProps,
}
/// Object with a `pageProps` key. Used for the `props` member of `NextData` and as
/// the parse target for the `_next/data` JSON body in `fetch_page_props`.
#[derive(Debug, Deserialize, Clone, Default)]
struct NextProps {
/// Page-level props holding the video lists.
#[serde(default, rename = "pageProps")]
page_props: PageProps,
}
/// Page props read by `collect_videos`; which list is used depends on the target.
#[derive(Debug, Deserialize, Clone, Default)]
struct PageProps {
/// Video list read for `Target::Archive`.
#[serde(default)]
videos: Option<VideoList>,
/// Video list read for `Target::Search`.
#[serde(default, rename = "initialVideoResults")]
initial_video_results: Option<VideoList>,
}
/// A page of SSR video rows together with its pagination block.
#[derive(Debug, Deserialize, Clone, Default)]
struct VideoList {
/// Video rows; de-duplicated by id in `collect_videos`.
#[serde(default)]
data: Vec<TikVideo>,
/// Pagination state for this list.
#[serde(default)]
pagination: Pagination,
}
/// Pagination block of a `VideoList`.
#[derive(Debug, Deserialize, Clone, Default)]
struct Pagination {
/// The payload's `hasMore` flag; returned by `collect_videos`, ignored by `fetch_ssr`.
#[serde(default, rename = "hasMore")]
has_more: bool,
}
/// One video row from the Next.js SSR payload, converted by `video_to_item`.
/// Every field is defaulted, so a missing key does not fail the parse.
#[derive(Debug, Deserialize, Clone, Default)]
struct TikVideo {
/// Video id; rows with `0` are dropped.
#[serde(default)]
id: u64,
/// Site-relative page path; when blank, `/video/<id>` is used instead.
#[serde(default, rename = "basePath")]
base_path: String,
/// Text block providing fallback titles.
#[serde(default)]
texts: Texts,
/// Action entity; its name becomes a tag.
#[serde(default)]
action: Option<Entity>,
/// Producer entity; third-choice uploader candidate.
#[serde(default)]
producer: Option<Entity>,
/// Performers; contribute tags and are second-choice uploader candidates.
#[serde(default)]
pornstars: Vec<Entity>,
/// User entity; last-choice uploader candidate.
#[serde(default)]
user: Option<Entity>,
/// Creator entities; first-choice uploader candidates.
#[serde(default)]
creator: Vec<Entity>,
/// Tag entities; their names become item tags.
#[serde(default)]
tags: Vec<Entity>,
/// Single media source; used for the preview and also added to the formats.
#[serde(default)]
source: Option<Source>,
/// Media sources turned into formats (listed before `source`).
#[serde(default)]
sources: Vec<Source>,
/// Poster image; last-choice thumbnail.
#[serde(default)]
poster: String,
/// Sized thumbnails; second and third choice for the item thumbnail.
#[serde(default)]
thumbnails: Thumbnails,
/// Passed through as the item's duration; unexpected values coerce to `0` via `de_u32`.
#[serde(default, deserialize_with = "de_u32")]
duration: u32,
/// Like count; any positive value sets the item's rating.
#[serde(default, deserialize_with = "de_u32")]
likes: u32,
/// View count; only reported on the item when greater than zero.
#[serde(default, deserialize_with = "de_u32")]
views: u32,
/// Metadata block with the preferred title, thumbnail and upload date.
#[serde(default)]
metadata: Metadata,
}
/// `texts` object of an SSR row; only the `video` entry is read.
#[derive(Debug, Deserialize, Clone, Default)]
struct Texts {
/// Text attached to the video; supplies fallback titles.
#[serde(default)]
video: TextValue,
}
/// Raw and parsed variants of a text value, used by `best_title`.
#[derive(Debug, Deserialize, Clone, Default)]
struct TextValue {
/// Raw text; last-choice title.
#[serde(default)]
text: String,
/// Parsed text; preferred over `text` when non-blank.
#[serde(default)]
parsed_text: String,
}
/// Named entity attached to an SSR row (action, producer, performer, user, creator or tag).
#[derive(Debug, Deserialize, Clone, Default)]
struct Entity {
/// Numeric id; preferred over `slug` in uploader ids when greater than zero.
#[serde(default)]
id: u64,
/// Display name; used for tags and the uploader name.
#[serde(default)]
name: String,
/// URL slug; fallback key in uploader ids.
#[serde(default)]
slug: String,
/// Site-relative path; used for the uploader URL and to classify the entity kind.
#[serde(default, rename = "basePath")]
base_path: String,
}
/// Media source entry of an SSR row.
#[derive(Debug, Deserialize, Clone, Default)]
struct Source {
/// Media URL.
#[serde(default)]
src: String,
/// The JSON `type` value; checked for `mpegURL`, `mp4` and `video/` substrings.
#[serde(default, rename = "type")]
mime_type: String,
}
/// Thumbnail variants of an SSR row, read by `best_thumb`.
#[derive(Debug, Deserialize, Clone, Default)]
struct Thumbnails {
/// `sm` variant; used when `md` is blank.
#[serde(default)]
sm: String,
/// `md` variant; preferred over `sm`.
#[serde(default)]
md: String,
}
/// Metadata block of an SSR row; its values are preferred over the row's other fields.
#[derive(Debug, Deserialize, Clone, Default)]
struct Metadata {
/// Preferred title source.
#[serde(default)]
title: String,
/// Preferred thumbnail URL.
#[serde(default)]
thumbnail: String,
/// Upload date, parsed as RFC 3339 by `parse_uploaded_at`.
#[serde(default, rename = "uploadDate")]
upload_date: String,
}
impl TikpornProvider {
/// Creates a provider rooted at `BASE_URL` with no stored Next.js build id.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    let build_id = Arc::new(RwLock::new(None));
    Self { url, build_id }
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: CHANNEL_ID.to_string(),
name: "Tik Porn".to_string(),
description: "Tik.Porn short-form vertical videos with search, tag/action/model shortcuts, and direct CDN formats.".to_string(),
premium: false,
favicon: "https://tik.porn/favicon.ico".to_string(),
status: "active".to_string(),
categories: vec![
"Shorts".to_string(),
"Vertical".to_string(),
"TikTok-style".to_string(),
],
options: vec![ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Newest uploads or most popular shorts. Search and tag:/action:/model: shortcuts are also supported.".to_string(),
systemImage: "sparkles.tv".to_string(),
colorName: "pink".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Newest".to_string(),
},
FilterOption {
id: "trending".to_string(),
title: "Trending".to_string(),
},
],
multiSelect: false,
}],
nsfw: true,
cacheDuration: Some(900),
}
}
/// Decides what a request points at.
///
/// * no query, or a blank one: the browse feed selected by `sort`;
/// * a recognised `prefix:value` shortcut: the matching archive path;
/// * anything else: a keyword search for the trimmed query.
fn resolve_target(query: Option<&str>, sort: &str) -> Target {
    match query.map(str::trim) {
        None | Some("") => Target::Feed {
            kind: Self::feed_kind(sort),
        },
        Some(text) => match Self::prefixed_archive(text) {
            Some(path) => Target::Archive { path },
            None => Target::Search {
                query: text.to_owned(),
            },
        },
    }
}
/// Maps a sort id to the feed that serves it.
///
/// The comparison ignores surrounding whitespace and ASCII case. Any id that is
/// not a known "popular" alias selects the newest feed.
fn feed_kind(sort: &str) -> FeedKind {
    // The server defaults `sort` to "date" when the client omits it.
    const POPULAR_ALIASES: [&str; 6] = [
        "trending",
        "popular",
        "hot",
        "best",
        "views",
        "most-viewed",
    ];
    let requested = sort.trim();
    let wants_popular = POPULAR_ALIASES
        .iter()
        .any(|alias| alias.eq_ignore_ascii_case(requested));
    if wants_popular {
        FeedKind::Popular
    } else {
        FeedKind::Newest
    }
}
fn prefixed_archive(query: &str) -> Option<String> {
let (prefix, value) = query.split_once(':')?;
let value = value.trim();
if value.is_empty() {
return None;
}
match prefix.trim().to_ascii_lowercase().as_str() {
"tag" => Some(format!("/tag/{}", Self::slugify(value))),
"action" | "category" | "cat" => Some(format!("/action/{}", Self::slugify(value))),
"model" | "pornstar" | "creator" | "uploader" | "studio" | "site" => {
Some(format!("/{}", Self::slugify(value)))
}
_ => None,
}
}
/// Reduces free text to a URL slug.
///
/// ASCII letters and digits are kept (lower-cased). Every other character acts as
/// a separator, runs of separators collapse into a single `-`, and the result has
/// no leading or trailing `-`. Input without any ASCII alphanumerics yields an
/// empty string.
fn slugify(value: &str) -> String {
    let mut slug = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() {
            slug.push(ch.to_ascii_lowercase());
        } else if !slug.is_empty() && !slug.ends_with('-') {
            // First separator after a word; later ones in the same run are skipped.
            slug.push('-');
        }
    }
    if slug.ends_with('-') {
        slug.pop();
    }
    slug
}
/// Builds the SSR page URL for a target.
///
/// Page 1 (and page 0) omit the `page` parameter; later pages append it. Search
/// queries are percent-encoded with `NON_ALPHANUMERIC`.
fn page_url(&self, target: &Target, page: u8) -> String {
    let base = &self.url;
    let needs_page_param = page > 1;
    match target {
        // Feed targets are served from the apiv2 API, not the SSR page URL.
        Target::Feed { .. } => format!("{base}/"),
        Target::Search { query } => {
            let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
            let mut url = format!("{base}/?s={encoded}");
            if needs_page_param {
                url.push_str(&format!("&page={page}"));
            }
            url
        }
        Target::Archive { path } => {
            let relative = path.trim_start_matches('/');
            let mut url = format!("{base}/{relative}");
            if needs_page_param {
                url.push_str(&format!("?page={page}"));
            }
            url
        }
    }
}
/// Fetches the Next.js page props for a search or archive target.
///
/// For searches with a stored build id, the `_next/data/<buildId>/search.json`
/// route is tried first. Its result is used only when the body parses *and*
/// contains `initialVideoResults`. Every field of `NextProps` is defaulted, so
/// any JSON object (a redirect or not-found stub, an error body) parses
/// successfully; without the presence check such a body would be returned as an
/// empty result and the HTML fallback would never run.
///
/// Otherwise the HTML page is fetched, its `__NEXT_DATA__` payload is parsed, and
/// a non-empty `buildId` is stored for later calls.
///
/// # Errors
/// Returns an error when the HTML page cannot be fetched or its `__NEXT_DATA__`
/// payload is missing or is not valid JSON. Failures of the JSON fast path are
/// never surfaced; they only trigger the fallback.
async fn fetch_page_props(
    &self,
    requester: &mut crate::util::requester::Requester,
    target: &Target,
    page: u8,
) -> Result<PageProps> {
    if let Target::Search { query } = target {
        if let Some(build_id) = self.current_build_id() {
            let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
            let mut data_url = format!(
                "{}/_next/data/{build_id}/search.json?s={encoded}",
                self.url
            );
            if page > 1 {
                data_url.push_str(&format!("&page={page}"));
            }
            let fast_path = Self::fetch_text(requester, &data_url, &self.url)
                .await
                .ok()
                .and_then(|body| serde_json::from_str::<NextProps>(&body).ok())
                .map(|next| next.page_props)
                // A body without search results is not a usable answer; fall back.
                .filter(|props| props.initial_video_results.is_some());
            if let Some(props) = fast_path {
                return Ok(props);
            }
        }
    }

    let page_url = self.page_url(target, page);
    let body = Self::fetch_text(requester, &page_url, &self.url).await?;
    let next = Self::extract_next_data(&body)?;
    if !next.build_id.is_empty() {
        self.set_build_id(next.build_id);
    }
    Ok(next.props.page_props)
}
async fn fetch_text(
requester: &mut crate::util::requester::Requester,
url: &str,
referer: &str,
) -> Result<String> {
requester
.get_with_headers(
url,
vec![
("Referer".to_string(), referer.to_string()),
("User-Agent".to_string(), BROWSER_UA.to_string()),
(
"Accept".to_string(),
"text/html,application/json;q=0.9,*/*;q=0.8".to_string(),
),
],
None,
)
.await
.map_err(|error| Error::from(ErrorKind::Parse(error.to_string())))
}
/// Extracts and parses the `__NEXT_DATA__` JSON payload from an SSR HTML page.
///
/// The script tag is located by its `id` alone. Next.js may emit further
/// attributes on it (for example `nonce` or `crossorigin`), so the pattern does
/// not depend on attribute order or count. Pages matched by the stricter
/// `id=… type=…>` form are still matched.
///
/// # Errors
/// Returns `ErrorKind::Parse` when no such script tag is found, and a JSON error
/// when its contents do not deserialize into `NextData`.
fn extract_next_data(html: &str) -> Result<NextData> {
    let regex =
        Regex::new(r#"(?s)<script\b[^>]*\bid="__NEXT_DATA__"[^>]*>(.*?)</script>"#)?;
    let captures = regex.captures(html).ok_or_else(|| {
        Error::from(ErrorKind::Parse(
            "__NEXT_DATA__ script not found".to_string(),
        ))
    })?;
    let raw = captures.get(1).map(|value| value.as_str()).unwrap_or("");
    serde_json::from_str::<NextData>(raw).map_err(Error::from)
}
/// Returns a copy of the stored Next.js build id, if any.
///
/// Yields `None` both when nothing is stored and when the lock is poisoned.
fn current_build_id(&self) -> Option<String> {
    match self.build_id.read() {
        Ok(stored) => (*stored).clone(),
        Err(_) => None,
    }
}
/// Stores `build_id` as the current Next.js build id.
///
/// If the lock is poisoned the stored value is left untouched.
fn set_build_id(&self, build_id: String) {
    let Ok(mut slot) = self.build_id.write() else {
        return;
    };
    *slot = Some(build_id);
}
fn collect_videos(target: &Target, props: PageProps) -> (Vec<TikVideo>, bool) {
match target {
// Feed targets never reach the SSR collector.
Target::Feed { .. } => (Vec::new(), false),
Target::Search { .. } => props
.initial_video_results
.map(|list| (Self::dedupe(list.data), list.pagination.has_more))
.unwrap_or_default(),
Target::Archive { .. } => props
.videos
.map(|list| (Self::dedupe(list.data), list.pagination.has_more))
.unwrap_or_default(),
}
}
/// Drops rows without an id and repeated ids, keeping the first occurrence and
/// the original order.
fn dedupe(items: Vec<TikVideo>) -> Vec<TikVideo> {
    let mut seen_ids = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for video in items {
        if video.id == 0 {
            continue;
        }
        if seen_ids.insert(video.id) {
            unique.push(video);
        }
    }
    unique
}
/// Converts one SSR row into a `VideoItem`.
///
/// Returns `None` when the row has no id, or when no title or thumbnail can be
/// derived for it. Optional attributes (views, rating, upload time, uploader,
/// tags, formats, preview) are set only when the row provides usable data.
fn video_to_item(video: TikVideo) -> Option<VideoItem> {
    if video.id == 0 {
        return None;
    }

    // Rows without a usable `basePath` fall back to `/video/<id>`.
    let page_url = if video.base_path.trim().is_empty() {
        Self::absolute_url(&format!("/video/{}", video.id))
    } else {
        Self::absolute_url(&video.base_path)
    };

    let title = Self::best_title(&video);
    let thumb = Self::best_thumb(&video);
    if title.is_empty() || thumb.is_empty() {
        return None;
    }

    let mut item = VideoItem::new(
        video.id.to_string(),
        title,
        page_url.clone(),
        CHANNEL_ID.to_string(),
        thumb,
        video.duration,
    );

    if video.views != 0 {
        item.views = Some(video.views);
    }
    // Any positive like count is reported as a flat 100.0 rating.
    if video.likes != 0 {
        item.rating = Some(100.0);
    }
    if let Some(uploaded_at) = Self::parse_uploaded_at(&video.metadata.upload_date) {
        item.uploadedAt = Some(uploaded_at);
    }

    if let Some(entity) = Self::best_uploader(&video) {
        // Prefer the numeric id as the stable key; fall back to the slug.
        let key = if entity.id > 0 {
            entity.id.to_string()
        } else {
            entity.slug.clone()
        };
        let kind = Self::entity_kind(&entity.base_path);
        item.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{key}"));
        item.uploaderUrl = Some(Self::absolute_url(&entity.base_path));
        item.uploader = Some(entity.name);
    }

    // Action name first, then tags, then performers; sorted and de-duplicated.
    let mut tags: Vec<String> = video
        .action
        .iter()
        .chain(video.tags.iter())
        .chain(video.pornstars.iter())
        .map(|entity| entity.name.trim())
        .filter(|name| !name.is_empty())
        .map(str::to_owned)
        .collect();
    tags.sort();
    tags.dedup();
    if !tags.is_empty() {
        item.tags = Some(tags);
    }

    let formats = Self::formats(&video, &page_url);
    if !formats.is_empty() {
        item.formats = Some(formats);
    }

    let preview_source = video
        .source
        .as_ref()
        .filter(|source| source.mime_type.contains("video/") && !source.src.is_empty());
    if let Some(source) = preview_source {
        item.preview = Some(source.src.clone());
    }

    Some(item)
}
/// Derives a display title for an SSR row.
///
/// Source priority: metadata title, then parsed video text, then raw video text
/// (each skipped when blank). The chosen text is HTML-entity decoded, stripped of
/// the ` | Tik.Porn` suffix and of `{{` / `}}` markers, and has its whitespace
/// collapsed to single spaces.
fn best_title(video: &TikVideo) -> String {
    let metadata_title = video.metadata.title.as_str();
    let parsed_text = video.texts.video.parsed_text.as_str();
    let raw = match (
        metadata_title.trim().is_empty(),
        parsed_text.trim().is_empty(),
    ) {
        (false, _) => metadata_title,
        (true, false) => parsed_text,
        (true, true) => video.texts.video.text.as_str(),
    };

    let decoded = match decode(raw.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => raw.to_string(),
    };
    let stripped = decoded
        .replace(" | Tik.Porn", "")
        .replace("{{", "")
        .replace("}}", "");

    let words: Vec<&str> = stripped.split_whitespace().collect();
    words.join(" ")
}
/// Picks the thumbnail URL for an SSR row.
///
/// Priority: metadata thumbnail, `md` thumbnail, `sm` thumbnail (each skipped when
/// blank), then the poster, which is returned as-is even if empty.
fn best_thumb(video: &TikVideo) -> String {
    let preferred = [
        &video.metadata.thumbnail,
        &video.thumbnails.md,
        &video.thumbnails.sm,
    ];
    preferred
        .iter()
        .find(|candidate| !candidate.trim().is_empty())
        .map(|candidate| String::from(candidate.as_str()))
        .unwrap_or_else(|| video.poster.clone())
}
/// Chooses the entity to present as the uploader of an SSR row.
///
/// Candidates are examined in the order creators, performers, producer, user.
/// The first one with both a non-blank name and a non-blank base path is returned.
fn best_uploader(video: &TikVideo) -> Option<Entity> {
    let ranked = video
        .creator
        .iter()
        .chain(&video.pornstars)
        .chain(&video.producer)
        .chain(&video.user);
    for entity in ranked {
        let named = !entity.name.trim().is_empty();
        let linked = !entity.base_path.trim().is_empty();
        if named && linked {
            return Some(entity.clone());
        }
    }
    None
}
/// Classifies a site-relative path by its leading segment: `"tag"` for `/tag/…`,
/// `"action"` for `/action/…`, and `"profile"` for everything else.
fn entity_kind(path: &str) -> &'static str {
    match path {
        p if p.starts_with("/tag/") => "tag",
        p if p.starts_with("/action/") => "action",
        _ => "profile",
    }
}
/// Builds playable formats from an SSR row's sources.
///
/// The `sources` list is processed first, then the single `source`. A source is
/// HLS when its type mentions `mpegURL` or its URL contains `.m3u8`, and MP4 when
/// its type mentions `mp4` or its URL contains `.mp4`. Sources with a blank URL
/// or of neither kind are skipped. Each format carries the given `Referer` and
/// the browser User-Agent.
fn formats(video: &TikVideo, referer: &str) -> Vec<VideoFormat> {
    video
        .sources
        .iter()
        .chain(video.source.iter())
        .filter_map(|source| {
            let url = source.src.trim();
            if url.is_empty() {
                return None;
            }
            let is_hls = source.mime_type.contains("mpegURL") || url.contains(".m3u8");
            let is_mp4 = source.mime_type.contains("mp4") || url.contains(".mp4");
            // HLS wins when a source looks like both.
            let mut format = if is_hls {
                VideoFormat::m3u8(url.to_string(), "hls".to_string(), "m3u8".to_string())
            } else if is_mp4 {
                VideoFormat::new(url.to_string(), "direct".to_string(), "mp4".to_string())
            } else {
                return None;
            };
            format.add_http_header("Referer".to_string(), referer.to_string());
            format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
            Some(format)
        })
        .collect()
}
/// Parses an RFC 3339 timestamp into Unix seconds (UTC).
///
/// Returns `None` for unparseable input and for instants before the Unix epoch.
fn parse_uploaded_at(value: &str) -> Option<u64> {
    let parsed = DateTime::parse_from_rfc3339(value).ok()?;
    let seconds = parsed.with_timezone(&Utc).timestamp();
    u64::try_from(seconds).ok()
}
/// Resolves a site-relative path against `BASE_URL`.
///
/// Values already starting with `http://` or `https://` are returned unchanged.
/// Otherwise leading slashes are stripped and the rest is appended to the base.
fn absolute_url(value: &str) -> String {
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| value.starts_with(*scheme));
    if already_absolute {
        return value.to_owned();
    }
    let relative = value.trim_start_matches('/');
    format!("{BASE_URL}/{relative}")
}
/// Loads a search or archive page through the Next.js SSR `__NEXT_DATA__` payload
/// and converts its rows to items. Rows that cannot be converted are skipped; the
/// payload's `hasMore` flag is not used.
async fn fetch_ssr(
    &self,
    requester: &mut crate::util::requester::Requester,
    target: &Target,
    page: u8,
) -> Result<Vec<VideoItem>> {
    let props = self.fetch_page_props(requester, target, page).await?;
    let (videos, _has_more) = Self::collect_videos(target, props);
    let mut items = Vec::new();
    for video in videos {
        if let Some(item) = Self::video_to_item(video) {
            items.push(item);
        }
    }
    Ok(items)
}
/// The default browse feed is served by the apiv2 JSON API, which returns fully
/// hydrated rows (signed HLS/MP4, thumbnails, title, tags, pornstars).
///
/// `page` is 1-based and page `0` is treated as page `1`. Previously a zero page
/// underflowed the `u8` subtraction: a panic in debug builds and a very large
/// offset in release builds.
///
/// # Errors
/// Returns an error when the request fails or the body is not valid JSON for
/// `ApiResponse`.
async fn fetch_feed(
    &self,
    requester: &mut crate::util::requester::Requester,
    kind: FeedKind,
    page: u8,
) -> Result<Vec<VideoItem>> {
    // Zero-based page index; clamping first makes the subtraction infallible.
    let page_index = u32::from(page.max(1)) - 1;
    // `/getrecentvideos` is a live feed that ignores `offset`, so only page 1 uses
    // it; deeper "newest" pages continue through the popular feed, which paginates.
    let (path, offset) = match (kind, page_index) {
        (FeedKind::Newest, 0) => ("getrecentvideos", 0),
        (FeedKind::Newest, index) => ("videos/popular", (index - 1) * FEED_PAGE_SIZE),
        (FeedKind::Popular, index) => ("videos/popular", index * FEED_PAGE_SIZE),
    };
    let url = format!("{API_BASE}/{path}?limit={FEED_PAGE_SIZE}&offset={offset}");
    let body = Self::fetch_text(requester, &url, &format!("{}/", self.url)).await?;
    let response = serde_json::from_str::<ApiResponse>(&body)?;
    Ok(response
        .data
        .into_iter()
        .filter_map(Self::api_video_to_item)
        .collect())
}
fn api_video_to_item(video: ApiVideo) -> Option<VideoItem> {
if video.video_id == 0 {
return None;
}
let page_url = format!("{BASE_URL}/video/{}", video.video_id);
let title = {
let cleaned = Self::clean_title(&video.video_text.meta_title.default.text);
if cleaned.is_empty() {
video.action_name.trim().to_string()
} else {
cleaned
}
};
let thumb = [
&video.medium_thumb,
&video.small_thumb,
&video.thumbnail_url,
&video.poster_url,
]
.into_iter()
.map(|value| value.trim())
.find(|value| !value.is_empty())
.unwrap_or("")
.to_string();
if title.is_empty() || thumb.is_empty() {
return None;
}
let mut item = VideoItem::new(
video.video_id.to_string(),
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
video.duration,
);
if video.view_count > 0 {
item.views = Some(video.view_count);
}
let uploaded = if video.published.trim().is_empty() {
video.video_date.as_str()
} else {
video.published.as_str()
};
if let Some(timestamp) = Self::parse_api_date(uploaded) {
item.uploadedAt = Some(timestamp);
}
// Uploader: prefer the studio/producer, then the first named pornstar.
let uploader = match (video.producer_name.as_deref(), video.producer_slug.as_deref()) {
(Some(name), Some(slug)) if !name.trim().is_empty() && !slug.trim().is_empty() => {
Some((name.trim().to_string(), slug.trim().to_string()))
}
_ => video
.pornstars
.iter()
.find(|entity| !entity.name.trim().is_empty() && !entity.slug.trim().is_empty())
.map(|entity| (entity.name.trim().to_string(), entity.slug.trim().to_string())),
};
if let Some((name, slug)) = uploader {
item.uploader = Some(name);
item.uploaderUrl = Some(format!("{BASE_URL}/{slug}"));
item.uploaderId = Some(format!("{CHANNEL_ID}:profile:{slug}"));
}
let mut tags = Vec::new();
if !video.action_name.trim().is_empty() {
tags.push(video.action_name.trim().to_string());
}
for entity in video.tags.iter().chain(video.pornstars.iter()) {
if !entity.name.trim().is_empty() {
tags.push(entity.name.trim().to_string());
}
}
tags.sort();
tags.dedup();
if !tags.is_empty() {
item.tags = Some(tags);
}
let referer = format!("{BASE_URL}/");
let mut formats = Vec::new();
if let Some(hls) = video
.hls_url
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
let mut format =
VideoFormat::m3u8(hls.to_string(), "hls".to_string(), "m3u8".to_string());
format.add_http_header("Referer".to_string(), referer.clone());
format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
formats.push(format);
}
if let Some(mp4) = video
.mp4_url
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
let mut format =
VideoFormat::new(mp4.to_string(), "direct".to_string(), "mp4".to_string());
format.add_http_header("Referer".to_string(), referer);
format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
formats.push(format);
}
if !formats.is_empty() {
item.formats = Some(formats);
}
Some(item)
}
/// Normalises an API title: trims it, decodes HTML entities (keeping the trimmed
/// input if decoding fails), removes the ` | Tik.Porn` suffix and collapses all
/// whitespace runs to single spaces.
fn clean_title(raw: &str) -> String {
    let trimmed = raw.trim();
    let decoded = match decode(trimmed.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => trimmed.to_string(),
    };
    let without_suffix = decoded.replace(" | Tik.Porn", "");

    let mut title = String::with_capacity(without_suffix.len());
    for word in without_suffix.split_whitespace() {
        if !title.is_empty() {
            title.push(' ');
        }
        title.push_str(word);
    }
    title
}
/// Parses an API timestamp of the form `YYYY-MM-DD HH:MM:SS`, interpreted as UTC,
/// into Unix seconds.
///
/// Returns `None` for unparseable input and for instants before the Unix epoch.
fn parse_api_date(value: &str) -> Option<u64> {
    let naive = NaiveDateTime::parse_from_str(value.trim(), "%Y-%m-%d %H:%M:%S").ok()?;
    let seconds = naive.and_utc().timestamp();
    u64::try_from(seconds).ok()
}
}
#[async_trait]
impl Provider for TikpornProvider {
    /// Returns one page of videos for the given sort and query.
    ///
    /// Page handling:
    /// * a valid number in `1..=255` is used as-is and `0` is treated as `1`;
    /// * a purely numeric page above 255 yields an empty list. Internal paging is
    ///   `u8`, and the previous `parse::<u8>().unwrap_or(1)` silently served page 1
    ///   again for such requests, so deep pagination looped back to the start;
    /// * anything else (blank or non-numeric) falls back to page 1.
    ///
    /// Without a query the browse feed is fetched from the JSON API; otherwise the
    /// search or archive page is loaded through the SSR payload. Failures are
    /// reported via `report_provider_error` and produce an empty list.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let requested_page = page.trim();
        let page = match requested_page.parse::<u8>() {
            Ok(number) => number.max(1),
            // A well-formed number that does not fit in `u8`: past the last page.
            Err(_)
                if !requested_page.is_empty()
                    && requested_page.bytes().all(|byte| byte.is_ascii_digit()) =>
            {
                return Vec::new();
            }
            Err(_) => 1,
        };
        let target = Self::resolve_target(query.as_deref(), &sort);
        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
        let result = match &target {
            Target::Feed { kind } => self.fetch_feed(&mut requester, *kind, page).await,
            other => self.fetch_ssr(&mut requester, other, page).await,
        };
        match result {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                Vec::new()
            }
        }
    }

    /// Returns this provider's channel descriptor; always `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}