Files
hottub/src/providers/vjav.rs
2026-04-05 21:13:05 +00:00

1748 lines
55 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::uploaders::{
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
};
use crate::util::cache::VideoCache;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use base64::{Engine as _, engine::general_purpose::STANDARD};
use chrono::NaiveDateTime;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use serde::Deserialize;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use std::thread;
/// Static channel metadata for this provider: it is filed under the `jav`
/// group and carries the tags `jav`, `asian` and `tube`.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "jav",
tags: &["jav", "asian", "tube"],
};
/// Site origin; prefix for the API and page URLs built in this file.
const BASE_URL: &str = "https://vjav.com";
/// Channel identifier; also used as the key when reporting provider errors
/// and as the prefix of canonical uploader ids.
const CHANNEL_ID: &str = "vjav";
/// Default page size. Not referenced in the visible part of this file —
/// presumably used by the request handler further down; confirm.
const DEFAULT_PER_PAGE: usize = 30;
/// Upper bound applied to `per_page` when building list API URLs.
const MAX_PER_PAGE: usize = 60;
/// Maximum number of member-list pages fetched when loading uploaders.
const INITIAL_UPLOADER_PAGES: u16 = 10;
/// Number of hot-search options exposed in the channel (plus the "All" entry).
const HOT_SEARCH_LIMIT: usize = 120;
/// Number of uploader options exposed in the channel (plus the "All" entry).
const UPLOADER_OPTION_LIMIT: usize = 200;
/// Maximum number of videos enriched concurrently in `fetch_target_items`.
const ENRICH_CONCURRENCY: usize = 8;
// Error type for this provider, generated by `error_chain!`.
//
// `foreign_links` lets `?` convert I/O, JSON, URL-parse, base64-decode and
// UTF-8 errors into this module's `Error`; `Parse` is a provider-specific
// variant carrying a free-form message.
error_chain! {
foreign_links {
Io(std::io::Error);
Json(serde_json::Error);
Url(url::ParseError);
Decode(base64::DecodeError);
Utf8(std::string::FromUtf8Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for the VJAV site.
///
/// Every collection is behind `Arc<RwLock<..>>`, so clones of the provider
/// share the same option lists and lookup tables.
#[derive(Debug, Clone)]
pub struct VjavProvider {
// Site origin used when building page and API URLs (set from `BASE_URL`).
url: String,
// Category options shown in the channel; the first entry is always "All".
tags: Arc<RwLock<Vec<FilterOption>>>,
// Hot-search options shown in the channel; the first entry is always "All".
hot_searches: Arc<RwLock<Vec<FilterOption>>>,
// Uploader options shown in the channel; the first entry is always "All".
uploaders: Arc<RwLock<Vec<FilterOption>>>,
// Normalized category title -> category id.
tag_ids: Arc<RwLock<HashMap<String, String>>>,
// Normalized category title (and normalized slug) -> category slug.
tag_targets: Arc<RwLock<HashMap<String, String>>>,
// Normalized keyword -> original hot-search keyword.
hot_search_lookup: Arc<RwLock<HashMap<String, String>>>,
// Normalized uploader name/alias -> uploader user id.
uploader_lookup: Arc<RwLock<HashMap<String, String>>>,
}
/// Which listing a request resolves to; `build_list_api_url` maps each
/// variant onto a different list endpoint.
#[derive(Debug, Clone)]
enum Target {
// The general video listing, with no tag/uploader/search restriction.
Videos,
// A category archive, addressed by its slug.
Tag { slug: String },
// The uploads of one member, addressed by user id.
Uploader { user_id: String },
// A free-text search.
Search { query: String },
}
/// JSON envelope parsed by `fetch_list`; a missing `videos` key deserializes
/// to an empty list.
#[derive(Debug, Deserialize, Clone, Default)]
struct VideosListResponse {
#[serde(default)]
videos: Vec<ListVideo>,
}
/// One entry of a video list response. Every field is a string that falls
/// back to empty when absent; `enrich_video` uses these values as fallbacks
/// when the detail response lacks the corresponding data.
#[derive(Debug, Deserialize, Clone, Default)]
struct ListVideo {
// Id used for the detail and videofile requests.
#[serde(default)]
video_id: String,
#[serde(default)]
title: String,
// Slug used in the video page URL.
#[serde(default)]
dir: String,
// Parsed with `parse_time_to_seconds`.
#[serde(default)]
duration: String,
// View count as text; parsed with `parse_u32`.
#[serde(default)]
video_viewed: String,
// Parsed with `parse_rating_percent`.
#[serde(default)]
rating: String,
// Parsed by `parse_uploaded_at` using the format `%Y-%m-%d %H:%M:%S`.
#[serde(default)]
post_date: String,
// Parsed by `parse_aspect_ratio` as `<width>x<height>`.
#[serde(default)]
file_dimensions: String,
// Comma-separated titles.
#[serde(default)]
tags: String,
// Comma-separated `slug|id|title` triples (see `parse_category_tags`).
#[serde(default)]
categories_data: String,
#[serde(default)]
user_id: String,
#[serde(default)]
username: String,
// Preferred uploader name when non-empty.
#[serde(default)]
display_name: String,
// Used as the thumbnail when the detail response has none.
#[serde(default)]
scr: String,
// Comma-separated titles.
#[serde(default)]
models: String,
// Comma-separated titles.
#[serde(default)]
categories: String,
}
/// JSON envelope parsed by `fetch_detail`.
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailResponse {
#[serde(default)]
video: DetailVideo,
// Top-level related searches; `combine_tags` consults these only when
// `video.related_searches` is absent.
#[serde(default)]
related_searches: Option<Vec<String>>,
}
/// The `video` object of a detail response. All fields default when missing.
/// `dead_code` is allowed because not every deserialized field is read.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailVideo {
#[serde(default)]
video_id: String,
#[serde(default)]
title: String,
// Slug used in the video page URL.
#[serde(default)]
dir: String,
#[serde(default)]
description: String,
// Parsed by `parse_uploaded_at` using the format `%Y-%m-%d %H:%M:%S`.
#[serde(default)]
post_date: String,
#[serde(default)]
duration: String,
// Thumbnail; `enrich_video` prefers it over the list entry's `scr`.
#[serde(default)]
thumb: String,
// Used as the item's preview when non-empty.
#[serde(default)]
thumbsrc: String,
#[serde(default)]
statistics: DetailStatistics,
#[serde(default)]
user: DetailUser,
#[serde(default)]
channel: DetailChannel,
// Only the values (category titles) are read by `combine_tags`.
#[serde(default)]
categories: HashMap<String, DetailCategory>,
#[serde(default)]
related_searches: Option<Vec<String>>,
}
/// Statistics block of a detail response. `viewed` and `rating` arrive as
/// strings and are parsed by `parse_u32` / `parse_rating_percent`;
/// `likes` and `dislikes` are not read in the visible part of this file.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailStatistics {
#[serde(default)]
viewed: String,
#[serde(default)]
rating: String,
#[serde(default)]
likes: i64,
#[serde(default)]
dislikes: i64,
}
/// Uploader block of a detail response. `enrich_video` reads `id` and
/// `username`; the remaining fields are only deserialized.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailUser {
#[serde(default)]
id: String,
#[serde(default)]
username: String,
#[serde(default)]
avatar: String,
#[serde(default)]
subscribers_count: String,
}
/// Channel block of a detail response; deserialized but not read in the
/// visible part of this file.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailChannel {
#[serde(default)]
title: String,
}
/// One category attached to a video in a detail response. `combine_tags`
/// uses `title`; `dir` is only deserialized.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct DetailCategory {
#[serde(default)]
title: String,
#[serde(default)]
dir: String,
}
/// One playable rendition returned by the videofile endpoint.
#[derive(Debug, Deserialize, Clone, Default)]
struct VideofileEntry {
// Format identifier; turned into a quality label by `format_quality_label`.
#[serde(default)]
format: String,
// Encoded media location; decoded by `decode_video_url`.
#[serde(default)]
video_url: String,
// `1` marks the rendition `enrich_video` prefers as the main media URL.
#[serde(default)]
is_default: i32,
}
/// JSON envelope parsed by `load_categories`; a missing `categories` key
/// deserializes to an empty list.
#[derive(Debug, Deserialize, Clone, Default)]
struct CategoriesResponse {
#[serde(default)]
categories: Vec<CategoryEntry>,
}
/// One category from the categories endpoint.
#[derive(Debug, Deserialize, Clone, Default)]
struct CategoryEntry {
#[serde(default)]
category_id: String,
// Display title; becomes the filter option title.
#[serde(default)]
title: String,
// Slug; becomes the filter option id and the tag target.
#[serde(default)]
dir: String,
}
/// JSON envelope parsed by `load_hot_searches`; each keyword becomes a
/// hot-search option.
#[derive(Debug, Deserialize, Clone, Default)]
struct SuggesterResponse {
#[serde(default)]
keywords: Vec<String>,
}
/// JSON envelope of the member list endpoint, parsed by `load_uploaders`
/// and `search_uploader_target`.
#[derive(Debug, Deserialize, Clone, Default)]
struct MembersResponse {
#[serde(default)]
members: Vec<MemberSummary>,
}
/// One member from a member list response. `user_id` and `username` feed the
/// uploader options and lookup; `avatar` is only deserialized.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct MemberSummary {
#[serde(default)]
user_id: String,
#[serde(default)]
username: String,
#[serde(default)]
avatar: String,
#[serde(default)]
statistics: MemberStatistics,
}
/// Per-member counters, delivered as strings. `load_uploaders` parses
/// `videos` with `parse_u64` and skips members whose count is zero.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone, Default)]
struct MemberStatistics {
#[serde(default)]
videos: String,
#[serde(default)]
viewed: String,
#[serde(default)]
subscribers: String,
}
/// JSON envelope parsed by `fetch_member_profile`.
#[derive(Debug, Deserialize, Clone, Default)]
struct MemberProfileResponse {
#[serde(default)]
user: MemberProfile,
// `build_uploader_profile` treats any value other than `1` as "no profile".
#[serde(default)]
success: i32,
}
/// Member profile details. All fields default to empty when missing.
#[derive(Debug, Deserialize, Clone, Default)]
struct MemberProfile {
#[serde(default)]
user_id: String,
#[serde(default)]
username: String,
// Upper-cased by `profile_bio` before display.
#[serde(default)]
country: String,
#[serde(default)]
is_trusted: String,
// `about_me`, `interests` and `occupation` are joined into the bio text
// by `profile_bio`.
#[serde(default)]
about_me: String,
#[serde(default)]
interests: String,
#[serde(default)]
occupation: String,
#[serde(default)]
avatar: String,
#[serde(default)]
statistics: MemberStatistics,
}
/// One `slug|id|title` triple extracted by `parse_category_tags`; all three
/// parts are guaranteed non-empty by that parser.
#[derive(Debug, Clone)]
struct ParsedCategoryTag {
id: String,
title: String,
slug: String,
}
impl VjavProvider {
/// Builds a provider whose option lists contain only the "All" entry and
/// whose lookup tables are empty, then starts the background load that
/// fills them in.
pub fn new() -> Self {
    // Every option list starts with the catch-all entry.
    let all_only = || {
        Arc::new(RwLock::new(vec![FilterOption {
            id: "all".to_string(),
            title: "All".to_string(),
        }]))
    };
    let empty_lookup = || Arc::new(RwLock::new(HashMap::new()));

    let instance = Self {
        url: BASE_URL.to_string(),
        tags: all_only(),
        hot_searches: all_only(),
        uploaders: all_only(),
        tag_ids: empty_lookup(),
        tag_targets: empty_lookup(),
        hot_search_lookup: empty_lookup(),
        uploader_lookup: empty_lookup(),
    };
    instance.spawn_initial_load();
    instance
}
/// Spawns a detached thread that loads categories, hot searches and
/// uploaders (in that order) on its own current-thread Tokio runtime.
///
/// Failures are reported through `report_provider_error_background` and do
/// not stop the remaining loads.
fn spawn_initial_load(&self) {
    // The worker thread needs its own handles to the shared collections.
    let tags = Arc::clone(&self.tags);
    let tag_ids = Arc::clone(&self.tag_ids);
    let tag_targets = Arc::clone(&self.tag_targets);
    let hot_searches = Arc::clone(&self.hot_searches);
    let hot_search_lookup = Arc::clone(&self.hot_search_lookup);
    let uploaders = Arc::clone(&self.uploaders);
    let uploader_lookup = Arc::clone(&self.uploader_lookup);

    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Ok(runtime) => runtime,
            Err(error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.runtime",
                    &error.to_string(),
                );
                return;
            }
        };

        runtime.block_on(async move {
            let categories = Self::load_categories(tags, tag_ids, tag_targets).await;
            if let Err(error) = categories {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.categories",
                    &error.to_string(),
                );
            }

            let searches = Self::load_hot_searches(hot_searches, hot_search_lookup).await;
            if let Err(error) = searches {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.hot_searches",
                    &error.to_string(),
                );
            }

            let members = Self::load_uploaders(uploaders, uploader_lookup).await;
            if let Err(error) = members {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.uploaders",
                    &error.to_string(),
                );
            }
        });
    });
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let categories = self
.tags
.read()
.map(|values| values.iter().skip(1).map(|value| value.title.clone()).collect())
.unwrap_or_default();
let category_options = self
.tags
.read()
.map(|values| values.clone())
.unwrap_or_default();
let filter_options = self
.hot_searches
.read()
.map(|values| {
values
.iter()
.take(HOT_SEARCH_LIMIT + 1)
.cloned()
.collect::<Vec<_>>()
})
.unwrap_or_default();
let uploader_options = self
.uploaders
.read()
.map(|values| {
values
.iter()
.take(UPLOADER_OPTION_LIMIT + 1)
.cloned()
.collect::<Vec<_>>()
})
.unwrap_or_default();
Channel {
id: CHANNEL_ID.to_string(),
name: "VJAV".to_string(),
description:
"VJAV archives with exact tag, uploader, and hot-search matching plus direct HLS playback URLs."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=vjav.com".to_string(),
status: "active".to_string(),
categories,
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description:
"Browse VJAV by latest, popularity, views, rating, length, or comments."
.to_string(),
systemImage: "arrow.up.arrow.down".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Latest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Popular".to_string(),
},
FilterOption {
id: "views".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "top".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "long".to_string(),
title: "Longest".to_string(),
},
FilterOption {
id: "commented".to_string(),
title: "Most Commented".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Tags".to_string(),
description: "Use the native VJAV category archives.".to_string(),
systemImage: "tag".to_string(),
colorName: "orange".to_string(),
options: category_options,
multiSelect: false,
},
ChannelOption {
id: "stars".to_string(),
title: "Uploaders".to_string(),
description: "Open uploader archives directly when known.".to_string(),
systemImage: "person.2".to_string(),
colorName: "green".to_string(),
options: uploader_options,
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Hot Searches".to_string(),
description: "Popular VJAV search shortcuts from the site suggester.".to_string(),
systemImage: "magnifyingglass".to_string(),
colorName: "purple".to_string(),
options: filter_options,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(60),
}
}
/// Produces the lookup key for a title: lower-cased (character by
/// character), with every run of non-alphanumeric characters collapsed into
/// a single space and no leading or trailing separator.
fn normalize_title(value: &str) -> String {
    // Lower-case per character so the result does not depend on word position.
    let lowered: String = value.chars().flat_map(char::to_lowercase).collect();
    lowered
        .split(|character: char| !character.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
/// Adds `option` to the shared list, or refreshes the title of the entry
/// that already has the same id.
///
/// Options with a blank id or title are ignored, as is a poisoned lock.
fn push_filter_option(target: &Arc<RwLock<Vec<FilterOption>>>, option: FilterOption) {
    let is_blank = option.id.trim().is_empty() || option.title.trim().is_empty();
    if is_blank {
        return;
    }
    let Ok(mut entries) = target.write() else {
        return;
    };
    match entries.iter_mut().find(|entry| entry.id == option.id) {
        Some(existing) => existing.title = option.title,
        None => entries.push(option),
    }
}
/// Records a category in both tag tables.
///
/// `tag_ids` maps the normalized title to the category id; `tag_targets`
/// maps the normalized title and the normalized slug to the slug. Nothing is
/// stored when the normalized title, the slug or the id is blank.
fn store_tag_mapping(
    tag_ids: &Arc<RwLock<HashMap<String, String>>>,
    tag_targets: &Arc<RwLock<HashMap<String, String>>>,
    title: &str,
    slug: &str,
    category_id: &str,
) {
    let title_key = Self::normalize_title(title);
    let incomplete =
        title_key.is_empty() || slug.trim().is_empty() || category_id.trim().is_empty();
    if incomplete {
        return;
    }

    if let Ok(mut ids) = tag_ids.write() {
        ids.insert(title_key.clone(), category_id.to_string());
    }

    let Ok(mut targets) = tag_targets.write() else {
        return;
    };
    targets.insert(title_key, slug.to_string());
    // Also index by the slug itself so a query spelled like the slug resolves.
    let slug_key = Self::normalize_title(&slug.replace('-', " "));
    if !slug_key.is_empty() {
        targets.insert(slug_key, slug.to_string());
    }
}
/// Stores `value` under the normalized form of `key`; blank keys, blank
/// values and a poisoned lock are silently ignored.
fn store_lookup(lookup: &Arc<RwLock<HashMap<String, String>>>, key: &str, value: &str) {
    let normalized_key = Self::normalize_title(key);
    let storable = !normalized_key.is_empty() && !value.trim().is_empty();
    if !storable {
        return;
    }
    let Ok(mut table) = lookup.write() else {
        return;
    };
    table.insert(normalized_key, value.to_string());
}
async fn load_categories(
tags: Arc<RwLock<Vec<FilterOption>>>,
tag_ids: Arc<RwLock<HashMap<String, String>>>,
tag_targets: Arc<RwLock<HashMap<String, String>>>,
) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
"https://vjav.com/api/json/categories/14400/str.all.en.json",
None,
)
.await
.map_err(|error| Error::from(format!("load categories request failed: {error}")))?;
let response: CategoriesResponse = serde_json::from_str(&text)?;
for category in response.categories {
if category.dir.trim().is_empty() || category.title.trim().is_empty() {
continue;
}
Self::push_filter_option(
&tags,
FilterOption {
id: category.dir.clone(),
title: category.title.clone(),
},
);
Self::store_tag_mapping(
&tag_ids,
&tag_targets,
&category.title,
&category.dir,
&category.category_id,
);
}
Ok(())
}
async fn load_hot_searches(
hot_searches: Arc<RwLock<Vec<FilterOption>>>,
hot_search_lookup: Arc<RwLock<HashMap<String, String>>>,
) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get("https://vjav.com/api//json/suggester/43200/1000.json", None)
.await
.map_err(|error| Error::from(format!("load suggester request failed: {error}")))?;
let response: SuggesterResponse = serde_json::from_str(&text)?;
for keyword in response.keywords {
if keyword.trim().is_empty() {
continue;
}
Self::push_filter_option(
&hot_searches,
FilterOption {
id: keyword.clone(),
title: keyword.clone(),
},
);
Self::store_lookup(&hot_search_lookup, &keyword, &keyword);
}
Ok(())
}
async fn load_uploaders(
uploaders: Arc<RwLock<Vec<FilterOption>>>,
uploader_lookup: Arc<RwLock<HashMap<String, String>>>,
) -> Result<()> {
let mut requester = Requester::new();
for page in 1..=INITIAL_UPLOADER_PAGES {
let url = format!(
"{BASE_URL}/api/json/members/86400/all/filt......./count-videos/96/{page}.json"
);
let text = requester
.get(&url, None)
.await
.map_err(|error| Error::from(format!("load uploaders page {page} failed: {error}")))?;
let response: MembersResponse = serde_json::from_str(&text)?;
if response.members.is_empty() {
break;
}
for member in response.members {
if Self::parse_u64(&member.statistics.videos).unwrap_or_default() == 0 {
continue;
}
Self::push_filter_option(
&uploaders,
FilterOption {
id: member.user_id.clone(),
title: member.username.clone(),
},
);
Self::store_lookup(&uploader_lookup, &member.username, &member.user_id);
}
}
Ok(())
}
async fn ensure_filters_loaded(&self) {
let tags_loaded = self.tags.read().map(|values| values.len()).unwrap_or_default() > 1;
let hot_searches_loaded = self
.hot_searches
.read()
.map(|values| values.len())
.unwrap_or_default()
> 1;
let uploaders_loaded = self
.uploaders
.read()
.map(|values| values.len())
.unwrap_or_default()
> 1;
if !tags_loaded {
if let Err(error) = Self::load_categories(
Arc::clone(&self.tags),
Arc::clone(&self.tag_ids),
Arc::clone(&self.tag_targets),
)
.await
{
report_provider_error_background(
CHANNEL_ID,
"ensure_filters_loaded.categories",
&error.to_string(),
);
}
}
if !hot_searches_loaded {
if let Err(error) = Self::load_hot_searches(
Arc::clone(&self.hot_searches),
Arc::clone(&self.hot_search_lookup),
)
.await
{
report_provider_error_background(
CHANNEL_ID,
"ensure_filters_loaded.hot_searches",
&error.to_string(),
);
}
}
if !uploaders_loaded {
if let Err(error) = Self::load_uploaders(
Arc::clone(&self.uploaders),
Arc::clone(&self.uploader_lookup),
)
.await
{
report_provider_error_background(
CHANNEL_ID,
"ensure_filters_loaded.uploaders",
&error.to_string(),
);
}
}
}
/// Translates a channel sort id into the sort segment used in list URLs;
/// unknown ids (including `new`) map to `latest-updates`.
fn site_sort(sort: &str) -> &'static str {
    const SORT_SEGMENTS: [(&str, &str); 5] = [
        ("popular", "most-popular"),
        ("views", "most-viewed"),
        ("top", "top-rated"),
        ("long", "longest"),
        ("commented", "most-commented"),
    ];
    SORT_SEGMENTS
        .iter()
        .find(|(id, _)| *id == sort)
        .map_or("latest-updates", |(_, segment)| *segment)
}
/// Derives a target from the selected channel options.
///
/// Precedence is uploader (`stars`), then category (`categories`), then hot
/// search (`filter`). A value of `all` or a blank value counts as "not
/// selected". The hot-search value is only honoured when it is present in
/// the hot-search lookup.
fn resolve_option_target(&self, options: &ServerOptions) -> Option<Target> {
    /// Keeps a selection only when it is neither `all` nor blank.
    fn chosen(value: Option<&str>) -> Option<&str> {
        value.filter(|entry| *entry != "all" && !entry.trim().is_empty())
    }

    if let Some(user_id) = chosen(options.stars.as_deref()) {
        return Some(Target::Uploader {
            user_id: user_id.to_string(),
        });
    }
    if let Some(slug) = chosen(options.categories.as_deref()) {
        return Some(Target::Tag {
            slug: slug.to_string(),
        });
    }

    let query = chosen(options.filter.as_deref())?;
    let key = Self::normalize_title(query);
    let known = self.hot_search_lookup.read().ok()?;
    let found = known.get(&key).map(|search| Target::Search {
        query: search.clone(),
    });
    found
}
fn resolve_query_target(&self, query: &str) -> Option<Target> {
let key = Self::normalize_title(query);
if key.is_empty() {
return None;
}
if let Ok(values) = self.uploader_lookup.read() {
if let Some(user_id) = values.get(&key) {
return Some(Target::Uploader {
user_id: user_id.clone(),
});
}
}
if let Ok(values) = self.tag_targets.read() {
if let Some(slug) = values.get(&key) {
return Some(Target::Tag { slug: slug.clone() });
}
}
if let Ok(values) = self.hot_search_lookup.read() {
if let Some(search) = values.get(&key) {
return Some(Target::Search {
query: search.clone(),
});
}
}
None
}
async fn search_uploader_target(
&self,
query: &str,
options: &ServerOptions,
) -> Option<Target> {
let encoded_query: String = url::form_urlencoded::byte_serialize(query.as_bytes()).collect();
let url = format!(
"{BASE_URL}/api/json/members/86400/all/filt......./recent/30/1.json?search={encoded_query}"
);
let mut requester =
requester_or_default(options, CHANNEL_ID, "search_uploader_target.request");
let text = requester.get(&url, None).await.ok()?;
let response: MembersResponse = serde_json::from_str(&text).ok()?;
let normalized_query = Self::normalize_title(query);
let exact = response.members.into_iter().find(|member| {
Self::normalize_title(&member.username) == normalized_query
|| Self::normalize_title(&member.user_id) == normalized_query
})?;
Self::push_filter_option(
&self.uploaders,
FilterOption {
id: exact.user_id.clone(),
title: exact.username.clone(),
},
);
Self::store_lookup(&self.uploader_lookup, &exact.username, &exact.user_id);
Some(Target::Uploader {
user_id: exact.user_id,
})
}
/// Chooses the listing to serve.
///
/// Without a query the channel options decide, falling back to the general
/// video listing. With a query the in-memory lookups are tried, then tried
/// again after making sure the filters are loaded, then the member search;
/// if all of that fails the query becomes a plain search.
async fn pick_target(
    &self,
    query: Option<&str>,
    options: &ServerOptions,
) -> Target {
    let Some(query) = query else {
        return self
            .resolve_option_target(options)
            .unwrap_or(Target::Videos);
    };

    if let Some(known) = self.resolve_query_target(query) {
        return known;
    }
    self.ensure_filters_loaded().await;
    if let Some(known) = self.resolve_query_target(query) {
        return known;
    }

    match self.search_uploader_target(query, options).await {
        Some(uploader) => uploader,
        None => Target::Search {
            query: query.to_string(),
        },
    }
}
/// Builds the list endpoint URL for `target`.
///
/// `page` is raised to at least 1 and `per_page` is clamped to
/// `1..=MAX_PER_PAGE`. Searches always use the `relevance` ordering, so
/// `sort` only affects the other targets.
fn build_list_api_url(
    &self,
    target: &Target,
    sort: &str,
    page: u16,
    per_page: usize,
) -> String {
    let page = page.max(1);
    let per_page = per_page.clamp(1, MAX_PER_PAGE);

    if let Target::Search { query } = target {
        let encoded_query =
            url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>();
        return format!(
            "{BASE_URL}/api/videos2.php?params=86400/str/relevance/{per_page}/search..{page}.all..&s={encoded_query}"
        );
    }

    let site_sort = Self::site_sort(sort);
    // NOTE(review): `slug` and `user_id` are interpolated into the path
    // verbatim (no escaping) — confirm callers only pass site-issued values.
    match target {
        Target::Tag { slug } => format!(
            "{BASE_URL}/api/json/videos2/86400/str/{site_sort}/{per_page}/categories.{slug}.{page}.all...json"
        ),
        Target::Uploader { user_id } => format!(
            "{BASE_URL}/api/json/videos2/14400/str/{site_sort}/{per_page}/uploaded_videos.{user_id}.{page}.all...json"
        ),
        // `Search` returned above; only the plain listing remains.
        Target::Videos | Target::Search { .. } => format!(
            "{BASE_URL}/api/json/videos2/86400/str/{site_sort}/{per_page}/..{page}.all...json"
        ),
    }
}
async fn fetch_list(
&self,
target: &Target,
sort: &str,
page: u16,
per_page: usize,
options: &ServerOptions,
) -> Result<Vec<ListVideo>> {
let url = self.build_list_api_url(target, sort, page, per_page);
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_list");
let text = requester
.get(&url, None)
.await
.map_err(|error| Error::from(format!("list request failed for {url}: {error}")))?;
let response: VideosListResponse = serde_json::from_str(&text)?;
Ok(response.videos)
}
/// Extracts every ASCII digit from `value` and parses the concatenation.
///
/// Returns `None` when there are no digits or the number does not fit in a
/// `u64`. Separators are simply dropped, so `"1,234"` yields `1234` and
/// `"12.5"` yields `125`.
fn parse_u64(value: &str) -> Option<u64> {
    // ASCII digits are single bytes, so filtering bytes is equivalent to
    // filtering characters here.
    let digits: String = value
        .bytes()
        .filter(u8::is_ascii_digit)
        .map(char::from)
        .collect();
    match digits.as_str() {
        "" => None,
        text => text.parse().ok(),
    }
}
/// Same digit extraction as `parse_u64`, but yields `None` when the number
/// does not fit in a `u32`.
fn parse_u32(value: &str) -> Option<u32> {
    let wide = Self::parse_u64(value)?;
    u32::try_from(wide).ok()
}
/// Converts a textual rating into a percentage in `0.0..=100.0`.
///
/// Values up to `5.0` are treated as a five-point score and multiplied by
/// 20; larger values are treated as percentages already. The result is
/// clamped to the valid range.
///
/// Returns `None` for blank or unparsable input, and also for `NaN` and
/// infinities: `f32::from_str` accepts those spellings, and a non-finite
/// rating is never meaningful.
fn parse_rating_percent(value: &str) -> Option<f32> {
    let parsed = value
        .trim()
        .parse::<f32>()
        .ok()
        .filter(|number| number.is_finite())?;
    let percent = if parsed <= 5.0 { parsed * 20.0 } else { parsed };
    Some(percent.clamp(0.0, 100.0))
}
/// Parses a `%Y-%m-%d %H:%M:%S` timestamp, interprets it as UTC and returns
/// the Unix time in seconds. Unparsable input and negative timestamps give
/// `None`.
fn parse_uploaded_at(value: &str) -> Option<u64> {
    let parsed = NaiveDateTime::parse_from_str(value.trim(), "%Y-%m-%d %H:%M:%S").ok()?;
    let seconds = parsed.and_utc().timestamp();
    u64::try_from(seconds).ok()
}
/// Parses `<width>x<height>` (lower-case `x`, optional surrounding
/// whitespace) into `width / height`.
///
/// Returns `None` when the text is malformed or when the dimensions cannot
/// describe a real frame: either side zero, negative, `NaN` or infinite, or
/// a quotient that is not a positive finite number.
fn parse_aspect_ratio(dimensions: &str) -> Option<f32> {
    let (width, height) = dimensions.split_once('x')?;
    let width = width.trim().parse::<f32>().ok()?;
    let height = height.trim().parse::<f32>().ok()?;
    let ratio = width / height;
    // `NaN > 0.0` is false, so NaN inputs are rejected by the comparisons.
    let plausible = width > 0.0 && height > 0.0 && ratio.is_finite() && ratio > 0.0;
    plausible.then_some(ratio)
}
/// Parses a comma-separated list of `slug|id|title` triples.
///
/// Entries with fewer than three parts, or with any blank part, are skipped;
/// parts beyond the third are ignored.
fn parse_category_tags(value: &str) -> Vec<ParsedCategoryTag> {
    let mut parsed = Vec::new();
    for entry in value.split(',') {
        let mut fields = entry.split('|').map(str::trim);
        let (Some(slug), Some(id), Some(title)) = (fields.next(), fields.next(), fields.next())
        else {
            continue;
        };
        if [slug, id, title].iter().any(|field| field.is_empty()) {
            continue;
        }
        parsed.push(ParsedCategoryTag {
            id: id.to_string(),
            title: title.to_string(),
            slug: slug.to_string(),
        });
    }
    parsed
}
/// Splits a comma-separated string into trimmed, non-empty titles, keeping
/// their original order.
fn split_csv_titles(value: &str) -> Vec<String> {
    let mut titles = Vec::new();
    for piece in value.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            titles.push(piece.to_owned());
        }
    }
    titles
}
/// Merges every tag-like title known for a video into one list without
/// duplicates (titles are compared by their normalized form).
///
/// Sources are consumed in this order: the list entry's `categories_data`
/// (whose triples are also recorded in the tag tables), its `categories`,
/// `tags` and `models`, the detail categories, and finally the first eight
/// related searches (the video's own, else the response-level ones).
fn combine_tags(
    &self,
    seed: &ListVideo,
    detail: &DetailResponse,
) -> Vec<String> {
    let mut ordered = Vec::new();
    let mut seen = HashSet::new();
    // Appends a title unless its normalized form is blank or already present.
    let mut remember = |title: String| {
        let key = Self::normalize_title(&title);
        if !key.is_empty() && seen.insert(key) {
            ordered.push(title);
        }
    };

    for entry in Self::parse_category_tags(&seed.categories_data) {
        Self::store_tag_mapping(
            &self.tag_ids,
            &self.tag_targets,
            &entry.title,
            &entry.slug,
            &entry.id,
        );
        remember(entry.title);
    }

    for csv in [&seed.categories, &seed.tags, &seed.models] {
        for title in Self::split_csv_titles(csv) {
            remember(title);
        }
    }

    for category in detail.video.categories.values() {
        remember(category.title.clone());
    }

    let related = detail
        .video
        .related_searches
        .as_deref()
        .or(detail.related_searches.as_deref())
        .unwrap_or_default();
    for search in related.iter().take(8) {
        remember(search.clone());
    }

    ordered
}
/// Public page URL of a video: `<base>/videos/<video_id>/<slug>/`.
fn video_page_url(&self, video_id: &str, slug: &str) -> String {
    let base = &self.url;
    format!("{base}/videos/{video_id}/{slug}/")
}
/// Public URL of the first page of a member's uploads.
fn uploader_page_url(&self, user_id: &str) -> String {
    let base = &self.url;
    format!("{base}/members/{user_id}/uploaded_videos/1/")
}
fn detail_api_url(&self, video_id: &str) -> Result<String> {
let numeric_id = video_id
.parse::<u64>()
.map_err(|error| Error::from(format!("invalid video id `{video_id}`: {error}")))?;
let bucket = numeric_id / 1000 * 1000;
Ok(format!(
"{}/api/json/video/86400/0/{bucket}/{video_id}.json",
self.url
))
}
async fn fetch_detail(
&self,
video_id: &str,
options: &ServerOptions,
) -> Result<DetailResponse> {
let url = self.detail_api_url(video_id)?;
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_detail");
let text = requester
.get(&url, None)
.await
.map_err(|error| Error::from(format!("detail request failed for {url}: {error}")))?;
Ok(serde_json::from_str(&text)?)
}
async fn fetch_videofile_entries(
&self,
video_id: &str,
options: &ServerOptions,
) -> Result<Vec<VideofileEntry>> {
let url = format!("{}/api/videofile.php?video_id={video_id}&lifetime=8640000", self.url);
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_videofile_entries");
let text = requester
.get(&url, None)
.await
.map_err(|error| Error::from(format!("videofile request failed for {url}: {error}")))?;
Ok(serde_json::from_str(&text)?)
}
/// Replaces the look-alike (non-Latin) letters used to disguise a base64
/// payload with their Latin counterparts; every other character is passed
/// through unchanged.
///
/// The look-alikes are written as explicit code points so they cannot be
/// confused with the Latin letters they imitate.
fn decode_obfuscated_base64(value: &str) -> String {
    const LOOKALIKES: [(char, char); 22] = [
        ('\u{0410}', 'A'),
        ('\u{0412}', 'B'),
        ('\u{0415}', 'E'),
        ('\u{041A}', 'K'),
        ('\u{041C}', 'M'),
        ('\u{041D}', 'H'),
        ('\u{041E}', 'O'),
        ('\u{0420}', 'P'),
        ('\u{0421}', 'C'),
        ('\u{0422}', 'T'),
        ('\u{0425}', 'X'),
        ('\u{0430}', 'a'),
        ('\u{0435}', 'e'),
        ('\u{043E}', 'o'),
        ('\u{0440}', 'p'),
        ('\u{0441}', 'c'),
        ('\u{0443}', 'y'),
        ('\u{0445}', 'x'),
        ('\u{043A}', 'k'),
        ('\u{043C}', 'm'),
        ('\u{0456}', 'i'),
        ('\u{0406}', 'I'),
    ];
    value
        .chars()
        .map(|character| {
            LOOKALIKES
                .iter()
                .find(|(lookalike, _)| *lookalike == character)
                .map_or(character, |(_, latin)| *latin)
        })
        .collect()
}
/// Decodes a base64 payload in which `~` stands in for `=` and trailing
/// padding may be missing, returning the decoded bytes as UTF-8 text.
///
/// # Errors
/// Fails when the padded text is not valid standard base64 or the decoded
/// bytes are not valid UTF-8.
fn decode_base64ish(value: &str) -> Result<String> {
    let mut padded = value.trim().replace('~', "=");
    // Top up to a multiple of four characters.
    let missing = (4 - padded.len() % 4) % 4;
    padded.extend(std::iter::repeat('=').take(missing));
    let bytes = STANDARD.decode(padded)?;
    let text = String::from_utf8(bytes)?;
    Ok(text)
}
fn decode_video_url(&self, value: &str) -> Result<String> {
let normalized = Self::decode_obfuscated_base64(value);
if normalized.contains(',') {
let mut parts = normalized.split(',');
let path_part = parts
.next()
.ok_or_else(|| Error::from("missing path segment".to_string()))?;
let query_part = parts
.next()
.ok_or_else(|| Error::from("missing query segment".to_string()))?;
let path = Self::decode_base64ish(path_part)?;
let query = Self::decode_base64ish(query_part)?;
let separator = if path.contains('?') { "&" } else { "?" };
return Ok(format!("{}{path}{separator}{query}&f=video.m3u8", self.url));
}
let decoded = Self::decode_base64ish(&normalized)?;
if decoded.starts_with("http://") || decoded.starts_with("https://") {
return Ok(decoded);
}
if decoded.starts_with('/') {
return Ok(format!("{}{}", self.url, decoded));
}
Err(Error::from("unsupported video url payload".to_string()))
}
/// Derives a human-readable quality label from a format id: surrounding
/// whitespace, leading underscores and trailing `.mp4` suffixes are removed,
/// and the remaining underscores become spaces.
fn format_quality_label(format: &str) -> String {
    let stem = format
        .trim()
        .trim_start_matches('_')
        .trim_end_matches(".mp4");
    stem.chars()
        .map(|character| if character == '_' { ' ' } else { character })
        .collect()
}
/// Namespaces a site user id with the channel id: `<CHANNEL_ID>:<user_id>`.
fn canonical_uploader_id(user_id: &str) -> String {
    [CHANNEL_ID, user_id].join(":")
}
/// Resolves an avatar reference to an absolute URL.
///
/// Blank input gives `None`, absolute `http(s)` URLs are returned as they
/// are, and anything else is treated as a path relative to `BASE_URL`.
fn absolute_avatar(avatar: &str) -> Option<String> {
    match avatar.trim() {
        "" => None,
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            Some(absolute.to_string())
        }
        relative => Some(format!("{BASE_URL}/{}", relative.trim_start_matches('/'))),
    }
}
/// Registers every alias as a lookup key for `user_id`; blank aliases are
/// dropped by `store_lookup`.
fn store_uploader_aliases(&self, user_id: &str, aliases: &[String]) {
    aliases
        .iter()
        .for_each(|alias| Self::store_lookup(&self.uploader_lookup, alias, user_id));
}
/// Turns a list entry into a full `VideoItem`.
///
/// Fetches the detail document and the rendition list for the video, decodes
/// the media URLs, and fills the item from the detail data, falling back to
/// the list entry wherever the detail value is blank or unparsable. As a
/// side effect the uploader is remembered in the uploader lookup and option
/// list, and the video's categories are recorded via `combine_tags`.
///
/// # Errors
/// Fails when either request fails, when any non-blank rendition URL cannot
/// be decoded, or when no rendition yields a media URL.
async fn enrich_video(
&self,
seed: ListVideo,
options: &ServerOptions,
) -> Result<VideoItem> {
// The two requests run one after the other; either failure aborts.
let detail = self.fetch_detail(&seed.video_id, options).await?;
let videofile_entries = self.fetch_videofile_entries(&seed.video_id, options).await?;
let mut formats = Vec::new();
let mut default_url = None;
for entry in videofile_entries {
if entry.video_url.trim().is_empty() {
continue;
}
// A single undecodable rendition fails the whole video.
let url = self.decode_video_url(&entry.video_url)?;
// The first decoded URL is the provisional default; any later entry
// flagged `is_default == 1` replaces it.
if default_url.is_none() || entry.is_default == 1 {
default_url = Some(url.clone());
}
let quality = Self::format_quality_label(&entry.format);
let format = VideoFormat::new(url, quality.clone(), "hls".to_string())
.format_id(entry.format.clone())
.format_note(quality);
formats.push(format);
}
let media_url = default_url
.or_else(|| formats.first().map(|format| format.url.clone()))
.ok_or_else(|| Error::from("no playable media url".to_string()))?;
// For title, slug, thumbnail and uploader id the detail value wins unless
// it is blank.
let title = if detail.video.title.trim().is_empty() {
seed.title.clone()
} else {
detail.video.title.clone()
};
let slug = if detail.video.dir.trim().is_empty() {
seed.dir.clone()
} else {
detail.video.dir.clone()
};
let thumb = if detail.video.thumb.trim().is_empty() {
seed.scr.clone()
} else {
detail.video.thumb.clone()
};
let preview = if detail.video.thumbsrc.trim().is_empty() {
None
} else {
Some(detail.video.thumbsrc.clone())
};
let uploader_user_id = if detail.video.user.id.trim().is_empty() {
seed.user_id.clone()
} else {
detail.video.user.id.clone()
};
// Name preference: list display name, then detail username, then list
// username; empty when all three are blank.
let uploader_name = [
seed.display_name.trim(),
detail.video.user.username.trim(),
seed.username.trim(),
]
.into_iter()
.find(|value| !value.is_empty())
.unwrap_or("")
.to_string();
// Remember every known spelling of the uploader for later query lookups.
self.store_uploader_aliases(
&uploader_user_id,
&[
uploader_name.clone(),
detail.video.user.username.clone(),
seed.username.clone(),
],
);
Self::push_filter_option(
&self.uploaders,
FilterOption {
id: uploader_user_id.clone(),
title: if uploader_name.is_empty() {
uploader_user_id.clone()
} else {
uploader_name.clone()
},
},
);
// Duration falls back to the list entry, then to the default (zero).
let mut item = VideoItem::new(
seed.video_id.clone(),
title,
media_url,
CHANNEL_ID.to_string(),
thumb,
parse_time_to_seconds(&detail.video.duration)
.or_else(|| parse_time_to_seconds(&seed.duration))
.and_then(|value| u32::try_from(value).ok())
.unwrap_or_default(),
);
item.views = Self::parse_u32(&detail.video.statistics.viewed)
.or_else(|| Self::parse_u32(&seed.video_viewed));
item.rating = Self::parse_rating_percent(&detail.video.statistics.rating)
.or_else(|| Self::parse_rating_percent(&seed.rating));
item.uploader = (!uploader_name.is_empty()).then_some(uploader_name);
item.uploaderUrl = (!uploader_user_id.is_empty()).then_some(self.uploader_page_url(&uploader_user_id));
item.uploaderId = (!uploader_user_id.is_empty())
.then_some(Self::canonical_uploader_id(&uploader_user_id));
item.tags = {
let tags = self.combine_tags(&seed, &detail);
(!tags.is_empty()).then_some(tags)
};
item.uploadedAt = Self::parse_uploaded_at(&detail.video.post_date)
.or_else(|| Self::parse_uploaded_at(&seed.post_date));
item.formats = (!formats.is_empty()).then_some(formats);
item.preview = preview;
// The aspect ratio is only taken from the list entry's dimensions.
item.aspectRatio = Self::parse_aspect_ratio(&seed.file_dimensions);
item.verified = None;
item.embed = None;
// Use the public page URL only if the item ended up without a URL.
let detail_page_url = self.video_page_url(&seed.video_id, &slug);
if item.url.trim().is_empty() {
item.url = detail_page_url;
}
Ok(item)
}
async fn fetch_target_items(
&self,
target: Target,
sort: &str,
page: u16,
per_page: usize,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
let seeds = self.fetch_list(&target, sort, page, per_page, options).await?;
let options = options.clone();
Ok(stream::iter(seeds.into_iter().map(|seed| {
let provider = self.clone();
let options = options.clone();
async move {
match provider.enrich_video(seed, &options).await {
Ok(item) => Some(item),
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"fetch_target_items.enrich_video",
&error.to_string(),
);
None
}
}
}
}))
.buffer_unordered(ENRICH_CONCURRENCY)
.filter_map(async move |value| value)
.collect::<Vec<_>>()
.await)
}
/// Determines the site user id for an uploader.
///
/// An explicit `uploader_id` wins: its optional `<CHANNEL_ID>:` prefix is
/// removed and the trimmed remainder is returned when non-empty. Otherwise
/// the name is resolved through the in-memory lookup, then again after
/// making sure the filters are loaded, and finally through the member
/// search.
async fn resolve_uploader_id(
    &self,
    uploader_id: Option<&str>,
    uploader_name: Option<&str>,
    options: &ServerOptions,
) -> Option<String> {
    let prefix = format!("{CHANNEL_ID}:");
    let explicit_id = uploader_id
        .map(|raw| raw.strip_prefix(prefix.as_str()).unwrap_or(raw).trim())
        .filter(|local| !local.is_empty());
    if let Some(local) = explicit_id {
        return Some(local.to_string());
    }

    let name = uploader_name.map(str::trim).filter(|name| !name.is_empty())?;

    // First pass uses whatever is cached; the second runs after a reload.
    for attempt in 0..2 {
        if attempt == 1 {
            self.ensure_filters_loaded().await;
        }
        if let Some(Target::Uploader { user_id }) = self.resolve_query_target(name) {
            return Some(user_id);
        }
    }

    if let Some(Target::Uploader { user_id }) =
        self.search_uploader_target(name, options).await
    {
        return Some(user_id);
    }
    None
}
async fn fetch_member_profile(
&self,
user_id: &str,
options: &ServerOptions,
) -> Result<MemberProfileResponse> {
let url = format!("{}/api/member.php", self.url);
let body = format!("section=user&user_id={user_id}");
let headers = vec![("Content-Type", "application/x-www-form-urlencoded")];
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_member_profile");
let response = requester
.post(&url, &body, headers)
.await
.map_err(|error| Error::from(format!("member profile request failed: {error}")))?;
let text = response
.text()
.await
.map_err(|error| Error::from(format!("member profile body failed: {error}")))?;
Ok(serde_json::from_str(&text)?)
}
/// Builds a multi-line biography from the free-text fields of a member profile.
///
/// Sections appear in the order about-me, interests, occupation, country. Each value is
/// trimmed and blank values are skipped. Occupation and country get a label, and the country
/// is upper-cased. Sections are joined with `\n`; returns `None` when every field is blank.
fn profile_bio(profile: &MemberProfile) -> Option<String> {
    /// Returns the trimmed text, or `None` when nothing but whitespace is present.
    fn non_blank(raw: &str) -> Option<&str> {
        let trimmed = raw.trim();
        (!trimmed.is_empty()).then_some(trimmed)
    }

    let sections: Vec<String> = non_blank(&profile.about_me)
        .map(str::to_string)
        .into_iter()
        .chain(non_blank(&profile.interests).map(str::to_string))
        .chain(non_blank(&profile.occupation).map(|value| format!("Occupation: {}", value)))
        .chain(
            non_blank(&profile.country)
                .map(|value| format!("Country: {}", value.to_uppercase())),
        )
        .collect();

    if sections.is_empty() {
        None
    } else {
        Some(sections.join("\n"))
    }
}
/// Assembles the [`UploaderProfile`] for a site-local `user_id`.
///
/// * `requested_name` - display name to use when non-blank; otherwise the username from the
///   member response is used.
/// * `query` - when non-blank and `profile_content` is set, keeps only uploads whose
///   normalized title-plus-tags text contains the normalized query.
/// * `profile_content` - when `true`, the first page (24 items, sort `"new"`) of the
///   uploader's videos is fetched and attached; up to the first 10 of them also populate a
///   leading "For You" layout row.
///
/// Returns `Ok(None)` when the member response has `success != 1` or a blank user id.
///
/// # Errors
///
/// Propagates failures from `fetch_member_profile` and, when `profile_content` is set, from
/// `fetch_target_items`.
async fn build_uploader_profile(
    &self,
    user_id: &str,
    requested_name: Option<&str>,
    query: Option<&str>,
    profile_content: bool,
    options: &ServerOptions,
) -> Result<Option<UploaderProfile>> {
    let response = self.fetch_member_profile(user_id, options).await?;
    if response.success != 1 || response.user.user_id.trim().is_empty() {
        return Ok(None);
    }
    let member = response.user;

    let resolved_name = match requested_name {
        Some(name) if !name.trim().is_empty() => name.to_owned(),
        _ => member.username.clone(),
    };
    // Record both spellings as aliases for this user id.
    self.store_uploader_aliases(
        &member.user_id,
        &[resolved_name.clone(), member.username.clone()],
    );

    let canonical_id = Self::canonical_uploader_id(&member.user_id);
    let video_count = Self::parse_u64(&member.statistics.videos).unwrap_or_default();
    let mut layout = vec![UploaderLayoutRow::videos(Some("Uploads".to_string()))];

    let videos = if profile_content {
        let uploads = self
            .fetch_target_items(
                Target::Uploader {
                    user_id: member.user_id.clone(),
                },
                "new",
                1,
                24,
                options,
            )
            .await?;

        let needle = query
            .filter(|value| !value.trim().is_empty())
            .map(|value| Self::normalize_title(value));
        let matching = match needle {
            Some(needle) => uploads
                .into_iter()
                .filter(|item| {
                    let tag_text = item
                        .tags
                        .as_ref()
                        .map_or_else(String::new, |values| values.join(" "));
                    let haystack = format!("{} {}", item.title, tag_text);
                    Self::normalize_title(&haystack).contains(&needle)
                })
                .collect::<Vec<_>>(),
            None => uploads,
        };

        let refs: Vec<_> = matching
            .iter()
            .map(|item| UploaderVideoRef::from_video_item(item, &resolved_name, &canonical_id))
            .collect();
        let featured_ids: Vec<_> = refs.iter().take(10).map(|video| video.id.clone()).collect();
        if !featured_ids.is_empty() {
            // The featured row goes ahead of the default "Uploads" row.
            layout.insert(
                0,
                UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids),
            );
        }
        Some(refs)
    } else {
        None
    };

    let profile = UploaderProfile {
        id: canonical_id,
        name: resolved_name,
        url: Some(self.uploader_page_url(&member.user_id)),
        channel: Some(CHANNEL_ID.to_string()),
        verified: member.is_trusted == "1",
        videoCount: video_count,
        totalViews: Self::parse_u64(&member.statistics.viewed).unwrap_or_default(),
        channels: Some(vec![UploaderChannelStat {
            channel: CHANNEL_ID.to_string(),
            videoCount: video_count,
            firstSeenAt: None,
            lastSeenAt: None,
        }]),
        avatar: Self::absolute_avatar(&member.avatar),
        description: None,
        bio: Self::profile_bio(&member),
        videos,
        tapes: Some(vec![]),
        playlists: Some(vec![]),
        layout: Some(layout),
    };
    Ok(Some(profile))
}
}
#[async_trait]
impl Provider for VjavProvider {
    /// Returns one page of videos for the given sort order and optional query.
    ///
    /// `page` falls back to `1` and `per_page` to `DEFAULT_PER_PAGE` when they do not parse;
    /// `per_page` is then clamped to `1..=MAX_PER_PAGE`. A blank query is treated as absent.
    /// On failure the error is reported via `report_provider_error` and an empty list is
    /// returned.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Neither the cache nor the database pool is used by this provider.
        let _ = cache;
        let _ = pool;

        let page_number: u16 = page.parse().unwrap_or(1);
        let requested_size: usize = per_page.parse().unwrap_or(DEFAULT_PER_PAGE);
        let page_size = requested_size.clamp(1, MAX_PER_PAGE);

        let trimmed_query = query
            .as_deref()
            .map(str::trim)
            .filter(|text| !text.is_empty());
        let target = self.pick_target(trimmed_query, &options).await;

        let outcome = self
            .fetch_target_items(target, &sort, page_number, page_size, &options)
            .await;
        match outcome {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                Vec::new()
            }
        }
    }

    /// Returns the channel description built by `build_channel` for the client version.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }

    /// Looks up an uploader by id and/or name and builds their profile.
    ///
    /// Returns `Ok(None)` when the uploader cannot be resolved to a user id. Errors from
    /// building the profile are converted to their string form.
    async fn get_uploader(
        &self,
        cache: VideoCache,
        pool: DbPool,
        uploader_id: Option<String>,
        uploader_name: Option<String>,
        query: Option<String>,
        profile_content: bool,
        options: ServerOptions,
    ) -> std::result::Result<Option<UploaderProfile>, String> {
        // Neither the cache nor the database pool is used by this provider.
        let _ = cache;
        let _ = pool;

        let resolved = self
            .resolve_uploader_id(uploader_id.as_deref(), uploader_name.as_deref(), &options)
            .await;
        let user_id = match resolved {
            Some(id) => id,
            None => return Ok(None),
        };

        let profile = self
            .build_uploader_profile(
                &user_id,
                uploader_name.as_deref(),
                query.as_deref(),
                profile_content,
                &options,
            )
            .await;
        profile.map_err(|error| error.to_string())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a provider whose filter lists and lookup tables are pre-seeded with one tag,
    /// one hot search and one uploader, so the tests below need no network access.
    fn provider() -> VjavProvider {
        /// Creates a shared filter list from `(id, title)` pairs.
        fn options(entries: &[(&str, &str)]) -> Arc<RwLock<Vec<FilterOption>>> {
            let list = entries
                .iter()
                .map(|&(id, title)| FilterOption {
                    id: id.to_string(),
                    title: title.to_string(),
                })
                .collect();
            Arc::new(RwLock::new(list))
        }

        // Creates a shared single-entry lookup table.
        let lookup = |key: &str, value: &str| {
            Arc::new(RwLock::new(HashMap::from([(
                key.to_string(),
                value.to_string(),
            )])))
        };

        VjavProvider {
            url: BASE_URL.to_string(),
            tags: options(&[("all", "All"), ("japanese", "Japanese")]),
            hot_searches: options(&[("all", "All"), ("Teacher", "Teacher")]),
            uploaders: options(&[("all", "All"), ("335627", "bata167")]),
            tag_ids: lookup("japanese", "5"),
            tag_targets: lookup("japanese", "japanese"),
            hot_search_lookup: lookup("teacher", "Teacher"),
            uploader_lookup: lookup("bata167", "335627"),
        }
    }

    /// Decodes both a site-relative and an absolute (legacy) encoded video URL.
    ///
    /// The encoded inputs intentionally mix look-alike non-ASCII letters into the text; they
    /// must be kept exactly as written.
    #[test]
    fn decodes_obfuscated_video_url() {
        let provider = provider();

        let encoded_relative = "L2dldF9maWxlLzМvМDc0ZjczY2Q2Zjg2МjQ2ZTRhМjkwМDFiYWUwYzljODМzZTlkN2FkY2U4LzgxNjАwМС84МTYyNTcvODЕ2МjU3X2hxLm1wNС8,ZD0yМzUzJmJyPTЕ4NSZ0aT0xNzc1МjQxODUx";
        let url = provider.decode_video_url(encoded_relative).expect("decodes");
        assert!(url.starts_with("https://vjav.com/get_file/"));
        assert!(url.contains("video.m3u8"));
        assert!(url.contains("ti="));

        let encoded_absolute = "aHR0cHМ6Ly92aWRlb3R4eHguY29tL2V4dС9nZXRfZmlsZS85LzZlМzkwNmМwYWNkNDRiZDМzNWVmYTЕ5ZTQzODY0МGМ1ODZlNjY5YmU4NS8zOTgwМDАvМzk4МTМxLzМ5ODЕzМV9ocS5tcDQvP2Q9NzQ1МСZicj0xМDМmdGk9МTc3NTI0МjkzМw~~";
        let legacy = provider
            .decode_video_url(encoded_absolute)
            .expect("decodes legacy");
        assert!(legacy.starts_with("https://videotxxx.com/"));
    }

    /// Checks the listing API URL produced for the plain, tag and uploader targets.
    #[test]
    fn builds_archive_urls() {
        let provider = provider();

        let latest = provider.build_list_api_url(&Target::Videos, "new", 2, 30);
        assert_eq!(
            latest,
            "https://vjav.com/api/json/videos2/86400/str/latest-updates/30/..2.all...json"
        );

        let tag_target = Target::Tag {
            slug: "japanese".to_string(),
        };
        let by_tag = provider.build_list_api_url(&tag_target, "views", 3, 12);
        assert_eq!(
            by_tag,
            "https://vjav.com/api/json/videos2/86400/str/most-viewed/12/categories.japanese.3.all...json"
        );

        let uploader_target = Target::Uploader {
            user_id: "335627".to_string(),
        };
        let by_uploader = provider.build_list_api_url(&uploader_target, "top", 1, 5);
        assert_eq!(
            by_uploader,
            "https://vjav.com/api/json/videos2/14400/str/top-rated/5/uploaded_videos.335627.1.all...json"
        );
    }

    /// Checks that exact uploader, tag and hot-search names map to the matching target kind.
    #[test]
    fn resolves_exact_query_targets() {
        let provider = provider();

        let Some(Target::Uploader { user_id }) = provider.resolve_query_target("bata167") else {
            panic!("expected uploader target");
        };
        assert_eq!(user_id, "335627");

        let Some(Target::Tag { slug }) = provider.resolve_query_target("Japanese") else {
            panic!("expected tag target");
        };
        assert_eq!(slug, "japanese");

        let Some(Target::Search { query }) = provider.resolve_query_target("Teacher") else {
            panic!("expected search target");
        };
        assert_eq!(query, "Teacher");
    }
}