This commit is contained in:
Simon
2026-06-22 18:54:59 +00:00
parent 342c7dc098
commit da51d6a910
8 changed files with 1606 additions and 0 deletions

567
src/providers/hentaitv.rs Normal file
View File

@@ -0,0 +1,567 @@
// hentai.tv — large subbed/raw hentai catalogue.
//
// The site is a Next.js app backed by a clean JSON API (no RSC scraping needed):
// GET /api/browse?page=N&sort=<Label>&genres=<ExactName> -> {videos:[28], total, pages}
// GET /api/search?q=Q -> {videos:[...]}
// `browse` honours both `sort` (labels: "Most Recent" | "Most Viewed" | "Trending") and `genres`
// (the *exact, case-sensitive* stored genre name, e.g. "Big Boobs", "incest"), and paginates;
// `search` is single-page (it ignores `page`). Episode JSON carries everything for a card plus an
// `embedUrl` of the form `https://nhplayer.com/v/{embedId}/`.
//
// Playback: yt-dlp can resolve neither the watch page nor nhplayer, and the real MP4 sits on a
// Cloudflare/JA3-guarded CDN, so `video.url` is the (reachable) watch page and the media is served
// through `/proxy/hentaitv/{embedId}.mp4`, a redirect proxy that replicates nhplayer's browser
// challenge to mint a signed CDN URL and 302s the client to it (see src/proxies/hentaitv.rs).
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use serde::Deserialize;
use std::collections::HashSet;
use std::sync::{Arc, RwLock};
/// Static provider metadata for this channel: the group id it is filed under and its
/// descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "hentai-animation",
tags: &["hentai", "anime", "subbed"],
};
// Identifier used for the channel itself, for proxy URLs, and for error reports.
const CHANNEL_ID: &str = "hentaitv";
// Origin that every API, watch-page, and relative image URL is built against.
const BASE_URL: &str = "https://hentai.tv";
// Default and upper bound for the `per_page` request parameter (see `get_videos`).
const DEFAULT_PER_PAGE: usize = 28;
// One cheap JSON request per feed page; reuse it briefly so paging back and forth is instant.
const CACHE_TTL_SECS: u64 = 300;
/// One entry of the `videos` array returned by the browse/search JSON endpoints.
///
/// Every field is optional on the wire (`#[serde(default)]`), so a missing key never fails
/// the whole response; `build_item` decides which entries are usable.
#[derive(Debug, Clone, Deserialize)]
struct ApiVideo {
/// URL slug; used as the item id and in the watch-page URL. Entries with an empty slug
/// are dropped by `build_item`.
#[serde(default)]
slug: String,
/// Display title; when blank, `build_item` falls back to the slug with dashes replaced by spaces.
#[serde(default)]
title: String,
/// Episode number; appended to the title as `Episode N` when it is at least 1.
#[serde(default)]
ep: Option<u32>,
/// View counter; saturated into `u32` when copied onto the item.
#[serde(default)]
views: Option<u64>,
/// Site rating; `build_item` multiplies it by 10 and clamps it to 0-100.
#[serde(default)]
rating: Option<f32>,
/// Brand name; exposed as the item's uploader when not blank.
#[serde(default)]
brand: Option<String>,
/// Quality label; used as the format label and, via its digits, as the format height.
#[serde(default)]
quality: Option<String>,
/// Duration string, converted with `parse_time_to_seconds`.
#[serde(default)]
duration: Option<String>,
/// Tag list, copied onto the item when non-empty.
#[serde(default)]
tags: Option<Vec<String>>,
/// Image candidate; last choice in `pick_thumb`.
#[serde(default)]
cover: Option<String>,
/// Image candidate; first choice in `pick_thumb`.
#[serde(default)]
thumb: Option<String>,
/// Image candidate; second choice in `pick_thumb`.
#[serde(default)]
backdrop: Option<String>,
/// Player URL (JSON key `embedUrl`); the id after `/v/` is extracted for the media proxy.
#[serde(rename = "embedUrl", default)]
embed_url: Option<String>,
}
/// Envelope of the browse/search JSON responses. Only `videos` is deserialized; a missing
/// `videos` key yields an empty list.
#[derive(Debug, Deserialize)]
struct ApiResponse {
#[serde(default)]
videos: Vec<ApiVideo>,
}
/// What a request resolves to on the site; chosen by `pick_target` and turned into an API
/// URL by `build_api_url`. `sort` always holds the site's label, not the option id.
#[derive(Debug, Clone)]
enum Target {
/// Latest / sorted catalogue feed (paginated).
Browse { sort: String },
/// A single genre archive (paginated), keyed by the site's exact genre name.
Genre { name: String, sort: String },
/// Keyword search. The site's search is single-page, so page > 1 is empty.
Search { query: String },
}
/// Provider for hentai.tv. Cloning is cheap: clones share the same genre list through the `Arc`.
#[derive(Debug, Clone)]
pub struct HentaitvProvider {
/// Exact-cased genre names loaded from the `/browse` page, used both for the `categories`
/// option and for routing keyword queries to a genre archive.
/// Starts empty and is filled in by the background load started in `new`.
genres: Arc<RwLock<Vec<String>>>,
}
impl HentaitvProvider {
/// Create the provider with an empty genre list and start loading the genre catalogue in
/// the background.
pub fn new() -> Self {
    let genres = Arc::new(RwLock::new(Vec::new()));
    let this = Self { genres };
    this.spawn_genre_load();
    this
}
/// Describe the channel: static metadata, the `sort` option, and a `categories` option
/// listing "All" followed by every genre loaded so far.
///
/// If the genre lock is poisoned the genre list is treated as empty.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let genres = match self.genres.read() {
        Ok(guard) => guard.clone(),
        Err(_) => Vec::new(),
    };

    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };

    // "All" always comes first, then one entry per genre using its exact name as the id.
    let cat_options: Vec<FilterOption> = std::iter::once(choice("all", "All"))
        .chain(genres.iter().map(|name| choice(name.as_str(), name.as_str())))
        .collect();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Order the feed by newest, most viewed, or trending.".to_string(),
        systemImage: "arrow.up.arrow.down".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            choice("new", "Most Recent"),
            choice("views", "Most Viewed"),
            choice("trending", "Trending"),
        ],
        multiSelect: false,
    };

    let genre_option = ChannelOption {
        id: "categories".to_string(),
        title: "Genre".to_string(),
        description: "Limit results to a single genre archive.".to_string(),
        systemImage: "tag".to_string(),
        colorName: "pink".to_string(),
        options: cat_options,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "Hentai.tv".to_string(),
        description: "Subbed and raw hentai from hentai.tv with latest, trending, and genre browsing.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=hentai.tv".to_string(),
        status: "active".to_string(),
        categories: genres,
        options: vec![sort_option, genre_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Translate a `sort` option id into the exact label `/api/browse?sort=` expects.
///
/// Unknown ids fall back to "Most Recent".
fn sort_label(sort: &str) -> &'static str {
    if matches!(sort, "views" | "popular" | "most-viewed") {
        return "Most Viewed";
    }
    if matches!(sort, "trending" | "hot") {
        return "Trending";
    }
    "Most Recent"
}
/// Canonicalise a free-text genre so `school-girl`, `School Girl`, and `school girl` compare
/// equal: ASCII-lowercased, with `-`, `_`, `+` and any run of whitespace collapsed into
/// single spaces and no leading or trailing space.
fn normalize_genre(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    let words = lowered
        .split(|c: char| c.is_whitespace() || matches!(c, '-' | '_' | '+'))
        .filter(|word| !word.is_empty());
    for word in words {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
/// Look up the site's exact-cased genre name for an arbitrary input.
///
/// Returns `None` when the input normalises to nothing, the genre lock is poisoned, or no
/// loaded genre matches after normalisation.
fn resolve_genre(&self, input: &str) -> Option<String> {
    let wanted = Self::normalize_genre(input);
    if wanted.is_empty() {
        return None;
    }
    let Ok(catalogue) = self.genres.read() else {
        return None;
    };
    for candidate in catalogue.iter() {
        if Self::normalize_genre(candidate) == wanted {
            return Some(candidate.clone());
        }
    }
    None
}
/// Decide which site endpoint a request maps to.
///
/// Precedence: a non-blank query wins over the `categories` option. Within a query, an
/// explicit `genre:` / `category:` / `cat:` prefix forces a genre archive (using the raw
/// trimmed text if the genre is unknown); a bare keyword that matches a known genre goes to
/// that archive; anything else is a keyword search. Without a query, a selected category
/// other than "all" picks a genre archive, and the fallback is the plain browse feed.
fn pick_target(&self, query: Option<&str>, sort: &str, options: &ServerOptions) -> Target {
    let sort = Self::sort_label(sort).to_string();

    let keyword = query.map(str::trim).filter(|q| !q.is_empty());
    if let Some(q) = keyword {
        // The first matching prefix decides; the remainder is the requested genre.
        let prefixed = ["genre:", "category:", "cat:"]
            .iter()
            .find_map(|prefix| q.strip_prefix(*prefix));
        if let Some(rest) = prefixed {
            let name = match self.resolve_genre(rest) {
                Some(exact) => exact,
                None => rest.trim().to_string(),
            };
            return Target::Genre { name, sort };
        }
        return match self.resolve_genre(q) {
            Some(name) => Target::Genre { name, sort },
            None => Target::Search { query: q.to_string() },
        };
    }

    // No usable query: honour an explicit genre selection from the `categories` option.
    let selected = options
        .categories
        .as_deref()
        .map(str::trim)
        .filter(|choice| !choice.is_empty() && *choice != "all");
    match selected {
        Some(choice) => {
            let name = match self.resolve_genre(choice) {
                Some(exact) => exact,
                None => choice.to_string(),
            };
            Target::Genre { name, sort }
        }
        None => Target::Browse { sort },
    }
}
/// Form-urlencode a query-string value (spaces become `+`).
fn encode(value: &str) -> String {
    let mut encoded = String::with_capacity(value.len());
    for piece in url::form_urlencoded::byte_serialize(value.as_bytes()) {
        encoded.push_str(piece);
    }
    encoded
}
/// Produce the JSON API URL for a target and page (pages below 1 are treated as 1).
///
/// Returns `None` when the request is empty by construction: search is single-page, so any
/// search page beyond the first has no URL.
fn build_api_url(target: &Target, page: u32) -> Option<String> {
    let page = page.max(1);
    let url = match target {
        Target::Search { .. } if page > 1 => return None,
        Target::Search { query } => {
            let q = Self::encode(query);
            format!("{BASE_URL}/api/search?q={q}")
        }
        Target::Browse { sort } => {
            let sort = Self::encode(sort);
            format!("{BASE_URL}/api/browse?page={page}&sort={sort}")
        }
        Target::Genre { name, sort } => {
            let sort = Self::encode(sort);
            let genres = Self::encode(name);
            format!("{BASE_URL}/api/browse?page={page}&sort={sort}&genres={genres}")
        }
    };
    Some(url)
}
fn abs_url(path: &str) -> String {
if path.starts_with("http://") || path.starts_with("https://") {
path.to_string()
} else if path.starts_with('/') {
format!("{BASE_URL}{path}")
} else {
format!("{BASE_URL}/{path}")
}
}
/// Choose the card image: the first non-empty of `thumb`, `backdrop`, `cover`, made
/// absolute. Returns an empty string when none is set.
fn pick_thumb(video: &ApiVideo) -> String {
    let candidates = [&video.thumb, &video.backdrop, &video.cover];
    candidates
        .iter()
        .filter_map(|slot| slot.as_deref())
        .find(|path| !path.is_empty())
        .map(Self::abs_url)
        .unwrap_or_default()
}
/// Extract the player id from an embed URL of the form `https://nhplayer.com/v/{embedId}/`.
///
/// The id is the path segment right after the first `/v/`. It ends at the next `/`, at a
/// query string (`?`), or at a fragment (`#`), so no URL decoration leaks into the proxy
/// path built from the id. Returns `None` when there is no `/v/` segment or the id is empty.
fn extract_embed_id(embed_url: &str) -> Option<String> {
    let after = embed_url.split("/v/").nth(1)?;
    let id = after
        .trim_start_matches('/')
        .split(['/', '?', '#'])
        .next()?;
    (!id.is_empty()).then(|| id.to_string())
}
/// Convert one API entry into a `VideoItem`.
///
/// Returns `None` for entries that cannot be played: an empty slug, or an embed URL from
/// which no player id can be extracted. The item's URL is the watch page, and its single
/// format points at the media proxy for `{embed_id}.mp4`.
fn build_item(video: &ApiVideo, options: &ServerOptions) -> Option<VideoItem> {
    let slug = video.slug.as_str();
    if slug.is_empty() {
        return None;
    }
    let embed_id = video
        .embed_url
        .as_deref()
        .and_then(Self::extract_embed_id)?;

    // Title: fall back to a de-slugged name; otherwise append the episode number when present.
    let base_title = video.title.trim();
    let title = if base_title.is_empty() {
        slug.replace('-', " ")
    } else if let Some(ep) = video.ep.filter(|&number| number >= 1) {
        format!("{base_title} Episode {ep}")
    } else {
        base_title.to_string()
    };

    let url = format!("{BASE_URL}/hentai/{}", video.slug);
    let thumb = Self::pick_thumb(video);

    // An unparsable or out-of-range duration is reported as 0.
    let duration = match video.duration.as_deref().and_then(parse_time_to_seconds) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    };

    let quality = video.quality.clone().unwrap_or_default();
    let label = if quality.is_empty() {
        "mp4".to_string()
    } else {
        quality.clone()
    };

    let media_path = format!("{embed_id}.mp4");
    let proxy_url = build_proxy_url(options, CHANNEL_ID, &media_path);
    let mut stream = VideoFormat::new(proxy_url, label, "mp4".to_string()).ext("mp4".to_string());

    // The height is whatever number the digits of the quality label spell out.
    let digits: String = quality.chars().filter(char::is_ascii_digit).collect();
    if let Ok(height) = digits.parse::<u32>() {
        if height > 0 {
            stream = stream.height(height);
        }
    }

    let mut item = VideoItem::new(
        video.slug.clone(),
        title,
        url,
        CHANNEL_ID.to_string(),
        thumb,
        duration,
    )
    .formats(vec![stream])
    .aspect_ratio(16.0 / 9.0);

    if let Some(views) = video.views {
        // Saturate rather than wrap when the counter exceeds u32.
        item.views = Some(u32::try_from(views).unwrap_or(u32::MAX));
    }
    if let Some(rating) = video.rating {
        // Site rating is 0-10; expose the usual 0-100 scale.
        item.rating = Some((rating * 10.0).clamp(0.0, 100.0));
    }
    match &video.tags {
        Some(tags) if !tags.is_empty() => item.tags = Some(tags.clone()),
        _ => {}
    }
    match video.brand.as_deref() {
        Some(brand) if !brand.trim().is_empty() => item.uploader = Some(brand.to_string()),
        _ => {}
    }
    Some(item)
}
/// Fetch one feed page for `target` and return at most `per_page` items.
///
/// Flow:
/// 1. No API URL for this target/page (search beyond page 1) -> empty list.
/// 2. A non-empty cache entry younger than `CACHE_TTL_SECS` is served directly.
/// 3. Otherwise the JSON API is requested; request and parse failures are reported and the
///    stale cache entry (if any) is served instead.
/// 4. A successful but empty result also falls back to the stale cache entry and is not
///    cached itself.
///
/// The cache is keyed by the API URL only, so it always stores the *complete* converted
/// page; the `per_page` limit is applied on every return path. This keeps a small
/// `per_page` request from shrinking the cached page for later, larger requests, and lets
/// cache hits honour `per_page` as well.
async fn fetch_page(
    &self,
    target: Target,
    page: u32,
    per_page: usize,
    cache: &VideoCache,
    options: &ServerOptions,
) -> Vec<VideoItem> {
    let Some(api_url) = Self::build_api_url(&target, page) else {
        return Vec::new();
    };

    // Applied to every list that leaves this function; cached data is never truncated.
    let clip = move |mut items: Vec<VideoItem>| -> Vec<VideoItem> {
        items.truncate(per_page);
        items
    };

    if let Some((time, items)) = cache.get(&api_url) {
        let fresh = time
            .elapsed()
            .map(|e| e.as_secs() < CACHE_TTL_SECS)
            .unwrap_or(false);
        if fresh && !items.is_empty() {
            return clip(items);
        }
    }

    let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_page");
    let body = match requester
        .get_with_headers(
            &api_url,
            vec![
                ("Referer".to_string(), format!("{BASE_URL}/")),
                ("Accept".to_string(), "application/json".to_string()),
            ],
            None,
        )
        .await
    {
        Ok(body) => body,
        Err(e) => {
            report_provider_error(CHANNEL_ID, "fetch_page.request", &format!("url={api_url}; error={e}")).await;
            return clip(cache.get(&api_url).map(|(_, items)| items).unwrap_or_default());
        }
    };

    let response: ApiResponse = match serde_json::from_str(&body) {
        Ok(response) => response,
        Err(e) => {
            report_provider_error(CHANNEL_ID, "fetch_page.parse", &format!("url={api_url}; error={e}")).await;
            return clip(cache.get(&api_url).map(|(_, items)| items).unwrap_or_default());
        }
    };

    // Convert every entry first, so unusable entries do not eat into the `per_page` budget.
    let items: Vec<VideoItem> = response
        .videos
        .iter()
        .filter_map(|video| Self::build_item(video, options))
        .collect();

    if items.is_empty() {
        // Nothing usable came back: prefer whatever was cached earlier over an empty page.
        return clip(cache.get(&api_url).map(|(_, items)| items).unwrap_or_default());
    }
    cache.insert(api_url, items.clone());
    clip(items)
}
/// Load the genre catalogue from the `/browse` page on a dedicated OS thread.
///
/// The thread builds its own current-thread Tokio runtime, fetches the page, parses the
/// genre names, and replaces the shared list only when at least one genre was found.
/// Runtime and fetch failures are reported in the background; a poisoned lock is ignored.
fn spawn_genre_load(&self) {
    let shared = self.genres.clone();
    std::thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Ok(rt) => rt,
            Err(e) => {
                report_provider_error_background("hentaitv", "spawn_genre_load.runtime", &e.to_string());
                return;
            }
        };
        runtime.block_on(async move {
            let mut requester = crate::util::requester::Requester::new();
            let browse_url = format!("{BASE_URL}/browse");
            let headers = vec![("Referer".to_string(), format!("{BASE_URL}/"))];
            let html = match requester.get_with_headers(&browse_url, headers, None).await {
                Ok(body) => body,
                Err(e) => {
                    report_provider_error_background("hentaitv", "spawn_genre_load.fetch", &e.to_string());
                    return;
                }
            };
            let names = Self::parse_genres(&html);
            if names.is_empty() {
                // Keep whatever list is already there rather than wiping it.
                return;
            }
            if let Ok(mut slot) = shared.write() {
                *slot = names;
            }
        });
    });
}
/// Extract the exact-cased genre names from the `/browse` page payload, where they appear as
/// `"genres":[{"name":"Big Boobs","count":2219}, ...]` (possibly with backslash-escaped
/// quotes). Names are returned in page order with duplicates removed.
fn parse_genres(html: &str) -> Vec<String> {
    const NAME_KEY: &str = "\"name\":\"";
    const COUNT_MARK: &str = ",\"count\"";

    // Undo `\"` escaping so embedded JSON reads like plain JSON.
    let text = html.replace("\\\"", "\"");
    let mut known: HashSet<&str> = HashSet::new();
    let mut names = Vec::new();

    for segment in text.split(NAME_KEY).skip(1) {
        let Some((name, rest)) = segment.split_once('"') else {
            continue;
        };
        // Only a name directly followed by a "count" field belongs to the genre catalogue;
        // other `"name"` objects on the page are skipped.
        let tail = rest.trim_start_matches('"').trim_start();
        let is_genre = !name.is_empty() && tail.starts_with(COUNT_MARK);
        if is_genre && known.insert(name) {
            names.push(name.to_string());
        }
    }
    names
}
}
#[async_trait]
impl Provider for HentaitvProvider {
    /// Serve one page of videos.
    ///
    /// `page` and `per_page` arrive as strings: an unparsable or zero `page` becomes 1, and
    /// `per_page` defaults to `DEFAULT_PER_PAGE` and is clamped to `1..=DEFAULT_PER_PAGE`.
    /// A blank query is treated as no query.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = match page.parse::<u32>() {
            Ok(number) if number >= 1 => number,
            _ => 1,
        };
        let limit = per_page
            .parse::<usize>()
            .map_or(DEFAULT_PER_PAGE, |n| n.clamp(1, DEFAULT_PER_PAGE));
        let keyword = query
            .as_deref()
            .map(str::trim)
            .filter(|text| !text.is_empty());
        let target = self.pick_target(keyword, &sort, &options);
        self.fetch_page(target, page_number, limit, &cache, &options)
            .await
    }

    /// Describe this channel; always available.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
// Unit tests for the pure helpers; nothing here touches the network.
#[cfg(test)]
mod tests {
use super::*;
// Option ids map to the site's sort labels; unknown ids fall back to "Most Recent".
#[test]
fn sort_labels_map_to_api_values() {
assert_eq!(HentaitvProvider::sort_label("new"), "Most Recent");
assert_eq!(HentaitvProvider::sort_label("views"), "Most Viewed");
assert_eq!(HentaitvProvider::sort_label("trending"), "Trending");
assert_eq!(HentaitvProvider::sort_label("whatever"), "Most Recent");
}
// Each target produces the expected form-encoded API URL.
#[test]
fn builds_api_urls() {
let browse = Target::Browse { sort: "Most Recent".to_string() };
assert_eq!(
HentaitvProvider::build_api_url(&browse, 2).unwrap(),
"https://hentai.tv/api/browse?page=2&sort=Most+Recent"
);
let genre = Target::Genre { name: "Big Boobs".to_string(), sort: "Most Viewed".to_string() };
assert_eq!(
HentaitvProvider::build_api_url(&genre, 1).unwrap(),
"https://hentai.tv/api/browse?page=1&sort=Most+Viewed&genres=Big+Boobs"
);
let search = Target::Search { query: "school nurse".to_string() };
assert_eq!(
HentaitvProvider::build_api_url(&search, 1).unwrap(),
"https://hentai.tv/api/search?q=school+nurse"
);
// Search has no pagination.
assert!(HentaitvProvider::build_api_url(&search, 2).is_none());
}
// The id is the segment after `/v/`, without a trailing slash or query string.
#[test]
fn extracts_embed_id() {
assert_eq!(
HentaitvProvider::extract_embed_id("https://nhplayer.com/v/M2WMSkkRmf5wJvq/").as_deref(),
Some("M2WMSkkRmf5wJvq")
);
assert_eq!(
HentaitvProvider::extract_embed_id("https://nhplayer.com/v/abc123?x=1").as_deref(),
Some("abc123")
);
assert_eq!(HentaitvProvider::extract_embed_id("https://example.com/foo"), None);
}
// Genre names are read in page order from `"name"` entries followed by `"count"`.
#[test]
fn parses_genre_catalogue() {
let html = r#"...,"genres":[{"name":"Big Boobs","count":2219},{"name":"incest","count":476}],"blacklist":[]..."#;
let genres = HentaitvProvider::parse_genres(html);
assert_eq!(genres, vec!["Big Boobs".to_string(), "incest".to_string()]);
}
// Case, separators, and surrounding whitespace are normalised away.
#[test]
fn normalizes_genres() {
assert_eq!(HentaitvProvider::normalize_genre("Big Boobs"), "big boobs");
assert_eq!(HentaitvProvider::normalize_genre("school-girl"), "school girl");
assert_eq!(HentaitvProvider::normalize_genre(" NTR "), "ntr");
}
}