This commit is contained in:
Simon
2026-04-09 07:19:33 +00:00
parent 6e43b3b3d0
commit 57eb2d7063
6 changed files with 49 additions and 352 deletions

View File

@@ -39,7 +39,7 @@ chrono = "0.4.44"
md5 = "0.8.0"
[lints.rust]
warnings = "deny"
warnings = "warn"
unexpected_cfgs = "allow"
# Or keep it as a warning but whitelist the cfg:
# unexpected_cfgs = { level = "warn", check-cfg = ['cfg(has_error_description_deprecated)'] }

View File

@@ -6,10 +6,9 @@ use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{FuturesUnordered, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
@@ -194,9 +193,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -242,9 +239,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -254,7 +249,6 @@ impl HqpornerProvider {
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
options: &ServerOptions,
) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
@@ -273,63 +267,14 @@ impl HqpornerProvider {
})
.unwrap_or_default();
// Limit concurrent detail-page requests to reduce transient connect errors.
let mut in_flight = FuturesUnordered::new();
let mut iter = raw_videos.into_iter();
let mut items = Vec::new();
const MAX_IN_FLIGHT: usize = 6;
loop {
while in_flight.len() < MAX_IN_FLIGHT {
let Some(seg) = iter.next() else {
break;
};
in_flight.push(self.get_video_item(seg, requester.clone(), options));
raw_videos
.into_iter()
.filter_map(|seg| self.get_video_item(seg, options).ok())
.collect()
}
let Some(result) = in_flight.next().await else {
break;
};
match result {
Ok(item)
if item
.formats
.as_ref()
.map(|formats| !formats.is_empty())
.unwrap_or(false) =>
{
items.push(item);
}
Ok(_) => {}
Err(e) => {
let msg = e.to_string();
let chain = format_error_chain(&e);
tokio::spawn(async move {
let _ = send_discord_error_report(
msg,
Some(chain),
Some("Hqporner Provider"),
None,
file!(),
line!(),
module_path!(),
)
.await;
});
}
}
}
items
}
async fn get_video_item(
&self,
seg: String,
mut requester: Requester,
options: &ServerOptions,
) -> Result<VideoItem> {
let video_url = format!(
fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result<VideoItem> {
let detail_url = format!(
"{}{}",
self.url,
seg.split("<a href=\"")
@@ -348,7 +293,7 @@ impl HqpornerProvider {
.unwrap_or_else(|_| title_raw.to_string())
.titlecase();
let id = video_url
let id = detail_url
.split('/')
.nth(4)
.and_then(|s| s.split('.').next())
@@ -378,173 +323,20 @@ impl HqpornerProvider {
.unwrap_or_default();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let (tags, formats) = self.extract_media(&video_url, &mut requester).await?;
let proxied_url = crate::providers::build_proxy_url(
options,
"hqporner",
&crate::providers::strip_url_scheme(&detail_url),
);
Ok(
VideoItem::new(id, title, video_url, "hqporner".into(), thumb, duration)
.formats(formats)
.tags(tags),
)
}
async fn extract_media(
&self,
url: &str,
requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>)> {
let mut formats = vec![];
let mut tags = vec![];
let headers = vec![("Referer".to_string(), "https://hqporner.com/".into())];
let mut text = match self
.fetch_text_with_retries(requester, url, &headers, 3)
.await
{
Ok(text) => text,
Err(primary_err) => {
if url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
self.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.map_err(|fallback_err| {
Error::from(format!(
"Request failed: primary={primary_err}; fallback={fallback_err}"
Ok(VideoItem::new(
id,
title,
proxied_url,
"hqporner".into(),
thumb,
duration,
))
})?
} else {
return Err(Error::from(format!("Request failed: {}", primary_err)));
}
}
};
if text.is_empty() && url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
text = self
.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.unwrap_or_default();
}
if text.contains("Why do I see it?") {
return Ok((tags, formats));
}
// Extract Stars & Tags
if let Some(stars_block) = text
.split("icon fa-star-o")
.nth(1)
.and_then(|s| s.split("</li>").next())
{
for star_el in stars_block.split("href=\"/actress/").skip(1) {
let id = star_el.split('"').next().unwrap_or("").to_string();
let name = star_el
.split("\">")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or("")
.to_string();
if !name.is_empty() {
tags.push(name.clone());
Self::push_unique(&self.stars, FilterOption { id, title: name });
}
}
}
// Player / Video Extraction
let player_url = format!(
"https:{}",
text.split("url: '/blocks/altplayer.php?i=")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or("No player link")?
);
let response_text = match self
.fetch_text_with_retries(requester, &player_url, &headers, 2)
.await
{
Ok(text) => text,
Err(e) => {
let err = format!("altplayer request failed: {e}");
send_discord_error_report(
err.clone(),
None,
Some("Hqporner Provider"),
Some(&player_url),
file!(),
line!(),
module_path!(),
)
.await;
return Ok((tags, formats));
}
};
let text2 = response_text;
// Check for error response
if text2.starts_with("ERR:") {
return Ok((tags, formats));
}
let video_element = text2
.split("<video ")
.nth(2)
.and_then(|s| s.split("</video>").next())
.ok_or(format!("No video element\n{player_url}\n{text2}"))?;
for source in video_element.split("<source ").skip(1) {
let title = source
.split("title=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
.to_string();
let quality = title.split(' ').next().unwrap_or("HD").to_string();
let media_url = format!(
"https:{}",
source
.split("src=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
);
formats.push(
VideoFormat::new(media_url, quality, "mp4".into())
.format_id(title.clone())
.format_note(title),
);
}
Ok((tags, formats))
}
/// Fetches `url` as text, retrying failed attempts with a growing pause in between.
///
/// Every attempt calls `requester.get_raw_with_headers(url, headers)` and then reads the
/// response body as text. The first attempt that yields a body returns it immediately.
/// Between attempts the task sleeps `250 ms * attempt`, so later retries wait longer.
///
/// # Errors
///
/// Returns the message of the last failed attempt once `max_attempts` attempts have been
/// used up. When `max_attempts` is `0` no request is sent and a message saying so is
/// returned (previously this case produced an empty error string).
async fn fetch_text_with_retries(
    &self,
    requester: &mut Requester,
    url: &str,
    headers: &[(String, String)],
    max_attempts: u8,
) -> std::result::Result<String, String> {
    // Seeded so that a zero-attempt call still reports something meaningful.
    let mut last_err = String::from("no request attempted (max_attempts is 0)");

    for attempt in 1..=max_attempts {
        let outcome = match requester.get_raw_with_headers(url, headers.to_vec()).await {
            Ok(resp) => resp.text().await.map_err(|e| {
                format!("text read failed (attempt {attempt}/{max_attempts}): {e}")
            }),
            Err(e) => Err(format!(
                "request failed (attempt {attempt}/{max_attempts}): {e}"
            )),
        };

        match outcome {
            Ok(text) => return Ok(text),
            Err(message) => last_err = message,
        }

        // No pause after the final attempt; there is nothing left to wait for.
        if attempt < max_attempts {
            let pause = std::time::Duration::from_millis(250 * u64::from(attempt));
            tokio::time::sleep(pause).await;
        }
    }

    Err(last_err)
}
}

View File

@@ -37,13 +37,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
m
});
const CHANNEL_STATUS_ERROR: &str = "error";
const VALIDATION_RESULTS_REQUIRED: usize = 5;
const VALIDATION_MIN_SUCCESS: usize = 1;
const VALIDATION_COOLDOWN: Duration = Duration::from_secs(3600);
const VALIDATION_MEDIA_TIMEOUT: Duration = Duration::from_secs(100);
const VALIDATION_ERROR_RETEST_INTERVAL: Duration = VALIDATION_COOLDOWN;
const VALIDATION_FAILURES_FOR_ERROR: u8 = 5;
#[derive(Clone)]
struct ProviderValidationContext {
@@ -64,7 +61,6 @@ static PROVIDER_VALIDATION_INFLIGHT: Lazy<DashSet<String>> = Lazy::new(DashSet::
static PROVIDER_VALIDATION_LAST_RUN: Lazy<DashMap<String, Instant>> = Lazy::new(DashMap::new);
static PROVIDER_VALIDATION_FAILURE_STATE: Lazy<DashMap<String, ValidationFailureState>> =
Lazy::new(DashMap::new);
static PROVIDER_ERROR_REVALIDATION_STARTED: OnceLock<()> = OnceLock::new();
fn validation_client_version() -> ClientVersion {
ClientVersion::new(22, 'c' as u32, "Hot%20Tub".to_string())
@@ -394,37 +390,6 @@ fn record_validation_failure(provider_id: &str, now: Instant) -> u8 {
1
}
/// Spawns the background task that periodically re-schedules validation for every
/// provider whose runtime status equals `CHANNEL_STATUS_ERROR`.
///
/// The task is started at most once: `PROVIDER_ERROR_REVALIDATION_STARTED` is set on the
/// first call and every later call returns without spawning anything.
fn start_periodic_error_revalidation() {
    // `set` reports an error when the marker already holds a value.
    if PROVIDER_ERROR_REVALIDATION_STARTED.set(()).is_err() {
        return;
    }

    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(VALIDATION_ERROR_RETEST_INTERVAL);
        loop {
            ticker.tick().await;

            // Collect the ids up front; scheduling happens only after the map
            // iteration has finished.
            let to_retest: Vec<_> = PROVIDER_RUNTIME_STATUS
                .iter()
                .filter(|entry| entry.value().as_str() == CHANNEL_STATUS_ERROR)
                .map(|entry| entry.key().clone())
                .collect();

            for provider_id in to_retest {
                schedule_provider_validation(
                    &provider_id,
                    "periodic_retest",
                    "provider currently marked as error",
                );
            }
        }
    });
}
pub fn configure_runtime_validation(
pool: DbPool,
cache: VideoCache,
@@ -436,9 +401,7 @@ pub fn configure_runtime_validation(
cache,
requester,
})
.map_err(|_| "provider validation context already configured")?;
start_periodic_error_revalidation();
Ok(())
.map_err(|_| "provider validation context already configured")
}
pub fn current_provider_channel_status(provider_id: &str) -> Option<String> {
@@ -486,19 +449,13 @@ pub fn schedule_provider_validation(provider_id: &str, context: &str, msg: &str)
match validation_result {
Ok(()) => {
reset_validation_failure_state(&provider_id);
PROVIDER_RUNTIME_STATUS.remove(&provider_id);
}
Err(_validation_error) => {
let failures = record_validation_failure(&provider_id, Instant::now());
if failures >= VALIDATION_FAILURES_FOR_ERROR {
PROVIDER_RUNTIME_STATUS
.insert(provider_id.clone(), CHANNEL_STATUS_ERROR.to_string());
}
let _failure_count = record_validation_failure(&provider_id, Instant::now());
crate::flow_debug!(
"provider validation failed provider={} failures={} threshold={} error={}",
"provider validation failed provider={} failures={} error={}",
&provider_id,
failures,
VALIDATION_FAILURES_FOR_ERROR,
_failure_count,
crate::util::flow_debug::preview(&_validation_error, 160)
);
}
@@ -907,6 +864,7 @@ pub fn build_status_response(status: Status) -> StatusResponse {
nsfw: status.nsfw,
categories: status.categories,
options: status.options,
cdnReferrers: status.cdnReferrers,
filtersFooter: status.filtersFooter,
}
}
@@ -1354,22 +1312,6 @@ mod tests {
PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
/// Recording `VALIDATION_FAILURES_FOR_ERROR` failures, each one cooldown apart, must
/// leave the failure counter exactly at that threshold.
#[test]
fn validation_failure_threshold_matches_channel_error_policy() {
    let provider_id = "hsex";
    // Start from a clean slate in case another test left state behind.
    PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);

    let start = Instant::now();
    // Keep only the count returned by the most recent recording.
    let counted = (0..VALIDATION_FAILURES_FOR_ERROR).fold(0, |_, step| {
        record_validation_failure(provider_id, start + VALIDATION_COOLDOWN * u32::from(step))
    });

    assert_eq!(counted, VALIDATION_FAILURES_FOR_ERROR);
    PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
#[test]
fn builds_group_index() {
PROVIDER_RUNTIME_STATUS.remove("all");
@@ -1453,6 +1395,12 @@ mod tests {
.expect("chinese group present");
assert_eq!(chinese_group["systemImage"], "globe");
let cdn_referrers = json["cdnReferrers"].as_array().expect("cdnReferrers array");
assert_eq!(cdn_referrers.len(), 1);
assert_eq!(cdn_referrers[0]["hostContains"], "phncdn");
assert_eq!(cdn_referrers[0]["referer"], "https://www.pornhub.com/");
assert_eq!(cdn_referrers[0]["origin"], "https://www.pornhub.com");
let pimpbunny_channel = channels
.iter()
.find(|channel| channel["id"] == "pimpbunny")
@@ -1463,14 +1411,6 @@ mod tests {
);
}
/// A provider flagged with `CHANNEL_STATUS_ERROR` in the runtime status map must have
/// that status on its decorated channel.
#[test]
fn runtime_error_status_overrides_channel_status() {
    let provider_id = "hsex";
    PROVIDER_RUNTIME_STATUS.insert(provider_id.to_string(), CHANNEL_STATUS_ERROR.to_string());

    let decorated = decorate_channel(base_channel(provider_id));
    assert_eq!(decorated.status, CHANNEL_STATUS_ERROR);

    // Remove the entry again so the shared map does not keep this test's state.
    PROVIDER_RUNTIME_STATUS.remove(provider_id);
}
#[ntex::test]
#[ignore = "live network sweep across all providers"]
async fn api_videos_returns_working_media_urls_for_all_channels() {

View File

@@ -1,8 +1,6 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
@@ -641,57 +639,12 @@ impl PornhubProvider {
return Ok(old_items);
}
let mut items = items;
// Rewrite thumbs to use the page-driven thumb proxy when appropriate
for item in items.iter_mut() {
let proxied = self.proxied_thumb(&options, &item.url);
if !proxied.is_empty() {
item.thumb = proxied;
}
}
cache.remove(&video_url);
cache.insert(video_url, items.clone());
Ok(items)
}
}
impl PornhubProvider {
    /// Builds the `pornhub-thumb` proxy URL for a video page.
    ///
    /// Returns an empty string when `page_url` is empty or is rejected by
    /// `PornhubThumbPolicy::is_allowed_video_page_url`; otherwise returns the result of
    /// `build_proxy_url` for the scheme-stripped page URL.
    fn proxied_thumb(&self, options: &ServerOptions, page_url: &str) -> String {
        let eligible =
            !page_url.is_empty() && PornhubThumbPolicy::is_allowed_video_page_url(page_url);

        if eligible {
            build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(page_url))
        } else {
            String::new()
        }
    }
}
/// Namespace for the checks deciding which page URLs may be used for thumb proxying.
struct PornhubThumbPolicy;

impl PornhubThumbPolicy {
    /// Returns `true` only for parseable `https` URLs whose host is `pornhub.com`,
    /// `www.pornhub.com` or ends with `.pornhub.com`, and whose path starts with
    /// `/view_video.php` or `/video/`.
    fn is_allowed_video_page_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        let Some(host) = parsed.host_str() else {
            return false;
        };

        let secure = parsed.scheme() == "https";
        let host_allowed = ["pornhub.com", "www.pornhub.com"]
            .iter()
            .any(|known| host.eq_ignore_ascii_case(known))
            || host.ends_with(".pornhub.com");
        if !(secure && host_allowed) {
            return false;
        }

        let path = parsed.path();
        path.starts_with("/view_video.php") || path.starts_with("/video/")
    }
}
#[async_trait]
impl Provider for PornhubProvider {
async fn get_videos(

View File

@@ -70,6 +70,13 @@ pub struct Subscription {
pub status: String, //"incomplete"
}
/// One entry of the `cdnReferrers` list carried by the status payload.
///
/// The fields are serialized under their camelCase names exactly as written here.
// NOTE(review): presumably tells the client which `Referer`/`Origin` headers to send
// when a media host contains `hostContains` — confirm against the client.
#[derive(serde::Serialize)]
pub struct CdnReferrer {
    /// Host fragment to match; the default status in this file uses `"phncdn"`.
    pub hostContains: String,
    /// Referer value paired with the host fragment, e.g. `"https://www.pornhub.com/"`.
    pub referer: String,
    /// Origin value paired with the host fragment, e.g. `"https://www.pornhub.com"`.
    pub origin: String,
}
impl Subscription {
pub fn new() -> Self {
Subscription {
@@ -93,6 +100,7 @@ pub struct Status {
pub nsfw: bool,
pub categories: Vec<String>,
pub options: Vec<Options>,
pub cdnReferrers: Vec<CdnReferrer>,
pub filtersFooter: String, //"Help us improve our algorithms by selecting the categories that best describe you. These will not necessarily affect your search results, but will help us tailor the app to your interests."
}
impl Status {
@@ -113,6 +121,11 @@ impl Status {
nsfw: true,
categories: vec![],
options: vec![],
cdnReferrers: vec![CdnReferrer {
hostContains: "phncdn".to_string(),
referer: "https://www.pornhub.com/".to_string(),
origin: "https://www.pornhub.com".to_string(),
}],
filtersFooter:
"Help us improve our algorithms by giving us your feedback on the hottub discord."
.to_string(),
@@ -162,5 +175,6 @@ pub struct StatusResponse {
pub nsfw: bool,
pub categories: Vec<String>,
pub options: Vec<Options>,
pub cdnReferrers: Vec<CdnReferrer>,
pub filtersFooter: String,
}

View File

@@ -147,11 +147,9 @@ impl VideoItem {
aspectRatio: None,
}
}
#[cfg(any(not(hottub_single_provider), hottub_provider = "hentaihaven"))]
/// Parses a `VideoItem` from its JSON text.
///
/// # Errors
///
/// Returns the `serde_json::Error` produced when `s` cannot be deserialized.
pub fn from(s: String) -> Result<Self, serde_json::Error> {
    // The target type is taken from the return type (`Self`).
    serde_json::from_str(s.as_str())
}
#[cfg(any(not(hottub_single_provider), hottub_provider = "hanime"))]
pub fn tags(mut self, tags: Vec<String>) -> Self {
if tags.is_empty() {
return self;