This commit is contained in:
Simon
2026-04-09 07:19:33 +00:00
parent 6e43b3b3d0
commit 57eb2d7063
6 changed files with 49 additions and 352 deletions

View File

@@ -6,10 +6,9 @@ use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{FuturesUnordered, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
@@ -194,9 +193,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -242,9 +239,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -254,7 +249,6 @@ impl HqpornerProvider {
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
options: &ServerOptions,
) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
@@ -273,63 +267,14 @@ impl HqpornerProvider {
})
.unwrap_or_default();
// Limit concurrent detail-page requests to reduce transient connect errors.
let mut in_flight = FuturesUnordered::new();
let mut iter = raw_videos.into_iter();
let mut items = Vec::new();
const MAX_IN_FLIGHT: usize = 6;
loop {
while in_flight.len() < MAX_IN_FLIGHT {
let Some(seg) = iter.next() else {
break;
};
in_flight.push(self.get_video_item(seg, requester.clone(), options));
}
let Some(result) = in_flight.next().await else {
break;
};
match result {
Ok(item)
if item
.formats
.as_ref()
.map(|formats| !formats.is_empty())
.unwrap_or(false) =>
{
items.push(item);
}
Ok(_) => {}
Err(e) => {
let msg = e.to_string();
let chain = format_error_chain(&e);
tokio::spawn(async move {
let _ = send_discord_error_report(
msg,
Some(chain),
Some("Hqporner Provider"),
None,
file!(),
line!(),
module_path!(),
)
.await;
});
}
}
}
items
raw_videos
.into_iter()
.filter_map(|seg| self.get_video_item(seg, options).ok())
.collect()
}
async fn get_video_item(
&self,
seg: String,
mut requester: Requester,
options: &ServerOptions,
) -> Result<VideoItem> {
let video_url = format!(
fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result<VideoItem> {
let detail_url = format!(
"{}{}",
self.url,
seg.split("<a href=\"")
@@ -348,7 +293,7 @@ impl HqpornerProvider {
.unwrap_or_else(|_| title_raw.to_string())
.titlecase();
let id = video_url
let id = detail_url
.split('/')
.nth(4)
.and_then(|s| s.split('.').next())
@@ -378,173 +323,20 @@ impl HqpornerProvider {
.unwrap_or_default();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let (tags, formats) = self.extract_media(&video_url, &mut requester).await?;
Ok(
VideoItem::new(id, title, video_url, "hqporner".into(), thumb, duration)
.formats(formats)
.tags(tags),
)
}
async fn extract_media(
&self,
url: &str,
requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>)> {
let mut formats = vec![];
let mut tags = vec![];
let headers = vec![("Referer".to_string(), "https://hqporner.com/".into())];
let mut text = match self
.fetch_text_with_retries(requester, url, &headers, 3)
.await
{
Ok(text) => text,
Err(primary_err) => {
if url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
self.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.map_err(|fallback_err| {
Error::from(format!(
"Request failed: primary={primary_err}; fallback={fallback_err}"
))
})?
} else {
return Err(Error::from(format!("Request failed: {}", primary_err)));
}
}
};
if text.is_empty() && url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
text = self
.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.unwrap_or_default();
}
if text.contains("Why do I see it?") {
return Ok((tags, formats));
}
// Extract Stars & Tags
if let Some(stars_block) = text
.split("icon fa-star-o")
.nth(1)
.and_then(|s| s.split("</li>").next())
{
for star_el in stars_block.split("href=\"/actress/").skip(1) {
let id = star_el.split('"').next().unwrap_or("").to_string();
let name = star_el
.split("\">")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or("")
.to_string();
if !name.is_empty() {
tags.push(name.clone());
Self::push_unique(&self.stars, FilterOption { id, title: name });
}
}
}
// Player / Video Extraction
let player_url = format!(
"https:{}",
text.split("url: '/blocks/altplayer.php?i=")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or("No player link")?
let proxied_url = crate::providers::build_proxy_url(
options,
"hqporner",
&crate::providers::strip_url_scheme(&detail_url),
);
let response_text = match self
.fetch_text_with_retries(requester, &player_url, &headers, 2)
.await
{
Ok(text) => text,
Err(e) => {
let err = format!("altplayer request failed: {e}");
send_discord_error_report(
err.clone(),
None,
Some("Hqporner Provider"),
Some(&player_url),
file!(),
line!(),
module_path!(),
)
.await;
return Ok((tags, formats));
}
};
let text2 = response_text;
// Check for error response
if text2.starts_with("ERR:") {
return Ok((tags, formats));
}
let video_element = text2
.split("<video ")
.nth(2)
.and_then(|s| s.split("</video>").next())
.ok_or(format!("No video element\n{player_url}\n{text2}"))?;
for source in video_element.split("<source ").skip(1) {
let title = source
.split("title=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
.to_string();
let quality = title.split(' ').next().unwrap_or("HD").to_string();
let media_url = format!(
"https:{}",
source
.split("src=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
);
formats.push(
VideoFormat::new(media_url, quality, "mp4".into())
.format_id(title.clone())
.format_note(title),
);
}
Ok((tags, formats))
}
async fn fetch_text_with_retries(
&self,
requester: &mut Requester,
url: &str,
headers: &[(String, String)],
max_attempts: u8,
) -> std::result::Result<String, String> {
let mut last_err = String::new();
for attempt in 1..=max_attempts {
match requester.get_raw_with_headers(url, headers.to_vec()).await {
Ok(resp) => match resp.text().await {
Ok(text) => return Ok(text),
Err(e) => {
last_err =
format!("text read failed (attempt {attempt}/{max_attempts}): {e}");
}
},
Err(e) => {
last_err = format!("request failed (attempt {attempt}/{max_attempts}): {e}");
}
}
if attempt < max_attempts {
tokio::time::sleep(std::time::Duration::from_millis(250 * attempt as u64)).await;
}
}
Err(last_err)
Ok(VideoItem::new(
id,
title,
proxied_url,
"hqporner".into(),
thumb,
duration,
))
}
}

View File

@@ -37,13 +37,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
m
});
const CHANNEL_STATUS_ERROR: &str = "error";
const VALIDATION_RESULTS_REQUIRED: usize = 5;
const VALIDATION_MIN_SUCCESS: usize = 1;
const VALIDATION_COOLDOWN: Duration = Duration::from_secs(3600);
const VALIDATION_MEDIA_TIMEOUT: Duration = Duration::from_secs(100);
const VALIDATION_ERROR_RETEST_INTERVAL: Duration = VALIDATION_COOLDOWN;
const VALIDATION_FAILURES_FOR_ERROR: u8 = 5;
#[derive(Clone)]
struct ProviderValidationContext {
@@ -64,7 +61,6 @@ static PROVIDER_VALIDATION_INFLIGHT: Lazy<DashSet<String>> = Lazy::new(DashSet::
static PROVIDER_VALIDATION_LAST_RUN: Lazy<DashMap<String, Instant>> = Lazy::new(DashMap::new);
static PROVIDER_VALIDATION_FAILURE_STATE: Lazy<DashMap<String, ValidationFailureState>> =
Lazy::new(DashMap::new);
static PROVIDER_ERROR_REVALIDATION_STARTED: OnceLock<()> = OnceLock::new();
fn validation_client_version() -> ClientVersion {
ClientVersion::new(22, 'c' as u32, "Hot%20Tub".to_string())
@@ -394,37 +390,6 @@ fn record_validation_failure(provider_id: &str, now: Instant) -> u8 {
1
}
/// Spawns the background task that periodically re-schedules validation for
/// providers whose runtime status equals `CHANNEL_STATUS_ERROR`.
///
/// The task is started at most once: if `PROVIDER_ERROR_REVALIDATION_STARTED`
/// has already been set, the call returns without spawning anything.
fn start_periodic_error_revalidation() {
    let already_started = PROVIDER_ERROR_REVALIDATION_STARTED.set(()).is_err();
    if already_started {
        return;
    }

    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(VALIDATION_ERROR_RETEST_INTERVAL);
        loop {
            ticker.tick().await;

            // Collect the ids first; scheduling only starts once the pass over
            // the status map has finished.
            let errored_ids: Vec<_> = PROVIDER_RUNTIME_STATUS
                .iter()
                .filter(|entry| entry.value().as_str() == CHANNEL_STATUS_ERROR)
                .map(|entry| entry.key().clone())
                .collect();

            for errored_id in errored_ids {
                schedule_provider_validation(
                    &errored_id,
                    "periodic_retest",
                    "provider currently marked as error",
                );
            }
        }
    });
}
pub fn configure_runtime_validation(
pool: DbPool,
cache: VideoCache,
@@ -436,9 +401,7 @@ pub fn configure_runtime_validation(
cache,
requester,
})
.map_err(|_| "provider validation context already configured")?;
start_periodic_error_revalidation();
Ok(())
.map_err(|_| "provider validation context already configured")
}
pub fn current_provider_channel_status(provider_id: &str) -> Option<String> {
@@ -486,19 +449,13 @@ pub fn schedule_provider_validation(provider_id: &str, context: &str, msg: &str)
match validation_result {
Ok(()) => {
reset_validation_failure_state(&provider_id);
PROVIDER_RUNTIME_STATUS.remove(&provider_id);
}
Err(_validation_error) => {
let failures = record_validation_failure(&provider_id, Instant::now());
if failures >= VALIDATION_FAILURES_FOR_ERROR {
PROVIDER_RUNTIME_STATUS
.insert(provider_id.clone(), CHANNEL_STATUS_ERROR.to_string());
}
let _failure_count = record_validation_failure(&provider_id, Instant::now());
crate::flow_debug!(
"provider validation failed provider={} failures={} threshold={} error={}",
"provider validation failed provider={} failures={} error={}",
&provider_id,
failures,
VALIDATION_FAILURES_FOR_ERROR,
_failure_count,
crate::util::flow_debug::preview(&_validation_error, 160)
);
}
@@ -907,6 +864,7 @@ pub fn build_status_response(status: Status) -> StatusResponse {
nsfw: status.nsfw,
categories: status.categories,
options: status.options,
cdnReferrers: status.cdnReferrers,
filtersFooter: status.filtersFooter,
}
}
@@ -1354,22 +1312,6 @@ mod tests {
PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
/// Records `VALIDATION_FAILURES_FOR_ERROR` failures, each one a further
/// `VALIDATION_COOLDOWN` after the first, and checks that the count returned
/// by the last recording equals that threshold.
#[test]
fn validation_failure_threshold_matches_channel_error_policy() {
    let provider_id = "hsex";
    // Start from a clean slate for this provider.
    PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);

    let start = Instant::now();
    // Keep only the count reported by the most recent recording.
    let last_count = (0..VALIDATION_FAILURES_FOR_ERROR).fold(0, |_, step| {
        let recorded_at = start + VALIDATION_COOLDOWN * u32::from(step);
        record_validation_failure(provider_id, recorded_at)
    });

    assert_eq!(last_count, VALIDATION_FAILURES_FOR_ERROR);
    PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
#[test]
fn builds_group_index() {
PROVIDER_RUNTIME_STATUS.remove("all");
@@ -1453,6 +1395,12 @@ mod tests {
.expect("chinese group present");
assert_eq!(chinese_group["systemImage"], "globe");
let cdn_referrers = json["cdnReferrers"].as_array().expect("cdnReferrers array");
assert_eq!(cdn_referrers.len(), 1);
assert_eq!(cdn_referrers[0]["hostContains"], "phncdn");
assert_eq!(cdn_referrers[0]["referer"], "https://www.pornhub.com/");
assert_eq!(cdn_referrers[0]["origin"], "https://www.pornhub.com");
let pimpbunny_channel = channels
.iter()
.find(|channel| channel["id"] == "pimpbunny")
@@ -1463,14 +1411,6 @@ mod tests {
);
}
/// A provider with an error entry in `PROVIDER_RUNTIME_STATUS` must have its
/// decorated channel report `CHANNEL_STATUS_ERROR`.
#[test]
fn runtime_error_status_overrides_channel_status() {
    let provider_id = "hsex";
    PROVIDER_RUNTIME_STATUS.insert(String::from(provider_id), String::from(CHANNEL_STATUS_ERROR));

    let decorated = decorate_channel(base_channel(provider_id));
    assert_eq!(decorated.status, CHANNEL_STATUS_ERROR);

    // Remove the entry inserted above.
    PROVIDER_RUNTIME_STATUS.remove(provider_id);
}
#[ntex::test]
#[ignore = "live network sweep across all providers"]
async fn api_videos_returns_working_media_urls_for_all_channels() {

View File

@@ -1,8 +1,6 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
@@ -641,57 +639,12 @@ impl PornhubProvider {
return Ok(old_items);
}
let mut items = items;
// Rewrite thumbs to use the page-driven thumb proxy when appropriate
for item in items.iter_mut() {
let proxied = self.proxied_thumb(&options, &item.url);
if !proxied.is_empty() {
item.thumb = proxied;
}
}
cache.remove(&video_url);
cache.insert(video_url, items.clone());
Ok(items)
}
}
impl PornhubProvider {
    /// Returns the `"pornhub-thumb"` proxy URL for `page_url`, built by
    /// `build_proxy_url` from the URL with its scheme stripped.
    ///
    /// Yields an empty string when `page_url` is empty or is rejected by
    /// `PornhubThumbPolicy::is_allowed_video_page_url`.
    fn proxied_thumb(&self, options: &ServerOptions, page_url: &str) -> String {
        // The emptiness check runs first, so the policy never sees an empty URL.
        let eligible =
            !page_url.is_empty() && PornhubThumbPolicy::is_allowed_video_page_url(page_url);

        if eligible {
            build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(page_url))
        } else {
            String::new()
        }
    }
}
/// Namespace for the checks that decide which page URLs may be thumb-proxied.
struct PornhubThumbPolicy;

impl PornhubThumbPolicy {
    /// Returns `true` only when `url` parses, uses the `https` scheme, has a
    /// host of `pornhub.com`, `www.pornhub.com` or ending in `.pornhub.com`,
    /// and a path starting with `/view_video.php` or `/video/`.
    fn is_allowed_video_page_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        if parsed.scheme() != "https" {
            return false;
        }

        let Some(host) = parsed.host_str() else {
            return false;
        };
        let exact_host = ["pornhub.com", "www.pornhub.com"]
            .iter()
            .any(|allowed| host.eq_ignore_ascii_case(allowed));
        if !(exact_host || host.ends_with(".pornhub.com")) {
            return false;
        }

        let path = parsed.path();
        path.starts_with("/view_video.php") || path.starts_with("/video/")
    }
}
#[async_trait]
impl Provider for PornhubProvider {
async fn get_videos(