uploaders

This commit is contained in:
Simon
2026-03-31 13:39:11 +00:00
parent 80207efa73
commit bdc7d61121
8 changed files with 913 additions and 4 deletions

View File

@@ -4,6 +4,9 @@ use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::uploaders::{
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
};
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
@@ -14,6 +17,7 @@ use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
@@ -507,6 +511,263 @@ impl HsexProvider {
}
}
/// Builds the canonical uploader id for `author`.
///
/// The id is `CHANNEL_ID`, a colon, and the author name percent-encoded with
/// the `NON_ALPHANUMERIC` set, so the author part can never contain a raw `:`.
fn canonical_uploader_id(author: &str) -> String {
    let encoded_author = utf8_percent_encode(author, NON_ALPHANUMERIC);
    format!("{CHANNEL_ID}:{encoded_author}")
}
/// Recovers the author name from an uploader id.
///
/// Accepts either `<channel>:<percent-encoded author>` where the channel
/// matches `CHANNEL_ID` (ASCII case-insensitively), or a value without any
/// colon, which is treated as the encoded author itself.
///
/// Returns `None` when the id belongs to another channel, when the decoded
/// bytes are not valid UTF-8, or when the decoded name is blank after
/// trimming. The returned name itself is not trimmed.
fn author_from_uploader_id(value: &str) -> Option<String> {
    let encoded = match value.split_once(':') {
        None => value,
        Some((channel, rest)) => {
            if !channel.eq_ignore_ascii_case(CHANNEL_ID) {
                return None;
            }
            rest
        }
    };
    let author = percent_decode_str(encoded).decode_utf8().ok()?;
    if author.trim().is_empty() {
        return None;
    }
    Some(author.into_owned())
}
/// Extracts the `author` query parameter from an uploader link.
///
/// `href` is first resolved through `absolute_url`, then parsed as a URL.
/// Returns `None` when the URL cannot be parsed, when it has no `author`
/// parameter, or when the first `author` parameter is blank. Rejecting blank
/// values keeps callers from building an id with an empty author part, in
/// line with `author_from_uploader_id`, which also refuses blank names.
fn author_from_uploader_href(&self, href: &str) -> Option<String> {
    let url = Url::parse(&self.absolute_url(href)).ok()?;
    url.query_pairs()
        .find(|(key, _)| key == "author")
        .map(|(_, value)| value.into_owned())
        .filter(|author| !author.trim().is_empty())
}
fn pagination_last_page(html: &str) -> Option<u16> {
let regex = Regex::new(r#"user-(?P<page>\d+)\.htm\?author="#).ok()?;
regex
.captures_iter(html)
.filter_map(|captures| captures.name("page")?.as_str().parse::<u16>().ok())
.max()
}
/// Looks up a known uploader filter option by display name or id.
///
/// The name is trimmed first; a blank name yields `None`. An option matches
/// when its title equals the name (exactly or after lowercasing both sides)
/// or when its id equals the name ignoring ASCII case. The first match is
/// cloned and returned. Also yields `None` when the uploader list lock
/// cannot be read.
fn uploader_option_by_name(&self, uploader_name: &str) -> Option<FilterOption> {
    let wanted = uploader_name.trim();
    if wanted.is_empty() {
        return None;
    }
    let wanted_lower = wanted.to_lowercase();
    let options = self.uploaders.read().ok()?;
    for option in options.iter() {
        let title_matches =
            option.title == wanted || option.title.to_lowercase() == wanted_lower;
        if title_matches || option.id.eq_ignore_ascii_case(wanted) {
            return Some(option.clone());
        }
    }
    None
}
/// Resolves the site-level author name for an uploader request.
///
/// Resolution order:
/// 1. `uploader_id`, when it decodes via `author_from_uploader_id`;
/// 2. a known uploader option matching `uploader_name` whose filter id maps
///    to `Target::Uploader`;
/// 3. the trimmed `uploader_name` itself, when it is not blank.
///
/// Returns `None` when none of these produce a name.
fn resolve_uploader_author(
    &self,
    uploader_id: Option<&str>,
    uploader_name: Option<&str>,
) -> Option<String> {
    if let Some(author) = uploader_id.and_then(|id| Self::author_from_uploader_id(id)) {
        return Some(author);
    }

    let name = uploader_name?;
    let known_author = self
        .uploader_option_by_name(name)
        .and_then(|option| match self.target_from_filter_id(&option.id) {
            Some(Target::Uploader { author }) => Some(author),
            _ => None,
        });

    // Fall back to the raw name when it is not a known uploader option.
    known_author.or_else(|| {
        let trimmed = name.trim();
        (!trimmed.is_empty()).then(|| trimmed.to_string())
    })
}
/// Picks the name to display for an uploader profile.
///
/// Preference order:
/// 1. the trimmed `requested_name`, when it is not blank;
/// 2. the first non-blank uploader name found on `first_page_items`;
/// 3. the title of a known uploader option whose id contains `author` or
///    whose title equals `author`;
/// 4. `author` itself.
fn display_name_for_uploader(
    &self,
    author: &str,
    requested_name: Option<&str>,
    first_page_items: &[VideoItem],
) -> String {
    if let Some(requested) = requested_name
        .map(str::trim)
        .filter(|name| !name.is_empty())
    {
        return requested.to_string();
    }

    // Check every item for a usable name. Previously only the first item that
    // carried any uploader value was inspected, so a blank name on it hid
    // valid names on later items.
    if let Some(name) = first_page_items
        .iter()
        .filter_map(|item| item.uploader.as_deref())
        .find(|name| !name.trim().is_empty())
    {
        return name.to_string();
    }

    // NOTE(review): `id.contains(author)` is a substring match against the raw
    // author; confirm the option id format makes this precise enough.
    if let Some(option) = self.uploaders.read().ok().and_then(|values| {
        values
            .iter()
            .find(|value| value.id.contains(author) || value.title == author)
            .cloned()
    }) {
        return option.title;
    }

    author.to_string()
}
/// Returns a copy of `videos` ordered by relevance to `query`.
///
/// A missing or blank query returns the videos in their original order.
/// Otherwise each video gets a relevance score from case-insensitive
/// substring matches (title match = 2, uploader match = 1, both = 3) and the
/// result is sorted by score descending, then views descending, then id
/// ascending.
fn rank_videos_for_query(
    videos: &[UploaderVideoRef],
    query: Option<&str>,
) -> Vec<UploaderVideoRef> {
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return videos.to_vec();
    };
    let query = query.to_lowercase();

    let relevance = |video: &UploaderVideoRef| -> u8 {
        let mut score = 0u8;
        if video.title.to_lowercase().contains(&query) {
            score += 2;
        }
        if video.uploader.to_lowercase().contains(&query) {
            score += 1;
        }
        score
    };

    // Score each video exactly once up front. Scoring inside the comparator
    // would lowercase both strings again on every comparison.
    let mut scored: Vec<(u8, UploaderVideoRef)> = videos
        .iter()
        .map(|video| (relevance(video), video.clone()))
        .collect();
    scored.sort_by(|(score_a, a), (score_b, b)| {
        score_b
            .cmp(score_a)
            .then_with(|| b.views.cmp(&a.views))
            .then_with(|| a.id.cmp(&b.id))
    });
    scored.into_iter().map(|(_, video)| video).collect()
}
/// Assembles an uploader profile from the uploader's first and last listing pages.
///
/// Steps, in order:
/// 1. fetch the items of page 1; an empty page means "no such uploader" and
///    yields `Ok(None)`;
/// 2. fetch the page 1 HTML to read the highest page number from pagination;
/// 3. when there is more than one page, fetch the last page's items
///    (a failure here is tolerated and treated as an empty page).
///
/// The video count is an estimate: every page except the last is assumed to
/// hold as many items as page 1. `totalViews` only sums the items that were
/// actually fetched (first and last page).
///
/// Errors from the page 1 item fetch or HTML fetch are propagated.
async fn build_uploader_profile(
    &self,
    cache: VideoCache,
    author: &str,
    requested_name: Option<&str>,
    query: Option<&str>,
    profile_content: bool,
    options: &ServerOptions,
) -> Result<Option<UploaderProfile>> {
    let first_page_url = self.build_uploader_url(author, 1);
    let first_items = self
        .fetch_items_for_url(
            cache.clone(),
            first_page_url.clone(),
            64,
            profile_content,
            options,
        )
        .await?;
    if first_items.is_empty() {
        return Ok(None);
    }

    let mut requester = requester_or_default(options, CHANNEL_ID, "get_uploader.profile_page");
    let referer = format!("{}/", self.url);
    let first_html = self
        .fetch_html(&mut requester, &first_page_url, &referer)
        .await?;
    let last_page = Self::pagination_last_page(&first_html).unwrap_or(1);
    let has_more_pages = last_page > 1;

    // Best effort: a failed last-page fetch degrades to an empty page.
    let last_items = if has_more_pages {
        let last_page_url = self.build_uploader_url(author, last_page);
        self.fetch_items_for_url(cache, last_page_url, 64, false, options)
            .await
            .unwrap_or_default()
    } else {
        Vec::new()
    };

    let display_name = self.display_name_for_uploader(author, requested_name, &first_items);
    let canonical_id = Self::canonical_uploader_id(author);

    let mut videos = first_items
        .iter()
        .map(|item| UploaderVideoRef::from_video_item(item, &display_name, &canonical_id))
        .collect::<Vec<_>>();

    // Ranking runs on the freshly converted videos, before the overrides below.
    let horizontal_ids = Self::rank_videos_for_query(&videos, query)
        .into_iter()
        .take(12)
        .map(|video| video.id)
        .collect::<Vec<_>>();

    videos.iter_mut().for_each(|video| {
        video.uploader = display_name.clone();
        video.uploaderId = canonical_id.clone();
    });

    let newest_seen = first_items
        .iter()
        .filter_map(|item| item.uploadedAt)
        .max();
    let oldest_on_first_page = || first_items.iter().filter_map(|item| item.uploadedAt).min();
    let oldest_seen = last_items
        .iter()
        .filter_map(|item| item.uploadedAt)
        .min()
        .or_else(oldest_on_first_page);

    let first_page_size = first_items.len() as u64;
    let video_count = if has_more_pages {
        (u64::from(last_page) - 1) * first_page_size + last_items.len() as u64
    } else {
        first_page_size
    };

    let total_views = first_items
        .iter()
        .chain(last_items.iter())
        .filter_map(|item| item.views)
        .map(u64::from)
        .sum();

    // The highlighted row is only emitted when there is something to show.
    let mut layout = Vec::with_capacity(2);
    if !horizontal_ids.is_empty() {
        layout.push(UploaderLayoutRow::horizontal(
            Some("For You".to_string()),
            horizontal_ids,
        ));
    }
    layout.push(UploaderLayoutRow::videos(None));

    let channel_stat = UploaderChannelStat {
        channel: CHANNEL_ID.to_string(),
        videoCount: video_count,
        firstSeenAt: crate::uploaders::iso_timestamp_from_unix(oldest_seen),
        lastSeenAt: crate::uploaders::iso_timestamp_from_unix(newest_seen),
    };

    Ok(Some(UploaderProfile {
        id: canonical_id,
        name: display_name,
        url: Some(first_page_url),
        channel: Some(CHANNEL_ID.to_string()),
        verified: false,
        videoCount: video_count,
        totalViews: total_views,
        channels: Some(vec![channel_stat]),
        avatar: None,
        description: None,
        bio: None,
        videos: profile_content.then_some(videos),
        tapes: profile_content.then_some(Vec::new()),
        playlists: profile_content.then_some(Vec::new()),
        layout: Some(layout),
    }))
}
fn first_video_link<'a>(&self, element: &'a ElementRef<'a>) -> Result<Option<ElementRef<'a>>> {
let selector = Self::selector("a[href]")?;
Ok(element.select(&selector).find(|link| {
@@ -636,10 +897,13 @@ impl HsexProvider {
if !uploader_name.is_empty() {
item.uploader = Some(uploader_name);
}
if let Some(uploader_href) = uploader.value().attr("href") {
item.uploaderUrl = Some(self.absolute_url(uploader_href));
}
if let Some(uploader_href) = uploader.value().attr("href") {
item.uploaderUrl = Some(self.absolute_url(uploader_href));
item.uploaderId = self
.author_from_uploader_href(uploader_href)
.map(|author| Self::canonical_uploader_id(&author));
}
}
if let Some(info) = element.select(&info_selector).next() {
let info_text = Self::decode_text(&Self::collapse_whitespace(
@@ -701,6 +965,9 @@ impl HsexProvider {
}
if let Some(href) = author.value().attr("href") {
item.uploaderUrl = Some(self.absolute_url(href));
item.uploaderId = self
.author_from_uploader_href(href)
.map(|author| Self::canonical_uploader_id(&author));
}
}
}
@@ -934,6 +1201,34 @@ impl Provider for HsexProvider {
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
Some(self.build_channel(clientversion))
}
/// Provider entry point for uploader profiles.
///
/// Resolves the author from the id and/or name, then builds the profile.
/// Returns `Ok(None)` when no author can be resolved; profile-building
/// errors are converted to their string form. `pool` is not used.
async fn get_uploader(
    &self,
    cache: VideoCache,
    pool: DbPool,
    uploader_id: Option<String>,
    uploader_name: Option<String>,
    query: Option<String>,
    profile_content: bool,
    options: ServerOptions,
) -> std::result::Result<Option<UploaderProfile>, String> {
    let _ = pool;
    let requested_name = uploader_name.as_deref();
    let author = match self.resolve_uploader_author(uploader_id.as_deref(), requested_name) {
        Some(author) => author,
        None => return Ok(None),
    };
    let profile = self
        .build_uploader_profile(
            cache,
            &author,
            requested_name,
            query.as_deref(),
            profile_content,
            &options,
        )
        .await;
    profile.map_err(|error| error.to_string())
}
}
#[cfg(test)]
@@ -1020,6 +1315,29 @@ mod tests {
);
}
// An author encoded into a canonical id must decode back to the same author.
#[test]
fn canonical_uploader_id_round_trips() {
    let author = "xihongshiddd";
    let id = HsexProvider::canonical_uploader_id(author);
    assert_eq!(id, "hsex:xihongshiddd");

    let decoded = HsexProvider::author_from_uploader_id(&id);
    assert_eq!(decoded.as_deref(), Some(author));
}
// The highest page number among the pagination links is reported as the last page.
#[test]
fn parses_last_page_from_pagination() {
    let pagination_markup = r#"
<ul class="pagination1">
<li><a href="user-1.htm?author=xihongshiddd">1</a></li>
<li><a href="user-2.htm?author=xihongshiddd">2</a></li>
<li><a href="user-7.htm?author=xihongshiddd">7</a></li>
</ul>
"#;
    let last_page = HsexProvider::pagination_last_page(pagination_markup);
    assert_eq!(last_page, Some(7));
}
#[tokio::test]
#[ignore]
async fn fetches_page_two_items() {

View File

@@ -12,6 +12,7 @@ use crate::{
DbPool,
api::ClientVersion,
status::{Channel, ChannelGroup, ChannelView, FilterOption, Status, StatusResponse},
uploaders::UploaderProfile,
util::{cache::VideoCache, discord::send_discord_error_report, requester::Requester},
videos::{FlexibleNumber, ServerOptions, VideoItem, VideosRequest},
};
@@ -577,6 +578,53 @@ where
}
}
/// Runs an uploader-profile future and converts a panic into an `Err`.
///
/// When `fut` completes normally its result is returned unchanged. When it
/// panics, the panic payload is turned into a message, reported through
/// `send_discord_error_report`, passed to `schedule_provider_validation`,
/// and returned as the error string.
pub async fn run_uploader_provider_guarded<F>(
    provider_name: &str,
    context: &str,
    fut: F,
) -> Result<Option<UploaderProfile>, String>
where
    F: Future<Output = Result<Option<UploaderProfile>, String>>,
{
    crate::flow_debug!(
        "provider uploader guard enter provider={} context={}",
        provider_name,
        context
    );

    // Normal completion returns straight away; only a panic falls through.
    let payload = match AssertUnwindSafe(fut).catch_unwind().await {
        Ok(result) => {
            crate::flow_debug!(
                "provider uploader guard exit provider={} context={} matched={}",
                provider_name,
                context,
                matches!(&result, Ok(Some(_)))
            );
            return result;
        }
        Err(payload) => payload,
    };

    let panic_msg = panic_payload_to_string(payload);
    crate::flow_debug!(
        "provider uploader guard panic provider={} context={} panic={}",
        provider_name,
        context,
        &panic_msg
    );

    let details = format!("context={}; panic={}", context, panic_msg);
    // The outcome of the report itself is deliberately ignored.
    let _ = send_discord_error_report(
        format!("Provider panic: {}", provider_name),
        None,
        Some("Provider Guard"),
        Some(&details),
        file!(),
        line!(),
        module_path!(),
    )
    .await;

    schedule_provider_validation(provider_name, context, &panic_msg);
    Err(panic_msg)
}
pub async fn report_provider_error(provider_name: &str, context: &str, msg: &str) {
let _ = send_discord_error_report(
format!("Provider error: {}", provider_name),
@@ -868,6 +916,19 @@ pub trait Provider: Send + Sync {
cacheDuration: None,
})
}
/// Fetches an uploader profile for this provider.
///
/// Default implementation: ignores every argument and returns `Ok(None)`.
/// Implementors override this method to supply a profile.
async fn get_uploader(
&self,
_cache: VideoCache,
_pool: DbPool,
_uploader_id: Option<String>,
_uploader_name: Option<String>,
_query: Option<String>,
_profile_content: bool,
_options: ServerOptions,
) -> Result<Option<UploaderProfile>, String> {
Ok(None)
}
}
#[cfg(all(test, not(hottub_single_provider)))]

View File

@@ -474,6 +474,19 @@ impl NoodlemagazineProvider {
if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
return String::new();
}
let Some(url) = Url::parse(&normalized).ok() else {
return String::new();
};
if url
.host_str()
.is_some_and(|host| host.eq_ignore_ascii_case("img.pvvstream.pro"))
{
return crate::providers::build_proxy_url(
_options,
"noodlemagazine-thumb",
&crate::providers::strip_url_scheme(&normalized),
);
}
normalized
}
@@ -707,7 +720,7 @@ mod tests {
assert_eq!(items.len(), 1);
assert_eq!(
items[0].thumb,
"https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
"https://example.com/proxy/noodlemagazine-thumb/img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
);
}