omgxx uploader

This commit is contained in:
Simon
2026-03-31 14:40:31 +00:00
parent ef459fa6b5
commit c4be911d8b

View File

@@ -1,6 +1,12 @@
use crate::DbPool; use crate::DbPool;
use crate::api::ClientVersion; use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background}; use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::uploaders::{
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
iso_timestamp_from_unix,
};
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number; use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
@@ -9,6 +15,8 @@ use crate::{status::*, util};
use async_trait::async_trait; use async_trait::async_trait;
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use std::thread; use std::thread;
@@ -34,6 +42,20 @@ pub struct OmgxxxProvider {
networks: Arc<RwLock<Vec<FilterOption>>>, networks: Arc<RwLock<Vec<FilterOption>>>,
stars: Arc<RwLock<Vec<FilterOption>>>, stars: Arc<RwLock<Vec<FilterOption>>>,
} }
/// Which kind of OMG.XXX listing an uploader target points at.
///
/// The kind selects both the tag used inside canonical uploader ids
/// (`site` / `network`) and the URL path segment used when fetching the
/// listing (`sites` / `networks`).
#[derive(Debug, Clone, PartialEq, Eq)]
enum OmgUploaderTargetKind {
// A single site; serialized as `site`, fetched from the `sites` path segment.
Site,
// A network; serialized as `network`, fetched from the `networks` path segment.
Network,
}
/// A resolved uploader: the listing kind plus the identifier and title used
/// to fetch and label it.
#[derive(Debug, Clone)]
struct OmgUploaderTarget {
// Whether this target is a site or a network listing.
kind: OmgUploaderTargetKind,
// Identifier interpolated into the listing URL and into the canonical uploader id.
id: String,
// Title taken from the matching filter option, or a copy of `id` when no option matched.
title: String,
}
impl OmgxxxProvider { impl OmgxxxProvider {
pub fn new() -> Self { pub fn new() -> Self {
let provider = OmgxxxProvider { let provider = OmgxxxProvider {
@@ -418,6 +440,329 @@ impl OmgxxxProvider {
} }
} }
/// Builds the canonical uploader id `omgxxx:<kind>:<percent-encoded id>`.
///
/// Every non-alphanumeric byte of `id` is percent-encoded, so the encoded id
/// can never contain the `:` separator.
fn canonical_uploader_id(kind: &OmgUploaderTargetKind, id: &str) -> String {
    let encoded_id = utf8_percent_encode(id, NON_ALPHANUMERIC);
    let kind = match *kind {
        OmgUploaderTargetKind::Network => "network",
        OmgUploaderTargetKind::Site => "site",
    };
    format!("omgxxx:{kind}:{}", encoded_id)
}
/// Resolves a canonical uploader id into a target.
///
/// Accepted forms are `omgxxx:site:<id>`, `omgxxx:network:<id>` and the
/// two-segment form `omgxxx:<id>`, which is treated as a site. The id segment
/// is percent-decoded before lookup.
///
/// When the decoded id matches a known site/network option, that option's id
/// and title are used. Otherwise the decoded id is used as both id and title,
/// but only if it is a single safe path segment.
///
/// Returns `None` for malformed ids, unknown kinds, invalid UTF-8 after
/// decoding, a poisoned option lock, or an unknown id that is empty, a dot
/// segment, or contains path/query/fragment delimiters or control characters.
fn uploader_target_from_id(&self, uploader_id: &str) -> Option<OmgUploaderTarget> {
    // `split` always yields at least one segment, so the slice patterns below
    // are the only shape validation required.
    let parts = uploader_id.split(':').collect::<Vec<_>>();
    let (kind, raw_id) = match parts.as_slice() {
        ["omgxxx", "site", raw_id] => (OmgUploaderTargetKind::Site, *raw_id),
        ["omgxxx", "network", raw_id] => (OmgUploaderTargetKind::Network, *raw_id),
        ["omgxxx", raw_id] => (OmgUploaderTargetKind::Site, *raw_id),
        _ => return None,
    };
    let decoded_id = percent_decode_str(raw_id).decode_utf8().ok()?.to_string();

    let options = match kind {
        OmgUploaderTargetKind::Site => self.sites.read().ok()?,
        OmgUploaderTargetKind::Network => self.networks.read().ok()?,
    };
    if let Some(option) = options.iter().find(|option| option.id == decoded_id) {
        return Some(OmgUploaderTarget {
            kind,
            id: option.id.clone(),
            title: option.title.clone(),
        });
    }

    // Unknown ids come straight from the request and are later interpolated
    // into the listing URL, so refuse anything that could alter the path.
    let is_unsafe_segment = decoded_id.is_empty()
        || decoded_id == "."
        || decoded_id == ".."
        || decoded_id
            .chars()
            .any(|c| matches!(c, '/' | '\\' | '?' | '#') || c.is_control());
    if is_unsafe_segment {
        return None;
    }

    Some(OmgUploaderTarget {
        kind,
        title: decoded_id.clone(),
        id: decoded_id,
    })
}
fn uploader_target_from_name(&self, uploader_name: &str) -> Option<OmgUploaderTarget> {
let normalized = uploader_name.trim();
if normalized.is_empty() {
return None;
}
let lowered = normalized.to_ascii_lowercase();
if let Ok(sites) = self.sites.read() {
if let Some(option) = sites.iter().find(|option| {
option.title.eq_ignore_ascii_case(normalized)
|| option.id.eq_ignore_ascii_case(normalized)
|| option.title.to_ascii_lowercase() == lowered
}) {
return Some(OmgUploaderTarget {
kind: OmgUploaderTargetKind::Site,
id: option.id.clone(),
title: option.title.clone(),
});
}
}
if let Ok(networks) = self.networks.read() {
if let Some(option) = networks.iter().find(|option| {
option.title.eq_ignore_ascii_case(normalized)
|| option.id.eq_ignore_ascii_case(normalized)
|| option.title.to_ascii_lowercase() == lowered
}) {
return Some(OmgUploaderTarget {
kind: OmgUploaderTargetKind::Network,
id: option.id.clone(),
title: option.title.clone(),
});
}
}
None
}
/// Resolves an uploader target, preferring the id over the name.
///
/// The name is only consulted when no id was given or the id did not resolve.
fn resolve_uploader_target(
    &self,
    uploader_id: Option<&str>,
    uploader_name: Option<&str>,
) -> Option<OmgUploaderTarget> {
    if let Some(id) = uploader_id {
        if let Some(target) = self.uploader_target_from_id(id) {
            return Some(target);
        }
    }
    match uploader_name {
        Some(name) => self.uploader_target_from_name(name),
        None => None,
    }
}
/// Builds the listing URL for `target` at the given page.
///
/// Page `0` is treated as page `1`.
fn uploader_target_url(&self, target: &OmgUploaderTarget, page: u8) -> String {
    let page_number = if page == 0 { 1 } else { page };
    let section = match target.kind {
        OmgUploaderTargetKind::Network => "networks",
        OmgUploaderTargetKind::Site => "sites",
    };
    format!("{}/{}/{}/{}/", self.url, section, target.id, page_number)
}
/// Finds the highest page number linked for `target` inside `html`.
///
/// Scans for links of the form `/<sites|networks>/<id>/<page>/` and returns
/// the largest page found, or `None` when the pattern cannot be built or no
/// link matches.
///
/// Page numbers above `u8::MAX` saturate to `u8::MAX` instead of being
/// discarded, so a long listing is not mistaken for a short one.
fn uploader_target_last_page(&self, html: &str, target: &OmgUploaderTarget) -> Option<u8> {
    let base = match target.kind {
        OmgUploaderTargetKind::Site => "sites",
        OmgUploaderTargetKind::Network => "networks",
    };
    let pattern = format!(
        r#"/{}/{}/(?P<page>\d+)/"#,
        regex::escape(base),
        regex::escape(&target.id)
    );
    let regex = Regex::new(&pattern).ok()?;
    regex
        .captures_iter(html)
        // Parse wide first: parsing straight into `u8` would drop every page
        // number above 255 and leave only small pagination links.
        .filter_map(|captures| captures.name("page")?.as_str().parse::<u64>().ok())
        .max()
        // The clamp makes the narrowing cast lossless.
        .map(|page| page.min(u64::from(u8::MAX)) as u8)
}
/// Chooses a display name for `target`, trying progressively weaker sources.
///
/// 1. The target's own title, when it is non-blank and differs from its id.
/// 2. The page `<title>`, entity-decoded, with the site suffix stripped.
/// 3. The leading `[...]` tag of the first video title that has one.
/// 4. The id with `-` turned into spaces and each part capitalized.
fn display_name_for_uploader_target(
    &self,
    target: &OmgUploaderTarget,
    html: &str,
    first_page_items: &[VideoItem],
) -> String {
    let has_distinct_title = !target.title.trim().is_empty() && target.title != target.id;
    if has_distinct_title {
        return target.title.clone();
    }

    let raw_page_title = html
        .split("<title>")
        .nth(1)
        .and_then(|rest| rest.split("</title>").next());
    if let Some(raw_page_title) = raw_page_title {
        // Fall back to the raw text when entity decoding fails.
        let decoded = decode(raw_page_title.as_bytes())
            .to_string()
            .unwrap_or_else(|_| raw_page_title.to_string());
        let without_suffix = decoded
            .replace(" Porn! 😮 - OMG.XXX", "")
            .replace(" - OMG.XXX", "");
        let cleaned = without_suffix.trim();
        if !cleaned.is_empty() {
            return cleaned.to_string();
        }
    }

    for item in first_page_items {
        let bracketed = item
            .title
            .strip_prefix('[')
            .and_then(|rest| rest.split(']').next())
            .map(str::trim);
        if let Some(name) = bracketed {
            if !name.is_empty() {
                return name.to_owned();
            }
        }
    }

    // Last resort: derive a title-cased name from the id itself.
    let mut words = Vec::new();
    for part in target.id.split('-') {
        let mut rest = part.chars();
        let word = match rest.next() {
            Some(first) => first.to_uppercase().collect::<String>() + rest.as_str(),
            None => String::new(),
        };
        words.push(word);
    }
    words.join(" ")
}
/// Returns a copy of `videos` ordered by relevance to `query`.
///
/// With no query (or a blank one) the input order is returned unchanged.
/// Otherwise videos are ordered by, in turn:
/// 1. relevance score, descending (2 for a title hit, plus 1 for an uploader hit),
/// 2. view count, descending,
/// 3. id, ascending.
///
/// Matching is an ASCII-case-insensitive substring test.
fn rank_uploader_videos(
    videos: &[UploaderVideoRef],
    query: Option<&str>,
) -> Vec<UploaderVideoRef> {
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return videos.to_vec();
    };
    let query = query.to_ascii_lowercase();

    let relevance = |video: &UploaderVideoRef| -> u8 {
        let title_hit = video.title.to_ascii_lowercase().contains(&query);
        let uploader_hit = video.uploader.to_ascii_lowercase().contains(&query);
        u8::from(title_hit) * 2 + u8::from(uploader_hit)
    };

    // Score each video once up front; scoring inside the comparator would
    // lowercase two strings on every comparison.
    let mut scored = videos
        .iter()
        .map(|video| (relevance(video), video.clone()))
        .collect::<Vec<(u8, UploaderVideoRef)>>();
    scored.sort_by(|(score_a, a), (score_b, b)| {
        score_b
            .cmp(score_a)
            .then_with(|| b.views.cmp(&a.views))
            .then_with(|| a.id.cmp(&b.id))
    });
    scored.into_iter().map(|(_, video)| video).collect()
}
/// Builds an uploader profile for `target` from its first and last listing pages.
///
/// * `_cache` - currently unused.
/// * `query` - optional search text used to pick the featured ("For You") row.
/// * `profile_content` - when `false`, `videos`, `tapes` and `playlists` are `None`.
/// * `options` - request options used to obtain the HTTP requester.
///
/// Returns `Ok(None)` when the first page yields no videos, and `Err` when
/// the first page request fails. A failed last-page request is tolerated and
/// treated as an empty page.
///
/// `videoCount` is an estimate: every page except the last is assumed to hold
/// as many videos as the first page. `totalViews` only sums the videos seen
/// on the first and last pages.
async fn build_uploader_profile(
    &self,
    _cache: VideoCache,
    target: &OmgUploaderTarget,
    query: Option<&str>,
    profile_content: bool,
    options: &ServerOptions,
) -> Result<Option<UploaderProfile>> {
    let first_page_url = self.uploader_target_url(target, 1);
    let mut requester = requester_or_default(options, module_path!(), "missing_requester");
    let first_page_html = match requester.get(&first_page_url, None).await {
        Ok(html) => html,
        Err(error) => {
            return Err(Error::from(format!(
                "uploader page request failed url={first_page_url}; error={error}"
            )));
        }
    };
    // The HTML is still needed below for the display name and pagination,
    // so the parser gets its own copy.
    let first_page_items = self.get_video_items_from_html(first_page_html.clone());
    if first_page_items.is_empty() {
        return Ok(None);
    }

    let display_name =
        self.display_name_for_uploader_target(target, &first_page_html, &first_page_items);
    let last_page = self
        .uploader_target_last_page(&first_page_html, target)
        .unwrap_or(1);
    let last_page_items = if last_page > 1 {
        let mut requester = requester_or_default(options, module_path!(), "missing_requester");
        // Best effort: a failed request degrades to an empty last page.
        let html = requester
            .get(&self.uploader_target_url(target, last_page), None)
            .await
            .unwrap_or_default();
        self.get_video_items_from_html(html)
    } else {
        Vec::new()
    };

    let uploader_id = Self::canonical_uploader_id(&target.kind, &target.id);
    let video_refs = first_page_items
        .iter()
        .map(|item| {
            let mut video =
                UploaderVideoRef::from_video_item(item, &display_name, &uploader_id);
            // Set explicitly so every ref carries the resolved name and id.
            video.uploader = display_name.clone();
            video.uploaderId = uploader_id.clone();
            video
        })
        .collect::<Vec<_>>();

    let ranked = Self::rank_uploader_videos(&video_refs, query);
    let featured_ids = ranked
        .iter()
        .take(12)
        .map(|video| video.id.clone())
        .collect::<Vec<_>>();

    let video_count = if last_page > 1 {
        ((last_page as u64 - 1) * first_page_items.len() as u64) + last_page_items.len() as u64
    } else {
        first_page_items.len() as u64
    };
    let total_views = first_page_items
        .iter()
        .chain(last_page_items.iter())
        .filter_map(|item| item.views)
        .map(u64::from)
        .sum();
    let newest = first_page_items.iter().filter_map(|item| item.uploadedAt).max();
    // Prefer the last page for the oldest upload; fall back to the first page.
    let oldest = last_page_items
        .iter()
        .filter_map(|item| item.uploadedAt)
        .min()
        .or_else(|| first_page_items.iter().filter_map(|item| item.uploadedAt).min());

    let layout = if featured_ids.is_empty() {
        vec![UploaderLayoutRow::videos(None)]
    } else {
        vec![
            UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids),
            UploaderLayoutRow::videos(None),
        ]
    };

    Ok(Some(UploaderProfile {
        id: uploader_id,
        name: display_name,
        url: Some(first_page_url),
        channel: Some("omgxxx".to_string()),
        verified: false,
        videoCount: video_count,
        totalViews: total_views,
        channels: Some(vec![UploaderChannelStat {
            channel: "omgxxx".to_string(),
            videoCount: video_count,
            firstSeenAt: iso_timestamp_from_unix(oldest),
            lastSeenAt: iso_timestamp_from_unix(newest),
        }]),
        avatar: None,
        description: None,
        bio: None,
        videos: profile_content.then_some(video_refs),
        tapes: profile_content.then_some(Vec::new()),
        playlists: profile_content.then_some(Vec::new()),
        layout: Some(layout),
    }))
}
async fn get( async fn get(
&self, &self,
cache: VideoCache, cache: VideoCache,
@@ -464,8 +809,7 @@ impl OmgxxxProvider {
} }
}; };
let mut requester = let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, None).await { let text = match requester.get(&video_url, None).await {
Ok(text) => text, Ok(text) => text,
Err(e) => { Err(e) => {
@@ -542,8 +886,7 @@ impl OmgxxxProvider {
} }
}; };
let mut requester = let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, None).await { let text = match requester.get(&video_url, None).await {
Ok(text) => text, Ok(text) => text,
Err(e) => { Err(e) => {
@@ -837,7 +1180,7 @@ impl OmgxxxProvider {
} }
} }
let video_item = VideoItem::new( let mut video_item = VideoItem::new(
id, id,
title, title,
video_url.to_string(), video_url.to_string(),
@@ -846,8 +1189,10 @@ impl OmgxxxProvider {
duration, duration,
) )
.views(views) .views(views)
.preview(preview) .preview(preview);
.tags(tags); if !tags.is_empty() {
video_item.tags = Some(tags);
}
items.push(video_item); items.push(video_item);
} }
return items; return items;
@@ -865,11 +1210,40 @@ mod tests {
id: "clubsweethearts".to_string(), id: "clubsweethearts".to_string(),
title: "Club Sweethearts".to_string(), title: "Club Sweethearts".to_string(),
}])), }])),
networks: Arc::new(RwLock::new(vec![])), networks: Arc::new(RwLock::new(vec![FilterOption {
id: "mofos".to_string(),
title: "Club Sweethearts".to_string(),
}])),
stars: Arc::new(RwLock::new(vec![])), stars: Arc::new(RwLock::new(vec![])),
} }
} }
// The fixture gives a site and a network the same title; name lookup must
// return the site.
#[test]
fn uploader_name_prefers_site_before_network() {
    let resolved = test_provider()
        .uploader_target_from_name("Club Sweethearts")
        .expect("target should resolve");

    assert_eq!(resolved.kind, OmgUploaderTargetKind::Site);
    assert_eq!(resolved.id, "clubsweethearts");
}
// Parsing a canonical network id and re-encoding the result must give back
// the same id string.
#[test]
fn uploader_id_round_trips_for_networks() {
    let canonical = "omgxxx:network:mofos";
    let resolved = test_provider()
        .uploader_target_from_id(canonical)
        .expect("target should resolve");

    assert_eq!(resolved.kind, OmgUploaderTargetKind::Network);
    assert_eq!(resolved.id, "mofos");

    let re_encoded = OmgxxxProvider::canonical_uploader_id(&resolved.kind, &resolved.id);
    assert_eq!(re_encoded, canonical);
}
#[test] #[test]
fn parses_model_and_site_tags_without_empty_strings() { fn parses_model_and_site_tags_without_empty_strings() {
let provider = test_provider(); let provider = test_provider();
@@ -1043,4 +1417,26 @@ impl Provider for OmgxxxProvider {
fn get_channel(&self, clientversion: ClientVersion) -> Option<crate::status::Channel> { fn get_channel(&self, clientversion: ClientVersion) -> Option<crate::status::Channel> {
Some(self.build_channel(clientversion)) Some(self.build_channel(clientversion))
} }
/// Looks up an uploader profile by id or name.
///
/// Returns `Ok(None)` when neither the id nor the name resolves to a target
/// or the target has no videos; profile-building errors are returned as
/// their string form.
async fn get_uploader(
    &self,
    cache: VideoCache,
    pool: DbPool,
    uploader_id: Option<String>,
    uploader_name: Option<String>,
    query: Option<String>,
    profile_content: bool,
    options: ServerOptions,
) -> std::result::Result<Option<UploaderProfile>, String> {
    // This provider does not use the database pool.
    let _ = pool;

    let resolved =
        self.resolve_uploader_target(uploader_id.as_deref(), uploader_name.as_deref());
    let target = match resolved {
        Some(target) => target,
        None => return Ok(None),
    };

    let outcome = self
        .build_uploader_profile(cache, &target, query.as_deref(), profile_content, &options)
        .await;
    match outcome {
        Ok(profile) => Ok(profile),
        Err(error) => Err(error.to_string()),
    }
}
} }