From c4be911d8b20cc3e0c7a675a7278370fbbc38ffa Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 31 Mar 2026 14:40:31 +0000 Subject: [PATCH] omgxx uploader --- src/providers/omgxxx.rs | 414 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 405 insertions(+), 9 deletions(-) diff --git a/src/providers/omgxxx.rs b/src/providers/omgxxx.rs index 982f663..b61d497 100644 --- a/src/providers/omgxxx.rs +++ b/src/providers/omgxxx.rs @@ -1,6 +1,12 @@ use crate::DbPool; use crate::api::ClientVersion; -use crate::providers::{Provider, report_provider_error, report_provider_error_background}; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::uploaders::{ + UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef, + iso_timestamp_from_unix, +}; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; @@ -9,6 +15,8 @@ use crate::{status::*, util}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode}; +use regex::Regex; use scraper::{Html, Selector}; use std::sync::{Arc, RwLock}; use std::thread; @@ -34,6 +42,20 @@ pub struct OmgxxxProvider { networks: Arc>>, stars: Arc>>, } + +#[derive(Debug, Clone, PartialEq, Eq)] +enum OmgUploaderTargetKind { + Site, + Network, +} + +#[derive(Debug, Clone)] +struct OmgUploaderTarget { + kind: OmgUploaderTargetKind, + id: String, + title: String, +} + impl OmgxxxProvider { pub fn new() -> Self { let provider = OmgxxxProvider { @@ -418,6 +440,329 @@ impl OmgxxxProvider { } } + fn canonical_uploader_id(kind: &OmgUploaderTargetKind, id: &str) -> String { + let kind = match kind { + OmgUploaderTargetKind::Site => "site", + OmgUploaderTargetKind::Network => "network", + }; + format!( + "omgxxx:{kind}:{}", + utf8_percent_encode(id, NON_ALPHANUMERIC) + ) + } + + fn uploader_target_from_id(&self, uploader_id: &str) -> Option { + let parts = uploader_id.split(':').collect::>(); + if parts.is_empty() { + return None; + } + + let (kind, raw_id) = match parts.as_slice() { + ["omgxxx", kind, raw_id] => (*kind, *raw_id), + ["omgxxx", raw_id] => ("site", *raw_id), + _ => return None, + }; + + let decoded_id = percent_decode_str(raw_id).decode_utf8().ok()?.to_string(); + match kind { + "site" => self + .sites + .read() + .ok()? + .iter() + .find(|option| option.id == decoded_id) + .map(|option| OmgUploaderTarget { + kind: OmgUploaderTargetKind::Site, + id: option.id.clone(), + title: option.title.clone(), + }) + .or_else(|| { + Some(OmgUploaderTarget { + kind: OmgUploaderTargetKind::Site, + id: decoded_id.clone(), + title: decoded_id.clone(), + }) + }), + "network" => self + .networks + .read() + .ok()? + .iter() + .find(|option| option.id == decoded_id) + .map(|option| OmgUploaderTarget { + kind: OmgUploaderTargetKind::Network, + id: option.id.clone(), + title: option.title.clone(), + }) + .or_else(|| { + Some(OmgUploaderTarget { + kind: OmgUploaderTargetKind::Network, + id: decoded_id.clone(), + title: decoded_id.clone(), + }) + }), + _ => None, + } + } + + fn uploader_target_from_name(&self, uploader_name: &str) -> Option { + let normalized = uploader_name.trim(); + if normalized.is_empty() { + return None; + } + let lowered = normalized.to_ascii_lowercase(); + + if let Ok(sites) = self.sites.read() { + if let Some(option) = sites.iter().find(|option| { + option.title.eq_ignore_ascii_case(normalized) + || option.id.eq_ignore_ascii_case(normalized) + || option.title.to_ascii_lowercase() == lowered + }) { + return Some(OmgUploaderTarget { + kind: OmgUploaderTargetKind::Site, + id: option.id.clone(), + title: option.title.clone(), + }); + } + } + + if let Ok(networks) = self.networks.read() { + if let Some(option) = networks.iter().find(|option| { + option.title.eq_ignore_ascii_case(normalized) + || option.id.eq_ignore_ascii_case(normalized) + || option.title.to_ascii_lowercase() == lowered + }) { + return Some(OmgUploaderTarget { + kind: OmgUploaderTargetKind::Network, + id: option.id.clone(), + title: option.title.clone(), + }); + } + } + + None + } + + fn resolve_uploader_target( + &self, + uploader_id: Option<&str>, + uploader_name: Option<&str>, + ) -> Option { + uploader_id + .and_then(|value| self.uploader_target_from_id(value)) + .or_else(|| uploader_name.and_then(|value| self.uploader_target_from_name(value))) + } + + fn uploader_target_url(&self, target: &OmgUploaderTarget, page: u8) -> String { + let base = match target.kind { + OmgUploaderTargetKind::Site => "sites", + OmgUploaderTargetKind::Network => "networks", + }; + format!("{}/{}/{}/{}/", self.url, base, target.id, page.max(1)) + } + + fn uploader_target_last_page(&self, html: &str, target: &OmgUploaderTarget) -> Option { + let base = match target.kind { + OmgUploaderTargetKind::Site => "sites", + OmgUploaderTargetKind::Network => "networks", + }; + let pattern = format!(r#"/{}/{}/(?P\d+)/"#, regex::escape(base), regex::escape(&target.id)); + let regex = Regex::new(&pattern).ok()?; + regex + .captures_iter(html) + .filter_map(|captures| captures.name("page")?.as_str().parse::().ok()) + .max() + } + + fn display_name_for_uploader_target( + &self, + target: &OmgUploaderTarget, + html: &str, + first_page_items: &[VideoItem], + ) -> String { + if !target.title.trim().is_empty() && target.title != target.id { + return target.title.clone(); + } + + if let Some(title) = html + .split("") + .nth(1) + .and_then(|segment| segment.split("").next()) + .map(|title| decode(title.as_bytes()).to_string().unwrap_or_else(|_| title.to_string())) + .map(|title| title.replace(" Porn! 😮 - OMG.XXX", "")) + .map(|title| title.replace(" - OMG.XXX", "")) + .map(|title| title.trim().to_string()) + .filter(|title| !title.is_empty()) + { + return title; + } + + if let Some(site_name) = first_page_items.iter().find_map(|item| { + item.title + .strip_prefix('[') + .and_then(|title| title.split(']').next()) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + }) { + return site_name; + } + + target + .id + .split('-') + .map(|part| { + let mut chars = part.chars(); + match chars.next() { + Some(first) => first.to_uppercase().collect::() + chars.as_str(), + None => String::new(), + } + }) + .collect::>() + .join(" ") + } + + fn rank_uploader_videos( + videos: &[UploaderVideoRef], + query: Option<&str>, + ) -> Vec { + let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else { + return videos.to_vec(); + }; + let query = query.to_ascii_lowercase(); + let mut ranked = videos.to_vec(); + ranked.sort_by(|a, b| { + let score = |video: &UploaderVideoRef| { + let mut score = 0u8; + if video.title.to_ascii_lowercase().contains(&query) { + score += 2; + } + if video.uploader.to_ascii_lowercase().contains(&query) { + score += 1; + } + score + }; + score(b) + .cmp(&score(a)) + .then(b.views.cmp(&a.views)) + .then_with(|| a.id.cmp(&b.id)) + }); + ranked + } + + async fn build_uploader_profile( + &self, + _cache: VideoCache, + target: &OmgUploaderTarget, + query: Option<&str>, + profile_content: bool, + options: &ServerOptions, + ) -> Result> { + let first_page_url = self.uploader_target_url(target, 1); + let mut requester = requester_or_default(options, module_path!(), "missing_requester"); + let first_page_html = match requester.get(&first_page_url, None).await { + Ok(html) => html, + Err(error) => { + return Err(Error::from(format!( + "uploader page request failed url={first_page_url}; error={error}" + ))); + } + }; + let first_page_items = self.get_video_items_from_html(first_page_html.clone()); + if first_page_items.is_empty() { + return Ok(None); + } + let display_name = + self.display_name_for_uploader_target(target, &first_page_html, &first_page_items); + + let last_page = self + .uploader_target_last_page(&first_page_html, target) + .unwrap_or(1); + let last_page_items = if last_page > 1 { + let mut requester = requester_or_default(options, module_path!(), "missing_requester"); + let html = requester + .get(&self.uploader_target_url(target, last_page), None) + .await + .unwrap_or_default(); + self.get_video_items_from_html(html) + } else { + Vec::new() + }; + + let uploader_id = Self::canonical_uploader_id(&target.kind, &target.id); + let mut video_refs = first_page_items + .iter() + .map(|item| { + let mut video = + UploaderVideoRef::from_video_item(item, &display_name, &uploader_id); + video.uploader = display_name.clone(); + video.uploaderId = uploader_id.clone(); + video + }) + .collect::>(); + + let ranked = Self::rank_uploader_videos(&video_refs, query); + let featured_ids = ranked + .iter() + .take(12) + .map(|video| video.id.clone()) + .collect::>(); + let video_count = if last_page > 1 { + ((last_page as u64 - 1) * first_page_items.len() as u64) + last_page_items.len() as u64 + } else { + first_page_items.len() as u64 + }; + let total_views = first_page_items + .iter() + .chain(last_page_items.iter()) + .filter_map(|item| item.views) + .map(u64::from) + .sum(); + let newest = first_page_items.iter().filter_map(|item| item.uploadedAt).max(); + let oldest = last_page_items + .iter() + .filter_map(|item| item.uploadedAt) + .min() + .or_else(|| first_page_items.iter().filter_map(|item| item.uploadedAt).min()); + + for video in &mut video_refs { + video.uploader = display_name.clone(); + video.uploaderId = uploader_id.clone(); + } + + let layout = if featured_ids.is_empty() { + vec![UploaderLayoutRow::videos(None)] + } else { + vec![ + UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids), + UploaderLayoutRow::videos(None), + ] + }; + + Ok(Some(UploaderProfile { + id: uploader_id, + name: display_name, + url: Some(first_page_url), + channel: Some("omgxxx".to_string()), + verified: false, + videoCount: video_count, + totalViews: total_views, + channels: Some(vec![UploaderChannelStat { + channel: "omgxxx".to_string(), + videoCount: video_count, + firstSeenAt: iso_timestamp_from_unix(oldest), + lastSeenAt: iso_timestamp_from_unix(newest), + }]), + avatar: None, + description: None, + bio: None, + videos: profile_content.then_some(video_refs), + tapes: profile_content.then_some(Vec::new()), + playlists: profile_content.then_some(Vec::new()), + layout: Some(layout), + })) + } + async fn get( &self, cache: VideoCache, @@ -464,8 +809,7 @@ impl OmgxxxProvider { } }; - let mut requester = - crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); + let mut requester = requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, None).await { Ok(text) => text, Err(e) => { @@ -542,8 +886,7 @@ impl OmgxxxProvider { } }; - let mut requester = - crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); + let mut requester = requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, None).await { Ok(text) => text, Err(e) => { @@ -837,7 +1180,7 @@ impl OmgxxxProvider { } } - let video_item = VideoItem::new( + let mut video_item = VideoItem::new( id, title, video_url.to_string(), @@ -846,8 +1189,10 @@ impl OmgxxxProvider { duration, ) .views(views) - .preview(preview) - .tags(tags); + .preview(preview); + if !tags.is_empty() { + video_item.tags = Some(tags); + } items.push(video_item); } return items; @@ -865,11 +1210,40 @@ mod tests { id: "clubsweethearts".to_string(), title: "Club Sweethearts".to_string(), }])), - networks: Arc::new(RwLock::new(vec![])), + networks: Arc::new(RwLock::new(vec![FilterOption { + id: "mofos".to_string(), + title: "Club Sweethearts".to_string(), + }])), stars: Arc::new(RwLock::new(vec![])), } } + #[test] + fn uploader_name_prefers_site_before_network() { + let provider = test_provider(); + let target = provider + .uploader_target_from_name("Club Sweethearts") + .expect("target should resolve"); + + assert!(matches!(target.kind, OmgUploaderTargetKind::Site)); + assert_eq!(target.id, "clubsweethearts"); + } + + #[test] + fn uploader_id_round_trips_for_networks() { + let provider = test_provider(); + let target = provider + .uploader_target_from_id("omgxxx:network:mofos") + .expect("target should resolve"); + + assert!(matches!(target.kind, OmgUploaderTargetKind::Network)); + assert_eq!(target.id, "mofos"); + assert_eq!( + OmgxxxProvider::canonical_uploader_id(&target.kind, &target.id), + "omgxxx:network:mofos" + ); + } + #[test] fn parses_model_and_site_tags_without_empty_strings() { let provider = test_provider(); @@ -1043,4 +1417,26 @@ impl Provider for OmgxxxProvider { fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } + + async fn get_uploader( + &self, + cache: VideoCache, + pool: DbPool, + uploader_id: Option, + uploader_name: Option, + query: Option, + profile_content: bool, + options: ServerOptions, + ) -> std::result::Result, String> { + let _ = pool; + let Some(target) = + self.resolve_uploader_target(uploader_id.as_deref(), uploader_name.as_deref()) + else { + return Ok(None); + }; + + self.build_uploader_profile(cache, &target, query.as_deref(), profile_content, &options) + .await + .map_err(|error| error.to_string()) + } }