From 5e909178a1c61d54af825c31c5d57668bdb9110d Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 16 Mar 2026 22:02:06 +0000 Subject: [PATCH] fixes and upgrades --- src/api.rs | 4 + src/providers/rule34video.rs | 452 ++++++++++++++++++++++++++++++----- src/videos.rs | 2 + 3 files changed, 401 insertions(+), 57 deletions(-) diff --git a/src/api.rs b/src/api.rs index 9671142..5b830d4 100644 --- a/src/api.rs +++ b/src/api.rs @@ -360,6 +360,8 @@ async fn videos_post( duration: Some(duration), sort: Some(sort.clone()), sexuality: Some(sexuality), + uploaderUrl: None, + uploaderId: None, }; let mut video_items = run_provider_guarded( &channel, @@ -550,6 +552,8 @@ async fn uploader_post( duration: Some(duration), sort: Some(sort.clone()), sexuality: Some(sexuality), + uploaderUrl: uploader_request.uploaderUrl.clone(), + uploaderId: uploader_request.uploaderId.clone(), }; let provider = get_provider("all") diff --git a/src/providers/rule34video.rs b/src/providers/rule34video.rs index 4914f2b..64129c1 100644 --- a/src/providers/rule34video.rs +++ b/src/providers/rule34video.rs @@ -41,6 +41,8 @@ pub struct Rule34videoProvider { #[derive(Debug, Clone)] struct QueryTarget { url: String, + uploader_name: Option, + uploader_url: Option, } impl Rule34videoProvider { @@ -221,19 +223,24 @@ impl Rule34videoProvider { format!("{}/{}", self.url, url.trim_start_matches("./")) } - fn normalize_member_url(&self, url: &str) -> String { + fn normalize_member_profile_url(&self, url: &str) -> String { let absolute = self.normalize_url(url); if absolute.is_empty() { return absolute; } - if absolute.ends_with("/videos/") { - return absolute; - } let trimmed = absolute.trim_end_matches('/'); if trimmed.ends_with("/videos") { - return format!("{trimmed}/"); + return format!("{}/", trimmed.trim_end_matches("/videos")); } - format!("{trimmed}/videos/") + format!("{trimmed}/") + } + + fn normalize_member_videos_url(&self, url: &str) -> String { + let profile = self.normalize_member_profile_url(url); + if profile.is_empty() { + return profile; + } + format!("{}videos/", profile) } fn push_unique(target: &Arc>>, item: FilterOption) { @@ -297,7 +304,7 @@ impl Rule34videoProvider { Self::push_unique( &self.uploaders, FilterOption { - id: self.normalize_member_url(url), + id: self.normalize_member_videos_url(url), title, }, ); @@ -353,6 +360,10 @@ impl Rule34videoProvider { ) } + fn build_member_search_url(&self, query: &str) -> String { + format!("{}/members/?q={}", self.url, query.replace(' ', "%20")) + } + fn build_filtered_url(&self, base: &str, page: u8, sort: &str) -> String { let mut url = if page > 1 { format!("{}{page}/", base.trim_end_matches('/').to_string() + "/") @@ -366,10 +377,35 @@ impl Rule34videoProvider { } fn resolve_option_target(&self, options: &ServerOptions) -> Option { + if let Some(uploader_url) = options.uploaderUrl.as_deref() { + if !uploader_url.is_empty() { + return Some(QueryTarget { + url: self.normalize_member_videos_url(uploader_url), + uploader_name: None, + uploader_url: Some(self.normalize_member_profile_url(uploader_url)), + }); + } + } + + if let Some(uploader_id) = options.uploaderId.as_deref() { + let uploader_id = uploader_id.trim(); + if !uploader_id.is_empty() && uploader_id.chars().all(|value| value.is_ascii_digit()) { + return Some(QueryTarget { + url: self.normalize_member_videos_url(&format!("/members/{uploader_id}/")), + uploader_name: None, + uploader_url: Some( + self.normalize_member_profile_url(&format!("/members/{uploader_id}/")), + ), + }); + } + } + if let Some(category) = options.categories.as_deref() { if !category.is_empty() && category != "all" { return Some(QueryTarget { url: category.to_string(), + uploader_name: None, + uploader_url: None, }); } } @@ -378,6 +414,8 @@ impl Rule34videoProvider { if !artist.is_empty() && artist != "all" { return Some(QueryTarget { url: artist.to_string(), + uploader_name: None, + uploader_url: None, }); } } @@ -386,6 +424,8 @@ impl Rule34videoProvider { if !uploader.is_empty() && uploader != "all" { return Some(QueryTarget { url: uploader.to_string(), + uploader_name: None, + uploader_url: None, }); } } @@ -400,6 +440,8 @@ impl Rule34videoProvider { .find(|value| Self::normalize_title(&value.title) == normalized_query) .map(|value| QueryTarget { url: value.id.clone(), + uploader_name: None, + uploader_url: None, }) } @@ -431,12 +473,72 @@ impl Rule34videoProvider { None } + fn parse_member_search_target(&self, html: &str, query: &str) -> Option { + if html.trim().is_empty() { + return None; + } + + let document = Html::parse_document(html); + let card_selector = Self::selector("#list_members_members_items .item").ok()?; + let link_selector = Self::selector("a.wrap_item[href]").ok()?; + let name_selector = Self::selector(".name").ok()?; + let normalized_query = Self::normalize_title(query); + + for card in document.select(&card_selector) { + let Some(link) = card.select(&link_selector).next() else { + continue; + }; + + let name = card + .select(&name_selector) + .next() + .map(|value| Self::decode_html(&Self::text_of(&value))) + .unwrap_or_default(); + if Self::normalize_title(&name) != normalized_query { + continue; + } + + let href = link.value().attr("href").unwrap_or_default(); + if href.is_empty() { + continue; + } + + let profile_url = self.normalize_member_profile_url(href); + self.add_uploader_filter(&profile_url, &name); + + return Some(QueryTarget { + url: self.normalize_member_videos_url(&profile_url), + uploader_name: Some(name), + uploader_url: Some(profile_url), + }); + } + + None + } + + async fn resolve_member_search_target( + &self, + query: &str, + options: &ServerOptions, + ) -> Option { + let url = self.build_member_search_url(query); + let mut requester = requester_or_default( + options, + module_path!(), + "rule34video.resolve_member_search_target", + ); + let html = requester.get(&url, None).await.ok()?; + self.parse_member_search_target(&html, query) + } + async fn fetch_page_items( &self, cache: VideoCache, cache_key: String, url: String, use_cache: bool, + per_page: usize, + enrich_details: bool, options: ServerOptions, ) -> Result> { let old_items = if use_cache { @@ -467,8 +569,7 @@ impl Rule34videoProvider { return Ok(old_items); } }; - - let items = match self.parse_list_videos(&text) { + let mut items = match self.parse_list_videos(&text) { Ok(items) => items, Err(error) => { report_provider_error( @@ -480,11 +581,21 @@ impl Rule34videoProvider { return Ok(old_items); } }; - if items.is_empty() { return Ok(old_items); } + if per_page > 0 && items.len() > per_page { + items.truncate(per_page); + } + + if !enrich_details { + if use_cache { + cache.insert(cache_key, items.clone()); + } + return Ok(items); + } + let enriched = self.enrich_video_items(items, requester).await; if !enriched.is_empty() && use_cache { cache.insert(cache_key, enriched.clone()); @@ -532,7 +643,9 @@ impl Rule34videoProvider { } let document = Html::parse_document(html); - let card_selector = Self::selector("div.item.thumb")?; + let uploaded_card_selector = + Self::selector("#list_videos_uploaded_videos_items div.item.thumb")?; + let generic_card_selector = Self::selector("div.item.thumb")?; let link_selector = Self::selector("a.th.js-open-popup[href], a[href*=\"/video/\"]")?; let title_selector = Self::selector(".thumb_title")?; let image_selector = Self::selector("img")?; @@ -540,8 +653,16 @@ impl Rule34videoProvider { let views_selector = Self::selector(".views")?; let mut items = Vec::new(); + let cards = { + let uploaded_cards = document.select(&uploaded_card_selector).collect::>(); + if uploaded_cards.is_empty() { + document.select(&generic_card_selector).collect::>() + } else { + uploaded_cards + } + }; - for card in document.select(&card_selector) { + for card in cards { let Some(link) = card.select(&link_selector).next() else { continue; }; @@ -599,9 +720,9 @@ impl Rule34videoProvider { Ok(items) } - fn collect_link_values( + fn collect_link_values_from_root( &self, - document: &Html, + root: ElementRef<'_>, selector: &Selector, normalize_url: F, ) -> Vec<(String, String)> @@ -611,7 +732,7 @@ impl Rule34videoProvider { let mut seen = HashSet::new(); let mut values = Vec::new(); - for link in document.select(selector) { + for link in root.select(selector) { let Some(href) = link.value().attr("href") else { continue; }; @@ -628,6 +749,42 @@ impl Rule34videoProvider { values } + fn collect_scoped_link_values( + &self, + document: &Html, + root_selectors: &[&str], + link_selector: &str, + normalize_url: F, + ) -> Vec<(String, String)> + where + F: Fn(&str) -> String + Copy, + { + let Ok(link_selector) = Self::selector(link_selector) else { + return vec![]; + }; + + for root_selector in root_selectors { + let Ok(root_selector) = Self::selector(root_selector) else { + continue; + }; + + let mut values = Vec::new(); + for root in document.select(&root_selector) { + values.extend(self.collect_link_values_from_root( + root, + &link_selector, + normalize_url, + )); + } + + if !values.is_empty() { + return values; + } + } + + vec![] + } + fn dedupe_terms(values: Vec) -> Vec { let mut seen = HashSet::new(); let mut deduped = Vec::new(); @@ -648,10 +805,6 @@ impl Rule34videoProvider { let document = Html::parse_document(html); let title_selector = Self::selector("h1, .headline h1, .headline .title").ok(); - let category_selector = Self::selector("a[href*=\"/categories/\"]").ok(); - let artist_selector = Self::selector("a[href*=\"/artists/\"]").ok(); - let uploader_selector = Self::selector("a[href*=\"/members/\"]").ok(); - let tag_selector = Self::selector("a[href*=\"/tags/\"]").ok(); if item.title.is_empty() { if let Some(selector) = &title_selector { @@ -664,32 +817,64 @@ impl Rule34videoProvider { } } - let categories = category_selector - .as_ref() - .map(|selector| { - self.collect_link_values(&document, selector, |href| self.normalize_url(href)) - }) - .unwrap_or_default(); - let artists = artist_selector - .as_ref() - .map(|selector| { - self.collect_link_values(&document, selector, |href| self.normalize_url(href)) - }) - .unwrap_or_default(); - let uploaders = uploader_selector - .as_ref() - .map(|selector| { - self.collect_link_values(&document, selector, |href| { - self.normalize_member_url(href) - }) - }) - .unwrap_or_default(); - let tags = tag_selector - .as_ref() - .map(|selector| { - self.collect_link_values(&document, selector, |href| self.normalize_url(href)) - }) - .unwrap_or_default(); + let categories = self.collect_scoped_link_values( + &document, + &[ + "#tab2", + ".info", + ".item-list", + ".video-info", + ".block-video .info", + ".block-video .item-list", + ".block-video .video-info", + ], + "a[href*=\"/categories/\"]", + |href| self.normalize_url(href), + ); + let artists = self.collect_scoped_link_values( + &document, + &[ + "#tab2", + ".info", + ".item-list", + ".video-info", + ".block-video .info", + ".block-video .item-list", + ".block-video .video-info", + ], + "a[href*=\"/artists/\"]", + |href| self.normalize_url(href), + ); + let uploaders = self.collect_scoped_link_values( + &document, + &[ + ".headline", + "#tab2", + ".info", + ".item-list", + ".video-info", + ".block-video .info", + ".block-video .item-list", + ".block-video .video-info", + ], + "a[href*=\"/members/\"]", + |href| self.normalize_member_profile_url(href), + ); + let tags = self.collect_scoped_link_values( + &document, + &[ + ".tags-outer", + "#tab2", + ".info", + ".item-list", + ".video-info", + ".block-video .info", + ".block-video .item-list", + ".block-video .video-info", + ], + "a[href*=\"/tags/\"]", + |href| self.normalize_url(href), + ); for (id, title) in &categories { self.add_category_filter(id, title); @@ -733,20 +918,51 @@ impl Provider for Rule34videoProvider { sort: String, query: Option, page: String, - _per_page: String, + per_page: String, options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1); + let per_page = per_page.parse::().unwrap_or(24); let sort = Self::expected_sort(&sort); let use_cache = sort != "pseudo_random"; + let option_target = self.resolve_option_target(&options); + let cached_query_target = query + .as_deref() + .and_then(|value| self.resolve_query_target(value)); + let member_search_target = if option_target.is_none() && cached_query_target.is_none() { + match query.as_deref() { + Some(query) => self.resolve_member_search_target(query, &options).await, + None => None, + } + } else { + None + }; + let query_target = cached_query_target.or(member_search_target); + let uploader_context = query_target + .as_ref() + .and_then(|target| { + target.uploader_name.as_ref().map(|name| { + ( + name.clone(), + target + .uploader_url + .clone() + .unwrap_or_else(|| self.normalize_member_profile_url(&target.url)), + ) + }) + }) + .or_else(|| { + options.uploaderUrl.as_ref().and_then(|url| { + query + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(|name| (name.to_string(), self.normalize_member_profile_url(url))) + }) + }); + let enrich_details = uploader_context.is_none(); - let (cache_key, url) = match ( - self.resolve_option_target(&options), - query - .as_deref() - .and_then(|value| self.resolve_query_target(value)), - query.as_deref(), - ) { + let (cache_key, url) = match (option_target, query_target, query.as_deref()) { (Some(target), _, _) => ( format!("rule34video:target:{}:{}:{}", page, sort, target.url), self.build_filtered_url(&target.url, page, sort), @@ -779,16 +995,37 @@ impl Provider for Rule34videoProvider { ), }; - match self - .fetch_page_items(cache, cache_key, url, use_cache, options) + let mut items = match self + .fetch_page_items( + cache, + cache_key, + url, + use_cache, + per_page, + enrich_details, + options, + ) .await { Ok(items) => items, Err(error) => { eprintln!("Error fetching Rule34Video videos: {error}"); - vec![] + return vec![]; + } + }; + + if let Some((uploader_name, uploader_url)) = uploader_context { + for item in &mut items { + if item.uploader.is_none() { + item.uploader = Some(uploader_name.clone()); + } + if item.uploaderUrl.is_none() { + item.uploaderUrl = Some(uploader_url.clone()); + } } } + + items } fn get_channel(&self, clientversion: ClientVersion) -> Option { @@ -809,6 +1046,27 @@ mod tests { assert!(url.contains("from_videos=2")); } + #[test] + fn parses_member_search_target() { + let provider = Rule34videoProvider::new(); + let html = r#" + + "#; + + let target = provider + .parse_member_search_target(html, "ChillUploader") + .expect("expected member target"); + assert_eq!(target.url, "https://rule34video.com/members/80235/videos/"); + } + #[test] fn parses_listing_cards() { let provider = Rule34videoProvider::new(); @@ -862,7 +1120,7 @@ mod tests { assert_eq!(item.uploader, Some("Jackerman".to_string())); assert_eq!( item.uploaderUrl, - Some("https://rule34video.com/members/123/jackerman/videos/".to_string()) + Some("https://rule34video.com/members/123/jackerman/".to_string()) ); assert_eq!( item.tags, @@ -884,4 +1142,84 @@ mod tests { assert!(categories.iter().any(|value| value.title == "Pokemon")); assert!(tags.iter().any(|value| value.title == "animated")); } + + #[test] + fn ignores_unrelated_page_wide_tags_outside_video_metadata() { + let provider = Rule34videoProvider::new(); + let item = VideoItem::new( + "4288578".to_string(), + "Friendly Encounter Preview".to_string(), + "https://rule34video.com/video/4288578/friendly-encounter-preview/".to_string(), + "rule34video".to_string(), + "https://img.example/thumb.jpg".to_string(), + 605, + ); + let html = r#" + +
+ +
+ 3d + animated +
+
+ "#; + + let item = provider.apply_detail_video(item, html); + assert_eq!( + item.tags, + Some(vec![ + "Pokemon".to_string(), + "Jackerman".to_string(), + "3d".to_string(), + "animated".to_string(), + ]) + ); + } + + #[test] + fn member_profile_listing_uses_uploaded_videos_only() { + let provider = Rule34videoProvider::new(); + let html = r#" +
+
+
+ + + +
[Xype3D] Jane's Maid to Order - Full Animation
+
10:12
+
88K
+
+
+
+
+
+
+ + + +
Favorite video
+
2:22
+
236K
+
+
+
+ "#; + + let items = provider.parse_list_videos(html).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].id, "4290070"); + assert_eq!( + items[0].title, + "[Xype3D] Jane's Maid to Order - Full Animation" + ); + } } diff --git a/src/videos.rs b/src/videos.rs index 0491711..7f5766a 100644 --- a/src/videos.rs +++ b/src/videos.rs @@ -97,6 +97,8 @@ pub struct ServerOptions { pub duration: Option, // pub sort: Option, // pub sexuality: Option, // + pub uploaderUrl: Option, + pub uploaderId: Option, } #[derive(serde::Serialize, Debug)]