supjav tags-fixes

2026-04-03 18:17:14 +00:00
parent e680319541
commit c0717fdacf
2 changed files with 125 additions and 30 deletions
--- a/prompts/new-channel.md
+++ b/prompts/new-channel.md
@@ -1 +1 @@
-write a provider for the site "vjav.com". use playwright-mcp to get the index site and get the layout of videos from this. also figure out how searches work and if it has a different layout then. also find out how more videos get loaded/more pages urls. also find if it has video sites like recommended,most views etc and include them. try to find as much information about a video on the site as possible and build the video items with all the information. put tags and uploader into lists so they can be utilized later. on query check if the search already exists in these FilterObject arrays and use them for the url instead (similar to omgxxx). after coding this, test that all provided urls work, yt-dlp can download the video.url, the thumbnails work, searches, tag-/uploader- searches work, more pages work. analzye the provider too and set the channel tags and put it in a group that matches this provider or create a new group where it fits in. do not include the embed field in the videoitems responses. when parsing uploaders, make sure that the uploaders. if unsure about the endpoints, check with the docs dir.
+write a provider for the site "vjav.com". use playwright-mcp to get the index site and get the layout of videos from this. also figure out how searches work and if it has a different layout then. also find out how more videos get loaded/more pages urls. also find if it has video sites like recommended,most views etc and include them. try to find as much information about a video on the site as possible and build the video items with all the information. put tags and uploader into lists so they can be utilized later. on query check if the search already exists in these FilterObject arrays and use them for the url instead (similar to omgxxx). after coding this, test that all provided urls work, yt-dlp can download the video.url, the thumbnails work, searches, tag-/uploader- searches work, more pages work. Keep a key-value storage for all provided Tag titles to tag IDs to do the correct crawl for queries of a tag. analyze the provider too and set the channel tags and put it in a group that matches this provider or create a new group where it fits in. do not include the embed field in the videoitems responses. when parsing uploaders, make sure that the uploaders. if unsure about the endpoints, check with the docs dir.
--- a/src/providers/supjav.rs
+++ b/src/providers/supjav.rs
@@ -13,7 +13,7 @@ use htmlentity::entity::{ICodedDataTrait, decode};
 use regex::Regex;
 use scraper::{ElementRef, Html, Selector};
 use serde::Deserialize;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::process::Command;
 use std::sync::{Arc, RwLock};
 use std::thread;
@@ -454,12 +454,54 @@ impl SupjavProvider {
        Ok(url.to_string())
    }

-    fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
-        let normalized_query = Self::normalize_title(query);
-        options
-            .iter()
-            .find(|value| Self::normalize_title(&value.title) == normalized_query)
-            .map(|value| value.id.clone())
+    fn extend_filter_lookup(lookup: &mut HashMap<String, String>, options: &[FilterOption]) {
+        for option in options {
+            for key in Self::filter_lookup_keys(option) {
+                lookup.entry(key).or_insert_with(|| option.id.clone());
+            }
+        }
+    }
+
+    fn resolve_filter_lookup(&self) -> HashMap<String, String> {
+        let mut lookup = HashMap::new();
+
+        if let Ok(uploaders) = self.uploaders.read() {
+            Self::extend_filter_lookup(&mut lookup, &uploaders);
+        }
+        if let Ok(stars) = self.stars.read() {
+            Self::extend_filter_lookup(&mut lookup, &stars);
+        }
+        Self::extend_filter_lookup(&mut lookup, &self.categories);
+        if let Ok(tags) = self.tags.read() {
+            Self::extend_filter_lookup(&mut lookup, &tags);
+        }
+
+        lookup
+    }
+
+    fn filter_lookup_keys(option: &FilterOption) -> Vec<String> {
+        let mut keys = vec![Self::normalize_title(&option.title)];
+
+        let slug = option
+            .id
+            .trim_end_matches('/')
+            .rsplit('/')
+            .next()
+            .unwrap_or_default()
+            .trim();
+        if !slug.is_empty() {
+            keys.push(Self::normalize_title(&slug.replace('-', " ")));
+
+            if let Some(base_slug) = slug.strip_suffix("-jav") {
+                if !base_slug.is_empty() {
+                    keys.push(Self::normalize_title(&base_slug.replace('-', " ")));
+                }
+            }
+        }
+
+        keys.sort();
+        keys.dedup();
+        keys
    }

    fn resolve_option_target(&self, options: &ServerOptions) -> Option<String> {
@@ -481,23 +523,10 @@ impl SupjavProvider {
    }

    fn resolve_query_target(&self, query: &str) -> Option<String> {
-        if let Ok(uploaders) = self.uploaders.read() {
-            if let Some(target) = Self::match_filter(&uploaders, query) {
-                return self.normalize_archive_target(&target);
-            }
-        }
-        if let Ok(stars) = self.stars.read() {
-            if let Some(target) = Self::match_filter(&stars, query) {
-                return self.normalize_archive_target(&target);
-            }
-        }
-        if let Ok(tags) = self.tags.read() {
-            if let Some(target) = Self::match_filter(&tags, query) {
-                return self.normalize_archive_target(&target);
-            }
-        }
-        Self::match_filter(&self.categories, query)
-            .and_then(|target| self.normalize_archive_target(&target))
+        let normalized_query = Self::normalize_title(query);
+        self.resolve_filter_lookup()
+            .get(&normalized_query)
+            .and_then(|target| self.normalize_archive_target(target))
    }

    fn filters_need_loading(&self) -> bool {
@@ -1581,7 +1610,7 @@ print(json.dumps({
        let items = self
            .fetch_items_for_url(cache, url, sort, per_page_limit)
            .await?;
-        if exact_target.is_some() || items.len() >= 5 || !items.is_empty() {
+        if !items.is_empty() {
            return Ok(items);
        }

@@ -1647,13 +1676,23 @@ mod tests {
    fn test_provider() -> SupjavProvider {
        SupjavProvider {
            url: BASE_URL.to_string(),
-            categories: vec![FilterOption {
-                id: format!("{BASE_URL}/category/censored-jav"),
-                title: "Censored JAV".to_string(),
-            }],
+            categories: vec![
+                FilterOption {
+                    id: format!("{BASE_URL}/category/censored-jav"),
+                    title: "Censored JAV".to_string(),
+                },
+                FilterOption {
+                    id: format!("{BASE_URL}/category/uncensored-jav"),
+                    title: "Uncensored JAV".to_string(),
+                },
+            ],
            tags: Arc::new(RwLock::new(vec![FilterOption {
                id: "/tag/creampie".to_string(),
                title: "Creampie".to_string(),
+            },
+            FilterOption {
+                id: "/tag/uncensored-jav".to_string(),
+                title: "Uncensored JAV".to_string(),
            }])),
            uploaders: Arc::new(RwLock::new(vec![])),
            stars: Arc::new(RwLock::new(vec![])),
@@ -1666,6 +1705,18 @@ mod tests {
        assert_eq!(SupjavProvider::strip_count_suffix("Censored JAV"), "Censored JAV");
    }

+    #[test]
+    fn filter_lookup_stores_title_and_slug_aliases() {
+        let option = FilterOption {
+            id: format!("{BASE_URL}/category/uncensored-jav"),
+            title: "Uncensored JAV".to_string(),
+        };
+        let keys = SupjavProvider::filter_lookup_keys(&option);
+
+        assert!(keys.iter().any(|value| value == "uncensored jav"));
+        assert!(keys.iter().any(|value| value == "uncensored"));
+    }
+
    #[test]
    fn builds_archive_page_url_with_query() {
        assert_eq!(
@@ -1756,6 +1807,50 @@ mod tests {
        );
    }

+    #[test]
+    fn resolves_category_queries_with_or_without_jav_suffix() {
+        let provider = test_provider();
+        assert_eq!(
+            provider.resolve_query_target("Uncensored JAV").as_deref(),
+            Some("https://supjav.com/category/uncensored-jav")
+        );
+        assert_eq!(
+            provider.resolve_query_target("Uncensored").as_deref(),
+            Some("https://supjav.com/category/uncensored-jav")
+        );
+    }
+
+    #[test]
+    fn category_lookup_wins_over_same_named_tag() {
+        let provider = test_provider();
+        let lookup = provider.resolve_filter_lookup();
+
+        assert_eq!(
+            lookup.get("uncensored jav").map(String::as_str),
+            Some("https://supjav.com/category/uncensored-jav")
+        );
+        assert_eq!(
+            lookup.get("uncensored").map(String::as_str),
+            Some("https://supjav.com/category/uncensored-jav")
+        );
+    }
+
+    #[test]
+    fn item_query_matching_uses_full_query_text() {
+        let mut item = VideoItem::new(
+            "abc".to_string(),
+            "Sample".to_string(),
+            "https://supjav.com/sample".to_string(),
+            CHANNEL_ID.to_string(),
+            String::new(),
+            0,
+        );
+        item.tags = Some(vec!["Uncensored".to_string()]);
+
+        assert!(SupjavProvider::item_matches_query(&item, "Uncensored"));
+        assert!(!SupjavProvider::item_matches_query(&item, "Uncensored JAV"));
+    }
+
    fn test_db_pool() -> DbPool {
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)