supjav tags-fixes
This commit is contained in:
@@ -1 +1 @@
|
|||||||
Write a provider for the site "vjav.com". Use playwright-mcp to get the index site and get the layout of videos from this. Also figure out how searches work and whether search results use a different layout. Also find out how more videos get loaded and what the URLs of further pages look like. Also find out whether it has video listings like recommended, most viewed, etc., and include them. Try to find as much information about a video on the site as possible and build the video items with all the information. Put tags and uploaders into lists so they can be utilized later. On query, check if the search already exists in these FilterObject arrays and use them for the URL instead (similar to omgxxx). After coding this, test that all provided URLs work, yt-dlp can download the video.url, the thumbnails work, searches and tag/uploader searches work, and more pages work. Analyze the provider too, set the channel tags, and put it in a group that matches this provider or create a new group where it fits in. Do not include the embed field in the videoitems responses. When parsing uploaders, make sure that the uploaders. If unsure about the endpoints, check with the docs dir.
|
Write a provider for the site "vjav.com". Use playwright-mcp to get the index site and get the layout of videos from this. Also figure out how searches work and whether search results use a different layout. Also find out how more videos get loaded and what the URLs of further pages look like. Also find out whether it has video listings like recommended, most viewed, etc., and include them. Try to find as much information about a video on the site as possible and build the video items with all the information. Put tags and uploaders into lists so they can be utilized later. On query, check if the search already exists in these FilterObject arrays and use them for the URL instead (similar to omgxxx). After coding this, test that all provided URLs work, yt-dlp can download the video.url, the thumbnails work, searches and tag/uploader searches work, and more pages work. Keep a key-value store that maps every provided tag title to its tag ID, so that queries for a tag do the correct crawl. Analyze the provider too, set the channel tags, and put it in a group that matches this provider or create a new group where it fits in. Do not include the embed field in the videoitems responses. When parsing uploaders, make sure that the uploaders. If unsure about the endpoints, check with the docs dir.
|
||||||
@@ -13,7 +13,7 @@ use htmlentity::entity::{ICodedDataTrait, decode};
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{ElementRef, Html, Selector};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::collections::HashSet;
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::{Arc, RwLock};
|
||||||
use std::thread;
|
use std::thread;
|
||||||
@@ -454,12 +454,54 @@ impl SupjavProvider {
|
|||||||
Ok(url.to_string())
|
Ok(url.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
|
fn extend_filter_lookup(lookup: &mut HashMap<String, String>, options: &[FilterOption]) {
|
||||||
let normalized_query = Self::normalize_title(query);
|
for option in options {
|
||||||
options
|
for key in Self::filter_lookup_keys(option) {
|
||||||
.iter()
|
lookup.entry(key).or_insert_with(|| option.id.clone());
|
||||||
.find(|value| Self::normalize_title(&value.title) == normalized_query)
|
}
|
||||||
.map(|value| value.id.clone())
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_filter_lookup(&self) -> HashMap<String, String> {
|
||||||
|
let mut lookup = HashMap::new();
|
||||||
|
|
||||||
|
if let Ok(uploaders) = self.uploaders.read() {
|
||||||
|
Self::extend_filter_lookup(&mut lookup, &uploaders);
|
||||||
|
}
|
||||||
|
if let Ok(stars) = self.stars.read() {
|
||||||
|
Self::extend_filter_lookup(&mut lookup, &stars);
|
||||||
|
}
|
||||||
|
Self::extend_filter_lookup(&mut lookup, &self.categories);
|
||||||
|
if let Ok(tags) = self.tags.read() {
|
||||||
|
Self::extend_filter_lookup(&mut lookup, &tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
lookup
|
||||||
|
}
|
||||||
|
|
||||||
|
fn filter_lookup_keys(option: &FilterOption) -> Vec<String> {
|
||||||
|
let mut keys = vec![Self::normalize_title(&option.title)];
|
||||||
|
|
||||||
|
let slug = option
|
||||||
|
.id
|
||||||
|
.trim_end_matches('/')
|
||||||
|
.rsplit('/')
|
||||||
|
.next()
|
||||||
|
.unwrap_or_default()
|
||||||
|
.trim();
|
||||||
|
if !slug.is_empty() {
|
||||||
|
keys.push(Self::normalize_title(&slug.replace('-', " ")));
|
||||||
|
|
||||||
|
if let Some(base_slug) = slug.strip_suffix("-jav") {
|
||||||
|
if !base_slug.is_empty() {
|
||||||
|
keys.push(Self::normalize_title(&base_slug.replace('-', " ")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
keys.sort();
|
||||||
|
keys.dedup();
|
||||||
|
keys
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_option_target(&self, options: &ServerOptions) -> Option<String> {
|
fn resolve_option_target(&self, options: &ServerOptions) -> Option<String> {
|
||||||
@@ -481,23 +523,10 @@ impl SupjavProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_query_target(&self, query: &str) -> Option<String> {
|
fn resolve_query_target(&self, query: &str) -> Option<String> {
|
||||||
if let Ok(uploaders) = self.uploaders.read() {
|
let normalized_query = Self::normalize_title(query);
|
||||||
if let Some(target) = Self::match_filter(&uploaders, query) {
|
self.resolve_filter_lookup()
|
||||||
return self.normalize_archive_target(&target);
|
.get(&normalized_query)
|
||||||
}
|
.and_then(|target| self.normalize_archive_target(target))
|
||||||
}
|
|
||||||
if let Ok(stars) = self.stars.read() {
|
|
||||||
if let Some(target) = Self::match_filter(&stars, query) {
|
|
||||||
return self.normalize_archive_target(&target);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if let Ok(tags) = self.tags.read() {
|
|
||||||
if let Some(target) = Self::match_filter(&tags, query) {
|
|
||||||
return self.normalize_archive_target(&target);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Self::match_filter(&self.categories, query)
|
|
||||||
.and_then(|target| self.normalize_archive_target(&target))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filters_need_loading(&self) -> bool {
|
fn filters_need_loading(&self) -> bool {
|
||||||
@@ -1581,7 +1610,7 @@ print(json.dumps({
|
|||||||
let items = self
|
let items = self
|
||||||
.fetch_items_for_url(cache, url, sort, per_page_limit)
|
.fetch_items_for_url(cache, url, sort, per_page_limit)
|
||||||
.await?;
|
.await?;
|
||||||
if exact_target.is_some() || items.len() >= 5 || !items.is_empty() {
|
if !items.is_empty() {
|
||||||
return Ok(items);
|
return Ok(items);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1647,13 +1676,23 @@ mod tests {
|
|||||||
fn test_provider() -> SupjavProvider {
|
fn test_provider() -> SupjavProvider {
|
||||||
SupjavProvider {
|
SupjavProvider {
|
||||||
url: BASE_URL.to_string(),
|
url: BASE_URL.to_string(),
|
||||||
categories: vec![FilterOption {
|
categories: vec![
|
||||||
|
FilterOption {
|
||||||
id: format!("{BASE_URL}/category/censored-jav"),
|
id: format!("{BASE_URL}/category/censored-jav"),
|
||||||
title: "Censored JAV".to_string(),
|
title: "Censored JAV".to_string(),
|
||||||
}],
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: format!("{BASE_URL}/category/uncensored-jav"),
|
||||||
|
title: "Uncensored JAV".to_string(),
|
||||||
|
},
|
||||||
|
],
|
||||||
tags: Arc::new(RwLock::new(vec![FilterOption {
|
tags: Arc::new(RwLock::new(vec![FilterOption {
|
||||||
id: "/tag/creampie".to_string(),
|
id: "/tag/creampie".to_string(),
|
||||||
title: "Creampie".to_string(),
|
title: "Creampie".to_string(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "/tag/uncensored-jav".to_string(),
|
||||||
|
title: "Uncensored JAV".to_string(),
|
||||||
}])),
|
}])),
|
||||||
uploaders: Arc::new(RwLock::new(vec![])),
|
uploaders: Arc::new(RwLock::new(vec![])),
|
||||||
stars: Arc::new(RwLock::new(vec![])),
|
stars: Arc::new(RwLock::new(vec![])),
|
||||||
@@ -1666,6 +1705,18 @@ mod tests {
|
|||||||
assert_eq!(SupjavProvider::strip_count_suffix("Censored JAV"), "Censored JAV");
|
assert_eq!(SupjavProvider::strip_count_suffix("Censored JAV"), "Censored JAV");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn filter_lookup_stores_title_and_slug_aliases() {
|
||||||
|
let option = FilterOption {
|
||||||
|
id: format!("{BASE_URL}/category/uncensored-jav"),
|
||||||
|
title: "Uncensored JAV".to_string(),
|
||||||
|
};
|
||||||
|
let keys = SupjavProvider::filter_lookup_keys(&option);
|
||||||
|
|
||||||
|
assert!(keys.iter().any(|value| value == "uncensored jav"));
|
||||||
|
assert!(keys.iter().any(|value| value == "uncensored"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn builds_archive_page_url_with_query() {
|
fn builds_archive_page_url_with_query() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1756,6 +1807,50 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolves_category_queries_with_or_without_jav_suffix() {
|
||||||
|
let provider = test_provider();
|
||||||
|
assert_eq!(
|
||||||
|
provider.resolve_query_target("Uncensored JAV").as_deref(),
|
||||||
|
Some("https://supjav.com/category/uncensored-jav")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
provider.resolve_query_target("Uncensored").as_deref(),
|
||||||
|
Some("https://supjav.com/category/uncensored-jav")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn category_lookup_wins_over_same_named_tag() {
|
||||||
|
let provider = test_provider();
|
||||||
|
let lookup = provider.resolve_filter_lookup();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
lookup.get("uncensored jav").map(String::as_str),
|
||||||
|
Some("https://supjav.com/category/uncensored-jav")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
lookup.get("uncensored").map(String::as_str),
|
||||||
|
Some("https://supjav.com/category/uncensored-jav")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn item_query_matching_uses_full_query_text() {
|
||||||
|
let mut item = VideoItem::new(
|
||||||
|
"abc".to_string(),
|
||||||
|
"Sample".to_string(),
|
||||||
|
"https://supjav.com/sample".to_string(),
|
||||||
|
CHANNEL_ID.to_string(),
|
||||||
|
String::new(),
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
item.tags = Some(vec!["Uncensored".to_string()]);
|
||||||
|
|
||||||
|
assert!(SupjavProvider::item_matches_query(&item, "Uncensored"));
|
||||||
|
assert!(!SupjavProvider::item_matches_query(&item, "Uncensored JAV"));
|
||||||
|
}
|
||||||
|
|
||||||
fn test_db_pool() -> DbPool {
|
fn test_db_pool() -> DbPool {
|
||||||
let unique = SystemTime::now()
|
let unique = SystemTime::now()
|
||||||
.duration_since(UNIX_EPOCH)
|
.duration_since(UNIX_EPOCH)
|
||||||
|
|||||||
Reference in New Issue
Block a user