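Fixes and upgrades: Rule34video uploader targeting (uploaderUrl/uploaderId options), member-search lookup, and scoped detail-page metadata parsing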

This commit is contained in:
Simon
2026-03-16 22:02:06 +00:00
parent 6b1072025c
commit 5e909178a1
3 changed files with 401 additions and 57 deletions

View File

@@ -360,6 +360,8 @@ async fn videos_post(
duration: Some(duration),
sort: Some(sort.clone()),
sexuality: Some(sexuality),
uploaderUrl: None,
uploaderId: None,
};
let mut video_items = run_provider_guarded(
&channel,
@@ -550,6 +552,8 @@ async fn uploader_post(
duration: Some(duration),
sort: Some(sort.clone()),
sexuality: Some(sexuality),
uploaderUrl: uploader_request.uploaderUrl.clone(),
uploaderId: uploader_request.uploaderId.clone(),
};
let provider = get_provider("all")

View File

@@ -41,6 +41,8 @@ pub struct Rule34videoProvider {
/// A resolved listing target: the URL to fetch plus optional uploader context.
#[derive(Debug, Clone)]
struct QueryTarget {
/// Base listing URL; `get_videos` passes it to `build_filtered_url` together with page and sort.
url: String,
/// Uploader display name. In the code visible here it is only set when the
/// target was resolved from a member-search result card; otherwise `None`.
uploader_name: Option<String>,
/// Normalized member profile URL for uploader-based targets; `None` for
/// category/artist/filter-style targets.
uploader_url: Option<String>,
}
impl Rule34videoProvider {
@@ -221,19 +223,24 @@ impl Rule34videoProvider {
format!("{}/{}", self.url, url.trim_start_matches("./"))
}
fn normalize_member_url(&self, url: &str) -> String {
fn normalize_member_profile_url(&self, url: &str) -> String {
let absolute = self.normalize_url(url);
if absolute.is_empty() {
return absolute;
}
if absolute.ends_with("/videos/") {
return absolute;
}
let trimmed = absolute.trim_end_matches('/');
if trimmed.ends_with("/videos") {
return format!("{trimmed}/");
return format!("{}/", trimmed.trim_end_matches("/videos"));
}
format!("{trimmed}/videos/")
format!("{trimmed}/")
}
/// Converts any member URL into that member's `videos/` listing URL.
///
/// The URL is first normalized with `normalize_member_profile_url`; an empty
/// result is passed through unchanged, otherwise `videos/` is appended to the
/// normalized profile URL.
fn normalize_member_videos_url(&self, url: &str) -> String {
    match self.normalize_member_profile_url(url) {
        // Nothing usable to build on: propagate the empty string as-is.
        empty if empty.is_empty() => empty,
        profile_url => format!("{}videos/", profile_url),
    }
}
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
@@ -297,7 +304,7 @@ impl Rule34videoProvider {
Self::push_unique(
&self.uploaders,
FilterOption {
id: self.normalize_member_url(url),
id: self.normalize_member_videos_url(url),
title,
},
);
@@ -353,6 +360,10 @@ impl Rule34videoProvider {
)
}
/// Builds the member-search URL (`{base}/members/?q=...`) for `query`.
///
/// The query is percent-encoded byte by byte: every byte outside the RFC 3986
/// "unreserved" set (`A-Z a-z 0-9 - . _ ~`) is emitted as `%XX`. Spaces still
/// become `%20` as before, but characters such as `&`, `#`, `+`, `%`, `?` and
/// non-ASCII text can no longer truncate or alter the `q` parameter.
fn build_member_search_url(&self, query: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(query.len());
    for byte in query.bytes() {
        match byte {
            // Unreserved characters are safe to pass through verbatim.
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                encoded.push(char::from(byte));
            }
            // Everything else (including each byte of a multi-byte UTF-8
            // sequence) is escaped as an upper-case hex pair.
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }

    format!("{}/members/?q={}", self.url, encoded)
}
fn build_filtered_url(&self, base: &str, page: u8, sort: &str) -> String {
let mut url = if page > 1 {
format!("{}{page}/", base.trim_end_matches('/').to_string() + "/")
@@ -366,10 +377,35 @@ impl Rule34videoProvider {
}
fn resolve_option_target(&self, options: &ServerOptions) -> Option<QueryTarget> {
if let Some(uploader_url) = options.uploaderUrl.as_deref() {
if !uploader_url.is_empty() {
return Some(QueryTarget {
url: self.normalize_member_videos_url(uploader_url),
uploader_name: None,
uploader_url: Some(self.normalize_member_profile_url(uploader_url)),
});
}
}
if let Some(uploader_id) = options.uploaderId.as_deref() {
let uploader_id = uploader_id.trim();
if !uploader_id.is_empty() && uploader_id.chars().all(|value| value.is_ascii_digit()) {
return Some(QueryTarget {
url: self.normalize_member_videos_url(&format!("/members/{uploader_id}/")),
uploader_name: None,
uploader_url: Some(
self.normalize_member_profile_url(&format!("/members/{uploader_id}/")),
),
});
}
}
if let Some(category) = options.categories.as_deref() {
if !category.is_empty() && category != "all" {
return Some(QueryTarget {
url: category.to_string(),
uploader_name: None,
uploader_url: None,
});
}
}
@@ -378,6 +414,8 @@ impl Rule34videoProvider {
if !artist.is_empty() && artist != "all" {
return Some(QueryTarget {
url: artist.to_string(),
uploader_name: None,
uploader_url: None,
});
}
}
@@ -386,6 +424,8 @@ impl Rule34videoProvider {
if !uploader.is_empty() && uploader != "all" {
return Some(QueryTarget {
url: uploader.to_string(),
uploader_name: None,
uploader_url: None,
});
}
}
@@ -400,6 +440,8 @@ impl Rule34videoProvider {
.find(|value| Self::normalize_title(&value.title) == normalized_query)
.map(|value| QueryTarget {
url: value.id.clone(),
uploader_name: None,
uploader_url: None,
})
}
@@ -431,12 +473,72 @@ impl Rule34videoProvider {
None
}
/// Scans a member-search results page for a member whose displayed name
/// matches `query` (compared after `normalize_title` on both sides).
///
/// Returns `None` when the HTML is blank, a selector cannot be built, or no
/// card with a matching name and a non-empty `href` exists. On a match the
/// member is also registered through `add_uploader_filter`, and the returned
/// target points at the member's videos listing.
fn parse_member_search_target(&self, html: &str, query: &str) -> Option<QueryTarget> {
    if html.trim().is_empty() {
        return None;
    }

    let document = Html::parse_document(html);
    let cards = Self::selector("#list_members_members_items .item").ok()?;
    let links = Self::selector("a.wrap_item[href]").ok()?;
    let names = Self::selector(".name").ok()?;
    let wanted = Self::normalize_title(query);

    // First card that has a link, a matching name and a usable href wins.
    let (name, profile_url) = document.select(&cards).find_map(|card| {
        let anchor = card.select(&links).next()?;
        let name = card
            .select(&names)
            .next()
            .map(|node| Self::decode_html(&Self::text_of(&node)))
            .unwrap_or_default();
        if Self::normalize_title(&name) != wanted {
            return None;
        }
        let href = anchor
            .value()
            .attr("href")
            .filter(|candidate| !candidate.is_empty())?;
        Some((name, self.normalize_member_profile_url(href)))
    })?;

    // Remember the member so later lookups can use the uploader filter list.
    self.add_uploader_filter(&profile_url, &name);

    Some(QueryTarget {
        url: self.normalize_member_videos_url(&profile_url),
        uploader_name: Some(name),
        uploader_url: Some(profile_url),
    })
}
/// Fetches the member-search page for `query` and resolves it to a target.
///
/// Any request failure is swallowed and reported as `None`; a successful
/// response body is handed to `parse_member_search_target`.
async fn resolve_member_search_target(
    &self,
    query: &str,
    options: &ServerOptions,
) -> Option<QueryTarget> {
    let search_url = self.build_member_search_url(query);
    let mut client = requester_or_default(
        options,
        module_path!(),
        "rule34video.resolve_member_search_target",
    );

    match client.get(&search_url, None).await {
        Ok(body) => self.parse_member_search_target(&body, query),
        Err(_) => None,
    }
}
async fn fetch_page_items(
&self,
cache: VideoCache,
cache_key: String,
url: String,
use_cache: bool,
per_page: usize,
enrich_details: bool,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let old_items = if use_cache {
@@ -467,8 +569,7 @@ impl Rule34videoProvider {
return Ok(old_items);
}
};
let items = match self.parse_list_videos(&text) {
let mut items = match self.parse_list_videos(&text) {
Ok(items) => items,
Err(error) => {
report_provider_error(
@@ -480,11 +581,21 @@ impl Rule34videoProvider {
return Ok(old_items);
}
};
if items.is_empty() {
return Ok(old_items);
}
if per_page > 0 && items.len() > per_page {
items.truncate(per_page);
}
if !enrich_details {
if use_cache {
cache.insert(cache_key, items.clone());
}
return Ok(items);
}
let enriched = self.enrich_video_items(items, requester).await;
if !enriched.is_empty() && use_cache {
cache.insert(cache_key, enriched.clone());
@@ -532,7 +643,9 @@ impl Rule34videoProvider {
}
let document = Html::parse_document(html);
let card_selector = Self::selector("div.item.thumb")?;
let uploaded_card_selector =
Self::selector("#list_videos_uploaded_videos_items div.item.thumb")?;
let generic_card_selector = Self::selector("div.item.thumb")?;
let link_selector = Self::selector("a.th.js-open-popup[href], a[href*=\"/video/\"]")?;
let title_selector = Self::selector(".thumb_title")?;
let image_selector = Self::selector("img")?;
@@ -540,8 +653,16 @@ impl Rule34videoProvider {
let views_selector = Self::selector(".views")?;
let mut items = Vec::new();
let cards = {
let uploaded_cards = document.select(&uploaded_card_selector).collect::<Vec<_>>();
if uploaded_cards.is_empty() {
document.select(&generic_card_selector).collect::<Vec<_>>()
} else {
uploaded_cards
}
};
for card in document.select(&card_selector) {
for card in cards {
let Some(link) = card.select(&link_selector).next() else {
continue;
};
@@ -599,9 +720,9 @@ impl Rule34videoProvider {
Ok(items)
}
fn collect_link_values<F>(
fn collect_link_values_from_root<F>(
&self,
document: &Html,
root: ElementRef<'_>,
selector: &Selector,
normalize_url: F,
) -> Vec<(String, String)>
@@ -611,7 +732,7 @@ impl Rule34videoProvider {
let mut seen = HashSet::new();
let mut values = Vec::new();
for link in document.select(selector) {
for link in root.select(selector) {
let Some(href) = link.value().attr("href") else {
continue;
};
@@ -628,6 +749,42 @@ impl Rule34videoProvider {
values
}
/// Collects link values from the first root selector that yields any.
///
/// `root_selectors` are tried in order. For each one, every matching root
/// element is scanned with `collect_link_values_from_root` using
/// `link_selector`, and the results are concatenated. The first non-empty
/// result is returned. Root selectors that fail to parse are skipped; an
/// unparsable `link_selector`, or no hits at all, gives an empty vector.
fn collect_scoped_link_values<F>(
    &self,
    document: &Html,
    root_selectors: &[&str],
    link_selector: &str,
    normalize_url: F,
) -> Vec<(String, String)>
where
    F: Fn(&str) -> String + Copy,
{
    let Ok(links) = Self::selector(link_selector) else {
        return Vec::new();
    };

    // The chain is lazy: scopes after the first non-empty one are never scanned.
    root_selectors
        .iter()
        .filter_map(|raw_scope| Self::selector(raw_scope).ok())
        .map(|scope| {
            document
                .select(&scope)
                .flat_map(|root| self.collect_link_values_from_root(root, &links, normalize_url))
                .collect::<Vec<_>>()
        })
        .find(|found| !found.is_empty())
        .unwrap_or_default()
}
fn dedupe_terms(values: Vec<String>) -> Vec<String> {
let mut seen = HashSet::new();
let mut deduped = Vec::new();
@@ -648,10 +805,6 @@ impl Rule34videoProvider {
let document = Html::parse_document(html);
let title_selector = Self::selector("h1, .headline h1, .headline .title").ok();
let category_selector = Self::selector("a[href*=\"/categories/\"]").ok();
let artist_selector = Self::selector("a[href*=\"/artists/\"]").ok();
let uploader_selector = Self::selector("a[href*=\"/members/\"]").ok();
let tag_selector = Self::selector("a[href*=\"/tags/\"]").ok();
if item.title.is_empty() {
if let Some(selector) = &title_selector {
@@ -664,32 +817,64 @@ impl Rule34videoProvider {
}
}
let categories = category_selector
.as_ref()
.map(|selector| {
self.collect_link_values(&document, selector, |href| self.normalize_url(href))
})
.unwrap_or_default();
let artists = artist_selector
.as_ref()
.map(|selector| {
self.collect_link_values(&document, selector, |href| self.normalize_url(href))
})
.unwrap_or_default();
let uploaders = uploader_selector
.as_ref()
.map(|selector| {
self.collect_link_values(&document, selector, |href| {
self.normalize_member_url(href)
})
})
.unwrap_or_default();
let tags = tag_selector
.as_ref()
.map(|selector| {
self.collect_link_values(&document, selector, |href| self.normalize_url(href))
})
.unwrap_or_default();
let categories = self.collect_scoped_link_values(
&document,
&[
"#tab2",
".info",
".item-list",
".video-info",
".block-video .info",
".block-video .item-list",
".block-video .video-info",
],
"a[href*=\"/categories/\"]",
|href| self.normalize_url(href),
);
let artists = self.collect_scoped_link_values(
&document,
&[
"#tab2",
".info",
".item-list",
".video-info",
".block-video .info",
".block-video .item-list",
".block-video .video-info",
],
"a[href*=\"/artists/\"]",
|href| self.normalize_url(href),
);
let uploaders = self.collect_scoped_link_values(
&document,
&[
".headline",
"#tab2",
".info",
".item-list",
".video-info",
".block-video .info",
".block-video .item-list",
".block-video .video-info",
],
"a[href*=\"/members/\"]",
|href| self.normalize_member_profile_url(href),
);
let tags = self.collect_scoped_link_values(
&document,
&[
".tags-outer",
"#tab2",
".info",
".item-list",
".video-info",
".block-video .info",
".block-video .item-list",
".block-video .video-info",
],
"a[href*=\"/tags/\"]",
|href| self.normalize_url(href),
);
for (id, title) in &categories {
self.add_category_filter(id, title);
@@ -733,20 +918,51 @@ impl Provider for Rule34videoProvider {
sort: String,
query: Option<String>,
page: String,
_per_page: String,
per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let page = page.parse::<u8>().unwrap_or(1);
let per_page = per_page.parse::<usize>().unwrap_or(24);
let sort = Self::expected_sort(&sort);
let use_cache = sort != "pseudo_random";
let option_target = self.resolve_option_target(&options);
let cached_query_target = query
.as_deref()
.and_then(|value| self.resolve_query_target(value));
let member_search_target = if option_target.is_none() && cached_query_target.is_none() {
match query.as_deref() {
Some(query) => self.resolve_member_search_target(query, &options).await,
None => None,
}
} else {
None
};
let query_target = cached_query_target.or(member_search_target);
let uploader_context = query_target
.as_ref()
.and_then(|target| {
target.uploader_name.as_ref().map(|name| {
(
name.clone(),
target
.uploader_url
.clone()
.unwrap_or_else(|| self.normalize_member_profile_url(&target.url)),
)
})
})
.or_else(|| {
options.uploaderUrl.as_ref().and_then(|url| {
query
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.map(|name| (name.to_string(), self.normalize_member_profile_url(url)))
})
});
let enrich_details = uploader_context.is_none();
let (cache_key, url) = match (
self.resolve_option_target(&options),
query
.as_deref()
.and_then(|value| self.resolve_query_target(value)),
query.as_deref(),
) {
let (cache_key, url) = match (option_target, query_target, query.as_deref()) {
(Some(target), _, _) => (
format!("rule34video:target:{}:{}:{}", page, sort, target.url),
self.build_filtered_url(&target.url, page, sort),
@@ -779,16 +995,37 @@ impl Provider for Rule34videoProvider {
),
};
match self
.fetch_page_items(cache, cache_key, url, use_cache, options)
let mut items = match self
.fetch_page_items(
cache,
cache_key,
url,
use_cache,
per_page,
enrich_details,
options,
)
.await
{
Ok(items) => items,
Err(error) => {
eprintln!("Error fetching Rule34Video videos: {error}");
vec![]
return vec![];
}
};
if let Some((uploader_name, uploader_url)) = uploader_context {
for item in &mut items {
if item.uploader.is_none() {
item.uploader = Some(uploader_name.clone());
}
if item.uploaderUrl.is_none() {
item.uploaderUrl = Some(uploader_url.clone());
}
}
}
items
}
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
@@ -809,6 +1046,27 @@ mod tests {
assert!(url.contains("from_videos=2"));
}
// A member-search card whose `.name` text matches the query must resolve to
// that member's `/videos/` listing URL (built from the card's profile href).
#[test]
fn parses_member_search_target() {
let provider = Rule34videoProvider::new();
// Minimal fixture: one result card inside `#list_members_members_items`.
let html = r#"
<div class="list_items" id="list_members_members_items">
<div class="item">
<a class="wrap_item" href="https://rule34video.com/members/80235/">
<div class="wrap">
<div class="name">ChillUploader</div>
</div>
</a>
</div>
</div>
"#;
let target = provider
.parse_member_search_target(html, "ChillUploader")
.expect("expected member target");
// The profile href gains a `videos/` suffix in the resolved target URL.
assert_eq!(target.url, "https://rule34video.com/members/80235/videos/");
}
#[test]
fn parses_listing_cards() {
let provider = Rule34videoProvider::new();
@@ -862,7 +1120,7 @@ mod tests {
assert_eq!(item.uploader, Some("Jackerman".to_string()));
assert_eq!(
item.uploaderUrl,
Some("https://rule34video.com/members/123/jackerman/videos/".to_string())
Some("https://rule34video.com/members/123/jackerman/".to_string())
);
assert_eq!(
item.tags,
@@ -884,4 +1142,84 @@ mod tests {
assert!(categories.iter().any(|value| value.title == "Pokemon"));
assert!(tags.iter().any(|value| value.title == "animated"));
}
// Links that live outside the video metadata containers (here: a sidebar)
// must not leak into the item's tags when detail HTML is applied.
#[test]
fn ignores_unrelated_page_wide_tags_outside_video_metadata() {
let provider = Rule34videoProvider::new();
let item = VideoItem::new(
"4288578".to_string(),
"Friendly Encounter Preview".to_string(),
"https://rule34video.com/video/4288578/friendly-encounter-preview/".to_string(),
"rule34video".to_string(),
"https://img.example/thumb.jpg".to_string(),
605,
);
// Fixture: the sidebar carries decoy tag/artist links; the real metadata
// sits in `.block-video .info` and `.tags-outer`.
let html = r#"
<aside class="sidebar">
<a href="/tags/999/">sitewide sidebar tag</a>
<a href="/artists/999/sidebar-artist/">Sidebar Artist</a>
</aside>
<div class="block-video">
<div class="info">
<a href="/categories/pokemon/">Pokemon</a>
<a href="/artists/123/jackerman/">Jackerman</a>
<a href="/members/123/jackerman/">Jackerman</a>
</div>
<div class="tags-outer">
<a href="/tags/560/">3d</a>
<a href="/tags/561/">animated</a>
</div>
</div>
"#;
let item = provider.apply_detail_video(item, html);
// Only the scoped category, artist and tag titles are expected; the sidebar
// entries are absent and "Jackerman" appears once.
assert_eq!(
item.tags,
Some(vec![
"Pokemon".to_string(),
"Jackerman".to_string(),
"3d".to_string(),
"animated".to_string(),
])
);
}
// On a member page that lists both uploaded and favourite videos, only the
// cards inside `#list_videos_uploaded_videos_items` must be parsed.
#[test]
fn member_profile_listing_uses_uploaded_videos_only() {
let provider = Rule34videoProvider::new();
// Fixture: one uploaded video followed by one favourite video.
let html = r#"
<div class="row_container" id="list_videos_uploaded_videos">
<div class="thumbs clearfix" id="list_videos_uploaded_videos_items">
<div class="item thumb video_4290070">
<a class="th js-open-popup" href="https://rule34video.com/video/4290070/xype3d-jane-s-maid-to-order-full-animation/" title="[Xype3D] Jane's Maid to Order - Full Animation">
<img data-original="https://img.example/uploaded.jpg" />
</a>
<div class="thumb_title">[Xype3D] Jane's Maid to Order - Full Animation</div>
<div class="time">10:12</div>
<div class="views">88K</div>
</div>
</div>
</div>
<div class="row_container" id="list_videos_favourite_videos">
<div class="thumbs clearfix" id="list_videos_favourite_videos_items">
<div class="item thumb favorite_videos_video_1">
<a class="th js-open-popup" href="https://rule34video.com/video/4250270/frieren-fern-midnight-snacks-forbidden-spells-pixel-perry-4k60fps/" title="Favorite video">
<img data-original="https://img.example/favorite.jpg" />
</a>
<div class="thumb_title">Favorite video</div>
<div class="time">2:22</div>
<div class="views">236K</div>
</div>
</div>
</div>
"#;
let items = provider.parse_list_videos(html).unwrap();
// Exactly the uploaded video survives; the favourite card is ignored.
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "4290070");
assert_eq!(
items[0].title,
"[Xype3D] Jane's Maid to Order - Full Animation"
);
}
}

View File

@@ -97,6 +97,8 @@ pub struct ServerOptions {
pub duration: Option<String>, //
pub sort: Option<String>, //
pub sexuality: Option<String>, //
pub uploaderUrl: Option<String>,
pub uploaderId: Option<String>,
}
#[derive(serde::Serialize, Debug)]