// Provider for rule34video.com: requests listing/search fragments over HTTP, scrapes video
// entries out of the returned HTML with plain substring searches, and stores results in a
// `VideoCache` keyed by page, sort order and (for searches) the query text.
//
// NOTE(review): this copy of the file is collapsed onto a few physical lines and appears to have
// lost text that resembled HTML tags: generic arguments (`Result>`, `-> Vec {`,
// `page.parse::()`) and several string literals inside `get_video_items_from_html` are visibly
// incomplete, and the header of the final method is missing. Restore the file from version
// control before compiling or editing; the notes below describe only what is still readable.
//
// Contents, in order of appearance:
//
// * `error_chain!` block: foreign links for `std::io::Error` and `wreq::Error`, plus a
//   `ParsingError(String)` kind displayed as "HTML parsing error: '...'".
//
// * `Rule34videoProvider` / `new()`: holds the base URL, fixed to "https://rule34video.com".
//
// * `extract_between(content, start_pat, end_pat)`: finds the first occurrence of `start_pat`,
//   then the first occurrence of `end_pat` after it, and returns the text in between as a slice
//   of `content`. Returns `None` when either pattern is absent.
//
// * `get(cache, page, sort, options)`: listing request.
//     - `sort` values other than post_date / video_viewed / rating / duration / pseudo_random
//       fall back to "post_date".
//     - Cache key is "rule34video:{page}:{sort}". A cached entry whose age is under 300 seconds
//       is returned without a request, except when the sort is "pseudo_random".
//     - Fails with "Requester missing" when `options.requester` is `None`.
//     - The current Unix time in milliseconds is appended to the URL as the `_` parameter.
//     - A failed request is printed to stderr, passed to `send_discord_error_report`, and then
//       treated as an empty response body.
//       NOTE(review): the value returned by `send_discord_error_report` is discarded inside a
//       non-async closure; if that function is async the report would never run - confirm.
//     - Non-empty parse results are inserted into the cache and returned. Otherwise whatever
//       the cache holds for the key is returned regardless of its age, or an empty list.
//
// * `query(cache, page, query, sort, options)`: search request; same flow as `get` with these
//   differences: the cache key also contains the query text, the freshness check applies to
//   every sort value, and the URL is built under "/search/{query}/" with `page` used for both
//   `from_videos` and `from_albums`.
//     - Spaces in `query` are replaced with '-'; no other escaping is applied in this code.
//       NOTE(review): confirm callers never pass characters that are unsafe in a URL path.
//
// * `get_video_items_from_html(html)`: returns an empty list for empty input or when the
//   listing section cannot be located. For each video segment the readable code extracts:
//     - title: HTML-entity decoded, falling back to the raw text; "Unknown" when not found
//     - id: text between "https://rule34video.com/video/" and the next "/"; "0" when not found
//     - duration: `parse_time_to_seconds`, 0 on failure, cast to `u32`; "0:00" when not found
//     - views: `parse_abbreviated_number` on the trimmed text, 0 on failure
//     - thumbnail: value of the `data-original` attribute; empty string when not found
//   NOTE(review): the delimiters used for splitting segments and for the title, duration, views
//   and URL lookups, as well as the construction of the resulting items, are not recoverable
//   from this copy.
//
// * Final method (header missing here): parses `page` with a fallback of 1, calls `query` when
//   a query is present and `get` otherwise, prints any error to stderr and returns an empty
//   list in that case.
use crate::DbPool; use crate::providers::Provider; use crate::util::cache::VideoCache; use crate::util::discord::send_discord_error_report; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; use std::time::{SystemTime, UNIX_EPOCH}; use std::vec; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } errors { ParsingError(t: String) { description("html parsing error") display("HTML parsing error: '{}'", t) } } } #[derive(Debug, Clone)] pub struct Rule34videoProvider { url: String, } impl Rule34videoProvider { pub fn new() -> Self { Rule34videoProvider { url: "https://rule34video.com".to_string(), } } /// Helper to safely extract a string between two delimiters fn extract_between<'a>(content: &'a str, start_pat: &str, end_pat: &str) -> Option<&'a str> { let start_idx = content.find(start_pat)? 
+ start_pat.len(); let sub = &content[start_idx..]; let end_idx = sub.find(end_pat)?; Some(&sub[..end_idx]) } async fn get( &self, cache: VideoCache, page: u8, sort: &str, options: ServerOptions, ) -> Result> { let timestamp_millis = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_millis()) .unwrap_or(0); let expected_sorts = vec![ "post_date", "video_viewed", "rating", "duration", "pseudo_random", ]; let sort_val = if expected_sorts.contains(&sort) { sort } else { "post_date" }; let index = format!("rule34video:{}:{}", page, sort_val); if sort_val != "pseudo_random" { if let Some((time, items)) = cache.get(&index) { if time.elapsed().unwrap_or_default().as_secs() < 300 { return Ok(items.clone()); } } } let mut requester = options.requester.clone().ok_or("Requester missing")?; let url = format!( "{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", self.url, sort_val, page, timestamp_millis ); let text = requester.get(&url, None).await.unwrap_or_else(|e| { eprintln!("Error fetching rule34video URL {}: {}", url, e); let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!()); "".to_string() }); let video_items = self.get_video_items_from_html(text); if !video_items.is_empty() { cache.insert(index, video_items.clone()); Ok(video_items) } else { // Return empty or old items if available Ok(cache .get(&index) .map(|(_, items)| items) .unwrap_or_default()) } } async fn query( &self, cache: VideoCache, page: u8, query: &str, sort: &str, options: ServerOptions, ) -> Result> { let timestamp_millis = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_millis()) .unwrap_or(0); let expected_sorts = vec![ "post_date", "video_viewed", "rating", "duration", "pseudo_random", ]; let sort_val = if expected_sorts.contains(&sort) { sort } else { "post_date" }; let index = format!("rule34video:{}:{}:{}", page, sort_val, query); if let Some((time, items)) = 
cache.get(&index) { if time.elapsed().unwrap_or_default().as_secs() < 300 { return Ok(items.clone()); } } let mut requester = options.requester.clone().ok_or("Requester missing")?; let url = format!( "{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}", self.url, query.replace(" ", "-"), sort_val, page, page, timestamp_millis ); let text = requester.get(&url, None).await.unwrap_or_else(|e| { eprintln!("Error fetching rule34video URL {}: {}", url, e); let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!()); "".to_string() }); let video_items = self.get_video_items_from_html(text); if !video_items.is_empty() { cache.insert(index, video_items.clone()); Ok(video_items) } else { Ok(cache .get(&index) .map(|(_, items)| items) .unwrap_or_default()) } } fn get_video_items_from_html(&self, html: String) -> Vec { if html.is_empty() { return vec![]; } // Safely isolate the video listing section let video_listing = match Self::extract_between( &html, "id=\"custom_list_videos", "
content, None => return vec![], }; let mut items = Vec::new(); // Skip the first split result as it's the preamble let raw_videos = video_listing .split("
", "<") .unwrap_or("Unknown"); let title = decode(title_raw.as_bytes()) .to_string() .unwrap_or_else(|_| title_raw.to_string()); // ID extraction let id = Self::extract_between(video_segment, "https://rule34video.com/video/", "/") .unwrap_or("0") .to_string(); // Duration extraction let raw_duration = Self::extract_between(video_segment, "
", "<").unwrap_or("0:00"); let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32; // Views extraction let views_segment = Self::extract_between(video_segment, "
", "<"); let views_count_str = views_segment .and_then(|s| s.split("").nth(1)) .unwrap_or("0"); let views = parse_abbreviated_number(views_count_str.trim()).unwrap_or(0); // Thumbnail extraction let thumb = Self::extract_between(video_segment, "data-original=\"", "\"") .unwrap_or("") .to_string(); // URL extraction let url = Self::extract_between(video_segment, ", page: String, _per_page: String, options: ServerOptions, ) -> Vec { let page_num = page.parse::().unwrap_or(1); let result = match query { Some(q) => self.query(cache, page_num, &q, &sort, options).await, None => self.get(cache, page_num, &sort, options).await, }; match result { Ok(v) => v, Err(e) => { eprintln!("Error fetching videos: {}", e); vec![] } } } }