Files
hottub/src/providers/rule34video.rs
2026-01-15 19:04:28 +00:00

269 lines
8.3 KiB
Rust

use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::time::{SystemTime, UNIX_EPOCH};
use std::vec;
// Declares this module's error types through the `error_chain!` macro; the
// single-parameter `Result<T>` used by the methods below comes from here.
error_chain! {
// Errors from other crates that are wrapped as variants of this module's error.
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
// Error kinds declared by this module itself.
errors {
// HTML parsing failure; `t` is interpolated into the displayed message.
ParsingError(t: String) {
description("html parsing error")
display("HTML parsing error: '{}'", t)
}
}
}
/// Video provider for rule34video.
///
/// Holds the base URL that listing and search request URLs are built from.
#[derive(Debug, Clone)]
pub struct Rule34videoProvider {
/// Base URL without a trailing slash; `new` sets it to `https://rule34video.com`.
url: String,
}
impl Rule34videoProvider {
pub fn new() -> Self {
Rule34videoProvider {
url: "https://rule34video.com".to_string(),
}
}
/// Returns the part of `content` located between the first occurrence of
/// `start_pat` and the first occurrence of `end_pat` that follows it.
///
/// Neither delimiter is included in the result. Yields `None` when
/// `start_pat` is absent, or when `end_pat` does not appear after it.
fn extract_between<'a>(content: &'a str, start_pat: &str, end_pat: &str) -> Option<&'a str> {
    content
        .split_once(start_pat)
        .and_then(|(_, tail)| tail.split_once(end_pat))
        .map(|(inner, _)| inner)
}
/// Fetches one page of the default (non-search) video listing.
///
/// `sort` is passed to the site as `sort_by` when it is one of the recognised
/// keys; any other value falls back to `"post_date"`. Results are cached under
/// a key built from `page` and the effective sort, and an entry younger than
/// 300 seconds is returned without issuing a request. The `"pseudo_random"`
/// sort skips that fresh-cache shortcut, so every call performs a request.
///
/// # Errors
///
/// Returns an error only when `options.requester` is `None`. A failed HTTP
/// request is logged and then treated like an empty response: the cached entry
/// for the same key is returned if one exists (whatever its age), otherwise an
/// empty list.
async fn get(
    &self,
    cache: VideoCache,
    page: u8,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    // Unknown sort keys are replaced rather than forwarded to the site.
    let sort_val = match sort {
        "post_date" | "video_viewed" | "rating" | "duration" | "pseudo_random" => sort,
        _ => "post_date",
    };

    let index = format!("rule34video:{}:{}", page, sort_val);
    if sort_val != "pseudo_random" {
        if let Some((time, items)) = cache.get(&index) {
            if time.elapsed().unwrap_or_default().as_secs() < 300 {
                // `cache.get` hands back an owned list, so it can be returned as is.
                return Ok(items);
            }
        }
    }

    let mut requester = options.requester.clone().ok_or("Requester missing")?;

    // Only used as the `_` cache-buster parameter, so it is computed once a
    // request is actually going to be made.
    let timestamp_millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_millis())
        .unwrap_or(0);
    let url = format!(
        "{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}",
        self.url, sort_val, page, timestamp_millis
    );

    let text = match requester.get(&url, None).await {
        Ok(body) => body,
        Err(e) => {
            eprintln!("Error fetching rule34video URL {}: {}", url, e);
            // NOTE(review): if `send_discord_error_report` is an `async fn`, the
            // future created here is dropped without being polled and no report
            // is sent; it would need `.await` — confirm against its definition.
            let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
            String::new()
        }
    };

    let video_items = self.get_video_items_from_html(text);
    if video_items.is_empty() {
        // Nothing parsed (fetch failure or unexpected markup): serve whatever is
        // cached for this key, even if stale, or an empty list.
        Ok(cache
            .get(&index)
            .map(|(_, items)| items)
            .unwrap_or_default())
    } else {
        cache.insert(index, video_items.clone());
        Ok(video_items)
    }
}
/// Fetches one page of search results for `query`.
///
/// `sort` is passed to the site as `sort_by` when it is one of the recognised
/// keys; any other value falls back to `"post_date"`. Results are cached under
/// a key built from `page`, the effective sort and `query`, and an entry
/// younger than 300 seconds is returned without issuing a request.
///
/// # Errors
///
/// Returns an error only when `options.requester` is `None`. A failed HTTP
/// request is logged and then treated like an empty response: the cached entry
/// for the same key is returned if one exists (whatever its age), otherwise an
/// empty list.
async fn query(
    &self,
    cache: VideoCache,
    page: u8,
    query: &str,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    // Unknown sort keys are replaced rather than forwarded to the site.
    let sort_val = match sort {
        "post_date" | "video_viewed" | "rating" | "duration" | "pseudo_random" => sort,
        _ => "post_date",
    };

    let index = format!("rule34video:{}:{}:{}", page, sort_val, query);
    // NOTE(review): unlike `get`, a "pseudo_random" search is served from the
    // cache here — confirm whether that difference is intended.
    if let Some((time, items)) = cache.get(&index) {
        if time.elapsed().unwrap_or_default().as_secs() < 300 {
            // `cache.get` hands back an owned list, so it can be returned as is.
            return Ok(items);
        }
    }

    let mut requester = options.requester.clone().ok_or("Requester missing")?;

    // Only used as the `_` cache-buster parameter, so it is computed once a
    // request is actually going to be made.
    let timestamp_millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_millis())
        .unwrap_or(0);
    // NOTE(review): apart from turning spaces into dashes, the search term is
    // placed in the URL path as typed; characters such as `/`, `?` or `#` would
    // change the shape of the URL — consider percent-encoding it.
    let url = format!(
        "{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}",
        self.url,
        query.replace(" ", "-"),
        sort_val,
        page,
        page,
        timestamp_millis
    );

    let text = match requester.get(&url, None).await {
        Ok(body) => body,
        Err(e) => {
            eprintln!("Error fetching rule34video URL {}: {}", url, e);
            // NOTE(review): if `send_discord_error_report` is an `async fn`, the
            // future created here is dropped without being polled and no report
            // is sent; it would need `.await` — confirm against its definition.
            let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
            String::new()
        }
    };

    let video_items = self.get_video_items_from_html(text);
    if video_items.is_empty() {
        // Nothing parsed (fetch failure or unexpected markup): serve whatever is
        // cached for this key, even if stale, or an empty list.
        Ok(cache
            .get(&index)
            .map(|(_, items)| items)
            .unwrap_or_default())
    } else {
        cache.insert(index, video_items.clone());
        Ok(video_items)
    }
}
/// Parses a listing response into video items.
///
/// Only the markup between `id="custom_list_videos` and the pagination block
/// is inspected. Each `item thumb video_` card in that region becomes one
/// [`VideoItem`]; cards marked `title="Advertisement"` are skipped. A field
/// that cannot be found falls back to a placeholder (`"Unknown"` title, `"0"`
/// id, zero duration and views, empty thumbnail and URL) rather than dropping
/// the card.
///
/// Returns an empty list when `html` is empty or the listing region is not
/// present.
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
    if html.is_empty() {
        return Vec::new();
    }
    // Isolate the video listing section.
    let video_listing = match Self::extract_between(
        &html,
        "id=\"custom_list_videos",
        "<div class=\"pagination\"",
    ) {
        Some(content) => content,
        None => return Vec::new(),
    };

    let mut items = Vec::new();
    // The first split result is the preamble before the first card.
    let raw_videos = video_listing
        .split("<div class=\"item thumb video_")
        .skip(1);
    for video_segment in raw_videos {
        if video_segment.contains("title=\"Advertisement\"") {
            continue;
        }

        // Title: text of the title element, with HTML entities decoded.
        let title_raw =
            Self::extract_between(video_segment, "<div class=\"thumb_title\">", "<")
                .unwrap_or("Unknown");
        let title = decode(title_raw.as_bytes())
            .to_string()
            .unwrap_or_else(|_| title_raw.to_string());

        // ID: first path component after `/video/` in the card's link.
        let id = Self::extract_between(video_segment, "https://rule34video.com/video/", "/")
            .unwrap_or("0")
            .to_string();

        // Duration: text of the time element, converted to seconds.
        let raw_duration =
            Self::extract_between(video_segment, "<div class=\"time\">", "<").unwrap_or("0:00");
        let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;

        // Views: the element has to be read up to its closing tag. Stopping at
        // the first `<` yields text that cannot contain `</svg>`, so the split
        // below would never find a count and every item would report 0 views.
        // The count is taken from after the icon's `</svg>`; if there is no
        // icon, the element's whole content is tried instead.
        let views_text =
            Self::extract_between(video_segment, "<div class=\"views\">", "</div>")
                .map(|inner| inner.split("</svg>").nth(1).unwrap_or(inner))
                .unwrap_or("0");
        let views = parse_abbreviated_number(views_text.trim()).unwrap_or(0);

        // Thumbnail: value of the lazy-load `data-original` attribute.
        let thumb = Self::extract_between(video_segment, "data-original=\"", "\"")
            .unwrap_or("")
            .to_string();

        // URL: href of the card's popup link.
        let url =
            Self::extract_between(video_segment, "<a class=\"th js-open-popup\" href=\"", "\"")
                .unwrap_or("")
                .to_string();

        items.push(
            VideoItem::new(id, title, url, "Rule34video".to_string(), thumb, duration)
                .views(views),
        );
    }
    items
}
}
#[async_trait]
impl Provider for Rule34videoProvider {
    /// Returns one page of videos: search results when `query` is present,
    /// the default listing otherwise.
    ///
    /// `page` is parsed as a `u8`; a value that does not parse is treated as
    /// page 1. `_pool` and `_per_page` are not used. Any error from the
    /// underlying fetch is logged to stderr and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_num: u8 = page.parse().unwrap_or(1);

        let outcome = if let Some(term) = query {
            self.query(cache, page_num, &term, &sort, options).await
        } else {
            self.get(cache, page_num, &sort, options).await
        };

        outcome.unwrap_or_else(|e| {
            eprintln!("Error fetching videos: {}", e);
            Vec::new()
        })
    }
}