This commit is contained in:
Simon
2026-01-15 19:04:28 +00:00
parent 182eb8ac01
commit 27e2bcdbba

View File

@@ -1,203 +1,241 @@
use crate::util::parse_abbreviated_number;
use crate::DbPool; use crate::DbPool;
use crate::providers::Provider; use crate::providers::Provider;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem}; use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use async_trait::async_trait; use std::vec;
// Error types for this provider, generated by `error_chain!`.
//
// * `foreign_links` wrap I/O errors and `wreq` HTTP errors so `?` can convert them.
// * `ParsingError` carries a free-form description of an HTML parsing failure.
error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
    errors {
        ParsingError(t: String) {
            description("html parsing error")
            display("HTML parsing error: '{}'", t)
        }
    }
}
/// Video provider backed by the rule34video site.
#[derive(Debug, Clone)]
pub struct Rule34videoProvider {
    /// Base URL (scheme + host, no trailing slash) that request URLs are built from.
    url: String,
}
impl Rule34videoProvider { impl Rule34videoProvider {
pub fn new() -> Self { pub fn new() -> Self {
Rule34videoProvider { Rule34videoProvider {
url: "https://rule34video.com".to_string(), url: "https://rule34video.com".to_string(),
} }
} }
/// Returns the slice of `content` that sits between the first occurrence of
/// `start_pat` and the first occurrence of `end_pat` that follows it.
///
/// Yields `None` when `start_pat` is absent, or when `end_pat` does not
/// appear after it. Neither delimiter is included in the result.
fn extract_between<'a>(content: &'a str, start_pat: &str, end_pat: &str) -> Option<&'a str> {
    let (_, after_start) = content.split_once(start_pat)?;
    let (inner, _) = after_start.split_once(end_pat)?;
    Some(inner)
}
async fn get( async fn get(
&self, &self,
cache: VideoCache, cache: VideoCache,
page: u8, page: u8,
sort: &str, sort: &str,
options: ServerOptions options: ServerOptions,
) -> Result<Vec<VideoItem>> { ) -> Result<Vec<VideoItem>> {
let now = SystemTime::now() let timestamp_millis = SystemTime::now()
.duration_since(UNIX_EPOCH) .duration_since(UNIX_EPOCH)
.expect("Time went backwards"); .map(|d| d.as_millis())
.unwrap_or(0);
let timestamp_millis = now.as_millis(); // u128 let expected_sorts = vec![
let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"]; "post_date",
let sort = if expected_sorts.contains(&sort) { "video_viewed",
"rating",
"duration",
"pseudo_random",
];
let sort_val = if expected_sorts.contains(&sort) {
sort sort
} else { } else {
"post_date" "post_date"
}; };
let index = format!("rule34video:{}:{}", page, sort); let index = format!("rule34video:{}:{}", page, sort_val);
let url = format!("{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", self.url, sort, page, timestamp_millis); if sort_val != "pseudo_random" {
if let Some((time, items)) = cache.get(&index) {
let mut old_items: Vec<VideoItem> = vec![]; if time.elapsed().unwrap_or_default().as_secs() < 300 {
if !(sort == "pseudo_random") {
old_items = match cache.get(&index) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
// println!("Cache hit for URL: {}", url);
return Ok(items.clone()); return Ok(items.clone());
} else {
items.clone()
} }
} }
None => {
vec![]
} }
};
} let mut requester = options.requester.clone().ok_or("Requester missing")?;
let mut requester = options.requester.clone().unwrap(); let url = format!(
let text = requester.get(&url, None).await.unwrap(); "{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}",
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()); self.url, sort_val, page, timestamp_millis
);
let text = requester.get(&url, None).await.unwrap_or_else(|e| {
eprintln!("Error fetching rule34video URL {}: {}", url, e);
let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
"".to_string()
});
let video_items = self.get_video_items_from_html(text);
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url); cache.insert(index, video_items.clone());
cache.insert(url.clone(), video_items.clone());
} else {
return Ok(old_items);
}
Ok(video_items) Ok(video_items)
} else {
// Return empty or old items if available
Ok(cache
.get(&index)
.map(|(_, items)| items)
.unwrap_or_default())
} }
}
async fn query( async fn query(
&self, &self,
cache: VideoCache, cache: VideoCache,
page: u8, page: u8,
query: &str, query: &str,
sort: &str, sort: &str,
options: ServerOptions options: ServerOptions,
) -> Result<Vec<VideoItem>> { ) -> Result<Vec<VideoItem>> {
let now = SystemTime::now() let timestamp_millis = SystemTime::now()
.duration_since(UNIX_EPOCH) .duration_since(UNIX_EPOCH)
.expect("Time went backwards"); .map(|d| d.as_millis())
let timestamp_millis = now.as_millis(); // u128 .unwrap_or(0);
let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"];
let sort = if expected_sorts.contains(&sort) { let expected_sorts = vec![
"post_date",
"video_viewed",
"rating",
"duration",
"pseudo_random",
];
let sort_val = if expected_sorts.contains(&sort) {
sort sort
} else { } else {
"post_date" "post_date"
}; };
let index = format!("rule34video:{}:{}:{}", page, sort, query); let index = format!("rule34video:{}:{}:{}", page, sort_val, query);
let url = format!("{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}", self.url, query.replace(" ","-"), sort, page, page, timestamp_millis); if let Some((time, items)) = cache.get(&index) {
if time.elapsed().unwrap_or_default().as_secs() < 300 {
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&index) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
let _ = cache.check().await;
return Ok(items.clone()); return Ok(items.clone());
} }
} }
None => {
vec![] let mut requester = options.requester.clone().ok_or("Requester missing")?;
} let url = format!(
}; "{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}",
let mut requester = options.requester.clone().unwrap(); self.url,
let text = requester.get(&url, None).await.unwrap(); query.replace(" ", "-"),
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()); sort_val,
page,
page,
timestamp_millis
);
let text = requester.get(&url, None).await.unwrap_or_else(|e| {
eprintln!("Error fetching rule34video URL {}: {}", url, e);
let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
"".to_string()
});
let video_items = self.get_video_items_from_html(text);
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url); cache.insert(index, video_items.clone());
cache.insert(url.clone(), video_items.clone());
} else {
return Ok(old_items);
}
Ok(video_items) Ok(video_items)
} else {
Ok(cache
.get(&index)
.map(|(_, items)| items)
.unwrap_or_default())
}
} }
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> { fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
if html.is_empty() { if html.is_empty() {
println!("HTML is empty");
return vec![]; return vec![];
} }
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html.split("<div class=\"thumbs clearfix\" id=\"custom_list_videos").collect::<Vec<&str>>()[1].split("<div class=\"pagination\"").collect::<Vec<&str>>()[0].to_string();
let raw_videos = video_listing_content
.split("<div class=\"item thumb video_")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>()[1]
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line);
// }
if video_segment.contains("https://rule34video.com/images/advertisements"){ // Safely isolate the video listing section
let video_listing = match Self::extract_between(
&html,
"id=\"custom_list_videos",
"<div class=\"pagination\"",
) {
Some(content) => content,
None => return vec![],
};
let mut items = Vec::new();
// Skip the first split result as it's the preamble
let raw_videos = video_listing
.split("<div class=\"item thumb video_")
.skip(1);
for video_segment in raw_videos {
if video_segment.contains("title=\"Advertisement\"") {
continue; continue;
} }
let mut title = video_segment.split("<div class=\"thumb_title\">").collect::<Vec<&str>>()[1] // Title extraction
.split("<") let title_raw =
.collect::<Vec<&str>>()[0] Self::extract_between(video_segment, "<div class=\"thumb_title\">", "<")
.to_string(); .unwrap_or("Unknown");
// html decode let title = decode(title_raw.as_bytes())
title = decode(title.as_bytes()).to_string().unwrap_or(title); .to_string()
let id = video_segment.split("https://rule34video.com/video/").collect::<Vec<&str>>()[1].split("/").collect::<Vec<&str>>()[0].to_string(); .unwrap_or_else(|_| title_raw.to_string());
let raw_duration = video_segment.split("<div class=\"time\">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let views = parse_abbreviated_number(&video_segment
.split("<div class=\"views\">").collect::<Vec<&str>>()[1].split("</svg>").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]).unwrap_or(0);
//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/
//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/
let thumb = video_segment.split("<img class=\"thumb lazy-load\" src=\"").collect::<Vec<&str>>()[1].split("data-original=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let url = video_segment.split("<a class=\"th js-open-popup\" href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
// let preview = video_segment.split("<div class=\"img wrap_image\" data-preview=\"").collect::<Vec<&str>>()[1]
// .split("\"")
// .collect::<Vec<&str>>()[0]
// .to_string();
// ID extraction
let id = Self::extract_between(video_segment, "https://rule34video.com/video/", "/")
.unwrap_or("0")
.to_string();
let video_item = VideoItem::new( // Duration extraction
id, let raw_duration =
title, Self::extract_between(video_segment, "<div class=\"time\">", "<").unwrap_or("0:00");
url.to_string(), let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
"Rule34video".to_string(),
thumb,
duration,
)
.views(views)
// .preview(preview)
;
// Views extraction
let views_segment = Self::extract_between(video_segment, "<div class=\"views\">", "<");
let views_count_str = views_segment
.and_then(|s| s.split("</svg>").nth(1))
.unwrap_or("0");
let views = parse_abbreviated_number(views_count_str.trim()).unwrap_or(0);
items.push(video_item); // Thumbnail extraction
let thumb = Self::extract_between(video_segment, "data-original=\"", "\"")
.unwrap_or("")
.to_string();
// URL extraction
let url =
Self::extract_between(video_segment, "<a class=\"th js-open-popup\" href=\"", "\"")
.unwrap_or("")
.to_string();
items.push(
VideoItem::new(id, title, url, "Rule34video".to_string(), thumb, duration)
.views(views),
);
} }
return items; items
} }
} }
#[async_trait] #[async_trait]
@@ -205,30 +243,24 @@ impl Provider for Rule34videoProvider {
async fn get_videos( async fn get_videos(
&self, &self,
cache: VideoCache, cache: VideoCache,
pool: DbPool, _pool: DbPool,
sort: String, sort: String,
query: Option<String>, query: Option<String>,
page: String, page: String,
per_page: String, _per_page: String,
options: ServerOptions, options: ServerOptions,
) -> Vec<VideoItem> { ) -> Vec<VideoItem> {
let _ = options; let page_num = page.parse::<u8>().unwrap_or(1);
let _ = per_page;
let _ = pool; // Ignored in this implementation let result = match query {
let videos: std::result::Result<Vec<VideoItem>, Error> = match query { Some(q) => self.query(cache, page_num, &q, &sort, options).await,
Some(q) => { None => self.get(cache, page_num, &sort, options).await,
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, &sort, options)
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
}; };
match videos {
match result {
Ok(v) => v, Ok(v) => v,
Err(e) => { Err(e) => {
println!("Error fetching videos: {}", e); eprintln!("Error fetching videos: {}", e);
vec![] vec![]
} }
} }