fixes
This commit is contained in:
@@ -1,203 +1,241 @@
|
|||||||
use crate::util::parse_abbreviated_number;
|
|
||||||
use crate::DbPool;
|
use crate::DbPool;
|
||||||
use crate::providers::Provider;
|
use crate::providers::Provider;
|
||||||
use crate::util::cache::VideoCache;
|
use crate::util::cache::VideoCache;
|
||||||
|
use crate::util::discord::send_discord_error_report;
|
||||||
|
use crate::util::parse_abbreviated_number;
|
||||||
use crate::util::time::parse_time_to_seconds;
|
use crate::util::time::parse_time_to_seconds;
|
||||||
use crate::videos::{ServerOptions, VideoItem};
|
use crate::videos::{ServerOptions, VideoItem};
|
||||||
|
use async_trait::async_trait;
|
||||||
use error_chain::error_chain;
|
use error_chain::error_chain;
|
||||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||||
use std::vec;
|
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
use async_trait::async_trait;
|
use std::vec;
|
||||||
|
|
||||||
error_chain! {
    // Errors from other crates that are wrapped by the generated `Error` type.
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
    // Errors specific to this provider.
    errors {
        // Raised for HTML that could not be parsed; `t` is the detail shown in the message.
        ParsingError(t: String) {
            description("html parsing error")
            display("HTML parsing error: '{}'", t)
        }
    }
}
|
||||||
|
|
||||||
/// Video provider backed by the rule34video.com site.
#[derive(Debug, Clone)]
pub struct Rule34videoProvider {
    /// Base URL that request paths are appended to.
    url: String,
}
|
||||||
|
|
||||||
impl Rule34videoProvider {
|
impl Rule34videoProvider {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Rule34videoProvider {
|
Rule34videoProvider {
|
||||||
url: "https://rule34video.com".to_string(),
|
url: "https://rule34video.com".to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the text between the first `start_pat` and the next `end_pat` after it.
///
/// Neither delimiter is included in the result. Returns `None` when `start_pat`
/// does not occur in `content`, or when `end_pat` does not occur after it, so
/// callers never index past the end of a malformed document.
fn extract_between<'a>(content: &'a str, start_pat: &str, end_pat: &str) -> Option<&'a str> {
    let (_, after_start) = content.split_once(start_pat)?;
    let (inner, _) = after_start.split_once(end_pat)?;
    Some(inner)
}
|
||||||
|
|
||||||
async fn get(
|
async fn get(
|
||||||
&self,
|
&self,
|
||||||
cache: VideoCache,
|
cache: VideoCache,
|
||||||
page: u8,
|
page: u8,
|
||||||
sort: &str,
|
sort: &str,
|
||||||
options: ServerOptions
|
options: ServerOptions,
|
||||||
) -> Result<Vec<VideoItem>> {
|
) -> Result<Vec<VideoItem>> {
|
||||||
let now = SystemTime::now()
|
let timestamp_millis = SystemTime::now()
|
||||||
.duration_since(UNIX_EPOCH)
|
.duration_since(UNIX_EPOCH)
|
||||||
.expect("Time went backwards");
|
.map(|d| d.as_millis())
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
let timestamp_millis = now.as_millis(); // u128
|
let expected_sorts = vec![
|
||||||
let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"];
|
"post_date",
|
||||||
let sort = if expected_sorts.contains(&sort) {
|
"video_viewed",
|
||||||
|
"rating",
|
||||||
|
"duration",
|
||||||
|
"pseudo_random",
|
||||||
|
];
|
||||||
|
let sort_val = if expected_sorts.contains(&sort) {
|
||||||
sort
|
sort
|
||||||
} else {
|
} else {
|
||||||
"post_date"
|
"post_date"
|
||||||
};
|
};
|
||||||
|
|
||||||
let index = format!("rule34video:{}:{}", page, sort);
|
let index = format!("rule34video:{}:{}", page, sort_val);
|
||||||
|
|
||||||
let url = format!("{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", self.url, sort, page, timestamp_millis);
|
if sort_val != "pseudo_random" {
|
||||||
|
if let Some((time, items)) = cache.get(&index) {
|
||||||
let mut old_items: Vec<VideoItem> = vec![];
|
if time.elapsed().unwrap_or_default().as_secs() < 300 {
|
||||||
if !(sort == "pseudo_random") {
|
return Ok(items.clone());
|
||||||
old_items = match cache.get(&index) {
|
|
||||||
Some((time, items)) => {
|
|
||||||
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
|
||||||
// println!("Cache hit for URL: {}", url);
|
|
||||||
return Ok(items.clone());
|
|
||||||
} else {
|
|
||||||
items.clone()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
None => {
|
}
|
||||||
vec![]
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
let mut requester = options.requester.clone().unwrap();
|
|
||||||
let text = requester.get(&url, None).await.unwrap();
|
let mut requester = options.requester.clone().ok_or("Requester missing")?;
|
||||||
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone());
|
let url = format!(
|
||||||
|
"{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}",
|
||||||
|
self.url, sort_val, page, timestamp_millis
|
||||||
|
);
|
||||||
|
|
||||||
|
let text = requester.get(&url, None).await.unwrap_or_else(|e| {
|
||||||
|
eprintln!("Error fetching rule34video URL {}: {}", url, e);
|
||||||
|
let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
|
||||||
|
"".to_string()
|
||||||
|
});
|
||||||
|
let video_items = self.get_video_items_from_html(text);
|
||||||
|
|
||||||
if !video_items.is_empty() {
|
if !video_items.is_empty() {
|
||||||
cache.remove(&url);
|
cache.insert(index, video_items.clone());
|
||||||
cache.insert(url.clone(), video_items.clone());
|
Ok(video_items)
|
||||||
} else {
|
} else {
|
||||||
return Ok(old_items);
|
// Return empty or old items if available
|
||||||
|
Ok(cache
|
||||||
|
.get(&index)
|
||||||
|
.map(|(_, items)| items)
|
||||||
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
Ok(video_items)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn query(
|
async fn query(
|
||||||
&self,
|
&self,
|
||||||
cache: VideoCache,
|
cache: VideoCache,
|
||||||
page: u8,
|
page: u8,
|
||||||
query: &str,
|
query: &str,
|
||||||
sort: &str,
|
sort: &str,
|
||||||
options: ServerOptions
|
options: ServerOptions,
|
||||||
) -> Result<Vec<VideoItem>> {
|
) -> Result<Vec<VideoItem>> {
|
||||||
let now = SystemTime::now()
|
let timestamp_millis = SystemTime::now()
|
||||||
.duration_since(UNIX_EPOCH)
|
.duration_since(UNIX_EPOCH)
|
||||||
.expect("Time went backwards");
|
.map(|d| d.as_millis())
|
||||||
let timestamp_millis = now.as_millis(); // u128
|
.unwrap_or(0);
|
||||||
let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"];
|
|
||||||
let sort = if expected_sorts.contains(&sort) {
|
let expected_sorts = vec![
|
||||||
|
"post_date",
|
||||||
|
"video_viewed",
|
||||||
|
"rating",
|
||||||
|
"duration",
|
||||||
|
"pseudo_random",
|
||||||
|
];
|
||||||
|
let sort_val = if expected_sorts.contains(&sort) {
|
||||||
sort
|
sort
|
||||||
} else {
|
} else {
|
||||||
"post_date"
|
"post_date"
|
||||||
};
|
};
|
||||||
|
|
||||||
let index = format!("rule34video:{}:{}:{}", page, sort, query);
|
let index = format!("rule34video:{}:{}:{}", page, sort_val, query);
|
||||||
|
|
||||||
let url = format!("{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}", self.url, query.replace(" ","-"), sort, page, page, timestamp_millis);
|
if let Some((time, items)) = cache.get(&index) {
|
||||||
|
if time.elapsed().unwrap_or_default().as_secs() < 300 {
|
||||||
|
return Ok(items.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
let mut requester = options.requester.clone().ok_or("Requester missing")?;
|
||||||
let old_items = match cache.get(&index) {
|
let url = format!(
|
||||||
Some((time, items)) => {
|
"{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}",
|
||||||
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
|
self.url,
|
||||||
return Ok(items.clone());
|
query.replace(" ", "-"),
|
||||||
} else {
|
sort_val,
|
||||||
let _ = cache.check().await;
|
page,
|
||||||
return Ok(items.clone());
|
page,
|
||||||
}
|
timestamp_millis
|
||||||
}
|
);
|
||||||
None => {
|
|
||||||
vec![]
|
let text = requester.get(&url, None).await.unwrap_or_else(|e| {
|
||||||
}
|
eprintln!("Error fetching rule34video URL {}: {}", url, e);
|
||||||
};
|
let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!());
|
||||||
let mut requester = options.requester.clone().unwrap();
|
"".to_string()
|
||||||
let text = requester.get(&url, None).await.unwrap();
|
});
|
||||||
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone());
|
let video_items = self.get_video_items_from_html(text);
|
||||||
if !video_items.is_empty() {
|
|
||||||
cache.remove(&url);
|
if !video_items.is_empty() {
|
||||||
cache.insert(url.clone(), video_items.clone());
|
cache.insert(index, video_items.clone());
|
||||||
} else {
|
|
||||||
return Ok(old_items);
|
|
||||||
}
|
|
||||||
Ok(video_items)
|
Ok(video_items)
|
||||||
|
} else {
|
||||||
|
Ok(cache
|
||||||
|
.get(&index)
|
||||||
|
.map(|(_, items)| items)
|
||||||
|
.unwrap_or_default())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
|
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
|
||||||
if html.is_empty() {
|
if html.is_empty() {
|
||||||
println!("HTML is empty");
|
|
||||||
return vec![];
|
return vec![];
|
||||||
}
|
}
|
||||||
let mut items: Vec<VideoItem> = Vec::new();
|
|
||||||
let video_listing_content = html.split("<div class=\"thumbs clearfix\" id=\"custom_list_videos").collect::<Vec<&str>>()[1].split("<div class=\"pagination\"").collect::<Vec<&str>>()[0].to_string();
|
|
||||||
let raw_videos = video_listing_content
|
|
||||||
.split("<div class=\"item thumb video_")
|
|
||||||
.collect::<Vec<&str>>()[1..]
|
|
||||||
.to_vec();
|
|
||||||
for video_segment in &raw_videos {
|
|
||||||
// let vid = video_segment.split("\n").collect::<Vec<&str>>()[1]
|
|
||||||
// for (index, line) in vid.iter().enumerate() {
|
|
||||||
// println!("Line {}: {}", index, line);
|
|
||||||
// }
|
|
||||||
|
|
||||||
if video_segment.contains("https://rule34video.com/images/advertisements"){
|
// Safely isolate the video listing section
|
||||||
|
let video_listing = match Self::extract_between(
|
||||||
|
&html,
|
||||||
|
"id=\"custom_list_videos",
|
||||||
|
"<div class=\"pagination\"",
|
||||||
|
) {
|
||||||
|
Some(content) => content,
|
||||||
|
None => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut items = Vec::new();
|
||||||
|
// Skip the first split result as it's the preamble
|
||||||
|
let raw_videos = video_listing
|
||||||
|
.split("<div class=\"item thumb video_")
|
||||||
|
.skip(1);
|
||||||
|
|
||||||
|
for video_segment in raw_videos {
|
||||||
|
if video_segment.contains("title=\"Advertisement\"") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut title = video_segment.split("<div class=\"thumb_title\">").collect::<Vec<&str>>()[1]
|
// Title extraction
|
||||||
.split("<")
|
let title_raw =
|
||||||
.collect::<Vec<&str>>()[0]
|
Self::extract_between(video_segment, "<div class=\"thumb_title\">", "<")
|
||||||
|
.unwrap_or("Unknown");
|
||||||
|
let title = decode(title_raw.as_bytes())
|
||||||
|
.to_string()
|
||||||
|
.unwrap_or_else(|_| title_raw.to_string());
|
||||||
|
|
||||||
|
// ID extraction
|
||||||
|
let id = Self::extract_between(video_segment, "https://rule34video.com/video/", "/")
|
||||||
|
.unwrap_or("0")
|
||||||
.to_string();
|
.to_string();
|
||||||
// html decode
|
|
||||||
title = decode(title.as_bytes()).to_string().unwrap_or(title);
|
// Duration extraction
|
||||||
let id = video_segment.split("https://rule34video.com/video/").collect::<Vec<&str>>()[1].split("/").collect::<Vec<&str>>()[0].to_string();
|
let raw_duration =
|
||||||
let raw_duration = video_segment.split("<div class=\"time\">").collect::<Vec<&str>>()[1]
|
Self::extract_between(video_segment, "<div class=\"time\">", "<").unwrap_or("0:00");
|
||||||
.split("<")
|
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
|
||||||
.collect::<Vec<&str>>()[0]
|
|
||||||
|
// Views extraction
|
||||||
|
let views_segment = Self::extract_between(video_segment, "<div class=\"views\">", "<");
|
||||||
|
let views_count_str = views_segment
|
||||||
|
.and_then(|s| s.split("</svg>").nth(1))
|
||||||
|
.unwrap_or("0");
|
||||||
|
let views = parse_abbreviated_number(views_count_str.trim()).unwrap_or(0);
|
||||||
|
|
||||||
|
// Thumbnail extraction
|
||||||
|
let thumb = Self::extract_between(video_segment, "data-original=\"", "\"")
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// URL extraction
|
||||||
|
let url =
|
||||||
|
Self::extract_between(video_segment, "<a class=\"th js-open-popup\" href=\"", "\"")
|
||||||
|
.unwrap_or("")
|
||||||
.to_string();
|
.to_string();
|
||||||
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
|
|
||||||
let views = parse_abbreviated_number(&video_segment
|
|
||||||
.split("<div class=\"views\">").collect::<Vec<&str>>()[1].split("</svg>").collect::<Vec<&str>>()[1]
|
|
||||||
.split("<")
|
|
||||||
.collect::<Vec<&str>>()[0]).unwrap_or(0);
|
|
||||||
//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/
|
|
||||||
//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/
|
|
||||||
let thumb = video_segment.split("<img class=\"thumb lazy-load\" src=\"").collect::<Vec<&str>>()[1].split("data-original=\"").collect::<Vec<&str>>()[1]
|
|
||||||
.split("\"")
|
|
||||||
.collect::<Vec<&str>>()[0]
|
|
||||||
.to_string();
|
|
||||||
let url = video_segment.split("<a class=\"th js-open-popup\" href=\"").collect::<Vec<&str>>()[1]
|
|
||||||
.split("\"")
|
|
||||||
.collect::<Vec<&str>>()[0]
|
|
||||||
.to_string();
|
|
||||||
// let preview = video_segment.split("<div class=\"img wrap_image\" data-preview=\"").collect::<Vec<&str>>()[1]
|
|
||||||
// .split("\"")
|
|
||||||
// .collect::<Vec<&str>>()[0]
|
|
||||||
// .to_string();
|
|
||||||
|
|
||||||
|
items.push(
|
||||||
let video_item = VideoItem::new(
|
VideoItem::new(id, title, url, "Rule34video".to_string(), thumb, duration)
|
||||||
id,
|
.views(views),
|
||||||
title,
|
);
|
||||||
url.to_string(),
|
|
||||||
"Rule34video".to_string(),
|
|
||||||
thumb,
|
|
||||||
duration,
|
|
||||||
)
|
|
||||||
.views(views)
|
|
||||||
// .preview(preview)
|
|
||||||
;
|
|
||||||
|
|
||||||
|
|
||||||
items.push(video_item);
|
|
||||||
}
|
}
|
||||||
return items;
|
items
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@@ -205,30 +243,24 @@ impl Provider for Rule34videoProvider {
|
|||||||
async fn get_videos(
|
async fn get_videos(
|
||||||
&self,
|
&self,
|
||||||
cache: VideoCache,
|
cache: VideoCache,
|
||||||
pool: DbPool,
|
_pool: DbPool,
|
||||||
sort: String,
|
sort: String,
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
page: String,
|
page: String,
|
||||||
per_page: String,
|
_per_page: String,
|
||||||
options: ServerOptions,
|
options: ServerOptions,
|
||||||
) -> Vec<VideoItem> {
|
) -> Vec<VideoItem> {
|
||||||
let _ = options;
|
let page_num = page.parse::<u8>().unwrap_or(1);
|
||||||
let _ = per_page;
|
|
||||||
let _ = pool; // Ignored in this implementation
|
let result = match query {
|
||||||
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
|
Some(q) => self.query(cache, page_num, &q, &sort, options).await,
|
||||||
Some(q) => {
|
None => self.get(cache, page_num, &sort, options).await,
|
||||||
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, &sort, options)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
match videos {
|
|
||||||
|
match result {
|
||||||
Ok(v) => v,
|
Ok(v) => v,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("Error fetching videos: {}", e);
|
eprintln!("Error fetching videos: {}", e);
|
||||||
vec![]
|
vec![]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user