use crate::util::parse_abbreviated_number;
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::flaresolverr::{FlareSolverrRequest, Flaresolverr};
use crate::util::time::parse_time_to_seconds;
use crate::videos::{VideoItem};
use error_chain::error_chain;
use futures::stream::SplitSink;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::env;
use std::vec;
use wreq::{Client, Proxy};
use wreq_util::Emulation;
use std::time::{SystemTime, UNIX_EPOCH};

error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
}

/// Video provider that scrapes listing and search pages of rule34video.com.
#[derive(Debug, Clone)]
pub struct Rule34videoProvider {
    /// Base URL of the site, without a trailing slash.
    url: String,
}

impl Rule34videoProvider {
    /// Sort keys that are forwarded to the site as `sort_by`.
    const EXPECTED_SORTS: [&'static str; 5] = [
        "post_date",
        "video_viewed",
        "rating",
        "duration",
        "pseudo_random",
    ];
    /// Sort key used when the caller passes one that is not in `EXPECTED_SORTS`.
    const DEFAULT_SORT: &'static str = "post_date";
    /// Cached listings younger than this many seconds are served without a request.
    const CACHE_TTL_SECS: u64 = 60 * 5;
    /// Proxy every outgoing request is sent through.
    /// NOTE(review): hard-coded LAN address — consider making this configurable.
    const PROXY_URL: &'static str = "http://192.168.0.103:8081";
    /// Upper bound on manually followed redirects, so a redirect cycle cannot spin forever.
    const MAX_REDIRECTS: usize = 10;

    /// Creates a provider pointing at `https://rule34video.com`.
    pub fn new() -> Self {
        Rule34videoProvider {
            url: "https://rule34video.com".to_string(),
        }
    }

    /// Returns `sort` if it is one of `EXPECTED_SORTS`, otherwise `DEFAULT_SORT`.
    fn normalize_sort(sort: &str) -> &str {
        if Self::EXPECTED_SORTS.contains(&sort) {
            sort
        } else {
            Self::DEFAULT_SORT
        }
    }

    /// Milliseconds since the Unix epoch, appended to request URLs as the `_` parameter.
    ///
    /// Falls back to `0` instead of panicking if the system clock is before the epoch.
    fn timestamp_millis() -> u128 {
        SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis()
    }

    /// Downloads `url` and returns the response body.
    ///
    /// The request goes through `PROXY_URL`. Redirects are followed manually (at most
    /// `MAX_REDIRECTS` hops). If the final response is not a success, the URL is retried
    /// through the FlareSolverr instance named by the `FLARE_URL` environment variable.
    ///
    /// # Errors
    ///
    /// Returns an error if the proxy or client cannot be built, a request fails,
    /// `FLARE_URL` is not set when the fallback is needed, or FlareSolverr fails.
    async fn fetch_html(&self, url: &str) -> Result<String> {
        let proxy = Proxy::all(Self::PROXY_URL)?;
        // NOTE(review): certificate verification is disabled for every request —
        // presumably because of the intercepting proxy; confirm this is intended.
        let client = Client::builder()
            .cert_verification(false)
            .emulation(Emulation::Firefox136)
            .build()?;

        let mut response = client
            .get(url.to_string())
            .proxy(proxy.clone())
            .send()
            .await?;

        for _ in 0..Self::MAX_REDIRECTS {
            if !response.status().is_redirection() {
                break;
            }
            // A redirect without a readable `Location` header cannot be followed; stop
            // here and let the non-success status fall through to FlareSolverr.
            let next_url = match response
                .headers()
                .get("Location")
                .and_then(|value| value.to_str().ok())
            {
                Some(location)
                    if location.starts_with("http://") || location.starts_with("https://") =>
                {
                    location.to_string()
                }
                Some(location) => format!("{}{}", self.url, location),
                None => break,
            };
            response = client.get(next_url).proxy(proxy.clone()).send().await?;
        }

        if response.status().is_success() {
            return Ok(response.text().await?);
        }

        let flare_url = env::var("FLARE_URL").map_err(|_| Error::from("FLARE_URL not set"))?;
        let flare = Flaresolverr::new(flare_url);
        let result = flare
            .solve(FlareSolverrRequest {
                cmd: "request.get".to_string(),
                url: url.to_string(),
                maxTimeout: 60000,
            })
            .await;
        match result {
            Ok(res) => Ok(res.solution.response),
            Err(e) => {
                println!("Error solving FlareSolverr: {}", e);
                Err("Failed to solve FlareSolverr".into())
            }
        }
    }

    /// Fetches `url`, parses the video items and stores them under `cache_key`.
    ///
    /// When the page yields no items, `stale_items` is returned and the cache is left
    /// untouched. Passing `None` as `cache_key` skips the cache write.
    ///
    /// The key is the stable listing index rather than the request URL: the URL carries
    /// a per-request timestamp, so entries stored under it could never be read back.
    async fn fetch_and_cache(
        &self,
        cache: VideoCache,
        cache_key: Option<String>,
        url: String,
        stale_items: Vec<VideoItem>,
    ) -> Result<Vec<VideoItem>> {
        let html = self.fetch_html(&url).await?;
        let video_items: Vec<VideoItem> = self.get_video_items_from_html(html);
        if video_items.is_empty() {
            return Ok(stale_items);
        }
        if let Some(key) = cache_key {
            cache.remove(&key);
            cache.insert(key, video_items.clone());
        }
        Ok(video_items)
    }

    /// Returns one page of the site's video listing, ordered by `sort`.
    ///
    /// Unknown `sort` values fall back to `post_date`. Results are cached per
    /// page/sort for five minutes; the `pseudo_random` sort bypasses the cache.
    /// If a refresh yields no items, the stale cached items (if any) are returned.
    ///
    /// # Errors
    ///
    /// Propagates any error from fetching the page (see `fetch_html`).
    async fn get(
        &self,
        cache: VideoCache,
        page: u8,
        sort: &str,
    ) -> Result<Vec<VideoItem>> {
        let sort = Self::normalize_sort(sort);
        let index = format!("rule34video:{}:{}", page, sort);
        let url = format!("{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", self.url, sort, page, Self::timestamp_millis());

        let cacheable = sort != "pseudo_random";
        let mut old_items: Vec<VideoItem> = vec![];
        if cacheable {
            if let Some((time, items)) = cache.get(&index) {
                if time.elapsed().unwrap_or_default().as_secs() < Self::CACHE_TTL_SECS {
                    println!("Cache hit for URL: {}", url);
                    return Ok(items.clone());
                }
                old_items = items.clone();
            }
        }

        let cache_key = if cacheable { Some(index) } else { None };
        self.fetch_and_cache(cache, cache_key, url, old_items).await
    }

    /// Returns one page of search results for `query`, ordered by `sort`.
    ///
    /// Unknown `sort` values fall back to `post_date`. Results are cached per
    /// page/sort/query for five minutes. A stale entry triggers `cache.check()` and a
    /// refetch; if the refetch yields no items, the stale items are returned.
    ///
    /// # Errors
    ///
    /// Propagates any error from fetching the page (see `fetch_html`).
    async fn query(
        &self,
        cache: VideoCache,
        page: u8,
        query: &str,
        sort: &str,
    ) -> Result<Vec<VideoItem>> {
        let sort = Self::normalize_sort(sort);
        let index = format!("rule34video:{}:{}:{}", page, sort, query);
        // NOTE(review): `query` is interpolated into the URL path without encoding —
        // verify that callers pass an already URL-safe value.
        let url = format!("{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from={}&_={}", self.url, query, sort, page, Self::timestamp_millis());

        // Check our video cache. If the result is younger than 5 minutes, return it;
        // otherwise keep it as a fallback and refresh from the site.
        let old_items: Vec<VideoItem> = match cache.get(&index) {
            Some((time, items)) => {
                if time.elapsed().unwrap_or_default().as_secs() < Self::CACHE_TTL_SECS {
                    return Ok(items.clone());
                }
                let _ = cache.check().await;
                items.clone()
            }
            None => vec![],
        };

        self.fetch_and_cache(cache, Some(index), url, old_items).await
    }

    fn get_video_items_from_html(&self, html: String) -> Vec { if html.is_empty() { println!("HTML is empty"); return vec![]; } let mut items: Vec = Vec::new(); let video_listing_content = html.split("
>()[1].split("
>()[0].to_string(); let raw_videos = video_listing_content .split("
>()[1..] .to_vec(); for video_segment in &raw_videos { // let vid = video_segment.split("\n").collect::>()[1] // for (index, line) in vid.iter().enumerate() { // println!("Line {}: {}", index, line); // } if video_segment.contains("https://rule34video.com/images/advertisements"){ continue; } let mut title = video_segment.split("
").collect::>()[1] .split("<") .collect::>()[0] .to_string(); // html decode title = decode(title.as_bytes()).to_string().unwrap_or(title); let id = video_segment.split("https://rule34video.com/video/").collect::>()[1].split("/").collect::>()[0].to_string(); let raw_duration = video_segment.split("
").collect::>()[1] .split("<") .collect::>()[0] .to_string(); let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32; let views = parse_abbreviated_number(&video_segment .split("