diff --git a/src/api.rs b/src/api.rs
index 801c314..c0171c2 100644
--- a/src/api.rs
+++ b/src/api.rs
@@ -16,6 +16,7 @@
use crate::providers::rule34video::Rule34videoProvider;
// use crate::providers::spankbang::SpankbangProvider;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::proxy::{Proxy, all_proxies_snapshot};
use crate::util::requester::Requester;
use crate::{DbPool, db, status::*, videos::*};
use cute::c;
@@ -110,6 +111,10 @@
pub fn config(cfg: &mut web::ServiceConfig) {
        .service(
            web::resource("/test")
                .route(web::get().to(test))
        )
        .service(
            web::resource("/proxies")
                .route(web::get().to(proxies))
        )
    ;
}
@@ -1202,3 +1207,25 @@
pub async fn test() -> Result {
    Ok(web::HttpResponse::Ok())
}

/// GET /proxies — dump the verified-proxy cache as JSON, grouped by protocol.
///
/// Serialises to `{}` while the background proxy scan has not produced
/// anything yet (`all_proxies_snapshot` returns `None` before initialisation).
pub async fn proxies() -> Result {
    let proxies = all_proxies_snapshot().await.unwrap_or_default();

    // Group by protocol; BTreeMap keeps the protocol keys themselves sorted.
    let mut by_protocol: std::collections::BTreeMap<String, Vec<Proxy>> =
        std::collections::BTreeMap::new();
    for proxy in proxies {
        by_protocol
            .entry(proxy.protocol.clone())
            .or_default()
            .push(proxy);
    }

    // Order each group deterministically so repeated calls serialise the
    // same document regardless of verification-completion order.
    for group in by_protocol.values_mut() {
        group.sort_by(|a, b| {
            (&a.host, a.port, &a.username, &a.password)
                .cmp(&(&b.host, b.port, &b.username, &b.password))
        });
    }

    Ok(web::HttpResponse::Ok().json(&by_protocol))
}
diff --git a/src/providers/hentaihaven.rs b/src/providers/hentaihaven.rs
new file mode 100644
index 0000000..0ded2ab
--- /dev/null
+++ b/src/providers/hentaihaven.rs
@@ -0,0 +1,365 @@
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::schema::videos::url;
use crate::status::{Channel, ChannelOption, FilterOption};
use crate::util::cache::VideoCache;
use crate::util::flaresolverr::{FlareSolverrRequest, Flaresolverr};
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use crate::{DbPool, videos};
use async_trait::async_trait;
use 
error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use std::env; +use std::vec; +use wreq::{Client, Proxy}; +use wreq_util::Emulation; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + } +} + +#[derive(Debug, Clone)] +pub struct HentaihavenProvider { + url: String, +} +impl HentaihavenProvider { + pub fn new() -> Self { + HentaihavenProvider { + url: "https://hentaihaven.xxx".to_string(), + } + } + + fn build_channel(&self, clientversion: ClientVersion) -> Channel { + let _ = clientversion; + + Channel { + id: "hentaihaven".to_string(), + name: "WORK IN PROGRESS Hentai Haven".to_string(), + description: "".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=hentaihaven.xxx".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![], + nsfw: true, + cacheDuration: None, + } + } + + async fn get(&self, cache: VideoCache, page: u8, sort: &str) -> Result> { + let video_url = format!("{}/page/{}/?m_orderby={}", self.url, page, sort); + let old_items = match cache.get(&video_url) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + println!("Cache hit for URL: {}", video_url); + return Ok(items.clone()); + } else { + items.clone() + } + } + None => { + vec![] + } + }; + + let proxy = Proxy::all("http://192.168.0.103:8081").unwrap(); + let client = Client::builder() + .cert_verification(false) + .emulation(Emulation::Firefox136) + .build()?; + + let mut response = client + .get(video_url.clone()) + .proxy(proxy.clone()) + .send() + .await?; + if response.status().is_redirection() { + println!( + "Redirection detected, following to: {}", + response.headers()["Location"].to_str().unwrap() + ); + response = client + .get(response.headers()["Location"].to_str().unwrap()) + // .proxy(proxy) + .send() + .await?; + } + if response.status().is_success() { + let text = response.text().await?; + let video_items: 
Vec = self.get_video_items_from_html(text.clone()); + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + } else { + return Ok(old_items); + } + Ok(video_items) + } else { + let flare_url = env::var("FLARE_URL").expect("FLARE_URL not set"); + let flare = Flaresolverr::new(flare_url); + let result = flare + .solve(FlareSolverrRequest { + cmd: "request.get".to_string(), + url: video_url.clone(), + maxTimeout: 60000, + }) + .await; + let video_items = match result { + Ok(res) => { + // println!("FlareSolverr response: {}", res); + self.get_video_items_from_html(res.solution.response) + } + Err(e) => { + println!("Error solving FlareSolverr: {}", e); + return Err("Failed to solve FlareSolverr".into()); + } + }; + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + } else { + return Ok(old_items); + } + Ok(video_items) + } + } + async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result> { + let search_string = query.to_lowercase().trim().replace(" ", "-"); + let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page); + + if search_string.starts_with("@") { + let url_part = search_string.split("@").collect::>()[1].replace(":", "/"); + video_url = format!("{}/{}/", self.url, url_part); + } + // Check our Video Cache. If the result is younger than 1 hour, we return it. 
+ let old_items = match cache.get(&video_url) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + return Ok(items.clone()); + } else { + let _ = cache.check().await; + return Ok(items.clone()); + } + } + None => { + vec![] + } + }; + + let proxy = Proxy::all("http://192.168.0.103:8081").unwrap(); + let client = Client::builder() + .cert_verification(false) + .emulation(Emulation::Firefox136) + .build()?; + + let mut response = client + .get(video_url.clone()) + .proxy(proxy.clone()) + .send() + .await?; + + if response.status().is_redirection() { + response = client + .get(self.url.clone() + response.headers()["Location"].to_str().unwrap()) + // .proxy(proxy) + .send() + .await?; + } + + if response.status().is_success() { + let text = response.text().await?; + let video_items: Vec = self.get_video_items_from_html(text.clone()); + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + } else { + return Ok(old_items); + } + Ok(video_items) + } else { + let flare_url = env::var("FLARE_URL").expect("FLARE_URL not set"); + let flare = Flaresolverr::new(flare_url); + let result = flare + .solve(FlareSolverrRequest { + cmd: "request.get".to_string(), + url: video_url.clone(), + maxTimeout: 60000, + }) + .await; + let video_items = match result { + Ok(res) => self.get_video_items_from_html(res.solution.response), + Err(e) => { + println!("Error solving FlareSolverr: {}", e); + return Err("Failed to solve FlareSolverr".into()); + } + }; + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + } else { + return Ok(old_items); + } + Ok(video_items) + } + } + + fn get_video_items_from_html(&self, html: String) -> Vec { + if html.is_empty() { + println!("HTML is empty"); + return vec![]; + } + let mut items: Vec = Vec::new(); + let raw_videos = html.split("\"wp-pagenavi\"").collect::>()[0] + .split("page-item-detail 
video") + .collect::>()[1..] + .to_vec(); + for video_segment in &raw_videos { + let vid = video_segment.split("\n").collect::>(); + for (index, line) in vid.iter().enumerate() { + println!("Line {}: {}", index, line); + } + + let episode_count = video_segment + .split("chapter font-meta") + .collect::>()[1] + .split("class=\"btn-link\">") + .collect::>()[1] + .split("<") + .collect::>()[0] + .split(" ") + .collect::>()[2] + .to_string() + .parse::() + .unwrap(); + let season = video_segment + .split("chapter font-meta") + .collect::>()[1] + .split("class=\"btn-link\">") + .collect::>()[1] + .split("<") + .collect::>()[0] + .split(" ") + .collect::>()[1] + == "Season"; + let mut url_part_list = video_segment + .split("chapter font-meta") + .collect::>()[1] + .split("href=\"") + .collect::>()[1] + .split("\"") + .collect::>()[0] + .split("/") + .collect::>()[4] + .split("-") + .collect::>(); + if url_part_list.len() > 5 { + if let Some(pos) = url_part_list.iter().rposition(|x| *x == "no") { + url_part_list.remove(pos); + } + } + url_part_list.truncate(5); + let url_part = url_part_list.join("-"); + for i in 1..=episode_count { + let mut video_url = format!( + "https://master-lengs.org/api/v3/hh/{}-{}-eng/master.m3u8", + url_part, i + ); + if season { + video_url = format!( + "https://master-lengs.org/api/v3/hh/{}-season-eng/master.m3u8", + url_part + ); + } + let title = format!( + "{} - {}", + video_segment.split("title=\"").collect::>()[1] + .split("\"") + .collect::>()[0] + .to_string(), + i + ); + let id = format!("{}-{}", url_part, i); + + let thumb = match video_segment.split(">()[1] + .split("") + .collect::>()[0] + .contains("data-src=\"") + { + true => video_segment.split(">()[1] + .split("data-src=\"") + .collect::>()[1] + .split("\"") + .collect::>()[0] + .replace(" ", "%20") + .to_string(), + false => video_segment.split(">()[1] + .split("src=\"") + .collect::>()[1] + .split("\"") + .collect::>()[0] + .replace(" ", "%20") + .to_string(), + }; + 
items.push( + VideoItem::new( + id, + title, + video_url.clone(), + "hentaihaven".to_string(), + thumb, + 0, // duration is not available + ) + .formats(vec![videos::VideoFormat::new( + video_url.clone(), + "1080".to_string(), + "m3u8".to_string(), + )]) + .aspect_ratio(0.73), + ); + } + } + return items; + //return items; + } +} + +#[async_trait] +impl Provider for HentaihavenProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = options; + let _ = per_page; + let _ = pool; + let videos: std::result::Result, Error> = match query { + Some(q) => self.query(cache, page.parse::().unwrap_or(1), &q).await, + None => { + self.get(cache, page.parse::().unwrap_or(1), &sort) + .await + } + }; + match videos { + Ok(v) => v, + Err(e) => { + println!("Error fetching videos: {}", e); + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 6f5c147..dc3c0eb 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -42,6 +42,8 @@ pub mod pimpbunny; pub mod javtiful; pub mod hypnotube; pub mod freepornvideosxxx; +pub mod hentaihaven; + // convenient alias pub type DynProvider = Arc; @@ -60,6 +62,7 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| m.insert("javtiful", Arc::new(javtiful::JavtifulProvider::new()) as DynProvider); m.insert("hypnotube", Arc::new(hypnotube::HypnotubeProvider::new()) as DynProvider); m.insert("freepornvideosxxx", Arc::new(freepornvideosxxx::FreepornvideosxxxProvider::new()) as DynProvider); + m.insert("hentaihaven", Arc::new(hentaihaven::HentaihavenProvider::new()) as DynProvider); // add more here as you migrate them m }); diff --git a/src/util/mod.rs b/src/util/mod.rs index f7e61e4..399e9d1 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -3,6 +3,7 @@ pub mod 
flaresolverr; pub mod cache; pub mod requester; pub mod discord; +pub mod proxy; pub fn parse_abbreviated_number(s: &str) -> Option { let s = s.trim(); @@ -42,4 +43,4 @@ pub fn interleave(lists: &[Vec]) -> Vec { } result -} \ No newline at end of file +} diff --git a/src/util/proxy.rs b/src/util/proxy.rs new file mode 100644 index 0000000..8ab17c4 --- /dev/null +++ b/src/util/proxy.rs @@ -0,0 +1,311 @@ +use std::fmt; +use std::sync::Arc; +use std::str::FromStr; + +use serde::Serialize; +use tokio::sync::{OnceCell, RwLock}; +use tokio::time::{self, Duration}; +use url::Url; +use wreq::Proxy as WreqProxy; +use wreq::{Client, Version}; + +use crate::util::requester::Requester; + +pub static ALL_PROXIES: OnceCell>>> = OnceCell::const_new(); +static ALL_PROXY_KEYS: OnceCell>>> = + OnceCell::const_new(); + +const PROXY_LIST: [&str; 3] = [ + "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt", + "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt", + "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt", +]; +const IFCONFIG_URL: &str = "https://ifconfig.co"; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct Proxy { + pub protocol: String, + pub host: String, + pub port: u16, + pub username: Option, + pub password: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ProxyParseError { + message: String, +} + +impl ProxyParseError { + fn new(message: impl Into) -> Self { + Self { + message: message.into(), + } + } +} + +pub fn all_proxies_handle() -> Option>>> { + ALL_PROXIES.get().cloned() +} + +pub async fn all_proxies_snapshot() -> Option> { + let handle = ALL_PROXIES.get()?.clone(); + let proxies = handle.read().await; + Some(proxies.clone()) +} + +impl fmt::Display for ProxyParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.message) + } +} + +impl std::error::Error for ProxyParseError {} + +impl fmt::Display for Proxy { + fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match (&self.username, &self.password) { + (Some(username), Some(password)) => write!( + f, + "{}://{}:{}@{}:{}", + self.protocol, username, password, self.host, self.port + ), + (Some(username), None) => write!( + f, + "{}://{}@{}:{}", + self.protocol, username, self.host, self.port + ), + (None, Some(_)) => write!( + f, + "{}://{}:{}", + self.protocol, self.host, self.port + ), + (None, None) => write!(f, "{}://{}:{}", self.protocol, self.host, self.port), + } + } +} + +impl Proxy { + pub fn parse(input: &str) -> Result { + input.parse() + } + + pub fn to_wreq_proxy(&self) -> Result { + let base_url = format!("{}://{}:{}", self.protocol, self.host, self.port); + let proxy_url = Url::parse(&base_url) + .map_err(|e| ProxyParseError::new(format!("invalid proxy url: {e}")))?; + + let mut proxy = WreqProxy::all(proxy_url.as_str()) + .map_err(|e| ProxyParseError::new(format!("failed to build wreq proxy: {e}")))?; + + if let Some(username) = &self.username { + let password = self.password.as_deref().unwrap_or(""); + proxy = proxy.basic_auth(username, password); + } + + Ok(proxy) + } +} + +pub async fn fetch_proxies( + requester: &mut Requester, + proxy_list_url: &str, + proxy_sink: Arc>>, +) -> Result { + let default_protocol = protocol_from_list_url(proxy_list_url)?; + let body = requester + .get(proxy_list_url, None) + .await + .map_err(|e| ProxyParseError::new(format!("failed to fetch proxy list: {e}")))?; + let mut tasks = tokio::task::JoinSet::new(); + for (line_index, line) in body.lines().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + let proxy_input = if trimmed.contains("://") { + trimmed.to_string() + } else { + format!("{default_protocol}://{trimmed}") + }; + + let proxy = Proxy::parse(&proxy_input).map_err(|e| { + ProxyParseError::new(format!("invalid proxy on line {}: {e}", line_index + 1)) + })?; + + tasks.spawn(async move { verify_proxy(proxy).await }); + } 
+ + let mut added = 0usize; + while let Some(result) = tasks.join_next().await { + match result { + Ok(Ok(proxy)) => { + if let Some(proxy_keys) = ALL_PROXY_KEYS.get() { + let key = proxy_key(&proxy); + let mut keys = proxy_keys.write().await; + if !keys.insert(key) { + continue; + } + let mut proxies = proxy_sink.write().await; + proxies.push(proxy); + added += 1; + } + } + Ok(Err(err)) => { + // eprintln!("Proxy verification failed: {err}"); + } + Err(err) => { + eprintln!("Proxy verification task failed: {err}"); + } + } + } + + Ok(added) +} + +fn protocol_from_list_url(list_url: &str) -> Result<&'static str, ProxyParseError> { + if list_url.contains("http.txt") { + Ok("http") + } else if list_url.contains("socks4.txt") { + Ok("socks4") + } else if list_url.contains("socks5.txt") { + Ok("socks5") + } else { + Err(ProxyParseError::new(format!( + "unknown proxy list protocol for url: {list_url}" + ))) + } +} + +async fn verify_proxy(proxy: Proxy) -> Result { + let wreq_proxy = proxy.to_wreq_proxy()?; + let client = Client::builder() + .cert_verification(false) + .build() + .map_err(|e| ProxyParseError::new(format!("failed to build http client: {e}")))?; + + let response = client + .get(IFCONFIG_URL) + .version(Version::HTTP_11) + .proxy(wreq_proxy) + .timeout(Duration::from_secs(5)) + .send() + .await + .map_err(|e| ProxyParseError::new(format!("proxy request failed: {e}")))?; + + if response.status().is_success() { + Ok(proxy) + } else { + Err(ProxyParseError::new(format!( + "proxy returned status {}", + response.status() + ))) + } +} + +pub fn init_all_proxies_background(requester: Requester) { + if ALL_PROXIES.get().is_some() { + return; + } + + if tokio::runtime::Handle::try_current().is_err() { + eprintln!("Skipping proxy list init: no Tokio runtime available"); + return; + } + + let proxy_cache = Arc::new(RwLock::new(Vec::new())); + let proxy_keys = Arc::new(RwLock::new(std::collections::HashSet::new())); + if ALL_PROXIES.set(proxy_cache.clone()).is_err() { 
+ return; + } + let _ = ALL_PROXY_KEYS.set(proxy_keys); + + tokio::spawn(async move { + let mut interval = time::interval(Duration::from_secs(60 * 60)); + loop { + let mut tasks = tokio::task::JoinSet::new(); + for list in PROXY_LIST { + let proxy_cache = proxy_cache.clone(); + let mut requester = requester.clone(); + tasks + .spawn(async move { fetch_proxies(&mut requester, list, proxy_cache).await }); + } + + while let Some(result) = tasks.join_next().await { + match result { + Ok(Ok(_added)) => {} + Ok(Err(err)) => { + eprintln!("Failed to fetch proxy list: {err}"); + } + Err(err) => { + eprintln!("Proxy list task failed: {err}"); + } + } + } + + interval.tick().await; + } + }); +} + +impl FromStr for Proxy { + type Err = ProxyParseError; + + fn from_str(input: &str) -> Result { + let trimmed = input.trim(); + if trimmed.is_empty() { + return Err(ProxyParseError::new("proxy string is empty")); + } + + let with_scheme = if trimmed.contains("://") { + trimmed.to_string() + } else { + format!("http://{trimmed}") + }; + + let url = Url::parse(&with_scheme) + .map_err(|e| ProxyParseError::new(format!("invalid proxy url: {e}")))?; + if !(url.path().is_empty() || url.path() == "/") + || url.query().is_some() + || url.fragment().is_some() + { + println!("Parsed proxy URL: {:?}", url); + return Err(ProxyParseError::new(format!( + "proxy url must not include path, query, or fragment: {:?}", + input + ))); + } + + let host = url + .host_str() + .ok_or_else(|| ProxyParseError::new("proxy url is missing host"))? 
+ .to_string(); + + let port = url + .port() + .unwrap_or(80); + + Ok(Proxy { + protocol: url.scheme().to_string(), + host, + port, + username: match url.username() { + "" => None, + username => Some(username.to_string()), + }, + password: url.password().map(|password| password.to_string()), + }) + } +} + +fn proxy_key(proxy: &Proxy) -> String { + format!( + "{}://{}:{}@{}:{}", + proxy.protocol, + proxy.username.as_deref().unwrap_or(""), + proxy.password.as_deref().unwrap_or(""), + proxy.host, + proxy.port + ) +} diff --git a/src/util/requester.rs b/src/util/requester.rs index c0efdd3..93f6e3c 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -8,7 +8,9 @@ use wreq::Version; use wreq::header::HeaderValue; use wreq::redirect::Policy; use wreq_util::Emulation; +use rand::seq::SliceRandom; +use crate::util::proxy; use crate::util::flaresolverr::FlareSolverrRequest; use crate::util::flaresolverr::Flaresolverr; @@ -21,6 +23,7 @@ pub struct Requester { client: Client, proxy: bool, flaresolverr_session: Option, + use_random_proxy: bool, } impl Requester { @@ -33,11 +36,16 @@ impl Requester { .build() .expect("Failed to create HTTP client"); - Requester { + let requester = Requester { client, proxy: false, flaresolverr_session: None, - } + use_random_proxy: false, + }; + + proxy::init_all_proxies_background(requester.clone()); + + requester } pub fn set_proxy(&mut self, proxy: bool) { @@ -47,6 +55,10 @@ impl Requester { self.proxy = proxy; } + pub fn set_random_proxy(&mut self, random: bool) { + self.use_random_proxy = random; + } + pub async fn get_raw(&mut self, url: &str) -> Result { let client = Client::builder() .cert_verification(false) @@ -67,6 +79,26 @@ impl Requester { request.send().await } + pub async fn get_raw_with_proxy( + &mut self, + url: &str, + proxy: Proxy, + ) -> Result { + let client = Client::builder() + .cert_verification(false) + .emulation(Emulation::Firefox136) + .cookie_store(true) + .build() + .expect("Failed to create HTTP 
client"); + + client + .get(url) + .version(Version::HTTP_11) + .proxy(proxy) + .send() + .await + } + pub async fn get_raw_with_headers( &mut self, url: &str, @@ -185,7 +217,14 @@ pub async fn post_json( let proxy = Proxy::all(&proxy_url).unwrap(); request = request.proxy(proxy); } - } + } + // else if self.use_random_proxy { + // let proxies = proxy::all_proxies_snapshot().await.unwrap_or_default(); + // if !proxies.is_empty() { + // let mut random_proxy = proxies.choose_mut(&mut rand::thread_rng()).unwrap().clone(); + // request = request.proxy(random_proxy); + // } + // } let response = request.send().await?; if response.status().is_success() || response.status().as_u16() == 404 { return Ok(response.text().await?);