diff --git a/src/providers/porndish.rs b/src/providers/porndish.rs
index 6941288..3dddaac 100644
--- a/src/providers/porndish.rs
+++ b/src/providers/porndish.rs
@@ -13,9 +13,9 @@ use futures::stream::{self, StreamExt};
 use htmlentity::entity::{ICodedDataTrait, decode};
 use regex::Regex;
 use scraper::{ElementRef, Html, Selector};
-use std::process::Command;
 use std::sync::{Arc, RwLock};
 use std::thread;
+use std::time::{SystemTime, UNIX_EPOCH};
 
 error_chain! {
     foreign_links {
@@ -263,52 +263,22 @@ impl PorndishProvider {
         );
     }
 
-    async fn fetch_with_curl_cffi(url: &str, referer: Option<&str>) -> Result<String> {
-        let url = url.to_string();
-        let referer = referer.unwrap_or("").to_string();
-
-        let output = tokio::task::spawn_blocking(move || {
-            Command::new("python3")
-                .arg("-c")
-                .arg(
-                    r#"
-import sys
-from curl_cffi import requests
-
-url = sys.argv[1]
-referer = sys.argv[2] if len(sys.argv) > 2 else ""
-headers = {"Referer": referer} if referer else {}
-response = requests.get(
-    url,
-    impersonate="chrome",
-    timeout=30,
-    allow_redirects=True,
-    headers=headers,
-)
-if response.status_code >= 400:
-    sys.stderr.write(f"status={response.status_code} url={response.url}\n")
-    sys.exit(1)
-sys.stdout.buffer.write(response.content)
-"#,
-                )
-                .arg(url)
-                .arg(referer)
-                .output()
-        })
-        .await
-        .map_err(|error| Error::from(format!("spawn_blocking failed: {error}")))?
-        .map_err(|error| Error::from(format!("python3 execution failed: {error}")))?;
-
-        if !output.status.success() {
-            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
-            return Err(Error::from(format!("curl_cffi request failed: {stderr}")));
-        }
-
-        Ok(String::from_utf8_lossy(&output.stdout).to_string())
+    fn request_headers(referer: Option<&str>) -> Vec<(String, String)> {
+        let referer = referer
+            .filter(|referer| !referer.is_empty())
+            .unwrap_or("https://www.porndish.com/");
+        vec![("Referer".to_string(), referer.to_string())]
     }
 
-    async fn fetch_html(url: &str) -> Result<String> {
-        Self::fetch_with_curl_cffi(url, None).await
+    async fn fetch_html(
+        requester: &mut Requester,
+        url: &str,
+        referer: Option<&str>,
+    ) -> Result<String> {
+        requester
+            .get_with_headers(url, Self::request_headers(referer), None)
+            .await
+            .map_err(|error| Error::from(format!("request failed: {error}")))
     }
 
     async fn load_filters(
@@ -317,6 +287,7 @@ sys.stdout.buffer.write(response.content)
         tags: Arc<RwLock<Vec<String>>>,
         uploaders: Arc<RwLock<Vec<String>>>,
     ) -> Result<()> {
+        let mut requester = Requester::new();
         let link_selector = Self::selector("a[href]")?;
         let article_selector = Self::selector("article.entry-tpl-grid, article.post")?;
         let pages = vec![
@@ -328,7 +299,7 @@ sys.stdout.buffer.write(response.content)
         ];
 
         for url in pages {
-            let html = match Self::fetch_html(&url).await {
+            let html = match Self::fetch_html(&mut requester, &url, None).await {
                 Ok(html) => html,
                 Err(error) => {
                     report_provider_error_background(
@@ -648,59 +619,64 @@ sys.stdout.buffer.write(response.content)
         Ok(fragments)
     }
 
-    async fn resolve_myvidplay_stream(&self, iframe_url: &str) -> Result<String> {
-        let iframe_url = iframe_url.to_string();
-        let output = tokio::task::spawn_blocking(move || {
-            Command::new("python3")
-                .arg("-c")
-                .arg(
-                    r#"
-import re
-import sys
-import time
-from curl_cffi import requests
+    async fn resolve_myvidplay_stream(
+        &self,
+        requester: &mut Requester,
+        iframe_url: &str,
+        referer: &str,
+    ) -> Result<String> {
+        let html = Self::fetch_html(requester, iframe_url, Some(referer)).await?;
+        let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?;
+        let path = pass_regex
+            .captures(&html)
+            .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
+            .ok_or_else(|| Error::from("myvidplay resolution failed: missing pass_md5 path"))?;
 
-iframe_url = sys.argv[1]
-session = requests.Session(impersonate="chrome")
-html = session.get(iframe_url, timeout=30).text
-match = re.search(r"\$\.get\(\s*['\"](/pass_md5/[^'\"]+)['\"]", html)
-if not match:
-    sys.stderr.write("missing pass_md5 path\n")
-    sys.exit(1)
-path = match.group(1)
-token = path.rstrip("/").split("/")[-1]
-if not token:
-    sys.stderr.write("missing pass_md5 token\n")
-    sys.exit(1)
-if path.startswith("http://") or path.startswith("https://"):
-    pass_url = path
-else:
-    pass_url = "/".join(iframe_url.split("/")[:3]) + path
-base = session.get(pass_url, headers={"Referer": iframe_url}, timeout=30).text.strip()
-if not base or base == "RELOAD" or not base.startswith("http"):
-    sys.stderr.write(f"unusable pass_md5 response: {base[:120]}\n")
-    sys.exit(1)
-chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-now = int(time.time() * 1000)
-suffix = "".join(chars[(now + i * 17) % len(chars)] for i in range(10))
-sys.stdout.write(f"{base}{suffix}?token={token}&expiry={now}")
-"#,
-                )
-                .arg(iframe_url)
-                .output()
-        })
-        .await
-        .map_err(|error| Error::from(format!("spawn_blocking failed: {error}")))?
-        .map_err(|error| Error::from(format!("python3 execution failed: {error}")))?;
+        let token = path
+            .trim_end_matches('/')
+            .rsplit('/')
+            .next()
+            .unwrap_or_default()
+            .to_string();
+        if token.is_empty() {
+            return Err(Error::from(
+                "myvidplay resolution failed: missing pass_md5 token".to_string(),
+            ));
+        }
 
-        if !output.status.success() {
-            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
+        let pass_url = if path.starts_with("http://") || path.starts_with("https://") {
+            path
+        } else {
+            let base = url::Url::parse(iframe_url)
+                .map_err(|error| Error::from(format!("invalid iframe url: {error}")))?;
+            base.join(&path)
+                .map_err(|error| Error::from(format!("invalid pass_md5 url: {error}")))?
+                .to_string()
+        };
+
+        let base = Self::fetch_html(requester, &pass_url, Some(iframe_url))
+            .await?
+            .trim()
+            .to_string();
+        if base.is_empty() || base == "RELOAD" || !base.starts_with("http") {
             return Err(Error::from(format!(
-                "myvidplay resolution failed: {stderr}"
+                "myvidplay resolution failed: unusable pass_md5 response: {}",
+                &base.chars().take(120).collect::<String>()
             )));
         }
 
-        let resolved = String::from_utf8_lossy(&output.stdout).trim().to_string();
+        let chars = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+        let now = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .map_err(|error| Error::from(format!("time error: {error}")))?
+            .as_millis();
+        let suffix = (0..10)
+            .map(|index| {
+                let pos = ((now + (index as u128 * 17)) % chars.len() as u128) as usize;
+                chars[pos] as char
+            })
+            .collect::<String>();
+        let resolved = format!("{base}{suffix}?token={token}&expiry={now}");
         if resolved.is_empty() || !resolved.starts_with("http") {
             return Err(Error::from(
                 "myvidplay resolution returned empty url".to_string(),
@@ -725,7 +701,7 @@ sys.stdout.write(f"{base}{suffix}?token={token}&expiry={now}")
         html: &str,
         page_url: &str,
         options: &ServerOptions,
-        _requester: &mut Requester,
+        requester: &mut Requester,
     ) -> Result<VideoItem> {
         let (
             parsed_title,
@@ -864,7 +840,10 @@ sys.stdout.write(f"{base}{suffix}?token={token}&expiry={now}")
         });
 
         if iframe_url.contains("myvidplay.com") {
-            match self.resolve_myvidplay_stream(&iframe_url).await {
+            match self
+                .resolve_myvidplay_stream(requester, &iframe_url, page_url)
+                .await
+            {
                 Ok(stream_url) => {
                     item.url = stream_url.clone();
                     let mut format = VideoFormat::new(
@@ -919,7 +898,7 @@ sys.stdout.write(f"{base}{suffix}?token={token}&expiry={now}")
             None => Requester::new(),
         };
 
-        let html = match Self::fetch_with_curl_cffi(&page_url, None).await {
+        let html = match Self::fetch_html(&mut requester, &page_url, None).await {
             Ok(html) => html,
             Err(error) => {
                 report_provider_error_background(
@@ -959,10 +938,10 @@ sys.stdout.write(f"{base}{suffix}?token={token}&expiry={now}")
             }
         }
 
-        let _requester =
+        let mut requester =
             crate::providers::requester_or_default(options, "porndish", "missing_requester");
 
-        let html = match Self::fetch_with_curl_cffi(&url, None).await {
+        let html = match Self::fetch_html(&mut requester, &url, None).await {
             Ok(html) => html,
             Err(error) => {
                 report_provider_error(
diff --git a/src/proxies/porndishthumb.rs b/src/proxies/porndishthumb.rs
index 61e88d1..6e9e04b 100644
--- a/src/proxies/porndishthumb.rs
+++ b/src/proxies/porndishthumb.rs
@@ -1,15 +1,14 @@
-use ntex::http::header::CONTENT_TYPE;
+use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
 use ntex::{
     http::Response,
     web::{self, HttpRequest, error},
 };
-use std::process::Command;
 
 use crate::util::requester::Requester;
 
 pub async fn get_image(
     req: HttpRequest,
-    _requester: web::types::State<Requester>,
+    requester: web::types::State<Requester>,
 ) -> Result<Response, web::Error> {
     let endpoint = req.match_info().query("endpoint").to_string();
     let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
@@ -18,45 +17,41 @@ pub async fn get_image(
         format!("https://{}", endpoint.trim_start_matches('/'))
     };
 
-    let output = tokio::task::spawn_blocking(move || {
-        Command::new("python3")
-            .arg("-c")
-            .arg(
-                r#"
-import sys
-from curl_cffi import requests
+    let upstream = match requester
+        .get_ref()
+        .clone()
+        .get_raw_with_headers(
+            image_url.as_str(),
+            vec![(
+                "Referer".to_string(),
+                "https://www.porndish.com/".to_string(),
+            )],
+        )
+        .await
+    {
+        Ok(response) => response,
+        Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
+    };
 
-url = sys.argv[1]
-response = requests.get(
-    url,
-    impersonate="chrome",
-    timeout=30,
-    allow_redirects=True,
-    headers={"Referer": "https://www.porndish.com/"},
-)
-if response.status_code >= 400:
-    sys.stderr.write(f"status={response.status_code}\n")
-    sys.exit(1)
-sys.stderr.write(response.headers.get("content-type", "application/octet-stream"))
-sys.stdout.buffer.write(response.content)
-"#,
-            )
-            .arg(image_url)
-            .output()
-    })
-    .await
-    .map_err(error::ErrorBadGateway)?
-    .map_err(error::ErrorBadGateway)?;
+    let status = upstream.status();
+    let headers = upstream.headers().clone();
+    let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
 
-    if !output.status.success() {
+    if !status.is_success() {
        return Ok(web::HttpResponse::NotFound().finish());
     }
 
-    let content_type = String::from_utf8_lossy(&output.stderr).trim().to_string();
-
-    let mut resp = Response::build(ntex::http::StatusCode::OK);
-    if !content_type.is_empty() {
-        resp.set_header(CONTENT_TYPE, content_type);
+    let mut resp = Response::build(status);
+    if let Some(ct) = headers.get(CONTENT_TYPE) {
+        if let Ok(ct_str) = ct.to_str() {
+            resp.set_header(CONTENT_TYPE, ct_str);
+        }
+    }
+    if let Some(cl) = headers.get(CONTENT_LENGTH) {
+        if let Ok(cl_str) = cl.to_str() {
+            resp.set_header(CONTENT_LENGTH, cl_str);
+        }
     }
-    Ok(resp.body(output.stdout))
+
+    Ok(resp.body(bytes.to_vec()))
 }
diff --git a/supervisord/hottub.sh b/supervisord/hottub.sh
index 6499495..d5001cf 100644
--- a/supervisord/hottub.sh
+++ b/supervisord/hottub.sh
@@ -1 +1,5 @@
-/app/target/release/hottub
\ No newline at end of file
+#!/usr/bin/env bash
+set -euo pipefail
+
+cd /app
+exec cargo run --release