swap to curl-cffi

This commit is contained in:
Simon
2026-03-17 08:41:48 +00:00
parent 9172941ac6
commit 3a2e77436e

View File

@@ -1,28 +1,137 @@
use base64::Engine;
use futures::TryStreamExt;
use ntex::http::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderName, HeaderValue, USER_AGENT};
use ntex::http::StatusCode;
use serde::Serialize; use serde::Serialize;
use std::env; use std::env;
use std::fmt; use std::fmt;
use std::io::Write;
use std::process::{Command, Stdio};
use std::sync::Arc; use std::sync::Arc;
use wreq::Client;
use wreq::Proxy;
use wreq::Response;
use wreq::Version; use wreq::Version;
use wreq::cookie::Jar; use wreq::cookie::{CookieStore, Cookies, Jar};
use wreq::header::{HeaderMap, HeaderValue, USER_AGENT};
use wreq::multipart::Form; use wreq::multipart::Form;
use wreq::redirect::Policy; use wreq::Uri;
use wreq_util::Emulation;
use crate::util::flaresolverr::FlareSolverrRequest; use crate::util::flaresolverr::FlareSolverrRequest;
use crate::util::flaresolverr::Flaresolverr; use crate::util::flaresolverr::Flaresolverr;
use crate::util::proxy; use crate::util::proxy;
// A Send + Sync error type for all async paths
type AnyErr = Box<dyn std::error::Error + Send + Sync + 'static>; type AnyErr = Box<dyn std::error::Error + Send + Sync + 'static>;
const CURL_CFFI_SCRIPT: &str = r#"
import base64
import json
import sys
from curl_cffi import requests
def main():
payload = json.load(sys.stdin)
headers = {k: v for k, v in payload.get("headers", [])}
body_b64 = payload.get("body_base64")
data = base64.b64decode(body_b64) if body_b64 else None
kwargs = {
"method": payload["method"],
"url": payload["url"],
"headers": headers or None,
"timeout": payload.get("timeout_secs", 60),
"allow_redirects": payload.get("follow_redirects", True),
"verify": False,
"impersonate": payload.get("impersonate", "chrome"),
}
proxy_url = payload.get("proxy_url")
if proxy_url:
kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}
if data is not None:
kwargs["data"] = data
response = requests.request(**kwargs)
cookies = []
cookie_jar = getattr(response.cookies, "jar", None)
if cookie_jar is not None:
for cookie in cookie_jar:
parts = [f"{cookie.name}={cookie.value}"]
if cookie.domain:
parts.append(f"Domain={cookie.domain}")
if cookie.path:
parts.append(f"Path={cookie.path}")
if cookie.secure:
parts.append("Secure")
cookies.append("; ".join(parts))
meta = {
"status": response.status_code,
"headers": list(response.headers.items()),
"cookies": cookies,
}
sys.stderr.write(json.dumps(meta))
sys.stdout.buffer.write(response.content)
if __name__ == "__main__":
try:
main()
except Exception as exc:
sys.stderr.write(json.dumps({"error": str(exc)}))
sys.exit(1)
"#;
#[derive(Debug, Clone)]
pub struct Response {
status: StatusCode,
headers: HeaderMap,
body: Vec<u8>,
}
impl Response {
pub fn status(&self) -> StatusCode {
self.status
}
pub fn headers(&self) -> &HeaderMap {
&self.headers
}
pub async fn text(self) -> Result<String, AnyErr> {
String::from_utf8(self.body).map_err(|error| error.into())
}
pub async fn bytes(self) -> Result<Vec<u8>, AnyErr> {
Ok(self.body)
}
pub async fn json<T>(self) -> Result<T, AnyErr>
where
T: serde::de::DeserializeOwned,
{
Ok(serde_json::from_slice(&self.body)?)
}
}
#[derive(Serialize)]
struct PythonRequestPayload<'a> {
method: &'a str,
url: &'a str,
headers: Vec<(String, String)>,
body_base64: Option<String>,
follow_redirects: bool,
timeout_secs: u64,
proxy_url: Option<String>,
impersonate: &'a str,
}
#[derive(serde::Deserialize)]
struct PythonResponseMeta {
status: Option<u16>,
headers: Option<Vec<(String, String)>>,
cookies: Option<Vec<String>>,
error: Option<String>,
}
#[derive(serde::Serialize, serde::Deserialize, Clone)] #[derive(serde::Serialize, serde::Deserialize, Clone)]
pub struct Requester { pub struct Requester {
#[serde(skip)]
client: Client,
#[serde(skip)] #[serde(skip)]
cookie_jar: Arc<Jar>, cookie_jar: Arc<Jar>,
proxy: bool, proxy: bool,
@@ -41,31 +150,9 @@ impl fmt::Debug for Requester {
} }
impl Requester { impl Requester {
fn build_client(cookie_jar: Arc<Jar>, user_agent: Option<&str>) -> Client {
let mut builder = Client::builder()
.cert_verification(false)
.emulation(Emulation::Firefox146)
.cookie_provider(cookie_jar)
.redirect(Policy::default());
if let Some(user_agent) = user_agent {
let mut headers = HeaderMap::new();
if let Ok(value) = HeaderValue::from_str(user_agent) {
headers.insert(USER_AGENT, value);
builder = builder.default_headers(headers);
}
}
builder.build().expect("Failed to create HTTP client")
}
pub fn new() -> Self { pub fn new() -> Self {
let cookie_jar = Arc::new(Jar::default());
let client = Self::build_client(cookie_jar.clone(), None);
let requester = Requester { let requester = Requester {
client, cookie_jar: Arc::new(Jar::default()),
cookie_jar,
proxy: false, proxy: false,
flaresolverr_session: None, flaresolverr_session: None,
user_agent: None, user_agent: None,
@@ -83,41 +170,163 @@ impl Requester {
self.proxy = proxy; self.proxy = proxy;
} }
pub async fn get_raw(&mut self, url: &str) -> Result<Response, wreq::Error> { fn cookie_headers(&self, url: &str) -> Vec<(String, String)> {
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); let Ok(uri) = url.parse::<Uri>() else {
return vec![];
};
let mut request = client.get(url).version(Version::HTTP_11); match self.cookie_jar.cookies(&uri) {
Cookies::Compressed(value) => value
.to_str()
.ok()
.map(|value| vec![(COOKIE.to_string(), value.to_string())])
.unwrap_or_default(),
Cookies::Uncompressed(values) => values
.into_iter()
.filter_map(|value| value.to_str().ok().map(|value| (COOKIE.to_string(), value.to_string())))
.collect(),
Cookies::Empty => vec![],
_ => vec![],
}
}
if self.proxy { fn merged_headers(
if let Ok(proxy_url) = env::var("BURP_URL") { &self,
let proxy = Proxy::all(&proxy_url).unwrap(); url: &str,
request = request.proxy(proxy); headers: Vec<(String, String)>,
ensure_json: bool,
) -> Vec<(String, String)> {
let mut merged = headers;
if ensure_json
&& !merged
.iter()
.any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
{
merged.push((CONTENT_TYPE.to_string(), "application/json".to_string()));
}
if let Some(user_agent) = &self.user_agent {
if !merged
.iter()
.any(|(key, _)| key.eq_ignore_ascii_case(USER_AGENT.as_str()))
{
merged.push((USER_AGENT.to_string(), user_agent.clone()));
} }
} }
request.send().await let has_cookie = merged
.iter()
.any(|(key, _)| key.eq_ignore_ascii_case(COOKIE.as_str()));
if !has_cookie {
merged.extend(self.cookie_headers(url));
}
merged
}
async fn run_python_request(
&mut self,
method: &str,
url: &str,
headers: Vec<(String, String)>,
body: Option<Vec<u8>>,
follow_redirects: bool,
) -> Result<Response, AnyErr> {
let headers = self.merged_headers(url, headers, false);
let proxy_url = if self.proxy {
env::var("BURP_URL").ok()
} else {
None
};
let payload = PythonRequestPayload {
method,
url,
headers,
body_base64: body.map(|body| base64::engine::general_purpose::STANDARD.encode(body)),
follow_redirects,
timeout_secs: 60,
proxy_url,
impersonate: "chrome",
};
let payload = serde_json::to_vec(&payload)?;
let output = tokio::task::spawn_blocking(move || -> Result<std::process::Output, AnyErr> {
let mut command = Command::new("python3");
command
.arg("-c")
.arg(CURL_CFFI_SCRIPT)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let mut child = command.spawn()?;
if let Some(stdin) = child.stdin.as_mut() {
stdin.write_all(&payload)?;
}
Ok(child.wait_with_output()?)
})
.await
.map_err(|error| -> AnyErr { format!("spawn_blocking failed: {error}").into() })??;
let meta_text = String::from_utf8_lossy(&output.stderr).trim().to_string();
let meta: PythonResponseMeta = serde_json::from_str(&meta_text)
.map_err(|error| format!("failed to parse curl_cffi metadata: {error}; stderr={meta_text}"))?;
if !output.status.success() {
let error = meta
.error
.unwrap_or_else(|| format!("curl_cffi request failed for {method} {url}"));
return Err(error.into());
}
for cookie in meta.cookies.unwrap_or_default() {
self.cookie_jar.add_cookie_str(&cookie, url);
}
let status = StatusCode::from_u16(meta.status.unwrap_or(500))
.map_err(|error| format!("invalid status code from curl_cffi: {error}"))?;
let mut response_headers = HeaderMap::new();
for (key, value) in meta.headers.unwrap_or_default() {
let Ok(name) = HeaderName::try_from(key.as_str()) else {
continue;
};
let Ok(value) = HeaderValue::from_str(&value) else {
continue;
};
response_headers.append(name, value);
}
Ok(Response {
status,
headers: response_headers,
body: output.stdout,
})
}
async fn multipart_to_body(form: Form) -> Result<(Vec<u8>, String), AnyErr> {
let boundary = form.boundary().to_string();
let chunks: Vec<_> = form.into_stream().try_collect().await?;
let mut body = Vec::new();
for chunk in chunks {
body.extend_from_slice(&chunk);
}
Ok((body, format!("multipart/form-data; boundary={boundary}")))
}
pub async fn get_raw(&mut self, url: &str) -> Result<Response, AnyErr> {
self.run_python_request("GET", url, vec![], None, false).await
} }
pub async fn get_raw_with_headers( pub async fn get_raw_with_headers(
&mut self, &mut self,
url: &str, url: &str,
headers: Vec<(String, String)>, headers: Vec<(String, String)>,
) -> Result<Response, wreq::Error> { ) -> Result<Response, AnyErr> {
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); self.run_python_request("GET", url, headers, None, true).await
let mut request = client.get(url).version(Version::HTTP_11);
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
let proxy = Proxy::all(&proxy_url).unwrap();
request = request.proxy(proxy);
}
}
// Set custom headers
for (key, value) in headers.iter() {
request = request.header(key, value);
}
request.send().await
} }
pub async fn post_json<S>( pub async fn post_json<S>(
@@ -125,25 +334,21 @@ impl Requester {
url: &str, url: &str,
data: &S, data: &S,
headers: Vec<(String, String)>, headers: Vec<(String, String)>,
) -> Result<Response, wreq::Error> ) -> Result<Response, AnyErr>
where where
S: Serialize + ?Sized, S: Serialize + ?Sized,
{ {
let mut request = self.client.post(url).version(Version::HTTP_11).json(data); let mut headers = self.merged_headers(url, headers, true);
if !headers
// Set custom headers .iter()
for (key, value) in headers.iter() { .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
request = request.header(key, value); {
headers.push((CONTENT_TYPE.to_string(), "application/json".to_string()));
} }
if self.proxy { let body = serde_json::to_vec(data)?;
if let Ok(proxy_url) = env::var("BURP_URL") { self.run_python_request("POST", url, headers, Some(body), true)
let proxy = Proxy::all(&proxy_url).unwrap(); .await
request = request.proxy(proxy);
}
}
request.send().await
} }
pub async fn post( pub async fn post(
@@ -151,26 +356,14 @@ impl Requester {
url: &str, url: &str,
data: &str, data: &str,
headers: Vec<(&str, &str)>, headers: Vec<(&str, &str)>,
) -> Result<Response, wreq::Error> { ) -> Result<Response, AnyErr> {
let mut request = self let headers = headers
.client .into_iter()
.post(url) .map(|(key, value)| (key.to_string(), value.to_string()))
.version(Version::HTTP_11) .collect::<Vec<_>>();
.body(data.to_string());
// Set custom headers self.run_python_request("POST", url, headers, Some(data.as_bytes().to_vec()), true)
for (key, value) in headers.iter() { .await
request = request.header(key.to_string(), value.to_string());
}
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
let proxy = Proxy::all(&proxy_url).unwrap();
request = request.proxy(proxy);
}
}
request.send().await
} }
pub async fn post_multipart( pub async fn post_multipart(
@@ -179,27 +372,18 @@ impl Requester {
form: Form, form: Form,
headers: Vec<(String, String)>, headers: Vec<(String, String)>,
_http_version: Option<Version>, _http_version: Option<Version>,
) -> Result<Response, wreq::Error> { ) -> Result<Response, AnyErr> {
let http_version = match _http_version { let (body, content_type) = Self::multipart_to_body(form).await?;
Some(v) => v, let mut headers = headers;
None => Version::HTTP_11, if !headers
}; .iter()
.any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
let mut request = self.client.post(url).multipart(form).version(http_version); {
headers.push((CONTENT_TYPE.to_string(), content_type));
// Set custom headers
for (key, value) in headers.iter() {
request = request.header(key, value);
} }
if self.proxy { self.run_python_request("POST", url, headers, Some(body), true)
if let Ok(proxy_url) = env::var("BURP_URL") { .await
let proxy = Proxy::all(&proxy_url).unwrap();
request = request.proxy(proxy);
}
}
request.send().await
} }
pub async fn get( pub async fn get(
@@ -216,25 +400,15 @@ impl Requester {
headers: Vec<(String, String)>, headers: Vec<(String, String)>,
_http_version: Option<Version>, _http_version: Option<Version>,
) -> Result<String, AnyErr> { ) -> Result<String, AnyErr> {
let http_version = match _http_version {
Some(v) => v,
None => Version::HTTP_11,
};
loop { loop {
let mut request = self.client.get(url).version(http_version); let response = self
for (key, value) in headers.iter() { .run_python_request("GET", url, headers.clone(), None, true)
request = request.header(key, value); .await?;
}
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
let proxy = Proxy::all(&proxy_url).unwrap();
request = request.proxy(proxy);
}
}
let response = request.send().await?;
if response.status().is_success() || response.status().as_u16() == 404 { if response.status().is_success() || response.status().as_u16() == 404 {
return Ok(response.text().await?); return response.text().await;
} }
if response.status().as_u16() == 429 { if response.status().as_u16() == 429 {
tokio::time::sleep(std::time::Duration::from_secs(1)).await; tokio::time::sleep(std::time::Duration::from_secs(1)).await;
continue; continue;
@@ -248,8 +422,6 @@ impl Requester {
} }
} }
// If direct request failed, try FlareSolverr. Map its error to a Send+Sync error immediately,
// so no non-Send error value lives across later `.await`s.
let flare_url = match env::var("FLARE_URL") { let flare_url = match env::var("FLARE_URL") {
Ok(url) => url, Ok(url) => url,
Err(e) => return Err(format!("FLARE_URL not set: {e}").into()), Err(e) => return Err(format!("FLARE_URL not set: {e}").into()),
@@ -268,39 +440,21 @@ impl Requester {
.await .await
.map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?; .map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?;
// Rebuild client and apply UA/cookies from FlareSolverr
let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/"); let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/");
self.user_agent = Some(res.solution.userAgent);
let useragent = res.solution.userAgent; for cookie in res.solution.cookies {
self.user_agent = Some(useragent); self.cookie_jar
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
if url::Url::parse(&cookie_origin).is_ok() {
for cookie in res.solution.cookies {
self.cookie_jar
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
}
} }
self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); let response = self
.run_python_request("GET", url, headers, None, true)
// Retry the original URL with the updated client & (optional) proxy .await?;
let mut request = self.client.get(url).version(Version::HTTP_11);
for (key, value) in headers.iter() {
request = request.header(key, value);
}
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
let proxy = Proxy::all(&proxy_url).unwrap();
request = request.proxy(proxy);
}
}
let response = request.send().await?;
if response.status().is_success() { if response.status().is_success() {
return Ok(response.text().await?); return response.text().await;
} }
// Fall back to FlareSolverr-provided body
Ok(res.solution.response) Ok(res.solution.response)
} }
} }