noodlemagazine upgrade
This commit is contained in:
@@ -10,10 +10,11 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use error_chain::error_chain;
|
use error_chain::error_chain;
|
||||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||||
|
use regex::Regex;
|
||||||
use std::net::IpAddr;
|
use std::net::IpAddr;
|
||||||
use url::Url;
|
|
||||||
use std::vec;
|
use std::vec;
|
||||||
use titlecase::Titlecase;
|
use titlecase::Titlecase;
|
||||||
|
use url::Url;
|
||||||
use wreq::Version;
|
use wreq::Version;
|
||||||
|
|
||||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||||
@@ -53,21 +54,232 @@ impl NoodlemagazineProvider {
|
|||||||
favicon: "https://www.google.com/s2/favicons?sz=64&domain=noodlemagazine.com".into(),
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=noodlemagazine.com".into(),
|
||||||
status: "active".into(),
|
status: "active".into(),
|
||||||
categories: vec![],
|
categories: vec![],
|
||||||
options: vec![],
|
options: vec![
|
||||||
|
ChannelOption {
|
||||||
|
id: "category".into(),
|
||||||
|
title: "Popular Period".into(),
|
||||||
|
description: "Pick which popular feed to browse.".into(),
|
||||||
|
systemImage: "clock".into(),
|
||||||
|
colorName: "blue".into(),
|
||||||
|
options: vec![
|
||||||
|
FilterOption {
|
||||||
|
id: "recent".into(),
|
||||||
|
title: "Recent".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "week".into(),
|
||||||
|
title: "This Week".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "month".into(),
|
||||||
|
title: "This Month".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "all".into(),
|
||||||
|
title: "All Time".into(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
multiSelect: false,
|
||||||
|
},
|
||||||
|
ChannelOption {
|
||||||
|
id: "sort".into(),
|
||||||
|
title: "Sort By".into(),
|
||||||
|
description: "Sort popular feed results.".into(),
|
||||||
|
systemImage: "arrow.up.arrow.down".into(),
|
||||||
|
colorName: "orange".into(),
|
||||||
|
options: vec![
|
||||||
|
FilterOption {
|
||||||
|
id: "views".into(),
|
||||||
|
title: "Views".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "date".into(),
|
||||||
|
title: "Newest".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "duration".into(),
|
||||||
|
title: "Duration".into(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
multiSelect: false,
|
||||||
|
},
|
||||||
|
ChannelOption {
|
||||||
|
id: "filter".into(),
|
||||||
|
title: "Order".into(),
|
||||||
|
description: "Ascending or descending order.".into(),
|
||||||
|
systemImage: "list.number".into(),
|
||||||
|
colorName: "green".into(),
|
||||||
|
options: vec![
|
||||||
|
FilterOption {
|
||||||
|
id: "desc".into(),
|
||||||
|
title: "Descending".into(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "asc".into(),
|
||||||
|
title: "Ascending".into(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
multiSelect: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
nsfw: true,
|
nsfw: true,
|
||||||
cacheDuration: Some(1800),
|
cacheDuration: Some(1800),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn resolve_popular_period(options: &ServerOptions) -> &'static str {
|
||||||
|
match options.category.as_deref() {
|
||||||
|
Some("week") => "week",
|
||||||
|
Some("month") => "month",
|
||||||
|
Some("all") => "all",
|
||||||
|
_ => "recent",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_sort_by(sort: &str, options: &ServerOptions) -> &'static str {
|
||||||
|
match options.sort.as_deref().unwrap_or(sort) {
|
||||||
|
"date" | "new" | "latest" => "date",
|
||||||
|
"duration" | "length" => "duration",
|
||||||
|
_ => "views",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_sort_order(options: &ServerOptions) -> &'static str {
|
||||||
|
match options.filter.as_deref() {
|
||||||
|
Some("asc") => "asc",
|
||||||
|
_ => "desc",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mirror_url(url: &str) -> String {
|
||||||
|
let stripped = url
|
||||||
|
.strip_prefix("https://")
|
||||||
|
.or_else(|| url.strip_prefix("http://"))
|
||||||
|
.unwrap_or(url);
|
||||||
|
format!("https://r.jina.ai/http://{stripped}")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn looks_like_bot_challenge_or_block(html: &str) -> bool {
|
||||||
|
let lower = html.to_ascii_lowercase();
|
||||||
|
lower.contains("just a moment")
|
||||||
|
|| lower.contains("cf-browser-verification")
|
||||||
|
|| lower.contains("cf-chl")
|
||||||
|
|| lower.contains("access restricted")
|
||||||
|
|| lower.contains("cloudflare")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_markdown_listing_items(
|
||||||
|
&self,
|
||||||
|
markdown: &str,
|
||||||
|
options: &ServerOptions,
|
||||||
|
) -> Vec<VideoItem> {
|
||||||
|
let Some(regex) = Regex::new(
|
||||||
|
r#"(?is)\[\!\[Image\s+\d+:\s*(?P<title>.*?)\]\((?P<thumb>https?://[^)\s]+)\)(?P<meta>.*?)\]\((?P<url>https?://noodlemagazine\.com/watch/[^)\s]+)\)"#,
|
||||||
|
)
|
||||||
|
.ok() else {
|
||||||
|
return vec![];
|
||||||
|
};
|
||||||
|
let Some(duration_regex) = Regex::new(r"(?P<duration>\d{1,2}:\d{2}(?::\d{2})?)").ok() else {
|
||||||
|
return vec![];
|
||||||
|
};
|
||||||
|
let Some(views_regex) = Regex::new(r"(?P<views>[0-9]+(?:\.[0-9]+)?[KMB]?)\s+\d{1,2}:\d{2}(?::\d{2})?").ok() else {
|
||||||
|
return vec![];
|
||||||
|
};
|
||||||
|
|
||||||
|
regex
|
||||||
|
.captures_iter(markdown)
|
||||||
|
.filter_map(|caps| {
|
||||||
|
let title_raw = caps.name("title")?.as_str().trim();
|
||||||
|
let thumb = caps.name("thumb")?.as_str().trim();
|
||||||
|
let video_url = caps.name("url")?.as_str().trim();
|
||||||
|
let meta = caps.name("meta").map(|m| m.as_str()).unwrap_or("");
|
||||||
|
|
||||||
|
let parsed_url = Url::parse(video_url).ok()?;
|
||||||
|
let id = parsed_url
|
||||||
|
.path_segments()
|
||||||
|
.and_then(|mut segs| segs.next_back())
|
||||||
|
.filter(|value| !value.is_empty())
|
||||||
|
.map(|value| value.to_string())?;
|
||||||
|
|
||||||
|
let duration = duration_regex
|
||||||
|
.captures(meta)
|
||||||
|
.and_then(|m| m.name("duration").map(|v| v.as_str()))
|
||||||
|
.and_then(|v| parse_time_to_seconds(v))
|
||||||
|
.unwrap_or(0) as u32;
|
||||||
|
|
||||||
|
let views = views_regex
|
||||||
|
.captures(meta)
|
||||||
|
.and_then(|m| m.name("views").map(|v| v.as_str()))
|
||||||
|
.and_then(|v| parse_abbreviated_number(v.trim()))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
let title = decode(title_raw.as_bytes())
|
||||||
|
.to_string()
|
||||||
|
.unwrap_or_else(|_| title_raw.to_string())
|
||||||
|
.titlecase();
|
||||||
|
let proxy_url = self.proxy_url(options, video_url);
|
||||||
|
let proxied_thumb = self.proxied_thumb(options, thumb);
|
||||||
|
|
||||||
|
Some(
|
||||||
|
VideoItem::new(
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
proxy_url.clone(),
|
||||||
|
"noodlemagazine".into(),
|
||||||
|
proxied_thumb,
|
||||||
|
duration,
|
||||||
|
)
|
||||||
|
.views(views)
|
||||||
|
.formats(vec![
|
||||||
|
VideoFormat::new(proxy_url, "auto".into(), "video/mp4".into())
|
||||||
|
.format_id("auto".into())
|
||||||
|
.format_note("proxied".into())
|
||||||
|
.http_header("Referer".into(), video_url.to_string()),
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_listing_items(
|
||||||
|
&self,
|
||||||
|
requester: &mut crate::util::requester::Requester,
|
||||||
|
page_url: &str,
|
||||||
|
options: &ServerOptions,
|
||||||
|
) -> Vec<VideoItem> {
|
||||||
|
let html = requester
|
||||||
|
.get(page_url, Some(Version::HTTP_2))
|
||||||
|
.await
|
||||||
|
.unwrap_or_default();
|
||||||
|
let mut items = self.get_video_items_from_html(html.clone(), options);
|
||||||
|
if !items.is_empty() {
|
||||||
|
return items;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !Self::looks_like_bot_challenge_or_block(&html) {
|
||||||
|
return items;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mirror = requester
|
||||||
|
.get(&Self::mirror_url(page_url), Some(Version::HTTP_11))
|
||||||
|
.await
|
||||||
|
.unwrap_or_default();
|
||||||
|
items = self.parse_markdown_listing_items(&mirror, options);
|
||||||
|
items
|
||||||
|
}
|
||||||
|
|
||||||
async fn get(
|
async fn get(
|
||||||
&self,
|
&self,
|
||||||
cache: VideoCache,
|
cache: VideoCache,
|
||||||
page: u8,
|
page: u8,
|
||||||
_sort: &str,
|
sort: &str,
|
||||||
options: ServerOptions,
|
options: ServerOptions,
|
||||||
) -> Result<Vec<VideoItem>> {
|
) -> Result<Vec<VideoItem>> {
|
||||||
|
let period = Self::resolve_popular_period(&options);
|
||||||
|
let sort_by = Self::resolve_sort_by(sort, &options);
|
||||||
|
let sort_order = Self::resolve_sort_order(&options);
|
||||||
let video_url = format!(
|
let video_url = format!(
|
||||||
"{}/popular/recent?sort_by=views&sort_order=desc&p={}",
|
"{}/popular/{period}?sort_by={sort_by}&sort_order={sort_order}&p={}",
|
||||||
self.url,
|
self.url,
|
||||||
page.saturating_sub(1)
|
page.saturating_sub(1)
|
||||||
);
|
);
|
||||||
@@ -83,12 +295,9 @@ impl NoodlemagazineProvider {
|
|||||||
None => return Ok(old_items),
|
None => return Ok(old_items),
|
||||||
};
|
};
|
||||||
|
|
||||||
let text = requester
|
let items = self
|
||||||
.get(&video_url, Some(Version::HTTP_2))
|
.fetch_listing_items(&mut requester, &video_url, &options)
|
||||||
.await
|
.await;
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let items = self.get_video_items_from_html(text, &options);
|
|
||||||
|
|
||||||
if items.is_empty() {
|
if items.is_empty() {
|
||||||
Ok(old_items)
|
Ok(old_items)
|
||||||
@@ -120,12 +329,9 @@ impl NoodlemagazineProvider {
|
|||||||
None => return Ok(old_items),
|
None => return Ok(old_items),
|
||||||
};
|
};
|
||||||
|
|
||||||
let text = requester
|
let items = self
|
||||||
.get(&video_url, Some(Version::HTTP_2))
|
.fetch_listing_items(&mut requester, &video_url, &options)
|
||||||
.await
|
.await;
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let items = self.get_video_items_from_html(text, &options);
|
|
||||||
|
|
||||||
if items.is_empty() {
|
if items.is_empty() {
|
||||||
Ok(old_items)
|
Ok(old_items)
|
||||||
@@ -154,7 +360,7 @@ impl NoodlemagazineProvider {
|
|||||||
None => return vec![],
|
None => return vec![],
|
||||||
};
|
};
|
||||||
|
|
||||||
list.split("<div class=\"item\">")
|
list.split("<div class=\"item")
|
||||||
.skip(1)
|
.skip(1)
|
||||||
.filter_map(|segment| {
|
.filter_map(|segment| {
|
||||||
self.get_video_item(segment.to_string(), options).ok()
|
self.get_video_item(segment.to_string(), options).ok()
|
||||||
@@ -198,6 +404,24 @@ impl NoodlemagazineProvider {
|
|||||||
.any(|ext| path.ends_with(ext))
|
.any(|ext| path.ends_with(ext))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_known_preview_host(host: &str) -> bool {
|
||||||
|
let host = host.to_ascii_lowercase();
|
||||||
|
host.ends_with("pvvstream.pro")
|
||||||
|
|| host.ends_with("okcdn.ru")
|
||||||
|
|| host.ends_with("vkuserphoto.ru")
|
||||||
|
|| host.ends_with("noodlemagazine.com")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_preview_signature(url: &Url) -> bool {
|
||||||
|
let path = url.path().to_ascii_lowercase();
|
||||||
|
let query = url.query().unwrap_or("").to_ascii_lowercase();
|
||||||
|
path.contains("/preview/")
|
||||||
|
|| path.contains("/poster/")
|
||||||
|
|| path.contains("getvideopreview")
|
||||||
|
|| query.contains("type=video_thumb")
|
||||||
|
|| query.contains("keep_aspect_ratio=")
|
||||||
|
}
|
||||||
|
|
||||||
fn is_disallowed_thumb_host(host: &str) -> bool {
|
fn is_disallowed_thumb_host(host: &str) -> bool {
|
||||||
if host.eq_ignore_ascii_case("localhost") {
|
if host.eq_ignore_ascii_case("localhost") {
|
||||||
return true;
|
return true;
|
||||||
@@ -234,20 +458,23 @@ impl NoodlemagazineProvider {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
!Self::is_disallowed_thumb_host(host) && Self::has_allowed_image_extension(url.path())
|
if Self::is_disallowed_thumb_host(host) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
|
if Self::has_allowed_image_extension(url.path()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::is_known_preview_host(host) && Self::has_preview_signature(&url)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn proxied_thumb(&self, _options: &ServerOptions, thumb: &str) -> String {
|
||||||
let normalized = self.normalize_thumb_url(thumb);
|
let normalized = self.normalize_thumb_url(thumb);
|
||||||
if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
|
if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
|
||||||
return String::new();
|
return String::new();
|
||||||
}
|
}
|
||||||
|
normalized
|
||||||
crate::providers::build_proxy_url(
|
|
||||||
options,
|
|
||||||
"noodlemagazine-thumb",
|
|
||||||
&crate::providers::strip_url_scheme(&normalized),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> {
|
fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> {
|
||||||
@@ -279,12 +506,17 @@ impl NoodlemagazineProvider {
|
|||||||
.ok_or_else(|| Error::from("missing id"))?
|
.ok_or_else(|| Error::from("missing id"))?
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let thumb = video_segment
|
let thumb = Regex::new(
|
||||||
.split("data-src=\"")
|
r#"(?i)(?:data-src|data-original|data-webp|src|poster)\s*=\s*"(?P<url>[^"]+)""#,
|
||||||
.nth(1)
|
)
|
||||||
.and_then(|s| s.split('"').next())
|
.ok()
|
||||||
.unwrap_or("")
|
.and_then(|regex| {
|
||||||
.to_string();
|
regex
|
||||||
|
.captures_iter(&video_segment)
|
||||||
|
.filter_map(|captures| captures.name("url").map(|value| value.as_str().to_string()))
|
||||||
|
.find(|candidate| !candidate.starts_with("data:image/"))
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
let raw_duration = video_segment
|
let raw_duration = video_segment
|
||||||
.split("#clock-o\"></use></svg>")
|
.split("#clock-o\"></use></svg>")
|
||||||
@@ -414,7 +646,7 @@ mod tests {
|
|||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
items[0].thumb,
|
items[0].thumb,
|
||||||
"https://example.com/proxy/noodlemagazine-thumb/noodlemagazine.com/thumbs/test.jpg"
|
"https://noodlemagazine.com/thumbs/test.jpg"
|
||||||
);
|
);
|
||||||
assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
|
assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
|
||||||
}
|
}
|
||||||
@@ -449,8 +681,70 @@ mod tests {
|
|||||||
assert_eq!(items.len(), 2);
|
assert_eq!(items.len(), 2);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
items[0].thumb,
|
items[0].thumb,
|
||||||
"https://example.com/proxy/noodlemagazine-thumb/cdn.example/thumb.jpg"
|
"https://cdn.example/thumb.jpg"
|
||||||
);
|
);
|
||||||
assert!(items[1].thumb.is_empty());
|
assert!(items[1].thumb.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keeps_preview_urls_without_file_extension() {
|
||||||
|
let provider = NoodlemagazineProvider::new();
|
||||||
|
let options = options();
|
||||||
|
let html = r#"
|
||||||
|
<div class="list_videos" id="list_videos">
|
||||||
|
<div class="item">
|
||||||
|
<a href="/watch/-111_222">
|
||||||
|
<img data-src="https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l" />
|
||||||
|
</a>
|
||||||
|
<div class="title">sample</div>
|
||||||
|
<svg><use></use></svg>#clock-o"></use></svg>12:34<
|
||||||
|
<svg><use></use></svg>#eye"></use></svg>1.2K<
|
||||||
|
</div>
|
||||||
|
>Show more</div>
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let items = provider.get_video_items_from_html(html.to_string(), &options);
|
||||||
|
assert_eq!(items.len(), 1);
|
||||||
|
assert_eq!(
|
||||||
|
items[0].thumb,
|
||||||
|
"https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parses_item_variants_and_alternate_thumb_attributes() {
|
||||||
|
let provider = NoodlemagazineProvider::new();
|
||||||
|
let options = options();
|
||||||
|
let html = r#"
|
||||||
|
<div class="list_videos" id="list_videos">
|
||||||
|
<div class="item has-video" data-id="123">
|
||||||
|
<a href="/watch/-333_444">
|
||||||
|
<img data-original="https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg" />
|
||||||
|
</a>
|
||||||
|
<div class="title">sample alt</div>
|
||||||
|
<svg><use></use></svg>#clock-o"></use></svg>00:42<
|
||||||
|
<svg><use></use></svg>#eye"></use></svg>123<
|
||||||
|
</div>
|
||||||
|
>Show more</div>
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let items = provider.get_video_items_from_html(html.to_string(), &options);
|
||||||
|
assert_eq!(items.len(), 1);
|
||||||
|
assert_eq!(
|
||||||
|
items[0].thumb,
|
||||||
|
"https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resolves_popular_filters_for_usability_options() {
|
||||||
|
let mut options = options();
|
||||||
|
options.category = Some("month".to_string());
|
||||||
|
options.sort = Some("date".to_string());
|
||||||
|
options.filter = Some("asc".to_string());
|
||||||
|
|
||||||
|
assert_eq!(NoodlemagazineProvider::resolve_popular_period(&options), "month");
|
||||||
|
assert_eq!(NoodlemagazineProvider::resolve_sort_by("views", &options), "date");
|
||||||
|
assert_eq!(NoodlemagazineProvider::resolve_sort_order(&options), "asc");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -136,6 +136,42 @@ impl Requester {
|
|||||||
.unwrap_or_else(|| "none".to_string())
|
.unwrap_or_else(|| "none".to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn jina_mirror_url(url: &str) -> Option<String> {
|
||||||
|
if url.trim().is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if url.starts_with("https://r.jina.ai/") || url.starts_with("http://r.jina.ai/") {
|
||||||
|
return Some(url.to_string());
|
||||||
|
}
|
||||||
|
let stripped = url
|
||||||
|
.strip_prefix("https://")
|
||||||
|
.or_else(|| url.strip_prefix("http://"))
|
||||||
|
.unwrap_or(url);
|
||||||
|
Some(format!("https://r.jina.ai/http://{stripped}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_jina_mirror_body(
|
||||||
|
cookie_jar: Arc<Jar>,
|
||||||
|
user_agent: Option<String>,
|
||||||
|
proxy_enabled: bool,
|
||||||
|
url: &str,
|
||||||
|
) -> Result<String, AnyErr> {
|
||||||
|
let mirror_url = Self::jina_mirror_url(url).ok_or("invalid mirror url")?;
|
||||||
|
let client = Self::build_client(cookie_jar, user_agent.as_deref());
|
||||||
|
let mut request = client.get(&mirror_url).version(Version::HTTP_11);
|
||||||
|
if proxy_enabled {
|
||||||
|
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||||
|
let proxy = Proxy::all(&proxy_url)?;
|
||||||
|
request = request.proxy(proxy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let response = request.send().await?;
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err(format!("jina mirror returned status {}", response.status()).into());
|
||||||
|
}
|
||||||
|
Ok(response.text().await?)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(any(not(hottub_single_provider), hottub_provider = "hypnotube"))]
|
#[cfg(any(not(hottub_single_provider), hottub_provider = "hypnotube"))]
|
||||||
fn debug_cookie_preview_from_borrowed_headers(
|
fn debug_cookie_preview_from_borrowed_headers(
|
||||||
&self,
|
&self,
|
||||||
@@ -453,6 +489,14 @@ impl Requester {
|
|||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
None => Version::HTTP_11,
|
None => Version::HTTP_11,
|
||||||
};
|
};
|
||||||
|
let cookie_jar = self.cookie_jar.clone();
|
||||||
|
let user_agent = self.user_agent.clone();
|
||||||
|
let proxy_enabled = self.proxy;
|
||||||
|
let _trace_id = self.debug_trace_id.as_deref().unwrap_or("none").to_string();
|
||||||
|
let url_owned = url.to_string();
|
||||||
|
let jina_handle = tokio::spawn(async move {
|
||||||
|
Self::fetch_jina_mirror_body(cookie_jar, user_agent, proxy_enabled, &url_owned).await
|
||||||
|
});
|
||||||
loop {
|
loop {
|
||||||
let mut request = self.client.get(url).version(http_version);
|
let mut request = self.client.get(url).version(http_version);
|
||||||
for (key, value) in headers.iter() {
|
for (key, value) in headers.iter() {
|
||||||
@@ -464,7 +508,38 @@ impl Requester {
|
|||||||
request = request.proxy(proxy);
|
request = request.proxy(proxy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let response = request.send().await?;
|
let response = match request.send().await {
|
||||||
|
Ok(response) => response,
|
||||||
|
Err(error) => {
|
||||||
|
crate::flow_debug!(
|
||||||
|
"trace={} requester direct transport failed url={} error={} using_jina_fallback=true",
|
||||||
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
|
crate::util::flow_debug::preview(&error.to_string(), 160)
|
||||||
|
);
|
||||||
|
match jina_handle.await {
|
||||||
|
Ok(Ok(body)) => return Ok(body),
|
||||||
|
Ok(Err(_jina_error)) => {
|
||||||
|
crate::flow_debug!(
|
||||||
|
"trace={} requester jina fallback failed after transport error url={} error={}",
|
||||||
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
|
crate::util::flow_debug::preview(&_jina_error.to_string(), 160)
|
||||||
|
);
|
||||||
|
return Err(error.into());
|
||||||
|
}
|
||||||
|
Err(_join_error) => {
|
||||||
|
crate::flow_debug!(
|
||||||
|
"trace={} requester jina task join failed after transport error url={} error={}",
|
||||||
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
|
crate::util::flow_debug::preview(&_join_error.to_string(), 160)
|
||||||
|
);
|
||||||
|
return Err(error.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
self.store_response_cookies(url, &response);
|
self.store_response_cookies(url, &response);
|
||||||
crate::flow_debug!(
|
crate::flow_debug!(
|
||||||
"trace={} requester direct response url={} status={}",
|
"trace={} requester direct response url={} status={}",
|
||||||
@@ -473,6 +548,7 @@ impl Requester {
|
|||||||
response.status()
|
response.status()
|
||||||
);
|
);
|
||||||
if response.status().is_success() || response.status().as_u16() == 404 {
|
if response.status().is_success() || response.status().as_u16() == 404 {
|
||||||
|
jina_handle.abort();
|
||||||
return Ok(response.text().await?);
|
return Ok(response.text().await?);
|
||||||
}
|
}
|
||||||
if response.status().as_u16() == 429 {
|
if response.status().as_u16() == 429 {
|
||||||
@@ -484,11 +560,31 @@ impl Requester {
|
|||||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
println!(
|
crate::flow_debug!(
|
||||||
"Direct request to {} failed with status: {}",
|
"trace={} requester direct failed url={} status={} using_jina_fallback=true",
|
||||||
url,
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
response.status()
|
response.status()
|
||||||
);
|
);
|
||||||
|
match jina_handle.await {
|
||||||
|
Ok(Ok(body)) => return Ok(body),
|
||||||
|
Ok(Err(_error)) => {
|
||||||
|
crate::flow_debug!(
|
||||||
|
"trace={} requester jina fallback failed url={} error={}",
|
||||||
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
|
crate::util::flow_debug::preview(&_error.to_string(), 160)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(_error) => {
|
||||||
|
crate::flow_debug!(
|
||||||
|
"trace={} requester jina task join failed url={} error={}",
|
||||||
|
_trace_id,
|
||||||
|
crate::util::flow_debug::preview(url, 120),
|
||||||
|
crate::util::flow_debug::preview(&_error.to_string(), 160)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user