fixes etc

This commit is contained in:
Simon
2026-04-07 16:53:45 +00:00
parent 81e8158161
commit 6e43b3b3d0
5 changed files with 452 additions and 207 deletions

View File

@@ -7,14 +7,14 @@ use crate::status::*;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number; use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem}; use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait; use async_trait::async_trait;
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector}; use scraper::{ElementRef, Html, Selector};
use std::collections::HashSet;
use std::collections::HashMap; use std::collections::HashMap;
use std::collections::HashSet;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use std::thread; use std::thread;
use url::Url; use url::Url;
@@ -112,7 +112,10 @@ impl PornhubProvider {
}); });
} }
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> { async fn load_tags(
base_url: &str,
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
) -> Result<()> {
Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?; Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?;
Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?; Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?;
Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?; Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?;
@@ -120,10 +123,18 @@ impl PornhubProvider {
Ok(()) Ok(())
} }
async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>) -> Result<()> { async fn load_kind(
base_url: &str,
path_segment: &str,
kind: QueryTargetKind,
tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>,
) -> Result<()> {
let url = format!("{}/{}/top", base_url, path_segment); let url = format!("{}/{}/top", base_url, path_segment);
let mut requester = crate::util::requester::Requester::new(); let mut requester = crate::util::requester::Requester::new();
let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?; let body = requester
.get(&url, None)
.await
.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
let document = Html::parse_document(&body); let document = Html::parse_document(&body);
let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?; let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
for element in document.select(&selector) { for element in document.select(&selector) {
@@ -206,8 +217,11 @@ impl PornhubProvider {
} }
fn selector(value: &str) -> Result<Selector> { fn selector(value: &str) -> Result<Selector> {
Selector::parse(value) Selector::parse(value).map_err(|error| {
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}")))) Error::from(ErrorKind::Parse(format!(
"selector parse failed for {value}: {error}"
)))
})
} }
fn text_of(element: &ElementRef<'_>) -> String { fn text_of(element: &ElementRef<'_>) -> String {
@@ -256,7 +270,10 @@ impl PornhubProvider {
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> { fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
let normalized = query.trim().to_ascii_lowercase(); let normalized = query.trim().to_ascii_lowercase();
if let Some(info) = self.tag_map.read().unwrap().get(&normalized) { if let Some(info) = self.tag_map.read().unwrap().get(&normalized) {
return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() }); return Some(QueryTarget {
kind: info.kind,
slug: info.slug.clone(),
});
} }
// Fallback to kind:slug without @ // Fallback to kind:slug without @
let trimmed = query.trim(); let trimmed = query.trim();
@@ -320,11 +337,19 @@ impl PornhubProvider {
url url
} }
fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) { fn build_listing_request(
&self,
page: u8,
sort: &str,
query: Option<&str>,
) -> (String, ListingScope) {
match query.map(str::trim).filter(|value| !value.is_empty()) { match query.map(str::trim).filter(|value| !value.is_empty()) {
Some(query) => { Some(query) => {
if let Some(target) = self.parse_query_target(query) { if let Some(target) = self.parse_query_target(query) {
(self.build_creator_url(page, sort, &target), ListingScope::Creator) (
self.build_creator_url(page, sort, &target),
ListingScope::Creator,
)
} else { } else {
let encoded = query.to_ascii_lowercase().replace(' ', "+"); let encoded = query.to_ascii_lowercase().replace(' ', "+");
( (
@@ -470,23 +495,24 @@ impl PornhubProvider {
.map(|value| self.normalize_url(value)) .map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty()); .filter(|value| !value.is_empty());
let mut item = VideoItem::new( let mut item =
id, VideoItem::new(id, title, page_url, CHANNEL_ID.to_string(), thumb, duration);
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
item.views = views; item.views = views;
item.preview = image let preview_url = image
.and_then(|value| value.value().attr("data-mediabook")) .and_then(|value| value.value().attr("data-mediabook"))
.map(|value| self.normalize_url(value)) .map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty()); .filter(|value| !value.is_empty());
item.verified = card item.preview = preview_url.clone();
.select(&verified_selector) if preview_url.is_some() {
.next() let mut format = VideoFormat::new(
.map(|_| true); item.url.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), item.url.clone());
item.formats = Some(vec![format]);
}
item.verified = card.select(&verified_selector).next().map(|_| true);
item.uploader = uploader.clone(); item.uploader = uploader.clone();
item.uploaderUrl = uploader_url.clone(); item.uploaderUrl = uploader_url.clone();
item.uploaderId = uploader_url item.uploaderId = uploader_url
@@ -554,7 +580,10 @@ impl PornhubProvider {
if normalized.is_empty() { if normalized.is_empty() {
return; return;
} }
if values.iter().any(|existing| existing.eq_ignore_ascii_case(normalized)) { if values
.iter()
.any(|existing| existing.eq_ignore_ascii_case(normalized))
{
return; return;
} }
values.push(normalized.to_string()); values.push(normalized.to_string());
@@ -644,11 +673,15 @@ struct PornhubThumbPolicy;
impl PornhubThumbPolicy { impl PornhubThumbPolicy {
fn is_allowed_video_page_url(url: &str) -> bool { fn is_allowed_video_page_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else { return false; }; let Some(url) = Url::parse(url).ok() else {
return false;
};
if url.scheme() != "https" { if url.scheme() != "https" {
return false; return false;
} }
let Some(host) = url.host_str() else { return false; }; let Some(host) = url.host_str() else {
return false;
};
if !host.eq_ignore_ascii_case("pornhub.com") if !host.eq_ignore_ascii_case("pornhub.com")
&& !host.eq_ignore_ascii_case("www.pornhub.com") && !host.eq_ignore_ascii_case("www.pornhub.com")
&& !host.ends_with(".pornhub.com") && !host.ends_with(".pornhub.com")
@@ -701,12 +734,14 @@ mod tests {
#[test] #[test]
fn parses_creator_queries() { fn parses_creator_queries() {
let provider = PornhubProvider::new(); let provider = PornhubProvider::new();
let target = provider.parse_query_target("channels:Brazzers") let target = provider
.parse_query_target("channels:Brazzers")
.expect("channel target should parse"); .expect("channel target should parse");
assert!(matches!(target.kind, QueryTargetKind::Channel)); assert!(matches!(target.kind, QueryTargetKind::Channel));
assert_eq!(target.slug, "brazzers"); assert_eq!(target.slug, "brazzers");
let target = provider.parse_query_target("pornstar:Alex Mack") let target = provider
.parse_query_target("pornstar:Alex Mack")
.expect("pornstar target should parse"); .expect("pornstar target should parse");
assert!(matches!(target.kind, QueryTargetKind::Pornstar)); assert!(matches!(target.kind, QueryTargetKind::Pornstar));
assert_eq!(target.slug, "alex-mack"); assert_eq!(target.slug, "alex-mack");
@@ -782,10 +817,11 @@ mod tests {
items[0].preview.as_deref(), items[0].preview.as_deref(),
Some("https://example.com/preview.webm") Some("https://example.com/preview.webm")
); );
assert!(items[0] assert!(items[0].tags.as_ref().is_some_and(|values| {
.tags values
.as_ref() .iter()
.is_some_and(|values| values.iter().any(|value| value.eq_ignore_ascii_case("honeycore")))); .any(|value| value.eq_ignore_ascii_case("honeycore"))
}));
} }
#[test] #[test]
@@ -816,16 +852,23 @@ mod tests {
assert_eq!(items.len(), 1); assert_eq!(items.len(), 1);
assert_eq!(items[0].thumb, "https://example.com/thumb.jpg"); assert_eq!(items[0].thumb, "https://example.com/thumb.jpg");
assert_eq!(items[0].preview.as_deref(), Some("https://example.com/preview.webm")); assert_eq!(
items[0].preview.as_deref(),
Some("https://example.com/preview.webm")
);
assert_eq!(items[0].views, Some(199000)); assert_eq!(items[0].views, Some(199000));
assert_eq!(items[0].rating, Some(95.0)); assert_eq!(items[0].rating, Some(95.0));
assert!(items[0] assert!(
.tags items[0]
.as_ref() .tags
.is_some_and(|values| values.iter().any(|value| value == "Anal"))); .as_ref()
assert!(items[0] .is_some_and(|values| values.iter().any(|value| value == "Anal"))
.tags );
.as_ref() assert!(
.is_some_and(|values| values.iter().any(|value| value == "Jane Doe"))); items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "Jane Doe"))
);
} }
} }

View File

@@ -10,8 +10,8 @@ use async_trait::async_trait;
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector}; use scraper::{ElementRef, Html, Selector};
use serde_json::Value;
use std::process::Command; use std::process::Command;
use std::time::Duration;
use url::form_urlencoded::byte_serialize; use url::form_urlencoded::byte_serialize;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -143,7 +143,38 @@ impl SpankbangProvider {
} }
fn request_headers(&self) -> Vec<(String, String)> { fn request_headers(&self) -> Vec<(String, String)> {
vec![("Referer".to_string(), format!("{}/", self.url))] vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), format!("{}/", self.url)),
]
} }
fn is_cloudflare_block(text: &str) -> bool { fn is_cloudflare_block(text: &str) -> bool {
@@ -153,168 +184,222 @@ impl SpankbangProvider {
|| lowercase.contains("cloudflare ray id") || lowercase.contains("cloudflare ray id")
} }
fn fallback_items_from_ytdlp(&self, page_url: &str, limit: usize) -> Vec<VideoItem> { fn fetch_items_with_curl_cffi(&self, page_url: &str, proxy_base_url: &str) -> Vec<VideoItem> {
let output = match Command::new("yt-dlp") crate::flow_debug!(
.arg("-J") "trace={} spankbang curl_cffi fetch start url={}",
.arg("--flat-playlist") "none",
.arg("--extractor-args") crate::util::flow_debug::preview(page_url, 120)
.arg("generic:impersonate=chrome") );
let output = match Command::new("python3")
.arg("-c")
.arg(
r#"from curl_cffi import requests
from bs4 import BeautifulSoup
import json
import sys
url = sys.argv[1]
r = requests.get(url, impersonate='chrome124', timeout=45, headers={'Referer': 'https://spankbang.com/'})
if r.status_code >= 400:
raise SystemExit(2)
soup = BeautifulSoup(r.text, 'html.parser')
cards = soup.select('[data-testid="video-list"] [data-testid="video-item"]')
if not cards:
cards = soup.select('[data-testid="video-item"]')
items = []
for card in cards:
vid = (card.get('data-id') or '').strip()
link = card.select_one('a[href*="/video/"]')
if not vid or link is None:
continue
href = (link.get('href') or '').strip()
if not href:
continue
img = card.select_one('picture img, img')
title_anchor = card.select_one('p a[title], a[title]')
duration = card.select_one('[data-testid="video-item-length"]')
views = card.select_one('[data-testid="views"]')
uploader = card.select_one('[data-testid="video-info-with-badge"] a[data-testid="title"]')
preview = card.select_one('video source[data-src]')
items.append({
'id': vid,
'href': href,
'title': (title_anchor.get('title') if title_anchor else '') or (img.get('alt') if img else ''),
'thumb': ((img.get('src') if img else '') or (img.get('data-src') if img else '') or '').strip(),
'preview': (preview.get('data-src') if preview else '') or '',
'duration': duration.get_text(' ', strip=True) if duration else '',
'views': views.get_text(' ', strip=True) if views else '',
'uploader': uploader.get_text(' ', strip=True) if uploader else '',
'uploader_href': (uploader.get('href') if uploader else '') or '',
})
sys.stdout.write(json.dumps(items))
"#,
)
.arg(page_url) .arg(page_url)
.output() .output()
{ {
Ok(output) if output.status.success() => output, Ok(output) if output.status.success() => output,
_ => return vec![], Ok(output) => {
let stderr = String::from_utf8_lossy(&output.stderr);
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.fetch.status",
&format!(
"url={page_url}; status={}; stderr={}",
output.status,
crate::util::flow_debug::preview(&stderr, 300)
),
);
return vec![];
}
Err(e) => {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.fetch.spawn",
&format!("url={page_url}; error={e}"),
);
return vec![];
}
}; };
let payload: serde_json::Value = match serde_json::from_slice(&output.stdout) { let payload = String::from_utf8(output.stdout).ok();
Ok(payload) => payload, if payload.as_deref().unwrap_or("").trim().is_empty() {
Err(_) => return vec![], crate::providers::report_provider_error_background(
}; "spankbang",
"curl_cffi.fetch.empty",
&format!("url={page_url}"),
);
return vec![];
}
crate::flow_debug!(
"trace={} spankbang curl_cffi fetch ok url={} bytes={}",
"none",
crate::util::flow_debug::preview(page_url, 120),
payload.as_deref().unwrap_or("").len()
);
let entries = match payload.get("entries").and_then(|value| value.as_array()) { let items_json: Value = match serde_json::from_str(payload.as_deref().unwrap_or("")) {
Some(entries) => entries, Ok(value) => value,
None => return vec![], Err(e) => {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.parse.json",
&format!("url={page_url}; error={e}"),
);
return vec![];
}
};
let Some(entries) = items_json.as_array() else {
return vec![];
}; };
let mut items = Vec::new(); let mut items = Vec::new();
for (index, entry) in entries.iter().take(limit).enumerate() { for entry in entries {
let Some(url) = entry.get("url").and_then(|value| value.as_str()) else {
continue;
};
if !(url.starts_with("https://") || url.starts_with("http://")) {
continue;
}
let id = entry let id = entry
.get("id") .get("id")
.and_then(|value| value.as_str()) .and_then(|value| value.as_str())
.filter(|value| !value.is_empty()) .unwrap_or("")
.map(ToOwned::to_owned) .trim()
.unwrap_or_else(|| format!("spankbang-fallback-{}", index + 1)); .to_string();
let href = entry
.get("href")
.and_then(|value| value.as_str())
.unwrap_or("")
.trim()
.to_string();
if id.is_empty() || href.is_empty() {
continue;
}
let detail_url = self.normalize_url(&href);
let title = entry let title = entry
.get("title") .get("title")
.and_then(|value| value.as_str()) .and_then(|value| value.as_str())
.filter(|value| !value.is_empty())
.map(Self::decode_html) .map(Self::decode_html)
.unwrap_or_else(|| format!("SpankBang Video {}", index + 1)); .unwrap_or_default();
if title.is_empty() {
continue;
}
let thumb = entry let thumb = entry
.get("thumbnail") .get("thumb")
.and_then(|value| value.as_str()) .and_then(|value| value.as_str())
.unwrap_or("") .map(|value| self.normalize_url(value))
.to_string(); .unwrap_or_default();
let preview = entry
.get("preview")
.and_then(|value| value.as_str())
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let duration = entry let duration = entry
.get("duration") .get("duration")
.and_then(|value| value.as_u64()) .and_then(|value| value.as_str())
.and_then(|value| u32::try_from(value).ok()) .map(Self::parse_duration)
.unwrap_or(0); .unwrap_or(0);
let views = entry
let format_kind = if url.contains(".m3u8") { .get("views")
"m3u8" .and_then(|value| value.as_str())
} else { .and_then(parse_abbreviated_number);
"video/mp4"
};
let mut format = VideoFormat::new(url.to_string(), "auto".to_string(), format_kind.to_string());
if let Some(headers) = entry.get("http_headers").and_then(|value| value.as_object()) {
for (key, value) in headers {
if let Some(value) = value.as_str() {
format.add_http_header(key.to_string(), value.to_string());
}
}
}
if entry
.get("http_headers")
.and_then(|value| value.as_object())
.is_none()
{
format.add_http_header("Referer".to_string(), format!("{}/", self.url));
}
let mut item = VideoItem::new( let mut item = VideoItem::new(
id, id,
title, title,
url.to_string(), self.proxy_url(proxy_base_url, &href),
"spankbang".to_string(), "spankbang".to_string(),
thumb, thumb,
duration, duration,
) );
.formats(vec![format]); if let Some(views) = views {
if let Some(views) = entry
.get("view_count")
.and_then(|value| value.as_u64())
.and_then(|value| u32::try_from(value).ok())
{
item = item.views(views); item = item.views(views);
} }
if let Some(uploader) = entry if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.preview(preview).formats(vec![format]);
}
let uploader = entry
.get("uploader") .get("uploader")
.and_then(|value| value.as_str()) .and_then(|value| value.as_str())
.filter(|value| !value.is_empty()) .map(Self::decode_html)
{ .unwrap_or_default();
item = item.uploader(uploader.to_string()); if !uploader.is_empty() {
item = item.uploader(uploader);
}
let uploader_href = entry
.get("uploader_href")
.and_then(|value| value.as_str())
.unwrap_or("")
.trim();
if !uploader_href.is_empty() {
let uploader_url = self.normalize_url(uploader_href);
if !uploader_url.is_empty() {
item = item.uploader_url(uploader_url);
}
} }
items.push(item); items.push(item);
} }
items if items.is_empty() {
} crate::providers::report_provider_error_background(
"spankbang",
async fn fallback_items_with_working_media( "curl_cffi.parse.empty",
&self, &format!("url={page_url}"),
page_url: &str, );
options: &ServerOptions,
) -> Vec<VideoItem> {
let fallback_items = self.fallback_items_from_ytdlp(page_url, 72);
if fallback_items.is_empty() {
return vec![]; return vec![];
} }
crate::flow_debug!(
let mut requester = requester_or_default( "trace={} spankbang curl_cffi parsed url={} items={}",
options, "none",
"spankbang", crate::util::flow_debug::preview(page_url, 120),
"spankbang.fallback_items_with_working_media.missing_requester", items.len()
); );
let mut working_items = Vec::new();
for item in fallback_items { items
let format_headers = item
.formats
.as_ref()
.and_then(|formats| formats.first())
.map(|format| format.http_headers_pairs())
.unwrap_or_default();
let media_url = item
.formats
.as_ref()
.and_then(|formats| formats.first())
.map(|format| format.url.clone())
.unwrap_or_else(|| item.url.clone());
if media_url.is_empty() {
continue;
}
let mut headers = format_headers;
if !headers
.iter()
.any(|(key, _)| key.eq_ignore_ascii_case("range"))
{
headers.push(("Range".to_string(), "bytes=0-2047".to_string()));
}
let is_working = match requester
.get_raw_with_headers_timeout(&media_url, headers, Some(Duration::from_secs(20)))
.await
{
Ok(response) => response.status().is_success(),
Err(_) => false,
};
if is_working {
working_items.push(item);
}
}
working_items
} }
fn build_query_url(&self, query: &str, page: u32, sort: &str) -> String { fn build_query_url(&self, query: &str, page: u32, sort: &str) -> String {
@@ -512,8 +597,11 @@ impl SpankbangProvider {
item = item.rating(rating); item = item.rating(rating);
} }
if let Some(preview) = preview { if let Some(preview) = preview {
let mut format = let mut format = VideoFormat::new(
VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string()); preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), detail_url.clone()); format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.preview(preview).formats(vec![format]); item = item.preview(preview).formats(vec![format]);
} }
@@ -594,7 +682,6 @@ impl SpankbangProvider {
} }
None => vec![], None => vec![],
}; };
let mut requester = let mut requester =
requester_or_default(&options, "spankbang", "spankbang.get.missing_requester"); requester_or_default(&options, "spankbang", "spankbang.get.missing_requester");
let text = match requester let text = match requester
@@ -609,13 +696,12 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"), &format!("url={video_url}; error={e}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
@@ -628,13 +714,12 @@ impl SpankbangProvider {
&format!("url={video_url}"), &format!("url={video_url}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
@@ -646,17 +731,17 @@ impl SpankbangProvider {
&format!("url={video_url}"), &format!("url={video_url}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
let looks_like_html = text.to_ascii_lowercase().contains("<html");
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default(); let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let video_items = self.get_video_items_from_html(text, proxy_base_url); let video_items = self.get_video_items_from_html(text, proxy_base_url);
if !video_items.is_empty() { if !video_items.is_empty() {
@@ -665,6 +750,18 @@ impl SpankbangProvider {
return Ok(video_items); return Ok(video_items);
} }
report_provider_error(
"spankbang",
"get.parse_empty",
&format!("url={video_url}; looks_like_html={looks_like_html}"),
)
.await;
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
Ok(old_items) Ok(old_items)
} }
@@ -686,7 +783,6 @@ impl SpankbangProvider {
} }
None => vec![], None => vec![],
}; };
let mut requester = let mut requester =
requester_or_default(&options, "spankbang", "spankbang.query.missing_requester"); requester_or_default(&options, "spankbang", "spankbang.query.missing_requester");
let text = match requester let text = match requester
@@ -701,13 +797,12 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"), &format!("url={video_url}; error={e}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
@@ -720,13 +815,12 @@ impl SpankbangProvider {
&format!("url={video_url}"), &format!("url={video_url}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
@@ -738,17 +832,17 @@ impl SpankbangProvider {
&format!("url={video_url}"), &format!("url={video_url}"),
) )
.await; .await;
let fallback_items = self let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
.fallback_items_with_working_media(&video_url, &options) let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
.await; if !curl_cffi_items.is_empty() {
if !fallback_items.is_empty() {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone()); cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(fallback_items); return Ok(curl_cffi_items);
} }
return Ok(old_items); return Ok(old_items);
} }
let looks_like_html = text.to_ascii_lowercase().contains("<html");
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default(); let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let video_items = self.get_video_items_from_html(text, proxy_base_url); let video_items = self.get_video_items_from_html(text, proxy_base_url);
if !video_items.is_empty() { if !video_items.is_empty() {
@@ -757,6 +851,18 @@ impl SpankbangProvider {
return Ok(video_items); return Ok(video_items);
} }
report_provider_error(
"spankbang",
"query.parse_empty",
&format!("url={video_url}; looks_like_html={looks_like_html}"),
)
.await;
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
Ok(old_items) Ok(old_items)
} }
} }
@@ -857,7 +963,38 @@ mod tests {
); );
assert_eq!( assert_eq!(
provider.request_headers(), provider.request_headers(),
vec![("Referer".to_string(), "https://spankbang.com/".to_string())] vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
); );
} }

View File

@@ -13,7 +13,38 @@ impl SpankbangProxy {
} }
fn request_headers() -> Vec<(String, String)> { fn request_headers() -> Vec<(String, String)> {
vec![("Referer".to_string(), "https://spankbang.com/".to_string())] vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
} }
fn extract_stream_data(text: &str) -> Option<&str> { fn extract_stream_data(text: &str) -> Option<&str> {
@@ -76,7 +107,38 @@ mod tests {
fn prefers_m3u8_when_present() { fn prefers_m3u8_when_present() {
assert_eq!( assert_eq!(
SpankbangProxy::request_headers(), SpankbangProxy::request_headers(),
vec![("Referer".to_string(), "https://spankbang.com/".to_string())] vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
); );
let data = r#" let data = r#"

View File

@@ -46,6 +46,7 @@ impl VideoCache {
} }
} }
#[allow(dead_code)]
pub fn entries(&self) -> Option<Vec<(String, (SystemTime, Vec<VideoItem>))>> { pub fn entries(&self) -> Option<Vec<(String, (SystemTime, Vec<VideoItem>))>> {
if let Ok(cache) = self.cache.lock() { if let Ok(cache) = self.cache.lock() {
// Return a cloned vector of the cache entries // Return a cloned vector of the cache entries
@@ -54,6 +55,7 @@ impl VideoCache {
None None
} }
#[allow(dead_code)]
pub async fn check(&self) -> Result<(), Box<dyn std::error::Error>> { pub async fn check(&self) -> Result<(), Box<dyn std::error::Error>> {
let iter = match self.entries() { let iter = match self.entries() {
Some(iter) => iter, Some(iter) => iter,

View File

@@ -265,6 +265,7 @@ impl VideoItem {
hottub_provider = "pimpbunny", hottub_provider = "pimpbunny",
hottub_provider = "pmvhaven", hottub_provider = "pmvhaven",
hottub_provider = "shooshtime", hottub_provider = "shooshtime",
hottub_provider = "spankbang",
))] ))]
pub fn formats(mut self, formats: Vec<VideoFormat>) -> Self { pub fn formats(mut self, formats: Vec<VideoFormat>) -> Self {
if formats.is_empty() { if formats.is_empty() {