supjav fix

This commit is contained in:
Simon
2026-05-21 21:08:30 +00:00
committed by ForgeCode
parent 7db9465750
commit 55edb2b4e3
4 changed files with 106 additions and 63 deletions

View File

@@ -984,11 +984,14 @@ print(json.dumps({
let base_url =
Url::parse(master_url).map_err(|error| Error::from(format!("invalid master url: {error}")))?;
let mut formats = vec![
VideoFormat::new(master_url.to_string(), "auto".to_string(), "m3u8".to_string())
.format_note("master".to_string())
.format_id("master".to_string()),
];
let mut formats = vec![VideoFormat::new(
master_url.to_string(),
"auto".to_string(),
"m3u8".to_string(),
)
.format_note("master".to_string())
.format_id("master".to_string())
.http_header("Referer".to_string(), BASE_URL.to_string())];
let resolution_regex = Self::regex(r#"RESOLUTION=(\d+)x(\d+)"#)?;
let bandwidth_regex = Self::regex(r#"BANDWIDTH=(\d+)"#)?;
let mut lines = response.text.lines();
@@ -1033,7 +1036,8 @@ print(json.dumps({
height
.map(|value| format!("hls-{value}p"))
.unwrap_or_else(|| "hls-variant".to_string()),
);
)
.http_header("Referer".to_string(), BASE_URL.to_string());
if let Some(bandwidth) = bandwidth {
format = format.format_note(format!("{quality} ({bandwidth}bps)"));
}
@@ -1261,8 +1265,7 @@ print(json.dumps({
item.tags = Some(parsed_tags);
}
if let Some((master_url, formats)) = self.resolve_player(page_url, &players).await? {
item.url = master_url;
if let Some((_master_url, formats)) = self.resolve_player(page_url, &players).await? {
if !formats.is_empty() {
item.formats = Some(formats);
}

View File

@@ -211,7 +211,7 @@ impl XhamsterProvider {
if page <= 1 {
base
} else {
format!("{base}/page/{page}")
format!("{base}?page={page}")
}
}
@@ -505,11 +505,11 @@ mod tests {
);
assert_eq!(
XhamsterProvider::target_url(&Target::Newest, 2),
"https://xhamster.com/newest/page/2"
"https://xhamster.com/newest?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::MostViewed, 3),
"https://xhamster.com/most-viewed/page/3"
"https://xhamster.com/most-viewed?page=3"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Search("big ass".to_string()), 1),
@@ -517,7 +517,7 @@ mod tests {
);
assert_eq!(
XhamsterProvider::target_url(&Target::Category("amateur".to_string()), 2),
"https://xhamster.com/categories/amateur/page/2"
"https://xhamster.com/categories/amateur?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Channel("vip4k".to_string()), 1),

View File

@@ -9,6 +9,7 @@ use async_trait::async_trait;
use chrono::{Datelike, Local, Months};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
@@ -261,7 +262,7 @@ impl XvideosProvider {
fn parse_duration(text: &str) -> u32 {
let text = text.trim();
// Colon-separated formats MM:SS and HH:MM:SS
// Colon-separated MM:SS and HH:MM:SS
let parts: Vec<&str> = text.split(':').collect();
if parts.len() == 2 {
let m: u32 = parts[0].trim().parse().unwrap_or(0);
@@ -275,26 +276,24 @@ impl XvideosProvider {
return h * 3600 + m * 60 + s;
}
// Word-based: "1h20min", "30 min", "45sec", etc.
// Word-based: "1h20min", "30 min", "45sec".
// Trim the "before" slice so trailing spaces don't swallow the digits.
let low = text.to_ascii_lowercase();
let h: u32 = low
.find('h')
.and_then(|i| low[..i].trim().parse().ok())
.unwrap_or(0);
let m: u32 = low.find("min").and_then(|i| {
let start = low[..i]
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
low[start..i].trim().parse().ok()
}).unwrap_or(0);
let s: u32 = low.find("sec").and_then(|i| {
let start = low[..i]
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
low[start..i].trim().parse().ok()
}).unwrap_or(0);
let extract_num = |needle: &str| -> u32 {
low.find(needle).and_then(|i| {
let before = low[..i].trim_end(); // drop trailing whitespace
let start = before
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
before[start..].parse().ok()
}).unwrap_or(0)
};
let h = extract_num("h");
let m = extract_num("min");
let s = extract_num("sec");
h * 3600 + m * 60 + s
}
@@ -315,11 +314,17 @@ impl XvideosProvider {
let card_sel = Self::selector("div.thumb-block")?;
let img_sel = Self::selector("img[data-src]")?;
let link_sel = Self::selector("a[href]")?;
let title_sel = Self::selector("p.title a[title], a.title[title]")?;
let uploader_name_sel = Self::selector("p.metadata a span.name")?;
let uploader_link_sel = Self::selector("p.metadata a[href]")?;
// Homepage format: p.title a[title="…"]; best-page format: div.title a[href]
let title_attr_sel = Self::selector("p.title a[title], a.title[title]")?;
let title_text_sel = Self::selector("div.title a[href]")?;
// Uploader anchor. Homepage: p.metadata a[href] (name sits in a nested span.name); best-page: div.video-metadata a.name[href]
let uploader_a_sel =
Self::selector("p.metadata a[href], div.video-metadata a.name[href]")?;
let dur_sel = Self::selector(".thumb-under span.duration")?;
// Homepage views: text near "Views" in p.metadata
let metadata_sel = Self::selector("p.metadata")?;
// Best-page views: span.views-count
let views_count_sel = Self::selector("span.views-count")?;
let mut items = Vec::new();
@@ -354,15 +359,22 @@ impl XvideosProvider {
continue;
}
// Numeric id from data-id attribute; fall back to eid
// Numeric id: try data-id attr, then data-video JSON, then eid
let video_id = card
.value()
.attr("data-id")
.filter(|s| !s.is_empty())
.unwrap_or(&eid)
.to_string();
.map(str::to_string)
.or_else(|| {
card.value().attr("data-video").and_then(|dv| {
// {"id":57049413,...}
let re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok()?;
re.captures(dv)?.get(1).map(|m| m.as_str().to_string())
})
})
.unwrap_or_else(|| eid.clone());
// Thumbnail (lazy-loaded, stored in data-src)
// Thumbnail (lazy-loaded in data-src)
let thumb = card
.select(&img_sel)
.next()
@@ -373,20 +385,36 @@ impl XvideosProvider {
continue 'card;
}
// Preview video clip (data-pvv on the same img element)
// Preview video clip.
// Homepage: data-pvv on the img; best page: "previewVideo" in data-video JSON.
let preview = card
.select(&img_sel)
.next()
.and_then(|el| el.value().attr("data-pvv"))
.map(str::to_string)
.filter(|s| !s.is_empty());
.filter(|s| !s.is_empty())
.or_else(|| {
card.value().attr("data-video").and_then(|dv| {
let re =
Regex::new(r#""previewVideo"\s*:\s*"([^"]+)""#).ok()?;
re.captures(dv)
.and_then(|c| c.get(1))
.map(|m| m.as_str().replace("\\/", "/"))
})
});
// Title from the title attribute on the link inside p.title
// Title: prefer title attr (homepage), fall back to text (best page)
let title = card
.select(&title_sel)
.select(&title_attr_sel)
.next()
.and_then(|el| el.value().attr("title").map(Self::decode_html))
.filter(|t| !t.trim().is_empty());
.filter(|t| !t.trim().is_empty())
.or_else(|| {
card.select(&title_text_sel)
.next()
.map(|el| Self::text_of(&el))
.filter(|t| !t.trim().is_empty())
});
let Some(title) = title else {
continue;
};
@@ -398,30 +426,39 @@ impl XvideosProvider {
.map(|el| Self::parse_duration(&Self::text_of(&el)))
.unwrap_or(0);
// Uploader name and URL
let uploader_name = card
.select(&uploader_name_sel)
.next()
// Uploader: pick the first non-video anchor in the metadata area.
// Skip anchors that link to a video page.
let uploader_el = card.select(&uploader_a_sel).find(|el| {
!el.value()
.attr("href")
.map(|h| h.contains("/video."))
.unwrap_or(false)
});
let uploader_name = uploader_el
.map(|el| Self::text_of(&el))
.filter(|s| !s.is_empty());
let uploader_url = card
.select(&uploader_link_sel)
.next()
let uploader_url = uploader_el
.and_then(|el| el.value().attr("href").map(Self::normalize_url))
.filter(|u| !u.is_empty());
// Views: scan p.metadata text for "NNN Views"
let views = card.select(&metadata_sel).next().and_then(|meta| {
let text = Self::text_of(&meta);
let low = text.to_ascii_lowercase();
low.find("views").and_then(|idx| {
// grab the token immediately before "views"
text[..idx]
.split_whitespace()
.last()
.and_then(|w| Self::parse_views(w))
})
});
// Views: best page has span.views-count; homepage has text near "Views"
let views = card
.select(&views_count_sel)
.next()
.map(|el| Self::text_of(&el))
.and_then(|t| Self::parse_views(&t))
.or_else(|| {
card.select(&metadata_sel).next().and_then(|meta| {
let text = Self::text_of(&meta);
let low = text.to_ascii_lowercase();
low.find("views").and_then(|idx| {
text[..idx]
.split_whitespace()
.last()
.and_then(|w| Self::parse_views(w))
})
})
});
let mut item = VideoItem::new(
video_id,