supjav fix

This commit is contained in:
Simon
2026-05-21 21:08:30 +00:00
committed by ForgeCode
parent 7db9465750
commit 55edb2b4e3
4 changed files with 106 additions and 63 deletions

View File

@@ -68,6 +68,9 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `fullporner` | `mainstream-tube` | no | no | HTML scraper for fullporner.com; thumbnail IDs derived from `/thumb/{id}.jpg` URLs and used to build direct `xiaoshenke.net/vid/{id}/720` media redirect URLs (Referer + User-Agent headers required); supports cat:/category:/pornstar:/star: shortcut queries; no proxy needed. |
| `thepornbunny` | `mainstream-tube` | no | yes | KVS-style HTML scraper for thepornbunny.com; 24 items per site page; thumbnails at `https://www.thepornbunny.com/images/thumb/{id}.webp` from `data-original` attribute (no proxy needed); studio exposed as uploader; pornstar names in tags; `/proxy/thepornbunny/{slug}` fetches the video page, extracts `generate_mp4(enc_data, key, rnd, video_id)` args, decrypts `enc_data` via PBKDF2-HMAC-SHA512+AES-256-CBC to get an OK.ru session key, calls `api.ok.ru/fb.do?method=video.get&session_key=KEY&vids=RND` to get signed CDN URLs, and returns 302 to the best-quality okcdn.ru/vkuser.net MP4 URL (no special client headers needed); supports sort: new/popular/rated, 20 hardcoded categories via `categories` option, and tag:/category:/studio:/pornstar: query shortcuts. |
| `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories; supports `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
## Proxy Routes

View File

@@ -984,11 +984,14 @@ print(json.dumps({
let base_url =
Url::parse(master_url).map_err(|error| Error::from(format!("invalid master url: {error}")))?;
let mut formats = vec![
VideoFormat::new(master_url.to_string(), "auto".to_string(), "m3u8".to_string())
.format_note("master".to_string())
.format_id("master".to_string()),
];
let mut formats = vec![VideoFormat::new(
master_url.to_string(),
"auto".to_string(),
"m3u8".to_string(),
)
.format_note("master".to_string())
.format_id("master".to_string())
.http_header("Referer".to_string(), BASE_URL.to_string())];
let resolution_regex = Self::regex(r#"RESOLUTION=(\d+)x(\d+)"#)?;
let bandwidth_regex = Self::regex(r#"BANDWIDTH=(\d+)"#)?;
let mut lines = response.text.lines();
@@ -1033,7 +1036,8 @@ print(json.dumps({
height
.map(|value| format!("hls-{value}p"))
.unwrap_or_else(|| "hls-variant".to_string()),
);
)
.http_header("Referer".to_string(), BASE_URL.to_string());
if let Some(bandwidth) = bandwidth {
format = format.format_note(format!("{quality} ({bandwidth}bps)"));
}
@@ -1261,8 +1265,7 @@ print(json.dumps({
item.tags = Some(parsed_tags);
}
if let Some((master_url, formats)) = self.resolve_player(page_url, &players).await? {
item.url = master_url;
if let Some((_master_url, formats)) = self.resolve_player(page_url, &players).await? {
if !formats.is_empty() {
item.formats = Some(formats);
}

View File

@@ -211,7 +211,7 @@ impl XhamsterProvider {
if page <= 1 {
base
} else {
format!("{base}/page/{page}")
format!("{base}?page={page}")
}
}
@@ -505,11 +505,11 @@ mod tests {
);
assert_eq!(
XhamsterProvider::target_url(&Target::Newest, 2),
"https://xhamster.com/newest/page/2"
"https://xhamster.com/newest?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::MostViewed, 3),
"https://xhamster.com/most-viewed/page/3"
"https://xhamster.com/most-viewed?page=3"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Search("big ass".to_string()), 1),
@@ -517,7 +517,7 @@ mod tests {
);
assert_eq!(
XhamsterProvider::target_url(&Target::Category("amateur".to_string()), 2),
"https://xhamster.com/categories/amateur/page/2"
"https://xhamster.com/categories/amateur?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Channel("vip4k".to_string()), 1),

View File

@@ -9,6 +9,7 @@ use async_trait::async_trait;
use chrono::{Datelike, Local, Months};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
@@ -261,7 +262,7 @@ impl XvideosProvider {
fn parse_duration(text: &str) -> u32 {
let text = text.trim();
// Colon-separated formats MM:SS and HH:MM:SS
// Colon-separated MM:SS and HH:MM:SS
let parts: Vec<&str> = text.split(':').collect();
if parts.len() == 2 {
let m: u32 = parts[0].trim().parse().unwrap_or(0);
@@ -275,26 +276,24 @@ impl XvideosProvider {
return h * 3600 + m * 60 + s;
}
// Word-based: "1h20min", "30 min", "45sec", etc.
// Word-based: "1h20min", "30 min", "45sec".
// Trim the "before" slice so trailing spaces don't swallow the digits.
let low = text.to_ascii_lowercase();
let h: u32 = low
.find('h')
.and_then(|i| low[..i].trim().parse().ok())
.unwrap_or(0);
let m: u32 = low.find("min").and_then(|i| {
let start = low[..i]
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
low[start..i].trim().parse().ok()
}).unwrap_or(0);
let s: u32 = low.find("sec").and_then(|i| {
let start = low[..i]
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
low[start..i].trim().parse().ok()
}).unwrap_or(0);
let extract_num = |needle: &str| -> u32 {
low.find(needle).and_then(|i| {
let before = low[..i].trim_end(); // drop trailing whitespace
let start = before
.rfind(|c: char| !c.is_ascii_digit())
.map(|j| j + 1)
.unwrap_or(0);
before[start..].parse().ok()
}).unwrap_or(0)
};
let h = extract_num("h");
let m = extract_num("min");
let s = extract_num("sec");
h * 3600 + m * 60 + s
}
@@ -315,11 +314,17 @@ impl XvideosProvider {
let card_sel = Self::selector("div.thumb-block")?;
let img_sel = Self::selector("img[data-src]")?;
let link_sel = Self::selector("a[href]")?;
let title_sel = Self::selector("p.title a[title], a.title[title]")?;
let uploader_name_sel = Self::selector("p.metadata a span.name")?;
let uploader_link_sel = Self::selector("p.metadata a[href]")?;
// Homepage format: p.title a[title="…"]; best-page format: div.title a[href]
let title_attr_sel = Self::selector("p.title a[title], a.title[title]")?;
let title_text_sel = Self::selector("div.title a[href]")?;
// Homepage uploader: p.metadata a span.name; best-page: div.video-metadata a.name
let uploader_a_sel =
Self::selector("p.metadata a[href], div.video-metadata a.name[href]")?;
let dur_sel = Self::selector(".thumb-under span.duration")?;
// Homepage views: text near "Views" in p.metadata
let metadata_sel = Self::selector("p.metadata")?;
// Best-page views: span.views-count
let views_count_sel = Self::selector("span.views-count")?;
let mut items = Vec::new();
@@ -354,15 +359,22 @@ impl XvideosProvider {
continue;
}
// Numeric id from data-id attribute; fall back to eid
// Numeric id: try data-id attr, then data-video JSON, then eid
let video_id = card
.value()
.attr("data-id")
.filter(|s| !s.is_empty())
.unwrap_or(&eid)
.to_string();
.map(str::to_string)
.or_else(|| {
card.value().attr("data-video").and_then(|dv| {
// {"id":57049413,...}
let re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok()?;
re.captures(dv)?.get(1).map(|m| m.as_str().to_string())
})
})
.unwrap_or_else(|| eid.clone());
// Thumbnail (lazy-loaded, stored in data-src)
// Thumbnail (lazy-loaded in data-src)
let thumb = card
.select(&img_sel)
.next()
@@ -373,20 +385,36 @@ impl XvideosProvider {
continue 'card;
}
// Preview video clip (data-pvv on the same img element)
// Preview video clip.
// Homepage: data-pvv on the img; best page: "previewVideo" in data-video JSON.
let preview = card
.select(&img_sel)
.next()
.and_then(|el| el.value().attr("data-pvv"))
.map(str::to_string)
.filter(|s| !s.is_empty());
.filter(|s| !s.is_empty())
.or_else(|| {
card.value().attr("data-video").and_then(|dv| {
let re =
Regex::new(r#""previewVideo"\s*:\s*"([^"]+)""#).ok()?;
re.captures(dv)
.and_then(|c| c.get(1))
.map(|m| m.as_str().replace("\\/", "/"))
})
});
// Title from the title attribute on the link inside p.title
// Title: prefer title attr (homepage), fall back to text (best page)
let title = card
.select(&title_sel)
.select(&title_attr_sel)
.next()
.and_then(|el| el.value().attr("title").map(Self::decode_html))
.filter(|t| !t.trim().is_empty());
.filter(|t| !t.trim().is_empty())
.or_else(|| {
card.select(&title_text_sel)
.next()
.map(|el| Self::text_of(&el))
.filter(|t| !t.trim().is_empty())
});
let Some(title) = title else {
continue;
};
@@ -398,30 +426,39 @@ impl XvideosProvider {
.map(|el| Self::parse_duration(&Self::text_of(&el)))
.unwrap_or(0);
// Uploader name and URL
let uploader_name = card
.select(&uploader_name_sel)
.next()
// Uploader: pick the first non-video anchor in the metadata area.
// Skip anchors that link to a video page.
let uploader_el = card.select(&uploader_a_sel).find(|el| {
!el.value()
.attr("href")
.map(|h| h.contains("/video."))
.unwrap_or(false)
});
let uploader_name = uploader_el
.map(|el| Self::text_of(&el))
.filter(|s| !s.is_empty());
let uploader_url = card
.select(&uploader_link_sel)
.next()
let uploader_url = uploader_el
.and_then(|el| el.value().attr("href").map(Self::normalize_url))
.filter(|u| !u.is_empty());
// Views: scan p.metadata text for "NNN Views"
let views = card.select(&metadata_sel).next().and_then(|meta| {
let text = Self::text_of(&meta);
let low = text.to_ascii_lowercase();
low.find("views").and_then(|idx| {
// grab the token immediately before "views"
text[..idx]
.split_whitespace()
.last()
.and_then(|w| Self::parse_views(w))
})
});
// Views: best page has span.views-count; homepage has text near "Views"
let views = card
.select(&views_count_sel)
.next()
.map(|el| Self::text_of(&el))
.and_then(|t| Self::parse_views(&t))
.or_else(|| {
card.select(&metadata_sel).next().and_then(|meta| {
let text = Self::text_of(&meta);
let low = text.to_ascii_lowercase();
low.find("views").and_then(|idx| {
text[..idx]
.split_whitespace()
.last()
.and_then(|w| Self::parse_views(w))
})
})
});
let mut item = VideoItem::new(
video_id,