xhamster, xnxx, xvideos early build
This commit is contained in:
15
build.rs
15
build.rs
@@ -331,6 +331,21 @@ const PROVIDERS: &[ProviderDef] = &[
|
||||
module: "eporner",
|
||||
ty: "EpornerProvider",
|
||||
},
|
||||
ProviderDef {
|
||||
id: "xnxx",
|
||||
module: "xnxx",
|
||||
ty: "XnxxProvider",
|
||||
},
|
||||
ProviderDef {
|
||||
id: "xhamster",
|
||||
module: "xhamster",
|
||||
ty: "XhamsterProvider",
|
||||
},
|
||||
ProviderDef {
|
||||
id: "xvideos",
|
||||
module: "xvideos",
|
||||
ty: "XvideosProvider",
|
||||
},
|
||||
];
|
||||
|
||||
fn main() {
|
||||
|
||||
629
src/providers/xhamster.rs
Normal file
629
src/providers/xhamster.rs
Normal file
@@ -0,0 +1,629 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{Provider, report_provider_error, requester_or_default};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
use crate::videos::{ServerOptions, VideoItem};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
crate::providers::ProviderChannelMetadata {
|
||||
group_id: "mainstream-tube",
|
||||
tags: &["mainstream", "tube", "hd", "general"],
|
||||
};
|
||||
|
||||
/// Origin that relative xHamster paths are resolved against.
const BASE_URL: &str = "https://xhamster.com";
/// Identifier used for the channel, for produced items, and for error reports.
const CHANNEL_ID: &str = "xhamster";

/// `User-Agent` header value sent with HTML page requests.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0";
/// `Accept` header value sent with HTML page requests.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
||||
|
||||
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `Io` wraps `std::io::Error`; `Parse` carries a free-form message.
error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||
|
||||
/// Static `(slug, label)` category table.
///
/// xhamster has 600+ categories; this list covers the mainstream ones. The slug
/// is used in `/categories/{slug}` URLs and the label is what the channel shows.
const CATEGORIES: &[(&str, &str)] = &[
    ("18-year-old", "18 Year Old"),
    ("amateur", "Amateur"),
    ("anal", "Anal"),
    ("asian", "Asian"),
    ("bbw", "BBW"),
    ("bdsm", "BDSM"),
    ("big-ass", "Big Ass"),
    ("big-tits", "Big Tits"),
    ("bisexual", "Bisexual"),
    ("blonde", "Blonde"),
    ("blowjob", "Blowjob"),
    ("bondage", "Bondage"),
    ("brunette", "Brunette"),
    ("creampie", "Creampie"),
    ("cumshot", "Cumshot"),
    ("ebony", "Ebony"),
    ("fetish", "Fetish"),
    ("gay", "Gay"),
    ("granny", "Granny"),
    ("hardcore", "Hardcore"),
    ("hentai", "Hentai"),
    ("homemade", "Homemade"),
    ("indian", "Indian"),
    ("interracial", "Interracial"),
    ("japanese", "Japanese"),
    ("latina", "Latina"),
    ("lesbian", "Lesbian"),
    ("massage", "Massage"),
    ("masturbation", "Masturbation"),
    ("mature", "Mature"),
    ("milf", "MILF"),
    ("old-young", "Old & Young"),
    ("orgasm", "Orgasm"),
    ("pov", "POV"),
    ("public", "Public"),
    ("russian", "Russian"),
    ("shemale", "Shemale"),
    ("small-tits", "Small Tits"),
    ("squirt", "Squirt"),
    ("teen", "Teen"),
    ("threesome", "Threesome"),
    ("toys", "Toys"),
    ("vintage", "Vintage"),
    ("webcam", "Webcam"),
];
|
||||
|
||||
/// Listing page that a request resolves to.
#[derive(Clone, Debug)]
enum Target {
    /// The `/newest` feed.
    Newest,
    /// The `/most-viewed` feed.
    MostViewed,
    /// The `/best` feed.
    Best,
    /// Keyword search; holds the raw query text.
    Search(String),
    /// Category archive; holds the category slug.
    Category(String),
    /// Channel archive; holds the channel slug.
    Channel(String),
}
|
||||
|
||||
/// Field-less provider type for xhamster.com listings.
#[derive(Clone, Debug)]
pub struct XhamsterProvider;
|
||||
|
||||
impl XhamsterProvider {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_channel(&self, _cv: ClientVersion) -> Channel {
|
||||
let mut cat_options = vec![FilterOption {
|
||||
id: "all".to_string(),
|
||||
title: "All".to_string(),
|
||||
}];
|
||||
for (slug, label) in CATEGORIES {
|
||||
cat_options.push(FilterOption {
|
||||
id: slug.to_string(),
|
||||
title: label.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Channel {
|
||||
id: CHANNEL_ID.to_string(),
|
||||
name: "xHamster".to_string(),
|
||||
description:
|
||||
"xHamster — free porn with newest, most viewed, category, channel, and search routing."
|
||||
.to_string(),
|
||||
premium: false,
|
||||
favicon: "https://www.google.com/s2/favicons?sz=64&domain=xhamster.com".to_string(),
|
||||
status: "active".to_string(),
|
||||
categories: CATEGORIES
|
||||
.iter()
|
||||
.map(|(_, label)| label.to_string())
|
||||
.collect(),
|
||||
options: vec![
|
||||
ChannelOption {
|
||||
id: "sort".to_string(),
|
||||
title: "Sort".to_string(),
|
||||
description: "Browse xHamster by sort order.".to_string(),
|
||||
systemImage: "list.number".to_string(),
|
||||
colorName: "blue".to_string(),
|
||||
options: vec![
|
||||
FilterOption {
|
||||
id: "new".to_string(),
|
||||
title: "Newest".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "popular".to_string(),
|
||||
title: "Most Viewed".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "best".to_string(),
|
||||
title: "Best".to_string(),
|
||||
},
|
||||
],
|
||||
multiSelect: false,
|
||||
},
|
||||
ChannelOption {
|
||||
id: "categories".to_string(),
|
||||
title: "Categories".to_string(),
|
||||
description: "Browse an xHamster category archive.".to_string(),
|
||||
systemImage: "square.grid.2x2".to_string(),
|
||||
colorName: "orange".to_string(),
|
||||
options: cat_options,
|
||||
multiSelect: false,
|
||||
},
|
||||
],
|
||||
nsfw: true,
|
||||
cacheDuration: Some(1800),
|
||||
}
|
||||
}
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
|
||||
}
|
||||
|
||||
fn decode_html(text: &str) -> String {
|
||||
decode(text.as_bytes())
|
||||
.to_string()
|
||||
.unwrap_or_else(|_| text.to_string())
|
||||
}
|
||||
|
||||
fn text_of(el: &ElementRef<'_>) -> String {
|
||||
el.text()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
/// Produces a comparison key: `-` and `_` become spaces, whitespace runs are
/// collapsed to one space, and ASCII letters are lowercased.
fn normalize_key(s: &str) -> String {
    let spaced: String = s
        .chars()
        .map(|c| if matches!(c, '-' | '_') { ' ' } else { c })
        .collect();

    let mut key = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key.make_ascii_lowercase();
    key
}
|
||||
|
||||
fn target_url(target: &Target, page: u16) -> String {
|
||||
let base = match target {
|
||||
Target::Newest => format!("{BASE_URL}/newest"),
|
||||
Target::MostViewed => format!("{BASE_URL}/most-viewed"),
|
||||
Target::Best => format!("{BASE_URL}/best"),
|
||||
Target::Search(q) => {
|
||||
let encoded = q.trim().replace(' ', "+");
|
||||
format!("{BASE_URL}/search/{encoded}")
|
||||
}
|
||||
Target::Category(slug) => format!("{BASE_URL}/categories/{slug}"),
|
||||
Target::Channel(slug) => format!("{BASE_URL}/channels/{slug}"),
|
||||
};
|
||||
if page <= 1 {
|
||||
base
|
||||
} else {
|
||||
format!("{base}/page/{page}")
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_views(text: &str) -> Option<u32> {
|
||||
let cleaned = text
|
||||
.replace("views", "")
|
||||
.replace("view", "")
|
||||
.replace([',', ' '], "");
|
||||
parse_abbreviated_number(cleaned.trim())
|
||||
}
|
||||
|
||||
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
fn parse_list_page(html: &str) -> Result<Vec<VideoItem>> {
|
||||
let document = Html::parse_document(html);
|
||||
|
||||
let card_sel = Self::selector("div[data-video-type=\"video\"]")?;
|
||||
let thumb_link_sel = Self::selector("a[data-role=\"thumb-link\"]")?;
|
||||
let img_sel = Self::selector("img[data-role=\"thumb-preview-img\"]")?;
|
||||
let dur_sel = Self::selector("div[data-role=\"video-duration\"]")?;
|
||||
let title_sel = Self::selector("a.video-thumb-info__name")?;
|
||||
let uploader_name_sel = Self::selector("a.video-uploader__name")?;
|
||||
let uploader_link_sel = Self::selector("a[data-role=\"video-uploader-link\"]")?;
|
||||
let views_sel = Self::selector("div.video-thumb-views")?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
|
||||
for card in document.select(&card_sel) {
|
||||
let id = match card.value().attr("data-video-id") {
|
||||
Some(v) if !v.is_empty() => v.to_string(),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let thumb_link = match card.select(&thumb_link_sel).next() {
|
||||
Some(el) => el,
|
||||
None => continue,
|
||||
};
|
||||
let href = thumb_link.value().attr("href").unwrap_or_default();
|
||||
if href.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let page_url = if href.starts_with("https://") {
|
||||
href.to_string()
|
||||
} else {
|
||||
format!("{BASE_URL}{href}")
|
||||
};
|
||||
|
||||
let preview = thumb_link
|
||||
.value()
|
||||
.attr("data-previewvideo")
|
||||
.or_else(|| thumb_link.value().attr("data-previewvideo-fallback"))
|
||||
.map(str::to_string);
|
||||
|
||||
// srcset holds a smaller 526x298 thumb; fall back to src for the large one
|
||||
let thumb = card
|
||||
.select(&img_sel)
|
||||
.next()
|
||||
.and_then(|el| {
|
||||
el.value()
|
||||
.attr("srcset")
|
||||
.or_else(|| el.value().attr("src"))
|
||||
})
|
||||
.map(|v| {
|
||||
// srcset may have descriptor suffix like " 1w"; take first whitespace token
|
||||
v.split_whitespace().next().unwrap_or(v).to_string()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let duration = card
|
||||
.select(&dur_sel)
|
||||
.next()
|
||||
.map(|el| Self::text_of(&el))
|
||||
.and_then(|text| parse_time_to_seconds(&text))
|
||||
.and_then(|v| u32::try_from(v).ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
let title = card
|
||||
.select(&title_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("title"))
|
||||
.map(Self::decode_html)
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
.or_else(|| {
|
||||
// fallback: aria-label on thumb link
|
||||
thumb_link
|
||||
.value()
|
||||
.attr("aria-label")
|
||||
.map(Self::decode_html)
|
||||
.filter(|v| !v.is_empty())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let uploader_name_el = card.select(&uploader_name_sel).next();
|
||||
let uploader_link_el = card.select(&uploader_link_sel).next();
|
||||
|
||||
let uploader = uploader_name_el
|
||||
.as_ref()
|
||||
.map(|el| Self::decode_html(&Self::text_of(el)))
|
||||
.filter(|v| !v.is_empty());
|
||||
|
||||
let uploader_url = uploader_link_el
|
||||
.and_then(|el| el.value().attr("href"))
|
||||
.map(|v| {
|
||||
if v.starts_with("https://") {
|
||||
v.to_string()
|
||||
} else {
|
||||
format!("{BASE_URL}{v}")
|
||||
}
|
||||
})
|
||||
.filter(|v| !v.is_empty());
|
||||
|
||||
let views = card
|
||||
.select(&views_sel)
|
||||
.next()
|
||||
.and_then(|el| Self::parse_views(&Self::text_of(&el)));
|
||||
|
||||
let mut item = VideoItem::new(
|
||||
id,
|
||||
title,
|
||||
page_url,
|
||||
CHANNEL_ID.to_string(),
|
||||
thumb,
|
||||
duration,
|
||||
);
|
||||
|
||||
item.views = views;
|
||||
item.preview = preview;
|
||||
item.uploader = uploader;
|
||||
item.uploaderUrl = uploader_url.clone();
|
||||
|
||||
if let Some(url) = &uploader_url {
|
||||
let slug = url
|
||||
.trim_end_matches('/')
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or_default();
|
||||
if !slug.is_empty() {
|
||||
let kind = if url.contains("/channels/") {
|
||||
"channel"
|
||||
} else if url.contains("/pornstars/") {
|
||||
"pornstar"
|
||||
} else {
|
||||
"creator"
|
||||
};
|
||||
item.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{slug}"));
|
||||
}
|
||||
}
|
||||
|
||||
items.push(item);
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn resolve_query_target(&self, query: &str) -> Target {
|
||||
let trimmed = query.trim();
|
||||
|
||||
if let Some((kind, value)) = trimmed.split_once(':') {
|
||||
let slug = value.trim().replace(' ', "-").to_ascii_lowercase();
|
||||
if !slug.is_empty() {
|
||||
match kind.trim().to_ascii_lowercase().as_str() {
|
||||
"cat" | "category" => return Target::Category(slug),
|
||||
"channel" | "channels" => return Target::Channel(slug),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check static category list by label or slug
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
for (slug, label) in CATEGORIES {
|
||||
if Self::normalize_key(label) == normalized || Self::normalize_key(slug) == normalized {
|
||||
return Target::Category(slug.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
Target::Search(trimmed.to_string())
|
||||
}
|
||||
|
||||
fn resolve_sort_target(sort: &str) -> Target {
|
||||
match sort.trim().to_ascii_lowercase().as_str() {
|
||||
"popular" | "viewed" | "most_viewed" | "mostviewed" => Target::MostViewed,
|
||||
"best" => Target::Best,
|
||||
_ => Target::Newest,
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
|
||||
if let Some(cat) = options.categories.as_deref() {
|
||||
if cat != "all" && !cat.is_empty() {
|
||||
return Target::Category(cat.to_string());
|
||||
}
|
||||
}
|
||||
Self::resolve_sort_target(sort)
|
||||
}
|
||||
|
||||
async fn fetch_target(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
target: Target,
|
||||
page: u16,
|
||||
per_page: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let url = Self::target_url(&target, page);
|
||||
let cache_key = format!("{url}#per={per_page}");
|
||||
|
||||
if let Some((ts, cached)) = cache.get(&cache_key) {
|
||||
if ts.elapsed().unwrap_or_default().as_secs() < 300 {
|
||||
return Ok(cached.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let mut requester =
|
||||
requester_or_default(&options, CHANNEL_ID, "xhamster.fetch_target");
|
||||
|
||||
let html = requester
|
||||
.get_with_headers(&url, Self::html_headers(&url), None)
|
||||
.await
|
||||
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))?;
|
||||
|
||||
if html.trim().is_empty() {
|
||||
return Err(Error::from(format!("empty response for {url}")));
|
||||
}
|
||||
|
||||
let all = Self::parse_list_page(&html)?;
|
||||
let items: Vec<VideoItem> = all.into_iter().take(per_page.max(1)).collect();
|
||||
|
||||
if !items.is_empty() {
|
||||
cache.insert(cache_key, items.clone());
|
||||
}
|
||||
Ok(items)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for XhamsterProvider {
|
||||
async fn get_videos(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
_pool: DbPool,
|
||||
sort: String,
|
||||
query: Option<String>,
|
||||
page: String,
|
||||
per_page: String,
|
||||
options: ServerOptions,
|
||||
) -> Vec<VideoItem> {
|
||||
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
||||
let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
|
||||
|
||||
let target = match query {
|
||||
Some(q) if !q.trim().is_empty() => self.resolve_query_target(q.trim()),
|
||||
_ => self.resolve_option_target(&options, &sort),
|
||||
};
|
||||
|
||||
match self
|
||||
.fetch_target(cache, target, page, per_page, options)
|
||||
.await
|
||||
{
|
||||
Ok(items) => items,
|
||||
Err(e) => {
|
||||
report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await;
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
|
||||
Some(self.build_channel(cv))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// URL construction for every target kind, with and without pagination.
    #[test]
    fn parses_target_url_pagination() {
        assert_eq!(
            XhamsterProvider::target_url(&Target::Newest, 1),
            "https://xhamster.com/newest"
        );
        assert_eq!(
            XhamsterProvider::target_url(&Target::Newest, 2),
            "https://xhamster.com/newest/page/2"
        );
        assert_eq!(
            XhamsterProvider::target_url(&Target::MostViewed, 3),
            "https://xhamster.com/most-viewed/page/3"
        );
        assert_eq!(
            XhamsterProvider::target_url(&Target::Search("big ass".to_string()), 1),
            "https://xhamster.com/search/big+ass"
        );
        assert_eq!(
            XhamsterProvider::target_url(&Target::Category("amateur".to_string()), 2),
            "https://xhamster.com/categories/amateur/page/2"
        );
        assert_eq!(
            XhamsterProvider::target_url(&Target::Channel("vip4k".to_string()), 1),
            "https://xhamster.com/channels/vip4k"
        );
    }

    /// Bare queries matching a category slug or label route to that category.
    #[test]
    fn resolves_category_by_label_and_slug() {
        let p = XhamsterProvider::new();
        assert!(matches!(
            p.resolve_query_target("amateur"),
            Target::Category(s) if s == "amateur"
        ));
        assert!(matches!(
            p.resolve_query_target("Big Ass"),
            Target::Category(s) if s == "big-ass"
        ));
        assert!(matches!(
            p.resolve_query_target("Old & Young"),
            Target::Category(s) if s == "old-young"
        ));
    }

    /// `cat:` and `channel:` prefixes bypass the category table lookup.
    #[test]
    fn resolves_explicit_shortcuts() {
        let p = XhamsterProvider::new();
        assert!(matches!(
            p.resolve_query_target("cat:milf"),
            Target::Category(s) if s == "milf"
        ));
        assert!(matches!(
            p.resolve_query_target("channel:vip4k"),
            Target::Channel(s) if s == "vip4k"
        ));
    }

    /// Anything that is neither a shortcut nor a known category is a search.
    #[test]
    fn falls_through_to_search() {
        let p = XhamsterProvider::new();
        assert!(matches!(
            p.resolve_query_target("some unknown query"),
            Target::Search(_)
        ));
    }

    /// End-to-end parse of a single representative listing card.
    #[test]
    fn parses_listing_card() {
        let html = r#"
<html><body>
<div data-video-type="video" data-video-id="12345678">
  <a data-role="thumb-link"
     href="https://xhamster.com/videos/test-video-xh12345"
     data-previewvideo="https://thumb-v1.xhcdn.com/a/abc/012/345/678/526x298.t.mp4"
     aria-label="Test Video Title">
    <img data-role="thumb-preview-img"
         src="https://ic-vt-nss.xhcdn.com/a/abc/012/345/678/1280x720.1.jpg"
         srcset="https://ic-vt-nss.xhcdn.com/a/xyz/012/345/678/526x298.1.jpg"/>
    <div data-role="video-duration">
      <div>10:30</div>
    </div>
  </a>
  <div class="container-f146a video-thumb-info">
    <a class="video-thumb-info__name" title="Test Video Title"
       href="https://xhamster.com/videos/test-video-xh12345">
      Test Video Title
    </a>
    <div class="video-thumb-uploader">
      <div class="video-uploader-data">
        <a data-role="video-uploader-link"
           href="https://xhamster.com/channels/testchannel">TestChannel</a>
        <a class="video-uploader__name"
           href="https://xhamster.com/channels/testchannel">TestChannel</a>
        <div class="video-thumb-views">1.2M views</div>
      </div>
    </div>
  </div>
</div>
</body></html>
"#;

        let items = XhamsterProvider::parse_list_page(html).expect("parse should succeed");
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.id, "12345678");
        assert_eq!(item.title, "Test Video Title");
        assert_eq!(
            item.url,
            "https://xhamster.com/videos/test-video-xh12345"
        );
        // `srcset` takes precedence over `src`, so the 526x298 variant is expected.
        assert_eq!(
            item.thumb,
            "https://ic-vt-nss.xhcdn.com/a/xyz/012/345/678/526x298.1.jpg"
        );
        assert_eq!(item.duration, 630);
        assert_eq!(item.views, Some(1200000));
        assert_eq!(item.uploader.as_deref(), Some("TestChannel"));
        assert_eq!(
            item.uploaderUrl.as_deref(),
            Some("https://xhamster.com/channels/testchannel")
        );
        assert_eq!(
            item.uploaderId.as_deref(),
            Some("xhamster:channel:testchannel")
        );
        assert_eq!(
            item.preview.as_deref(),
            Some("https://thumb-v1.xhcdn.com/a/abc/012/345/678/526x298.t.mp4")
        );
    }
}
|
||||
492
src/providers/xnxx.rs
Normal file
492
src/providers/xnxx.rs
Normal file
@@ -0,0 +1,492 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, requester_or_default,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::videos::{ServerOptions, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use wreq::Version;
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
crate::providers::ProviderChannelMetadata {
|
||||
group_id: "mainstream-tube",
|
||||
tags: &["tube", "hd", "mixed", "search"],
|
||||
};
|
||||
|
||||
/// Origin that relative XNXX paths are resolved against.
const BASE_URL: &str = "https://www.xnxx.com";
/// Identifier used for the channel and for produced items.
const CHANNEL_ID: &str = "xnxx";
/// `User-Agent` header value sent with HTML page requests.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0";
/// `Accept` header value sent with HTML page requests.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
||||
|
||||
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `Io` wraps `std::io::Error`; `Parse` carries a free-form message.
error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||
|
||||
/// Listing page that a request resolves to.
#[derive(Clone, Debug)]
enum Target {
    /// Most-viewed global feed — the best "default" xnxx has.
    Hits,
    /// Keyword search (also covers tag shortcuts since `/search/{term}` works for both).
    Search(String),
}
|
||||
|
||||
/// Field-less provider type for xnxx.com listings.
#[derive(Clone, Debug)]
pub struct XnxxProvider;
|
||||
|
||||
impl XnxxProvider {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_channel(&self, _cv: ClientVersion) -> Channel {
|
||||
Channel {
|
||||
id: CHANNEL_ID.to_string(),
|
||||
name: "XNXX".to_string(),
|
||||
description: "XNXX — 10M+ free HD porn videos with keyword search, tag routing, and a most-viewed global feed.".to_string(),
|
||||
premium: false,
|
||||
favicon: "https://www.google.com/s2/favicons?sz=64&domain=xnxx.com".to_string(),
|
||||
status: "active".to_string(),
|
||||
categories: vec![],
|
||||
options: vec![
|
||||
ChannelOption {
|
||||
id: "sort".to_string(),
|
||||
title: "Sort".to_string(),
|
||||
description: "Browse XNXX ranking feeds.".to_string(),
|
||||
systemImage: "list.number".to_string(),
|
||||
colorName: "blue".to_string(),
|
||||
options: vec![
|
||||
FilterOption {
|
||||
id: "popular".to_string(),
|
||||
title: "Most Viewed".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "new".to_string(),
|
||||
title: "Latest (Most Viewed)".to_string(),
|
||||
},
|
||||
],
|
||||
multiSelect: false,
|
||||
},
|
||||
],
|
||||
nsfw: true,
|
||||
cacheDuration: Some(1800),
|
||||
}
|
||||
}
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|e| Error::from(format!("selector `{value}` failed: {e}")))
|
||||
}
|
||||
|
||||
fn decode_html(text: &str) -> String {
|
||||
decode(text.as_bytes())
|
||||
.to_string()
|
||||
.unwrap_or_else(|_| text.to_string())
|
||||
}
|
||||
|
||||
fn text_of(el: &ElementRef<'_>) -> String {
|
||||
let raw: String = el.text().collect::<Vec<_>>().join(" ");
|
||||
Self::decode_html(&raw.split_whitespace().collect::<Vec<_>>().join(" "))
|
||||
}
|
||||
|
||||
fn normalize_url(path: &str) -> String {
|
||||
let path = path.trim();
|
||||
if path.starts_with("http://") || path.starts_with("https://") {
|
||||
return path.to_string();
|
||||
}
|
||||
if path.starts_with("//") {
|
||||
return format!("https:{path}");
|
||||
}
|
||||
if path.starts_with('/') {
|
||||
return format!("{BASE_URL}{path}");
|
||||
}
|
||||
format!("{BASE_URL}/{path}")
|
||||
}
|
||||
|
||||
/// Appends a zero-based page index to `base`.
///
/// Trailing slashes on `base` are dropped first. Pages 0 and 1 yield `base`
/// itself; page N (N ≥ 2) yields `{base}/{N-1}`.
fn page_url(base: &str, page: u16) -> String {
    let root = base.trim_end_matches('/');
    match page.checked_sub(1) {
        Some(index) if index > 0 => format!("{root}/{index}"),
        _ => root.to_string(),
    }
}
|
||||
|
||||
fn target_url(target: &Target, page: u16) -> String {
|
||||
match target {
|
||||
Target::Hits => Self::page_url(&format!("{BASE_URL}/hits"), page),
|
||||
Target::Search(q) => {
|
||||
// Encode the query as slug: lowercase, spaces become hyphens
|
||||
// xnxx search uses URL-encoded spaces but also accepts hyphens
|
||||
let slug = q.trim()
|
||||
.replace(' ', "-")
|
||||
.to_ascii_lowercase();
|
||||
Self::page_url(&format!("{BASE_URL}/search/{slug}"), page)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
||||
("Accept-Language".to_string(), "en-US,en;q=0.5".to_string()),
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
async fn fetch_html(
|
||||
requester: &mut crate::util::requester::Requester,
|
||||
url: &str,
|
||||
) -> Result<String> {
|
||||
requester
|
||||
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
|
||||
.await
|
||||
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
|
||||
}
|
||||
|
||||
fn parse_duration_mins(text: &str) -> u32 {
|
||||
// Matches patterns like "16min", "23min", "1h20min", "1h"
|
||||
let re_hm = Regex::new(r"(\d+)h\s*(\d+)?min").ok();
|
||||
let re_h = Regex::new(r"(\d+)h(?:our)?s?").ok();
|
||||
let re_m = Regex::new(r"(\d+)\s*min").ok();
|
||||
let re_s = Regex::new(r"(\d+)\s*sec").ok();
|
||||
|
||||
let text = text.trim();
|
||||
if let Some(re) = re_hm.as_ref() {
|
||||
if let Some(cap) = re.captures(text) {
|
||||
let h: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
|
||||
let m: u32 = cap.get(2).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
|
||||
return h * 3600 + m * 60;
|
||||
}
|
||||
}
|
||||
if let Some(re) = re_h.as_ref() {
|
||||
if let Some(cap) = re.captures(text) {
|
||||
let h: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
|
||||
return h * 3600;
|
||||
}
|
||||
}
|
||||
if let Some(re) = re_m.as_ref() {
|
||||
if let Some(cap) = re.captures(text) {
|
||||
let m: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
|
||||
return m * 60;
|
||||
}
|
||||
}
|
||||
if let Some(re) = re_s.as_ref() {
|
||||
if let Some(cap) = re.captures(text) {
|
||||
let s: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
|
||||
return s;
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
fn parse_views(text: &str) -> Option<u32> {
|
||||
// text looks like "471.4M " or "15.5M " — extract the number+suffix before whitespace/icon
|
||||
let cleaned = text
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
.trim_end_matches(',');
|
||||
parse_abbreviated_number(cleaned)
|
||||
}
|
||||
|
||||
/// Parses a percentage label into a fraction (e.g. "99%" → 0.99).
///
/// The number immediately preceding the first `%` is used, decimals included,
/// so "99.5%" yields 0.995 and unrelated digits elsewhere in the text (such as
/// a resolution label) are ignored. When there is no `%`, or no number directly
/// before it, the first numeric run in the text is used instead. Returns `None`
/// when no parsable number is found.
fn parse_rating_pct(text: &str) -> Option<f32> {
    let is_numeric = |c: char| c.is_ascii_digit() || c == '.';

    // Number that ends right before the '%' sign (whitespace in between allowed).
    let before_percent = text.find('%').and_then(|idx| {
        let head = text[..idx].trim_end();
        // Every counted char is ASCII, so the char count equals the byte length.
        let run_len = head.chars().rev().take_while(|&c| is_numeric(c)).count();
        let run = &head[head.len() - run_len..];
        if run.is_empty() {
            None
        } else {
            Some(run.to_string())
        }
    });

    let number = before_percent.unwrap_or_else(|| {
        text.chars()
            .skip_while(|c| !c.is_ascii_digit())
            .take_while(|&c| is_numeric(c))
            .collect()
    });

    number.parse::<f32>().ok().map(|v| v / 100.0)
}
|
||||
|
||||
/// Parse video cards from both xnxx listing page formats.
|
||||
///
|
||||
/// Format A (search pages): outer div has `data-eid` attribute directly.
|
||||
/// Format B (hits/browse pages): outer div has `data-video` JSON attribute.
|
||||
fn parse_listing(html: &str, limit: usize) -> Result<Vec<VideoItem>> {
|
||||
let document = Html::parse_document(html);
|
||||
let card_sel = Self::selector("div.thumb-block")?;
|
||||
let link_sel = Self::selector("a[href]")?;
|
||||
let img_sel = Self::selector("img[data-src]")?;
|
||||
let uploader_sel = Self::selector(".uploader a")?;
|
||||
let metadata_sel = Self::selector("div.metadata, p.metadata")?;
|
||||
let views_right_sel = Self::selector("span.right")?;
|
||||
let duration_left_sel = Self::selector("span.left")?;
|
||||
let superfluous_sel = Self::selector("span.superfluous")?;
|
||||
// Title: either `a.title[title]` (hits) or `p a[title]` (search)
|
||||
let title_sel = Self::selector("a.title[title], p a[title], a[title][href]")?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
|
||||
'card: for card in document.select(&card_sel) {
|
||||
// Find a link that goes to a /video- page
|
||||
let video_link = card
|
||||
.select(&link_sel)
|
||||
.find(|el| {
|
||||
el.value()
|
||||
.attr("href")
|
||||
.map(|h| h.contains("/video-"))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
let Some(video_link) = video_link else { continue };
|
||||
|
||||
let href = video_link.value().attr("href").unwrap_or_default();
|
||||
let page_url = Self::normalize_url(href);
|
||||
if page_url.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract eid from the URL path: /video-{eid}/{slug}
|
||||
// Also works as the video id for deduplication
|
||||
let eid = href
|
||||
.trim_matches('/')
|
||||
.split('/')
|
||||
.find(|s| s.starts_with("video-"))
|
||||
.and_then(|s| s.strip_prefix("video-"))
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if eid.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Numeric id: prefer data-id, then data-video JSON, then eid
|
||||
let numeric_id = card.value().attr("data-id")
|
||||
.map(str::to_string)
|
||||
.filter(|s| !s.is_empty())
|
||||
.or_else(|| {
|
||||
// Try to extract from data-video JSON: {"id":12345,...}
|
||||
card.value().attr("data-video")
|
||||
.and_then(|dv| {
|
||||
let re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok()?;
|
||||
re.captures(dv)?.get(1).map(|m| m.as_str().to_string())
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| eid.clone());
|
||||
|
||||
// Thumbnail
|
||||
let thumb = card
|
||||
.select(&img_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("data-src").map(str::to_string))
|
||||
.unwrap_or_default();
|
||||
if thumb.is_empty() {
|
||||
continue 'card;
|
||||
}
|
||||
|
||||
// Title: find an <a title="..."> element pointing to the video
|
||||
let title = card
|
||||
.select(&title_sel)
|
||||
.find(|el| {
|
||||
el.value()
|
||||
.attr("href")
|
||||
.map(|h| h.contains("/video-"))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.and_then(|el| el.value().attr("title").map(Self::decode_html))
|
||||
.filter(|t| !t.trim().is_empty());
|
||||
let Some(title) = title else { continue };
|
||||
|
||||
// Uploader
|
||||
let uploader_el = card.select(&uploader_sel).next();
|
||||
let uploader_name = uploader_el.as_ref().map(|el| Self::text_of(el))
|
||||
.filter(|s| !s.is_empty());
|
||||
let uploader_href = uploader_el
|
||||
.and_then(|el| el.value().attr("href").map(Self::normalize_url));
|
||||
|
||||
// Metadata: views, rating, duration
|
||||
// Both formats share: views in span.right, rating in span.superfluous
|
||||
// Duration: in span.left (hits) or as text between span.right and end (search)
|
||||
let metadata_el = card.select(&metadata_sel).next();
|
||||
let (duration, views, rating) = if let Some(meta) = metadata_el {
|
||||
// Rating from .superfluous
|
||||
let rating = meta
|
||||
.select(&superfluous_sel)
|
||||
.map(|el| Self::text_of(&el))
|
||||
.find(|t| t.contains('%'))
|
||||
.and_then(|t| Self::parse_rating_pct(&t));
|
||||
|
||||
// Views from span.right (text before the eye icon)
|
||||
let views = meta
|
||||
.select(&views_right_sel)
|
||||
.next()
|
||||
.map(|el| Self::text_of(&el))
|
||||
.and_then(|t| Self::parse_views(&t));
|
||||
|
||||
// Duration: try span.left first (hits format), then raw metadata text (search format)
|
||||
let duration = meta
|
||||
.select(&duration_left_sel)
|
||||
.next()
|
||||
.map(|el| Self::text_of(&el))
|
||||
.map(|t| Self::parse_duration_mins(&t))
|
||||
.filter(|&d| d > 0)
|
||||
.unwrap_or_else(|| {
|
||||
// Search format: duration text is a direct text node in p.metadata
|
||||
let full_text = Self::text_of(&meta);
|
||||
Self::parse_duration_mins(&full_text)
|
||||
});
|
||||
|
||||
(duration, views, rating)
|
||||
} else {
|
||||
(0, None, None)
|
||||
};
|
||||
|
||||
let mut item = VideoItem::new(
|
||||
numeric_id,
|
||||
title.trim().to_string(),
|
||||
page_url,
|
||||
CHANNEL_ID.to_string(),
|
||||
thumb,
|
||||
duration,
|
||||
);
|
||||
if let Some(v) = views {
|
||||
item.views = Some(v);
|
||||
}
|
||||
if let Some(r) = rating {
|
||||
item.rating = Some(r);
|
||||
}
|
||||
if let Some(name) = uploader_name {
|
||||
item.uploader = Some(name);
|
||||
}
|
||||
if let Some(url) = uploader_href.filter(|u| !u.is_empty()) {
|
||||
let uploader_id = url
|
||||
.trim_end_matches('/')
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !uploader_id.is_empty() {
|
||||
item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
|
||||
}
|
||||
item.uploaderUrl = Some(url);
|
||||
}
|
||||
|
||||
items.push(item);
|
||||
if items.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn resolve_query_target(query: &str) -> Target {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
|
||||
// Explicit prefix shortcuts: tag:X, cat:X
|
||||
if let Some((kind, value)) = trimmed.split_once(':') {
|
||||
let value = value.trim();
|
||||
if !value.is_empty() {
|
||||
match kind.trim().to_ascii_lowercase().as_str() {
|
||||
"tag" | "cat" | "category" => return Target::Search(value.to_string()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Target::Search(trimmed.to_string())
|
||||
}
|
||||
|
||||
async fn fetch_target(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
target: Target,
|
||||
page: u16,
|
||||
per_page: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let url = Self::target_url(&target, page);
|
||||
let cache_key = format!("{url}#per={per_page}");
|
||||
|
||||
if let Some((ts, cached)) = cache.get(&cache_key) {
|
||||
if ts.elapsed().unwrap_or_default().as_secs() < 300 {
|
||||
return Ok(cached.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let mut requester =
|
||||
requester_or_default(&options, CHANNEL_ID, "xnxx.fetch_target.missing_requester");
|
||||
let html = match Self::fetch_html(&mut requester, &url).await {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"fetch_target.request",
|
||||
&format!("url={url}; error={e}"),
|
||||
)
|
||||
.await;
|
||||
return Ok(vec![]);
|
||||
}
|
||||
};
|
||||
|
||||
if html.trim().is_empty() {
|
||||
report_provider_error(CHANNEL_ID, "fetch_target.empty", &format!("url={url}")).await;
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let items = Self::parse_listing(&html, per_page)?;
|
||||
if !items.is_empty() {
|
||||
cache.insert(cache_key, items.clone());
|
||||
}
|
||||
Ok(items)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for XnxxProvider {
|
||||
async fn get_videos(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
_pool: DbPool,
|
||||
sort: String,
|
||||
query: Option<String>,
|
||||
page: String,
|
||||
per_page: String,
|
||||
options: ServerOptions,
|
||||
) -> Vec<VideoItem> {
|
||||
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
||||
let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
|
||||
|
||||
let target = match query {
|
||||
Some(q) if !q.trim().is_empty() => Self::resolve_query_target(q.trim()),
|
||||
_ => Target::Hits,
|
||||
};
|
||||
|
||||
// sort=new falls back to Hits since xnxx has no chronological listing
|
||||
let target = match (&target, sort.trim().to_ascii_lowercase().as_str()) {
|
||||
(Target::Hits, _) => Target::Hits,
|
||||
(Target::Search(_), _) => target,
|
||||
};
|
||||
|
||||
match self.fetch_target(cache, target, page, per_page, options).await {
|
||||
Ok(items) => items,
|
||||
Err(e) => {
|
||||
crate::providers::report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"get_videos",
|
||||
&format!("sort={sort}; page={page}; error={e}"),
|
||||
)
|
||||
.await;
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
|
||||
Some(self.build_channel(cv))
|
||||
}
|
||||
}
|
||||
615
src/providers/xvideos.rs
Normal file
615
src/providers/xvideos.rs
Normal file
@@ -0,0 +1,615 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{Provider, report_provider_error, requester_or_default};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::videos::{ServerOptions, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use chrono::{Datelike, Local, Months};
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use wreq::Version;
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
crate::providers::ProviderChannelMetadata {
|
||||
group_id: "mainstream-tube",
|
||||
tags: &["tube", "hd", "mixed", "search"],
|
||||
};
|
||||
|
||||
/// Site root (no trailing slash) that relative links are resolved against.
const BASE_URL: &str = "https://www.xvideos.com";

/// Identifier of this provider; also used as the prefix of uploader ids.
const CHANNEL_ID: &str = "xvideos";

/// `User-Agent` header value sent with every HTML request.
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";

/// `Accept` header value sent with every HTML request.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
||||
|
||||
// Module-local `Error` / `Result` types generated by `error_chain!`.
// `std::io::Error` converts automatically via the `Io` link; `Parse` carries
// a free-form message shown as "parse error: <msg>".
error_chain! {
    foreign_links {
        Io(std::io::Error);
    }

    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||
|
||||
/// Known categories as `(slug, display label)` pairs.
///
/// The slug has the form `{SiteName}-{ID}` as returned by the `/c` listing and
/// is appended to `{BASE_URL}/c/` to form the archive URL; the label is what
/// the channel filter shows.
const CATEGORIES: &[(&str, &str)] = &[
    ("AI-239", "AI"),
    ("Amateur-65", "Amateur"),
    ("Anal-12", "Anal"),
    ("Arab-159", "Arab"),
    ("Asian_Woman-32", "Asian"),
    ("ASMR-229", "ASMR"),
    ("Ass-14", "Ass"),
    ("bbw-51", "BBW"),
    ("Bi_Sexual-62", "Bi"),
    ("Big_Ass-24", "Big Ass"),
    ("Big_Cock-34", "Big Cock"),
    ("Big_Tits-23", "Big Tits"),
    ("Black_Woman-30", "Black"),
    ("Blonde-20", "Blonde"),
    ("Blowjob-15", "Blowjob"),
    ("Brunette-25", "Brunette"),
    ("Cam_Porn-58", "Cam Porn"),
    ("Creampie-40", "Creampie"),
    ("Cuckold-237", "Cuckold"),
    ("Cumshot-18", "Cumshot"),
    ("Femdom-235", "Femdom"),
    ("Fisting-165", "Fisting"),
    ("Fucked_Up_Family-81", "Step Family"),
    ("Gangbang-69", "Gangbang"),
    ("Gapes-167", "Gapes"),
    ("Indian-89", "Indian"),
    ("Interracial-27", "Interracial"),
    ("Latina-16", "Latina"),
    ("Lesbian-26", "Lesbian"),
    ("Lingerie-83", "Lingerie"),
    ("Mature-38", "Mature"),
    ("Milf-19", "MILF"),
    ("Oiled-22", "Oiled"),
    ("Redhead-31", "Redhead"),
    ("Solo_and_Masturbation-33", "Solo"),
    ("Squirting-56", "Squirting"),
    ("Stockings-28", "Stockings"),
    ("Teen-13", "Teen"),
];
|
||||
|
||||
/// The kind of listing page a request resolves to.
#[derive(Debug, Clone)]
enum Target {
    /// Newest uploads: the site root, then `/new/N` for later pages.
    Latest,
    /// The "best of month" archive for the previous calendar month.
    Best,
    /// Keyword search (`/?k=...`) for the contained query text.
    Search(String),
    /// Any listing addressed by a full base URL (category, tag, uploader);
    /// later pages are reached by appending `/N`.
    Archive(String),
}
|
||||
|
||||
/// Stateless provider that scrapes XVideos listing pages.
#[derive(Debug, Clone)]
pub struct XvideosProvider;
|
||||
|
||||
impl XvideosProvider {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_channel(&self, _cv: ClientVersion) -> Channel {
|
||||
let mut cat_options: Vec<FilterOption> = vec![FilterOption {
|
||||
id: "all".to_string(),
|
||||
title: "All".to_string(),
|
||||
}];
|
||||
for (slug, label) in CATEGORIES {
|
||||
cat_options.push(FilterOption {
|
||||
id: slug.to_string(),
|
||||
title: label.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Channel {
|
||||
id: CHANNEL_ID.to_string(),
|
||||
name: "XVideos".to_string(),
|
||||
description:
|
||||
"XVideos — one of the world's largest free porn sites with latest, best-of-month, category, tag, and keyword search."
|
||||
.to_string(),
|
||||
premium: false,
|
||||
favicon: "https://www.google.com/s2/favicons?sz=64&domain=xvideos.com".to_string(),
|
||||
status: "active".to_string(),
|
||||
categories: CATEGORIES.iter().map(|(_, label)| label.to_string()).collect(),
|
||||
options: vec![
|
||||
ChannelOption {
|
||||
id: "sort".to_string(),
|
||||
title: "Sort".to_string(),
|
||||
description: "Browse XVideos ranking feeds.".to_string(),
|
||||
systemImage: "list.number".to_string(),
|
||||
colorName: "blue".to_string(),
|
||||
options: vec![
|
||||
FilterOption {
|
||||
id: "new".to_string(),
|
||||
title: "Latest".to_string(),
|
||||
},
|
||||
FilterOption {
|
||||
id: "best".to_string(),
|
||||
title: "Best of Month".to_string(),
|
||||
},
|
||||
],
|
||||
multiSelect: false,
|
||||
},
|
||||
ChannelOption {
|
||||
id: "categories".to_string(),
|
||||
title: "Categories".to_string(),
|
||||
description: "Browse an XVideos category archive.".to_string(),
|
||||
systemImage: "square.grid.2x2".to_string(),
|
||||
colorName: "orange".to_string(),
|
||||
options: cat_options,
|
||||
multiSelect: false,
|
||||
},
|
||||
],
|
||||
nsfw: true,
|
||||
cacheDuration: Some(1800),
|
||||
}
|
||||
}
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
|
||||
}
|
||||
|
||||
fn decode_html(text: &str) -> String {
|
||||
decode(text.as_bytes())
|
||||
.to_string()
|
||||
.unwrap_or_else(|_| text.to_string())
|
||||
}
|
||||
|
||||
fn text_of(el: &ElementRef<'_>) -> String {
|
||||
let raw: String = el.text().collect::<Vec<_>>().join(" ");
|
||||
Self::decode_html(&raw.split_whitespace().collect::<Vec<_>>().join(" "))
|
||||
}
|
||||
|
||||
/// Canonical comparison key for category names and slugs: surrounding
/// whitespace and leading `#` characters are removed, `_`, `-` and whitespace
/// runs become single spaces, and ASCII letters are lower-cased.
fn normalize_key(s: &str) -> String {
    let body = s.trim().trim_start_matches('#');

    let mut key = String::with_capacity(body.len());
    let words = body
        .split(|c: char| c == '_' || c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }

    key.make_ascii_lowercase();
    key
}
|
||||
|
||||
fn normalize_url(path: &str) -> String {
|
||||
let path = path.trim();
|
||||
if path.starts_with("http://") || path.starts_with("https://") {
|
||||
return path.to_string();
|
||||
}
|
||||
if path.starts_with("//") {
|
||||
return format!("https:{path}");
|
||||
}
|
||||
if path.starts_with('/') {
|
||||
return format!("{BASE_URL}{path}");
|
||||
}
|
||||
format!("{BASE_URL}/{path}")
|
||||
}
|
||||
|
||||
fn html_headers(referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
||||
("Accept-Language".to_string(), "en-US,en;q=0.5".to_string()),
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
// /best always redirects to the previous calendar month's archive.
|
||||
fn best_base_url() -> String {
|
||||
let now = Local::now();
|
||||
let prev = now
|
||||
.checked_sub_months(Months::new(1))
|
||||
.unwrap_or(now);
|
||||
format!("{BASE_URL}/best/{}-{:02}", prev.year(), prev.month())
|
||||
}
|
||||
|
||||
fn target_url(target: &Target, page: u16) -> String {
|
||||
match target {
|
||||
Target::Latest => {
|
||||
if page <= 1 {
|
||||
format!("{BASE_URL}/")
|
||||
} else {
|
||||
// page 2 = /new/1, page 3 = /new/2, ...
|
||||
format!("{BASE_URL}/new/{}", page - 1)
|
||||
}
|
||||
}
|
||||
Target::Best => {
|
||||
let base = Self::best_base_url();
|
||||
if page <= 1 {
|
||||
base
|
||||
} else {
|
||||
format!("{base}/{}", page - 1)
|
||||
}
|
||||
}
|
||||
Target::Search(q) => {
|
||||
let encoded: String =
|
||||
url::form_urlencoded::byte_serialize(q.trim().as_bytes()).collect();
|
||||
if page <= 1 {
|
||||
format!("{BASE_URL}/?k={encoded}")
|
||||
} else {
|
||||
format!("{BASE_URL}/?k={encoded}&p={}", page - 1)
|
||||
}
|
||||
}
|
||||
Target::Archive(base_url) => {
|
||||
let base = base_url.trim_end_matches('/');
|
||||
if page <= 1 {
|
||||
base.to_string()
|
||||
} else {
|
||||
format!("{base}/{}", page - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_html(
|
||||
requester: &mut crate::util::requester::Requester,
|
||||
url: &str,
|
||||
) -> Result<String> {
|
||||
requester
|
||||
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
|
||||
.await
|
||||
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
|
||||
}
|
||||
|
||||
/// Converts a duration label into whole seconds.
///
/// Accepted shapes are `MM:SS`, `HH:MM:SS` and unit-suffixed text such as
/// `"21 min"`, `"1h20min"`, `"2h"` or `"45sec"`. Whitespace between a number
/// and its unit is allowed. Any component that is missing or does not parse
/// counts as 0, so unrecognised input yields 0. Arithmetic saturates instead
/// of overflowing on absurdly large values.
fn parse_duration(text: &str) -> u32 {
    // Parses one colon-separated field, treating garbage as 0.
    fn field(raw: &str) -> u32 {
        raw.trim().parse().unwrap_or(0)
    }

    // Combines hours/minutes/seconds without risking overflow.
    fn to_seconds(h: u32, m: u32, s: u32) -> u32 {
        h.saturating_mul(3600)
            .saturating_add(m.saturating_mul(60))
            .saturating_add(s)
    }

    // Value of the digit run right before the first occurrence of `unit`,
    // skipping whitespace between the number and the unit ("21 min").
    fn number_before(haystack: &str, unit: &str) -> u32 {
        let Some(unit_at) = haystack.find(unit) else {
            return 0;
        };
        let prefix = haystack[..unit_at].trim_end();
        // ASCII digits are single bytes, so counting bytes from the end always
        // lands on a char boundary even when other text is multi-byte.
        let digit_count = prefix
            .bytes()
            .rev()
            .take_while(u8::is_ascii_digit)
            .count();
        prefix[prefix.len() - digit_count..].parse().unwrap_or(0)
    }

    let text = text.trim();

    // Colon-separated formats: MM:SS and HH:MM:SS.
    let parts: Vec<&str> = text.split(':').collect();
    match parts.as_slice() {
        [m, s] => return to_seconds(0, field(m), field(s)),
        [h, m, s] => return to_seconds(field(h), field(m), field(s)),
        _ => {}
    }

    // Word-based formats: "1h20min", "30 min", "45sec", ...
    let low = text.to_ascii_lowercase();
    to_seconds(
        number_before(&low, "h"),
        number_before(&low, "min"),
        number_before(&low, "sec"),
    )
}
|
||||
|
||||
fn parse_views(text: &str) -> Option<u32> {
|
||||
// "877.3k Views", "1.2M Views" — strip suffix then parse
|
||||
let cleaned = text
|
||||
.replace("Views", "")
|
||||
.replace("views", "")
|
||||
.replace("View", "")
|
||||
.replace(',', "");
|
||||
parse_abbreviated_number(cleaned.trim())
|
||||
}
|
||||
|
||||
fn parse_listing(html: &str, limit: usize) -> Result<Vec<VideoItem>> {
|
||||
let document = Html::parse_document(html);
|
||||
|
||||
let card_sel = Self::selector("div.thumb-block")?;
|
||||
let img_sel = Self::selector("img[data-src]")?;
|
||||
let link_sel = Self::selector("a[href]")?;
|
||||
let title_sel = Self::selector("p.title a[title], a.title[title]")?;
|
||||
let uploader_name_sel = Self::selector("p.metadata a span.name")?;
|
||||
let uploader_link_sel = Self::selector("p.metadata a[href]")?;
|
||||
let dur_sel = Self::selector(".thumb-under span.duration")?;
|
||||
let metadata_sel = Self::selector("p.metadata")?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
|
||||
'card: for card in document.select(&card_sel) {
|
||||
// Find the anchor whose href contains /video.
|
||||
let video_link = card
|
||||
.select(&link_sel)
|
||||
.find(|el| {
|
||||
el.value()
|
||||
.attr("href")
|
||||
.map(|h| h.contains("/video."))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
let Some(video_link) = video_link else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let href = video_link.value().attr("href").unwrap_or_default();
|
||||
let page_url = Self::normalize_url(href);
|
||||
if page_url.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// eid: path segment starting with "video." e.g. "video.ohedfck8b21"
|
||||
let eid = href
|
||||
.split('/')
|
||||
.find(|s| s.starts_with("video."))
|
||||
.and_then(|s| s.strip_prefix("video."))
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if eid.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Numeric id from data-id attribute; fall back to eid
|
||||
let video_id = card
|
||||
.value()
|
||||
.attr("data-id")
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or(&eid)
|
||||
.to_string();
|
||||
|
||||
// Thumbnail (lazy-loaded, stored in data-src)
|
||||
let thumb = card
|
||||
.select(&img_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("data-src"))
|
||||
.map(str::to_string)
|
||||
.unwrap_or_default();
|
||||
if thumb.is_empty() {
|
||||
continue 'card;
|
||||
}
|
||||
|
||||
// Preview video clip (data-pvv on the same img element)
|
||||
let preview = card
|
||||
.select(&img_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("data-pvv"))
|
||||
.map(str::to_string)
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
// Title from the title attribute on the link inside p.title
|
||||
let title = card
|
||||
.select(&title_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("title").map(Self::decode_html))
|
||||
.filter(|t| !t.trim().is_empty());
|
||||
let Some(title) = title else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// Duration from span.duration inside .thumb-under
|
||||
let duration = card
|
||||
.select(&dur_sel)
|
||||
.next()
|
||||
.map(|el| Self::parse_duration(&Self::text_of(&el)))
|
||||
.unwrap_or(0);
|
||||
|
||||
// Uploader name and URL
|
||||
let uploader_name = card
|
||||
.select(&uploader_name_sel)
|
||||
.next()
|
||||
.map(|el| Self::text_of(&el))
|
||||
.filter(|s| !s.is_empty());
|
||||
let uploader_url = card
|
||||
.select(&uploader_link_sel)
|
||||
.next()
|
||||
.and_then(|el| el.value().attr("href").map(Self::normalize_url))
|
||||
.filter(|u| !u.is_empty());
|
||||
|
||||
// Views: scan p.metadata text for "NNN Views"
|
||||
let views = card.select(&metadata_sel).next().and_then(|meta| {
|
||||
let text = Self::text_of(&meta);
|
||||
let low = text.to_ascii_lowercase();
|
||||
low.find("views").and_then(|idx| {
|
||||
// grab the token immediately before "views"
|
||||
text[..idx]
|
||||
.split_whitespace()
|
||||
.last()
|
||||
.and_then(|w| Self::parse_views(w))
|
||||
})
|
||||
});
|
||||
|
||||
let mut item = VideoItem::new(
|
||||
video_id,
|
||||
title.trim().to_string(),
|
||||
page_url,
|
||||
CHANNEL_ID.to_string(),
|
||||
thumb,
|
||||
duration,
|
||||
);
|
||||
if let Some(v) = views {
|
||||
item.views = Some(v);
|
||||
}
|
||||
if let Some(p) = preview {
|
||||
item.preview = Some(p);
|
||||
}
|
||||
if let Some(name) = uploader_name {
|
||||
item.uploader = Some(name);
|
||||
}
|
||||
if let Some(url) = uploader_url {
|
||||
let uploader_id = url
|
||||
.trim_end_matches('/')
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !uploader_id.is_empty() {
|
||||
item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
|
||||
}
|
||||
item.uploaderUrl = Some(url);
|
||||
}
|
||||
|
||||
items.push(item);
|
||||
if items.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn lookup_category(query: &str) -> Option<String> {
|
||||
let normalized = Self::normalize_key(query);
|
||||
for (slug, label) in CATEGORIES {
|
||||
if Self::normalize_key(label) == normalized || Self::normalize_key(slug) == normalized {
|
||||
return Some(format!("{BASE_URL}/c/{slug}"));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn resolve_query_target(query: &str) -> Target {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
|
||||
if let Some((kind, value)) = trimmed.split_once(':') {
|
||||
let value = value.trim();
|
||||
if !value.is_empty() {
|
||||
match kind.trim().to_ascii_lowercase().as_str() {
|
||||
"tag" => {
|
||||
let slug = value.replace(' ', "-").to_ascii_lowercase();
|
||||
return Target::Archive(format!("{BASE_URL}/tags/{slug}"));
|
||||
}
|
||||
"cat" | "category" => {
|
||||
if let Some(url) = Self::lookup_category(value) {
|
||||
return Target::Archive(url);
|
||||
}
|
||||
let slug = value.replace(' ', "_");
|
||||
return Target::Archive(format!("{BASE_URL}/c/{slug}"));
|
||||
}
|
||||
"uploader" | "channel" | "profile" => {
|
||||
let slug = value.replace(' ', "_").to_ascii_lowercase();
|
||||
return Target::Archive(format!("{BASE_URL}/{slug}"));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Category name lookup
|
||||
if let Some(url) = Self::lookup_category(trimmed) {
|
||||
return Target::Archive(url);
|
||||
}
|
||||
|
||||
Target::Search(trimmed.to_string())
|
||||
}
|
||||
|
||||
fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target {
|
||||
if let Some(cat) = options.categories.as_deref() {
|
||||
if cat != "all" && !cat.is_empty() {
|
||||
return Target::Archive(format!("{BASE_URL}/c/{cat}"));
|
||||
}
|
||||
}
|
||||
match sort.trim().to_ascii_lowercase().as_str() {
|
||||
"best" | "top" => Target::Best,
|
||||
_ => Target::Latest,
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_target(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
target: Target,
|
||||
page: u16,
|
||||
per_page: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let url = Self::target_url(&target, page);
|
||||
let cache_key = format!("{url}#per={per_page}");
|
||||
|
||||
if let Some((ts, cached)) = cache.get(&cache_key) {
|
||||
if ts.elapsed().unwrap_or_default().as_secs() < 300 {
|
||||
return Ok(cached.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let mut requester = requester_or_default(
|
||||
&options,
|
||||
CHANNEL_ID,
|
||||
"xvideos.fetch_target.missing_requester",
|
||||
);
|
||||
let html = match Self::fetch_html(&mut requester, &url).await {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"fetch_target.request",
|
||||
&format!("url={url}; error={e}"),
|
||||
)
|
||||
.await;
|
||||
return Ok(vec![]);
|
||||
}
|
||||
};
|
||||
|
||||
if html.trim().is_empty() {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"fetch_target.empty",
|
||||
&format!("url={url}"),
|
||||
)
|
||||
.await;
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let items = Self::parse_listing(&html, per_page)?;
|
||||
if !items.is_empty() {
|
||||
cache.insert(cache_key, items.clone());
|
||||
}
|
||||
Ok(items)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for XvideosProvider {
|
||||
async fn get_videos(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
_pool: DbPool,
|
||||
sort: String,
|
||||
query: Option<String>,
|
||||
page: String,
|
||||
per_page: String,
|
||||
options: ServerOptions,
|
||||
) -> Vec<VideoItem> {
|
||||
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
||||
let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
|
||||
|
||||
let target = match query {
|
||||
Some(q) if !q.trim().is_empty() => Self::resolve_query_target(q.trim()),
|
||||
_ => Self::resolve_option_target(&options, &sort),
|
||||
};
|
||||
|
||||
match self
|
||||
.fetch_target(cache, target, page, per_page, options)
|
||||
.await
|
||||
{
|
||||
Ok(items) => items,
|
||||
Err(e) => {
|
||||
report_provider_error(
|
||||
CHANNEL_ID,
|
||||
"get_videos",
|
||||
&format!("sort={sort}; page={page}; error={e}"),
|
||||
)
|
||||
.await;
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
|
||||
Some(self.build_channel(cv))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user