hsex
This commit is contained in:
992
src/providers/hsex.rs
Normal file
992
src/providers/hsex.rs
Normal file
@@ -0,0 +1,992 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::requester::Requester;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Duration, NaiveDate, Utc};
|
||||
use error_chain::error_chain;
|
||||
use futures::stream::{self, StreamExt};
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{thread, vec};
|
||||
use url::Url;
|
||||
use wreq::Version;
|
||||
|
||||
// Module error type generated by `error_chain!`:
// - wraps `std::io::Error` as a foreign link
// - adds a `Parse` variant for HTML/selector/regex failures.
error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||
|
||||
/// Site root; every relative link is resolved against this.
const BASE_URL: &str = "https://hsex.tv";
/// Stable channel identifier used for item attribution and error reporting.
const CHANNEL_ID: &str = "hsex";
/// Desktop Firefox user agent sent with every request and attached to HLS formats.
const FIREFOX_UA: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
/// Browser-like Accept header for HTML page fetches.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
||||
|
||||
/// Provider for hsex.tv. Cheap to clone: the filter lists are shared
/// behind `Arc<RwLock<…>>` and filled in by a background loader thread.
#[derive(Debug, Clone)]
pub struct HsexProvider {
    // Site root, normally `BASE_URL`; a field so background loaders and
    // tests can construct instances against the same base.
    url: String,
    // Hot-search tag options scraped from the home page (seeded with "All").
    tags: Arc<RwLock<Vec<FilterOption>>>,
    // Uploader options discovered from list pages (seeded with "All").
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
}
|
||||
|
||||
/// The site's fixed archive listings; each maps to a distinct URL shape
/// (see `build_archive_url`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArchiveMode {
    Latest,
    Hot,
    Weekly,
    Monthly,
    // 5–10 minute videos, newest first / hottest first.
    FiveMinLatest,
    FiveMinHot,
    // 10+ minute videos, newest first / hottest first.
    TenMinLatest,
    TenMinHot,
}
|
||||
|
||||
/// What a request ultimately browses: a built-in archive, a text search,
/// or one uploader's archive page.
#[derive(Debug, Clone)]
enum Target {
    Archive(ArchiveMode),
    Search {
        query: String,
        // `Some("hot")` / `Some("new")` when the client requested a sorted search.
        sort: Option<String>,
    },
    Uploader {
        author: String,
    },
}
|
||||
|
||||
impl HsexProvider {
|
||||
pub fn new() -> Self {
|
||||
let provider = Self {
|
||||
url: BASE_URL.to_string(),
|
||||
tags: Arc::new(RwLock::new(vec![FilterOption {
|
||||
id: "all".to_string(),
|
||||
title: "All".to_string(),
|
||||
}])),
|
||||
uploaders: Arc::new(RwLock::new(vec![FilterOption {
|
||||
id: "all".to_string(),
|
||||
title: "All".to_string(),
|
||||
}])),
|
||||
};
|
||||
provider.spawn_initial_load();
|
||||
provider
|
||||
}
|
||||
|
||||
    /// Spawns a dedicated OS thread that loads hot-search tags and uploader
    /// lists in the background, so `new()` returns immediately.
    ///
    /// A fresh current-thread Tokio runtime is built on that thread because
    /// this may be called from a non-async context. Every failure is
    /// reported via `report_provider_error_background` instead of being
    /// propagated — the provider stays usable with its default filters.
    fn spawn_initial_load(&self) {
        let url = self.url.clone();
        let tags = Arc::clone(&self.tags);
        let uploaders = Arc::clone(&self.uploaders);

        thread::spawn(move || {
            let runtime = match tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
            {
                Ok(runtime) => runtime,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "spawn_initial_load.runtime_build",
                        &error.to_string(),
                    );
                    return;
                }
            };

            runtime.block_on(async move {
                // Loader failures are reported independently so one failing
                // fetch does not prevent the other list from loading.
                if let Err(error) = Self::load_hot_searches(&url, Arc::clone(&tags)).await {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "load_hot_searches",
                        &error.to_string(),
                    );
                }
                if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "load_uploaders",
                        &error.to_string(),
                    );
                }
            });
        });
    }
|
||||
|
||||
    /// Builds the channel descriptor shown to clients: the fixed sort modes,
    /// plus whatever tag and uploader options the background loader has
    /// discovered so far.
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        // Snapshot the shared lists; a poisoned lock degrades to empty lists.
        let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
        let uploaders = self
            .uploaders
            .read()
            .map(|value| value.clone())
            .unwrap_or_default();

        Channel {
            id: CHANNEL_ID.to_string(),
            name: "Hsex".to_string(),
            description:
                "Hsex.tv videos with hot-search filters, uploader archives, and direct HLS formats."
                    .to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=hsex.tv".to_string(),
            status: "active".to_string(),
            categories: tags.iter().map(|value| value.title.clone()).collect(),
            options: vec![
                // "sort" ids here must match the keywords handled by
                // `archive_from_sort`.
                ChannelOption {
                    id: "sort".to_string(),
                    title: "Sort".to_string(),
                    description: "Browse Hsex archive pages.".to_string(),
                    systemImage: "list.number".to_string(),
                    colorName: "blue".to_string(),
                    options: vec![
                        FilterOption {
                            id: "new".to_string(),
                            title: "Latest".to_string(),
                        },
                        FilterOption {
                            id: "hot".to_string(),
                            title: "Hottest".to_string(),
                        },
                        FilterOption {
                            id: "weekly".to_string(),
                            title: "Weekly Top".to_string(),
                        },
                        FilterOption {
                            id: "monthly".to_string(),
                            title: "Monthly Top".to_string(),
                        },
                        FilterOption {
                            id: "five_min_new".to_string(),
                            title: "5-10 Min Latest".to_string(),
                        },
                        FilterOption {
                            id: "five_min_hot".to_string(),
                            title: "5-10 Min Hot".to_string(),
                        },
                        FilterOption {
                            id: "ten_min_new".to_string(),
                            title: "10+ Min Latest".to_string(),
                        },
                        FilterOption {
                            id: "ten_min_hot".to_string(),
                            title: "10+ Min Hot".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
                // Tag options carry relative `search.htm?…` hrefs as ids,
                // consumed by `target_from_filter_id`.
                ChannelOption {
                    id: "filter".to_string(),
                    title: "Tags".to_string(),
                    description: "Home page hot-search shortcuts.".to_string(),
                    systemImage: "tag.fill".to_string(),
                    colorName: "green".to_string(),
                    options: tags,
                    multiSelect: false,
                },
                // Uploader options carry `user.htm?author=…` hrefs as ids.
                ChannelOption {
                    id: "sites".to_string(),
                    title: "Uploaders".to_string(),
                    description: "Uploader archive pages discovered from Hsex lists.".to_string(),
                    systemImage: "person.crop.square".to_string(),
                    colorName: "purple".to_string(),
                    options: uploaders,
                    multiSelect: false,
                },
            ],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }
|
||||
|
||||
fn selector(value: &str) -> Result<Selector> {
|
||||
Selector::parse(value)
|
||||
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
||||
}
|
||||
|
||||
fn regex(value: &str) -> Result<Regex> {
|
||||
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
||||
}
|
||||
|
||||
fn collapse_whitespace(text: &str) -> String {
|
||||
text.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||
}
|
||||
|
||||
    /// Decodes HTML entities in `text`, replaces non-breaking spaces with
    /// regular spaces, and trims the result. Falls back to the raw input
    /// when entity decoding fails.
    fn decode_text(text: &str) -> String {
        decode(text.as_bytes())
            .to_string()
            .unwrap_or_else(|_| text.to_string())
            // U+00A0 (non-breaking space) appears in scraped titles/info text.
            .replace('\u{a0}', " ")
            .trim()
            .to_string()
    }
|
||||
|
||||
fn absolute_url(&self, value: &str) -> String {
|
||||
if value.starts_with("http://") || value.starts_with("https://") {
|
||||
return value.to_string();
|
||||
}
|
||||
format!(
|
||||
"{}/{}",
|
||||
self.url.trim_end_matches('/'),
|
||||
value.trim_start_matches('/')
|
||||
)
|
||||
}
|
||||
|
||||
fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
|
||||
vec![
|
||||
("Referer".to_string(), referer.to_string()),
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
||||
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
||||
]
|
||||
}
|
||||
|
||||
async fn fetch_html(
|
||||
&self,
|
||||
requester: &mut Requester,
|
||||
url: &str,
|
||||
referer: &str,
|
||||
) -> Result<String> {
|
||||
requester
|
||||
.get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11))
|
||||
.await
|
||||
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
||||
}
|
||||
|
||||
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
||||
if item.id.is_empty() || item.title.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Ok(mut values) = target.write() {
|
||||
if !values
|
||||
.iter()
|
||||
.any(|existing| existing.id == item.id || existing.title == item.title)
|
||||
{
|
||||
values.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
    /// Fetches the home page and records each `div.hot_search` link as a tag
    /// filter option (id = the link's relative href, title = the link text).
    ///
    /// A throwaway provider instance is constructed only to reuse
    /// `fetch_html`'s header/error handling; it shares the real `tags` list.
    async fn load_hot_searches(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
        let mut requester = Requester::new();
        let provider = Self {
            url: base_url.to_string(),
            tags: Arc::clone(&tags),
            uploaders: Arc::new(RwLock::new(vec![])),
        };
        let html = provider.fetch_html(&mut requester, base_url, &format!("{base_url}/")).await?;
        let document = Html::parse_document(&html);
        let selector = Self::selector("div.hot_search a[href]")?;
        for element in document.select(&selector) {
            let Some(href) = element.value().attr("href") else {
                continue;
            };
            let title = Self::decode_text(&element.text().collect::<String>());
            // `push_unique` drops blanks and duplicates.
            Self::push_unique(
                &tags,
                FilterOption {
                    id: href.to_string(),
                    title,
                },
            );
        }
        Ok(())
    }
|
||||
|
||||
fn collect_uploaders_from_html(
|
||||
html: &str,
|
||||
uploaders: &Arc<RwLock<Vec<FilterOption>>>,
|
||||
) -> Result<()> {
|
||||
let document = Html::parse_document(html);
|
||||
let selector = Self::selector("a[href*='user.htm?author=']")?;
|
||||
for element in document.select(&selector) {
|
||||
let Some(href) = element.value().attr("href") else {
|
||||
continue;
|
||||
};
|
||||
let title = Self::decode_text(&element.text().collect::<String>());
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
Self::push_unique(
|
||||
uploaders,
|
||||
FilterOption {
|
||||
id: href.to_string(),
|
||||
title,
|
||||
},
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Crawls a fixed set of list pages and harvests uploader links from
    /// each, accumulating them into the shared `uploaders` list.
    ///
    /// NOTE(review): the `?` inside the loop aborts on the first failing
    /// seed URL, skipping all remaining seeds — confirm whether a
    /// continue-on-error policy was intended for this best-effort loader.
    async fn load_uploaders(base_url: &str, uploaders: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
        // Throwaway instance to reuse `fetch_html`; shares the real list.
        let provider = Self {
            url: base_url.to_string(),
            tags: Arc::new(RwLock::new(vec![])),
            uploaders: Arc::clone(&uploaders),
        };
        let mut requester = Requester::new();
        let root_referer = format!("{base_url}/");
        let seed_urls = vec![
            format!("{base_url}/"),
            format!("{base_url}/list-1.htm"),
            format!("{base_url}/list-1.htm?sort=hot"),
            format!("{base_url}/top7_list-1.htm"),
            format!("{base_url}/top_list-1.htm"),
            format!("{base_url}/5min_list-1.htm"),
            format!("{base_url}/long_list-1.htm"),
        ];

        for url in seed_urls {
            let html = provider.fetch_html(&mut requester, &url, &root_referer).await?;
            Self::collect_uploaders_from_html(&html, &uploaders)?;
        }

        Ok(())
    }
|
||||
|
||||
fn archive_from_sort(sort: &str) -> ArchiveMode {
|
||||
match sort {
|
||||
"hot" | "popular" => ArchiveMode::Hot,
|
||||
"weekly" | "top7" | "trending" => ArchiveMode::Weekly,
|
||||
"monthly" | "top" => ArchiveMode::Monthly,
|
||||
"five_min_new" | "5min" => ArchiveMode::FiveMinLatest,
|
||||
"five_min_hot" => ArchiveMode::FiveMinHot,
|
||||
"ten_min_new" | "long" => ArchiveMode::TenMinLatest,
|
||||
"ten_min_hot" => ArchiveMode::TenMinHot,
|
||||
_ => ArchiveMode::Latest,
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
|
||||
if let Some(value) = options.sites.as_deref() {
|
||||
if let Some(target) = self.find_target_in_options(&self.uploaders, value) {
|
||||
return target;
|
||||
}
|
||||
}
|
||||
if let Some(value) = options.filter.as_deref() {
|
||||
if let Some(target) = self.find_target_in_options(&self.tags, value) {
|
||||
return target;
|
||||
}
|
||||
}
|
||||
Target::Archive(Self::archive_from_sort(sort))
|
||||
}
|
||||
|
||||
fn resolve_query_target(&self, query: &str, sort: &str) -> Target {
|
||||
if let Some(target) = self.find_target_in_options(&self.uploaders, query) {
|
||||
return target;
|
||||
}
|
||||
if let Some(target) = self.find_target_in_options(&self.tags, query) {
|
||||
return target;
|
||||
}
|
||||
Target::Search {
|
||||
query: query.trim().to_string(),
|
||||
sort: match sort {
|
||||
"hot" | "popular" => Some("hot".to_string()),
|
||||
"new" | "latest" => Some("new".to_string()),
|
||||
_ => None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn find_target_in_options(
|
||||
&self,
|
||||
options: &Arc<RwLock<Vec<FilterOption>>>,
|
||||
value: &str,
|
||||
) -> Option<Target> {
|
||||
let normalized = value.trim().to_lowercase();
|
||||
let options = options.read().ok()?;
|
||||
let option = options.iter().find(|item| {
|
||||
item.id.eq_ignore_ascii_case(value) || item.title.trim().to_lowercase() == normalized
|
||||
})?;
|
||||
self.target_from_filter_id(&option.id)
|
||||
}
|
||||
|
||||
fn target_from_filter_id(&self, id: &str) -> Option<Target> {
|
||||
if id.starts_with("search") {
|
||||
let url = Url::parse(&self.absolute_url(id)).ok()?;
|
||||
let mut search = None;
|
||||
let mut sort = None;
|
||||
for (key, value) in url.query_pairs() {
|
||||
if key == "search" {
|
||||
search = Some(value.to_string());
|
||||
}
|
||||
if key == "sort" {
|
||||
sort = Some(value.to_string());
|
||||
}
|
||||
}
|
||||
return search.map(|query| Target::Search { query, sort });
|
||||
}
|
||||
|
||||
if id.starts_with("user") {
|
||||
let url = Url::parse(&self.absolute_url(id)).ok()?;
|
||||
for (key, value) in url.query_pairs() {
|
||||
if key == "author" {
|
||||
return Some(Target::Uploader {
|
||||
author: value.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn build_url_for_target(&self, target: &Target, page: u16) -> String {
|
||||
match target {
|
||||
Target::Archive(mode) => self.build_archive_url(*mode, page),
|
||||
Target::Search { query, sort } => self.build_search_url(query, sort.as_deref(), page),
|
||||
Target::Uploader { author } => self.build_uploader_url(author, page),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_archive_url(&self, mode: ArchiveMode, page: u16) -> String {
|
||||
match mode {
|
||||
ArchiveMode::Latest => format!("{}/list-{}.htm?sort=new", self.url, page.max(1)),
|
||||
ArchiveMode::Hot => format!("{}/list-{}.htm?sort=hot", self.url, page.max(1)),
|
||||
ArchiveMode::Weekly => format!("{}/top7_list-{}.htm", self.url, page.max(1)),
|
||||
ArchiveMode::Monthly => format!("{}/top_list-{}.htm", self.url, page.max(1)),
|
||||
ArchiveMode::FiveMinLatest => {
|
||||
format!("{}/5min_list-{}.htm?sort=new", self.url, page.max(1))
|
||||
}
|
||||
ArchiveMode::FiveMinHot => {
|
||||
format!("{}/5min_list-{}.htm?sort=hot", self.url, page.max(1))
|
||||
}
|
||||
ArchiveMode::TenMinLatest => {
|
||||
format!("{}/long_list-{}.htm?sort=new", self.url, page.max(1))
|
||||
}
|
||||
ArchiveMode::TenMinHot => {
|
||||
format!("{}/long_list-{}.htm?sort=hot", self.url, page.max(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_search_url(&self, query: &str, sort: Option<&str>, page: u16) -> String {
|
||||
let mut serializer = url::form_urlencoded::Serializer::new(String::new());
|
||||
serializer.append_pair("search", query);
|
||||
if let Some(sort) = sort {
|
||||
serializer.append_pair("sort", sort);
|
||||
}
|
||||
let query_string = serializer.finish();
|
||||
if page <= 1 {
|
||||
format!("{}/search.htm?{query_string}", self.url)
|
||||
} else {
|
||||
format!("{}/search-{}.htm?{query_string}", self.url, page)
|
||||
}
|
||||
}
|
||||
|
||||
fn build_uploader_url(&self, author: &str, page: u16) -> String {
|
||||
let mut serializer = url::form_urlencoded::Serializer::new(String::new());
|
||||
serializer.append_pair("author", author);
|
||||
let query_string = serializer.finish();
|
||||
if page <= 1 {
|
||||
format!("{}/user.htm?{query_string}", self.url)
|
||||
} else {
|
||||
format!("{}/user-{}.htm?{query_string}", self.url, page)
|
||||
}
|
||||
}
|
||||
|
||||
fn first_video_link<'a>(&self, element: &'a ElementRef<'a>) -> Result<Option<ElementRef<'a>>> {
|
||||
let selector = Self::selector("a[href]")?;
|
||||
Ok(element.select(&selector).find(|link| {
|
||||
link.value()
|
||||
.attr("href")
|
||||
.map(|href| href.contains("video-"))
|
||||
.unwrap_or(false)
|
||||
}))
|
||||
}
|
||||
|
||||
fn extract_thumb(style: &str) -> Option<String> {
|
||||
let regex =
|
||||
Regex::new(r#"background-image:\s*url\(['"]?(?P<url>[^'")]+)['"]?\)"#).ok()?;
|
||||
regex
|
||||
.captures(style)
|
||||
.and_then(|captures| captures.name("url").map(|value| value.as_str().to_string()))
|
||||
}
|
||||
|
||||
fn parse_views(text: &str) -> Option<u32> {
|
||||
let regex = Regex::new(r"([0-9]+(?:\.[0-9]+)?[kKmM]?)次观看").ok()?;
|
||||
let captures = regex.captures(text)?;
|
||||
parse_abbreviated_number(captures.get(1)?.as_str())
|
||||
}
|
||||
|
||||
    /// Converts the site's date strings into a unix timestamp (seconds).
    ///
    /// Accepts absolute `YYYY-MM-DD` dates (interpreted as midnight UTC)
    /// and relative Chinese strings like `3天前` ("3 days ago"). Months and
    /// years are approximated as 30 and 365 days respectively.
    fn parse_uploaded_at(text: &str) -> Option<u64> {
        let trimmed = text.trim();
        if trimmed.is_empty() {
            return None;
        }

        if let Ok(date) = NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") {
            let dt = date.and_hms_opt(0, 0, 0)?;
            return Some(DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc).timestamp() as u64);
        }

        // "<amount><unit>前" — minutes/hours/days/months/years ago.
        let regex = Regex::new(r"^([0-9]+)(分钟|小时|天|月|年)前$").ok()?;
        let captures = regex.captures(trimmed)?;
        let amount = captures.get(1)?.as_str().parse::<i64>().ok()?;
        let unit = captures.get(2)?.as_str();
        let now = Utc::now();
        let timestamp = match unit {
            "分钟" => now - Duration::minutes(amount),
            "小时" => now - Duration::hours(amount),
            "天" => now - Duration::days(amount),
            "月" => now - Duration::days(amount * 30),
            "年" => now - Duration::days(amount * 365),
            _ => return None,
        };
        Some(timestamp.timestamp() as u64)
    }
|
||||
|
||||
fn parse_info_date(text: &str) -> Option<String> {
|
||||
let regex = Regex::new(r"次观看\s*(.+)$").ok()?;
|
||||
let captures = regex.captures(text)?;
|
||||
let value = captures.get(1)?.as_str().trim();
|
||||
(!value.is_empty()).then_some(value.to_string())
|
||||
}
|
||||
|
||||
    /// Parses a listing page (`div.thumbnail` cards) into `VideoItem`s.
    ///
    /// Cards missing a video link, id, title, or thumbnail are skipped
    /// entirely; duration, uploader, view count, and upload date are filled
    /// in best-effort when the card exposes them.
    fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
        let document = Html::parse_document(html);
        let item_selector = Self::selector("div.thumbnail")?;
        let title_selector = Self::selector("div.caption.title a[href]")?;
        let image_selector = Self::selector("div.image[style]")?;
        let duration_selector = Self::selector("var.duration")?;
        let uploader_selector = Self::selector("div.info a[href*='user.htm?author=']")?;
        let info_selector = Self::selector("div.info p")?;

        let mut items = Vec::new();
        for element in document.select(&item_selector) {
            let Some(link) = self.first_video_link(&element)? else {
                continue;
            };
            let Some(href) = link.value().attr("href") else {
                continue;
            };
            if !href.contains("video-") {
                continue;
            }

            // Detail links look like `video-<id>.htm`; strip both ends to
            // recover the id.
            let id = href
                .trim_start_matches("video-")
                .trim_end_matches(".htm")
                .to_string();
            if id.is_empty() {
                continue;
            }

            let title = element
                .select(&title_selector)
                .next()
                .map(|value| Self::decode_text(&value.text().collect::<String>()))
                .unwrap_or_default();
            if title.is_empty() {
                continue;
            }

            // Thumbnail is embedded as an inline background-image style.
            let thumb = element
                .select(&image_selector)
                .next()
                .and_then(|value| value.value().attr("style"))
                .and_then(Self::extract_thumb)
                .map(|value| self.absolute_url(&value))
                .unwrap_or_default();
            if thumb.is_empty() {
                continue;
            }

            // Missing/unparseable durations degrade to 0 seconds.
            let duration = element
                .select(&duration_selector)
                .next()
                .map(|value| Self::collapse_whitespace(&value.text().collect::<String>()))
                .and_then(|value| parse_time_to_seconds(&value))
                .unwrap_or(0) as u32;

            let url = self.absolute_url(href);
            let mut item = VideoItem::new(
                id,
                title,
                url,
                CHANNEL_ID.to_string(),
                thumb,
                duration,
            );

            if let Some(uploader) = element.select(&uploader_selector).next() {
                let uploader_name = Self::decode_text(&uploader.text().collect::<String>());
                if !uploader_name.is_empty() {
                    item.uploader = Some(uploader_name);
                }
                if let Some(uploader_href) = uploader.value().attr("href") {
                    item.uploaderUrl = Some(self.absolute_url(uploader_href));
                }
            }

            // The info paragraph carries both the view count and the date.
            if let Some(info) = element.select(&info_selector).next() {
                let info_text = Self::decode_text(&Self::collapse_whitespace(
                    &info.text().collect::<String>(),
                ));
                item.views = Self::parse_views(&info_text);
                if let Some(date_text) = Self::parse_info_date(&info_text) {
                    item.uploadedAt = Self::parse_uploaded_at(&date_text);
                }
            }

            items.push(item);
        }

        Ok(items)
    }
|
||||
|
||||
fn extract_meta_content(document: &Html, selector: &str) -> Result<Option<String>> {
|
||||
let selector = Self::selector(selector)?;
|
||||
Ok(document
|
||||
.select(&selector)
|
||||
.next()
|
||||
.and_then(|value| value.value().attr("content"))
|
||||
.map(Self::decode_text))
|
||||
}
|
||||
|
||||
    /// Merges a detail page's metadata into a list item.
    ///
    /// List-page values always win: title, thumbnail, and uploader are only
    /// filled in when the incoming item lacks them. The page's `<video
    /// source>` becomes a single HLS format carrying a Referer/User-Agent
    /// so the CDN accepts playback requests. Tags are gathered from the
    /// keywords/class/location/actor meta tags, then sorted and deduped.
    fn apply_detail_video(&self, mut item: VideoItem, html: &str, page_url: &str) -> Result<VideoItem> {
        let document = Html::parse_document(html);
        let source_selector = Self::selector("video source[src]")?;
        let author_selector = Self::selector(".panel-body a[href*='user.htm?author=']")?;
        let heading_title_selector = Self::selector("h3.panel-title")?;
        let panel_body_selector = Self::selector(".panel-body")?;

        // Title: only fill in when the list page gave us nothing.
        if item.title.is_empty() {
            if let Some(title) = document
                .select(&heading_title_selector)
                .next()
                .map(|value| Self::decode_text(&value.text().collect::<String>()))
            {
                if !title.is_empty() {
                    item.title = title;
                }
            }
        }

        // Thumbnail: the list-page thumb is preserved; og:image is a fallback.
        if item.thumb.is_empty() {
            if let Some(thumb) =
                Self::extract_meta_content(&document, "meta[property='og:image']")?
            {
                item.thumb = thumb;
            }
        }

        if item.uploader.is_none() || item.uploaderUrl.is_none() {
            if let Some(author) = document.select(&author_selector).next() {
                let author_name = Self::decode_text(&author.text().collect::<String>());
                if !author_name.is_empty() {
                    item.uploader = Some(author_name);
                }
                if let Some(href) = author.value().attr("href") {
                    item.uploaderUrl = Some(self.absolute_url(href));
                }
            }
        }

        // The page's <video><source src> is exposed as one "master" HLS
        // format; Referer + UA headers are required for CDN playback.
        if let Some(source_url) = document
            .select(&source_selector)
            .next()
            .and_then(|value| value.value().attr("src"))
            .map(|value| self.absolute_url(value))
        {
            let format = VideoFormat::new(
                source_url,
                "master".to_string(),
                "hls".to_string(),
            )
            .http_header("Referer".to_string(), page_url.to_string())
            .http_header("User-Agent".to_string(), FIREFOX_UA.to_string());
            item.formats = Some(vec![format]);
        }

        // video.js 16:9 class is the only aspect hint the page exposes.
        if html.contains("vjs-16-9") && item.aspectRatio.is_none() {
            item.aspectRatio = Some(16.0 / 9.0);
        }

        let mut tags = item.tags.take().unwrap_or_default();
        if let Some(keywords) = Self::extract_meta_content(&document, "meta[name='keywords']")? {
            tags.extend(
                keywords
                    .split(',')
                    .map(Self::decode_text)
                    .filter(|value| !value.is_empty()),
            );
        }
        if let Some(class_name) =
            Self::extract_meta_content(&document, "meta[itemprop='class']")?
        {
            if !class_name.is_empty() {
                tags.push(class_name);
            }
        }
        if let Some(area) = Self::extract_meta_content(&document, "meta[itemprop='contentLocation']")?
        {
            if !area.is_empty() {
                tags.push(area);
            }
        }
        // The actor meta doubles as an uploader fallback.
        if let Some(actor) = Self::extract_meta_content(&document, "meta[itemprop='actor']")? {
            if !actor.is_empty() {
                if item.uploader.is_none() {
                    item.uploader = Some(actor.clone());
                }
                tags.push(actor);
            }
        }
        tags.sort();
        tags.dedup();
        if !tags.is_empty() {
            item.tags = Some(tags);
        }

        // Fallback views/date scraped from the panel text ("观看:…" / "日期:…")
        // when the list page did not provide them.
        if let Some(panel_body) = document.select(&panel_body_selector).next() {
            let body_text = Self::decode_text(&Self::collapse_whitespace(
                &panel_body.text().collect::<String>(),
            ));
            if item.views.is_none() {
                let view_regex = Self::regex(r"观看:([0-9]+(?:\.[0-9]+)?[kKmM]?)")?;
                if let Some(captures) = view_regex.captures(&body_text) {
                    item.views = captures
                        .get(1)
                        .and_then(|value| parse_abbreviated_number(value.as_str()));
                }
            }
            if item.uploadedAt.is_none() {
                let date_regex = Self::regex(r"日期:([0-9年月天小时分钟前\-]+)")?;
                if let Some(captures) = date_regex.captures(&body_text) {
                    if let Some(value) = captures.get(1) {
                        item.uploadedAt = Self::parse_uploaded_at(value.as_str());
                    }
                }
            }
        }

        Ok(item)
    }
|
||||
|
||||
    /// Fetches a video's detail page and merges extra metadata (formats,
    /// tags, uploader, dates) into `item`.
    ///
    /// Best-effort: on any fetch or parse failure the error is reported in
    /// the background and the original list item is returned unchanged.
    async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
        let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video");
        match self.fetch_html(&mut requester, &item.url, &format!("{}/", self.url)).await {
            // `item` is cloned so the untouched copy survives as a fallback.
            Ok(html) => match self.apply_detail_video(item.clone(), &html, &item.url) {
                Ok(enriched) => enriched,
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "apply_detail_video",
                        &error.to_string(),
                    );
                    item
                }
            },
            Err(error) => {
                report_provider_error_background(CHANNEL_ID, "fetch_detail", &error.to_string());
                item
            }
        }
    }
|
||||
|
||||
    /// Fetches one listing URL, enriches each item from its detail page
    /// (up to four in flight), and caches the result keyed by URL.
    ///
    /// Cached entries are reused for up to 15 minutes.
    /// NOTE(review): the channel advertises `cacheDuration: Some(1800)`
    /// (30 min) while this local cache expires after 15 — confirm the two
    /// windows are intentionally different.
    async fn fetch_items_for_url(
        &self,
        cache: VideoCache,
        url: String,
        per_page_limit: usize,
        options: &ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        if let Some((time, items)) = cache.get(&url) {
            if time.elapsed().unwrap_or_default().as_secs() < 60 * 15 {
                return Ok(items.clone());
            }
        }

        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url");
        let html = self.fetch_html(&mut requester, &url, &format!("{}/", self.url)).await?;
        let list_items = self.parse_list_videos(&html)?;
        if list_items.is_empty() {
            return Ok(vec![]);
        }

        // Cap detail-page fetches at the requested page size (min 1).
        let limited_items = list_items
            .into_iter()
            .take(per_page_limit.max(1))
            .collect::<Vec<_>>();

        // buffer_unordered(4): at most four detail fetches in flight;
        // result order is not guaranteed to match the listing order.
        let items = stream::iter(limited_items.into_iter().map(|item| {
            let provider = self.clone();
            let options = options.clone();
            async move { provider.enrich_video(item, &options).await }
        }))
        .buffer_unordered(4)
        .collect::<Vec<_>>()
        .await;

        // Empty results are not cached, so transient failures retry soon.
        if !items.is_empty() {
            cache.insert(url, items.clone());
        }

        Ok(items)
    }
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
page: u16,
|
||||
sort: &str,
|
||||
per_page_limit: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let target = self.resolve_option_target(&options, sort);
|
||||
let url = self.build_url_for_target(&target, page);
|
||||
self.fetch_items_for_url(cache, url, per_page_limit, &options)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn query(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
page: u16,
|
||||
sort: &str,
|
||||
query: &str,
|
||||
per_page_limit: usize,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let target = self.resolve_query_target(query, sort);
|
||||
let url = self.build_url_for_target(&target, page);
|
||||
self.fetch_items_for_url(cache, url, per_page_limit, &options)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Provider for HsexProvider {
|
||||
async fn get_videos(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
pool: DbPool,
|
||||
sort: String,
|
||||
query: Option<String>,
|
||||
page: String,
|
||||
per_page: String,
|
||||
options: ServerOptions,
|
||||
) -> Vec<VideoItem> {
|
||||
let _ = pool;
|
||||
let page = page.parse::<u16>().unwrap_or(1);
|
||||
let per_page_limit = per_page.parse::<usize>().unwrap_or(30);
|
||||
|
||||
let result = match query {
|
||||
Some(query) if !query.trim().is_empty() => {
|
||||
self.query(cache, page, &sort, &query, per_page_limit, options)
|
||||
.await
|
||||
}
|
||||
_ => self.get(cache, page, &sort, per_page_limit, options).await,
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(videos) => videos,
|
||||
Err(error) => {
|
||||
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
||||
Some(self.build_channel(clientversion))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Provider fixture with one known tag (a search shortcut) and one
    // known uploader, bypassing `new()` so no background thread spawns.
    fn provider() -> HsexProvider {
        HsexProvider {
            url: BASE_URL.to_string(),
            tags: Arc::new(RwLock::new(vec![FilterOption {
                id: "search.htm?search=%E4%BA%BA%E5%A6%BB&sort=new".to_string(),
                title: "人妻".to_string(),
            }])),
            uploaders: Arc::new(RwLock::new(vec![FilterOption {
                id: "user.htm?author=xihongshiddd".to_string(),
                title: "xihongshiddd".to_string(),
            }])),
        }
    }

    // Page 2 of a sorted search must use the `search-N.htm` form with the
    // query percent-encoded.
    #[test]
    fn builds_search_page_two_url() {
        let provider = provider();
        let url = provider.build_url_for_target(
            &Target::Search {
                query: "体育生".to_string(),
                sort: Some("new".to_string()),
            },
            2,
        );

        assert_eq!(
            url,
            "https://hsex.tv/search-2.htm?search=%E4%BD%93%E8%82%B2%E7%94%9F&sort=new"
        );
    }

    // Page 2 of an uploader archive must use the `user-N.htm` form.
    #[test]
    fn builds_uploader_page_two_url() {
        let provider = provider();
        let url = provider.build_url_for_target(
            &Target::Uploader {
                author: "xihongshiddd".to_string(),
            },
            2,
        );

        assert_eq!(url, "https://hsex.tv/user-2.htm?author=xihongshiddd");
    }

    // Detail enrichment must NOT overwrite a list-page thumbnail with the
    // detail page's og:image, while still collecting tags and the HLS source.
    #[test]
    fn preserves_list_thumb_when_detail_has_og_image() {
        let provider = provider();
        let item = VideoItem::new(
            "1183662".to_string(),
            "Example".to_string(),
            "https://hsex.tv/video-1183662.htm".to_string(),
            CHANNEL_ID.to_string(),
            "https://img.ml0987.com/thumb/1183662.webp".to_string(),
            1141,
        );

        let html = r#"
            <meta property="og:image" content="https://p.tengxuny.com/thumb/1183662.jpg" />
            <meta name="keywords" content="顶级,体育生" />
            <video><source src="https://cdn1.hdcdn.online/hls/1183662/index.m3u8" /></video>
        "#;

        let enriched = provider
            .apply_detail_video(item, html, "https://hsex.tv/video-1183662.htm")
            .expect("detail parsing should succeed");

        assert_eq!(enriched.thumb, "https://img.ml0987.com/thumb/1183662.webp");
        assert_eq!(enriched.tags.as_ref().map(|v| v.len()), Some(2));
        assert_eq!(
            enriched
                .formats
                .as_ref()
                .and_then(|formats| formats.first())
                .map(|format| format.url.as_str()),
            Some("https://cdn1.hdcdn.online/hls/1183662/index.m3u8")
        );
    }
}
|
||||
@@ -48,6 +48,7 @@ pub mod youjizz;
|
||||
pub mod chaturbate;
|
||||
pub mod freepornvideosxxx;
|
||||
pub mod heavyfetish;
|
||||
pub mod hsex;
|
||||
pub mod hentaihaven;
|
||||
pub mod hqporner;
|
||||
pub mod hypnotube;
|
||||
@@ -210,6 +211,7 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
|
||||
"heavyfetish",
|
||||
Arc::new(heavyfetish::HeavyfetishProvider::new()) as DynProvider,
|
||||
);
|
||||
m.insert("hsex", Arc::new(hsex::HsexProvider::new()) as DynProvider);
|
||||
m.insert(
|
||||
"hentaihaven",
|
||||
Arc::new(hentaihaven::HentaihavenProvider::new()) as DynProvider,
|
||||
|
||||
Reference in New Issue
Block a user