// Listing metadata:
//   hottub/src/providers/sxyprn.rs
//   2026-05-06 13:50:47 +00:00
//   615 lines, 21 KiB, Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::format_error_chain;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::util::hoster_proxy::{proxy_name_for_url, rewrite_hoster_url};
use crate::videos::ServerOptions;
use crate::videos::VideoFormat;
use crate::videos::VideoItem;
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{Html, Selector};
use std::vec;
/// Static channel metadata for this provider: the group it is listed under
/// and the tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["tube", "community", "mixed"],
    };
// Provider-local `Error`, `ErrorKind`, and `Result` types, generated by `error_chain!`.
error_chain! {
    // Errors from other crates that are wrapped into this module's `Error`.
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
        JsonError(serde_json::Error);
    }
    // Raised by the HTML scraping code when an expected piece of markup is missing.
    errors {
        Parse(msg: String) {
            description("html parse error")
            display("html parse error: {}", msg)
        }
    }
}
/// Provider for the sxyprn site.
///
/// The only state is the site's base URL, which every request URL is built on.
#[derive(Debug, Clone)]
pub struct SxyprnProvider {
    /// Base URL that request paths are appended to.
    url: String,
}
impl SxyprnProvider {
pub fn new() -> Self {
SxyprnProvider {
url: "https://sxyprn.com".to_string(),
}
}
/// Builds the channel descriptor advertised for this provider.
///
/// The channel carries two single-select options: `sort` (latest, views,
/// rating, orgasmic) and `filter` (top, other, all). The client version is
/// accepted but not consulted.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // Shorthand for the id/title pairs that make up both option lists.
    let choice = |id: &str, title: &str| FilterOption {
        id: String::from(id),
        title: String::from(title),
    };

    let sort_option = ChannelOption {
        id: String::from("sort"),
        title: String::from("Sort"),
        description: String::from("Sort the Videos"),
        systemImage: String::from("list.number"),
        colorName: String::from("blue"),
        options: vec![
            choice("latest", "Latest"),
            choice("views", "Views"),
            choice("rating", "Rating"),
            choice("orgasmic", "Orgasmic"),
        ],
        multiSelect: false,
    };

    let filter_option = ChannelOption {
        id: String::from("filter"),
        title: String::from("Filter"),
        description: String::from("Filter the Videos"),
        systemImage: String::from("line.horizontal.3.decrease.circle"),
        colorName: String::from("green"),
        options: vec![
            choice("top", "Top"),
            choice("other", "Other"),
            choice("all", "All"),
        ],
        multiSelect: false,
    };

    Channel {
        id: String::from("sxyprn"),
        name: String::from("SexyPorn"),
        description: String::from("Free Porn Site"),
        premium: false,
        favicon: String::from("https://www.google.com/s2/favicons?sz=64&domain=sxyprn.com"),
        status: String::from("active"),
        categories: Vec::new(),
        options: vec![sort_option, filter_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Fetches one page of the site's main listing (`/blog/all/...`).
///
/// `page` is 1-based and is converted into the item offset used in the URL
/// (steps of 20); page 0 is treated like page 1. `sort` selects `views`,
/// `rating`, or `orgasmic`; anything else falls back to `latest`.
/// `options.filter` selects `other` or `all`; anything else, including no
/// filter, falls back to `top`.
///
/// Results are cached under the request URL. An entry younger than one hour
/// is returned without a network request. If the request fails, the page
/// cannot be parsed, or the page yields no items, the previously cached items
/// (possibly none) are returned instead; request and parse failures are
/// reported first. Every path returns `Ok`.
async fn get(
    &self,
    cache: VideoCache,
    pool: DbPool,
    page: u8,
    sort: String,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let sort_string = match sort.as_str() {
        "views" => "views",
        "rating" => "rating",
        "orgasmic" => "orgasmic",
        _ => "latest",
    };
    let filter_string = match options.filter.as_deref() {
        Some("other") => "other",
        Some("all") => "all",
        _ => "top",
    };
    let mut requester =
        crate::providers::requester_or_default(&options, module_path!(), "missing_requester");

    // `saturating_sub` keeps page 0 from underflowing (a panic in debug
    // builds, a wrapped garbage offset in release builds).
    let offset = u32::from(page).saturating_sub(1) * 20;
    let url_str = format!(
        "{}/blog/all/{}.html?fl={}&sm={}",
        self.url, offset, filter_string, sort_string
    );

    // A fresh cache entry short-circuits; a stale one is kept as the fallback.
    let old_items = match cache.get(&url_str) {
        Some((time, items)) => {
            if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 {
                return Ok(items.clone());
            }
            items.clone()
        }
        None => vec![],
    };

    let text = match requester.get(&url_str, None).await {
        Ok(text) => text,
        Err(e) => {
            crate::providers::report_provider_error(
                "sxyprn",
                "get.request",
                &format!("url={url_str}; error={e}"),
            )
            .await;
            return Ok(old_items);
        }
    };

    let video_items = match self
        .get_video_items_from_html(text, pool, requester, &options)
        .await
    {
        Ok(items) => items,
        Err(e) => {
            println!("Error parsing video items: {}", e);
            send_discord_error_report(
                e.to_string(),
                Some(format_error_chain(&e)),
                Some("Sxyprn Provider"),
                Some(&format!("URL: {}", url_str)),
                file!(),
                line!(),
                module_path!(),
            )
            .await;
            return Ok(old_items);
        }
    };

    // An empty page never replaces what is already cached.
    if video_items.is_empty() {
        return Ok(old_items);
    }
    cache.remove(&url_str);
    cache.insert(url_str, video_items.clone());
    Ok(video_items)
}
/// Fetches one page of search results for `query`.
///
/// Spaces in `query` are replaced with `-` to form the URL path segment; no
/// other escaping is applied. `page` is 1-based and is converted into the
/// offset used in the URL (steps of 20); page 0 is treated like page 1.
/// `sort` selects `views`, `rating` (sent to the site as `trending`), or
/// `orgasmic`; anything else falls back to `latest`.
///
/// Any cached entry for the request URL is returned without a network
/// request. If the entry is older than one hour, `cache.check()` is awaited
/// first, but the stale items are still the ones returned.
/// NOTE(review): this differs from `get`, which refetches when the entry is
/// stale — confirm that serving the stale entry here is intended.
///
/// With no cached entry, a failed request, a parse failure, or an empty
/// result page yields an empty list; request and parse failures are reported
/// first. Every path returns `Ok`.
async fn query(
    &self,
    cache: VideoCache,
    pool: DbPool,
    page: u8,
    query: &str,
    sort: String,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let sort_string = match sort.as_str() {
        "views" => "views",
        "rating" => "trending",
        "orgasmic" => "orgasmic",
        _ => "latest",
    };
    let mut requester =
        crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
    let search_string = query.replace(" ", "-");

    // `saturating_sub` keeps page 0 from underflowing (a panic in debug
    // builds, a wrapped garbage offset in release builds).
    let offset = u32::from(page).saturating_sub(1) * 20;
    let url_str = format!(
        "{}/{}.html?page={}&sm={}",
        self.url, search_string, offset, sort_string
    );

    // Any cache hit is served as-is; a stale hit additionally runs `cache.check()`.
    if let Some((time, items)) = cache.get(&url_str) {
        if time.elapsed().unwrap_or_default().as_secs() >= 60 * 60 {
            let _ = cache.check().await;
        }
        return Ok(items.clone());
    }

    let text = match requester.get(&url_str, None).await {
        Ok(text) => text,
        Err(e) => {
            crate::providers::report_provider_error(
                "sxyprn",
                "query.request",
                &format!("url={url_str}; error={e}"),
            )
            .await;
            return Ok(vec![]);
        }
    };

    let video_items = match self
        .get_video_items_from_html(text, pool, requester, &options)
        .await
    {
        Ok(items) => items,
        Err(e) => {
            println!("Error parsing video items: {}", e);
            send_discord_error_report(
                e.to_string(),
                Some(format_error_chain(&e)),
                Some("Sxyprn Provider"),
                Some(&format!("URL: {}", url_str)),
                file!(),
                line!(),
                module_path!(),
            )
            .await;
            return Ok(vec![]);
        }
    };

    // Only non-empty result pages are cached.
    if video_items.is_empty() {
        return Ok(vec![]);
    }
    cache.remove(&url_str);
    cache.insert(url_str, video_items.clone());
    Ok(video_items)
}
async fn get_video_items_from_html(
&self,
html: String,
_pool: DbPool,
_requester: Requester,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
if html.is_empty() {
return Ok(vec![]);
}
// take content before "<script async"
let before_script = html
.split("<script async")
.next()
.ok_or_else(|| ErrorKind::Parse("missing '<script async' split point".into()))?;
// split into video segments (skip the first chunk)
let raw_videos: Vec<&str> = before_script.split("post_el_small'").skip(1).collect();
if raw_videos.is_empty() {
return Err(ErrorKind::Parse("no 'post_el_small\\'' segments found".into()).into());
}
let mut items = Vec::new();
for video_segment in raw_videos {
// url id
let url = video_segment
.split("/post/")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract /post/ url".into()))?
.to_string();
let video_url =
crate::providers::build_proxy_url(options, "sxyprn", &format!("post/{}", url));
// title parts
let title_parts = video_segment
.split("post_text")
.nth(1)
.and_then(|s| s.split("style=''>").nth(1))
.and_then(|s| s.split("</div>").next())
.ok_or_else(|| ErrorKind::Parse("failed to extract title_parts".into()))?;
let title_links: Vec<String> = video_segment
.split("href='https://")
.skip(1)
.filter_map(|part| part.split("'").next().map(|u| u.to_string()))
.collect();
let document = Html::parse_document(title_parts);
let selector = Selector::parse("*")
.map_err(|e| ErrorKind::Parse(format!("selector parse failed: {e}")))?;
let mut texts = Vec::new();
for element in document.select(&selector) {
let text = element.text().collect::<Vec<_>>().join(" ");
if !text.trim().is_empty() {
texts.push(text.trim().to_string());
}
}
let mut title = texts.join(" ");
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.replace(" ", " ");
title = title
.replace('\n', "")
.replace(" + ", " ")
.replace(" ", " ")
.replace("\\", "")
.trim()
.to_string();
// De-duplicate repeated titles
let words: Vec<&str> = title.split_whitespace().collect();
if words.len() > 1 {
for pattern_len in (1..=words.len() / 2).rev() {
let pattern = &words[0..pattern_len];
let mut all_match = true;
let mut idx = pattern_len;
while idx < words.len() {
let end = std::cmp::min(idx + pattern_len, words.len());
if &words[idx..end] != &pattern[0..(end - idx)] {
all_match = false;
break;
}
idx += pattern_len;
}
if all_match && words.len() % pattern_len == 0 {
title = pattern.join(" ");
break;
}
}
}
if title.to_ascii_lowercase().starts_with("new ") {
title = title[4..].to_string();
}
// Extract tags from title (words starting with #)
let mut tags = Vec::new();
let words: Vec<&str> = title.split_whitespace().collect();
let mut cleaned_words = Vec::new();
for word in words {
let raw_tag = word
.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
.to_string();
if raw_tag.starts_with('#') && raw_tag.len() > 1 {
let tag = raw_tag[1..].to_string();
if !tags.contains(&tag) {
tags.push(tag);
}
} else {
cleaned_words.push(word.to_string());
}
}
// Reconstruct title without tags
title = cleaned_words.join(" ");
// id (DON'T index [6])
let id = video_url
.split('/')
.last()
.ok_or_else(|| ErrorKind::Parse("failed to extract id from video_url".into()))?
.split('?')
.next()
.unwrap_or("")
.to_string();
// thumb
let thumb_path = video_segment
.split("<img class='mini_post_vid_thumb lazyload'")
.nth(1)
.and_then(|s| s.split("data-src='").nth(1))
.and_then(|s| s.split('\'').next())
.unwrap_or("");
let thumb = format!("https:{thumb_path}");
// preview
let preview = if video_segment.contains("class='hvp_player'") {
Some(format!(
"https:{}",
video_segment
.split("class='hvp_player'")
.nth(1)
.and_then(|s| s.split(" src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract preview src".into()))?
))
} else {
None
};
// views
let views = video_segment
.split("<strong>·</strong> ")
.nth(1)
.and_then(|s| s.split_whitespace().next())
.ok_or_else(|| ErrorKind::Parse("failed to extract views".into()))?
.to_string();
// duration
let raw_duration = video_segment
.split("duration_small")
.nth(1)
.and_then(|s| s.split("title='").nth(1))
.and_then(|s| s.split('\'').nth(1))
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract duration".into()))?
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
// stream urls - collect both lulustream and vidara.so URLs
let mut formats = vec![];
// Add sxyprn format
let sxyprn_url = format!(
"{}/proxy/sxyprn/post/{}",
options.public_url_base.as_deref().unwrap_or(""),
id
);
formats.push(
VideoFormat::new(sxyprn_url.clone(), "auto".to_string(), "mp4".to_string())
.format_note(
sxyprn_url
.split("/")
.nth(4)
.unwrap_or("sxyprn")
.to_string(),
),
);
// Also collect and transform vidara.so URLs to proxy format and add as formats
let vidara_urls: Vec<String> = title_links
.iter()
.filter(|url| proxy_name_for_url(url).as_deref() == Some("vidara"))
.map(|url| rewrite_hoster_url(options, url))
.collect();
for vidara_url in vidara_urls {
formats.push(
VideoFormat::m3u8(vidara_url.clone(), "1080".to_string(), "m3u8".to_string())
.format_note(
vidara_url
.split("/")
.nth(4)
.unwrap_or("vidara")
.to_string(),
)
.format_id("vidara".to_string()),
);
}
let doodstream_urls: Vec<String> = title_links
.iter()
.filter(|url| proxy_name_for_url(url).as_deref() == Some("doodstream"))
.map(|url| rewrite_hoster_url(options, url))
.collect();
for dood_url in doodstream_urls {
formats.push(
VideoFormat::m3u8(dood_url.clone(), "auto".to_string(), "m3u8".to_string())
.format_note("doodstream".to_string())
.format_id("doodstream".to_string()),
);
}
let lulustream_urls: Vec<String> = title_links
.iter()
.filter(|url| proxy_name_for_url(url).as_deref() == Some("lulustream"))
.map(|url| rewrite_hoster_url(options, url))
.collect();
for lulustream_url in lulustream_urls {
formats.push(
VideoFormat::m3u8(lulustream_url.clone(), "auto".to_string(), "m3u8".to_string())
.format_note("lulustream".to_string())
.format_id("lulustream".to_string()),
);
}
let mut video_item = VideoItem::new(
id.clone(),
title,
format!("{}/post/{}", self.url, id.clone()),
"sxyprn".to_string(),
thumb,
duration,
)
.views(views.parse::<u32>().unwrap_or(0))
.formats(formats);
// Add tags if any were found
if !tags.is_empty() {
video_item.tags = Some(tags);
}
if preview.is_some() {
video_item.preview = preview;
}
items.push(video_item);
}
Ok(items)
}
}
#[async_trait]
impl Provider for SxyprnProvider {
async fn get_videos(
&self,
cache: VideoCache,
pool: DbPool,
sort: String,
query: Option<String>,
page: String,
per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let _ = per_page;
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => {
self.query(
cache,
pool,
page.parse::<u8>().unwrap_or(1),
&q,
sort,
options,
)
.await
}
None => {
self.get(cache, pool, page.parse::<u8>().unwrap_or(1), sort, options)
.await
}
};
match videos {
Ok(v) => v,
Err(e) => {
println!("Error fetching videos: {}", e);
vec![]
}
}
}
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
Some(self.build_channel(clientversion))
}
}