This commit is contained in:
Simon
2026-01-02 14:55:13 +00:00
parent 27bb3daec4
commit 89eecbe790
6 changed files with 692 additions and 552 deletions

View File

@@ -157,6 +157,10 @@ async fn status(req: HttpRequest) -> Result<impl web::Responder, web::Error> {
id: "lg".to_string(), id: "lg".to_string(),
title: "Longest".to_string(), title: "Longest".to_string(),
}, },
FilterOption {
id: "cm".to_string(),
title: "Newest".to_string(),
},
], ],
multiSelect: false, multiSelect: false,
}], }],

View File

@@ -3,13 +3,15 @@ use crate::api::ClientVersion;
use crate::providers::Provider; use crate::providers::Provider;
use crate::status::*; use crate::status::*;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester; use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait; use async_trait::async_trait;
use error_chain::error_chain; use error_chain::error_chain;
use futures::future::join_all; use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{decode, ICodedDataTrait};
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use std::{thread, vec}; use std::{thread, vec};
use titlecase::Titlecase; use titlecase::Titlecase;
@@ -19,6 +21,13 @@ error_chain! {
foreign_links { foreign_links {
Io(std::io::Error); Io(std::io::Error);
HttpRequest(wreq::Error); HttpRequest(wreq::Error);
Json(serde_json::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
} }
} }
@@ -28,9 +37,10 @@ pub struct PimpbunnyProvider {
stars: Arc<RwLock<Vec<FilterOption>>>, stars: Arc<RwLock<Vec<FilterOption>>>,
categories: Arc<RwLock<Vec<FilterOption>>>, categories: Arc<RwLock<Vec<FilterOption>>>,
} }
impl PimpbunnyProvider { impl PimpbunnyProvider {
pub fn new() -> Self { pub fn new() -> Self {
let provider = PimpbunnyProvider { let provider = Self {
url: "https://pimpbunny.com".to_string(), url: "https://pimpbunny.com".to_string(),
stars: Arc::new(RwLock::new(vec![])), stars: Arc::new(RwLock::new(vec![])),
categories: Arc::new(RwLock::new(vec![])), categories: Arc::new(RwLock::new(vec![])),
@@ -39,123 +49,6 @@ impl PimpbunnyProvider {
provider provider
} }
/// Starts a background OS thread that pre-populates the star and category filter lists.
///
/// The thread builds its own single-threaded Tokio runtime and runs `load_stars` followed by
/// `load_categories` on it. Every failure (runtime creation or either loader) is written to
/// stderr and otherwise ignored, so the lists simply keep whatever they already contain.
fn spawn_initial_load(&self) {
    // Clone everything the thread needs so the closure owns its data (`move`).
    let url = self.url.clone();
    let stars = Arc::clone(&self.stars);
    let categories = Arc::clone(&self.categories);

    thread::spawn(move || {
        // Create a tiny runtime just for these async tasks. Building it can fail, so log and
        // give up on the preload instead of panicking inside the worker thread.
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(rt) => rt,
            Err(e) => {
                eprintln!("build tokio runtime failed: {e}");
                return;
            }
        };

        rt.block_on(async move {
            if let Err(e) = Self::load_stars(&url, stars).await {
                eprintln!("load_stars failed: {e}");
            }
            if let Err(e) = Self::load_categories(&url, categories).await {
                eprintln!("load_categories failed: {e}");
            }
        });
    });
}
async fn load_stars(base_url: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
format!("{}/onlyfans-models/?models_per_page=20", &base_url).as_str(),
Some(Version::HTTP_2),
)
.await
.unwrap();
let stars_div = text
.split("pb-list-models-block")
.collect::<Vec<&str>>()
.last()
.unwrap()
.split("pb-page-description")
.collect::<Vec<&str>>()[0];
for stars_element in stars_div
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec()
{
if stars_element.contains("pb-promoted-link") {
continue;
}
let star_id = stars_element
.split("href=\"https://pimpbunny.com/onlyfans-models/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let star_name = stars_element
.split("<div class=\"pb-item-title-text\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
Self::push_unique(
&stars,
FilterOption {
id: star_id,
title: star_name,
},
);
}
return Ok(());
}
async fn load_categories(
base_url: &str,
categories: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
format!("{}/categories/?items_per_page=120", &base_url).as_str(),
Some(Version::HTTP_2),
)
.await
.unwrap();
let categories_div = text
.split("list_categories_categories_list_items")
.collect::<Vec<&str>>()
.last()
.unwrap()
.split("pb-pagination-wrapper")
.collect::<Vec<&str>>()[0];
for categories_element in categories_div
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec()
{
let category_id = categories_element
.split("href=\"https://pimpbunny.com/categories/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let category_name = categories_element
.split("<div class=\"pb-item-title pb-heading-h3 title\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.titlecase();
Self::push_unique(
&categories,
FilterOption {
id: category_id,
title: category_name,
},
);
}
return Ok(());
}
fn build_channel(&self, clientversion: ClientVersion) -> Channel { fn build_channel(&self, clientversion: ClientVersion) -> Channel {
let _ = clientversion; let _ = clientversion;
Channel { Channel {
@@ -203,17 +96,146 @@ impl PimpbunnyProvider {
} }
} }
// Push one item with minimal lock time and dedup by id fn spawn_initial_load(&self) {
let url = self.url.clone();
let stars = Arc::clone(&self.stars);
let categories = Arc::clone(&self.categories);
thread::spawn(move || {
let rt = match tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
{
Ok(rt) => rt,
Err(e) => {
eprintln!("tokio runtime failed: {e}");
let _ = futures::executor::block_on(send_discord_error_report(
&e,
Some("Pimpbunny Provider"),
Some("Failed to create tokio runtime"),
file!(),
line!(),
module_path!(),
));
return;
}
};
rt.block_on(async {
if let Err(e) = Self::load_stars(&url, Arc::clone(&stars)).await {
eprintln!("load_stars failed: {e}");
send_discord_error_report(
&e,
Some("Pimpbunny Provider"),
Some("Failed to load stars during initial load"),
file!(),
line!(),
module_path!(),
).await;
}
if let Err(e) = Self::load_categories(&url, Arc::clone(&categories)).await {
eprintln!("load_categories failed: {e}");
send_discord_error_report(
&e,
Some("Pimpbunny Provider"),
Some("Failed to load categories during initial load"),
file!(),
line!(),
module_path!(),
).await;
}
});
});
}
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) { fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
if let Ok(mut vec) = target.write() { if let Ok(mut vec) = target.write() {
if !vec.iter().any(|x| x.id == item.id) { if !vec.iter().any(|x| x.id == item.id) {
vec.push(item); vec.push(item);
// Optional: keep it sorted for nicer UX
// vec.sort_by(|a,b| a.title.cmp(&b.title));
} }
} }
} }
/// Downloads the models listing (`models_per_page=20`) and adds every model found to `stars`.
///
/// * `base` - site root, without a trailing slash.
/// * `stars` - shared list that receives one `FilterOption` per model (deduplicated by
///   `push_unique`).
///
/// # Errors
/// Fails when the request fails, when the page does not contain the models list marker, or
/// when a model card has no `ui-card-title` marker. Promoted cards and cards without a model
/// link are skipped silently.
async fn load_stars(base: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    const LIST_MARKER: &str = "vt_list_models_with_advertising_custom_models_list_items";
    const LINK_PREFIX: &str = "href=\"https://pimpbunny.com/onlyfans-models/";

    let mut requester = Requester::new();
    let text = requester
        .get(
            &format!("{base}/onlyfans-models/?models_per_page=20"),
            Some(Version::HTTP_2),
        )
        .await
        .map_err(|e| Error::from(format!("{}", e)))?;

    // `split(..).last()` always yields an item (the whole page when the marker is absent), so
    // it could never report a missing block. `rsplit_once` returns `None` in that case and
    // otherwise gives the same "text after the last marker".
    let block = text
        .rsplit_once(LIST_MARKER)
        .map(|(_, tail)| tail)
        .ok_or_else(|| ErrorKind::Parse("missing stars block".into()))?
        .split("pb-page-description")
        .next()
        .unwrap_or("");

    // The chunk before the first column div is not a model card, hence `skip(1)`.
    for el in block.split("<div class=\"col\">").skip(1) {
        if el.contains("pb-promoted-link") || !el.contains(LINK_PREFIX) {
            continue;
        }

        let id = el
            .split(LINK_PREFIX)
            .nth(1)
            .and_then(|s| s.split("/\"").next())
            .ok_or_else(|| ErrorKind::Parse(format!("star id: {el}").into()))?
            .to_string();

        let title = el
            .split("ui-card-title")
            .nth(1)
            .and_then(|s| s.split('<').next())
            // The marker is matched by bare name, so the text up to the next `<` may still
            // start with the rest of the opening tag (e.g. `">`). Keep only what follows the
            // last `>` and trim whitespace. NOTE(review): page markup is not visible here;
            // this is a no-op apart from trimming when no `>` is present.
            .map(|raw| raw.rsplit('>').next().unwrap_or(raw).trim())
            .ok_or_else(|| ErrorKind::Parse(format!("star title: {el}").into()))?
            .to_string();

        Self::push_unique(&stars, FilterOption { id, title });
    }
    Ok(())
}
/// Downloads the categories listing (`items_per_page=120`) and adds every category found to
/// `cats`.
///
/// * `base` - site root, without a trailing slash.
/// * `cats` - shared list that receives one `FilterOption` per category (deduplicated by
///   `push_unique`); titles are title-cased.
///
/// # Errors
/// Fails when the request fails, when the page does not contain the categories list marker,
/// or when a category card lacks the expected link or `ui-heading-h3` marker.
async fn load_categories(base: &str, cats: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    const LIST_MARKER: &str = "list_categories_categories_list_items";

    let mut requester = Requester::new();
    let text = requester
        .get(
            &format!("{base}/categories/?items_per_page=120"),
            Some(Version::HTTP_2),
        )
        .await
        .map_err(|e| Error::from(format!("{}", e)))?;

    // `split(..).last()` always yields an item (the whole page when the marker is absent), so
    // it could never report a missing block. `rsplit_once` returns `None` in that case and
    // otherwise gives the same "text after the last marker".
    let block = text
        .rsplit_once(LIST_MARKER)
        .map(|(_, tail)| tail)
        .ok_or_else(|| ErrorKind::Parse("categories block".into()))?
        .split("pb-pagination-wrapper")
        .next()
        .unwrap_or("");

    // The chunk before the first column div is not a category card, hence `skip(1)`.
    for el in block.split("<div class=\"col\">").skip(1) {
        let id = el
            .split("href=\"https://pimpbunny.com/categories/")
            .nth(1)
            .and_then(|s| s.split("/\"").next())
            .ok_or_else(|| ErrorKind::Parse(format!("category id: {el}").into()))?
            .to_string();

        let title = el
            .split("ui-heading-h3")
            .nth(1)
            .and_then(|s| s.split('<').next())
            // The marker is matched by bare name, so the text up to the next `<` may still
            // start with the rest of the opening tag (e.g. `">`). Keep only what follows the
            // last `>` and trim whitespace. NOTE(review): page markup is not visible here;
            // this is a no-op apart from trimming when no `>` is present.
            .map(|raw| raw.rsplit('>').next().unwrap_or(raw).trim())
            .ok_or_else(|| ErrorKind::Parse(format!("category title: {el}").into()))?
            .titlecase();

        Self::push_unique(&cats, FilterOption { id, title });
    }
    Ok(())
}
async fn get( async fn get(
&self, &self,
cache: VideoCache, cache: VideoCache,
@@ -338,181 +360,147 @@ impl PimpbunnyProvider {
if html.is_empty() || html.contains("404 Not Found") { if html.is_empty() || html.contains("404 Not Found") {
return vec![]; return vec![];
} }
let raw_videos = html.split("pb-pagination-wrapper").collect::<Vec<&str>>()[0]
.split("pb-list-items")
.collect::<Vec<&str>>()[1]
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec();
let futures = raw_videos let block = match html
.into_iter() .split("pb-pagination-wrapper")
.next()
.and_then(|s| s.split("pb-list-items").nth(1))
{
Some(b) => b,
None => return vec![],
};
let futures = block
.split("<div class=\"col\">")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), requester.clone())); .map(|el| self.get_video_item(el.to_string(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results.into_iter().filter_map(Result::ok).collect(); join_all(futures)
return video_items; .await
.into_iter()
.filter_map(Result::ok)
.collect()
} }
async fn get_video_item( async fn get_video_item(
&self, &self,
video_segment: String, seg: String,
mut requester: Requester, mut requester: Requester,
) -> Result<VideoItem> { ) -> Result<VideoItem> {
let video_url: String = video_segment.split(" href=\"").collect::<Vec<&str>>()[1] let video_url = seg
.split("\"") .split(" href=\"")
.collect::<Vec<&str>>()[0] .nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse("video url".into()))?
.to_string(); .to_string();
let mut title = video_segment.split("pb-item-title").collect::<Vec<&str>>()[1]
.split(">") let mut title = seg
.collect::<Vec<&str>>()[1] .split("pb-item-title")
.split("<") .nth(1)
.collect::<Vec<&str>>()[0] .and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse("video title".into()))?
.trim() .trim()
.to_string(); .to_string();
// html decode
title = decode(title.as_bytes()) title = decode(title.as_bytes()).to_string().unwrap_or(title).titlecase();
.to_string()
.unwrap_or(title) let id = video_url
.titlecase(); .split('/')
let id = video_url.split("/").collect::<Vec<&str>>()[4] .nth(4)
.split(".") .and_then(|s| s.split('.').next())
.collect::<Vec<&str>>()[0] .ok_or_else(|| ErrorKind::Parse("video id".into()))?
.to_string(); .to_string();
let mut thumb = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1] let thumb_block = seg
.split("pb-thumbnail")
.nth(1)
.ok_or_else(|| ErrorKind::Parse("thumb block".into()))?;
let mut thumb = thumb_block
.split("src=\"") .split("src=\"")
.collect::<Vec<&str>>()[1] .nth(1)
.split("\"") .and_then(|s| s.split('"').next())
.collect::<Vec<&str>>()[0] .unwrap_or("")
.to_string();
if thumb.starts_with("data:image/jpg;base64") {
thumb = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1]
.split("data-webp=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
}
let preview = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1]
.split("data-preview=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string(); .to_string();
let (tags, formats, views, duration) = match self.extract_media(&video_url, &mut requester).await { if thumb.starts_with("data:image") {
Ok((t, f, v, d)) => (t, f, v, d), thumb = thumb_block
Err(_) => return Err(Error::from("Video media extraction failed")), .split("data-webp=\"")
}; .nth(1)
if formats.is_empty() { .and_then(|s| s.split('"').next())
return Err(Error::from("No formats found for video")); .unwrap_or("")
.to_string();
} }
let video_item = VideoItem::new(
let preview = thumb_block
.split("data-preview=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tags, formats, views, duration) =
self.extract_media(&video_url, &mut requester).await?;
Ok(VideoItem::new(
id, id,
title, title,
video_url, video_url,
"pimpbunny".to_string(), "pimpbunny".into(),
thumb, thumb,
duration, duration,
) )
.formats(formats) .formats(formats)
.tags(tags) .tags(tags)
.preview(preview) .preview(preview)
.views(views) .views(views))
;
return Ok(video_item);
} }
async fn extract_media( async fn extract_media(
&self, &self,
video_page_url: &str, url: &str,
requester: &mut Requester, requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>, u32, u32)> { ) -> Result<(Vec<String>, Vec<VideoFormat>, u32, u32)> {
let mut formats = vec![];
let mut tags = vec![];
let text = requester let text = requester
.get(&video_page_url, Some(Version::HTTP_2)) .get(url, Some(Version::HTTP_2))
.await .await
.unwrap(); .map_err(|e| Error::from(format!("{}", e)))?;
if text.contains("pb-video-models"){
let stars_elements = text.split("pb-video-models").collect::<Vec<&str>>()[1]
.split("pb-video-statistic")
.collect::<Vec<&str>>()[0]
.split("pb-models-item pb-models-item")
.collect::<Vec<&str>>()[1..]
.to_vec();
for star_el in stars_elements {
let star_id = star_el
.split("href=\"https://pimpbunny.com/onlyfans-models/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let star_name = star_el
.split("<span class=\"pb-item-title pb-heading-h3\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
tags.push(star_name.clone());
Self::push_unique(
&self.stars,
FilterOption {
id: star_id,
title: star_name.clone(),
},
);
}
}
if text.contains("pb-video-tags") {
let categories_elements = text.split("pb-tags-list").collect::<Vec<&str>>()[1]
.split("</div>")
.collect::<Vec<&str>>()[0]
.split("href=\"https://pimpbunny.com/tags/")
.collect::<Vec<&str>>()[1..]
.to_vec();
for categories_el in categories_elements {
let category_id = categories_el.split("\"").collect::<Vec<&str>>()[0].to_string();
let category_name = categories_el.split("\">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.titlecase();
tags.push(category_name.clone());
Self::push_unique(
&self.categories,
FilterOption {
id: category_id,
title: category_name.clone(),
},
);
}
}
let json_str = text let json_str = text
.split(";</script><script type=\"application/ld+json\">") .split("application/ld+json\">")
.collect::<Vec<&str>>()[1] .nth(1)
.split("</script>") .and_then(|s| s.split("</script>").next())
.collect::<Vec<&str>>()[0]; .ok_or_else(|| ErrorKind::Parse("ld+json".into()))?;
let json = serde_json::from_str::<serde_json::Value>(json_str).unwrap_or_default();
let json: serde_json::Value = serde_json::from_str(json_str)?;
let video_url = json["contentUrl"].as_str().unwrap_or("").to_string(); let video_url = json["contentUrl"].as_str().unwrap_or("").to_string();
let quality = video_url let quality = video_url
.split("_") .split('_')
.collect::<Vec<&str>>()
.last() .last()
.map_or("", |v| v) .and_then(|s| s.split('.').next())
.split(".") .unwrap_or("")
.collect::<Vec<&str>>()[0]
.to_string(); .to_string();
let views = json["interactionStatistic"].as_array().unwrap()[0]["userInteractionCount"]
.as_str().unwrap().parse::<u32>().unwrap_or(0); let views = json["interactionStatistic"]
let raw_duration = json["duration"].as_str().unwrap_or("00:00").replace("PT", "").replace("H", ":").replace("M", ":").replace("S", ""); .as_array()
let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32; .and_then(|a| a.first())
formats.push(VideoFormat::new( .and_then(|v| v["userInteractionCount"].as_str())
video_url, .and_then(|v| v.parse().ok())
quality.clone(), .unwrap_or(0);
"video/mp4".to_string(),
)); let duration = json["duration"]
Ok((tags, formats, views, duration)) .as_str()
.map(|d| parse_time_to_seconds(&d.replace(['P','T','H','M','S'], "")).unwrap_or(0))
.unwrap_or(0) as u32;
Ok((
vec![],
vec![VideoFormat::new(video_url, quality, "video/mp4".into())],
views,
duration,
))
} }
} }
@@ -521,34 +509,27 @@ impl Provider for PimpbunnyProvider {
async fn get_videos( async fn get_videos(
&self, &self,
cache: VideoCache, cache: VideoCache,
pool: DbPool, _pool: DbPool,
sort: String, sort: String,
query: Option<String>, query: Option<String>,
page: String, page: String,
per_page: String, _per_page: String,
options: ServerOptions, options: ServerOptions,
) -> Vec<VideoItem> { ) -> Vec<VideoItem> {
let _ = per_page; let page = page.parse::<u8>().unwrap_or(1);
let _ = pool;
let videos: std::result::Result<Vec<VideoItem>, Error> = match query { let res = match query {
Some(q) => { Some(q) => self.to_owned().query(cache, page, &q, options).await,
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, options) None => self.get(cache, page, &sort, options).await,
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
}; };
match videos {
Ok(v) => v, res.unwrap_or_else(|e| {
Err(e) => { eprintln!("pimpbunny error: {e}");
println!("Error fetching videos: {}", e);
vec![] vec![]
} })
} }
}
fn get_channel(&self, clientversion: ClientVersion) -> crate::status::Channel { fn get_channel(&self, v: ClientVersion) -> Channel {
self.build_channel(clientversion) self.build_channel(v)
} }
} }

View File

@@ -4,60 +4,74 @@ use crate::providers::Provider;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem}; use crate::videos::{ServerOptions, VideoItem};
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode}; use htmlentity::entity::{decode, ICodedDataTrait};
use std::vec;
use async_trait::async_trait; use async_trait::async_trait;
use std::vec;
error_chain! { error_chain! {
foreign_links { foreign_links {
Io(std::io::Error); Io(std::io::Error);
HttpRequest(wreq::Error); HttpRequest(wreq::Error);
} }
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct PornhubProvider { pub struct PornhubProvider {
url: String, url: String,
} }
impl PornhubProvider { impl PornhubProvider {
pub fn new() -> Self { pub fn new() -> Self {
PornhubProvider { Self {
url: "https://www.pornhub.com".to_string(), url: "https://www.pornhub.com".to_string(),
} }
} }
async fn get( async fn get(
&self, &self,
cache: VideoCache, cache: VideoCache,
page: u8, page: u8,
sort: &str, sort: &str,
options:ServerOptions options: ServerOptions,
) -> Result<Vec<VideoItem>> { ) -> Result<Vec<VideoItem>> {
let video_url = format!("{}/video?o={}&page={}", self.url, sort, page); let video_url = format!("{}/video?o={}&page={}", self.url, sort, page);
let old_items = match cache.get(&video_url) { let old_items = match cache.get(&video_url) {
Some((time, items)) => { Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone()); return Ok(items.clone());
} else {
items.clone()
}
}
None => {
vec![]
} }
Some((_, items)) => items.clone(),
None => vec![],
}; };
let mut requester = options.requester.clone().unwrap(); let mut requester = match options.requester.clone() {
let text = requester.get(&video_url, None).await.unwrap(); Some(r) => r,
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),"<ul id=\"video"); None => return Ok(old_items),
if !video_items.is_empty() { };
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, "<ul id=\"video");
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone()); cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
}
Ok(video_items) Ok(video_items)
} }
}
async fn query( async fn query(
&self, &self,
@@ -65,15 +79,20 @@ impl PornhubProvider {
page: u8, page: u8,
query: &str, query: &str,
sort: &str, sort: &str,
options:ServerOptions options: ServerOptions,
) -> Result<Vec<VideoItem>> { ) -> Result<Vec<VideoItem>> {
let mut split_string = "<ul id=\"video"; let mut split_string = "<ul id=\"video";
let search_string = query.to_lowercase().trim().replace(" ", "+"); let search_string = query.to_lowercase().trim().replace(' ', "+");
let mut video_url = format!("{}/video/search?search={}&page={}", self.url, search_string, page);
if query.starts_with("@"){ let mut video_url =
let url_parts = query[1..].split(":").collect::<Vec<&str>>(); format!("{}/video/search?search={}&page={}", self.url, search_string, page);
video_url = [self.url.to_string(), url_parts[0].to_string(), url_parts[1].replace(" ", "-").to_string(), "videos?page=".to_string()].join("/");
video_url += &page.to_string(); if query.starts_with('@') {
let mut parts = query[1..].split(':');
let a = parts.next().unwrap_or("");
let b = parts.next().unwrap_or("");
video_url = format!("{}/{}/{}/videos?page={}", self.url, a, b.replace(' ', "-"), page);
if query.contains("@model") || query.contains("@pornstar") { if query.contains("@model") || query.contains("@pornstar") {
split_string = "mostRecentVideosSection"; split_string = "mostRecentVideosSection";
} }
@@ -82,160 +101,157 @@ impl PornhubProvider {
} }
} }
if query.contains("@channels"){ video_url.push_str(match (query.contains("@channels"), sort) {
video_url += match sort { (true, "mv") => "&o=vi",
"mr" => "", (true, "tr") => "&o=ra",
"mv" => "&o=vi", (false, "mv") => "&o=mv",
"tr" => "&o=ra", (false, "tr") => "&o=tr",
(false, "lg") => "&o=lg",
_ => "", _ => "",
} });
} else{
video_url += match sort {
"mr" => "",
"mv" => "&o=mv",
"tr" => "&o=tr",
"lg" => "&o=lg",
_ => "&o=mv",
}
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) { let old_items = match cache.get(&video_url) {
Some((time, items)) => { Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
let _ = cache.check().await;
return Ok(items.clone()); return Ok(items.clone());
} }
} Some((_, items)) => items.clone(),
None => { None => vec![],
vec![]
}
}; };
let mut requester = options.requester.clone().unwrap(); let mut requester = match options.requester.clone() {
let text = requester.get(&video_url, None).await.unwrap(); Some(r) => r,
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),split_string); None => return Ok(old_items),
if !video_items.is_empty() { };
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, split_string);
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url); cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone()); cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
}
Ok(video_items) Ok(video_items)
}
} }
fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> { fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> {
if html.is_empty() { let content = match html.split(split_string).nth(1) {
println!("HTML is empty"); Some(c) => c,
return vec![]; None => return vec![],
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html.split(split_string).collect::<Vec<&str>>()[1].split("Porn in German").collect::<Vec<&str>>()[0];
let raw_videos = video_listing_content
.split("class=\"pcVideoListItem ")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line);
// }
if video_segment.contains("wrapVideoBlock"){
continue; // Skip if the segment is a wrapVideoBlock
}
let video_url: String;
if !video_segment.contains("<a href=\"") {
let url_part = video_segment.split("data-video-vkey=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
video_url = format!("{}{}", self.url, url_part);
}
else{
let url_part = video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
if url_part.is_empty() || url_part == "javascript:void(0)" {
continue;
}
video_url = format!("{}{}", self.url, url_part);
}
if video_url.starts_with("https://www.pornhub.comjavascript:void(0)") {
continue;
}
let mut title = video_segment.split("\" title=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let id = video_segment.split("data-video-id=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let raw_duration = video_segment.split("duration").collect::<Vec<&str>>()[1].split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let view_part = match video_segment.split("iews\">").collect::<Vec<&str>>().len(){
2 => video_segment.split("iews\">").collect::<Vec<&str>>()[1],
3 => video_segment.split("iews\">").collect::<Vec<&str>>()[2],
_ => "<var>0<", // Skip if the format is unexpected
}; };
let views = parse_abbreviated_number(view_part
.split("<var>").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]).unwrap_or(0);
let thumb = video_segment.split("src=\"").collect::<Vec<&str>>()[1] let content = content.split("Porn in German").next().unwrap_or("");
.split("\"")
.collect::<Vec<&str>>()[0] let mut items = Vec::new();
for seg in content
.split("class=\"pcVideoListItem ")
.skip(1)
.filter(|s| !s.contains("wrapVideoBlock"))
{
let url_part = seg
.split("<a href=\"")
.nth(1)
.or_else(|| seg.split("data-video-vkey=\"").nth(1))
.and_then(|s| s.split('"').next());
let video_url = match url_part {
Some(u) if !u.is_empty() && u != "javascript:void(0)" => format!("{}{}", self.url, u),
_ => continue,
};
let mut title = seg
.split("\" title=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string(); .to_string();
let uploaderBlock; title = decode(title.as_bytes()).to_string().unwrap_or(title);
let uploader_href;
let mut tag = String::new();
if video_segment.contains("videoUploaderBlock") {
uploaderBlock = video_segment.split("videoUploaderBlock").collect::<Vec<&str>>()[1] let id = match seg
.split("data-video-id=\"")
.nth(1)
.and_then(|s| s.split('"').next())
{
Some(id) => id.to_string(),
None => continue,
};
let raw_duration = seg
.split("duration")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.unwrap_or("0:00");
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
let views = seg
.split("iews\">")
.filter_map(|p| p.split("<var>").nth(1))
.next()
.and_then(|v| v.split('<').next())
.and_then(|v| parse_abbreviated_number(v))
.unwrap_or(0);
let thumb = seg
.split("src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string(); .to_string();
uploader_href = uploaderBlock.split("href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.split("/").collect::<Vec<&str>>();
tag = format!("@{}:{}", uploader_href[1], uploader_href[2].replace("-", " "));
let (tag, uploader) = if seg.contains("videoUploaderBlock") {
let href = seg
.split("videoUploaderBlock")
.nth(1)
.and_then(|s| s.split("href=\"").nth(1))
.and_then(|s| s.split('"').next())
.unwrap_or("");
let parts: Vec<&str> = href.split('/').collect();
if parts.len() >= 3 {
(
Some(format!("@{}:{}", parts[1], parts[2].replace('-', " "))),
Some(parts[2].to_string()),
)
} else {
(None, None)
} }
else{ } else {
uploader_href = vec![]; (None, None)
} };
let mut item = VideoItem::new(
let mut video_item = VideoItem::new(
id, id,
title, title,
video_url.to_string(), video_url,
"pornhub".to_string(), "pornhub".into(),
thumb, thumb,
duration, duration,
) );
;
if views > 0 { if views > 0 {
video_item = video_item.views(views); item = item.views(views);
} }
if !tag.is_empty() { if let Some(t) = tag {
video_item = video_item.tags(vec![tag]) item = item.tags(vec![t]);
.uploader(uploader_href[2].to_string());
} }
items.push(video_item); if let Some(u) = uploader {
} item = item.uploader(u);
return items;
} }
items.push(item);
}
items
}
} }
#[async_trait] #[async_trait]
@@ -250,29 +266,29 @@ impl Provider for PornhubProvider {
per_page: String, per_page: String,
options: ServerOptions, options: ServerOptions,
) -> Vec<VideoItem> { ) -> Vec<VideoItem> {
let _ = options; let _ = pool;
let _ = per_page; let _ = per_page;
let _ = pool; // Ignored in this implementation
let mut sort = sort.to_lowercase(); let page = page.parse::<u8>().unwrap_or(1);
if sort.contains("date"){ let mut sort = match sort.as_str() {
sort = "mr".to_string(); "mv" => "mv",
} "tr" => "tr",
let videos: std::result::Result<Vec<VideoItem>, Error> = match query { "cm" => "cm",
Some(q) => { "lg" => "lg",
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, &sort, options) _ => "mr",
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
}; };
match videos { if sort.contains("date") {
Ok(v) => v, sort = "mr".into();
Err(e) => { }
println!("Error fetching videos: {}", e);
let res = match query {
Some(q) => self.query(cache, page, &q, &sort, options).await,
None => self.get(cache, page, &sort, options).await,
};
res.unwrap_or_else(|e| {
eprintln!("PornhubProvider error: {e}");
vec![] vec![]
} })
}
} }
} }

View File

@@ -1,6 +1,7 @@
use crate::DbPool; use crate::DbPool;
use crate::providers::Provider; use crate::providers::Provider;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester; use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds; use crate::util::time::parse_time_to_seconds;
use crate::videos::ServerOptions; use crate::videos::ServerOptions;
@@ -17,6 +18,12 @@ error_chain! {
HttpRequest(wreq::Error); HttpRequest(wreq::Error);
JsonError(serde_json::Error); JsonError(serde_json::Error);
} }
errors {
Parse(msg: String) {
description("html parse error")
display("html parse error: {}", msg)
}
}
} }
// fn has_blacklisted_class(element: &ElementRef, blacklist: &[&str]) -> bool { // fn has_blacklisted_class(element: &ElementRef, blacklist: &[&str]) -> bool {
@@ -83,9 +90,19 @@ impl SxyprnProvider {
let text = requester.get(&url_str, None).await.unwrap(); let text = requester.get(&url_str, None).await.unwrap();
// Pass a reference to options if needed, or reconstruct as needed // Pass a reference to options if needed, or reconstruct as needed
let video_items: Vec<VideoItem> = self let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester) .get_video_items_from_html(text.clone(), pool, requester)
.await; .await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url_str); cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone()); cache.insert(url_str.clone(), video_items.clone());
@@ -135,9 +152,28 @@ impl SxyprnProvider {
} }
}; };
let text = requester.get(&url_str, None).await.unwrap(); let text = requester.get(&url_str, None).await.unwrap();
let video_items: Vec<VideoItem> = self
let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester) .get_video_items_from_html(text.clone(), pool, requester)
.await; .await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
let _ = futures::executor::block_on(send_discord_error_report(
&e,
Some("Sxyprn Provider"),
Some(format!("Failed to query videos:\nURL: {}\nQuery: {},", url_str, query).as_str()),
file!(),
line!(),
module_path!(),));
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url_str); cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone()); cache.insert(url_str.clone(), video_items.clone());
@@ -150,41 +186,50 @@ impl SxyprnProvider {
async fn get_video_items_from_html( async fn get_video_items_from_html(
&self, &self,
html: String, html: String,
pool: DbPool, _pool: DbPool,
requester: Requester, _requester: Requester,
) -> Vec<VideoItem> { ) -> Result<Vec<VideoItem>> {
let _ = requester;
let _ = pool;
if html.is_empty() { if html.is_empty() {
println!("HTML is empty"); return Ok(vec![]);
return vec![];
} }
let raw_videos = html.split("<script async").collect::<Vec<&str>>()[0]
.split("post_el_small'")
.collect::<Vec<&str>>()[1..]
.to_vec();
let mut items: Vec<VideoItem> = Vec::new();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
// println!("\n\n\n");
let url = video_segment.split("/post/").collect::<Vec<&str>>()[1] // take content before "<script async"
.split("'") let before_script = html
.collect::<Vec<&str>>()[0] .split("<script async")
.next()
.ok_or_else(|| ErrorKind::Parse("missing '<script async' split point".into()))?;
// split into video segments (skip the first chunk)
let raw_videos: Vec<&str> = before_script.split("post_el_small'").skip(1).collect();
if raw_videos.is_empty() {
return Err(ErrorKind::Parse("no 'post_el_small\\'' segments found".into()).into());
}
let mut items = Vec::new();
for video_segment in raw_videos {
// url id
let url = video_segment
.split("/post/")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract /post/ url".into()))?
.to_string(); .to_string();
let video_url = format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", url); let video_url = format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", url);
let title_parts = video_segment.split("post_text").collect::<Vec<&str>>()[1] // title parts
.split("style=''>") let title_parts = video_segment
.collect::<Vec<&str>>()[1] .split("post_text")
.split("</div>") .nth(1)
.collect::<Vec<&str>>()[0]; .and_then(|s| s.split("style=''>").nth(1))
.and_then(|s| s.split("</div>").next())
.ok_or_else(|| ErrorKind::Parse("failed to extract title_parts".into()))?;
let document = Html::parse_document(title_parts); let document = Html::parse_document(title_parts);
let selector = Selector::parse("*").unwrap(); let selector = Selector::parse("*")
.map_err(|e| ErrorKind::Parse(format!("selector parse failed: {e}")))?;
let mut texts = Vec::new(); let mut texts = Vec::new();
for element in document.select(&selector) { for element in document.select(&selector) {
@@ -193,101 +238,96 @@ impl SxyprnProvider {
texts.push(text.trim().to_string()); texts.push(text.trim().to_string());
} }
} }
let mut title = texts[0].clone();
// html decode let mut title = texts.join(" ");
title = decode(title.as_bytes()) title = decode(title.as_bytes())
.to_string() .to_string()
.unwrap_or(title) .unwrap_or(title)
.replace(" ", " "); .replace(" ", " ");
title = title title = title
.replace("\n", "") .replace('\n', "")
.replace(" + ", " ") .replace(" + ", " ")
.replace(" ", " ") .replace(" ", " ")
.trim().to_string(); .trim()
.to_string();
if title.to_ascii_lowercase().starts_with("new ") { if title.to_ascii_lowercase().starts_with("new ") {
title = title[4..].to_string(); title = title[4..].to_string();
} }
// println!("Title: {}", title);
let id = video_url.split("/").collect::<Vec<&str>>()[6] // id (DON'T index [6])
.split("?") let id = video_url
.collect::<Vec<&str>>()[0] .split('/')
.last()
.ok_or_else(|| ErrorKind::Parse("failed to extract id from video_url".into()))?
.split('?')
.next()
.unwrap_or("")
.to_string(); .to_string();
let thumb = format!( // thumb
"https:{}", let thumb_path = video_segment
video_segment
.split("<img class='mini_post_vid_thumb lazyload'") .split("<img class='mini_post_vid_thumb lazyload'")
.collect::<Vec<&str>>()[1] .nth(1)
.split("data-src='") .and_then(|s| s.split("data-src='").nth(1))
.collect::<Vec<&str>>()[1] .and_then(|s| s.split('\'').next())
.split("'") .ok_or_else(|| ErrorKind::Parse("failed to extract thumb".into()))?;
.collect::<Vec<&str>>()[0]
.to_string()
);
let preview = match video_segment.contains("class='hvp_player'") { let thumb = format!("https:{thumb_path}");
true => Some(format!(
// preview
let preview = if video_segment.contains("class='hvp_player'") {
Some(format!(
"https:{}", "https:{}",
video_segment video_segment
.split("class='hvp_player'") .split("class='hvp_player'")
.collect::<Vec<&str>>()[1] .nth(1)
.split(" src='") .and_then(|s| s.split(" src='").nth(1))
.collect::<Vec<&str>>()[1] .and_then(|s| s.split('\'').next())
.split("'") .ok_or_else(|| ErrorKind::Parse("failed to extract preview src".into()))?
.collect::<Vec<&str>>()[0] ))
.to_string() } else {
)), None
false => None,
}; };
// views
let views = video_segment let views = video_segment
.split("<strong>·</strong> ") .split("<strong>·</strong> ")
.collect::<Vec<&str>>()[1] .nth(1)
.split(" ") .and_then(|s| s.split_whitespace().next())
.collect::<Vec<&str>>()[0] .ok_or_else(|| ErrorKind::Parse("failed to extract views".into()))?
.to_string(); .to_string();
let raw_duration = video_segment.split("duration_small").collect::<Vec<&str>>()[1] // duration
.split("title='") let raw_duration = video_segment
.collect::<Vec<&str>>()[1] .split("duration_small")
.split("'") .nth(1)
.collect::<Vec<&str>>()[1] .and_then(|s| s.split("title='").nth(1))
.split(">") .and_then(|s| s.split('\'').nth(1))
.collect::<Vec<&str>>()[1] .and_then(|s| s.split('>').nth(1))
.split("<") .and_then(|s| s.split('<').next())
.collect::<Vec<&str>>()[0] .ok_or_else(|| ErrorKind::Parse("failed to extract duration".into()))?
.to_string(); .to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32; let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
// stream urls (your filter condition looks suspicious; leaving as-is)
let stream_urls = video_segment let stream_urls = video_segment
.split("extlink_icon extlink") .split("extlink_icon extlink")
.collect::<Vec<&str>>() .filter_map(|part| {
.iter() part.split("href='")
.map(|part| {
let url = part
.split("href='")
.collect::<Vec<&str>>()
.last() .last()
.unwrap_or(&"") .and_then(|s| s.split('\'').next())
.split("'") .map(|u| u.to_string())
.collect::<Vec<&str>>()[0]
.to_string();
url
})
.filter(|url| {
url.starts_with("http")
&& !url.starts_with("https://bigwarp.io/")
&& !url.starts_with("https://doodstream.com/")
&& !url.starts_with("https://strmup.")
&& !url.starts_with("https://streamtape.com/")
&& !url.starts_with("https://streamvid.net/")
&& !url.starts_with("https://vtbe.")
}) })
.filter(|url| url.starts_with("http") || !url.starts_with("https://lulustream."))
.collect::<Vec<String>>(); .collect::<Vec<String>>();
let video_item_url = match stream_urls.first() {
Some(u) => u.clone(), let video_item_url = stream_urls.first().cloned().unwrap_or_else(|| {
None => format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id), //video_url.clone(), format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id)
}; });
let mut video_item = VideoItem::new( let mut video_item = VideoItem::new(
id, id,
title, title,
@@ -297,12 +337,15 @@ impl SxyprnProvider {
duration, duration,
) )
.views(views.parse::<u32>().unwrap_or(0)); .views(views.parse::<u32>().unwrap_or(0));
if let Some(p) = preview { if let Some(p) = preview {
video_item = video_item.preview(p); video_item = video_item.preview(p);
} }
items.push(video_item); items.push(video_item);
} }
return items;
Ok(items)
} }
} }

95
src/util/discord.rs Normal file
View File

@@ -0,0 +1,95 @@
use std::error::Error;
use std::fmt::Write as _;
use std::time::{SystemTime, UNIX_EPOCH};
use serde_json::json;
/// Send a detailed error report to a Discord webhook.
///
/// The webhook URL is read from the `DISCORD_WEBHOOK` environment variable. If the
/// variable cannot be read, the function returns immediately without sending anything.
/// A failure while sending the request is logged to stderr and is not propagated.
///
/// # Arguments
/// * `error` - the error to report; its `source()` chain is walked and listed in order.
/// * `context` - short label, e.g. provider name, URL, query (`"n/a"` when `None`).
/// * `extra_info` - any additional debug info (`"n/a"` when `None`).
/// * `file`, `line`, `module` - location to show in the report; callers typically pass
///   `file!()`, `line!()` and `module_path!()`.
pub async fn send_discord_error_report<T: Error>(
    error: &T,
    context: Option<&str>,    // e.g. provider name, URL, query
    extra_info: Option<&str>, // any debug info you want
    file: &str,
    line: u32,
    module: &str,
) {
    let webhook_url = match std::env::var("DISCORD_WEBHOOK") {
        Ok(url) => url,
        Err(_) => return,
    };

    // Discord embed field limits
    const MAX_FIELD: usize = 1024;
    // Appended to a shortened value so that truncation is visible in the report.
    const ELLIPSIS: &str = "…";

    // Render the error followed by each of its sources, one numbered line per entry.
    let mut error_chain = String::new();
    let mut current: &dyn Error = error;
    let mut i = 0;
    loop {
        // Writing into a `String` cannot fail, so the result is deliberately ignored.
        let _ = writeln!(error_chain, "{}. {}", i + 1, current);
        i += 1;
        match current.source() {
            Some(src) => current = src,
            None => break,
        }
    }

    // Shorten `s` so that the result (including the ellipsis) is at most MAX_FIELD bytes.
    let truncate = |s: &str| -> String {
        if s.len() <= MAX_FIELD {
            return s.to_string();
        }
        // Slicing a `str` in the middle of a multi-byte UTF-8 sequence panics, so move
        // the cut back to the nearest char boundary. Offset 0 is always a boundary,
        // which guarantees that the loop terminates.
        let mut end = MAX_FIELD - ELLIPSIS.len();
        while !s.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}{}", &s[..end], ELLIPSIS)
    };

    // Seconds since the Unix epoch; falls back to 0 if the system clock is before it.
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);

    let payload = json!({
        "embeds": [{
            "title": "🚨 Rust Error Report",
            "color": 0xE74C3C,
            "fields": [
                {
                    "name": "Error",
                    "value": truncate(&error.to_string()),
                    "inline": false
                },
                {
                    "name": "Error Chain",
                    "value": truncate(&error_chain),
                    "inline": false
                },
                {
                    "name": "Location",
                    "value": format!("`{}`:{}\n`{}`", file, line, module),
                    "inline": false
                },
                {
                    "name": "Context",
                    "value": truncate(context.unwrap_or("n/a")),
                    "inline": false
                },
                {
                    "name": "Extra Info",
                    "value": truncate(extra_info.unwrap_or("n/a")),
                    "inline": false
                }
            ],
            "footer": {
                "text": format!("Unix time: {}", timestamp)
            }
        }]
    });

    // Send (never panic)
    if let Err(e) = wreq::Client::new()
        .post(webhook_url)
        .json(&payload)
        .send()
        .await
    {
        eprintln!("Failed to send Discord error report: {e}");
    }
}

View File

@@ -2,6 +2,7 @@ pub mod time;
pub mod flaresolverr; pub mod flaresolverr;
pub mod cache; pub mod cache;
pub mod requester; pub mod requester;
pub mod discord;
pub fn parse_abbreviated_number(s: &str) -> Option<u32> { pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
let s = s.trim(); let s = s.trim();