Files
hottub/src/providers/pornhub.rs
2026-01-02 14:55:13 +00:00

295 lines
8.4 KiB
Rust

use crate::util::parse_abbreviated_number;
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use error_chain::error_chain;
use htmlentity::entity::{decode, ICodedDataTrait};
use async_trait::async_trait;
use std::vec;
// Error types for this provider, declared through the `error_chain!` macro.
// The single-parameter `Result<T>` used by the methods in this file presumably
// comes from this expansion — confirm against the error_chain documentation.
error_chain! {
// Errors from other crates that are wrapped by this module's error type.
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
// Errors specific to this provider.
errors {
// Carries a free-form message; displayed as "parse error: <msg>".
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Video provider that builds listing/search URLs for a single site and
/// scrapes `VideoItem`s out of the returned HTML.
#[derive(Debug, Clone)]
pub struct PornhubProvider {
/// Base URL (scheme and host, no trailing slash) that every request path and
/// every scraped link is appended to.
url: String,
}
impl PornhubProvider {
/// Creates a provider whose base URL is `https://www.pornhub.com`.
pub fn new() -> Self {
    let url = String::from("https://www.pornhub.com");
    Self { url }
}
/// Fetches one page of the site's general video listing.
///
/// The listing URL (base URL, `sort` code and `page`) is also the cache key.
/// A cached entry younger than five minutes is returned directly. An older
/// entry is kept as a fallback and returned when no requester is configured,
/// when the request fails, or when the page yields no parseable videos.
/// Successfully parsed items replace the cache entry and are returned.
///
/// As written, every path returns `Ok`; failures degrade to the fallback list.
async fn get(
    &self,
    cache: VideoCache,
    page: u8,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let video_url = format!("{}/video?o={}&page={}", self.url, sort, page);

    // Serve a fresh cache hit immediately; remember a stale one as fallback.
    let mut fallback = vec![];
    if let Some((stamp, cached)) = cache.get(&video_url) {
        // If the age cannot be determined it defaults to zero, i.e. "fresh".
        let fresh_enough = stamp.elapsed().unwrap_or_default().as_secs() < 60 * 5;
        if fresh_enough {
            return Ok(cached.clone());
        }
        fallback = cached.clone();
    }

    // Without a requester, or on a failed request, fall back to what we had.
    let parsed = match options.requester.clone() {
        Some(mut requester) => match requester.get(&video_url, None).await {
            Ok(body) => self.get_video_items_from_html(body, "<ul id=\"video"),
            Err(_) => return Ok(fallback),
        },
        None => return Ok(fallback),
    };

    // An empty parse result never overwrites the cache.
    if parsed.is_empty() {
        return Ok(fallback);
    }
    cache.remove(&video_url);
    cache.insert(video_url.clone(), parsed.clone());
    Ok(parsed)
}
/// Fetches one page of results for `query`.
///
/// Two query shapes are handled:
/// * plain text — lower-cased, trimmed, form-encoded and sent to
///   `/video/search?search=…`;
/// * `@<kind>:<name>` — mapped to the listing at `/<kind>/<name>/videos`, with
///   spaces in `<name>` replaced by dashes. Both parts are inserted as typed.
///
/// `sort` is translated into an `&o=` parameter; channel listings use their
/// own codes (`vi`, `ra`). Combinations without a mapping add no parameter.
///
/// The final URL is the cache key. A cached entry younger than five minutes is
/// returned directly; an older one is the fallback when no requester is
/// configured, the request fails, or nothing could be parsed. As written,
/// every path returns `Ok`.
async fn query(
    &self,
    cache: VideoCache,
    page: u8,
    query: &str,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    // Encodes a search term for use as a query-string value: ASCII letters,
    // digits and `-._~` pass through, a space becomes `+`, and every other
    // byte is written as `%XX`. Without this, characters such as `&`, `#`,
    // `%` or `+` in user input would corrupt the URL or inject parameters.
    fn encode_search_term(raw: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(raw.len());
        for byte in raw.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                    encoded.push(char::from(byte));
                }
                b' ' => encoded.push('+'),
                _ => {
                    encoded.push('%');
                    encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                    encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
                }
            }
        }
        encoded
    }

    // `Some(rest)` only for `@kind:name` listing queries.
    let listing = query.strip_prefix('@');
    // The special markers only apply to listing queries; a plain-text search
    // that merely contains "@channels" must not pick up channel sort codes.
    let is_channel = listing.is_some() && query.contains("@channels");
    let is_performer =
        listing.is_some() && (query.contains("@model") || query.contains("@pornstar"));

    // Marker that precedes the video list in the returned HTML; the channel
    // marker takes precedence over the model/pornstar one.
    let split_string = if is_channel {
        "<ul class=\"videos row-5-thumbs"
    } else if is_performer {
        "mostRecentVideosSection"
    } else {
        "<ul id=\"video"
    };

    let mut video_url = match listing {
        Some(rest) => {
            let mut parts = rest.split(':');
            let kind = parts.next().unwrap_or("");
            let name = parts.next().unwrap_or("");
            format!(
                "{}/{}/{}/videos?page={}",
                self.url,
                kind,
                name.replace(' ', "-"),
                page
            )
        }
        None => {
            let term = encode_search_term(query.to_lowercase().trim());
            format!("{}/video/search?search={}&page={}", self.url, term, page)
        }
    };
    video_url.push_str(match (is_channel, sort) {
        (true, "mv") => "&o=vi",
        (true, "tr") => "&o=ra",
        (false, "mv") => "&o=mv",
        (false, "tr") => "&o=tr",
        (false, "lg") => "&o=lg",
        _ => "",
    });

    // Serve a fresh cache hit immediately; remember a stale one as fallback.
    let old_items = match cache.get(&video_url) {
        Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
            return Ok(items.clone());
        }
        Some((_, items)) => items.clone(),
        None => vec![],
    };

    let mut requester = match options.requester.clone() {
        Some(r) => r,
        None => return Ok(old_items),
    };
    let text = match requester.get(&video_url, None).await {
        Ok(t) => t,
        Err(_) => return Ok(old_items),
    };

    // An empty parse result never overwrites the cache.
    let video_items = self.get_video_items_from_html(text, split_string);
    if video_items.is_empty() {
        return Ok(old_items);
    }
    cache.remove(&video_url);
    cache.insert(video_url.clone(), video_items.clone());
    Ok(video_items)
}
/// Scrapes video entries out of a listing page.
///
/// Only the text between the first and the next occurrence of `split_string`
/// (or the end of the page) is examined, and it is further cut off at the
/// first "Porn in German". That region is split on `class="pcVideoListItem `;
/// pieces containing "wrapVideoBlock" are ignored. A piece is skipped when it
/// has no usable link (missing, empty or `javascript:void(0)`) or no
/// `data-video-id`. Returns an empty list when `split_string` is not found.
fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> {
    // Text between the first and second `marker`, cut at its first double quote.
    // `split(..).nth(1)` is used on purpose: it also stops at a repeated marker.
    fn quoted_after<'a>(haystack: &'a str, marker: &str) -> Option<&'a str> {
        haystack
            .split(marker)
            .nth(1)
            .and_then(|rest| rest.split('"').next())
    }

    // A missing marker leaves an empty listing, which yields no items below.
    let listing = html
        .split(split_string)
        .nth(1)
        .and_then(|tail| tail.split("Porn in German").next())
        .unwrap_or("");

    listing
        .split("class=\"pcVideoListItem ")
        .skip(1)
        .filter(|card| !card.contains("wrapVideoBlock"))
        .filter_map(|card| {
            // Prefer the anchor href; fall back to the vkey attribute.
            // NOTE(review): the vkey fallback is appended to the base URL
            // verbatim — confirm that this produces a valid link.
            let link = quoted_after(card, "<a href=\"")
                .or_else(|| quoted_after(card, "data-video-vkey=\""))
                .filter(|link| !link.is_empty() && *link != "javascript:void(0)")?;
            let video_url = format!("{}{}", self.url, link);

            let id = quoted_after(card, "data-video-id=\"")?.to_string();

            // Titles carry HTML entities; keep the raw text if decoding fails.
            let raw_title = quoted_after(card, "\" title=\"").unwrap_or("").to_string();
            let title = decode(raw_title.as_bytes())
                .to_string()
                .unwrap_or(raw_title);

            // Text of the element that follows the first "duration" mention.
            let duration_text = card
                .split("duration")
                .nth(1)
                .and_then(|rest| rest.split('>').nth(1))
                .and_then(|rest| rest.split('<').next())
                .unwrap_or("0:00");
            let duration = parse_time_to_seconds(duration_text).unwrap_or(0) as u32;

            // First `<var>` payload found in any piece split on `iews">`.
            let views = card
                .split("iews\">")
                .find_map(|piece| piece.split("<var>").nth(1))
                .and_then(|count| count.split('<').next())
                .and_then(|count| parse_abbreviated_number(count))
                .unwrap_or(0);

            let thumb = quoted_after(card, "src=\"").unwrap_or("").to_string();

            // An uploader href of the form "/<kind>/<name>" becomes both a
            // "@kind:name" tag (dashes turned into spaces) and the uploader name.
            let uploader_href = card
                .split("videoUploaderBlock")
                .nth(1)
                .and_then(|block| quoted_after(block, "href=\""))
                .unwrap_or("");
            let path: Vec<&str> = uploader_href.split('/').collect();
            let (tag, uploader) = match path.as_slice() {
                &[_, kind, name, ..] => (
                    Some(format!("@{}:{}", kind, name.replace('-', " "))),
                    Some(name.to_string()),
                ),
                _ => (None, None),
            };

            let item = VideoItem::new(
                id,
                title,
                video_url,
                "pornhub".into(),
                thumb,
                duration,
            );
            // Optional fields are only set when a value was actually found.
            let item = if views > 0 { item.views(views) } else { item };
            let item = match tag {
                Some(tag) => item.tags(vec![tag]),
                None => item,
            };
            let item = match uploader {
                Some(name) => item.uploader(name),
                None => item,
            };
            Some(item)
        })
        .collect()
}
}
#[async_trait]
impl Provider for PornhubProvider {
    /// Returns one page of videos, either search results or the general listing.
    ///
    /// * `sort` — one of `mv`, `tr`, `cm`, `lg`; anything else is treated as `mr`.
    /// * `query` — when present, results come from `query`; otherwise from the
    ///   general listing via `get`.
    /// * `page` — parsed as `u8`; an unparseable value falls back to page 1.
    /// * `pool`, `per_page` — accepted for the trait signature but not used here.
    ///
    /// An error from the inner call is printed to stderr and replaced by an
    /// empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        // NOTE(review): page numbers above 255 fail the `u8` parse and silently
        // become page 1; widening the type needs a matching change in `get` and
        // `query`.
        let page = page.parse::<u8>().unwrap_or(1);
        // Whitelist of sort codes. The catch-all already maps every other value
        // to "mr", so no further check on the result is needed.
        let sort = match sort.as_str() {
            "mv" => "mv",
            "tr" => "tr",
            "cm" => "cm",
            "lg" => "lg",
            _ => "mr",
        };
        let res = match query {
            Some(q) => self.query(cache, page, &q, sort, options).await,
            None => self.get(cache, page, sort, options).await,
        };
        res.unwrap_or_else(|e| {
            eprintln!("PornhubProvider error: {e}");
            vec![]
        })
    }
}