tags upgrade

This commit is contained in:
Simon
2025-11-29 13:55:56 +00:00
parent cafb990fd4
commit 0e02a1b821
2 changed files with 244 additions and 53 deletions

View File

@@ -27,6 +27,7 @@ once_cell = "1.21.3"
rustc-hash = "2.1.1"
async-trait = "0.1"
regex = "1.12.2"
titlecase = "3.6.0"
[lints.rust]
unexpected_cfgs = "allow"

View File

@@ -1,16 +1,18 @@
use crate::util::requester::Requester;
use crate::{DbPool};
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use titlecase::Titlecase;
error_chain! {
foreign_links {
@@ -22,19 +24,107 @@ error_chain! {
/// Provider state for the hqporner.com scraper.
///
/// The two filter lists sit behind `Arc<RwLock<..>>` so the background loader
/// thread started by `new` and the request-path methods can share them.
#[derive(Debug, Clone)]
pub struct HqpornerProvider {
/// Base URL that every request path is appended to.
url: String,
/// Known performers (`id` is the `/actress/<id>` path segment, `title` the link text).
/// Filled by `load_stars` and `extract_media`; read by `query` to map a search
/// string onto a performer page.
stars: Arc<RwLock<Vec<FilterOption>>>,
/// Known categories (`id` is the `/category/<id>` path segment, `title` the
/// title-cased link text). Filled by `load_categories` and `extract_media`;
/// read by `query` to map a search string onto a category page.
categories: Arc<RwLock<Vec<FilterOption>>>,
}
impl HqpornerProvider {
pub fn new() -> Self {
let provider = HqpornerProvider {
url: "https://hqporner.com".to_string(),
// stars: Arc::new(RwLock::new(vec![])),
// categories: Arc::new(RwLock::new(vec![])),
stars: Arc::new(RwLock::new(vec![])),
categories: Arc::new(RwLock::new(vec![])),
};
provider.spawn_initial_load();
provider
}
/// Starts a detached OS thread that fills the star and category lists.
///
/// The thread owns its own single-threaded tokio runtime, runs `load_stars`
/// and then `load_categories`, and reports a failure of either on stderr.
/// The join handle is dropped, so the caller never waits for the load.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let star_store = Arc::clone(&self.stars);
    let category_store = Arc::clone(&self.categories);

    let worker = move || {
        // A dedicated current-thread runtime is enough for these two async tasks.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("build tokio runtime");

        runtime.block_on(async move {
            match Self::load_stars(&base_url, star_store).await {
                Ok(()) => {}
                Err(e) => eprintln!("load_stars failed: {e}"),
            }
            match Self::load_categories(&base_url, category_store).await {
                Ok(()) => {}
                Err(e) => eprintln!("load_categories failed: {e}"),
            }
        });
    };

    thread::spawn(worker);
}
async fn load_stars(base_url: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(format!("{}/girls", &base_url).as_str())
.await
.unwrap();
let stars_div = text
.split("<span>Girls</span>")
.collect::<Vec<&str>>().last().unwrap()
.split("</ul>")
.collect::<Vec<&str>>()[0];
for stars_element in stars_div.split("<li ").collect::<Vec<&str>>()[1..].to_vec() {
let star_id = stars_element.split("href=\"/actress/").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let star_name = stars_element.split("<a ").collect::<Vec<&str>>()[1]
.split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
Self::push_unique(
&stars,
FilterOption {
id: star_id,
title: star_name,
},
);
}
return Ok(());
}
async fn load_categories(base_url: &str, categories: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(format!("{}/categories", &base_url).as_str())
.await
.unwrap();
let categories_div = text
.split("<span>Categories</span>")
.collect::<Vec<&str>>().last().unwrap()
.split("</ul>")
.collect::<Vec<&str>>()[0];
for categories_element in categories_div.split("<li ").collect::<Vec<&str>>()[1..].to_vec() {
let category_id = categories_element.split("href=\"/category/").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let category_name = categories_element.split("<a ").collect::<Vec<&str>>()[1]
.split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.titlecase();
Self::push_unique(
&categories,
FilterOption {
id: category_id,
title: category_name,
},
);
}
return Ok(());
}
fn build_channel(&self, clientversion: ClientVersion) -> Channel {
let _ = clientversion;
Channel {
@@ -51,6 +141,17 @@ impl HqpornerProvider {
}
}
/// Appends `item` to `target` unless an entry with the same `id` is already stored.
///
/// The write lock is held only for the duplicate check and the push. If the
/// lock cannot be acquired (it is poisoned) the item is silently dropped.
/// Insertion order is kept; the list is not sorted.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    let Ok(mut entries) = target.write() else {
        return;
    };
    let already_known = entries.iter().any(|existing| existing.id == item.id);
    if already_known {
        return;
    }
    entries.push(item);
}
async fn get(
&self,
cache: VideoCache,
@@ -76,7 +177,9 @@ impl HqpornerProvider {
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), &mut requester).await;
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -95,7 +198,15 @@ impl HqpornerProvider {
) -> Result<Vec<VideoItem>> {
let search_string = query.trim().to_string();
let video_url = format!("{}/?q={}&p={}", self.url, search_string, page);
let mut video_url = format!("{}/?q={}&p={}", self.url, search_string, page);
if let Some(star) = self.stars.read().unwrap().iter().find(|s| s.title.to_ascii_lowercase() == search_string.to_ascii_lowercase()) {
video_url = format!("{}/actress/{}/{}", self.url, star.id, page);
}
if let Some(cat) = self.categories.read().unwrap().iter().find(|c| c.title.to_ascii_lowercase() == search_string.to_ascii_lowercase()) {
video_url = format!("{}/category/{}/{}", self.url, cat.id, page);
}
println!("Searching URL: {}", video_url);
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
@@ -114,7 +225,9 @@ impl HqpornerProvider {
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), &mut requester).await;
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -124,7 +237,11 @@ impl HqpornerProvider {
Ok(video_items)
}
async fn get_video_items_from_html(&self, html: String, requester: &mut Requester) -> Vec<VideoItem> {
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
@@ -135,30 +252,41 @@ impl HqpornerProvider {
.collect::<Vec<&str>>()[1..]
.to_vec();
let futures = raw_videos.into_iter().map(|el| self.get_video_item(el.to_string(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results
let futures = raw_videos
.into_iter()
.filter_map(Result::ok)
.collect();
.map(|el| self.get_video_item(el.to_string(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results.into_iter().filter_map(Result::ok).collect();
return video_items;
}
async fn get_video_item(&self, video_segment: String, mut requester: Requester) -> Result<VideoItem> {
async fn get_video_item(
&self,
video_segment: String,
mut requester: Requester,
) -> Result<VideoItem> {
let video_url: String = format!(
"{}{}",
self.url,
video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"").collect::<Vec<&str>>()[0]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string()
);
let mut title = video_segment
.split("<h3 class=\"meta-data-title\">").collect::<Vec<&str>>()[1]
.split(">").collect::<Vec<&str>>()[1]
.split("<").collect::<Vec<&str>>()[0]
.trim().to_string();
.split("<h3 class=\"meta-data-title\">")
.collect::<Vec<&str>>()[1]
.split(">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.trim()
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.titlecase();
let id = video_url.split("/").collect::<Vec<&str>>()[4]
.split(".")
.collect::<Vec<&str>>()[0]
@@ -182,7 +310,7 @@ impl HqpornerProvider {
.to_string();
let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32;
let formats = get_media_formats(&video_url, &mut requester).await;
let (tags, formats) = self.extract_media(&video_url, &mut requester).await;
if formats.is_empty() {
return Err(Error::from("No formats found for video"));
}
@@ -195,39 +323,101 @@ impl HqpornerProvider {
duration,
)
.formats(formats)
;
.tags(tags);
return Ok(video_item);
}
}
async fn get_media_formats(video_page_url: &str, requester: &mut Requester) -> Vec<VideoFormat> {
async fn extract_media(
&self,
video_page_url: &str,
requester: &mut Requester,
) -> (Vec<String>, Vec<VideoFormat>) {
let mut formats = vec![];
let mut tags = vec![];
let text = requester.get(&video_page_url).await.unwrap();
if text.contains("Why do I see it?"){
return formats;
if text.contains("Why do I see it?") {
return (tags, formats);
}
let video_url = format!("https:{}", text
.split("url: '/blocks/altplayer.php?i=")
let stars_elements = text.split("icon fa-star-o").collect::<Vec<&str>>()[1]
.split("</li>")
.collect::<Vec<&str>>()[0]
.split("href=\"/actress/")
.collect::<Vec<&str>>()[1..]
.to_vec();
for star_el in stars_elements {
let star_id = star_el.split("\"").collect::<Vec<&str>>()[0].to_string();
let star_name = star_el.split("\">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
tags.push(star_name.clone());
Self::push_unique(&self.stars, FilterOption {
id: star_id,
title: star_name.clone(),
});
}
let categories_elements = text.split("This video belongs to the following categories").collect::<Vec<&str>>()[1]
.split("</p>")
.collect::<Vec<&str>>()[0]
.split("href=\"/category/")
.collect::<Vec<&str>>()[1..]
.to_vec();
for categories_el in categories_elements {
let category_id = categories_el.split("\"").collect::<Vec<&str>>()[0].to_string();
let category_name = categories_el.split("\">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0].titlecase();
tags.push(category_name.clone());
Self::push_unique(&self.categories, FilterOption {
id: category_id,
title: category_name.clone(),
});
}
let video_url = format!(
"https:{}",
text.split("url: '/blocks/altplayer.php?i=")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]);
.collect::<Vec<&str>>()[0]
);
let text2 = requester.get_raw_with_headers(&video_url, vec![("Referer".to_string(), "https://hqporner.com/".to_string())]).await.unwrap().text().await.unwrap();
let text2 = requester
.get_raw_with_headers(
&video_url,
vec![("Referer".to_string(), "https://hqporner.com/".to_string())],
)
.await
.unwrap()
.text()
.await
.unwrap();
let video_element = text2.split("<video ").collect::<Vec<&str>>()[2]
.split("</video>")
.collect::<Vec<&str>>()[0];
let sources = video_element.split("<source ").collect::<Vec<&str>>()[1..].to_vec();
for source in sources {
let title = source.split("title=\\\"").collect::<Vec<&str>>()[1]
.split("\\\"").collect::<Vec<&str>>()[0].to_string();
.split("\\\"")
.collect::<Vec<&str>>()[0]
.to_string();
let quality = title.split(" ").collect::<Vec<&str>>()[0].to_string();
let format = "mp4".to_string();
let media_url = format!("https:{}", source.split("src=\\\"").collect::<Vec<&str>>()[1]
.split("\\\"").collect::<Vec<&str>>()[0]);
let media_url = format!(
"https:{}",
source.split("src=\\\"").collect::<Vec<&str>>()[1]
.split("\\\"")
.collect::<Vec<&str>>()[0]
);
formats.push(VideoFormat::new(media_url, quality, format).format_id(title.clone()).format_note(title.clone()));
formats.push(
VideoFormat::new(media_url, quality, format)
.format_id(title.clone())
.format_note(title.clone()),
);
}
(tags, formats)
}
formats
}
#[async_trait]