all urls for paradise hill

This commit is contained in:
Simon
2025-09-19 18:33:18 +00:00
parent 025ee713e3
commit d9fed99104

View File

@@ -1,14 +1,13 @@
use std::vec;
use error_chain::error_chain;
use htmlentity::entity::{decode, ICodedDataTrait};
use futures::future::join_all;
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::videos::{self, ServerOptions};
use crate::videos::{VideoItem};
use crate::DbPool;
use crate::util::requester::Requester;
use crate::videos::VideoItem;
use crate::videos::{self, ServerOptions};
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
error_chain! {
foreign_links {
@@ -25,20 +24,24 @@ pub struct ParadisehillProvider {
impl ParadisehillProvider {
/// Creates a provider whose requests are rooted at `https://en.paradisehill.cc`.
///
/// The stored `url` is used as the prefix for every listing, detail-page and
/// thumbnail address this provider builds.
pub fn new() -> Self {
    ParadisehillProvider {
        url: "https://en.paradisehill.cc".to_string(),
    }
}
async fn get(&self, cache:VideoCache, page: u8, options: ServerOptions) -> Result<Vec<VideoItem>> {
async fn get(
&self,
cache: VideoCache,
page: u8,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let mut requester = options.requester.clone().unwrap();
let url_str = format!("{}/all/?sort=created_at&page={}", self.url, page);
let old_items = match cache.get(&url_str) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 {
return Ok(items.clone());
}
else{
} else {
items.clone()
}
}
@@ -49,17 +52,25 @@ impl ParadisehillProvider {
let text = requester.get(&url_str).await.unwrap();
// Pass a reference to options if needed, or reconstruct as needed
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), requester).await;
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), requester)
.await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
} else{
} else {
return Ok(old_items);
}
Ok(video_items)
}
async fn query(&self, cache: VideoCache, page: u8, query: &str, options: ServerOptions) -> Result<Vec<VideoItem>> {
async fn query(
&self,
cache: VideoCache,
page: u8,
query: &str,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
// Extract needed fields from options at the start
let mut requester = options.requester.clone().unwrap();
let search_string = query.replace(" ", "+");
@@ -72,84 +83,115 @@ impl ParadisehillProvider {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 {
return Ok(items.clone());
}
else{
} else {
let _ = cache.check().await;
return Ok(items.clone())
return Ok(items.clone());
}
}
None => {
vec![]
}
};
};
let text = requester.get(&url_str).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), requester).await;
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), requester)
.await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
} else{
} else {
return Ok(old_items);
}
Ok(video_items)
}
async fn get_video_items_from_html(&self, html: String, requester: Requester) -> Vec<VideoItem> {
async fn get_video_items_from_html(
&self,
html: String,
requester: Requester,
) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
}
let raw_videos = html
.split("item list-film-item")
.collect::<Vec<&str>>()[1..]
.to_vec();
let raw_videos = html.split("item list-film-item").collect::<Vec<&str>>()[1..].to_vec();
let mut urls: Vec<String> = vec![];
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
let url_str = format!("{}{}",self.url, video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string());
let url_str = format!(
"{}{}",
self.url,
video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string()
);
urls.push(url_str.clone());
}
let futures = urls.into_iter().map(|el| self.get_video_item(el.clone(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results
let futures = urls
.into_iter()
.filter_map(Result::ok)
.collect();
.map(|el| self.get_video_item(el.clone(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results.into_iter().filter_map(Result::ok).collect();
return video_items;
}
async fn get_video_item(&self, url_str: String, mut requester: Requester) -> Result<VideoItem> {
let vid = requester.get(&url_str).await.unwrap();
let mut title = vid.split("<meta property=\"og:title\" content=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0].trim()
.to_string();
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let thumb = format!("{}{}",self.url, vid.split("<meta property=\"og:image\" content=\"").collect::<Vec<&str>>()[1]
let mut title = vid
.split("<meta property=\"og:title\" content=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string());
let video_url = vid.split("var videoList = ").collect::<Vec<&str>>()[1]
.split("\"src\":\"").collect::<Vec<&str>>()[1]
.split("\"").collect::<Vec<&str>>()[0].replace("\\", "").to_string();
let id = video_url.split("/").collect::<Vec<&str>>().last().unwrap().split("_").collect::<Vec<&str>>()[0]
.trim()
.to_string();
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let thumb = format!(
"{}{}",
self.url,
vid.split("<meta property=\"og:image\" content=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string()
);
let format = videos::VideoFormat::new(video_url.clone(), "1080".to_string(), "mp4".to_string())
.protocol("https".to_string())
.format_id("mp4-1080".to_string())
;
// Build one format per `"src":"…"` entry found after `var videoList = `.
// If the marker is absent the list is treated as empty, so `formats` ends up
// empty rather than the lookup panicking.
let video_list = vid
    .split("var videoList = ")
    .nth(1)
    .unwrap_or_default();
let formats: Vec<_> = video_list
    .split("\"src\":\"")
    // The chunk in front of the first `"src":"` is preamble, not a URL.
    .skip(1)
    // Each remaining chunk starts with the URL, which ends at the next quote.
    .filter_map(|chunk| chunk.split('"').next())
    // Backslashes are stripped from the raw value (presumably JSON-escaped
    // slashes such as `\/` — confirm against the page source).
    .map(|raw| raw.replace('\\', ""))
    .filter(|video_url| !video_url.is_empty())
    .enumerate()
    .map(|(idx, video_url)| {
        // The format id is the file name; the note is a 1-based counter plus the
        // text after the last `_` with `.mp4` removed.
        let file_name = video_url.rsplit('/').next().unwrap_or_default().to_string();
        let part = video_url
            .rsplit('_')
            .next()
            .unwrap_or_default()
            .replace(".mp4", "");
        videos::VideoFormat::new(video_url, "1080".to_string(), "mp4".to_string())
            .protocol("https".to_string())
            .format_id(file_name)
            .format_note(format!("{} - {}", idx + 1, part))
    })
    .collect();
let id = url_str
.split("/")
.collect::<Vec<&str>>()
.last()
.unwrap()
.split("_")
.collect::<Vec<&str>>()[0]
.to_string();
let video_item = VideoItem::new(
id,
@@ -160,8 +202,7 @@ impl ParadisehillProvider {
0,
)
.aspect_ratio(0.697674419 as f32)
.formats(vec![format])
;
.formats(formats);
return Ok(video_item);
}
@@ -182,8 +223,14 @@ impl Provider for ParadisehillProvider {
let _ = sort;
let _ = per_page;
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => self.query(cache, page.parse::<u8>().unwrap_or(1), &q, options).await,
None => self.get(cache, page.parse::<u8>().unwrap_or(1), options).await,
Some(q) => {
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, options)
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), options)
.await
}
};
match videos {
Ok(v) => v,