From 41374470b10fda7e9c069691afa7c58993d6f1e4 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 19 Jul 2025 14:37:11 +0000 Subject: [PATCH] advanced search for channel and models --- src/providers/pornhub.rs | 65 ++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/src/providers/pornhub.rs b/src/providers/pornhub.rs index 8e8693b..3b91b4e 100644 --- a/src/providers/pornhub.rs +++ b/src/providers/pornhub.rs @@ -6,6 +6,7 @@ use crate::util::flaresolverr::{FlareSolverrRequest, Flaresolverr}; use crate::util::time::parse_time_to_seconds; use crate::videos::{VideoItem}; use error_chain::error_chain; +use futures::stream::SplitSink; use htmlentity::entity::{ICodedDataTrait, decode}; use std::env; use std::vec; @@ -53,12 +54,13 @@ impl PornhubProvider { } }; - let client = Client::builder().emulation(Emulation::Firefox136).build()?; + let proxy = Proxy::all("http://192.168.0.103:8081").unwrap(); + let client = Client::builder().cert_verification(false).emulation(Emulation::Firefox136).build()?; - let response = client.get(url.clone()).send().await?; + let response = client.get(url.clone()).proxy(proxy).send().await?; if response.status().is_success() { let text = response.text().await?; - let video_items: Vec = self.get_video_items_from_html(text.clone()); + let video_items: Vec = self.get_video_items_from_html(text.clone(),"
    { // println!("FlareSolverr response: {}", res); - self.get_video_items_from_html(res.solution.response) + self.get_video_items_from_html(res.solution.response,"
      { println!("Error solving FlareSolverr: {}", e); @@ -101,12 +103,26 @@ impl PornhubProvider { page: u8, query: &str, ) -> Result> { + let mut split_string = "
        >(); + url = [self.url.to_string(), url_parts[0].to_string(), url_parts[1].to_string(), "videos?page=".to_string()].join("/"); + url += &page.to_string(); + if query.contains("@model"){ + split_string = "mostRecentVideosSection"; + } + if query.contains("@channels"){ + split_string = "dropdownHottestVideos"; + } + println!("Query URL: {}", url); + } + // Check our Video Cache. If the result is younger than 1 hour, we return it. let old_items = match cache.get(&url) { Some((time, items)) => { @@ -128,7 +144,7 @@ impl PornhubProvider { let response = client.get(url.clone()).proxy(proxy).send().await?; if response.status().is_success() { let text = response.text().await?; - let video_items: Vec = self.get_video_items_from_html(text.clone()); + let video_items: Vec = self.get_video_items_from_html(text.clone(),split_string); if !video_items.is_empty() { cache.remove(&url); cache.insert(url.clone(), video_items.clone()); @@ -147,7 +163,7 @@ impl PornhubProvider { }) .await; let video_items = match result { - Ok(res) => self.get_video_items_from_html(res.solution.response), + Ok(res) => self.get_video_items_from_html(res.solution.response,split_string), Err(e) => { println!("Error solving FlareSolverr: {}", e); return Err("Failed to solve FlareSolverr".into()); @@ -163,18 +179,20 @@ impl PornhubProvider { } } - fn get_video_items_from_html(&self, html: String) -> Vec { + fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec { if html.is_empty() { println!("HTML is empty"); return vec![]; } let mut items: Vec = Vec::new(); - println!("{}", html.contains("
          >()[1]; + println!("split_string: {}", split_string); + let video_listing_content = html.split(split_string).collect::>()[1].split("Porn in German").collect::>()[0]; + println!("Video listing content length: {}", video_listing_content.len()); let raw_videos = video_listing_content .split("class=\"pcVideoListItem ") .collect::>()[1..] .to_vec(); + println!("Number of video segments found: {}", raw_videos.len()); for video_segment in &raw_videos { // let vid = video_segment.split("\n").collect::>(); // for (index, line) in vid.iter().enumerate() { @@ -211,7 +229,7 @@ impl PornhubProvider { let view_part = match video_segment.split("iews\">").collect::>().len(){ 2 => video_segment.split("iews\">").collect::>()[1], 3 => video_segment.split("iews\">").collect::>()[2], - _ => continue, // Skip if the format is unexpected + _ => "0<", // Skip if the format is unexpected }; let views = parse_abbreviated_number(view_part .split("").collect::>()[1] @@ -222,7 +240,24 @@ impl PornhubProvider { .split("\"") .collect::>()[0] .to_string(); - let video_item = VideoItem::new( + + let mut uploaderBlock = String::new(); + let mut uploader_href = vec![]; + let mut tag = String::new(); + if video_segment.contains("videoUploaderBlock") { + + uploaderBlock = video_segment.split("videoUploaderBlock").collect::>()[1] + .to_string(); + uploader_href = uploaderBlock.split("href=\"").collect::>()[1] + .split("\"") + .collect::>()[0] + .split("/").collect::>(); + tag = format!("@{}:{}", uploader_href[1], uploader_href[2]); + + } + + + let mut video_item = VideoItem::new( id, title, url.to_string(), @@ -230,8 +265,14 @@ impl PornhubProvider { thumb, duration, ) - .views(views) ; + if views > 0 { + video_item = video_item.views(views); + } + if !tag.is_empty() { + video_item = video_item.tags(vec![tag]) + .uploader(uploader_href[2].to_string()); + } items.push(video_item); } return items;