pimpbunny changes

This commit is contained in:
Simon
2026-03-20 21:02:47 +00:00
parent dd7c4ec6a1
commit 46cd348148
4 changed files with 322 additions and 77 deletions

View File

@@ -2,6 +2,7 @@ use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::parse_abbreviated_number;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
@@ -10,7 +11,6 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
@@ -205,6 +205,32 @@ impl PimpbunnyProvider {
)
}
/// Reports whether `url` is an `https` URL on `pimpbunny.com` or
/// `www.pimpbunny.com` whose path does not start with
/// `/contents/videos_screenshots/`.
///
/// Input that fails to parse as a URL, uses another scheme, or has no host is
/// rejected.
fn is_allowed_detail_url(url: &str) -> bool {
    match Url::parse(url) {
        Ok(parsed) if parsed.scheme() == "https" => {
            let host_allowed = matches!(
                parsed.host_str(),
                Some("pimpbunny.com" | "www.pimpbunny.com")
            );
            host_allowed && !parsed.path().starts_with("/contents/videos_screenshots/")
        }
        // Unparseable input and non-https schemes are both refused.
        _ => false,
    }
}
/// Rewrites a detail-page URL into a proxy URL for this provider.
///
/// `page_url` is returned unchanged when it is empty or when
/// `Self::is_allowed_detail_url` rejects it. Otherwise the URL is passed
/// through `strip_url_scheme` and handed to `build_proxy_url` together with
/// `options` and the `"pimpbunny"` name.
fn proxied_video(&self, options: &ServerOptions, page_url: &str) -> String {
    let proxiable = !page_url.is_empty() && Self::is_allowed_detail_url(page_url);
    if proxiable {
        let target = crate::providers::strip_url_scheme(page_url);
        crate::providers::build_proxy_url(options, "pimpbunny", &target)
    } else {
        page_url.to_string()
    }
}
/// Returns `self.url` with all trailing `/` characters removed and exactly one
/// `/` appended.
fn root_referer(&self) -> String {
    let mut referer = self.url.trim_end_matches('/').to_owned();
    referer.push('/');
    referer
}
@@ -407,9 +433,7 @@ impl PimpbunnyProvider {
return Ok(old_items);
}
};
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
let video_items = self.get_video_items_from_html(text.clone(), &options);
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -509,9 +533,7 @@ impl PimpbunnyProvider {
return Ok(old_items);
}
};
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
let video_items = self.get_video_items_from_html(text.clone(), &options);
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -521,11 +543,7 @@ impl PimpbunnyProvider {
Ok(video_items)
}
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
) -> Vec<VideoItem> {
fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
@@ -539,19 +557,42 @@ impl PimpbunnyProvider {
None => return vec![],
};
let futures = block
block
.split("<div class=\"col\">")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), requester.clone()));
join_all(futures)
.await
.filter_map(|el| self.get_video_item(el.to_string(), options).ok())
.into_iter()
.filter_map(Result::ok)
.collect()
}
async fn get_video_item(&self, seg: String, mut requester: Requester) -> Result<VideoItem> {
/// Finds the first token in a listing segment that parses as a duration.
///
/// The segment is cut on `<`, `>` and whitespace. Only tokens containing `:`
/// are handed to `parse_time_to_seconds`, and the first successful parse wins.
/// Returns 0 when no token parses.
fn extract_duration_from_segment(&self, seg: &str) -> u32 {
    seg.split(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace())
        .map(str::trim)
        // An empty token never contains ':', so this also drops empties.
        .filter(|piece| piece.contains(':'))
        .find_map(|piece| parse_time_to_seconds(piece))
        .map_or(0, |seconds| seconds as u32)
}
/// Extracts the view count printed in front of the `Views` label in a listing
/// segment.
///
/// The text before the first `Views` is cut on `<`, `>` and whitespace. The
/// last non-empty piece, with surrounding `(`, `)` and `,` removed, is passed
/// to `parse_abbreviated_number`.
///
/// Returns 0 when the segment has no `Views` label or the piece does not
/// parse.
fn extract_views_from_segment(&self, seg: &str) -> u32 {
    // `split("Views").next()` is always `Some`, so it cannot detect a missing
    // label. `split_once` can, which keeps unrelated trailing markup from
    // being parsed as a count.
    let Some((before_views, _)) = seg.split_once("Views") else {
        return 0;
    };
    let candidate = before_views
        .rsplit(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace())
        .find(|piece| !piece.is_empty())
        .unwrap_or("")
        .trim_matches(|ch: char| ch == '(' || ch == ')' || ch == ',');
    parse_abbreviated_number(candidate).unwrap_or(0)
}
fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result<VideoItem> {
let video_url = seg
.split(" href=\"")
.nth(1)
@@ -606,71 +647,22 @@ impl PimpbunnyProvider {
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tags, formats, views, duration) =
self.extract_media(&video_url, &mut requester).await?;
let proxy_url = self.proxied_video(options, &video_url);
let views = self.extract_views_from_segment(&seg);
let duration = self.extract_duration_from_segment(&seg);
let formats = vec![
VideoFormat::new(proxy_url.clone(), "auto".into(), "video/mp4".into())
.format_id("auto".into())
.format_note("proxied".into()),
];
Ok(
VideoItem::new(id, title, video_url, "pimpbunny".into(), thumb, duration)
VideoItem::new(id, title, proxy_url, "pimpbunny".into(), thumb, duration)
.formats(formats)
.tags(tags)
.preview(preview)
.views(views),
)
}
/// Fetches the detail page at `url` and reads media metadata from its
/// embedded `application/ld+json` script block.
///
/// Returns `(tags, formats, views, duration)`:
/// - `tags` is always an empty vector.
/// - `formats` holds one `VideoFormat` built from the JSON `contentUrl`.
/// - `views` comes from the first `interactionStatistic` entry, 0 if absent.
/// - `duration` comes from the JSON `duration` string, 0 if absent.
///
/// # Errors
/// Fails when the page request fails, when no `application/ld+json` block is
/// found, or when the block's contents are not valid JSON.
async fn extract_media(
&self,
url: &str,
requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>, u32, u32)> {
// NOTE(review): presumably primes session cookies before the detail request;
// confirm against `warm_root_session`.
self.warm_root_session(requester).await;
let headers = self.headers_with_cookies(requester, url, &self.root_referer());
let text = requester
.get_with_headers(url, headers, Some(Version::HTTP_2))
.await
.map_err(|e| Error::from(format!("{}", e)))?;
// Take the text between the first `application/ld+json">` marker and the
// next `</script>`.
let json_str = text
.split("application/ld+json\">")
.nth(1)
.and_then(|s| s.split("</script>").next())
.ok_or_else(|| ErrorKind::Parse("ld+json".into()))?;
let json: serde_json::Value = serde_json::from_str(json_str)?;
let video_url = json["contentUrl"].as_str().unwrap_or("").to_string();
// Quality label: the part of `contentUrl` after its last `_`, up to the
// first `.` of that part.
let quality = video_url
.split('_')
.last()
.and_then(|s| s.split('.').next())
.unwrap_or("")
.to_string();
// NOTE(review): `userInteractionCount` is read only when it is a JSON string;
// a numeric value falls through to 0.
let views = json["interactionStatistic"]
.as_array()
.and_then(|a| a.first())
.and_then(|v| v["userInteractionCount"].as_str())
.and_then(|v| v.parse().ok())
.unwrap_or(0);
// NOTE(review): removing the letters P/T/H/M/S concatenates the numeric
// fields (e.g. "PT12M34S" becomes "1234"); confirm `parse_time_to_seconds`
// accepts that form.
let duration = json["duration"]
.as_str()
.map(|d| parse_time_to_seconds(&d.replace(['P', 'T', 'H', 'M', 'S'], "")).unwrap_or(0))
.unwrap_or(0) as u32;
// The format carries the detail page as Referer and a fixed User-Agent.
let mut format = VideoFormat::new(video_url.clone(), quality, "video/mp4".into())
.http_header("Referer".to_string(), url.to_string())
.http_header(
"User-Agent".to_string(),
Self::FIREFOX_USER_AGENT.to_string(),
);
// Attach a Cookie header only when the requester has one for the media URL.
if let Some(cookie) = requester.cookie_header_for_url(&video_url) {
format.add_http_header("Cookie".to_string(), cookie);
}
Ok((vec![], vec![format], views, duration))
}
}
#[async_trait]
@@ -716,6 +708,7 @@ impl Provider for PimpbunnyProvider {
mod tests {
use super::PimpbunnyProvider;
use crate::videos::ServerOptions;
use std::sync::{Arc, RwLock};
#[test]
fn rewrites_allowed_thumbs_to_proxy_urls() {
@@ -746,4 +739,85 @@ mod tests {
"https://example.com/proxy/pimpbunny-thumb/pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg"
);
}
/// An allowed detail-page URL is rewritten under the configured public base.
#[test]
fn rewrites_video_pages_to_redirect_proxy() {
    let provider = PimpbunnyProvider::new();
    // Only `public_url_base` is set; every other option stays unset.
    let opts = ServerOptions {
        public_url_base: Some("https://example.com".to_string()),
        featured: None,
        category: None,
        sites: None,
        filter: None,
        language: None,
        requester: None,
        network: None,
        stars: None,
        categories: None,
        duration: None,
        sort: None,
        sexuality: None,
    };
    let page = "https://pimpbunny.com/videos/example-video/";
    let expected = "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/";

    let rewritten = provider.proxied_video(&opts, page);

    assert_eq!(rewritten, expected);
}
/// A listing card yields a complete item (proxied URL, duration, views and one
/// format) straight from the listing HTML, with `requester` left as `None`.
#[test]
fn parses_listing_without_detail_requests() {
    let provider = PimpbunnyProvider {
        url: "https://pimpbunny.com".to_string(),
        stars: Arc::new(RwLock::new(vec![])),
        categories: Arc::new(RwLock::new(vec![])),
    };
    // Only `public_url_base` is set; every other option stays unset.
    let opts = ServerOptions {
        public_url_base: Some("https://example.com".to_string()),
        featured: None,
        category: None,
        sites: None,
        filter: None,
        language: None,
        requester: None,
        network: None,
        stars: None,
        categories: None,
        duration: None,
        sort: None,
        sexuality: None,
    };
    let listing = r#"
<div class="videos_videos_list"></div>
<div class="videos_videos_list">
<div class="col">
<a href="https://pimpbunny.com/videos/example-video/">
<div class="card-thumbnail">
<img src="https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg" data-preview="https://pimpbunny.com/preview.mp4" />
</div>
<div class="card-title">sample &amp; title</div>
<div>12:34</div>
<div>1.2K Views</div>
</a>
</div>
-pagination-wrapper
"#;

    let parsed = provider.get_video_items_from_html(listing.to_string(), &opts);

    assert_eq!(parsed.len(), 1);
    let expected_url = "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/";
    assert_eq!(parsed[0].url, expected_url);
    // 12:34 is 12 * 60 + 34 seconds.
    assert_eq!(parsed[0].duration, 754);
    assert_eq!(parsed[0].views, Some(1200));
    let format_count = parsed[0].formats.as_ref().map(|f| f.len());
    assert_eq!(format_count, Some(1));
}
}