This commit is contained in:
Simon
2026-04-05 20:31:38 +00:00
parent 9773590f64
commit 7b464fe796
10 changed files with 815 additions and 565 deletions

View File

@@ -13,6 +13,8 @@ use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use wreq::Client;
use wreq::Version;
use wreq_util::Emulation;
@@ -40,11 +42,13 @@ struct PerverzijaDbEntry {
/// State held by the Perverzija provider.
///
/// The type derives `Clone`, and `tag_map` sits behind an `Arc`, so every
/// clone of a provider reads and writes the same underlying map.
#[derive(Debug, Clone)]
pub struct PerverzijaProvider {
/// Base URL of the site; `new` initializes it with a trailing slash.
url: String,
/// Lookup table from normalized keys (see `normalize_key`) to `kind/slug`
/// paths, filled by `insert_tag_mapping` and read by `resolve_query_path`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PerverzijaProvider {
pub fn new() -> Self {
PerverzijaProvider {
url: "https://tube.perverzija.com/".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
@@ -143,6 +147,104 @@ impl PerverzijaProvider {
title.trim().to_string()
}
/// Returns the prefix of `haystack` that ends right before the earliest
/// occurrence of any entry in `end_markers`.
///
/// Markers that do not occur are ignored; when none occurs (or the list is
/// empty) the whole input is returned unchanged.
fn clip_at_first<'a>(haystack: &'a str, end_markers: &[&str]) -> &'a str {
    // Smallest byte offset at which any marker starts, falling back to the
    // full length when nothing matches.
    let cut = end_markers
        .iter()
        .filter_map(|marker| haystack.find(*marker))
        .min()
        .unwrap_or(haystack.len());
    &haystack[..cut]
}
/// Trims a listing fragment to the part that precedes the first of the
/// markers below, via `clip_at_first`.
fn listing_item_scope(haystack: &str) -> &str {
    // Any of these is treated as the end of the current item.
    const ITEM_END_MARKERS: [&str; 4] = ["</article>", "</li>", "<article ", "video-item post"];
    Self::clip_at_first(haystack, &ITEM_END_MARKERS)
}
/// Extracts the text that follows the first occurrence of `label` in `text`
/// (up to the next occurrence of `label`, if any), clipped at the first of
/// the end markers below.
///
/// Returns an empty string when `label` does not occur in `text`.
fn detail_meta_section<'a>(text: &'a str, label: &str) -> &'a str {
    const SECTION_END_MARKERS: [&str; 6] = [
        "</div>",
        "</p>",
        "<strong>",
        "<div class=\"related",
        "<section",
        "<aside",
    ];

    // The second piece of the split is whatever comes right after the label.
    let after_label = match text.split(label).nth(1) {
        Some(rest) => rest,
        None => "",
    };
    Self::clip_at_first(after_label, &SECTION_END_MARKERS)
}
/// Appends the trimmed `value` to `tags` unless it is empty or an entry that
/// matches it ASCII-case-insensitively is already present.
fn push_unique(tags: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let is_new = !candidate.is_empty()
        && tags.iter().all(|tag| !tag.eq_ignore_ascii_case(candidate));
    if is_new {
        tags.push(candidate.to_owned());
    }
}
/// Collects the value of every `<a href="…"` occurrence in `section`, in
/// document order.
///
/// Only anchors written exactly as `<a href="` are recognized. A value runs
/// up to the next double quote, or to the end of the input if there is none.
fn parse_href_values(section: &str) -> Vec<String> {
    let mut hrefs = Vec::new();
    // The piece before the first `<a href="` is not a link, hence `skip(1)`.
    for chunk in section.split("<a href=\"").skip(1) {
        let end = chunk.find('"').unwrap_or(chunk.len());
        hrefs.push(chunk[..end].to_owned());
    }
    hrefs
}
/// Builds the lookup key for `value`: ASCII-lowercased words joined by single
/// spaces, where whitespace, `_` and `-` all act as word separators.
fn normalize_key(value: &str) -> String {
    let words: Vec<String> = value
        .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
        // Runs of separators (and leading/trailing ones) yield empty pieces.
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    words.join(" ")
}
/// Turns a URL slug into a display label: leading and trailing `/` are
/// stripped, then `-` and whitespace are treated as word separators and the
/// words are joined by single spaces. Letter case is left as-is.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut label = String::with_capacity(slug.len());
    let words = slug
        .split(|c: char| c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());
    for word in words {
        // Words are never empty, so an empty label means this is the first one.
        if !label.is_empty() {
            label.push(' ');
        }
        label.push_str(word);
    }
    label
}
/// Records `kind/slug` in the tag map under the normalized slug and, when it
/// is non-empty, under the normalized title (the slug stands in for a missing
/// title).
///
/// Does nothing when the slug is empty after trimming whitespace and `/`, or
/// when the write lock cannot be acquired.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }

    // Prepare everything up front so the write lock is held only for the inserts.
    let path = format!("{kind}/{slug}");
    let slug_key = Self::normalize_key(slug);
    let title_key = Self::normalize_key(title.unwrap_or(slug));

    let mut entries = vec![(slug_key, path.clone())];
    if !title_key.is_empty() {
        entries.push((title_key, path));
    }

    if let Ok(mut map) = self.tag_map.write() {
        map.extend(entries);
    }
}
/// Maps a query to a `kind/value` path, or returns `None` if it cannot.
///
/// Leading `@` characters and surrounding whitespace are ignored. Two forms
/// are tried in order:
/// 1. An explicit `kind:value` query where `kind` (case-insensitive) is one of
///    `studio`, `stars`, `tag` or `genre` and `value` is non-empty; spaces in
///    the value become `-`.
/// 2. A lookup of the normalized query in the tag map.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let bare = query.trim().trim_start_matches('@');

    let explicit = bare.split_once(':').and_then(|(prefix, rest)| {
        let kind = prefix.trim().to_ascii_lowercase();
        let value = rest.trim().trim_matches('/').replace(' ', "-");
        let known_kind = matches!(kind.as_str(), "studio" | "stars" | "tag" | "genre");
        if known_kind && !value.is_empty() {
            Some(format!("{kind}/{value}"))
        } else {
            None
        }
    });
    if explicit.is_some() {
        return explicit;
    }

    // Not an explicit form: fall back to whatever has been learned so far.
    let key = Self::normalize_key(bare);
    if key.is_empty() {
        return None;
    }
    self.tag_map.read().ok()?.get(&key).cloned()
}
async fn get(
&self,
cache: VideoCache,
@@ -212,13 +314,8 @@ impl PerverzijaProvider {
url_str = format!("{}?s={}", self.url, search_string);
}
if query.starts_with("@studio:") {
let studio_name = query.replace("@studio:", "");
url_str = format!("{}studio/{}/page/{}/", self.url, studio_name, page);
query_parse = false;
} else if query.starts_with("@stars:") {
let stars_name = query.replace("@stars:", "");
url_str = format!("{}stars/{}/page/{}/", self.url, stars_name, page);
if let Some(path) = self.resolve_query_path(query) {
url_str = format!("{}/{}/page/{}/", self.url.trim_end_matches('/'), path, page);
query_parse = false;
}
url_str = url_str.replace("page/1/", "");
@@ -292,7 +389,8 @@ impl PerverzijaProvider {
return vec![];
}
for video_segment in raw_videos {
for raw_video_segment in raw_videos {
let video_segment = Self::listing_item_scope(raw_video_segment);
let title = Self::extract_title(video_segment);
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
@@ -370,15 +468,17 @@ impl PerverzijaProvider {
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
let slug = studio
.split("/\"")
.collect::<Vec<&str>>()
.first()
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &slug, None);
Self::push_unique(
&mut tags,
Self::humanize_slug(&slug),
);
}
}
@@ -396,7 +496,8 @@ impl PerverzijaProvider {
.unwrap_or_default()
.to_string();
if !tag_name.is_empty() {
tags.push(format!("@stars:{}", tag_name));
self.insert_tag_mapping("stars", &tag_name, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_name));
}
}
}
@@ -407,7 +508,7 @@ impl PerverzijaProvider {
if token.starts_with("tag-") {
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
if !tag_name.is_empty() {
tags.push(tag_name.replace("-", " ").to_string());
Self::push_unique(&mut tags, tag_name.replace("-", " ").to_string());
}
}
}
@@ -579,88 +680,58 @@ impl PerverzijaProvider {
url_str = "!".to_string()
}
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
let mut tags: Vec<String> = Vec::new();
let studios_parts = text
.split("<strong>Studio: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
);
}
}
if text.contains("<strong>Stars: </strong>") {
let stars_parts: Vec<&str> = text
.split("<strong>Stars: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in stars_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
}
let studios_section = Self::detail_meta_section(&text, "<strong>Studio: </strong>");
for href in Self::parse_href_values(studios_section) {
if href.starts_with("https://tube.perverzija.com/studio/") {
let studio_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &studio_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&studio_slug));
}
}
let tags_parts: Vec<&str> = text
.split("<strong>Tags: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in tags_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
let stars_section = Self::detail_meta_section(&text, "<strong>Stars: </strong>");
for href in Self::parse_href_values(stars_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
}
}
let tags_section = if text.contains("<strong>Tags: </strong>") {
Self::detail_meta_section(&text, "<strong>Tags: </strong>")
} else {
Self::detail_meta_section(&text, "<strong>Genres: </strong>")
};
for href in Self::parse_href_values(tags_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/tag/") {
let tag_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/tag/", "");
self.insert_tag_mapping("tag", &tag_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/genre/") {
let genre_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/genre/", "");
self.insert_tag_mapping("genre", &genre_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&genre_slug));
}
}