upgrades
This commit is contained in:
@@ -13,6 +13,8 @@ use futures::future::join_all;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use wreq::Client;
|
||||
use wreq::Version;
|
||||
use wreq_util::Emulation;
|
||||
@@ -40,11 +42,13 @@ struct PerverzijaDbEntry {
|
||||
/// Provider state: the site's base URL plus a shared lookup table used to
/// resolve free-text queries to site paths.
///
/// The table sits behind an `Arc`, so clones of the provider share one map.
#[derive(Clone, Debug)]
pub struct PerverzijaProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Normalized key -> `kind/slug` path, guarded by a read/write lock.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
impl PerverzijaProvider {
|
||||
pub fn new() -> Self {
|
||||
PerverzijaProvider {
|
||||
url: "https://tube.perverzija.com/".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,6 +147,104 @@ impl PerverzijaProvider {
|
||||
title.trim().to_string()
|
||||
}
|
||||
|
||||
/// Returns the prefix of `haystack` that ends just before the earliest
/// occurrence of any string in `end_markers`.
///
/// Markers that do not occur are ignored; when none occur (or the list is
/// empty) the whole `haystack` is returned.
fn clip_at_first<'a>(haystack: &'a str, end_markers: &[&str]) -> &'a str {
    // Smallest byte offset at which any marker starts, if there is one.
    let cut = end_markers
        .iter()
        .filter_map(|marker| haystack.find(*marker))
        .min()
        .unwrap_or(haystack.len());
    &haystack[..cut]
}
|
||||
|
||||
fn listing_item_scope(haystack: &str) -> &str {
|
||||
Self::clip_at_first(haystack, &["</article>", "</li>", "<article ", "video-item post"])
|
||||
}
|
||||
|
||||
fn detail_meta_section<'a>(text: &'a str, label: &str) -> &'a str {
|
||||
let section = text
|
||||
.split(label)
|
||||
.nth(1)
|
||||
.unwrap_or_default();
|
||||
Self::clip_at_first(
|
||||
section,
|
||||
&["</div>", "</p>", "<strong>", "<div class=\"related", "<section", "<aside"],
|
||||
)
|
||||
}
|
||||
|
||||
/// Appends the trimmed `value` to `tags` unless it is empty or already
/// present (compared ASCII case-insensitively).
fn push_unique(tags: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    if candidate.is_empty() {
        return;
    }

    let is_duplicate = tags
        .iter()
        .any(|tag| tag.eq_ignore_ascii_case(candidate));
    if is_duplicate {
        return;
    }

    tags.push(candidate.to_owned());
}
|
||||
|
||||
/// Collects the `href` value of every `<a href="...">` opening found in
/// `section`, in document order.
///
/// Only anchors written exactly as `<a href="` are recognized. An anchor whose
/// attribute value has no closing quote (truncated or malformed markup) is
/// skipped, so the rest of the section is never returned as a URL.
fn parse_href_values(section: &str) -> Vec<String> {
    section
        .split("<a href=\"")
        // The first piece is whatever precedes the first anchor.
        .skip(1)
        // Require the closing quote; `split('"').next()` would accept anything.
        .filter_map(|part| part.split_once('"'))
        .map(|(href, _rest)| href.to_string())
        .collect()
}
|
||||
|
||||
/// Builds a lookup key from `value`: ASCII-lowercased, with `_`, `-` and
/// whitespace all treated as word separators, and words joined by a single
/// space.
fn normalize_key(value: &str) -> String {
    value
        .to_ascii_lowercase()
        .split(|c: char| c.is_whitespace() || matches!(c, '_' | '-'))
        // Leading, trailing and repeated separators produce empty pieces.
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
|
||||
|
||||
/// Turns a URL slug into display text: surrounding `/` are removed, and `-`
/// and whitespace are treated as word separators, with words joined by a
/// single space. Letter case is left untouched.
fn humanize_slug(value: &str) -> String {
    value
        .trim_matches('/')
        .split(|c: char| c == '-' || c.is_whitespace())
        // Repeated or edge separators produce empty pieces.
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
|
||||
let slug = slug.trim().trim_matches('/');
|
||||
if slug.is_empty() {
|
||||
return;
|
||||
}
|
||||
let path = format!("{kind}/{slug}");
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(Self::normalize_key(slug), path.clone());
|
||||
let normalized_title = Self::normalize_key(title.unwrap_or(slug));
|
||||
if !normalized_title.is_empty() {
|
||||
map.insert(normalized_title, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_path(&self, query: &str) -> Option<String> {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() && matches!(kind.as_str(), "studio" | "stars" | "tag" | "genre")
|
||||
{
|
||||
return Some(format!("{kind}/{value}"));
|
||||
}
|
||||
}
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.tag_map.read().ok()?.get(&normalized).cloned()
|
||||
}
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
@@ -212,13 +314,8 @@ impl PerverzijaProvider {
|
||||
url_str = format!("{}?s={}", self.url, search_string);
|
||||
}
|
||||
|
||||
if query.starts_with("@studio:") {
|
||||
let studio_name = query.replace("@studio:", "");
|
||||
url_str = format!("{}studio/{}/page/{}/", self.url, studio_name, page);
|
||||
query_parse = false;
|
||||
} else if query.starts_with("@stars:") {
|
||||
let stars_name = query.replace("@stars:", "");
|
||||
url_str = format!("{}stars/{}/page/{}/", self.url, stars_name, page);
|
||||
if let Some(path) = self.resolve_query_path(query) {
|
||||
url_str = format!("{}/{}/page/{}/", self.url.trim_end_matches('/'), path, page);
|
||||
query_parse = false;
|
||||
}
|
||||
url_str = url_str.replace("page/1/", "");
|
||||
@@ -292,7 +389,8 @@ impl PerverzijaProvider {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
for video_segment in raw_videos {
|
||||
for raw_video_segment in raw_videos {
|
||||
let video_segment = Self::listing_item_scope(raw_video_segment);
|
||||
let title = Self::extract_title(video_segment);
|
||||
|
||||
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
|
||||
@@ -370,15 +468,17 @@ impl PerverzijaProvider {
|
||||
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
|
||||
for studio in studios_parts.iter().skip(1) {
|
||||
if studio.starts_with("https://tube.perverzija.com/studio/") {
|
||||
tags.push(
|
||||
studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "@studio:")
|
||||
.to_string(),
|
||||
let slug = studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.first()
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "");
|
||||
self.insert_tag_mapping("studio", &slug, None);
|
||||
Self::push_unique(
|
||||
&mut tags,
|
||||
Self::humanize_slug(&slug),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -396,7 +496,8 @@ impl PerverzijaProvider {
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !tag_name.is_empty() {
|
||||
tags.push(format!("@stars:{}", tag_name));
|
||||
self.insert_tag_mapping("stars", &tag_name, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&tag_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -407,7 +508,7 @@ impl PerverzijaProvider {
|
||||
if token.starts_with("tag-") {
|
||||
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
|
||||
if !tag_name.is_empty() {
|
||||
tags.push(tag_name.replace("-", " ").to_string());
|
||||
Self::push_unique(&mut tags, tag_name.replace("-", " ").to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -579,88 +680,58 @@ impl PerverzijaProvider {
|
||||
url_str = "!".to_string()
|
||||
}
|
||||
|
||||
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
|
||||
let mut tags: Vec<String> = Vec::new();
|
||||
|
||||
let studios_parts = text
|
||||
.split("<strong>Studio: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for studio in studios_parts.iter().skip(1) {
|
||||
if studio.starts_with("https://tube.perverzija.com/studio/") {
|
||||
tags.push(
|
||||
studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "@studio:")
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
if text.contains("<strong>Stars: </strong>") {
|
||||
let stars_parts: Vec<&str> = text
|
||||
.split("<strong>Stars: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for star in stars_parts.iter().skip(1) {
|
||||
if star.starts_with("https://tube.perverzija.com/stars/") {
|
||||
tags.push(
|
||||
star.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/stars/", "@stars:")
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
let studios_section = Self::detail_meta_section(&text, "<strong>Studio: </strong>");
|
||||
for href in Self::parse_href_values(studios_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/studio/") {
|
||||
let studio_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/studio/", "");
|
||||
self.insert_tag_mapping("studio", &studio_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&studio_slug));
|
||||
}
|
||||
}
|
||||
|
||||
let tags_parts: Vec<&str> = text
|
||||
.split("<strong>Tags: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for star in tags_parts.iter().skip(1) {
|
||||
if star.starts_with("https://tube.perverzija.com/stars/") {
|
||||
tags.push(
|
||||
star.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/stars/", "@stars:")
|
||||
.to_string(),
|
||||
);
|
||||
let stars_section = Self::detail_meta_section(&text, "<strong>Stars: </strong>");
|
||||
for href in Self::parse_href_values(stars_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/stars/") {
|
||||
let star_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/stars/", "");
|
||||
self.insert_tag_mapping("stars", &star_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
|
||||
}
|
||||
}
|
||||
|
||||
let tags_section = if text.contains("<strong>Tags: </strong>") {
|
||||
Self::detail_meta_section(&text, "<strong>Tags: </strong>")
|
||||
} else {
|
||||
Self::detail_meta_section(&text, "<strong>Genres: </strong>")
|
||||
};
|
||||
for href in Self::parse_href_values(tags_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/stars/") {
|
||||
let star_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/stars/", "");
|
||||
self.insert_tag_mapping("stars", &star_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
|
||||
continue;
|
||||
}
|
||||
if href.starts_with("https://tube.perverzija.com/tag/") {
|
||||
let tag_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/tag/", "");
|
||||
self.insert_tag_mapping("tag", &tag_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&tag_slug));
|
||||
continue;
|
||||
}
|
||||
if href.starts_with("https://tube.perverzija.com/genre/") {
|
||||
let genre_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/genre/", "");
|
||||
self.insert_tag_mapping("genre", &genre_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&genre_slug));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user