This commit is contained in:
Simon
2026-05-05 13:53:01 +00:00
committed by ForgeCode
parent 01306c508a
commit d6fa364b28
3 changed files with 593 additions and 0 deletions

View File

@@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "pornhub",
ty: "PornhubProvider",
},
ProviderDef {
id: "youporn",
module: "youporn",
ty: "YoupornProvider",
},
ProviderDef {
id: "pornhd3x",
module: "pornhd3x",

View File

@@ -57,6 +57,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `xxthots` | `onlyfans` | no | no | OnlyFans-like metadata example. |
| `yesporn` | `mainstream-tube` | no | no | Preview format examples. |
| `youjizz` | `mainstream-tube` | no | no | Mainstream tube provider. |
| `youporn` | `mainstream-tube` | no | no | Pornhub-network HTML provider with watch-page playback URLs and tag/channel/pornstar shortcuts. |
## Proxy Routes

587
src/providers/youporn.rs Normal file
View File

@@ -0,0 +1,587 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use url::{Url, form_urlencoded};
use wreq::Version;
/// Static grouping metadata for the YouPorn provider: the channel group it
/// is listed under and the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "studio", "search"],
};
/// Site origin (no trailing slash); stored as `YoupornProvider::url` and used
/// as the prefix of every listing URL and as the base for relative links.
const BASE_URL: &str = "https://www.youporn.com";
/// Channel identifier used for the channel definition, for produced video
/// items, for error reports, and as the prefix of uploader ids.
const CHANNEL_ID: &str = "youporn";
// Generates this module's error types via `error_chain`, with I/O errors and
// `wreq` HTTP errors declared as foreign links.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// Provider that scrapes YouPorn HTML listing pages into video items.
#[derive(Debug, Clone)]
pub struct YoupornProvider {
// Site origin used to build listing URLs and to resolve relative links.
url: String,
// Shortcut table keyed by lower-cased link text. It starts empty, is replaced
// by the background home-page load started in `new`, and is consulted when
// a query is resolved to a target.
shortcuts: Arc<RwLock<HashMap<String, Target>>>,
}
/// The kind of listing page a request resolves to.
///
/// Every `sort` field currently holds `"new"`: it is filled either from
/// `normalized_sort` or with a literal when shortcuts are collected.
#[derive(Debug, Clone)]
enum Target {
/// Front-page feed; chosen when the query is empty.
Latest { sort: String },
/// Free-text search (`/search/?query=...`).
Search { query: String },
/// Tag listing (`/porntags/{slug}/`).
Tag { slug: String, sort: String },
/// Channel listing (`/channel/{slug}/`).
Channel { slug: String, sort: String },
/// Pornstar listing (`/pornstar/{slug}/`).
Pornstar { slug: String, sort: String },
/// Amateur listing (`/amateur/{slug}/`).
Amateur { slug: String, sort: String },
}
impl YoupornProvider {
/// Creates a provider pointed at [`BASE_URL`] with an empty shortcut table
/// and immediately starts the background load that fills that table.
pub fn new() -> Self {
    let shortcuts = Arc::new(RwLock::new(HashMap::new()));
    let this = Self {
        url: BASE_URL.to_string(),
        shortcuts,
    };
    // Fire-and-forget: the provider is usable before the shortcuts arrive.
    this.spawn_initial_load();
    this
}
/// Starts a detached OS thread that downloads the home page once and swaps
/// the shortcuts discovered there into the shared table.
///
/// Best effort: a runtime build failure, a failed request, or a poisoned
/// lock simply leaves the table as it was.
fn spawn_initial_load(&self) {
    let shared = Arc::clone(&self.shortcuts);
    let home_url = self.url.clone();
    thread::spawn(move || {
        // The thread drives its own single-threaded Tokio runtime.
        let Ok(runtime) = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        else {
            return;
        };
        runtime.block_on(async move {
            let mut requester = crate::util::requester::Requester::new();
            let Ok(html) = requester.get(&home_url, None).await else {
                return;
            };
            let discovered = Self::collect_shortcuts(&html);
            if let Ok(mut table) = shared.write() {
                *table = discovered;
            }
        });
    });
}
/// Builds the channel definition advertised for this provider.
///
/// The client version is accepted but not consulted. The channel exposes a
/// single "sort" option whose only choice is "Most Recent".
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let most_recent = FilterOption {
        id: "new".to_string(),
        title: "Most Recent".to_string(),
    };
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Latest feed ordering.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![most_recent],
        multiSelect: false,
    };
    Channel {
        id: CHANNEL_ID.to_string(),
        name: "YouPorn".to_string(),
        description: "YouPorn listings with search, tag/channel shortcuts, and watch-page playback URLs."
            .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=youporn.com".to_string(),
        status: "active".to_string(),
        categories: Vec::new(),
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Parses a CSS selector, returning `None` instead of an error when the
/// selector text is invalid.
fn selector(value: &str) -> Option<Selector> {
    match Selector::parse(value) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// Decodes HTML entities in `value` and collapses every run of whitespace
/// into a single space.
///
/// If entity decoding fails, the raw input is used instead.
fn normalize_text(value: &str) -> String {
    let decoded = match decode(value.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => value.to_string(),
    };
    // Tokens from `split_whitespace` carry no whitespace themselves, so the
    // joined result never has leading or trailing blanks.
    let words: Vec<&str> = decoded.split_whitespace().collect();
    words.join(" ")
}
/// Turns a link found in the page into an absolute URL.
///
/// * blank input gives an empty string
/// * `http://` / `https://` URLs are returned unchanged (after trimming)
/// * protocol-relative `//host/...` links get an `https:` scheme
/// * anything else is joined onto the provider's base URL with exactly one
///   `/` between the two parts
fn normalize_url(&self, value: &str) -> String {
    let trimmed = value.trim();
    let is_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| trimmed.starts_with(*scheme));

    if trimmed.is_empty() {
        String::new()
    } else if is_absolute {
        trimmed.to_string()
    } else if trimmed.starts_with("//") {
        format!("https:{trimmed}")
    } else {
        let base = self.url.trim_end_matches('/');
        let relative = trimmed.trim_start_matches('/');
        format!("{}/{}", base, relative)
    }
}
/// Maps a requested sort key to the key used internally.
///
/// Every input currently collapses to `"new"`; the argument is accepted but
/// not consulted.
fn normalized_sort(sort: &str) -> &'static str {
    const ONLY_SUPPORTED_SORT: &str = "new";
    let _ = sort;
    ONLY_SUPPORTED_SORT
}
/// Returns the URL path fragment for a sort key.
///
/// Always empty at present; the argument is accepted but not consulted.
fn sort_suffix(sort: &str) -> &'static str {
    const NO_SUFFIX: &str = "";
    let _ = sort;
    NO_SUFFIX
}
/// Returns the `?page=N` query suffix for a listing page.
///
/// Pages 0 and 1 both address the first page, which takes no suffix.
fn page_suffix(page: u8) -> String {
    match page {
        0 | 1 => String::new(),
        _ => format!("?page={page}"),
    }
}
/// Builds the browser-like request headers sent with listing requests.
///
/// The header set is fixed except for `referer`, which is taken from the
/// argument and always comes last.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    const ACCEPT: &str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
    const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";

    let pairs = [
        ("accept", ACCEPT),
        ("accept-language", "en-US,en;q=0.7"),
        ("cache-control", "no-cache"),
        ("pragma", "no-cache"),
        ("user-agent", USER_AGENT),
        ("referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Resolves a raw query string to the listing it should open.
///
/// Resolution order:
/// 1. blank query: the latest feed
/// 2. an explicit `tag:` / `channel:` / `pornstar:` / `amateur:` prefix
///    (matched case-insensitively): that listing, with the remainder
///    lower-cased and its spaces replaced by `-` to form the slug
/// 3. a query whose lower-cased, whitespace-collapsed form matches a
///    collected shortcut: that shortcut's listing
/// 4. anything else: a free-text search for the trimmed query
fn target_from_query(&self, query: &str, sort: &str) -> Target {
    let sort_key = Self::normalized_sort(sort).to_string();
    let trimmed = query.trim();
    if trimmed.is_empty() {
        return Target::Latest { sort: sort_key };
    }

    let lowered = trimmed.to_ascii_lowercase();

    // Slug following `prefix`, or `None` when the prefix is absent or
    // nothing usable follows it.
    let slug_after = |prefix: &str| -> Option<String> {
        lowered
            .strip_prefix(prefix)
            .map(|rest| rest.trim().replace(' ', "-"))
            .filter(|slug| !slug.is_empty())
    };
    if let Some(slug) = slug_after("tag:") {
        return Target::Tag { slug, sort: sort_key };
    }
    if let Some(slug) = slug_after("channel:") {
        return Target::Channel { slug, sort: sort_key };
    }
    if let Some(slug) = slug_after("pornstar:") {
        return Target::Pornstar { slug, sort: sort_key };
    }
    if let Some(slug) = slug_after("amateur:") {
        return Target::Amateur { slug, sort: sort_key };
    }

    // Shortcut keys are stored lower-cased with single spaces.
    let shortcut_key = lowered.split_whitespace().collect::<Vec<_>>().join(" ");
    if let Ok(shortcuts) = self.shortcuts.read() {
        if let Some(known) = shortcuts.get(&shortcut_key) {
            // Reuse the stored slug, but apply the sort of this request.
            return match known {
                Target::Tag { slug, .. } => Target::Tag {
                    slug: slug.clone(),
                    sort: sort_key,
                },
                Target::Channel { slug, .. } => Target::Channel {
                    slug: slug.clone(),
                    sort: sort_key,
                },
                Target::Pornstar { slug, .. } => Target::Pornstar {
                    slug: slug.clone(),
                    sort: sort_key,
                },
                Target::Amateur { slug, .. } => Target::Amateur {
                    slug: slug.clone(),
                    sort: sort_key,
                },
                Target::Latest { .. } | Target::Search { .. } => known.clone(),
            };
        }
    }

    Target::Search {
        query: trimmed.to_string(),
    }
}
/// Builds the listing URL for `target` at the given page.
///
/// Search results carry the page as an extra `&page=N` query parameter;
/// every other listing appends the `?page=N` suffix from `page_suffix`.
/// Leading and trailing slashes on slugs are dropped before they are
/// placed in the path.
fn build_url(&self, target: &Target, page: u8) -> String {
    let base = &self.url;
    match target {
        Target::Search { query } => {
            let encoded: String = form_urlencoded::byte_serialize(query.as_bytes()).collect();
            match page {
                0 | 1 => format!("{}/search/?query={encoded}", self.url),
                _ => format!("{}/search/?query={encoded}&page={page}", self.url),
            }
        }
        Target::Latest { sort } => {
            let order = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/{}{}", base, order, paging)
        }
        Target::Tag { slug, sort } => {
            let order = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/porntags/{}/{}{}", base, slug.trim_matches('/'), order, paging)
        }
        Target::Channel { slug, sort } => {
            let order = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/channel/{}/{}{}", base, slug.trim_matches('/'), order, paging)
        }
        Target::Pornstar { slug, sort } => {
            let order = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/pornstar/{}/{}{}", base, slug.trim_matches('/'), order, paging)
        }
        Target::Amateur { slug, sort } => {
            let order = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/amateur/{}/{}{}", base, slug.trim_matches('/'), order, paging)
        }
    }
}
/// Scans every `<a href>` in `html` and builds the shortcut table that maps
/// a link's lower-cased, whitespace-normalized text to the listing it
/// points at.
///
/// Only links whose path starts with `/porntags/`, `/channel/`,
/// `/pornstar/` or `/amateur/` and that have a non-empty slug are recorded.
/// When several links share the same text, the last one wins.
///
/// Absolute URLs are reduced to their path via `Url::path`, which drops the
/// query string and fragment. Relative hrefs get the same treatment here so
/// that a link such as `/porntags/foo/?ref=nav` yields the slug `foo`
/// rather than `foo/?ref=nav`.
fn collect_shortcuts(html: &str) -> HashMap<String, Target> {
    let mut map = HashMap::new();
    let document = Html::parse_document(html);
    let Some(link_selector) = Self::selector("a[href]") else {
        return map;
    };

    for link in document.select(&link_selector) {
        let Some(href) = link.value().attr("href") else {
            continue;
        };
        let title = Self::normalize_text(&link.text().collect::<String>()).to_ascii_lowercase();
        if title.is_empty() {
            continue;
        }

        let path = if href.starts_with("http://") || href.starts_with("https://") {
            Url::parse(href)
                .ok()
                .map(|u| u.path().to_string())
                .unwrap_or_default()
        } else {
            // Keep only the path part, mirroring what `Url::path` returns
            // for absolute links.
            href.split(|c: char| c == '?' || c == '#')
                .next()
                .unwrap_or("")
                .to_string()
        };

        // Slug below `prefix`, or `None` when the path is not under that
        // section or the slug would be empty.
        let slug_under = |prefix: &str| -> Option<String> {
            path.strip_prefix(prefix)
                .map(|rest| rest.trim_matches('/').to_string())
                .filter(|slug| !slug.is_empty())
        };

        let sort = "new".to_string();
        let target = if let Some(slug) = slug_under("/porntags/") {
            Target::Tag { slug, sort }
        } else if let Some(slug) = slug_under("/channel/") {
            Target::Channel { slug, sort }
        } else if let Some(slug) = slug_under("/pornstar/") {
            Target::Pornstar { slug, sort }
        } else if let Some(slug) = slug_under("/amateur/") {
            Target::Amateur { slug, sort }
        } else {
            continue;
        };
        map.insert(title, target);
    }
    map
}
/// Returns the normalized text content of `node`, or an empty string when
/// there is no node.
fn text_of(node: Option<ElementRef<'_>>) -> String {
    match node {
        Some(element) => Self::normalize_text(&element.text().collect::<String>()),
        None => String::new(),
    }
}
/// Extracts one `VideoItem` per video card found in a listing page.
///
/// A card is skipped when its first matching link does not point at a
/// `/watch/` URL or when no video id can be determined. The id comes from
/// the card's `data-video-id` attribute, falling back to the path segment
/// after `/watch/` only when that attribute is absent. Views, rating,
/// uploader details, preview clip and tags are attached only when present.
fn parse_items(&self, html: &str) -> Vec<VideoItem> {
    let document = Html::parse_document(html);
    let Some(card_selector) = Self::selector("article.video-box.js_video-box") else {
        return Vec::new();
    };
    // A selector that fails to parse just disables the field it feeds.
    let link_selector = Self::selector("a[data-testid='plw_video_thumbnail_link'], a.video-box-image, a.video-title-text");
    let title_selector = Self::selector("a.video-title-text");
    let thumb_selector = Self::selector("img");
    let duration_selector = Self::selector(".tm_video_duration");
    let views_selector = Self::selector("span.info-views");
    let uploader_selector = Self::selector("a.author-title-text");
    let tag_selector = Self::selector("a.bubble-porntag");

    let mut videos = Vec::new();
    for card in document.select(&card_selector) {
        let anchor = link_selector
            .as_ref()
            .and_then(|sel| card.select(sel).next());
        let href = anchor
            .and_then(|a| a.value().attr("href"))
            .unwrap_or_default();
        if !href.contains("/watch/") {
            continue;
        }

        let video_id = match card.value().attr("data-video-id") {
            Some(attr) => attr.to_string(),
            None => href
                .split("/watch/")
                .nth(1)
                .and_then(|rest| rest.split('/').next())
                .map(str::to_string)
                .unwrap_or_default(),
        };
        if video_id.is_empty() {
            continue;
        }

        // Prefer the anchor's `title` attribute; fall back to its text.
        let title = match title_selector
            .as_ref()
            .and_then(|sel| card.select(sel).next())
        {
            Some(node) => match node.value().attr("title") {
                Some(attr) if !attr.is_empty() => Self::normalize_text(attr),
                _ => Self::normalize_text(&node.text().collect::<String>()),
            },
            None => String::new(),
        };

        // The first of these attributes that is present on the first image
        // is used as the thumbnail source.
        let thumb = thumb_selector
            .as_ref()
            .and_then(|sel| card.select(sel).next())
            .and_then(|img| {
                ["data-original", "data-src", "src"]
                    .iter()
                    .find_map(|&name| img.value().attr(name))
            })
            .map(|raw| self.normalize_url(raw))
            .unwrap_or_default();

        let duration_text =
            Self::text_of(duration_selector.as_ref().and_then(|sel| card.select(sel).next()));
        let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;

        // First `span.info-views` is read as the view count, the second as
        // a percentage rating.
        let view_text =
            Self::text_of(views_selector.as_ref().and_then(|sel| card.select(sel).next()));
        let views = parse_abbreviated_number(&view_text).unwrap_or(0) as u32;
        let rating = views_selector
            .as_ref()
            .and_then(|sel| card.select(sel).nth(1))
            .and_then(|node| {
                Self::normalize_text(&node.text().collect::<String>())
                    .replace('%', "")
                    .parse::<f32>()
                    .ok()
            });

        let uploader_node = uploader_selector
            .as_ref()
            .and_then(|sel| card.select(sel).next());
        let uploader_name = Self::text_of(uploader_node);
        let uploader_url = uploader_node
            .and_then(|node| node.value().attr("href"))
            .map(|link| self.normalize_url(link));
        let uploader_id = card
            .value()
            .attr("data-uploader-id")
            .map(|v| format!("{CHANNEL_ID}:{v}"));

        let preview = anchor
            .and_then(|a| a.value().attr("data-mediabook"))
            .map(|raw| raw.replace("&amp;", "&"));

        let tags: Vec<String> = match &tag_selector {
            Some(sel) => card
                .select(sel)
                .map(|tag| Self::normalize_text(&tag.text().collect::<String>()))
                .filter(|label| !label.is_empty())
                .collect(),
            None => Vec::new(),
        };

        let mut video = VideoItem::new(
            video_id,
            title,
            self.normalize_url(href),
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        )
        .views(views);
        if let Some(score) = rating {
            video = video.rating(score);
        }
        if !uploader_name.is_empty() {
            video = video.uploader(uploader_name);
        }
        // Assign only when a value was found so an existing value is never
        // overwritten with `None`.
        if uploader_url.is_some() {
            video.uploaderUrl = uploader_url;
        }
        if uploader_id.is_some() {
            video.uploaderId = uploader_id;
        }
        if let Some(clip) = preview {
            video = video.preview(clip);
        }
        if !tags.is_empty() {
            video = video.tags(tags);
        }
        videos.push(video);
    }
    videos
}
}
#[async_trait]
impl Provider for YoupornProvider {
    /// Fetches one page of videos for the given query and sort.
    ///
    /// * A cache entry younger than five minutes is returned without any
    ///   network request.
    /// * If the request fails, the error is reported and the older cached
    ///   items (possibly none) are returned.
    /// * If the page yields no items, the older cached items are returned
    ///   and the cache is left untouched.
    /// * Otherwise the cache entry is replaced with the fresh items.
    ///
    /// A `page` value that does not parse as `u8` is treated as page 1.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _db_pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u8>().unwrap_or(1);
        let search_text = query.unwrap_or_default();
        let target = self.target_from_query(&search_text, &sort);
        let video_url = self.build_url(&target, page_number);

        // Whatever the cache holds becomes the fallback; if it is still
        // fresh it is also the answer.
        let stale_items = match cache.get(&video_url) {
            Some((stored_at, cached)) => {
                let snapshot = cached.clone();
                if stored_at.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                    return snapshot;
                }
                snapshot
            }
            None => Vec::new(),
        };

        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
        let referer = format!("{}/", self.url.trim_end_matches('/'));
        let response = requester
            .get_with_headers(&video_url, Self::html_headers(&referer), Some(Version::HTTP_11))
            .await;
        let body = match response {
            Ok(body) => body,
            Err(e) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.request",
                    &format!("url={video_url}; error={e}"),
                )
                .await;
                return stale_items;
            }
        };

        let fresh_items = self.parse_items(&body);
        if fresh_items.is_empty() {
            return stale_items;
        }
        cache.remove(&video_url);
        cache.insert(video_url, fresh_items.clone());
        fresh_items
    }

    /// Returns this provider's channel definition; always `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}