camsoda and pornhub shorties
This commit is contained in:
10
build.rs
10
build.rs
@@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[
|
|||||||
module: "pornhub",
|
module: "pornhub",
|
||||||
ty: "PornhubProvider",
|
ty: "PornhubProvider",
|
||||||
},
|
},
|
||||||
|
ProviderDef {
|
||||||
|
id: "pornhub-shorties",
|
||||||
|
module: "pornhub_shorties",
|
||||||
|
ty: "PornhubShortiesProvider",
|
||||||
|
},
|
||||||
ProviderDef {
|
ProviderDef {
|
||||||
id: "youporn",
|
id: "youporn",
|
||||||
module: "youporn",
|
module: "youporn",
|
||||||
@@ -341,6 +346,11 @@ const PROVIDERS: &[ProviderDef] = &[
|
|||||||
module: "xvideos",
|
module: "xvideos",
|
||||||
ty: "XvideosProvider",
|
ty: "XvideosProvider",
|
||||||
},
|
},
|
||||||
|
ProviderDef {
|
||||||
|
id: "camsoda",
|
||||||
|
module: "camsoda",
|
||||||
|
ty: "CamsodaProvider",
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
|||||||
39
check.py
39
check.py
@@ -64,6 +64,23 @@ class Results:
|
|||||||
|
|
||||||
_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
||||||
|
|
||||||
|
# Domains known to be Cloudflare-protected and return 403/connection-refused to direct
|
||||||
|
# HTTP checks. URL reachability failures for these hosts are downgraded to warnings.
|
||||||
|
_CF_PROTECTED_HOSTS = {
|
||||||
|
"www.camsoda.com",
|
||||||
|
"camsoda.com",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_cf_protected(url: str) -> bool:
|
||||||
|
"""Return True if the URL's host is known to be CF-protected."""
|
||||||
|
try:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
host = urlparse(url).hostname or ""
|
||||||
|
return host in _CF_PROTECTED_HOSTS
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
|
def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
|
||||||
"""Return (ok, http_status). Tries HEAD then ranged GET on 405."""
|
"""Return (ok, http_status). Tries HEAD then ranged GET on 405."""
|
||||||
@@ -234,7 +251,10 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
|
|||||||
else:
|
else:
|
||||||
ok, code = http_ok(vurl)
|
ok, code = http_ok(vurl)
|
||||||
if not ok:
|
if not ok:
|
||||||
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
|
if _is_cf_protected(vurl):
|
||||||
|
results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}")
|
||||||
|
else:
|
||||||
|
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
|
||||||
else:
|
else:
|
||||||
results.info(channel_id, f"{label}: url OK (HTTP {code})")
|
results.info(channel_id, f"{label}: url OK (HTTP {code})")
|
||||||
|
|
||||||
@@ -275,11 +295,18 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
|
|||||||
results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
|
results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
|
||||||
yt, stderr = ytdlp_extract(ytdlp_url)
|
yt, stderr = ytdlp_extract(ytdlp_url)
|
||||||
if yt is None:
|
if yt is None:
|
||||||
results.err(
|
if _is_cf_protected(ytdlp_url):
|
||||||
channel_id,
|
results.warn(
|
||||||
f"{label}: yt-dlp failed for {ytdlp_url}"
|
channel_id,
|
||||||
+ (f": {stderr[:300]}" if stderr else ""),
|
f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)"
|
||||||
)
|
+ (f": {stderr[:200]}" if stderr else ""),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
results.err(
|
||||||
|
channel_id,
|
||||||
|
f"{label}: yt-dlp failed for {ytdlp_url}"
|
||||||
|
+ (f": {stderr[:300]}" if stderr else ""),
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
yt_title = (yt.get("title") or "").strip()
|
yt_title = (yt.get("title") or "").strip()
|
||||||
api_title = (video.get("title") or "").strip()
|
api_title = (video.get("title") or "").strip()
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
|
|||||||
| `pornhat` | `mainstream-tube` | no | no | Basic tube provider. |
|
| `pornhat` | `mainstream-tube` | no | no | Basic tube provider. |
|
||||||
| `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. |
|
| `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. |
|
||||||
| `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
|
| `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
|
||||||
|
| `pornhub-shorties` | `tiktok` | no | no | Pornhub Shorties vertical short-form clips; parses `JSON_SHORTIES` JS variable embedded in HTML; fields: vkey, videoTitle, linkUrl, imageUrl, likeNumber, dislikeNumber, name/profileUrl (uploader), pillsData (tags), trackingTimeWatched.video_duration; pagination via `?page=N`; search via `?search=query`; sort via `?sort=trending\|mostviewed\|top_rated\|hottest`; phncdn thumbnails require `Referer: https://www.pornhub.com/` (served via cdnReferrers in /api/status); yt-dlp resolves `video.url` natively (PornHub extractor); no proxy needed. |
|
||||||
| `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
|
| `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
|
||||||
| `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
|
| `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
|
||||||
| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
|
| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
|
||||||
@@ -70,6 +71,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
|
|||||||
| `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
|
| `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
|
||||||
| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
|
| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
|
||||||
| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
|
| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
|
||||||
|
| `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:`/`user:` query prefixes to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). |
|
||||||
| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
|
| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
|
||||||
|
|
||||||
## Proxy Routes
|
## Proxy Routes
|
||||||
|
|||||||
403
src/providers/camsoda.rs
Normal file
403
src/providers/camsoda.rs
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
use crate::DbPool;
|
||||||
|
use crate::api::ClientVersion;
|
||||||
|
use crate::providers::{
|
||||||
|
Provider, report_provider_error, requester_or_default,
|
||||||
|
};
|
||||||
|
use crate::status::*;
|
||||||
|
use crate::util::cache::VideoCache;
|
||||||
|
use crate::util::time::parse_time_to_seconds;
|
||||||
|
use crate::videos::{ServerOptions, VideoItem};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use error_chain::error_chain;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
/// Static channel metadata for the CamSoda provider: the channel group it
/// belongs to and its descriptive tags.
// NOTE(review): how `group_id` / `tags` are consumed is defined in
// `crate::providers`, which is not visible from this file.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "live-cams",
        tags: &["cams", "amateur", "recordings", "clips"],
    };
|
||||||
|
|
||||||
|
/// Site origin (no trailing slash); listing, video and uploader URLs are built from it.
const BASE_URL: &str = "https://www.camsoda.com";
/// Channel id: used as the channel's `id`, as the channel argument of every
/// `VideoItem`, as the prefix of `uploaderId`, and in error reports.
const CHANNEL_ID: &str = "camsoda";
|
||||||
|
|
||||||
|
// Generates this module's `Error` / `ErrorKind` / `Result` types via `error_chain`.
error_chain! {
    foreign_links {
        // Lets `std::io::Error` convert into this module's `Error`.
        Io(std::io::Error);
    }
    errors {
        // Custom error kind carrying a free-form message describing a parse failure.
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||||
|
|
||||||
|
/// Provider that scrapes CamSoda media listing pages into `VideoItem`s.
#[derive(Debug, Clone)]
pub struct CamsodaProvider {
    /// Base URL used when building listing URLs; `new()` sets it to `BASE_URL`.
    url: String,
}
|
||||||
|
|
||||||
|
/// Which CamSoda page a request should be served from.
#[derive(Debug, Clone)]
enum Target {
    /// Default listing at /media?page=N
    Listing,
    /// Model media page at /{username}/media
    Model { username: String },
}
|
||||||
|
|
||||||
|
impl CamsodaProvider {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
url: BASE_URL.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||||
|
Channel {
|
||||||
|
id: CHANNEL_ID.to_string(),
|
||||||
|
name: "CamSoda".to_string(),
|
||||||
|
description:
|
||||||
|
"CamSoda model video clips — recorded amateur cam shows uploaded by performers."
|
||||||
|
.to_string(),
|
||||||
|
premium: false,
|
||||||
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com".to_string(),
|
||||||
|
status: "active".to_string(),
|
||||||
|
categories: vec![],
|
||||||
|
options: vec![],
|
||||||
|
nsfw: true,
|
||||||
|
cacheDuration: Some(1800),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve the fetch target from query and options.
|
||||||
|
fn pick_target(query: Option<&str>) -> Target {
|
||||||
|
let Some(query) = query.map(str::trim).filter(|v| !v.is_empty()) else {
|
||||||
|
return Target::Listing;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Support "uploader:username" or "model:username" shortcuts.
|
||||||
|
for prefix in &["uploader:", "model:", "user:"] {
|
||||||
|
if let Some(username) = query.strip_prefix(prefix) {
|
||||||
|
let username = username.trim().to_lowercase();
|
||||||
|
if !username.is_empty() {
|
||||||
|
return Target::Model { username };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For other queries fall back to the default listing;
|
||||||
|
// the server will apply client-side substring filtering.
|
||||||
|
Target::Listing
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_listing_url(&self, target: &Target, page: u16) -> String {
|
||||||
|
let page = page.max(1);
|
||||||
|
match target {
|
||||||
|
Target::Listing => format!("{}/media?page={}", self.url, page),
|
||||||
|
Target::Model { username } => {
|
||||||
|
if page <= 1 {
|
||||||
|
format!("{}/{}/media", self.url, username)
|
||||||
|
} else {
|
||||||
|
format!("{}/{}/media?page={}", self.url, username, page)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse video cards from the HTML of a CamSoda media page.
|
||||||
|
///
|
||||||
|
/// The page contains anchor elements linking to individual video pages:
|
||||||
|
/// href="/{username}/media/{slug}/{id}"
|
||||||
|
///
|
||||||
|
/// Inside each anchor:
|
||||||
|
/// - `[class*="media-item-module__title"]` span: the video title
|
||||||
|
/// - `[class*="media-item-module__subtitle"]` span: "by UPLOADER (MM:SS)"
|
||||||
|
/// - `img[src*="media-secure.camsoda.com"]`: video-specific thumbnail
|
||||||
|
fn parse_html_items(html: &str) -> Vec<VideoItem> {
|
||||||
|
let document = Html::parse_document(html);
|
||||||
|
|
||||||
|
// Select all anchors linking to /{username}/media/{slug}/{id}
|
||||||
|
let anchor_sel = match Selector::parse(r#"a[href]"#) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
let title_sel = match Selector::parse(r#"[class*="media-item-module__title"]"#) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
let subtitle_sel = match Selector::parse(r#"[class*="media-item-module__subtitle"]"#) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
let img_sel = match Selector::parse(r#"img[src]"#) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Regex for parsing subtitle "by UPLOADER (MM:SS)"
|
||||||
|
let sub_re = match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Regex for media URL: /{username}/media/{slug}/{id}
|
||||||
|
let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(_) => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut items: Vec<VideoItem> = Vec::new();
|
||||||
|
let mut seen_ids: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
for anchor in document.select(&anchor_sel) {
|
||||||
|
let href = match anchor.value().attr("href") {
|
||||||
|
Some(h) => h,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let caps = match href_re.captures(href) {
|
||||||
|
Some(c) => c,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||||
|
let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||||
|
let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||||
|
|
||||||
|
if video_id.is_empty() || username.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !seen_ids.insert(video_id.clone()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Title
|
||||||
|
let title = anchor
|
||||||
|
.select(&title_sel)
|
||||||
|
.next()
|
||||||
|
.map(|el| el.text().collect::<String>().trim().to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let title = if title.is_empty() {
|
||||||
|
format!("CamSoda video {video_id}")
|
||||||
|
} else {
|
||||||
|
title
|
||||||
|
};
|
||||||
|
|
||||||
|
// Subtitle: "by UPLOADER (MM:SS)"
|
||||||
|
let subtitle = anchor
|
||||||
|
.select(&subtitle_sel)
|
||||||
|
.next()
|
||||||
|
.map(|el| el.text().collect::<String>().trim().to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) {
|
||||||
|
let u = sc.get(1).map(|m| m.as_str().trim().to_string()).unwrap_or_default();
|
||||||
|
let d = sc.get(2)
|
||||||
|
.and_then(|m| parse_time_to_seconds(m.as_str()))
|
||||||
|
.and_then(|s| u32::try_from(s).ok())
|
||||||
|
.unwrap_or(0);
|
||||||
|
(if u.is_empty() { None } else { Some(u) }, d)
|
||||||
|
} else {
|
||||||
|
(None, 0)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Thumbnail — prefer video-specific from media-secure.camsoda.com
|
||||||
|
let thumb = anchor
|
||||||
|
.select(&img_sel)
|
||||||
|
.filter_map(|img| img.value().attr("src"))
|
||||||
|
.find(|src| src.contains("media-secure.camsoda.com"))
|
||||||
|
.or_else(|| {
|
||||||
|
anchor
|
||||||
|
.select(&img_sel)
|
||||||
|
.filter_map(|img| img.value().attr("src"))
|
||||||
|
.find(|src| src.contains("livemediahost.com"))
|
||||||
|
})
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}");
|
||||||
|
|
||||||
|
let mut item = VideoItem::new(
|
||||||
|
video_id,
|
||||||
|
title,
|
||||||
|
page_url,
|
||||||
|
CHANNEL_ID.to_string(),
|
||||||
|
thumb,
|
||||||
|
duration,
|
||||||
|
);
|
||||||
|
item.uploader = uploader;
|
||||||
|
item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media"));
|
||||||
|
item.uploaderId = Some(format!("{CHANNEL_ID}:{username}"));
|
||||||
|
|
||||||
|
items.push(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
items
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_items(
|
||||||
|
&self,
|
||||||
|
target: &Target,
|
||||||
|
page: u16,
|
||||||
|
options: &ServerOptions,
|
||||||
|
) -> Result<Vec<VideoItem>> {
|
||||||
|
let url = self.build_listing_url(target, page);
|
||||||
|
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items");
|
||||||
|
let text = requester
|
||||||
|
.get(&url, None)
|
||||||
|
.await
|
||||||
|
.map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?;
|
||||||
|
|
||||||
|
// Guard against CF challenge pages slipping through
|
||||||
|
if text.contains("cf-browser-verification")
|
||||||
|
|| text.contains("cf-chl")
|
||||||
|
|| text.contains("Just a moment")
|
||||||
|
{
|
||||||
|
return Err(Error::from("cloudflare challenge page returned".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let items = Self::parse_html_items(&text);
|
||||||
|
Ok(items)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Provider for CamsodaProvider {
|
||||||
|
async fn get_videos(
|
||||||
|
&self,
|
||||||
|
cache: VideoCache,
|
||||||
|
pool: DbPool,
|
||||||
|
sort: String,
|
||||||
|
query: Option<String>,
|
||||||
|
page: String,
|
||||||
|
per_page: String,
|
||||||
|
options: ServerOptions,
|
||||||
|
) -> Vec<VideoItem> {
|
||||||
|
let _ = cache;
|
||||||
|
let _ = pool;
|
||||||
|
let _ = sort;
|
||||||
|
let _ = per_page;
|
||||||
|
|
||||||
|
let page = page.parse::<u16>().unwrap_or(1).max(1);
|
||||||
|
let normalized_query = query
|
||||||
|
.as_deref()
|
||||||
|
.map(str::trim)
|
||||||
|
.filter(|v| !v.is_empty())
|
||||||
|
.map(ToOwned::to_owned);
|
||||||
|
|
||||||
|
let target = Self::pick_target(normalized_query.as_deref());
|
||||||
|
|
||||||
|
match self.fetch_items(&target, page, &options).await {
|
||||||
|
Ok(items) => items,
|
||||||
|
Err(error) => {
|
||||||
|
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
||||||
|
Some(self.build_channel(clientversion))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unit tests for the pure helpers: HTML card parsing, target selection and
/// listing URL construction. Nothing here touches the network.
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-card fixture; the second card carries an extra "new" badge span.
    fn sample_html() -> String {
        // Simplified version of the HTML returned by Jina (X-Return-Format: html)
        r#"<!DOCTYPE html><html><body>
<a href="/lil-asian-jaz/media/torso-ride-begging-for-your-cum/16984249">
<span><span class="media-item-module__title--lrB0Q">Torso ride begging for your cum</span><span class="media-item-module__subtitle--VRwsa">by jazzyj (24:35)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/16984249/16984249.thumb.1777857871.webp" width="320" height="240"></div>
</a>
<a href="/coco-dethick/media/first-ir-bg-show-ft-johnny-love/17009049">
<span><span class="media-item-module__new--uXKvP">new</span><span class="media-item-module__title--lrB0Q">FIRST IR BG SHOW FT JOHNNY LOVE</span><span class="media-item-module__subtitle--VRwsa">by Coco Dethick (44:14)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17009049/17009049_1778619855.thumb.webp" width="320" height="240"></div>
</a>
<a href="/hot-wife-mia/media/ultimate-squirting-video/17112135">
<span><span class="media-item-module__title--lrB0Q">Ultimate squirting video!</span><span class="media-item-module__subtitle--VRwsa">by Hot Wife Mia (02:47)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17112135/17112135.thumb.1779200000.webp" width="320" height="240"></div>
</a>
</body></html>"#.to_string()
    }

    /// Every card yields one item with id, title, uploader, duration, URLs and thumbnail.
    #[test]
    fn parses_video_cards_from_html() {
        let items = CamsodaProvider::parse_html_items(&sample_html());
        assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::<Vec<_>>());

        let item = &items[0];
        assert_eq!(item.id, "16984249");
        assert_eq!(item.title, "Torso ride begging for your cum");
        // The uploader comes from the subtitle text, not from the URL slug.
        assert_eq!(item.uploader.as_deref(), Some("jazzyj"));
        assert_eq!(item.duration, 24 * 60 + 35);
        assert!(item.url.contains("16984249"), "url should contain id: {}", item.url);
        assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb);
        // Uploader URL and id are derived from the username segment of the href.
        assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media"));
        assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz"));

        let item2 = &items[1];
        assert_eq!(item2.id, "17009049");
        assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick"));

        let item3 = &items[2];
        assert_eq!(item3.id, "17112135");
        assert_eq!(item3.duration, 2 * 60 + 47);
    }

    /// A video id that occurs more than once in the page is emitted only once.
    #[test]
    fn deduplicates_items() {
        // Same video appears twice
        let html = sample_html();
        let doubled = format!("{html}\n{html}");
        let items = CamsodaProvider::parse_html_items(&doubled);
        assert_eq!(items.len(), 3, "should deduplicate to 3 unique items");
    }

    /// Plain or empty queries map to the listing; prefixed queries map to a model page.
    #[test]
    fn picks_target_correctly() {
        assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing));
        assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing));
        assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing));

        match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) {
            Target::Model { username } => assert_eq!(username, "lil-asian-jaz"),
            _ => panic!("expected Model target"),
        }
        match CamsodaProvider::pick_target(Some("model:katt-leya")) {
            Target::Model { username } => assert_eq!(username, "katt-leya"),
            _ => panic!("expected Model target"),
        }
    }

    /// The listing always carries `?page=N`; a model page only from page 2 onwards.
    #[test]
    fn builds_listing_urls_correctly() {
        let provider = CamsodaProvider::new();
        assert_eq!(
            provider.build_listing_url(&Target::Listing, 1),
            "https://www.camsoda.com/media?page=1"
        );
        assert_eq!(
            provider.build_listing_url(&Target::Listing, 3),
            "https://www.camsoda.com/media?page=3"
        );
        assert_eq!(
            provider.build_listing_url(
                &Target::Model { username: "lil-asian-jaz".to_string() },
                1
            ),
            "https://www.camsoda.com/lil-asian-jaz/media"
        );
        assert_eq!(
            provider.build_listing_url(
                &Target::Model { username: "lil-asian-jaz".to_string() },
                2
            ),
            "https://www.camsoda.com/lil-asian-jaz/media?page=2"
        );
    }
}
|
||||||
566
src/providers/pornhub_shorties.rs
Normal file
566
src/providers/pornhub_shorties.rs
Normal file
@@ -0,0 +1,566 @@
|
|||||||
|
use crate::DbPool;
|
||||||
|
use crate::api::ClientVersion;
|
||||||
|
use crate::providers::{Provider, report_provider_error, requester_or_default};
|
||||||
|
use crate::status::*;
|
||||||
|
use crate::util::cache::VideoCache;
|
||||||
|
use crate::videos::{ServerOptions, VideoItem};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use error_chain::error_chain;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
/// Static channel metadata for the Pornhub Shorties provider: the channel
/// group it belongs to and its descriptive tags.
// NOTE(review): how `group_id` / `tags` are consumed is defined in
// `crate::providers`, which is not visible from this file.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "tiktok",
        tags: &["shorts", "pornhub", "vertical"],
    };
|
||||||
|
|
||||||
|
/// Site origin (no trailing slash); `new()` stores it as the provider's base URL.
const BASE_URL: &str = "https://www.pornhub.com";
/// Identifier of this provider's channel.
const CHANNEL_ID: &str = "pornhub-shorties";
|
||||||
|
|
||||||
|
// Generates this module's `Error` / `ErrorKind` / `Result` types via `error_chain`.
error_chain! {
    foreign_links {
        // Each link lets the named foreign error type convert into this module's `Error`.
        Io(std::io::Error);
        HttpRequest(wreq::Error);
        Json(serde_json::Error);
    }
    errors {
        // Custom error kind carrying a free-form message describing a parse failure.
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
||||||
|
|
||||||
|
/// Provider for Pornhub "Shorties" (short-form clips).
#[derive(Debug, Clone)]
pub struct PornhubShortiesProvider {
    /// Base URL of the site; `new()` sets it to `BASE_URL`.
    url: String,
}
|
||||||
|
|
||||||
|
/// Which Shorties feed a request should be served from.
///
/// `sort` always holds a value produced by `normalize_sort`.
#[derive(Debug, Clone)]
enum Target {
    /// The browse feed (no search query).
    Home { sort: String },
    /// Results for a non-empty, trimmed search query.
    Search { query: String, sort: String },
}
|
||||||
|
|
||||||
|
/// Deserialized structure of each entry in the `JSON_SHORTIES` JS variable.
|
||||||
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
struct ShortieItem {
|
||||||
|
#[serde(rename = "vkey")]
|
||||||
|
vkey: String,
|
||||||
|
#[serde(rename = "videoTitle")]
|
||||||
|
video_title: String,
|
||||||
|
#[serde(rename = "linkUrl")]
|
||||||
|
link_url: String,
|
||||||
|
#[serde(rename = "imageUrl")]
|
||||||
|
image_url: Option<String>,
|
||||||
|
#[serde(rename = "likeNumber", default)]
|
||||||
|
like_number: u64,
|
||||||
|
#[serde(rename = "dislikeNumber", default)]
|
||||||
|
dislike_number: u64,
|
||||||
|
#[serde(rename = "name")]
|
||||||
|
name: Option<String>,
|
||||||
|
#[serde(rename = "profileUrl")]
|
||||||
|
profile_url: Option<String>,
|
||||||
|
#[serde(rename = "entityId")]
|
||||||
|
entity_id: Option<u64>,
|
||||||
|
#[serde(rename = "entityType")]
|
||||||
|
entity_type: Option<String>,
|
||||||
|
#[serde(rename = "trackingTimeWatched")]
|
||||||
|
tracking_time_watched: Option<TrackingTimeWatched>,
|
||||||
|
#[serde(rename = "pillsData", default)]
|
||||||
|
pills_data: Vec<PillData>,
|
||||||
|
#[serde(rename = "badges")]
|
||||||
|
badges: Option<Badges>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
struct TrackingTimeWatched {
|
||||||
|
#[serde(rename = "video_duration", default)]
|
||||||
|
video_duration: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
struct PillData {
|
||||||
|
#[serde(rename = "name")]
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
struct Badges {
|
||||||
|
#[serde(rename = "verified", default)]
|
||||||
|
verified: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PornhubShortiesProvider {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
url: BASE_URL.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_target(sort: &str, query: Option<&str>) -> Target {
|
||||||
|
let sort = Self::normalize_sort(sort).to_string();
|
||||||
|
match query.map(str::trim).filter(|q| !q.is_empty()) {
|
||||||
|
Some(q) => Target::Search {
|
||||||
|
query: q.to_string(),
|
||||||
|
sort,
|
||||||
|
},
|
||||||
|
None => Target::Home { sort },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps a caller-supplied sort key onto one of the canonical sort values.
///
/// Matching ignores surrounding whitespace and ASCII case. Anything that is
/// not a known alias falls back to `"new"`.
fn normalize_sort(sort: &str) -> &'static str {
    // (accepted alias, canonical value)
    const ALIASES: &[(&str, &str)] = &[
        ("trending", "trending"),
        ("popular", "mostviewed"),
        ("mv", "mostviewed"),
        ("mostviewed", "mostviewed"),
        ("top_rated", "top_rated"),
        ("tr", "top_rated"),
        ("toprated", "top_rated"),
        ("hottest", "hottest"),
        ("ht", "hottest"),
    ];

    let wanted = sort.trim();
    ALIASES
        .iter()
        .find(|(alias, _)| alias.eq_ignore_ascii_case(wanted))
        .map_or("new", |&(_, canonical)| canonical)
}
|
||||||
|
|
||||||
|
fn build_url(base: &str, target: &Target, page: u8) -> String {
|
||||||
|
match target {
|
||||||
|
Target::Home { sort } => {
|
||||||
|
if sort == "new" {
|
||||||
|
if page <= 1 {
|
||||||
|
format!("{base}/shorties")
|
||||||
|
} else {
|
||||||
|
format!("{base}/shorties?page={page}")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if page <= 1 {
|
||||||
|
format!("{base}/shorties?sort={sort}")
|
||||||
|
} else {
|
||||||
|
format!("{base}/shorties?sort={sort}&page={page}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Target::Search { query, sort } => {
|
||||||
|
let encoded = query.replace(' ', "+");
|
||||||
|
if sort == "new" {
|
||||||
|
if page <= 1 {
|
||||||
|
format!("{base}/shorties?search={encoded}")
|
||||||
|
} else {
|
||||||
|
format!("{base}/shorties?search={encoded}&page={page}")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if page <= 1 {
|
||||||
|
format!("{base}/shorties?search={encoded}&sort={sort}")
|
||||||
|
} else {
|
||||||
|
format!("{base}/shorties?search={encoded}&sort={sort}&page={page}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_json_shorties(html: &str) -> Result<Vec<ShortieItem>> {
|
||||||
|
// The page embeds: JSON_SHORTIES = insertAfterNthPosition([{...}, ...], AD_POSITION, ...)
|
||||||
|
// We locate the array by finding the '[' after JSON_SHORTIES and matching brackets.
|
||||||
|
let marker = "JSON_SHORTIES";
|
||||||
|
let start = html.find(marker).ok_or_else(|| {
|
||||||
|
Error::from(ErrorKind::Parse(
|
||||||
|
"JSON_SHORTIES marker not found in HTML".to_string(),
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let arr_start = html[start..].find('[').ok_or_else(|| {
|
||||||
|
Error::from(ErrorKind::Parse(
|
||||||
|
"JSON_SHORTIES array open bracket not found".to_string(),
|
||||||
|
))
|
||||||
|
})? + start;
|
||||||
|
|
||||||
|
// Walk the HTML to find the matching closing bracket.
|
||||||
|
let bytes = html.as_bytes();
|
||||||
|
let mut depth: i32 = 0;
|
||||||
|
let mut in_string = false;
|
||||||
|
let mut escape_next = false;
|
||||||
|
let mut arr_end = arr_start;
|
||||||
|
|
||||||
|
for (offset, &b) in bytes[arr_start..].iter().enumerate() {
|
||||||
|
if escape_next {
|
||||||
|
escape_next = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if b == b'\\' && in_string {
|
||||||
|
escape_next = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if b == b'"' {
|
||||||
|
in_string = !in_string;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if in_string {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match b {
|
||||||
|
b'[' => depth += 1,
|
||||||
|
b']' => {
|
||||||
|
depth -= 1;
|
||||||
|
if depth == 0 {
|
||||||
|
arr_end = arr_start + offset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if depth != 0 {
|
||||||
|
return Err(
|
||||||
|
ErrorKind::Parse("JSON_SHORTIES array bracket mismatch".to_string()).into(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let raw = &html[arr_start..=arr_end];
|
||||||
|
let items: Vec<ShortieItem> = serde_json::from_str(raw).map_err(|e| {
|
||||||
|
Error::from(ErrorKind::Parse(format!("JSON_SHORTIES parse error: {e}")))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(items)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn item_to_video(item: ShortieItem) -> Option<VideoItem> {
|
||||||
|
let id = item.vkey;
|
||||||
|
if id.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let title = item.video_title.trim().to_string();
|
||||||
|
if title.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let url = item.link_url.trim().to_string();
|
||||||
|
if url.is_empty() || !url.contains("/view_video.php") {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let thumb = item
|
||||||
|
.image_url
|
||||||
|
.unwrap_or_default()
|
||||||
|
.replace("\\/", "/")
|
||||||
|
.trim()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let duration = item
|
||||||
|
.tracking_time_watched
|
||||||
|
.map(|t| t.video_duration)
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
let mut video = VideoItem::new(
|
||||||
|
id,
|
||||||
|
title,
|
||||||
|
url,
|
||||||
|
CHANNEL_ID.to_string(),
|
||||||
|
thumb,
|
||||||
|
duration,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Rating from likes / (likes + dislikes)
|
||||||
|
let total = item.like_number + item.dislike_number;
|
||||||
|
if total > 0 {
|
||||||
|
video.rating = Some((item.like_number as f32 / total as f32) * 100.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uploader
|
||||||
|
if let Some(name) = item.name.filter(|n| !n.is_empty()) {
|
||||||
|
video.uploader = Some(name.clone());
|
||||||
|
if let Some(profile_url) = item
|
||||||
|
.profile_url
|
||||||
|
.map(|u| u.replace("\\/", "/"))
|
||||||
|
.filter(|u| !u.is_empty())
|
||||||
|
{
|
||||||
|
video.uploaderUrl = Some(profile_url.clone());
|
||||||
|
// Build namespaced uploader ID from entity_type + entity_id
|
||||||
|
if let (Some(et), Some(eid)) = (item.entity_type.as_deref(), item.entity_id) {
|
||||||
|
let kind = match et {
|
||||||
|
"Mpp" | "Model" => "model",
|
||||||
|
"Channel" => "channels",
|
||||||
|
"Pornstar" => "pornstar",
|
||||||
|
_ => "model",
|
||||||
|
};
|
||||||
|
video.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{eid}"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tags from pillsData
|
||||||
|
let tags: Vec<String> = item.pills_data.into_iter().map(|p| p.name).collect();
|
||||||
|
if !tags.is_empty() {
|
||||||
|
video.tags = Some(tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verified badge
|
||||||
|
video.verified = item.badges.map(|b| b.verified).filter(|&v| v);
|
||||||
|
|
||||||
|
Some(video)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||||
|
Channel {
|
||||||
|
id: CHANNEL_ID.to_string(),
|
||||||
|
name: "PH Shorties".to_string(),
|
||||||
|
description: "Pornhub Shorties — vertical short-form porn clips.".to_string(),
|
||||||
|
premium: false,
|
||||||
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
|
||||||
|
status: "active".to_string(),
|
||||||
|
categories: vec![],
|
||||||
|
options: vec![ChannelOption {
|
||||||
|
id: "sort".to_string(),
|
||||||
|
title: "Sort".to_string(),
|
||||||
|
description: "Browse Pornhub Shorties by sort order.".to_string(),
|
||||||
|
systemImage: "list.number".to_string(),
|
||||||
|
colorName: "orange".to_string(),
|
||||||
|
options: vec![
|
||||||
|
FilterOption {
|
||||||
|
id: "new".to_string(),
|
||||||
|
title: "New".to_string(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "trending".to_string(),
|
||||||
|
title: "Trending".to_string(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "mostviewed".to_string(),
|
||||||
|
title: "Most Viewed".to_string(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "top_rated".to_string(),
|
||||||
|
title: "Top Rated".to_string(),
|
||||||
|
},
|
||||||
|
FilterOption {
|
||||||
|
id: "hottest".to_string(),
|
||||||
|
title: "Hottest".to_string(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
multiSelect: false,
|
||||||
|
}],
|
||||||
|
nsfw: true,
|
||||||
|
cacheDuration: Some(1800),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_videos(
|
||||||
|
&self,
|
||||||
|
cache: VideoCache,
|
||||||
|
page: u8,
|
||||||
|
sort: &str,
|
||||||
|
query: Option<&str>,
|
||||||
|
options: ServerOptions,
|
||||||
|
) -> Result<Vec<VideoItem>> {
|
||||||
|
let target = Self::build_target(sort, query);
|
||||||
|
let fetch_url = Self::build_url(&self.url, &target, page);
|
||||||
|
|
||||||
|
// Cache hit
|
||||||
|
let old_items = match cache.get(&fetch_url) {
|
||||||
|
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
|
||||||
|
return Ok(items.clone());
|
||||||
|
}
|
||||||
|
Some((_, items)) => items.clone(),
|
||||||
|
None => vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_videos");
|
||||||
|
|
||||||
|
let text = match requester
|
||||||
|
.get_with_headers(
|
||||||
|
&fetch_url,
|
||||||
|
vec![
|
||||||
|
("Referer".to_string(), format!("{}/shorties", self.url)),
|
||||||
|
(
|
||||||
|
"Accept".to_string(),
|
||||||
|
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
|
||||||
|
.to_string(),
|
||||||
|
),
|
||||||
|
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
|
||||||
|
],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(text) => text,
|
||||||
|
Err(error) => {
|
||||||
|
report_provider_error(
|
||||||
|
CHANNEL_ID,
|
||||||
|
"fetch_videos.request",
|
||||||
|
&format!("url={fetch_url}; error={error}"),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
return Ok(old_items);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let shorties = match Self::extract_json_shorties(&text) {
|
||||||
|
Ok(items) => items,
|
||||||
|
Err(error) => {
|
||||||
|
report_provider_error(
|
||||||
|
CHANNEL_ID,
|
||||||
|
"fetch_videos.parse",
|
||||||
|
&format!("url={fetch_url}; error={error}"),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
return Ok(old_items);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let items: Vec<VideoItem> = shorties
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(Self::item_to_video)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if items.is_empty() {
|
||||||
|
return Ok(old_items);
|
||||||
|
}
|
||||||
|
|
||||||
|
cache.remove(&fetch_url);
|
||||||
|
cache.insert(fetch_url, items.clone());
|
||||||
|
Ok(items)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Provider for PornhubShortiesProvider {
|
||||||
|
async fn get_videos(
|
||||||
|
&self,
|
||||||
|
cache: VideoCache,
|
||||||
|
pool: DbPool,
|
||||||
|
sort: String,
|
||||||
|
query: Option<String>,
|
||||||
|
page: String,
|
||||||
|
per_page: String,
|
||||||
|
options: ServerOptions,
|
||||||
|
) -> Vec<VideoItem> {
|
||||||
|
let _ = pool;
|
||||||
|
let _ = per_page;
|
||||||
|
|
||||||
|
let page = page.parse::<u8>().unwrap_or(1);
|
||||||
|
|
||||||
|
match self
|
||||||
|
.fetch_videos(cache, page, &sort, query.as_deref(), options)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(items) => items,
|
||||||
|
Err(error) => {
|
||||||
|
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
||||||
|
Some(self.build_channel(clientversion))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a one-item page fixture and checks both the raw item and the
    /// `VideoItem` converted from it.
    #[test]
    fn parses_json_shorties_from_html() {
        let html = r#"
            var JSON_SHORTIES = insertAfterNthPosition([
                {
                    "videoId": 123456,
                    "vkey": "abcdef1234567",
                    "videoTitle": "Test Short Video",
                    "favoriteInfo": "1K",
                    "likeInfo": "2K",
                    "likeNumber": 2000,
                    "dislikeNumber": 100,
                    "isHD": true,
                    "linkUrl": "https:\/\/www.pornhub.com\/view_video.php?viewkey=abcdef1234567",
                    "shortieUrl": "https:\/\/www.pornhub.com\/shorties\/abcdef1234567",
                    "embedUrl": "",
                    "imageUrl": "https:\/\/example.com\/thumb.jpg",
                    "mediaPriority": "hls",
                    "mediaDefinitions": [],
                    "isFavorite": false,
                    "isReported": false,
                    "isSubscribed": false,
                    "userVoteDetail": -1,
                    "trackingTimeWatched": {"video_duration": 45},
                    "pillsData": [{"name": "Amateur", "slug": "amateur", "type": "category"}],
                    "badges": {"verified": true, "premium": false, "award": false},
                    "name": "TestUser",
                    "profileUrl": "https:\/\/www.pornhub.com\/model\/testuser",
                    "entityType": "Mpp",
                    "entityId": 9876543
                }
            ], AD_POSITION, {});
        "#;

        let items = PornhubShortiesProvider::extract_json_shorties(html)
            .expect("should parse JSON_SHORTIES");
        assert_eq!(items.len(), 1);

        let first = &items[0];
        assert_eq!(first.vkey, "abcdef1234567");
        assert_eq!(first.video_title, "Test Short Video");
        assert_eq!(
            first.link_url,
            "https://www.pornhub.com/view_video.php?viewkey=abcdef1234567"
        );
        assert_eq!(first.like_number, 2000);
        assert_eq!(first.dislike_number, 100);
        assert_eq!(
            first.tracking_time_watched.as_ref().map(|t| t.video_duration),
            Some(45)
        );
        assert_eq!(first.pills_data[0].name, "Amateur");
        assert_eq!(first.name.as_deref(), Some("TestUser"));

        let video = PornhubShortiesProvider::item_to_video(first.clone())
            .expect("should convert to VideoItem");
        assert_eq!(video.id, "abcdef1234567");
        assert_eq!(video.duration, 45);
        // 2000 likes out of 2100 votes is roughly 95.23 %.
        assert!(video.rating.is_some_and(|r| (r - 95.23).abs() < 0.1));
        assert_eq!(video.uploader.as_deref(), Some("TestUser"));
        assert!(video
            .tags
            .as_ref()
            .is_some_and(|t| t.contains(&"Amateur".to_string())));
    }

    /// The default sort adds no `sort` parameter; `page` appears from page 2 on.
    #[test]
    fn build_url_home_new() {
        let home = Target::Home {
            sort: "new".to_string(),
        };

        let first_page = PornhubShortiesProvider::build_url(BASE_URL, &home, 1);
        assert_eq!(first_page, "https://www.pornhub.com/shorties");

        let second_page = PornhubShortiesProvider::build_url(BASE_URL, &home, 2);
        assert_eq!(second_page, "https://www.pornhub.com/shorties?page=2");
    }

    /// A search on the default sort carries only the `search` parameter.
    #[test]
    fn build_url_search() {
        let search = Target::Search {
            query: "teen".to_string(),
            sort: "new".to_string(),
        };

        let url = PornhubShortiesProvider::build_url(BASE_URL, &search, 1);
        assert_eq!(url, "https://www.pornhub.com/shorties?search=teen");
    }

    /// Known keys map to their canonical value; unknown keys fall back to `new`.
    #[test]
    fn normalize_sort_variants() {
        let cases = [
            ("new", "new"),
            ("trending", "trending"),
            ("popular", "mostviewed"),
            ("unknown", "new"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(PornhubShortiesProvider::normalize_sort(input), *expected);
        }
    }
}
|
||||||
Reference in New Issue
Block a user