camsoda and pornhub shorties

This commit is contained in:
Simon
2026-05-22 10:26:05 +00:00
committed by ForgeCode
parent e4941b1209
commit 95d2defa13
5 changed files with 1014 additions and 6 deletions

View File

@@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "pornhub", module: "pornhub",
ty: "PornhubProvider", ty: "PornhubProvider",
}, },
ProviderDef {
id: "pornhub-shorties",
module: "pornhub_shorties",
ty: "PornhubShortiesProvider",
},
ProviderDef { ProviderDef {
id: "youporn", id: "youporn",
module: "youporn", module: "youporn",
@@ -341,6 +346,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "xvideos", module: "xvideos",
ty: "XvideosProvider", ty: "XvideosProvider",
}, },
ProviderDef {
id: "camsoda",
module: "camsoda",
ty: "CamsodaProvider",
},
]; ];
fn main() { fn main() {

View File

@@ -64,6 +64,23 @@ class Results:
_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0" _BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
# Domains known to be Cloudflare-protected and return 403/connection-refused to direct
# HTTP checks. URL reachability failures for these hosts are downgraded to warnings.
_CF_PROTECTED_HOSTS = {
"www.camsoda.com",
"camsoda.com",
}
def _is_cf_protected(url: str) -> bool:
"""Return True if the URL's host is known to be CF-protected."""
try:
from urllib.parse import urlparse
host = urlparse(url).hostname or ""
return host in _CF_PROTECTED_HOSTS
except Exception:
return False
def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]: def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
"""Return (ok, http_status). Tries HEAD then ranged GET on 405.""" """Return (ok, http_status). Tries HEAD then ranged GET on 405."""
@@ -234,7 +251,10 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
else: else:
ok, code = http_ok(vurl) ok, code = http_ok(vurl)
if not ok: if not ok:
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}") if _is_cf_protected(vurl):
results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}")
else:
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
else: else:
results.info(channel_id, f"{label}: url OK (HTTP {code})") results.info(channel_id, f"{label}: url OK (HTTP {code})")
@@ -275,11 +295,18 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}") results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
yt, stderr = ytdlp_extract(ytdlp_url) yt, stderr = ytdlp_extract(ytdlp_url)
if yt is None: if yt is None:
results.err( if _is_cf_protected(ytdlp_url):
channel_id, results.warn(
f"{label}: yt-dlp failed for {ytdlp_url}" channel_id,
+ (f": {stderr[:300]}" if stderr else ""), f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)"
) + (f": {stderr[:200]}" if stderr else ""),
)
else:
results.err(
channel_id,
f"{label}: yt-dlp failed for {ytdlp_url}"
+ (f": {stderr[:300]}" if stderr else ""),
)
else: else:
yt_title = (yt.get("title") or "").strip() yt_title = (yt.get("title") or "").strip()
api_title = (video.get("title") or "").strip() api_title = (video.get("title") or "").strip()

View File

@@ -40,6 +40,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `pornhat` | `mainstream-tube` | no | no | Basic tube provider. | | `pornhat` | `mainstream-tube` | no | no | Basic tube provider. |
| `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. | | `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. |
| `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. | | `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
| `pornhub-shorties` | `tiktok` | no | no | Pornhub Shorties vertical short-form clips; parses `JSON_SHORTIES` JS variable embedded in HTML; fields: vkey, videoTitle, linkUrl, imageUrl, likeNumber, dislikeNumber, name/profileUrl (uploader), pillsData (tags), trackingTimeWatched.video_duration; pagination via `?page=N`; search via `?search=query`; sort via `?sort=trending\|mostviewed\|top_rated\|hottest`; phncdn thumbnails require `Referer: https://www.pornhub.com/` (served via cdnReferrers in /api/status); yt-dlp resolves `video.url` natively (PornHub extractor); no proxy needed. |
| `pornmz` | `mainstream-tube` | no | no | Mainstream archive. | | `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
| `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. | | `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. | | `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
@@ -70,6 +71,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
| `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. | | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. | | `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. | | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
| `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:`/`user:` query prefixes to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). |
| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. | | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
## Proxy Routes ## Proxy Routes

403
src/providers/camsoda.rs Normal file
View File

@@ -0,0 +1,403 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use scraper::{Html, Selector};
use std::collections::HashSet;
/// Static channel metadata for this provider: its channel group and descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "live-cams",
tags: &["cams", "amateur", "recordings", "clips"],
};
/// Site root: the default provider URL, also used to build absolute video and uploader links.
const BASE_URL: &str = "https://www.camsoda.com";
/// Channel id: used for the channel descriptor, error reports and as the uploader-id prefix.
const CHANNEL_ID: &str = "camsoda";
// Declares this module's error types through the `error_chain!` macro; the rest of the
// file uses the resulting `Error` and `Result` names. `std::io::Error` is linked as a
// foreign error, and `Parse` is a custom kind carrying a free-form message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for the `camsoda` channel.
#[derive(Debug, Clone)]
pub struct CamsodaProvider {
/// Base URL that listing URLs are built from; `new()` sets it to `BASE_URL`.
url: String,
}
/// The page a request resolves to; `build_listing_url` turns it into a concrete URL.
#[derive(Debug, Clone)]
enum Target {
/// Default listing at /media?page=N
Listing,
/// Model media page at /{username}/media
Model { username: String },
}
impl CamsodaProvider {
/// Creates a provider that targets the default CamSoda base URL.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Builds the channel descriptor advertised for this provider.
///
/// The client version is accepted but not consulted: every client receives the same
/// descriptor, with no categories and no options.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let description =
        "CamSoda model video clips — recorded amateur cam shows uploaded by performers.";
    let favicon = "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com";

    Channel {
        id: String::from(CHANNEL_ID),
        name: String::from("CamSoda"),
        description: String::from(description),
        premium: false,
        favicon: String::from(favicon),
        status: String::from("active"),
        categories: Vec::new(),
        options: Vec::new(),
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Resolves the fetch target from the raw query string.
///
/// A query of the form `uploader:NAME`, `model:NAME` or `user:NAME` (prefixes are
/// matched case-sensitively, in that order) selects that model's media page; the name is
/// trimmed and lower-cased. A missing or blank query, a prefix with an empty name, or any
/// other text selects the default listing.
fn pick_target(query: Option<&str>) -> Target {
    const MODEL_PREFIXES: [&str; 3] = ["uploader:", "model:", "user:"];

    // A blank query matches no prefix, so it falls through to the listing below.
    let query = query.map(str::trim).unwrap_or("");

    MODEL_PREFIXES
        .iter()
        .filter_map(|prefix| query.strip_prefix(*prefix))
        .map(|rest| rest.trim().to_lowercase())
        .find(|username| !username.is_empty())
        .map_or(Target::Listing, |username| Target::Model { username })
}
/// Builds the URL to fetch for `target` at the given 1-based `page` (0 is treated as 1).
///
/// * `Target::Listing` always carries an explicit `?page=N`.
/// * `Target::Model` omits the page parameter on the first page.
///
/// The username originates from the client's query, so it is percent-encoded as a single
/// path segment: characters such as `/`, `?`, `#` or spaces can no longer alter the
/// structure of the generated URL.
fn build_listing_url(&self, target: &Target, page: u16) -> String {
    /// Percent-encodes every byte outside the RFC 3986 "unreserved" set.
    fn encode_path_segment(segment: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(segment.len());
        for byte in segment.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                    encoded.push(char::from(byte));
                }
                _ => {
                    encoded.push('%');
                    encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                    encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
                }
            }
        }
        encoded
    }

    let page = page.max(1);
    match target {
        Target::Listing => format!("{}/media?page={}", self.url, page),
        Target::Model { username } => {
            let username = encode_path_segment(username);
            if page <= 1 {
                format!("{}/{}/media", self.url, username)
            } else {
                format!("{}/{}/media?page={}", self.url, username, page)
            }
        }
    }
}
/// Parse video cards from the HTML of a CamSoda media page.
///
/// The page contains anchor elements linking to individual video pages:
/// href="/{username}/media/{slug}/{id}"
///
/// Inside each anchor:
/// - `[class*="media-item-module__title"]` span: the video title
/// - `[class*="media-item-module__subtitle"]` span: "by UPLOADER (MM:SS)"
/// - `img[src*="media-secure.camsoda.com"]`: video-specific thumbnail
fn parse_html_items(html: &str) -> Vec<VideoItem> {
let document = Html::parse_document(html);
// Select all anchors linking to /{username}/media/{slug}/{id}
let anchor_sel = match Selector::parse(r#"a[href]"#) {
Ok(s) => s,
Err(_) => return vec![],
};
let title_sel = match Selector::parse(r#"[class*="media-item-module__title"]"#) {
Ok(s) => s,
Err(_) => return vec![],
};
let subtitle_sel = match Selector::parse(r#"[class*="media-item-module__subtitle"]"#) {
Ok(s) => s,
Err(_) => return vec![],
};
let img_sel = match Selector::parse(r#"img[src]"#) {
Ok(s) => s,
Err(_) => return vec![],
};
// Regex for parsing subtitle "by UPLOADER (MM:SS)"
let sub_re = match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") {
Ok(r) => r,
Err(_) => return vec![],
};
// Regex for media URL: /{username}/media/{slug}/{id}
let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") {
Ok(r) => r,
Err(_) => return vec![],
};
let mut items: Vec<VideoItem> = Vec::new();
let mut seen_ids: HashSet<String> = HashSet::new();
for anchor in document.select(&anchor_sel) {
let href = match anchor.value().attr("href") {
Some(h) => h,
None => continue,
};
let caps = match href_re.captures(href) {
Some(c) => c,
None => continue,
};
let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string();
if video_id.is_empty() || username.is_empty() {
continue;
}
if !seen_ids.insert(video_id.clone()) {
continue;
}
// Title
let title = anchor
.select(&title_sel)
.next()
.map(|el| el.text().collect::<String>().trim().to_string())
.unwrap_or_default();
let title = if title.is_empty() {
format!("CamSoda video {video_id}")
} else {
title
};
// Subtitle: "by UPLOADER (MM:SS)"
let subtitle = anchor
.select(&subtitle_sel)
.next()
.map(|el| el.text().collect::<String>().trim().to_string())
.unwrap_or_default();
let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) {
let u = sc.get(1).map(|m| m.as_str().trim().to_string()).unwrap_or_default();
let d = sc.get(2)
.and_then(|m| parse_time_to_seconds(m.as_str()))
.and_then(|s| u32::try_from(s).ok())
.unwrap_or(0);
(if u.is_empty() { None } else { Some(u) }, d)
} else {
(None, 0)
};
// Thumbnail — prefer video-specific from media-secure.camsoda.com
let thumb = anchor
.select(&img_sel)
.filter_map(|img| img.value().attr("src"))
.find(|src| src.contains("media-secure.camsoda.com"))
.or_else(|| {
anchor
.select(&img_sel)
.filter_map(|img| img.value().attr("src"))
.find(|src| src.contains("livemediahost.com"))
})
.unwrap_or("")
.to_string();
let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}");
let mut item = VideoItem::new(
video_id,
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
item.uploader = uploader;
item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media"));
item.uploaderId = Some(format!("{CHANNEL_ID}:{username}"));
items.push(item);
}
items
}
/// Fetches the listing page for `target`/`page` and parses it into video items.
///
/// # Errors
/// * the request fails (the error message includes the URL), or
/// * no items could be parsed **and** the body carries a Cloudflare challenge marker.
///
/// The challenge check runs only when parsing produced nothing. The markers are plain
/// substrings ("Just a moment", "cf-chl", ...), so checking them unconditionally would
/// reject a genuine listing whenever one of them appears in its text.
async fn fetch_items(
    &self,
    target: &Target,
    page: u16,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    const CHALLENGE_MARKERS: [&str; 3] = ["cf-browser-verification", "cf-chl", "Just a moment"];

    let url = self.build_listing_url(target, page);
    let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items");
    let text = requester
        .get(&url, None)
        .await
        .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?;

    let items = Self::parse_html_items(&text);

    // Guard against CF challenge pages slipping through.
    if items.is_empty()
        && CHALLENGE_MARKERS
            .iter()
            .any(|marker| text.contains(*marker))
    {
        return Err(Error::from("cloudflare challenge page returned".to_string()));
    }

    Ok(items)
}
}
#[async_trait]
impl Provider for CamsodaProvider {
    /// Returns the videos for the requested page.
    ///
    /// `cache`, `pool`, `sort` and `per_page` are not used by this provider. `page` falls
    /// back to 1 when it is not a valid `u16` (and 0 is raised to 1). A failed fetch is
    /// reported through `report_provider_error` and yields an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = cache;
        let _ = pool;
        let _ = sort;
        let _ = per_page;

        let page_number = page.parse::<u16>().map_or(1, |n| n.max(1));
        let trimmed_query = query.as_deref().map(str::trim).filter(|q| !q.is_empty());
        let target = Self::pick_target(trimmed_query);

        match self.fetch_items(&target, page_number, &options).await {
            Ok(items) => items,
            Err(error) => {
                let message = error.to_string();
                report_provider_error(CHANNEL_ID, "get_videos", &message).await;
                Vec::new()
            }
        }
    }

    /// Returns this provider's channel descriptor; always `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
// Unit tests for the pure helpers: HTML parsing, de-duplication, target selection and
// listing-URL construction. Nothing here performs network I/O.
#[cfg(test)]
mod tests {
use super::*;
// Fixture: three video cards, each a single anchor holding title, subtitle and thumbnail.
fn sample_html() -> String {
// Simplified version of the HTML returned by Jina (X-Return-Format: html)
r#"<!DOCTYPE html><html><body>
<a href="/lil-asian-jaz/media/torso-ride-begging-for-your-cum/16984249">
<span><span class="media-item-module__title--lrB0Q">Torso ride begging for your cum</span><span class="media-item-module__subtitle--VRwsa">by jazzyj (24:35)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/16984249/16984249.thumb.1777857871.webp" width="320" height="240"></div>
</a>
<a href="/coco-dethick/media/first-ir-bg-show-ft-johnny-love/17009049">
<span><span class="media-item-module__new--uXKvP">new</span><span class="media-item-module__title--lrB0Q">FIRST IR BG SHOW FT JOHNNY LOVE</span><span class="media-item-module__subtitle--VRwsa">by Coco Dethick (44:14)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17009049/17009049_1778619855.thumb.webp" width="320" height="240"></div>
</a>
<a href="/hot-wife-mia/media/ultimate-squirting-video/17112135">
<span><span class="media-item-module__title--lrB0Q">Ultimate squirting video!</span><span class="media-item-module__subtitle--VRwsa">by Hot Wife Mia (02:47)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17112135/17112135.thumb.1779200000.webp" width="320" height="240"></div>
</a>
</body></html>"#.to_string()
}
// Checks id, title, uploader, duration, URL, thumbnail and uploader links of parsed cards.
#[test]
fn parses_video_cards_from_html() {
let items = CamsodaProvider::parse_html_items(&sample_html());
assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::<Vec<_>>());
let item = &items[0];
assert_eq!(item.id, "16984249");
assert_eq!(item.title, "Torso ride begging for your cum");
assert_eq!(item.uploader.as_deref(), Some("jazzyj"));
assert_eq!(item.duration, 24 * 60 + 35);
assert!(item.url.contains("16984249"), "url should contain id: {}", item.url);
assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb);
// The uploader URL/id come from the href's username segment, not from the subtitle name.
assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media"));
assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz"));
let item2 = &items[1];
assert_eq!(item2.id, "17009049");
assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick"));
let item3 = &items[2];
assert_eq!(item3.id, "17112135");
assert_eq!(item3.duration, 2 * 60 + 47);
}
// Feeding the fixture twice must still yield each video id only once.
#[test]
fn deduplicates_items() {
// Same video appears twice
let html = sample_html();
let doubled = format!("{html}\n{html}");
let items = CamsodaProvider::parse_html_items(&doubled);
assert_eq!(items.len(), 3, "should deduplicate to 3 unique items");
}
// Missing, empty and plain-keyword queries map to the listing; prefixed ones to a model.
#[test]
fn picks_target_correctly() {
assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing));
assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing));
assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing));
match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) {
Target::Model { username } => assert_eq!(username, "lil-asian-jaz"),
_ => panic!("expected Model target"),
}
match CamsodaProvider::pick_target(Some("model:katt-leya")) {
Target::Model { username } => assert_eq!(username, "katt-leya"),
_ => panic!("expected Model target"),
}
}
// The listing always carries `?page=N`; a model page omits it on page 1.
#[test]
fn builds_listing_urls_correctly() {
let provider = CamsodaProvider::new();
assert_eq!(
provider.build_listing_url(&Target::Listing, 1),
"https://www.camsoda.com/media?page=1"
);
assert_eq!(
provider.build_listing_url(&Target::Listing, 3),
"https://www.camsoda.com/media?page=3"
);
assert_eq!(
provider.build_listing_url(
&Target::Model { username: "lil-asian-jaz".to_string() },
1
),
"https://www.camsoda.com/lil-asian-jaz/media"
);
assert_eq!(
provider.build_listing_url(
&Target::Model { username: "lil-asian-jaz".to_string() },
2
),
"https://www.camsoda.com/lil-asian-jaz/media?page=2"
);
}
}

View File

@@ -0,0 +1,566 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use serde::Deserialize;
/// Static channel metadata for this provider: its channel group and descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["shorts", "pornhub", "vertical"],
};
/// Site root: the default provider URL that `/shorties` URLs are built from.
const BASE_URL: &str = "https://www.pornhub.com";
/// Channel id: used for the channel descriptor, error reports and as the uploader-id prefix.
const CHANNEL_ID: &str = "pornhub-shorties";
// Declares this module's error types through the `error_chain!` macro; the rest of the
// file uses the resulting `Error`, `ErrorKind` and `Result` names. I/O, HTTP (`wreq`) and
// JSON errors are linked as foreign errors; `Parse` is a custom kind carrying a message.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for the `pornhub-shorties` channel.
#[derive(Debug, Clone)]
pub struct PornhubShortiesProvider {
/// Base URL that request URLs are built from; `new()` sets it to `BASE_URL`.
url: String,
}
/// The kind of page a request resolves to. `build_target` fills `sort` with the value
/// returned by `normalize_sort`.
#[derive(Debug, Clone)]
enum Target {
/// Plain feed, used when there is no (non-blank) search query.
Home { sort: String },
/// Search results for the trimmed `query`.
Search { query: String, sort: String },
}
/// One entry of the `JSON_SHORTIES` array embedded in the Shorties page.
///
/// Every JSON key is the camelCase form of its field name (`video_title` ⇄ `videoTitle`).
/// `vkey`, `videoTitle` and `linkUrl` are required. The like/dislike counters and the
/// pills list fall back to their defaults when absent; all other fields are optional.
#[derive(Clone, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ShortieItem {
    vkey: String,
    video_title: String,
    link_url: String,
    image_url: Option<String>,
    #[serde(default)]
    like_number: u64,
    #[serde(default)]
    dislike_number: u64,
    name: Option<String>,
    profile_url: Option<String>,
    entity_id: Option<u64>,
    entity_type: Option<String>,
    tracking_time_watched: Option<TrackingTimeWatched>,
    #[serde(default)]
    pills_data: Vec<PillData>,
    badges: Option<Badges>,
}
/// The `trackingTimeWatched` object of a shortie entry; only the duration is read.
#[derive(Clone, Debug, Deserialize)]
struct TrackingTimeWatched {
    /// Read from the `video_duration` key; 0 when the key is absent.
    #[serde(default)]
    video_duration: u32,
}
#[derive(Debug, Deserialize, Clone)]
struct PillData {
#[serde(rename = "name")]
name: String,
}
/// The `badges` object of a shortie entry; only the `verified` flag is read.
#[derive(Clone, Debug, Deserialize)]
struct Badges {
    /// `false` when the key is absent.
    #[serde(default)]
    verified: bool,
}
impl PornhubShortiesProvider {
/// Creates a provider that targets the default Pornhub base URL.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Resolves the request target: a search when `query` has non-blank text, otherwise the
/// home feed. The sort key is normalised with `normalize_sort` in both cases.
fn build_target(sort: &str, query: Option<&str>) -> Target {
    let sort = String::from(Self::normalize_sort(sort));
    let Some(query) = query.map(str::trim).filter(|q| !q.is_empty()) else {
        return Target::Home { sort };
    };
    Target::Search {
        query: String::from(query),
        sort,
    }
}
/// Maps a client-supplied sort key onto one of the canonical sort values.
///
/// Matching ignores surrounding whitespace and ASCII case. Known aliases resolve to
/// `trending`, `mostviewed`, `top_rated` or `hottest`; everything else resolves to `new`.
fn normalize_sort(sort: &str) -> &'static str {
    // (accepted spelling, canonical value)
    const ALIASES: &[(&str, &str)] = &[
        ("trending", "trending"),
        ("popular", "mostviewed"),
        ("mv", "mostviewed"),
        ("mostviewed", "mostviewed"),
        ("top_rated", "top_rated"),
        ("tr", "top_rated"),
        ("toprated", "top_rated"),
        ("hottest", "hottest"),
        ("ht", "hottest"),
    ];

    let requested = sort.trim();
    ALIASES
        .iter()
        .find(|(alias, _)| requested.eq_ignore_ascii_case(alias))
        .map_or("new", |&(_, canonical)| canonical)
}
/// Builds the `/shorties` URL for `target` and `page`.
///
/// Query parameters are appended in the order `search`, `sort`, `page`:
/// * `search` — only for `Target::Search`; the text is form-encoded,
/// * `sort`   — omitted for the default `new` ordering,
/// * `page`   — omitted for the first page (`page <= 1`).
///
/// The search text comes from the client. Everything other than unreserved characters is
/// percent-encoded (spaces become `+`), so characters such as `&`, `#`, `%`, `+` or `=`
/// can no longer truncate the search term or inject extra parameters.
fn build_url(base: &str, target: &Target, page: u8) -> String {
    /// Form-encodes one query-string value: unreserved bytes pass through, a space
    /// becomes `+`, every other byte becomes `%XX`.
    fn encode_query_value(raw: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(raw.len());
        for byte in raw.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                    encoded.push(char::from(byte));
                }
                b' ' => encoded.push('+'),
                _ => {
                    encoded.push('%');
                    encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                    encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
                }
            }
        }
        encoded
    }

    let mut params: Vec<String> = Vec::with_capacity(3);

    let sort = match target {
        Target::Home { sort } => sort,
        Target::Search { query, sort } => {
            params.push(format!("search={}", encode_query_value(query)));
            sort
        }
    };
    if sort.as_str() != "new" {
        params.push(format!("sort={sort}"));
    }
    if page > 1 {
        params.push(format!("page={page}"));
    }

    if params.is_empty() {
        format!("{base}/shorties")
    } else {
        format!("{base}/shorties?{}", params.join("&"))
    }
}
fn extract_json_shorties(html: &str) -> Result<Vec<ShortieItem>> {
// The page embeds: JSON_SHORTIES = insertAfterNthPosition([{...}, ...], AD_POSITION, ...)
// We locate the array by finding the '[' after JSON_SHORTIES and matching brackets.
let marker = "JSON_SHORTIES";
let start = html.find(marker).ok_or_else(|| {
Error::from(ErrorKind::Parse(
"JSON_SHORTIES marker not found in HTML".to_string(),
))
})?;
let arr_start = html[start..].find('[').ok_or_else(|| {
Error::from(ErrorKind::Parse(
"JSON_SHORTIES array open bracket not found".to_string(),
))
})? + start;
// Walk the HTML to find the matching closing bracket.
let bytes = html.as_bytes();
let mut depth: i32 = 0;
let mut in_string = false;
let mut escape_next = false;
let mut arr_end = arr_start;
for (offset, &b) in bytes[arr_start..].iter().enumerate() {
if escape_next {
escape_next = false;
continue;
}
if b == b'\\' && in_string {
escape_next = true;
continue;
}
if b == b'"' {
in_string = !in_string;
continue;
}
if in_string {
continue;
}
match b {
b'[' => depth += 1,
b']' => {
depth -= 1;
if depth == 0 {
arr_end = arr_start + offset;
break;
}
}
_ => {}
}
}
if depth != 0 {
return Err(
ErrorKind::Parse("JSON_SHORTIES array bracket mismatch".to_string()).into(),
);
}
let raw = &html[arr_start..=arr_end];
let items: Vec<ShortieItem> = serde_json::from_str(raw).map_err(|e| {
Error::from(ErrorKind::Parse(format!("JSON_SHORTIES parse error: {e}")))
})?;
Ok(items)
}
fn item_to_video(item: ShortieItem) -> Option<VideoItem> {
let id = item.vkey;
if id.is_empty() {
return None;
}
let title = item.video_title.trim().to_string();
if title.is_empty() {
return None;
}
let url = item.link_url.trim().to_string();
if url.is_empty() || !url.contains("/view_video.php") {
return None;
}
let thumb = item
.image_url
.unwrap_or_default()
.replace("\\/", "/")
.trim()
.to_string();
let duration = item
.tracking_time_watched
.map(|t| t.video_duration)
.unwrap_or(0);
let mut video = VideoItem::new(
id,
title,
url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
// Rating from likes / (likes + dislikes)
let total = item.like_number + item.dislike_number;
if total > 0 {
video.rating = Some((item.like_number as f32 / total as f32) * 100.0);
}
// Uploader
if let Some(name) = item.name.filter(|n| !n.is_empty()) {
video.uploader = Some(name.clone());
if let Some(profile_url) = item
.profile_url
.map(|u| u.replace("\\/", "/"))
.filter(|u| !u.is_empty())
{
video.uploaderUrl = Some(profile_url.clone());
// Build namespaced uploader ID from entity_type + entity_id
if let (Some(et), Some(eid)) = (item.entity_type.as_deref(), item.entity_id) {
let kind = match et {
"Mpp" | "Model" => "model",
"Channel" => "channels",
"Pornstar" => "pornstar",
_ => "model",
};
video.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{eid}"));
}
}
}
// Tags from pillsData
let tags: Vec<String> = item.pills_data.into_iter().map(|p| p.name).collect();
if !tags.is_empty() {
video.tags = Some(tags);
}
// Verified badge
video.verified = item.badges.map(|b| b.verified).filter(|&v| v);
Some(video)
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: CHANNEL_ID.to_string(),
name: "PH Shorties".to_string(),
description: "Pornhub Shorties — vertical short-form porn clips.".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
status: "active".to_string(),
categories: vec![],
options: vec![ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse Pornhub Shorties by sort order.".to_string(),
systemImage: "list.number".to_string(),
colorName: "orange".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "New".to_string(),
},
FilterOption {
id: "trending".to_string(),
title: "Trending".to_string(),
},
FilterOption {
id: "mostviewed".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "top_rated".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "hottest".to_string(),
title: "Hottest".to_string(),
},
],
multiSelect: false,
}],
nsfw: true,
cacheDuration: Some(1800),
}
}
/// Fetches one page of shorties, using `cache` keyed by the request URL.
///
/// * A cache entry younger than five minutes is returned without a request.
/// * An older entry is kept as a fallback: it is returned when the request fails, when the
///   page cannot be parsed, or when parsing yields no usable items. The first two cases
///   are also reported through `report_provider_error`.
/// * A non-empty fresh result replaces the cache entry and is returned.
///
/// As written, every path returns `Ok`.
async fn fetch_videos(
    &self,
    cache: VideoCache,
    page: u8,
    sort: &str,
    query: Option<&str>,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    const FRESH_FOR_SECS: u64 = 60 * 5;

    let target = Self::build_target(sort, query);
    let fetch_url = Self::build_url(&self.url, &target, page);

    // Cache hit
    let stale_items = match cache.get(&fetch_url) {
        Some((stored_at, items)) => {
            if stored_at.elapsed().unwrap_or_default().as_secs() < FRESH_FOR_SECS {
                return Ok(items.clone());
            }
            items.clone()
        }
        None => vec![],
    };

    let headers = vec![
        ("Referer".to_string(), format!("{}/shorties", self.url)),
        (
            "Accept".to_string(),
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(),
        ),
        ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
    ];

    let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_videos");
    let response = requester.get_with_headers(&fetch_url, headers, None).await;
    let text = match response {
        Ok(body) => body,
        Err(error) => {
            let detail = format!("url={fetch_url}; error={error}");
            report_provider_error(CHANNEL_ID, "fetch_videos.request", &detail).await;
            return Ok(stale_items);
        }
    };

    let shorties = match Self::extract_json_shorties(&text) {
        Ok(found) => found,
        Err(error) => {
            let detail = format!("url={fetch_url}; error={error}");
            report_provider_error(CHANNEL_ID, "fetch_videos.parse", &detail).await;
            return Ok(stale_items);
        }
    };

    let mut fresh_items: Vec<VideoItem> = Vec::new();
    fresh_items.extend(shorties.into_iter().filter_map(Self::item_to_video));
    if fresh_items.is_empty() {
        return Ok(stale_items);
    }

    cache.remove(&fetch_url);
    cache.insert(fetch_url, fresh_items.clone());
    Ok(fresh_items)
}
}
/// [`Provider`] implementation that forwards the trait's string-based request
/// parameters to this type's own `fetch_videos` and `build_channel`.
#[async_trait]
impl Provider for PornhubShortiesProvider {
    /// Returns the videos for the requested page.
    ///
    /// `pool` and `per_page` are ignored. `page` is parsed as a `u8`, falling
    /// back to `1` when parsing fails. If `fetch_videos` returns an error it
    /// is reported through `report_provider_error` and an empty list is
    /// returned.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Neither the database pool nor the page size is used here.
        let _ = pool;
        let _ = per_page;

        let page_number = page.parse::<u8>().unwrap_or(1);
        let outcome = self
            .fetch_videos(cache, page_number, &sort, query.as_deref(), options)
            .await;

        let failure = match outcome {
            Ok(videos) => return videos,
            Err(failure) => failure,
        };
        report_provider_error(CHANNEL_ID, "get_videos", &failure.to_string()).await;
        vec![]
    }

    /// Returns this provider's channel descriptor; always `Some`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Page fragment embedding a single `JSON_SHORTIES` entry.
    const SAMPLE_PAGE: &str = r#"
var JSON_SHORTIES = insertAfterNthPosition([
{
"videoId": 123456,
"vkey": "abcdef1234567",
"videoTitle": "Test Short Video",
"favoriteInfo": "1K",
"likeInfo": "2K",
"likeNumber": 2000,
"dislikeNumber": 100,
"isHD": true,
"linkUrl": "https:\/\/www.pornhub.com\/view_video.php?viewkey=abcdef1234567",
"shortieUrl": "https:\/\/www.pornhub.com\/shorties\/abcdef1234567",
"embedUrl": "",
"imageUrl": "https:\/\/example.com\/thumb.jpg",
"mediaPriority": "hls",
"mediaDefinitions": [],
"isFavorite": false,
"isReported": false,
"isSubscribed": false,
"userVoteDetail": -1,
"trackingTimeWatched": {"video_duration": 45},
"pillsData": [{"name": "Amateur", "slug": "amateur", "type": "category"}],
"badges": {"verified": true, "premium": false, "award": false},
"name": "TestUser",
"profileUrl": "https:\/\/www.pornhub.com\/model\/testuser",
"entityType": "Mpp",
"entityId": 9876543
}
], AD_POSITION, {});
"#;

    /// Parses the sample page, then converts the parsed entry into a video.
    #[test]
    fn parses_json_shorties_from_html() {
        let parsed = PornhubShortiesProvider::extract_json_shorties(SAMPLE_PAGE)
            .expect("should parse JSON_SHORTIES");
        assert_eq!(parsed.len(), 1);

        // Raw fields of the single parsed entry.
        let first = &parsed[0];
        assert_eq!(first.vkey, "abcdef1234567");
        assert_eq!(first.video_title, "Test Short Video");
        assert_eq!(
            first.link_url,
            "https://www.pornhub.com/view_video.php?viewkey=abcdef1234567"
        );
        assert_eq!(first.like_number, 2000);
        assert_eq!(first.dislike_number, 100);
        let tracked_duration = first
            .tracking_time_watched
            .as_ref()
            .map(|tracking| tracking.video_duration);
        assert_eq!(tracked_duration, Some(45));
        assert_eq!(first.pills_data[0].name, "Amateur");
        assert_eq!(first.name.as_deref(), Some("TestUser"));

        // Converted video built from that entry.
        let video = PornhubShortiesProvider::item_to_video(first.clone())
            .expect("should convert to VideoItem");
        assert_eq!(video.id, "abcdef1234567");
        assert_eq!(video.duration, 45);
        let rating = video.rating.expect("rating should be present");
        assert!((rating - 95.23).abs() < 0.1);
        assert_eq!(video.uploader.as_deref(), Some("TestUser"));
        let has_amateur_tag = video
            .tags
            .as_ref()
            .is_some_and(|tags| tags.iter().any(|tag| tag.as_str() == "Amateur"));
        assert!(has_amateur_tag);
    }

    /// The home listing sorted by "new" has no query string on page 1 and a
    /// `page` parameter on page 2.
    #[test]
    fn build_url_home_new() {
        let cases = [
            (1, "https://www.pornhub.com/shorties"),
            (2, "https://www.pornhub.com/shorties?page=2"),
        ];
        for &(page, expected) in cases.iter() {
            let target = Target::Home {
                sort: "new".to_string(),
            };
            let built = PornhubShortiesProvider::build_url(BASE_URL, &target, page);
            assert_eq!(built, expected);
        }
    }

    /// A search target on page 1 yields a `search` query parameter.
    #[test]
    fn build_url_search() {
        let target = Target::Search {
            query: "teen".to_string(),
            sort: "new".to_string(),
        };
        let built = PornhubShortiesProvider::build_url(BASE_URL, &target, 1);
        assert_eq!(built, "https://www.pornhub.com/shorties?search=teen");
    }

    /// Known sorts map to their canonical id; unknown input maps to "new".
    #[test]
    fn normalize_sort_variants() {
        let cases = [
            ("new", "new"),
            ("trending", "trending"),
            ("popular", "mostviewed"),
            ("unknown", "new"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(PornhubShortiesProvider::normalize_sort(input), expected);
        }
    }
}