camsoda and pornhub shorties

2026-05-22 10:26:05 +00:00
parent e4941b1209
commit 95d2defa13
5 changed files with 1014 additions and 6 deletions
--- a/build.rs
+++ b/build.rs
@@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[
        module: "pornhub",
        ty: "PornhubProvider",
    },
    ProviderDef {
        id: "pornhub-shorties",
        module: "pornhub_shorties",
        ty: "PornhubShortiesProvider",
    },
    ProviderDef {
        id: "youporn",
        module: "youporn",
@@ -341,6 +346,11 @@ const PROVIDERS: &[ProviderDef] = &[
        module: "xvideos",
        ty: "XvideosProvider",
    },
    ProviderDef {
        id: "camsoda",
        module: "camsoda",
        ty: "CamsodaProvider",
    },
 ];
 fn main() {
--- a/check.py
+++ b/check.py
@@ -64,6 +64,23 @@ class Results:
 _BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
 # Domains known to be Cloudflare-protected and return 403/connection-refused to direct
 # HTTP checks. URL reachability failures for these hosts are downgraded to warnings.
 _CF_PROTECTED_HOSTS = {
    "www.camsoda.com",
    "camsoda.com",
 }
 def _is_cf_protected(url: str) -> bool:
    """Return True if the URL's host is known to be CF-protected."""
    try:
        from urllib.parse import urlparse
        host = urlparse(url).hostname or ""
        return host in _CF_PROTECTED_HOSTS
    except Exception:
        return False
 def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
    """Return (ok, http_status). Tries HEAD then ranged GET on 405."""
@@ -234,7 +251,10 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
    else:
        ok, code = http_ok(vurl)
        if not ok:
-            results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
+            if _is_cf_protected(vurl):
                results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}")
            else:
                results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
        else:
            results.info(channel_id, f"{label}: url OK (HTTP {code})")
@@ -275,11 +295,18 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
        results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
        yt, stderr = ytdlp_extract(ytdlp_url)
        if yt is None:
-            results.err(
+            if _is_cf_protected(ytdlp_url):
-                channel_id,
+                results.warn(
-                f"{label}: yt-dlp failed for {ytdlp_url}"
+                    channel_id,
-                + (f": {stderr[:300]}" if stderr else ""),
+                    f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)"
-            )
+                    + (f": {stderr[:200]}" if stderr else ""),
                )
            else:
                results.err(
                    channel_id,
                    f"{label}: yt-dlp failed for {ytdlp_url}"
                    + (f": {stderr[:300]}" if stderr else ""),
                )
        else:
            yt_title = (yt.get("title") or "").strip()
            api_title = (video.get("title") or "").strip()
--- a/docs/provider-catalog.md
+++ b/docs/provider-catalog.md
@@ -40,6 +40,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
 | `pornhat` | `mainstream-tube` | no | no | Basic tube provider. |
 | `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. |
 | `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
 | `pornhub-shorties` | `tiktok` | no | no | Pornhub Shorties vertical short-form clips; parses `JSON_SHORTIES` JS variable embedded in HTML; fields: vkey, title, linkUrl, imageUrl, likeNumber, dislikeNumber, name/profileUrl (uploader), pillsData (tags), trackingTimeWatched.video_duration; pagination via `?page=N`; search via `?search=query`; sort via `?sort=trending\|mostviewed\|top_rated\|hottest`; phncdn thumbnails require `Referer: https://www.pornhub.com/` (served via cdnReferrers in /api/status); yt-dlp resolves `video.url` natively (PornHub extractor); no proxy needed. |
 | `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
 | `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
 | `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
@@ -70,6 +71,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
 | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
 | `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
 | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
 | `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:` query prefix to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). |
 | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
 ## Proxy Routes
--- a/src/providers/camsoda.rs
+++ b/src/providers/camsoda.rs
@@ -0,0 +1,403 @@
 use crate::DbPool;
 use crate::api::ClientVersion;
 use crate::providers::{
    Provider, report_provider_error, requester_or_default,
 };
 use crate::status::*;
 use crate::util::cache::VideoCache;
 use crate::util::time::parse_time_to_seconds;
 use crate::videos::{ServerOptions, VideoItem};
 use async_trait::async_trait;
 use error_chain::error_chain;
 use scraper::{Html, Selector};
 use std::collections::HashSet;
 pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "live-cams",
        tags: &["cams", "amateur", "recordings", "clips"],
    };
 const BASE_URL: &str = "https://www.camsoda.com";
 const CHANNEL_ID: &str = "camsoda";
 error_chain! {
    foreign_links {
        Io(std::io::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
 }
 #[derive(Debug, Clone)]
 pub struct CamsodaProvider {
    url: String,
 }
 #[derive(Debug, Clone)]
 enum Target {
    /// Default listing at /media?page=N
    Listing,
    /// Model media page at /{username}/media
    Model { username: String },
 }
 impl CamsodaProvider {
    pub fn new() -> Self {
        Self {
            url: BASE_URL.to_string(),
        }
    }
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        Channel {
            id: CHANNEL_ID.to_string(),
            name: "CamSoda".to_string(),
            description:
                "CamSoda model video clips — recorded amateur cam shows uploaded by performers."
                    .to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com".to_string(),
            status: "active".to_string(),
            categories: vec![],
            options: vec![],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }
    /// Resolve the fetch target from query and options.
    fn pick_target(query: Option<&str>) -> Target {
        let Some(query) = query.map(str::trim).filter(|v| !v.is_empty()) else {
            return Target::Listing;
        };
        // Support "uploader:username" or "model:username" shortcuts.
        for prefix in &["uploader:", "model:", "user:"] {
            if let Some(username) = query.strip_prefix(prefix) {
                let username = username.trim().to_lowercase();
                if !username.is_empty() {
                    return Target::Model { username };
                }
            }
        }
        // For other queries fall back to the default listing;
        // the server will apply client-side substring filtering.
        Target::Listing
    }
    fn build_listing_url(&self, target: &Target, page: u16) -> String {
        let page = page.max(1);
        match target {
            Target::Listing => format!("{}/media?page={}", self.url, page),
            Target::Model { username } => {
                if page <= 1 {
                    format!("{}/{}/media", self.url, username)
                } else {
                    format!("{}/{}/media?page={}", self.url, username, page)
                }
            }
        }
    }
    /// Parse video cards from the HTML of a CamSoda media page.
    ///
    /// The page contains anchor elements linking to individual video pages:
    ///   href="/{username}/media/{slug}/{id}"
    ///
    /// Inside each anchor:
    ///   - `[class*="media-item-module__title"]` span: the video title
    ///   - `[class*="media-item-module__subtitle"]` span: "by UPLOADER (MM:SS)"
    ///   - `img[src*="media-secure.camsoda.com"]`: video-specific thumbnail
    fn parse_html_items(html: &str) -> Vec<VideoItem> {
        let document = Html::parse_document(html);
        // Select all anchors linking to /{username}/media/{slug}/{id}
        let anchor_sel = match Selector::parse(r#"a[href]"#) {
            Ok(s) => s,
            Err(_) => return vec![],
        };
        let title_sel = match Selector::parse(r#"[class*="media-item-module__title"]"#) {
            Ok(s) => s,
            Err(_) => return vec![],
        };
        let subtitle_sel = match Selector::parse(r#"[class*="media-item-module__subtitle"]"#) {
            Ok(s) => s,
            Err(_) => return vec![],
        };
        let img_sel = match Selector::parse(r#"img[src]"#) {
            Ok(s) => s,
            Err(_) => return vec![],
        };
        // Regex for parsing subtitle "by UPLOADER (MM:SS)"
        let sub_re = match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") {
            Ok(r) => r,
            Err(_) => return vec![],
        };
        // Regex for media URL: /{username}/media/{slug}/{id}
        let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") {
            Ok(r) => r,
            Err(_) => return vec![],
        };
        let mut items: Vec<VideoItem> = Vec::new();
        let mut seen_ids: HashSet<String> = HashSet::new();
        for anchor in document.select(&anchor_sel) {
            let href = match anchor.value().attr("href") {
                Some(h) => h,
                None => continue,
            };
            let caps = match href_re.captures(href) {
                Some(c) => c,
                None => continue,
            };
            let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
            let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
            let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string();
            if video_id.is_empty() || username.is_empty() {
                continue;
            }
            if !seen_ids.insert(video_id.clone()) {
                continue;
            }
            // Title
            let title = anchor
                .select(&title_sel)
                .next()
                .map(|el| el.text().collect::<String>().trim().to_string())
                .unwrap_or_default();
            let title = if title.is_empty() {
                format!("CamSoda video {video_id}")
            } else {
                title
            };
            // Subtitle: "by UPLOADER (MM:SS)"
            let subtitle = anchor
                .select(&subtitle_sel)
                .next()
                .map(|el| el.text().collect::<String>().trim().to_string())
                .unwrap_or_default();
            let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) {
                let u = sc.get(1).map(|m| m.as_str().trim().to_string()).unwrap_or_default();
                let d = sc.get(2)
                    .and_then(|m| parse_time_to_seconds(m.as_str()))
                    .and_then(|s| u32::try_from(s).ok())
                    .unwrap_or(0);
                (if u.is_empty() { None } else { Some(u) }, d)
            } else {
                (None, 0)
            };
            // Thumbnail — prefer video-specific from media-secure.camsoda.com
            let thumb = anchor
                .select(&img_sel)
                .filter_map(|img| img.value().attr("src"))
                .find(|src| src.contains("media-secure.camsoda.com"))
                .or_else(|| {
                    anchor
                        .select(&img_sel)
                        .filter_map(|img| img.value().attr("src"))
                        .find(|src| src.contains("livemediahost.com"))
                })
                .unwrap_or("")
                .to_string();
            let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}");
            let mut item = VideoItem::new(
                video_id,
                title,
                page_url,
                CHANNEL_ID.to_string(),
                thumb,
                duration,
            );
            item.uploader = uploader;
            item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media"));
            item.uploaderId = Some(format!("{CHANNEL_ID}:{username}"));
            items.push(item);
        }
        items
    }
    async fn fetch_items(
        &self,
        target: &Target,
        page: u16,
        options: &ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let url = self.build_listing_url(target, page);
        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items");
        let text = requester
            .get(&url, None)
            .await
            .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?;
        // Guard against CF challenge pages slipping through
        if text.contains("cf-browser-verification")
            || text.contains("cf-chl")
            || text.contains("Just a moment")
        {
            return Err(Error::from("cloudflare challenge page returned".to_string()));
        }
        let items = Self::parse_html_items(&text);
        Ok(items)
    }
 }
 #[async_trait]
 impl Provider for CamsodaProvider {
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = cache;
        let _ = pool;
        let _ = sort;
        let _ = per_page;
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let normalized_query = query
            .as_deref()
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToOwned::to_owned);
        let target = Self::pick_target(normalized_query.as_deref());
        match self.fetch_items(&target, page, &options).await {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    fn sample_html() -> String {
        // Simplified version of the HTML returned by Jina (X-Return-Format: html)
        r#"<!DOCTYPE html><html><body>
 <a href="/lil-asian-jaz/media/torso-ride-begging-for-your-cum/16984249">
  <span><span class="media-item-module__title--lrB0Q">Torso ride begging for your cum</span><span class="media-item-module__subtitle--VRwsa">by jazzyj (24:35)</span></span>
  <div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/16984249/16984249.thumb.1777857871.webp" width="320" height="240"></div>
 </a>
 <a href="/coco-dethick/media/first-ir-bg-show-ft-johnny-love/17009049">
  <span><span class="media-item-module__new--uXKvP">new</span><span class="media-item-module__title--lrB0Q">FIRST IR BG SHOW FT JOHNNY LOVE</span><span class="media-item-module__subtitle--VRwsa">by Coco Dethick (44:14)</span></span>
  <div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17009049/17009049_1778619855.thumb.webp" width="320" height="240"></div>
 </a>
 <a href="/hot-wife-mia/media/ultimate-squirting-video/17112135">
  <span><span class="media-item-module__title--lrB0Q">Ultimate squirting video!</span><span class="media-item-module__subtitle--VRwsa">by Hot Wife Mia (02:47)</span></span>
  <div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17112135/17112135.thumb.1779200000.webp" width="320" height="240"></div>
 </a>
 </body></html>"#.to_string()
    }
    #[test]
    fn parses_video_cards_from_html() {
        let items = CamsodaProvider::parse_html_items(&sample_html());
        assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::<Vec<_>>());
        let item = &items[0];
        assert_eq!(item.id, "16984249");
        assert_eq!(item.title, "Torso ride begging for your cum");
        assert_eq!(item.uploader.as_deref(), Some("jazzyj"));
        assert_eq!(item.duration, 24 * 60 + 35);
        assert!(item.url.contains("16984249"), "url should contain id: {}", item.url);
        assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb);
        assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media"));
        assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz"));
        let item2 = &items[1];
        assert_eq!(item2.id, "17009049");
        assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick"));
        let item3 = &items[2];
        assert_eq!(item3.id, "17112135");
        assert_eq!(item3.duration, 2 * 60 + 47);
    }
    #[test]
    fn deduplicates_items() {
        // Same video appears twice
        let html = sample_html();
        let doubled = format!("{html}\n{html}");
        let items = CamsodaProvider::parse_html_items(&doubled);
        assert_eq!(items.len(), 3, "should deduplicate to 3 unique items");
    }
    #[test]
    fn picks_target_correctly() {
        assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing));
        assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing));
        assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing));
        match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) {
            Target::Model { username } => assert_eq!(username, "lil-asian-jaz"),
            _ => panic!("expected Model target"),
        }
        match CamsodaProvider::pick_target(Some("model:katt-leya")) {
            Target::Model { username } => assert_eq!(username, "katt-leya"),
            _ => panic!("expected Model target"),
        }
    }
    #[test]
    fn builds_listing_urls_correctly() {
        let provider = CamsodaProvider::new();
        assert_eq!(
            provider.build_listing_url(&Target::Listing, 1),
            "https://www.camsoda.com/media?page=1"
        );
        assert_eq!(
            provider.build_listing_url(&Target::Listing, 3),
            "https://www.camsoda.com/media?page=3"
        );
        assert_eq!(
            provider.build_listing_url(
                &Target::Model { username: "lil-asian-jaz".to_string() },
                1
            ),
            "https://www.camsoda.com/lil-asian-jaz/media"
        );
        assert_eq!(
            provider.build_listing_url(
                &Target::Model { username: "lil-asian-jaz".to_string() },
                2
            ),
            "https://www.camsoda.com/lil-asian-jaz/media?page=2"
        );
    }
 }
--- a/src/providers/pornhub_shorties.rs
+++ b/src/providers/pornhub_shorties.rs
@@ -0,0 +1,566 @@
 use crate::DbPool;
 use crate::api::ClientVersion;
 use crate::providers::{Provider, report_provider_error, requester_or_default};
 use crate::status::*;
 use crate::util::cache::VideoCache;
 use crate::videos::{ServerOptions, VideoItem};
 use async_trait::async_trait;
 use error_chain::error_chain;
 use serde::Deserialize;
 pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "tiktok",
        tags: &["shorts", "pornhub", "vertical"],
    };
 const BASE_URL: &str = "https://www.pornhub.com";
 const CHANNEL_ID: &str = "pornhub-shorties";
 error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
        Json(serde_json::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
 }
 #[derive(Debug, Clone)]
 pub struct PornhubShortiesProvider {
    url: String,
 }
 #[derive(Debug, Clone)]
 enum Target {
    Home { sort: String },
    Search { query: String, sort: String },
 }
 /// Deserialized structure of each entry in the `JSON_SHORTIES` JS variable.
 #[derive(Debug, Deserialize, Clone)]
 struct ShortieItem {
    #[serde(rename = "vkey")]
    vkey: String,
    #[serde(rename = "videoTitle")]
    video_title: String,
    #[serde(rename = "linkUrl")]
    link_url: String,
    #[serde(rename = "imageUrl")]
    image_url: Option<String>,
    #[serde(rename = "likeNumber", default)]
    like_number: u64,
    #[serde(rename = "dislikeNumber", default)]
    dislike_number: u64,
    #[serde(rename = "name")]
    name: Option<String>,
    #[serde(rename = "profileUrl")]
    profile_url: Option<String>,
    #[serde(rename = "entityId")]
    entity_id: Option<u64>,
    #[serde(rename = "entityType")]
    entity_type: Option<String>,
    #[serde(rename = "trackingTimeWatched")]
    tracking_time_watched: Option<TrackingTimeWatched>,
    #[serde(rename = "pillsData", default)]
    pills_data: Vec<PillData>,
    #[serde(rename = "badges")]
    badges: Option<Badges>,
 }
 #[derive(Debug, Deserialize, Clone)]
 struct TrackingTimeWatched {
    #[serde(rename = "video_duration", default)]
    video_duration: u32,
 }
 #[derive(Debug, Deserialize, Clone)]
 struct PillData {
    #[serde(rename = "name")]
    name: String,
 }
 #[derive(Debug, Deserialize, Clone)]
 struct Badges {
    #[serde(rename = "verified", default)]
    verified: bool,
 }
 impl PornhubShortiesProvider {
    pub fn new() -> Self {
        Self {
            url: BASE_URL.to_string(),
        }
    }
    fn build_target(sort: &str, query: Option<&str>) -> Target {
        let sort = Self::normalize_sort(sort).to_string();
        match query.map(str::trim).filter(|q| !q.is_empty()) {
            Some(q) => Target::Search {
                query: q.to_string(),
                sort,
            },
            None => Target::Home { sort },
        }
    }
    fn normalize_sort(sort: &str) -> &'static str {
        match sort.trim().to_ascii_lowercase().as_str() {
            "trending" => "trending",
            "popular" | "mv" | "mostviewed" => "mostviewed",
            "top_rated" | "tr" | "toprated" => "top_rated",
            "hottest" | "ht" => "hottest",
            _ => "new",
        }
    }
    fn build_url(base: &str, target: &Target, page: u8) -> String {
        match target {
            Target::Home { sort } => {
                if sort == "new" {
                    if page <= 1 {
                        format!("{base}/shorties")
                    } else {
                        format!("{base}/shorties?page={page}")
                    }
                } else {
                    if page <= 1 {
                        format!("{base}/shorties?sort={sort}")
                    } else {
                        format!("{base}/shorties?sort={sort}&page={page}")
                    }
                }
            }
            Target::Search { query, sort } => {
                let encoded = query.replace(' ', "+");
                if sort == "new" {
                    if page <= 1 {
                        format!("{base}/shorties?search={encoded}")
                    } else {
                        format!("{base}/shorties?search={encoded}&page={page}")
                    }
                } else {
                    if page <= 1 {
                        format!("{base}/shorties?search={encoded}&sort={sort}")
                    } else {
                        format!("{base}/shorties?search={encoded}&sort={sort}&page={page}")
                    }
                }
            }
        }
    }
    fn extract_json_shorties(html: &str) -> Result<Vec<ShortieItem>> {
        // The page embeds: JSON_SHORTIES = insertAfterNthPosition([{...}, ...], AD_POSITION, ...)
        // We locate the array by finding the '[' after JSON_SHORTIES and matching brackets.
        let marker = "JSON_SHORTIES";
        let start = html.find(marker).ok_or_else(|| {
            Error::from(ErrorKind::Parse(
                "JSON_SHORTIES marker not found in HTML".to_string(),
            ))
        })?;
        let arr_start = html[start..].find('[').ok_or_else(|| {
            Error::from(ErrorKind::Parse(
                "JSON_SHORTIES array open bracket not found".to_string(),
            ))
        })? + start;
        // Walk the HTML to find the matching closing bracket.
        let bytes = html.as_bytes();
        let mut depth: i32 = 0;
        let mut in_string = false;
        let mut escape_next = false;
        let mut arr_end = arr_start;
        for (offset, &b) in bytes[arr_start..].iter().enumerate() {
            if escape_next {
                escape_next = false;
                continue;
            }
            if b == b'\\' && in_string {
                escape_next = true;
                continue;
            }
            if b == b'"' {
                in_string = !in_string;
                continue;
            }
            if in_string {
                continue;
            }
            match b {
                b'[' => depth += 1,
                b']' => {
                    depth -= 1;
                    if depth == 0 {
                        arr_end = arr_start + offset;
                        break;
                    }
                }
                _ => {}
            }
        }
        if depth != 0 {
            return Err(
                ErrorKind::Parse("JSON_SHORTIES array bracket mismatch".to_string()).into(),
            );
        }
        let raw = &html[arr_start..=arr_end];
        let items: Vec<ShortieItem> = serde_json::from_str(raw).map_err(|e| {
            Error::from(ErrorKind::Parse(format!("JSON_SHORTIES parse error: {e}")))
        })?;
        Ok(items)
    }
    fn item_to_video(item: ShortieItem) -> Option<VideoItem> {
        let id = item.vkey;
        if id.is_empty() {
            return None;
        }
        let title = item.video_title.trim().to_string();
        if title.is_empty() {
            return None;
        }
        let url = item.link_url.trim().to_string();
        if url.is_empty() || !url.contains("/view_video.php") {
            return None;
        }
        let thumb = item
            .image_url
            .unwrap_or_default()
            .replace("\\/", "/")
            .trim()
            .to_string();
        let duration = item
            .tracking_time_watched
            .map(|t| t.video_duration)
            .unwrap_or(0);
        let mut video = VideoItem::new(
            id,
            title,
            url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        // Rating from likes / (likes + dislikes)
        let total = item.like_number + item.dislike_number;
        if total > 0 {
            video.rating = Some((item.like_number as f32 / total as f32) * 100.0);
        }
        // Uploader
        if let Some(name) = item.name.filter(|n| !n.is_empty()) {
            video.uploader = Some(name.clone());
            if let Some(profile_url) = item
                .profile_url
                .map(|u| u.replace("\\/", "/"))
                .filter(|u| !u.is_empty())
            {
                video.uploaderUrl = Some(profile_url.clone());
                // Build namespaced uploader ID from entity_type + entity_id
                if let (Some(et), Some(eid)) = (item.entity_type.as_deref(), item.entity_id) {
                    let kind = match et {
                        "Mpp" | "Model" => "model",
                        "Channel" => "channels",
                        "Pornstar" => "pornstar",
                        _ => "model",
                    };
                    video.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{eid}"));
                }
            }
        }
        // Tags from pillsData
        let tags: Vec<String> = item.pills_data.into_iter().map(|p| p.name).collect();
        if !tags.is_empty() {
            video.tags = Some(tags);
        }
        // Verified badge
        video.verified = item.badges.map(|b| b.verified).filter(|&v| v);
        Some(video)
    }
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        Channel {
            id: CHANNEL_ID.to_string(),
            name: "PH Shorties".to_string(),
            description: "Pornhub Shorties — vertical short-form porn clips.".to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
            status: "active".to_string(),
            categories: vec![],
            options: vec![ChannelOption {
                id: "sort".to_string(),
                title: "Sort".to_string(),
                description: "Browse Pornhub Shorties by sort order.".to_string(),
                systemImage: "list.number".to_string(),
                colorName: "orange".to_string(),
                options: vec![
                    FilterOption {
                        id: "new".to_string(),
                        title: "New".to_string(),
                    },
                    FilterOption {
                        id: "trending".to_string(),
                        title: "Trending".to_string(),
                    },
                    FilterOption {
                        id: "mostviewed".to_string(),
                        title: "Most Viewed".to_string(),
                    },
                    FilterOption {
                        id: "top_rated".to_string(),
                        title: "Top Rated".to_string(),
                    },
                    FilterOption {
                        id: "hottest".to_string(),
                        title: "Hottest".to_string(),
                    },
                ],
                multiSelect: false,
            }],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }
    async fn fetch_videos(
        &self,
        cache: VideoCache,
        page: u8,
        sort: &str,
        query: Option<&str>,
        options: ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let target = Self::build_target(sort, query);
        let fetch_url = Self::build_url(&self.url, &target, page);
        // Cache hit
        let old_items = match cache.get(&fetch_url) {
            Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
                return Ok(items.clone());
            }
            Some((_, items)) => items.clone(),
            None => vec![],
        };
        let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_videos");
        let text = match requester
            .get_with_headers(
                &fetch_url,
                vec![
                    ("Referer".to_string(), format!("{}/shorties", self.url)),
                    (
                        "Accept".to_string(),
                        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
                            .to_string(),
                    ),
                    ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
                ],
                None,
            )
            .await
        {
            Ok(text) => text,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "fetch_videos.request",
                    &format!("url={fetch_url}; error={error}"),
                )
                .await;
                return Ok(old_items);
            }
        };
        let shorties = match Self::extract_json_shorties(&text) {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "fetch_videos.parse",
                    &format!("url={fetch_url}; error={error}"),
                )
                .await;
                return Ok(old_items);
            }
        };
        let items: Vec<VideoItem> = shorties
            .into_iter()
            .filter_map(Self::item_to_video)
            .collect();
        if items.is_empty() {
            return Ok(old_items);
        }
        cache.remove(&fetch_url);
        cache.insert(fetch_url, items.clone());
        Ok(items)
    }
 }
 #[async_trait]
 impl Provider for PornhubShortiesProvider {
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        let page = page.parse::<u8>().unwrap_or(1);
        match self
            .fetch_videos(cache, page, &sort, query.as_deref(), options)
            .await
        {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn parses_json_shorties_from_html() {
        let html = r#"
            var JSON_SHORTIES = insertAfterNthPosition([
                {
                    "videoId": 123456,
                    "vkey": "abcdef1234567",
                    "videoTitle": "Test Short Video",
                    "favoriteInfo": "1K",
                    "likeInfo": "2K",
                    "likeNumber": 2000,
                    "dislikeNumber": 100,
                    "isHD": true,
                    "linkUrl": "https:\/\/www.pornhub.com\/view_video.php?viewkey=abcdef1234567",
                    "shortieUrl": "https:\/\/www.pornhub.com\/shorties\/abcdef1234567",
                    "embedUrl": "",
                    "imageUrl": "https:\/\/example.com\/thumb.jpg",
                    "mediaPriority": "hls",
                    "mediaDefinitions": [],
                    "isFavorite": false,
                    "isReported": false,
                    "isSubscribed": false,
                    "userVoteDetail": -1,
                    "trackingTimeWatched": {"video_duration": 45},
                    "pillsData": [{"name": "Amateur", "slug": "amateur", "type": "category"}],
                    "badges": {"verified": true, "premium": false, "award": false},
                    "name": "TestUser",
                    "profileUrl": "https:\/\/www.pornhub.com\/model\/testuser",
                    "entityType": "Mpp",
                    "entityId": 9876543
                }
            ], AD_POSITION, {});
        "#;
        let items = PornhubShortiesProvider::extract_json_shorties(html)
            .expect("should parse JSON_SHORTIES");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].vkey, "abcdef1234567");
        assert_eq!(items[0].video_title, "Test Short Video");
        assert_eq!(
            items[0].link_url,
            "https://www.pornhub.com/view_video.php?viewkey=abcdef1234567"
        );
        assert_eq!(items[0].like_number, 2000);
        assert_eq!(items[0].dislike_number, 100);
        assert_eq!(
            items[0]
                .tracking_time_watched
                .as_ref()
                .map(|t| t.video_duration),
            Some(45)
        );
        assert_eq!(items[0].pills_data[0].name, "Amateur");
        assert_eq!(items[0].name.as_deref(), Some("TestUser"));
        let video = PornhubShortiesProvider::item_to_video(items[0].clone())
            .expect("should convert to VideoItem");
        assert_eq!(video.id, "abcdef1234567");
        assert_eq!(video.duration, 45);
        assert!(video.rating.is_some());
        assert!((video.rating.unwrap() - 95.23).abs() < 0.1);
        assert_eq!(video.uploader.as_deref(), Some("TestUser"));
        assert!(video.tags.as_ref().is_some_and(|t| t.contains(&"Amateur".to_string())));
    }
    #[test]
    fn build_url_home_new() {
        let url = PornhubShortiesProvider::build_url(
            BASE_URL,
            &Target::Home {
                sort: "new".to_string(),
            },
            1,
        );
        assert_eq!(url, "https://www.pornhub.com/shorties");
        let url2 = PornhubShortiesProvider::build_url(
            BASE_URL,
            &Target::Home {
                sort: "new".to_string(),
            },
            2,
        );
        assert_eq!(url2, "https://www.pornhub.com/shorties?page=2");
    }
    #[test]
    fn build_url_search() {
        let url = PornhubShortiesProvider::build_url(
            BASE_URL,
            &Target::Search {
                query: "teen".to_string(),
                sort: "new".to_string(),
            },
            1,
        );
        assert_eq!(url, "https://www.pornhub.com/shorties?search=teen");
    }
    #[test]
    fn normalize_sort_variants() {
        assert_eq!(PornhubShortiesProvider::normalize_sort("new"), "new");
        assert_eq!(PornhubShortiesProvider::normalize_sort("trending"), "trending");
        assert_eq!(PornhubShortiesProvider::normalize_sort("popular"), "mostviewed");
        assert_eq!(PornhubShortiesProvider::normalize_sort("unknown"), "new");
    }
 }