From 342c7dc09857b388e5e5b149a249f7ca6cfd464c Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 22 Jun 2026 13:22:37 +0000 Subject: [PATCH] camsoda update --- docs/provider-catalog.md | 2 +- src/providers/camsoda.rs | 813 ++++++++++++--------------------------- src/videos.rs | 6 +- 3 files changed, 245 insertions(+), 576 deletions(-) diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 5bea193..d19c371 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -73,7 +73,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. 
| | `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. | | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. | -| `camsoda` | `live-cams` | no | no | JSON-API provider for camsoda.com recorded model clips. 
camsoda.com is hard Cloudflare-protected: direct requests and yt-dlp both get HTTP 403, and FlareSolverr was unreliable during development, so the only reliable path through CF is the shared requester's Jina mirror fallback (`r.jina.ai/http://...`, `X-Return-Format: html`) — note Jina rate-limits per IP, so multi-provider builds that burst many concurrent fetches see 429s; validate with a single-provider build (`HOT_TUB_PROVIDER=camsoda`) which makes one fetch at a time. The `/media` page is a CF-protected SPA whose SSR ignores `?page`/`?p`/`sort` (it always renders the same default 60 items); pagination/sort/tag are client-only XHR to a JSON API discovered in the (non-CF-protected) static `pages-media-MediaMainPage` bundle: `GET https://www.camsoda.com/api/v1/media/list/video?page=N&sort_by=&tag=` returning `{"result":true,"data":[...]}` — Jina returns that JSON wrapped in a `<pre>`, so the provider extracts the `{...}` slice and parses it (this gives real pagination across ~166 pages, plus sort and tag filtering — fixing the old HTML-scrape that couldn't paginate or search). Rich item fields come straight from the JSON: `name`→title, `username`→uploader slug, `user_display_name`→uploader, `duration` (seconds), `created_at`→`uploadedAt`, `thumbnail_url` (direct `media-secure.camsoda.com`, no proxy/referer needed). `sort_by` values: `date_added` (default/new), `popular`, `popular_all_time` (top). 49-tag catalog (extracted from the bundle) is exposed via the `categories` option (sanitized out of `/api/status` like other big catalogs, but honored in `/api/videos`) and routed by `tag:`/`cat:`/`category:` prefixes or a bare keyword that exactly matches a tag slug; there is no keyword media-search endpoint, so other bare queries fall back to the default listing for the server's client-side substring filter. `model:`/`uploader:`/`user:`/`performer:` prefixes browse a performer's SSR `/{username}/media` page, parsed via anchor selectors (`[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]`). `video.url` is the page URL (`/{username}/media/{slug}/{id}`); recorded clips are token-gated (`token_price`>0, `is_free_no_auth` effectively always false) and CF-blocks both browser and yt-dlp, so no `formats` are populated and playback is not resolvable in this environment (`check.py` reports these as expected CF warnings — `www.camsoda.com` is in its CF allowlist — not errors). No proxy needed. |
+| `camsoda` | `live-cams` | no | no | Live-cam provider for camsoda.com (chaturbate-style — `live` performers streaming now, `video.url` = the room page, `is_live=true`, no `formats`). camsoda.com is hard Cloudflare-protected: direct requests and yt-dlp both get HTTP 403, and FlareSolverr was unreliable during development, so the live-browse API is reached through the shared requester's Jina mirror fallback (`r.jina.ai/http://...`, `X-Return-Format: html`); Jina rate-limits per IP, so the provider caches each fetched feed URL for 60s (and serves stale items on a 429 rather than emptying the feed), and a single-provider build (`HOT_TUB_PROVIDER=camsoda`) validates most cleanly (one fetch at a time). Endpoint (found in the non-CF static `main.js` bundle): `GET https://www.camsoda.com/api/v1/browse/react{route}?p=N` returning a body with a top-level `userList` array (Jina wraps it in `<pre>`, so the provider slices out the `{...}` and parses it with `serde_json::Value`, like the chaturbate provider). Per-cam fields: `username`→id + room URL (`/{username}`), `subjectText`→title (html-decoded, falls back to `displayName`), `displayName`→uploader, `connectionCount`→views (string or number tolerated), `thumbUrl`→thumb (direct `media.livemediahost.com` CDN, no proxy/referer), `status` (skip `offline`), `vr`/`private` surfaced as tags. Category option `category` uses verified `browse/react` routes — `all`(featured)/`girls`/`trans`/`couples`/`voyeur-cams`/`new` (`/male` is NOT a path route, camsoda gates male via `gender-hide`); `cat:`/`category:` prefixes and a bare keyword matching a category id route there too. Search: `GET browse/react/search/{dashed-query}?sortByConnection=1` (single connection-sorted result set, no real paging). Playback: `video.url` is the live room page; the room and the token-gated edge HLS (`*.livemediahost.com`) are both Cloudflare-protected, so HLS can't be resolved server-side and no `formats` are populated — yt-dlp has a `Camsoda` live extractor that resolves the room on a non-CF-blocked client, and `check.py` reports the sandbox's CF 403s as expected warnings (`www.camsoda.com` is in its CF allowlist), not errors. The earlier recorded-`/media` JSON scrape was replaced because clips were token-gated/non-playable; live cams are the site's actual product. No proxy needed. |
 | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
 | `wowxxx` | `studio-network` | no | no | HTML scraper for wow.xxx premium aggregator; default feed `/latest-updates/`, page 2 `/{N}/` suffix (for example `/latest-updates/2/`), search `/search/{query}/relevance/` with the same page suffix; supports `site:`/`studio:`/`network:`/`model:`/`pornstar:`/`tag:`/`cat:` query shortcuts to direct archive routes; list cards expose preview clips (`cast.wow.xxx/preview/*.mp4`), thumbnails (`img.wow.xxx/.../medium@2x/1.jpg`), duration, rating, views, site (as uploader), and model tags; `video.url` is the detail page URL and yt-dlp resolves HTML5 MP4 formats dynamically; no proxy needed. |
 
diff --git a/src/providers/camsoda.rs b/src/providers/camsoda.rs
index d6f3c9e..6217233 100644
--- a/src/providers/camsoda.rs
+++ b/src/providers/camsoda.rs
@@ -3,84 +3,34 @@ use crate::api::ClientVersion;
 use crate::providers::{Provider, report_provider_error, requester_or_default};
 use crate::status::*;
 use crate::util::cache::VideoCache;
-use crate::util::time::parse_time_to_seconds;
 use crate::videos::{ServerOptions, VideoItem};
 use async_trait::async_trait;
-use chrono::NaiveDateTime;
 use error_chain::error_chain;
 use htmlentity::entity::{ICodedDataTrait, decode};
-use scraper::{Html, Selector};
-use serde::Deserialize;
-use std::collections::HashSet;
 
 pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
     crate::providers::ProviderChannelMetadata {
         group_id: "live-cams",
-        tags: &["cams", "amateur", "recordings", "clips"],
+        tags: &["live", "cams", "amateur", "webcam"],
     };
 
 const BASE_URL: &str = "https://www.camsoda.com";
 const CHANNEL_ID: &str = "camsoda";
-/// Recorded-media listing API. The site's `/media` page is a CF-protected SPA
-/// that loads this JSON endpoint over XHR for every page/sort/tag change:
-///   GET /api/v1/media/list/video?page=N&sort_by=&tag=
-/// Direct access is Cloudflare-challenged (HTTP 403), so the shared requester
-/// falls back to the Jina mirror, which returns the JSON wrapped in a `<pre>`.
-const API_LIST: &str = "https://www.camsoda.com/api/v1/media/list/video";
+/// Live-cam browse API. The `/` SPA loads this over XHR; it is Cloudflare-
+/// protected on direct access (HTTP 403), so the shared requester's Jina mirror
+/// fallback is what gets through. Response body has `userList` at the top level.
+const API_BROWSE: &str = "https://www.camsoda.com/api/v1/browse/react";
 
-/// Tag catalog exposed by the media filter dropdown (extracted from the
-/// MediaMainPage bundle). Used for the `categories` option and to route bare /
-/// `tag:`/`cat:` queries straight to a tag archive instead of dropping them.
-const MEDIA_TAGS: &[&str] = &[
-    "amateur",
-    "anal",
-    "asian",
-    "ass",
-    "bbw",
-    "big-ass",
-    "big-tits",
-    "black",
-    "blonde-hair",
-    "blowjob",
-    "bondage",
-    "brown-hair",
-    "college",
-    "cosplay",
-    "creampie",
-    "cum",
-    "curvy",
-    "dildo",
-    "dp",
-    "ebony",
-    "facial",
-    "feet",
-    "fetish",
-    "hairy-pussy",
-    "hd",
-    "japanese",
-    "latina",
-    "lesbian",
-    "lovense",
-    "lush",
-    "massage",
-    "masturbation",
-    "milf",
-    "muscle",
-    "ohmibod",
-    "outdoor",
-    "petite",
-    "pov",
-    "public",
-    "red-hair",
-    "shaved-pussy",
-    "small-tits",
-    "squirting",
-    "swallow",
-    "teen-18",
-    "threesome",
-    "toys",
-    "tranny",
-    "voyeur",
+/// Category routes appended to `browse/react` (verified against the live API).
+/// `id` is what the client sends back in the `category` option; `route` is the
+/// path segment (empty = the default featured feed).
+const CATEGORIES: &[(&str, &str, &str)] = &[
+    ("all", "Featured", ""),
+    ("girls", "Girls", "/girls"),
+    ("trans", "Trans", "/trans"),
+    ("couples", "Couples", "/couples"),
+    ("voyeur-cams", "Voyeur Cams", "/voyeur-cams"),
+    ("new", "New", "/girls/new"),
 ];
 
 error_chain! {
@@ -96,237 +46,118 @@ error_chain! {
 }
 
 #[derive(Debug, Clone)]
-pub struct CamsodaProvider {
-    url: String,
-}
+pub struct CamsodaProvider;
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 enum Target {
-    /// Default/tag listing via the JSON API. `tag` is `None` for "all".
-    Listing { tag: Option<String> },
-    /// A specific model's recorded-media page (`/{username}/media`).
-    Model { username: String },
-}
-
-/// Subset of the `media/list` JSON item fields the provider consumes. All
-/// nullable fields are modelled as `Option` so a stray `null` never aborts the
-/// whole page parse.
-#[derive(Debug, Deserialize)]
-struct ApiMediaItem {
-    id: i64,
-    #[serde(default)]
-    name: Option,
-    #[serde(default)]
-    slug: Option,
-    #[serde(default)]
-    duration: Option,
-    #[serde(default)]
-    created_at: Option,
-    #[serde(default)]
-    thumbnail_url: Option,
-    #[serde(default)]
-    user_display_name: Option,
-    #[serde(default)]
-    username: Option,
-    #[serde(default)]
-    is_video: Option,
-}
-
-#[derive(Debug, Deserialize)]
-struct ApiResponse {
-    #[serde(default)]
-    data: Vec<ApiMediaItem>,
+    /// Live browse feed at `browse/react{route}?p=N`.
+    Browse { route: String },
+    /// Keyword search at `browse/react/search/{query}?sortByConnection=1`.
+    Search { query: String },
 }
 
 impl CamsodaProvider {
     pub fn new() -> Self {
-        Self {
-            url: BASE_URL.to_string(),
-        }
+        CamsodaProvider
     }
 
     fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
-        let cat_options = std::iter::once(FilterOption {
-            id: "all".to_string(),
-            title: "All".to_string(),
-        })
-        .chain(MEDIA_TAGS.iter().map(|slug| FilterOption {
-            id: slug.to_string(),
-            title: Self::pretty_tag(slug),
-        }))
-        .collect::<Vec<_>>();
+        let cat_options = CATEGORIES
+            .iter()
+            .map(|(id, title, _)| FilterOption {
+                id: id.to_string(),
+                title: title.to_string(),
+            })
+            .collect::<Vec<_>>();
 
         Channel {
             id: CHANNEL_ID.to_string(),
             name: "CamSoda".to_string(),
-            description:
-                "CamSoda model video clips — recorded amateur cam shows uploaded by performers."
-                    .to_string(),
+            description: "CamSoda live webcams — free adult cam shows streaming right now."
+                .to_string(),
             premium: false,
             favicon: "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com".to_string(),
             status: "active".to_string(),
             categories: vec![],
-            options: vec![
-                ChannelOption {
-                    id: "sort".to_string(),
-                    title: "Sort".to_string(),
-                    description: "Order the CamSoda media feed.".to_string(),
-                    systemImage: "list.number".to_string(),
-                    colorName: "blue".to_string(),
-                    options: vec![
-                        FilterOption {
-                            id: "new".to_string(),
-                            title: "Newest".to_string(),
-                        },
-                        FilterOption {
-                            id: "popular".to_string(),
-                            title: "Popular".to_string(),
-                        },
-                        FilterOption {
-                            id: "top".to_string(),
-                            title: "Popular (All Time)".to_string(),
-                        },
-                    ],
-                    multiSelect: false,
-                },
-                ChannelOption {
-                    id: "categories".to_string(),
-                    title: "Categories".to_string(),
-                    description: "Filter CamSoda media by tag.".to_string(),
-                    systemImage: "square.grid.2x2".to_string(),
-                    colorName: "orange".to_string(),
-                    options: cat_options,
-                    multiSelect: false,
-                },
-            ],
+            options: vec![ChannelOption {
+                id: "category".to_string(),
+                title: "Category".to_string(),
+                description: "Browse a CamSoda live-cam category.".to_string(),
+                systemImage: "square.grid.2x2".to_string(),
+                colorName: "orange".to_string(),
+                options: cat_options,
+                multiSelect: false,
+            }],
             nsfw: true,
-            cacheDuration: Some(1800),
+            cacheDuration: Some(60),
         }
     }
 
-    /// Map a Hot Tub sort id to the API's `sort_by` value.
-    fn map_sort(sort: &str) -> &'static str {
-        match sort.trim().to_ascii_lowercase().as_str() {
-            "popular" | "trending" | "hot" | "featured" => "popular",
-            "top" | "rated" | "best" | "mostviewed" | "most_viewed" | "popular_all_time" => {
-                "popular_all_time"
-            }
-            // "new", "newest", "latest", "recent", empty, anything else
-            _ => "date_added",
-        }
-    }
-
-    /// Lowercase/space-normalize a value for tag lookups.
-    fn normalize_key(s: &str) -> String {
-        s.trim()
-            .trim_start_matches('#')
-            .replace(['_', ' '], "-")
-            .to_ascii_lowercase()
-    }
-
-    /// Resolve a user-supplied value to a known tag slug, if it matches one.
-    fn resolve_tag(value: &str) -> Option<String> {
-        let key = Self::normalize_key(value);
+    /// Resolve the category id (from the option or a query prefix) to a route.
+    fn route_for_category(value: &str) -> Option<String> {
+        let key = value.trim().trim_start_matches('#').to_ascii_lowercase();
         if key.is_empty() {
             return None;
         }
-        MEDIA_TAGS
+        CATEGORIES
             .iter()
-            .find(|slug| **slug == key)
-            .map(|slug| slug.to_string())
+            .find(|(id, _, _)| *id == key)
+            .map(|(_, _, route)| route.to_string())
     }
 
-    /// Pretty display title for a tag slug (e.g. `big-tits` -> `Big Tits`).
-    fn pretty_tag(slug: &str) -> String {
-        slug.split('-')
-            .map(|word| match word {
-                "dp" => "DP".to_string(),
-                "pov" => "POV".to_string(),
-                "bbw" => "BBW".to_string(),
-                "hd" => "HD".to_string(),
-                "18" => "18".to_string(),
-                other => {
-                    let mut chars = other.chars();
-                    match chars.next() {
-                        Some(first) => {
-                            first.to_uppercase().collect::<String>() + chars.as_str()
-                        }
-                        None => String::new(),
-                    }
-                }
-            })
-            .collect::<Vec<_>>()
-            .join(" ")
-    }
-
-    /// Resolve the fetch target from the query and the selected category option.
+    /// Decide what to fetch from the query and the selected category option.
     fn pick_target(query: Option<&str>, category: Option<&str>) -> Target {
-        // An explicitly selected category option wins.
         if let Some(cat) = category {
-            let cat = cat.trim();
-            if !cat.is_empty() && cat != "all" {
-                if let Some(tag) = Self::resolve_tag(cat) {
-                    return Target::Listing { tag: Some(tag) };
-                }
-                // Unknown but non-empty: still pass a slug through to the API.
-                return Target::Listing {
-                    tag: Some(Self::normalize_key(cat)),
-                };
+            if let Some(route) = Self::route_for_category(cat) {
+                return Target::Browse { route };
             }
         }
 
         let Some(query) = query.map(str::trim).filter(|v| !v.is_empty()) else {
-            return Target::Listing { tag: None };
+            return Target::Browse {
+                route: String::new(),
+            };
         };
 
-        // Model shortcuts browse a performer's media page.
-        for prefix in &["uploader:", "model:", "user:", "performer:"] {
-            if let Some(username) = query.strip_prefix(prefix) {
-                let username = username.trim().to_lowercase();
-                if !username.is_empty() {
-                    return Target::Model { username };
-                }
-            }
-        }
-
-        // Tag/category shortcuts route straight to a tag archive.
-        for prefix in &["tag:", "cat:", "category:"] {
+        // `cat:`/`category:` prefixes route to a known category.
+        for prefix in &["category:", "cat:"] {
             if let Some(rest) = query.strip_prefix(prefix) {
-                let slug = Self::resolve_tag(rest).unwrap_or_else(|| Self::normalize_key(rest));
-                if !slug.is_empty() {
-                    return Target::Listing { tag: Some(slug) };
+                if let Some(route) = Self::route_for_category(rest) {
+                    return Target::Browse { route };
                 }
             }
         }
 
-        // A bare keyword that exactly matches a known tag is much better served
-        // by that tag archive than by the (non-existent) media search endpoint.
-        if let Some(tag) = Self::resolve_tag(query) {
-            return Target::Listing { tag: Some(tag) };
+        // A bare keyword that exactly matches a category id goes to that feed.
+        if let Some(route) = Self::route_for_category(query) {
+            return Target::Browse { route };
         }
 
-        // No media keyword search exists; fall back to the default listing and
-        // let the server apply its client-side substring filter for quoted
-        // queries.
-        Target::Listing { tag: None }
+        Target::Search {
+            query: query.to_string(),
+        }
     }
 
-    fn build_api_url(tag: Option<&str>, sort: &str, page: u16) -> String {
+    fn build_url(&self, target: &Target, page: u16) -> String {
         let page = page.max(1);
-        match tag {
-            Some(tag) if !tag.is_empty() && tag != "all" => {
-                format!("{API_LIST}?page={page}&sort_by={sort}&tag={tag}")
+        match target {
+            Target::Browse { route } => format!("{}{}?p={}", API_BROWSE, route, page),
+            Target::Search { query } => {
+                let q = Self::slug_query(query);
+                // Search is a single connection-sorted result set (no paging).
+                format!("{}/search/{}?sortByConnection=1", API_BROWSE, q)
             }
-            _ => format!("{API_LIST}?page={page}&sort_by={sort}"),
         }
     }
 
-    fn build_model_url(&self, username: &str, page: u16) -> String {
-        if page <= 1 {
-            format!("{}/{}/media", self.url, username)
-        } else {
-            format!("{}/{}/media?page={}", self.url, username, page)
-        }
+    /// Encode a free-text query into the `/search/` path segment.
+    fn slug_query(query: &str) -> String {
+        query
+            .trim()
+            .to_ascii_lowercase()
+            .split_whitespace()
+            .collect::<Vec<_>>()
+            .join("-")
     }
 
     fn clean_text(text: &str) -> String {
@@ -336,91 +167,103 @@ impl CamsodaProvider {
         decoded.split_whitespace().collect::>().join(" ")
     }
 
-    fn parse_created_at(value: &str) -> Option<u64> {
-        NaiveDateTime::parse_from_str(value.trim(), "%Y-%m-%dT%H:%M:%S")
-            .ok()
-            .map(|dt| dt.and_utc().timestamp())
-            .and_then(|ts| u64::try_from(ts).ok())
-    }
-
     /// Extract the JSON object from a body that may be wrapped in HTML by the
     /// Jina mirror (`
{...}
`) or returned raw. fn extract_json(body: &str) -> Option<&str> { let start = body.find('{')?; let end = body.rfind('}')?; - if end > start { - Some(&body[start..=end]) - } else { - None - } + (end > start).then(|| &body[start..=end]) } - /// Parse the `media/list` JSON response into rich `VideoItem`s. - fn parse_api_items(body: &str, tag: Option<&str>) -> Result> { + /// Parse the `browse/react` JSON (top-level `userList`) into live + /// `VideoItem`s. Done with `serde_json::Value` so a stray field shape never + /// aborts the whole page (mirrors the chaturbate provider). + fn parse_items(body: &str) -> Result> { let json = Self::extract_json(body) .ok_or_else(|| Error::from("no JSON object found in response".to_string()))?; - let parsed: ApiResponse = serde_json::from_str(json) - .map_err(|e| Error::from(format!("media/list JSON decode failed: {e}")))?; + let root: serde_json::Value = serde_json::from_str(json) + .map_err(|e| Error::from(format!("browse JSON decode failed: {e}")))?; - let mut items = Vec::with_capacity(parsed.data.len()); - let mut seen: HashSet = HashSet::new(); + let users = root + .get("userList") + .and_then(|v| v.as_array()) + .ok_or_else(|| Error::from("missing userList array".to_string()))?; - for media in parsed.data { - if matches!(media.is_video, Some(false)) { + let mut items = Vec::with_capacity(users.len()); + for user in users { + let Some(username) = user.get("username").and_then(|v| v.as_str()) else { + continue; + }; + if username.is_empty() { continue; } - if !seen.insert(media.id) { - continue; - } - let username = media.username.unwrap_or_default(); - let slug = media.slug.unwrap_or_default(); - if username.is_empty() || slug.is_empty() { - continue; - } - let video_id = media.id.to_string(); - let title = media - .name - .as_deref() + // Skip offline performers; everything else in the browse feed is live. 
+ let status = user.get("status").and_then(|v| v.as_str()).unwrap_or(""); + if status.eq_ignore_ascii_case("offline") { + continue; + } + + let display = user + .get("displayName") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .unwrap_or(username); + + let subject = user + .get("subjectText") + .and_then(|v| v.as_str()) .map(Self::clean_text) - .filter(|t| !t.is_empty()) - .unwrap_or_else(|| format!("CamSoda video {video_id}")); + .filter(|s| !s.is_empty()); + let title = subject.unwrap_or_else(|| display.to_string()); - let duration = media - .duration - .and_then(|d| u32::try_from(d).ok()) + let thumb = user + .get("thumbUrl") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .or_else(|| user.get("offlinePictureUrl").and_then(|v| v.as_str())) + .unwrap_or("") + .to_string(); + + // connectionCount is usually a string ("34") but tolerate a number. + let views = user + .get("connectionCount") + .map(|v| match v { + serde_json::Value::String(s) => s.parse::().unwrap_or(0), + serde_json::Value::Number(n) => n.as_u64().unwrap_or(0) as u32, + _ => 0, + }) .unwrap_or(0); - let thumb = media.thumbnail_url.unwrap_or_default(); - let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}"); + let room_url = format!("{BASE_URL}/{username}"); - let mut item = VideoItem::new( - video_id, - title, - page_url, - CHANNEL_ID.to_string(), - thumb, - duration, - ); - - let uploader = media - .user_display_name - .as_deref() - .map(Self::clean_text) - .filter(|u| !u.is_empty()) - .unwrap_or_else(|| username.clone()); - item.uploader = Some(uploader); - item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media")); - item.uploaderId = Some(format!("{CHANNEL_ID}:{username}")); - - if let Some(ts) = media.created_at.as_deref().and_then(Self::parse_created_at) { - item.uploadedAt = Some(ts); + let mut tags = Vec::new(); + if status.eq_ignore_ascii_case("private") { + tags.push("Private Show".to_string()); + } + if user + .get("vr") + .and_then(|v| 
v.as_bool()) + .unwrap_or(false) + { + tags.push("VR".to_string()); } - if let Some(tag) = tag { - if !tag.is_empty() && tag != "all" { - item.tags = Some(vec![Self::pretty_tag(tag)]); - } + let mut item = VideoItem::new( + username.to_string(), + title, + room_url.clone(), + CHANNEL_ID.to_string(), + thumb, + 0, + ) + .is_live(true) + .views(views) + .uploader(display.to_string()) + .uploader_url(room_url); + item.uploaderId = Some(format!("{CHANNEL_ID}:{username}")); + if !tags.is_empty() { + item.tags = Some(tags); } items.push(item); @@ -429,178 +272,57 @@ impl CamsodaProvider { Ok(items) } - /// Parse video cards from the HTML of a CamSoda model media page. - /// - /// Each card is an anchor linking to `/{username}/media/{slug}/{id}` with a - /// `media-item-module__title` span and a `media-item-module__subtitle` span - /// holding `by UPLOADER (MM:SS)`. - fn parse_html_items(html: &str) -> Vec { - let document = Html::parse_document(html); - - let anchor_sel = match Selector::parse(r#"a[href]"#) { - Ok(s) => s, - Err(_) => return vec![], - }; - let title_sel = match Selector::parse(r#"[class*="media-item-module__title"]"#) { - Ok(s) => s, - Err(_) => return vec![], - }; - let subtitle_sel = match Selector::parse(r#"[class*="media-item-module__subtitle"]"#) { - Ok(s) => s, - Err(_) => return vec![], - }; - let img_sel = match Selector::parse(r#"img[src]"#) { - Ok(s) => s, - Err(_) => return vec![], - }; - - let sub_re = - match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") { - Ok(r) => r, - Err(_) => return vec![], - }; - let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") { - Ok(r) => r, - Err(_) => return vec![], - }; - - let mut items: Vec = Vec::new(); - let mut seen_ids: HashSet = HashSet::new(); - - for anchor in document.select(&anchor_sel) { - let href = match anchor.value().attr("href") { - Some(h) => h, - None => continue, - }; - let caps = match href_re.captures(href) { - Some(c) => c, - None => 
continue, - }; - - let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); - let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(); - let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string(); - if video_id.is_empty() || username.is_empty() { - continue; - } - if !seen_ids.insert(video_id.clone()) { - continue; - } - - let title = anchor - .select(&title_sel) - .next() - .map(|el| el.text().collect::().trim().to_string()) - .unwrap_or_default(); - let title = if title.is_empty() { - format!("CamSoda video {video_id}") - } else { - title - }; - - let subtitle = anchor - .select(&subtitle_sel) - .next() - .map(|el| el.text().collect::().trim().to_string()) - .unwrap_or_default(); - - let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) { - let u = sc - .get(1) - .map(|m| m.as_str().trim().to_string()) - .unwrap_or_default(); - let d = sc - .get(2) - .and_then(|m| parse_time_to_seconds(m.as_str())) - .and_then(|s| u32::try_from(s).ok()) - .unwrap_or(0); - (if u.is_empty() { None } else { Some(u) }, d) - } else { - (None, 0) - }; - - let thumb = anchor - .select(&img_sel) - .filter_map(|img| img.value().attr("src")) - .find(|src| src.contains("media-secure.camsoda.com")) - .or_else(|| { - anchor - .select(&img_sel) - .filter_map(|img| img.value().attr("src")) - .find(|src| src.contains("livemediahost.com")) - }) - .unwrap_or("") - .to_string(); - - let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}"); - - let mut item = VideoItem::new( - video_id, - title, - page_url, - CHANNEL_ID.to_string(), - thumb, - duration, - ); - item.uploader = uploader; - item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media")); - item.uploaderId = Some(format!("{CHANNEL_ID}:{username}")); - - items.push(item); - } - - items - } - - async fn fetch_listing( + async fn fetch( &self, - tag: Option<&str>, - sort: &str, + target: &Target, page: u16, + cache: &VideoCache, options: &ServerOptions, ) -> 
Result> { - let url = Self::build_api_url(tag, sort, page); - let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_listing"); - let text = requester - .get(&url, None) - .await - .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; + let url = self.build_url(target, page); + + // Short cache (the feed changes constantly and Jina rate-limits hard). + let stale = match cache.get(&url) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 { + return Ok(items.clone()); + } + items.clone() + } + None => vec![], + }; + + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch"); + let text = match requester.get(&url, None).await { + Ok(t) => t, + Err(e) => { + report_provider_error(CHANNEL_ID, "fetch.request", &format!("url={url}; error={e}")) + .await; + return Ok(stale); + } + }; if text.contains("cf-browser-verification") || text.contains("cf-chl") || text.contains("Just a moment") { - return Err(Error::from( - "cloudflare challenge page returned".to_string(), - )); + report_provider_error(CHANNEL_ID, "fetch.cloudflare", &format!("url={url}")).await; + return Ok(stale); } - Self::parse_api_items(&text, tag) - } - - async fn fetch_model( - &self, - username: &str, - page: u16, - options: &ServerOptions, - ) -> Result> { - let url = self.build_model_url(username, page); - let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_model"); - let text = requester - .get(&url, None) - .await - .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; - - if text.contains("cf-browser-verification") - || text.contains("cf-chl") - || text.contains("Just a moment") - { - return Err(Error::from( - "cloudflare challenge page returned".to_string(), - )); + match Self::parse_items(&text) { + Ok(items) if !items.is_empty() => { + cache.remove(&url); + cache.insert(url, items.clone()); + Ok(items) + } + Ok(_) => Ok(stale), + Err(e) => { + report_provider_error(CHANNEL_ID, "fetch.parse", 
&format!("url={url}; error={e}")) + .await; + Ok(stale) + } } - - Ok(Self::parse_html_items(&text)) } } @@ -610,13 +332,12 @@ impl Provider for CamsodaProvider { &self, cache: VideoCache, pool: DbPool, - sort: String, + _sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { - let _ = cache; let _ = pool; let _ = per_page; @@ -633,17 +354,8 @@ impl Provider for CamsodaProvider { .or(options.category.as_deref()); let target = Self::pick_target(normalized_query.as_deref(), category); - let sort_value = Self::map_sort(&sort); - let result = match &target { - Target::Listing { tag } => { - self.fetch_listing(tag.as_deref(), sort_value, page, &options) - .await - } - Target::Model { username } => self.fetch_model(username, page, &options).await, - }; - - match result { + match self.fetch(&target, page, &cache, &options).await { Ok(items) => items, Err(error) => { report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; @@ -661,128 +373,81 @@ impl Provider for CamsodaProvider { mod tests { use super::*; - fn sample_api_body() -> String { + fn sample_browse() -> String { // Mimics the Jina mirror response: JSON wrapped in a
 block.
-        r#"
{"result":true,"data":[
-{"id":15032118,"user_id":18777219,"type_id":2,"name":"Extreme Fuck & Squirt","slug":"fuck-machine-squirt-surprise","token_price":555,"created_at":"2025-06-07T10:13:35","duration":2711,"is_video":true,"thumbnail_url":"https://media-secure.camsoda.com/user/videos/15032118/15032118_1749294653.thumb.jpg","type_name":"Video","user_display_name":"Lola Bunniii","username":"lolabunniii"},
-{"id":17009049,"user_id":1,"type_id":2,"name":"FIRST IR BG SHOW","slug":"first-ir-bg-show","token_price":0,"created_at":"2026-02-12T08:00:00","duration":2654,"is_video":true,"thumbnail_url":"https://media-secure.camsoda.com/user/videos/17009049/17009049.thumb.webp","type_name":"Video","user_display_name":"Coco Dethick","username":"coco-dethick"},
-{"id":99,"user_id":2,"type_id":1,"name":"a picture","slug":"pic","is_video":false,"username":"someone"}
-]}
"#.to_string() + r#"
{"perPageCount":60,"totalCount":98,"userList":[
+{"id":1,"username":"theowonder","displayName":"Theo Wonder","connectionCount":"69","status":"online","subjectText":"hey guys & girls","thumbUrl":"https://media.livemediahost.com/thumbs/199/theowonder.jpg?cb=1","streamName":"cam_obs/theowonder-flu","vr":false},
+{"id":2,"username":"miavr","displayName":"Mia","connectionCount":139,"status":"private","subjectText":"","thumbUrl":"https://media.livemediahost.com/thumbs/197/miavr.jpg","vr":true},
+{"id":3,"username":"gone","displayName":"Gone","connectionCount":"0","status":"offline","subjectText":"bye","thumbUrl":""}
+]}
"# + .to_string() } #[test] - fn parses_api_items() { - let items = CamsodaProvider::parse_api_items(&sample_api_body(), None).unwrap(); - assert_eq!(items.len(), 2, "non-video item should be skipped"); + fn parses_live_items_and_skips_offline() { + let items = CamsodaProvider::parse_items(&sample_browse()).unwrap(); + assert_eq!(items.len(), 2, "offline performer should be skipped"); let a = &items[0]; - assert_eq!(a.id, "15032118"); - assert_eq!(a.title, "Extreme Fuck & Squirt", "html entity should decode"); - assert_eq!(a.duration, 2711); - assert_eq!( - a.url, - "https://www.camsoda.com/lolabunniii/media/fuck-machine-squirt-surprise/15032118" - ); - assert_eq!(a.uploader.as_deref(), Some("Lola Bunniii")); - assert_eq!(a.uploaderId.as_deref(), Some("camsoda:lolabunniii")); - assert!(a.thumb.contains("media-secure.camsoda.com")); - assert!(a.uploadedAt.is_some(), "created_at should parse"); + assert_eq!(a.id, "theowonder"); + assert_eq!(a.title, "hey guys & girls", "subject html-decoded"); + assert_eq!(a.url, "https://www.camsoda.com/theowonder"); + assert!(a.isLive); + assert_eq!(a.views, Some(69)); + assert_eq!(a.uploader.as_deref(), Some("Theo Wonder")); + assert_eq!(a.uploaderId.as_deref(), Some("camsoda:theowonder")); + assert!(a.thumb.contains("media.livemediahost.com")); let b = &items[1]; - assert_eq!(b.id, "17009049"); - assert_eq!(b.uploader.as_deref(), Some("Coco Dethick")); + assert_eq!(b.id, "miavr"); + // numeric connectionCount tolerated + assert_eq!(b.views, Some(139)); + // empty subject falls back to display name + assert_eq!(b.title, "Mia"); + // private + vr surface as tags + let tags = b.tags.clone().unwrap_or_default(); + assert!(tags.contains(&"Private Show".to_string())); + assert!(tags.contains(&"VR".to_string())); } #[test] - fn tags_attached_when_filtering() { - let items = - CamsodaProvider::parse_api_items(&sample_api_body(), Some("big-tits")).unwrap(); - assert_eq!(items[0].tags.as_deref(), Some(["Big Tits".to_string()].as_slice())); 
- } - - #[test] - fn maps_sort_values() { - assert_eq!(CamsodaProvider::map_sort("new"), "date_added"); - assert_eq!(CamsodaProvider::map_sort(""), "date_added"); - assert_eq!(CamsodaProvider::map_sort("popular"), "popular"); - assert_eq!(CamsodaProvider::map_sort("top"), "popular_all_time"); - assert_eq!(CamsodaProvider::map_sort("rated"), "popular_all_time"); - } - - #[test] - fn picks_target_correctly() { + fn picks_target() { assert_eq!( CamsodaProvider::pick_target(None, None), - Target::Listing { tag: None } + Target::Browse { route: String::new() } ); - // bare non-tag keyword -> default listing (server substring-filters) assert_eq!( - CamsodaProvider::pick_target(Some("some random phrase"), None), - Target::Listing { tag: None } + CamsodaProvider::pick_target(None, Some("girls")), + Target::Browse { route: "/girls".to_string() } ); - // bare keyword matching a known tag -> tag archive assert_eq!( - CamsodaProvider::pick_target(Some("blowjob"), None), - Target::Listing { - tag: Some("blowjob".to_string()) - } + CamsodaProvider::pick_target(Some("cat:trans"), None), + Target::Browse { route: "/trans".to_string() } ); - // tag: prefix assert_eq!( - CamsodaProvider::pick_target(Some("tag:big tits"), None), - Target::Listing { - tag: Some("big-tits".to_string()) - } + CamsodaProvider::pick_target(Some("voyeur-cams"), None), + Target::Browse { route: "/voyeur-cams".to_string() } ); - // category option selected assert_eq!( - CamsodaProvider::pick_target(None, Some("latina")), - Target::Listing { - tag: Some("latina".to_string()) - } - ); - // model shortcut - assert_eq!( - CamsodaProvider::pick_target(Some("model:katt-leya"), None), - Target::Model { - username: "katt-leya".to_string() - } + CamsodaProvider::pick_target(Some("blonde teen"), None), + Target::Search { query: "blonde teen".to_string() } ); } #[test] - fn builds_api_urls() { + fn builds_urls() { + let p = CamsodaProvider::new(); assert_eq!( - CamsodaProvider::build_api_url(None, "date_added", 1), - 
"https://www.camsoda.com/api/v1/media/list/video?page=1&sort_by=date_added" + p.build_url(&Target::Browse { route: String::new() }, 1), + "https://www.camsoda.com/api/v1/browse/react?p=1" ); assert_eq!( - CamsodaProvider::build_api_url(None, "date_added", 3), - "https://www.camsoda.com/api/v1/media/list/video?page=3&sort_by=date_added" + p.build_url(&Target::Browse { route: "/girls".to_string() }, 3), + "https://www.camsoda.com/api/v1/browse/react/girls?p=3" ); assert_eq!( - CamsodaProvider::build_api_url(Some("big-tits"), "popular", 2), - "https://www.camsoda.com/api/v1/media/list/video?page=2&sort_by=popular&tag=big-tits" + p.build_url(&Target::Search { query: "big boobs".to_string() }, 1), + "https://www.camsoda.com/api/v1/browse/react/search/big-boobs?sortByConnection=1" ); } - - #[test] - fn pretty_tag_titles() { - assert_eq!(CamsodaProvider::pretty_tag("big-tits"), "Big Tits"); - assert_eq!(CamsodaProvider::pretty_tag("pov"), "POV"); - assert_eq!(CamsodaProvider::pretty_tag("teen-18"), "Teen 18"); - } - - #[test] - fn parses_model_html() { - let html = r#" - Torso rideby jazzyj (24:35) - -"#; - let items = CamsodaProvider::parse_html_items(html); - assert_eq!(items.len(), 1); - assert_eq!(items[0].id, "16984249"); - assert_eq!(items[0].uploader.as_deref(), Some("jazzyj")); - assert_eq!(items[0].duration, 24 * 60 + 35); - } } diff --git a/src/videos.rs b/src/videos.rs index 3444332..7fe2598 100644 --- a/src/videos.rs +++ b/src/videos.rs @@ -194,7 +194,11 @@ impl VideoItem { self } - #[cfg(any(not(hottub_single_provider), hottub_provider = "chaturbate"))] + #[cfg(any( + not(hottub_single_provider), + hottub_provider = "chaturbate", + hottub_provider = "camsoda" + ))] pub fn is_live(mut self, is_live: bool) -> Self { self.isLive = is_live; self