fyptt

2026-06-19 08:25:33 +00:00
parent 674ea99b0d
commit 1bd06db894
3 changed files with 528 additions and 0 deletions
--- a/build.rs
+++ b/build.rs
@@ -366,6 +366,11 @@ const PROVIDERS: &[ProviderDef] = &[
        module: "camsoda",
        ty: "CamsodaProvider",
    },
+    ProviderDef {
+        id: "fyptt",
+        module: "fyptt",
+        ty: "FypttProvider",
+    },
 ];

 fn main() {
--- a/docs/provider-catalog.md
+++ b/docs/provider-catalog.md
@@ -16,6 +16,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
 | `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:<name>`). |
 | `fikfap` | `tiktok` | yes | yes (thumbs only) | JSON-API provider for fikfap.com (TikTok-style swipe short clips); anonymous auth via a client-generated `Authorization-Anonymous` UUID header (no real login needed); listing via `GET api.fikfap.com/posts?sort=new\|trending\|random&amount=N&afterId=<lastPostId>` (cursor pagination — page N costs N sequential requests); search via `GET search?q=` (single fixed-size batch, no pagination — page 2+ returns empty); hashtag feeds via `GET hashtags/label/{label}/posts` and creator feeds via `GET profile/username/{user}/posts`, both also cursor-paginated; `tag:`/`hashtag:`/`#` and `user:`/`uploader:` query prefixes route directly; `categories` option exposes a small curated static hashtag list (no full catalog endpoint exists anonymously); `video.url` is the `fikfap.com/post/{id}` page (a client-rendered SPA, not yt-dlp-resolvable on its own); `videoStreamUrl` from the JSON response is sent directly as `formats[0].url` (signed Bunny CDN HLS `.m3u8`, ~24h token expiry) with `httpHeaders: {Referer: https://fikfap.com/}` — Hot Tub clients apply a format's `http_headers` across the whole HLS playback session (manifest, sub-playlists, and segments), so no proxying of the media itself is needed; thumbnails have no per-field header mechanism, so they're proxied via `/proxy/fikfap-thumb/...` to inject the same Referer; `get_uploader` implemented (`fikfap:<username>` IDs) using `GET profile/username/{user}`. |
 | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. |
+| `fyptt` | `tiktok` | no | no | HTML scraper for fyptt.to (Beaver Builder/WordPress short-form TikTok-style vertical porn); card selector `.fl-post-grid-post[class*="post-ID"]` with `category-{slug}` CSS class doubling as both listing tag and category-archive route; latest feed `/` (page N: `/page/N/`), search `/?s=query` (page N: `/page/N/?s=query`), category archives at bare top-level slugs like `/tiktok-ass/` (12 hardcoded categories exposed via `categories` option, also matched from free-text `cat:`/`category:` query prefixes or bare category-title queries); per-item enrichment fetches the detail page for the JSON-LD `embedURL` (one of three on-site player endpoints: `fypttstr.php`, `fypttjwstr.php`, or `fypttjwstrhls.php`) and `datePublished`, then fetches that embed URL to extract the actual signed `stream.fyptt.to` mp4 or `/hls/*.m3u8` URL (token expires ~2h, no Referer required) for `formats`; thumbnails (`fyptt.to/wp-content/uploads/...webp`) need no proxy; no duration metadata available on listing or detail pages (set to 0); no real uploader/model identity (the `girl-{slug}` CSS class is cosmetic only, not a linkable archive) so `/api/uploaders` is not implemented; `video.url` is the detail page URL (not yt-dlp resolvable directly — the player is sandboxed-iframe-only) so `formats` are populated instead; no proxy needed. |
 | `freeuseporn` | `fetish-kink` | no | no | Fetish archive pattern. |
 | `hanime` | `hentai-animation` | no | yes | Uses proxied CDN/thumb handling. |
 | `heavyfetish` | `fetish-kink` | no | no | Direct media handling. |
--- a/src/providers/fyptt.rs
+++ b/src/providers/fyptt.rs
@@ -0,0 +1,522 @@
+use crate::DbPool;
+use crate::api::ClientVersion;
+use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default};
+use crate::status::*;
+use crate::util::cache::VideoCache;
+use crate::videos::{ServerOptions, VideoFormat, VideoItem};
+use async_trait::async_trait;
+use chrono::DateTime;
+use error_chain::error_chain;
+use futures::stream::{self, StreamExt};
+use ntex::http::Version;
+use regex::Regex;
+use scraper::{Html, Selector};
+
+/// Static channel metadata for this provider: the group it is filed under (`tiktok`) and
+/// the tags attached to it.
+pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
+    crate::providers::ProviderChannelMetadata {
+        group_id: "tiktok",
+        tags: &["tiktok", "vertical", "shorts"],
+    };
+
+/// Site origin; every listing URL is built from it and it is the default `Referer`.
+const BASE_URL: &str = "https://fyptt.to";
+/// Identifier used for the channel, for produced video items, and in error reports.
+const CHANNEL_ID: &str = "fyptt";
+/// Page size used when the caller-supplied `per_page` does not parse as a number.
+const DEFAULT_PER_PAGE: usize = 24;
+/// Upper bound on per-card enrichment futures kept in flight at the same time.
+const ENRICH_CONCURRENCY: usize = 6;
+
+/// Known categories as `(slug, title)` pairs.
+///
+/// The slug is the path segment of the category archive URL and the id of the matching
+/// filter option; the title is the label shown for that option and the tag attached to
+/// videos whose listing card carries the corresponding `category-<slug>` class.
+const CATEGORIES: &[(&str, &str)] = &[
+    ("tiktok-nudes", "Nudes"),
+    ("tiktok-porn", "TikTok"),
+    ("tiktok-boobs", "Boobs"),
+    ("instagram-porn", "Instagram"),
+    ("tiktok-sex", "Sex"),
+    ("nsfw-tiktok", "NSFW"),
+    ("tiktok-xxx", "XXX"),
+    ("tiktok-ass", "Ass"),
+    ("tiktok-pussy", "Pussy"),
+    ("tiktok-live", "Live"),
+    ("tiktok-thots", "Thots"),
+    ("sexy-tiktok", "Sexy"),
+];
+
+// Generates this module's `Error`, `ErrorKind`, and `Result` types.
+// `foreign_links` declares `std::io::Error` and `url::ParseError` as wrapped foreign
+// errors (`Io`, `Url`); `Parse` is a custom kind carrying a free-form message.
+// Most call sites in this file build errors from a plain `String` via `Error::from`.
+error_chain! {
+    foreign_links {
+        Io(std::io::Error);
+        Url(url::ParseError);
+    }
+    errors {
+        Parse(msg: String) {
+            description("parse error")
+            display("parse error: {}", msg)
+        }
+    }
+}
+
+/// Provider for fyptt.to. It is a stateless unit struct: all scraping helpers are
+/// associated functions and take what they need as arguments.
+#[derive(Debug, Clone)]
+pub struct FypttProvider;
+
+/// The listing a request resolves to; consumed by `build_listing_url`.
+#[derive(Debug, Clone)]
+enum Target {
+    /// The site's front-page feed.
+    Latest,
+    /// Free-text search; `query` is the raw text and is URL-encoded when the URL is built.
+    Search { query: String },
+    /// A category archive addressed by its top-level URL slug.
+    Category { slug: String },
+}
+
+/// Data scraped from one listing card, before its detail page has been fetched.
+#[derive(Debug, Clone)]
+struct CardStub {
+    /// Suffix of the card's first `post-…` CSS class; used as the video id.
+    id: String,
+    /// Trimmed text of the title link, or the id when that text is empty.
+    title: String,
+    /// `href` of the title link, i.e. the detail page URL.
+    url: String,
+    /// `src` of the first `<img>` inside the card; empty when the card has none.
+    thumb: String,
+    /// Slug captured from the first `category-<slug>` CSS class, if any.
+    category_slug: Option<String>,
+}
+
+impl FypttProvider {
+    /// Creates the provider. The type is a unit struct, so there is nothing to set up.
+    pub fn new() -> Self {
+        FypttProvider
+    }
+
+    /// Builds the channel descriptor advertised for this provider.
+    ///
+    /// The descriptor exposes one single-select `categories` option whose choices are
+    /// generated from `CATEGORIES` (slug as id, title as label). The client version is
+    /// accepted for interface compatibility but is not consulted.
+    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
+        // One filter choice per known category, in table order.
+        let category_choices = CATEGORIES
+            .iter()
+            .map(|&(slug, title)| FilterOption {
+                id: String::from(slug),
+                title: String::from(title),
+            })
+            .collect();
+
+        let category_option = ChannelOption {
+            id: String::from("categories"),
+            title: String::from("Category"),
+            description: String::from("Browse FYPTT by content category."),
+            systemImage: String::from("square.grid.2x2"),
+            colorName: String::from("purple"),
+            options: category_choices,
+            multiSelect: false,
+        };
+
+        Channel {
+            id: String::from(CHANNEL_ID),
+            name: String::from("FYPTT"),
+            description: String::from("FYPTT.to short-form vertical TikTok/Reels/Shorts-style porn videos with category browsing and direct playback."),
+            premium: false,
+            favicon: String::from("https://www.google.com/s2/favicons?sz=64&domain=fyptt.to"),
+            status: String::from("active"),
+            categories: Vec::new(),
+            options: vec![category_option],
+            nsfw: true,
+            cacheDuration: Some(1800),
+        }
+    }
+
+    /// Parses a CSS selector, turning a parse failure into this module's `Error` with the
+    /// offending selector text included in the message.
+    fn selector(value: &str) -> Result<Selector> {
+        match Selector::parse(value) {
+            Ok(parsed) => Ok(parsed),
+            Err(error) => Err(Error::from(format!("selector `{value}` failed: {error}"))),
+        }
+    }
+
+    /// Resolves free text to a known category slug.
+    ///
+    /// The input is trimmed and lowercased, then compared ASCII-case-insensitively against
+    /// both the slug and the title of every `CATEGORIES` entry. The first match wins.
+    /// Returns `None` when nothing matches.
+    fn category_slug_for(query: &str) -> Option<&'static str> {
+        let needle = query.trim().to_lowercase();
+        for &(slug, title) in CATEGORIES {
+            let matches_slug = slug.eq_ignore_ascii_case(&needle);
+            if matches_slug || title.eq_ignore_ascii_case(&needle) {
+                return Some(slug);
+            }
+        }
+        None
+    }
+
+    /// Decides which listing a free-text query refers to.
+    ///
+    /// * `None`, or a query that is empty after trimming, selects [`Target::Latest`].
+    /// * A `cat:` / `category:` prefix selects a category: the remainder is resolved through
+    ///   `category_slug_for`, and when it is not a known slug or title it is used verbatim
+    ///   (trimmed) as the slug. A prefix with nothing after it selects [`Target::Latest`]
+    ///   instead of producing a category with an empty slug.
+    /// * A bare query equal to a known slug or title selects that category.
+    /// * Anything else becomes a [`Target::Search`].
+    fn pick_target(query: Option<&str>) -> Target {
+        let Some(q) = query.map(str::trim).filter(|q| !q.is_empty()) else {
+            return Target::Latest;
+        };
+
+        if let Some(rest) = q.strip_prefix("cat:").or_else(|| q.strip_prefix("category:")) {
+            let rest = rest.trim();
+            if rest.is_empty() {
+                // "cat:" alone names no category; an empty slug would request `{BASE_URL}//`.
+                return Target::Latest;
+            }
+            let slug = Self::category_slug_for(rest).unwrap_or(rest).to_string();
+            return Target::Category { slug };
+        }
+
+        match Self::category_slug_for(q) {
+            Some(slug) => Target::Category { slug: slug.to_string() },
+            None => Target::Search { query: q.to_string() },
+        }
+    }
+
+    /// Builds the listing URL for `target` at the 1-based `page` (0 is treated as 1).
+    ///
+    /// * Latest: `/` and `/page/N/`.
+    /// * Search: `/?s=<query>` and `/page/N/?s=<query>`.
+    /// * Category: `/<slug>/` and `/<slug>/page/N/`.
+    ///
+    /// Both the search text and the category slug are percent-encoded. The slug can come
+    /// from free text (a `cat:` query or an unrecognised category option), so characters
+    /// such as `/`, `?` or `#` must not be able to change the shape of the URL. Slugs made
+    /// of ASCII letters, digits, `-`, `_` and `.` are emitted unchanged.
+    fn build_listing_url(target: &Target, page: u16) -> String {
+        let encode = |raw: &str| -> String {
+            url::form_urlencoded::byte_serialize(raw.as_bytes()).collect()
+        };
+        let paged = page > 1;
+
+        match target {
+            Target::Latest if paged => format!("{BASE_URL}/page/{page}/"),
+            Target::Latest => format!("{BASE_URL}/"),
+            Target::Search { query } => {
+                let encoded = encode(query);
+                if paged {
+                    format!("{BASE_URL}/page/{page}/?s={encoded}")
+                } else {
+                    format!("{BASE_URL}/?s={encoded}")
+                }
+            }
+            Target::Category { slug } => {
+                let slug = encode(slug);
+                if paged {
+                    format!("{BASE_URL}/{slug}/page/{page}/")
+                } else {
+                    format!("{BASE_URL}/{slug}/")
+                }
+            }
+        }
+    }
+
+    /// Returns the request headers sent with every HTML fetch: a fixed desktop Chrome
+    /// `User-Agent` followed by the given `Referer`.
+    fn html_headers(referer: &str) -> Vec<(String, String)> {
+        const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
+
+        [("User-Agent", USER_AGENT), ("Referer", referer)]
+            .iter()
+            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
+            .collect()
+    }
+
+    /// Fetches `url` over HTTP/1.1 with the standard headers and the given `referer`, and
+    /// returns the response body. A request failure is reported as an `Error` whose message
+    /// names the URL.
+    async fn fetch_html(options: &ServerOptions, url: &str, referer: &str) -> Result<String> {
+        let headers = Self::html_headers(referer);
+        let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
+        let response = requester
+            .get_with_headers(url, headers, Some(Version::HTTP_11))
+            .await;
+
+        match response {
+            Ok(body) => Ok(body),
+            Err(error) => Err(Error::from(format!("request failed for {url}: {error}"))),
+        }
+    }
+
+    /// Extracts one [`CardStub`] per listing card found in `html`.
+    ///
+    /// A card is any `.fl-post-grid-post` element. Cards are skipped when they have no
+    /// non-empty `post-…` class suffix, when that id was already seen on this page, or when
+    /// the title link is missing or has an empty `href`. Note that an id is recorded as
+    /// seen before the link is checked.
+    ///
+    /// # Errors
+    /// Fails only if one of the hard-coded selectors or the class regex does not compile.
+    fn parse_listing_page(html: &str) -> Result<Vec<CardStub>> {
+        let cards = Self::selector(".fl-post-grid-post")?;
+        let title_links = Self::selector(".fl-post-grid-title a[href]")?;
+        let images = Self::selector("img[src]")?;
+        let category_class = Regex::new(r"category-([a-z0-9-]+)")
+            .map_err(|error| Error::from(format!("regex failed: {error}")))?;
+
+        let document = Html::parse_document(html);
+        let mut seen = std::collections::HashSet::new();
+        let mut found = Vec::new();
+
+        for card in document.select(&cards) {
+            let classes = card.value().attr("class").unwrap_or_default();
+
+            // The first class starting with `post-` decides the id, even if its suffix is empty.
+            let Some(id) = classes
+                .split_whitespace()
+                .find_map(|class| class.strip_prefix("post-"))
+                .filter(|suffix| !suffix.is_empty())
+            else {
+                continue;
+            };
+            let id = id.to_string();
+            if !seen.insert(id.clone()) {
+                continue;
+            }
+
+            let Some(anchor) = card.select(&title_links).next() else {
+                continue;
+            };
+            let href = anchor.value().attr("href").unwrap_or_default();
+            if href.is_empty() {
+                continue;
+            }
+
+            let link_text = anchor.text().collect::<Vec<_>>().join("");
+            let title = match link_text.trim() {
+                "" => id.clone(),
+                text => text.to_string(),
+            };
+
+            let thumb = match card.select(&images).next() {
+                Some(img) => img.value().attr("src").unwrap_or_default().to_string(),
+                None => String::new(),
+            };
+
+            let category_slug = category_class
+                .captures(classes)
+                .and_then(|caps| caps.get(1))
+                .map(|slug| slug.as_str().to_string());
+
+            found.push(CardStub {
+                id,
+                title,
+                url: href.to_string(),
+                thumb,
+                category_slug,
+            });
+        }
+
+        Ok(found)
+    }
+
+    /// Downloads the listing page at `url` (with the site origin as `Referer`) and parses
+    /// its cards.
+    async fn fetch_listing(url: &str, options: &ServerOptions) -> Result<Vec<CardStub>> {
+        let body = Self::fetch_html(options, url, BASE_URL).await?;
+        let stubs = Self::parse_listing_page(&body)?;
+        Ok(stubs)
+    }
+
+    /// Pulls the value of the first `"embedURL":"…"` pair out of `html` and unescapes JSON
+    /// `\/` sequences. Returns `None` when the key or the closing quote is missing.
+    fn extract_embed_url(html: &str) -> Option<String> {
+        const KEY: &str = "\"embedURL\":\"";
+
+        let (_, after_key) = html.split_once(KEY)?;
+        let (raw, _) = after_key.split_once('"')?;
+        Some(raw.replace("\\/", "/"))
+    }
+
+    /// Reads the first `"datePublished":"…"` value from `html` and converts it to Unix
+    /// seconds.
+    ///
+    /// Returns `None` when the key or closing quote is missing, when the value is not a
+    /// valid RFC 3339 timestamp, or when the timestamp lies before the Unix epoch (a
+    /// negative value cannot be represented as `u64` and must not wrap around).
+    fn extract_uploaded_at(html: &str) -> Option<u64> {
+        const KEY: &str = "\"datePublished\":\"";
+
+        let start = html.find(KEY)? + KEY.len();
+        let rest = &html[start..];
+        let end = rest.find('"')?;
+        // JSON may escape forward slashes; undo that before parsing.
+        let raw = rest[..end].replace("\\/", "/");
+
+        let seconds = DateTime::parse_from_rfc3339(&raw).ok()?.timestamp();
+        (seconds >= 0).then_some(seconds as u64)
+    }
+
+    /// Finds the first `https://stream.fyptt.to/…` URL in `html`.
+    ///
+    /// The URL ends at the first quote, whitespace, `<` or `>`, so a missing closing quote
+    /// cannot drag the rest of the document into the result. `&amp;` is decoded to `&`
+    /// because the URL is normally read from an HTML attribute, where the query-string
+    /// separator may be entity-encoded. Returns `None` when no such URL is present.
+    fn extract_stream_url(html: &str) -> Option<String> {
+        const PREFIX: &str = "https://stream.fyptt.to/";
+
+        let rest = &html[html.find(PREFIX)?..];
+        let end = rest
+            .find(|c: char| matches!(c, '"' | '\'' | '<' | '>') || c.is_whitespace())
+            .unwrap_or(rest.len());
+
+        Some(rest[..end].replace("&amp;", "&"))
+    }
+
+    /// Looks up the display title for an exact (case-sensitive) category slug.
+    fn category_title(slug: &str) -> Option<&'static str> {
+        for &(known, title) in CATEGORIES {
+            if known == slug {
+                return Some(title);
+            }
+        }
+        None
+    }
+
+    /// Turns a listing stub into a playable [`VideoItem`].
+    ///
+    /// Two requests are made: the detail page (for the embed URL and publish date), then
+    /// the embed page (for the stream URL). The item gets a single format — HLS when the
+    /// stream URL contains `.m3u8`, mp4 otherwise — a duration of 0, a 9:16 aspect ratio,
+    /// and a tag when the card's category slug is a known one.
+    ///
+    /// # Errors
+    /// Fails when either request fails, or when the embed URL or the stream URL cannot be
+    /// found in the fetched markup.
+    async fn enrich_card(stub: CardStub, options: &ServerOptions) -> Result<VideoItem> {
+        let CardStub {
+            id,
+            title,
+            url,
+            thumb,
+            category_slug,
+        } = stub;
+
+        let detail_html = Self::fetch_html(options, &url, BASE_URL).await?;
+        let Some(embed_url) = Self::extract_embed_url(&detail_html) else {
+            return Err(Error::from(format!("no embedURL found in {}", url)));
+        };
+        let uploaded_at = Self::extract_uploaded_at(&detail_html);
+
+        // The embed page is requested with the detail page as its referer.
+        let player_html = Self::fetch_html(options, &embed_url, &url).await?;
+        let Some(stream_url) = Self::extract_stream_url(&player_html) else {
+            return Err(Error::from(format!("no stream url found via {embed_url}")));
+        };
+
+        let quality = "auto".to_string();
+        let format = match stream_url.contains(".m3u8") {
+            true => VideoFormat::m3u8(stream_url, quality, "m3u8".to_string()),
+            false => VideoFormat::new(stream_url, quality, "mp4".to_string()),
+        };
+
+        let tags = category_slug
+            .as_deref()
+            .and_then(Self::category_title)
+            .map(|tag| vec![tag.to_string()]);
+
+        let mut item = VideoItem::new(id, title, url, CHANNEL_ID.to_string(), thumb, 0);
+        item.formats = Some(vec![format]);
+        item.uploadedAt = uploaded_at;
+        item.aspectRatio = Some(9.0 / 16.0);
+        // Leave the constructor's default tags untouched when the category is unknown.
+        if tags.is_some() {
+            item.tags = tags;
+        }
+
+        Ok(item)
+    }
+
+    /// Fetches one listing page and enriches its cards into playable items.
+    ///
+    /// At most `per_page` cards are taken from the listing. They are enriched with up to
+    /// `ENRICH_CONCURRENCY` requests in flight, and the results are returned in listing
+    /// order (`buffered`, not `buffer_unordered`), so the feed order does not depend on
+    /// which detail page happens to answer first. A card whose enrichment fails is
+    /// reported in the background and left out; it does not fail the page.
+    ///
+    /// # Errors
+    /// Fails only when the listing page itself cannot be fetched or parsed.
+    async fn fetch_page(
+        target: Target,
+        page: u16,
+        per_page: usize,
+        options: &ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let url = Self::build_listing_url(&target, page);
+        let stubs = Self::fetch_listing(&url, options).await?;
+
+        let enrichments = stubs.into_iter().take(per_page).map(|stub| {
+            // Each future owns its own copy of the options.
+            let options = options.clone();
+            async move {
+                match Self::enrich_card(stub, &options).await {
+                    Ok(item) => Some(item),
+                    Err(error) => {
+                        report_provider_error_background(
+                            CHANNEL_ID,
+                            "fetch_page.enrich_card",
+                            &error.to_string(),
+                        );
+                        None
+                    }
+                }
+            }
+        });
+
+        let items = stream::iter(enrichments)
+            .buffered(ENRICH_CONCURRENCY)
+            .filter_map(|item| async move { item })
+            .collect::<Vec<_>>()
+            .await;
+
+        Ok(items)
+    }
+}
+
+#[async_trait]
+impl Provider for FypttProvider {
+    /// Returns one page of videos for the request.
+    ///
+    /// `page` falls back to 1 when it does not parse and is never below 1; `per_page`
+    /// falls back to `DEFAULT_PER_PAGE` and is clamped to `1..=48`. A non-empty category
+    /// option takes precedence over the query: it is resolved to a known slug when
+    /// possible and otherwise used as the slug as-is. Without a category option the
+    /// trimmed query decides the target. The cache, pool and sort arguments are unused.
+    ///
+    /// Any failure is reported and results in an empty list.
+    async fn get_videos(
+        &self,
+        _cache: VideoCache,
+        _pool: DbPool,
+        _sort: String,
+        query: Option<String>,
+        page: String,
+        per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        let page = match page.parse::<u16>() {
+            Ok(number) => number.max(1),
+            Err(_) => 1,
+        };
+        let per_page = match per_page.parse::<usize>() {
+            Ok(size) => size,
+            Err(_) => DEFAULT_PER_PAGE,
+        }
+        .clamp(1, 48);
+
+        let selected_category = options
+            .categories
+            .as_deref()
+            .map(str::trim)
+            .filter(|category| !category.is_empty());
+
+        let target = if let Some(category) = selected_category {
+            let slug = Self::category_slug_for(category)
+                .unwrap_or(category)
+                .to_string();
+            Target::Category { slug }
+        } else {
+            let free_text = query
+                .as_deref()
+                .map(str::trim)
+                .filter(|text| !text.is_empty());
+            Self::pick_target(free_text)
+        };
+
+        let error = match Self::fetch_page(target, page, per_page, &options).await {
+            Ok(items) => return items,
+            Err(error) => error,
+        };
+        report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
+        Vec::new()
+    }
+
+    /// Returns this provider's channel descriptor; it is always available.
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        let channel = self.build_channel(clientversion);
+        Some(channel)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Covers first-page and paginated URL shapes for all three targets, including
+    // form-style encoding of a space in the search text.
+    #[test]
+    fn builds_listing_urls() {
+        assert_eq!(
+            FypttProvider::build_listing_url(&Target::Latest, 1),
+            "https://fyptt.to/"
+        );
+        assert_eq!(
+            FypttProvider::build_listing_url(&Target::Latest, 2),
+            "https://fyptt.to/page/2/"
+        );
+        assert_eq!(
+            FypttProvider::build_listing_url(
+                &Target::Search { query: "thot ass".to_string() },
+                1
+            ),
+            "https://fyptt.to/?s=thot+ass"
+        );
+        assert_eq!(
+            FypttProvider::build_listing_url(
+                &Target::Search { query: "thot".to_string() },
+                2
+            ),
+            "https://fyptt.to/page/2/?s=thot"
+        );
+        assert_eq!(
+            FypttProvider::build_listing_url(&Target::Category { slug: "tiktok-ass".to_string() }, 1),
+            "https://fyptt.to/tiktok-ass/"
+        );
+        assert_eq!(
+            FypttProvider::build_listing_url(&Target::Category { slug: "tiktok-ass".to_string() }, 2),
+            "https://fyptt.to/tiktok-ass/page/2/"
+        );
+    }
+
+    // A `cat:` prefix followed by a known slug resolves to that category.
+    #[test]
+    fn picks_category_target_from_prefix() {
+        match FypttProvider::pick_target(Some("cat:tiktok-ass")) {
+            Target::Category { slug } => assert_eq!(slug, "tiktok-ass"),
+            other => panic!("expected Category, got {:?}", other),
+        }
+    }
+
+    // A bare query equal to a category title maps to that category's slug.
+    #[test]
+    fn picks_category_target_from_title_match() {
+        match FypttProvider::pick_target(Some("Boobs")) {
+            Target::Category { slug } => assert_eq!(slug, "tiktok-boobs"),
+            other => panic!("expected Category, got {:?}", other),
+        }
+    }
+
+    // Text that matches no category slug or title falls through to a search.
+    #[test]
+    fn picks_search_target_for_unknown_query() {
+        match FypttProvider::pick_target(Some("amateur")) {
+            Target::Search { query } => assert_eq!(query, "amateur"),
+            other => panic!("expected Search, got {:?}", other),
+        }
+    }
+
+    // The fixture uses JSON-escaped slashes (`\/`); the extractor must unescape them and
+    // leave the percent-encoded query value untouched.
+    #[test]
+    fn extracts_embed_url_from_json_ld() {
+        let html = r#"{"@type":"VideoObject","embedURL":"https:\/\/fyptt.to\/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug","name":"Title"}"#;
+        assert_eq!(
+            FypttProvider::extract_embed_url(html).as_deref(),
+            Some("https://fyptt.to/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug")
+        );
+    }
+
+    // An RFC 3339 `datePublished` value is converted to Unix seconds.
+    #[test]
+    fn extracts_uploaded_at_from_json_ld() {
+        let html = r#"{"datePublished":"2026-05-29T13:06:45+00:00"}"#;
+        assert_eq!(FypttProvider::extract_uploaded_at(html), Some(1780060005));
+    }
+
+    // The mp4 URL is cut at the closing quote of the `src` attribute, query string included.
+    #[test]
+    fn extracts_mp4_stream_url() {
+        let html = r#"<source src="https://stream.fyptt.to/1jyYHhjN.mp4?token=AAA&expires=111" type="video/mp4" />"#;
+        assert_eq!(
+            FypttProvider::extract_stream_url(html).as_deref(),
+            Some("https://stream.fyptt.to/1jyYHhjN.mp4?token=AAA&expires=111")
+        );
+    }
+
+    // Same extraction for the HLS variant, whose path lives under `/hls/`.
+    #[test]
+    fn extracts_hls_stream_url() {
+        let html = r#"<source src="https://stream.fyptt.to/hls/J2KKe8ya.m3u8?token=BBB&expires=222" type="application/x-mpegURL" />"#;
+        assert_eq!(
+            FypttProvider::extract_stream_url(html).as_deref(),
+            Some("https://stream.fyptt.to/hls/J2KKe8ya.m3u8?token=BBB&expires=222")
+        );
+    }
+
+    // One card whose class list contains several `fl-post-…` classes before `post-23135`:
+    // only a class that starts with `post-` may supply the id, and the `category-…` class
+    // supplies the category slug.
+    #[test]
+    fn parses_listing_cards() {
+        let html = r#"<div class="fl-post-grid-post fl-post-grid-image-above-title fl-post-columns-post fl-post-align-default post-23135 post type-post status-publish format-standard has-post-thumbnail hentry category-nsfw-tiktok girl-fernandamotafarhat"><div class="fl-post-grid-image"> <a href="https://fyptt.to/23135/slug/"><img src="https://fyptt.to/thumb.webp" /></a></div><div class="fl-post-grid-text"><h3 class="fl-post-grid-title"><a href="https://fyptt.to/23135/slug/" title="Some Title">Some Title</a></h3></div></div>"#;
+        let stubs = FypttProvider::parse_listing_page(html).unwrap();
+        assert_eq!(stubs.len(), 1);
+        assert_eq!(stubs[0].id, "23135");
+        assert_eq!(stubs[0].title, "Some Title");
+        assert_eq!(stubs[0].url, "https://fyptt.to/23135/slug/");
+        assert_eq!(stubs[0].thumb, "https://fyptt.to/thumb.webp");
+        assert_eq!(stubs[0].category_slug.as_deref(), Some("nsfw-tiktok"));
+    }
+}