Compare commits

..

91 Commits

Author SHA1 Message Date
Simon
ff680964cf porn4fans fix 2026-06-20 12:03:23 +00:00
Simon
d21c36e585 fyptt search fix
Bare keyword queries no longer hijack to a category archive when the
query matches a category name (sexy/ass/tiktok/...); only an explicit
cat:/category: prefix or the categories filter routes to an archive.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-19 21:47:21 +00:00
Simon
1bd06db894 fyptt 2026-06-19 08:25:33 +00:00
Simon
674ea99b0d fikffap 2026-06-19 06:07:50 +00:00
Simon
1a1a05941c fikfap 2026-06-18 13:49:08 +00:00
Simon
25ea03d696 hentaihaven 2026-06-18 11:24:09 +00:00
Simon
751fc7765a tikporn 2026-06-18 10:41:49 +00:00
Simon
c19e3ebbea pmvhaven fix 2026-06-15 21:40:09 +00:00
Simon
b761325018 wowxxx 2026-05-27 19:07:53 +00:00
Simon
8220ce1095 renamed channel id 2026-05-22 13:14:58 +00:00
Simon
95d2defa13 camsoda and pornhub shorties 2026-05-22 10:26:05 +00:00
Simon
e4941b1209 removed archivebate1 2026-05-22 09:19:19 +00:00
Simon
a5c6290596 hanime changes 2026-05-22 09:07:18 +00:00
Simon
7149847a2a supjav and shooshtime fixes 2026-05-22 08:38:36 +00:00
Simon
62e3a20d7d supjav fix 2026-05-22 04:32:52 +00:00
Simon
55edb2b4e3 supjav fix 2026-05-21 21:08:31 +00:00
Simon
7db9465750 xhamster, xnxx, xvidos early build 2026-05-21 13:52:32 +00:00
Simon
8ca1df8f5a eporner 2026-05-21 13:02:27 +00:00
Simon
07154d50de fullporner & thepornbunny 2026-05-21 12:17:44 +00:00
Simon
4ad7672ac9 jable implemented 2026-05-20 19:53:14 +00:00
Simon
a2746f965a moved sccache to global cargo 2026-05-20 18:25:46 +00:00
Simon
2ec9137df9 tube8 2026-05-20 14:28:11 +00:00
Simon
2a72e08d8a updates 2026-05-20 09:10:13 +00:00
Simon
f95e8fc632 pornhub - removed unnecessary format 2026-05-20 05:53:46 +00:00
Simon
6cc56a710c videos fixing protocol 2026-05-20 05:19:23 +00:00
Simon
3e41945fab channel rename 2026-05-19 17:24:26 +00:00
Simon
ad1ed1b68e allpornstreamd done and sxyprn updated 2026-05-19 13:48:50 +00:00
Simon
bd8382d579 hentaihaven workflow update 2026-05-18 20:42:36 +00:00
Simon
b077b24d63 missav fetch first db fallback 2026-05-18 20:39:45 +00:00
Simon
d15b49b1cf missav fix 2026-05-18 20:19:28 +00:00
Simon
cca234664e missav fix in place 2026-05-18 19:55:05 +00:00
Simon
21270aa266 thaiporntv: move into own "Thai" channel group
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 18:59:14 +00:00
Simon
dc14adbb2e thaiporntv: rewrite provider for Tailwind redesign and fix base64 decode
The site was redesigned from old HTML classes to Tailwind CSS, breaking all
selectors. Also fixes a base64 space-padding bug that corrupted the XOR cipher
decryption of data-enc attributes (video stream URLs).

Key changes:
- New parse_card() using updated Tailwind CSS selectors (div.group, a.playthumb,
  a.text-brand-pink, etc.) to match the redesigned page structure
- Fixed base64 padding from spaces to = characters in both provider and proxy
- Fixed proxy route (/proxy/thaiporntv/{endpoint}* was double-prefixed and used
  wrong capture group name)
- Updated load_tags() to use a.group[href*='/tags/'] with h2 child selector
- Added CDN base URL constant (web.techvids.top) for thumbnail and HLS paths
- Preview GIF URLs populated from data-id attribute

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 18:10:56 +00:00
Simon
aea2cda627 pimpbunny fix 2026-05-18 17:27:34 +00:00
Simon
ee47bbe74d pimpbunny and more fixes 2026-05-18 16:52:25 +00:00
Simon
dc70e2c9a6 pimpbunny partial fixes 2026-05-16 10:06:23 +00:00
Simon
1aaa388f9e sxyprn fix 2026-05-15 09:07:39 +00:00
Simon
eef08401ab clapdat image fix 2026-05-13 19:48:35 +00:00
Simon
bf11d4e866 clapdat 2026-05-13 08:24:04 +00:00
Simon
00f693ee9b sxyprn exclude doodstream until fixed 2026-05-11 15:01:55 +00:00
Simon
5ba16ab338 sxyprn fixes-ish 2026-05-11 13:32:08 +00:00
Simon
b4774a0c0f sxyprn hotfix 2026-05-09 20:59:51 +00:00
Simon
41353782a5 javtiful thumb fix 2026-05-08 05:08:55 +00:00
Simon
e3d62524be removed debug print 2026-05-06 13:50:47 +00:00
Simon
5e5786010a doodstream and lulustream in sxyprn integrated
Co-authored-by: Copilot <copilot@github.com>
2026-05-06 11:17:26 +00:00
Simon
9e8f326518 sxyprn format changes vidara 2026-05-05 19:11:26 +00:00
Simon
c4292c2ffc blowjobspro launch 2026-05-05 18:41:05 +00:00
Simon
3da09dc561 erome launch 2026-05-05 14:20:47 +00:00
Simon
d6fa364b28 youporn 2026-05-05 13:53:01 +00:00
Simon
01306c508a vidara and cargo updates 2026-05-05 12:25:43 +00:00
Simon
8ae0fcb544 vidara proxy for sxyprn 2026-05-05 08:58:27 +00:00
Simon
60d29ca905 sxyprn fix 2026-05-04 16:18:54 +00:00
Simon
275c89efef javtiful id fix 2026-05-02 20:19:12 +00:00
Simon
1df76af8f2 javtiful sorting fix 2026-05-02 14:45:06 +00:00
Simon
e4d409fe1f javtiful fix 2026-05-02 14:11:06 +00:00
Simon
6a4fc98720 javtiful fix 2026-04-30 06:04:51 +00:00
Simon
698644c5f8 heavyfetish testing 2026-04-26 09:53:02 +00:00
Simon
4400f21b79 heqvyfetish fix for favs 2026-04-26 07:18:19 +00:00
Simon
cc66f045cd hqporner fix 2026-04-25 18:22:05 +00:00
Simon
2bd132820b viralxxxporn fix 2026-04-25 16:40:53 +00:00
Simon
635c45d2c1 javtiful fix 2026-04-25 15:57:24 +00:00
Simon
6a72f84c17 archivebate1 2026-04-22 14:01:08 +00:00
Simon
a47a69962f archivebate fix 2026-04-22 10:18:15 +00:00
Simon
47631b8a70 yesporn fix 2026-04-22 08:17:54 +00:00
Simon
c379550085 xxthots fix 2026-04-22 07:25:09 +00:00
Simon
e6eb85cd5a archivebate still needs work 2026-04-17 21:03:01 +00:00
Simon
33ec098aae heavyfetish fix 2026-04-17 13:35:21 +00:00
Simon
5ac6d72239 heavyfetish fix 2026-04-14 21:01:44 +00:00
Simon
57ae23656a pornxp fixes 2026-04-14 17:11:11 +00:00
Simon
fe5cf9a42f archivebate work and pornxp returned 2026-04-14 16:58:04 +00:00
Simon
765a21c110 porntrex fix 2026-04-12 11:52:06 +00:00
Simon
8846005995 pornxp return 2026-04-11 21:45:00 +00:00
Simon
61f39e23b7 porntrex 2026-04-11 21:43:36 +00:00
Simon
f85a725883 docs and prompt 2026-04-10 22:57:27 +00:00
Simon
57eb2d7063 upgrades 2026-04-09 07:19:33 +00:00
Simon
6e43b3b3d0 fixes etc 2026-04-07 16:53:45 +00:00
Simon
81e8158161 redtube fix 2026-04-07 12:32:41 +00:00
Simon
a7e38c97a6 header fix 2026-04-06 06:51:43 +00:00
Simon
70355dd969 pimpbunny thumb proxy 2026-04-06 06:40:31 +00:00
Simon
772835d4d1 vjav proxy 2026-04-06 06:23:34 +00:00
Simon
8d39b3a36f proxies 2026-04-05 21:27:47 +00:00
Simon
004399ecbe flaresolverr session cycling 2026-04-05 21:13:05 +00:00
Simon
bc2a73dd06 flaresolverr life cycle 2026-04-05 20:55:15 +00:00
Simon
7b464fe796 upgrades 2026-04-05 20:31:38 +00:00
Simon
9773590f64 small adjustment to ph thumb proxy 2026-04-05 16:18:53 +00:00
Simon
78e852c29d pornhub fix 2026-04-05 15:53:16 +00:00
Simon
4d50e0a9fb pornhub fix 2026-04-03 20:27:45 +00:00
Simon
b8c326306d vjav 2026-04-03 19:24:46 +00:00
Simon
041460d9b9 status update 2026-04-03 19:24:42 +00:00
Simon
c0717fdacf supjav tags-fixes 2026-04-03 18:17:14 +00:00
Simon
e680319541 pornhub 2026-04-03 18:01:03 +00:00
104 changed files with 25573 additions and 2867 deletions

View File

@@ -1,3 +1,2 @@
[build]
rustflags = ["-C", "debuginfo=1"]
#rustc-wrapper = "sccache"

4
.env
View File

@@ -1 +1,3 @@
DATABASE_URL=hottub.db
DATABASE_URL=hottub.db
RUST_LOG=info
FLARE_URL=http://192.168.0.103:8191/v1

3
.gitignore vendored
View File

@@ -17,3 +17,6 @@ Cargo.lock
*.db
migrations/.keep
.mcp.json
*.mp4*
prompts/new-channel.md

View File

@@ -15,30 +15,39 @@ env_logger = "0.11.8"
error-chain = "0.12.4"
futures = "0.3.31"
htmlentity = "1.3.2"
ntex = { version = "2.15.1", features = ["tokio"] }
ntex-files = "2.0.0"
ntex = { version = "3", features = ["tokio"] }
ntex-files = "3"
serde = "1.0.228"
serde_json = "1.0.145"
tokio = { version = "1.49", features = ["full"] }
wreq = { version = "6.0.0-rc.26", features = ["cookies", "multipart", "json"] }
wreq-util = "3.0.0-rc.10"
wreq = { version = "5.3.0", features = ["cookies", "multipart", "json"] }
wreq-util = "2"
percent-encoding = "2.3.2"
capitalize = "0.3.4"
url = "2.5.7"
base64 = "0.22.1"
scraper = "0.24.0"
scraper = "0.26.0"
once_cell = "1.21.3"
rustc-hash = "2.1.1"
async-trait = "0.1"
regex = "1.12.2"
titlecase = "3.6.0"
dashmap = "6.1.0"
lru = "0.16.3"
lru = "0.18.0"
rand = "0.10.0"
chrono = "0.4.44"
md5 = "0.8.0"
pbkdf2 = { version = "0.12", features = ["hmac"] }
hmac = "0.12"
sha2 = "0.10"
aes = "0.8"
cbc = { version = "0.1", features = ["alloc"] }
hex = "0.4"
chromiumoxide = { version = "0.7", features = ["tokio-runtime"] }
playwright = "0.0.20"
[lints.rust]
warnings = "warn"
unexpected_cfgs = "allow"
# Or keep it as a warning but whitelist the cfg:
# unexpected_cfgs = { level = "warn", check-cfg = ['cfg(has_error_description_deprecated)'] }

124
build.rs
View File

@@ -1,3 +1,5 @@
#![deny(warnings)]
use std::env;
use std::fs;
use std::path::PathBuf;
@@ -9,6 +11,11 @@ struct ProviderDef {
}
const PROVIDERS: &[ProviderDef] = &[
ProviderDef {
id: "pornxp",
module: "pornxp",
ty: "PornxpProvider",
},
ProviderDef {
id: "all",
module: "all",
@@ -29,6 +36,21 @@ const PROVIDERS: &[ProviderDef] = &[
module: "pornhub",
ty: "PornhubProvider",
},
ProviderDef {
id: "ph-shorties",
module: "pornhub_shorties",
ty: "PornhubShortiesProvider",
},
ProviderDef {
id: "tikporn",
module: "tikporn",
ty: "TikpornProvider",
},
ProviderDef {
id: "youporn",
module: "youporn",
ty: "YoupornProvider",
},
ProviderDef {
id: "pornhd3x",
module: "pornhd3x",
@@ -89,6 +111,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "yesporn",
ty: "YespornProvider",
},
ProviderDef {
id: "porntrex",
module: "porntrex",
ty: "PorntrexProvider",
},
ProviderDef {
id: "sxyprn",
module: "sxyprn",
@@ -209,6 +236,11 @@ const PROVIDERS: &[ProviderDef] = &[
module: "supjav",
ty: "SupjavProvider",
},
ProviderDef {
id: "vjav",
module: "vjav",
ty: "VjavProvider",
},
ProviderDef {
id: "hypnotube",
module: "hypnotube",
@@ -234,6 +266,16 @@ const PROVIDERS: &[ProviderDef] = &[
module: "hsex",
ty: "HsexProvider",
},
ProviderDef {
id: "blowjobspro",
module: "blowjobspro",
ty: "BlowjobsproProvider",
},
ProviderDef {
id: "erome",
module: "erome",
ty: "EromeProvider",
},
ProviderDef {
id: "sextb",
module: "sextb",
@@ -244,11 +286,91 @@ const PROVIDERS: &[ProviderDef] = &[
module: "hentaihaven",
ty: "HentaihavenProvider",
},
ProviderDef {
id: "fikfap",
module: "fikfap",
ty: "FikfapProvider",
},
ProviderDef {
id: "chaturbate",
module: "chaturbate",
ty: "ChaturbateProvider",
},
ProviderDef {
id: "clapdat",
module: "clapdat",
ty: "ClapdatProvider",
},
ProviderDef {
id: "archivebate",
module: "archivebate",
ty: "ArchivebateProvider",
},
ProviderDef {
id: "thaiporntv",
module: "thaiporntv",
ty: "ThaipornTvProvider",
},
ProviderDef {
id: "aps",
module: "allpornstream",
ty: "AllPornStreamProvider",
},
ProviderDef {
id: "tube8",
module: "tube8",
ty: "Tube8Provider",
},
ProviderDef {
id: "jable",
module: "jable",
ty: "JableProvider",
},
ProviderDef {
id: "fullporner",
module: "fullporner",
ty: "FullpornerProvider",
},
ProviderDef {
id: "thepornbunny",
module: "thepornbunny",
ty: "ThepornbunnyProvider",
},
ProviderDef {
id: "eporner",
module: "eporner",
ty: "EpornerProvider",
},
ProviderDef {
id: "xnxx",
module: "xnxx",
ty: "XnxxProvider",
},
ProviderDef {
id: "xhamster",
module: "xhamster",
ty: "XhamsterProvider",
},
ProviderDef {
id: "xvideos",
module: "xvideos",
ty: "XvideosProvider",
},
ProviderDef {
id: "wowxxx",
module: "wowxxx",
ty: "WowxxxProvider",
},
ProviderDef {
id: "camsoda",
module: "camsoda",
ty: "CamsodaProvider",
},
ProviderDef {
id: "fyptt",
module: "fyptt",
ty: "FypttProvider",
},
];
fn main() {
@@ -348,7 +470,7 @@ fn main() {
let selection = match selected.as_deref() {
Some(selected_id) => format!(
"pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = Some(\"{selected_id}\");"
"pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = Some(\"{selected_id}\");",
),
None => "pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = None;".to_string(),
};

520
check.py Normal file
View File

@@ -0,0 +1,520 @@
#!/usr/bin/env python3
"""
Hottub channel health checker.
Usage:
python check.py [channel_id]
python check.py --url http://127.0.0.1:18080 spankbang
python check.py --no-ytdlp # skip yt-dlp extraction
python check.py --workers 8 # parallel channel workers
python check.py -v # verbose
Checks per channel:
- /api/videos returns items
- video.url resolves (follows localhost proxy redirects)
- video.thumb resolves
- video.formats[].url resolves with the declared http_headers
- yt-dlp -j on video.url (page URLs only, when the API returns no formats): extraction must succeed; metadata mismatches are reported as warnings
- yt-dlp -j on each format.url that is not a direct media file
"""
import sys
import json
import subprocess
import argparse
import logging
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional
try:
import requests
except ImportError:
sys.exit("requests is required: pip install requests")
# Server base URL used when --url is not given on the command line.
DEFAULT_BASE = "http://127.0.0.1:18080"
# Number of videos requested from /api/videos and checked per channel.
VIDEOS_TO_SAMPLE = 3
# Timeout in seconds for a single yt-dlp subprocess run.
YTDLP_TIMEOUT = 60
# Timeout in seconds for each HEAD/GET reachability request.
HTTP_TIMEOUT = 15
# Module logger; its level is set in main() depending on -v/--verbose.
log = logging.getLogger("check")
class Results:
    """Collects the errors and warnings produced while checking channels.

    ``err`` and ``warn`` log the finding and append a ``"[channel] message"``
    entry to ``errors`` / ``warnings`` while holding an internal lock, so one
    instance can be shared by the worker threads. ``info`` only logs.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self.errors: list[str] = []
        self.warnings: list[str] = []

    def err(self, channel: str, msg: str):
        """Log ``msg`` at ERROR level and record it as an error for ``channel``."""
        log.error("[%s] %s", channel, msg)
        with self._lock:
            self.errors.append(f"[{channel}] {msg}")

    def warn(self, channel: str, msg: str):
        """Log ``msg`` at WARNING level and record it as a warning for ``channel``."""
        log.warning("[%s] %s", channel, msg)
        with self._lock:
            self.warnings.append(f"[{channel}] {msg}")

    def info(self, channel: str, msg: str):
        """Log ``msg`` at INFO level; nothing is stored."""
        log.info("[%s] %s", channel, msg)
# Browser-like User-Agent that http_ok() sends with every reachability request.
_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
# Domains known to be Cloudflare-protected and return 403/connection-refused to direct
# HTTP checks. URL reachability failures for these hosts are downgraded to warnings.
_CF_PROTECTED_HOSTS = {
"www.camsoda.com",
"camsoda.com",
}
def _is_cf_protected(url: str) -> bool:
"""Return True if the URL's host is known to be CF-protected."""
try:
from urllib.parse import urlparse
host = urlparse(url).hostname or ""
return host in _CF_PROTECTED_HOSTS
except Exception:
return False
def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
    """Probe ``url`` and return ``(ok, http_status)``.

    A HEAD request (following redirects) is tried first. Only when the server
    answers 405 is a ranged GET for the first 1024 bytes issued instead.
    ``ok`` is true for status 200 or 206.

    On failure without an HTTP status the second element is a negative code:
    ``-1`` timeout, ``-2`` connection error, ``-3`` any other exception.
    """
    request_headers = {"User-Agent": _BROWSER_UA}
    request_headers.update(headers or {})
    success_codes = (200, 206)
    try:
        head = requests.head(
            url, headers=request_headers, timeout=HTTP_TIMEOUT, allow_redirects=True
        )
        if head.status_code != 405:
            return head.status_code in success_codes, head.status_code

        # HEAD not allowed: fetch just the first kilobyte instead.
        ranged = requests.get(
            url,
            headers={**request_headers, "Range": "bytes=0-1023"},
            timeout=HTTP_TIMEOUT,
            stream=True,
        )
        ranged.close()
        return ranged.status_code in success_codes, ranged.status_code
    except requests.exceptions.Timeout:
        return False, -1  # timeout
    except requests.exceptions.ConnectionError:
        return False, -2  # connection refused / DNS
    except Exception as exc:
        log.debug("http_ok exception for %s: %s", url, exc)
        return False, -3
def ytdlp_extract(url: str, extra_args: list[str] | None = None) -> tuple[Optional[dict], str]:
    """Run ``yt-dlp -j`` on ``url`` and return ``(info_or_None, message)``.

    ``-j`` prints one JSON object per line; the first non-empty line is parsed.
    On success the second element is yt-dlp's stderr text. On failure the first
    element is ``None`` and the second is stderr (non-zero exit) or a short
    description of what went wrong.

    ``extra_args`` are inserted between the fixed options and ``url``.
    """
    base_cmd = ["yt-dlp", "-j", "--no-warnings", "--socket-timeout", "20"]
    cmd = base_cmd + (extra_args or []) + [url]
    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=YTDLP_TIMEOUT)
        stderr = proc.stderr.decode("utf-8", errors="replace").strip()
        if proc.returncode != 0:
            return None, stderr
        for raw_line in proc.stdout.splitlines():
            if raw_line.strip():
                return json.loads(raw_line), stderr
        return None, "yt-dlp produced no output"
    except subprocess.TimeoutExpired:
        return None, "yt-dlp timed out"
    except json.JSONDecodeError as exc:
        return None, f"invalid JSON from yt-dlp: {exc}"
    except FileNotFoundError:
        return None, "yt-dlp not found in PATH"
    except Exception as exc:
        return None, str(exc)
# Headers yt-dlp always injects regardless of site — not meaningful to compare.
# Names are stored lowercased because compare_format_fields() lowercases header
# names before testing membership in this set.
_YTDLP_BUILTIN_HEADERS = frozenset(
k.lower() for k in (
"User-Agent", "Accept", "Accept-Language", "Accept-Encoding",
"Accept-Charset", "Connection", "Sec-Fetch-Mode",
)
)
def compare_format_fields(
    api_fmt: dict,
    yt_fmt: dict,
    channel: str,
    label: str,
    results: Results,
):
    """Warn about differences between an API format and its yt-dlp counterpart.

    Compared: ``ext``, ``protocol``, ``video_ext`` (only when both sides have a
    truthy value) and ``http_headers`` (names compared case-insensitively,
    ignoring the headers in ``_YTDLP_BUILTIN_HEADERS``). Every difference is
    reported through ``results.warn`` for ``channel``, prefixed with ``label``.
    """

    def lowered_headers(fmt: dict) -> dict:
        return {name.lower(): value for name, value in (fmt.get("http_headers") or {}).items()}

    for field in ("ext", "protocol", "video_ext"):
        api_val, yt_val = api_fmt.get(field), yt_fmt.get(field)
        if not api_val or not yt_val or api_val == yt_val:
            continue
        results.warn(
            channel,
            f"{label}: {field} mismatch — api='{api_val}' yt-dlp='{yt_val}'",
        )

    api_headers = lowered_headers(api_fmt)
    yt_headers = lowered_headers(yt_fmt)

    # Headers declared by the API: yt-dlp must know them and agree on the value.
    for key, api_val in api_headers.items():
        if key in _YTDLP_BUILTIN_HEADERS:
            continue
        if key not in yt_headers:
            results.warn(channel, f"{label}: http_header '{key}' in api but absent in yt-dlp")
            continue
        yt_val = yt_headers[key]
        if yt_val != api_val:
            results.warn(
                channel,
                f"{label}: http_header '{key}' mismatch — api='{api_val}' yt-dlp='{yt_val}'",
            )

    # Non-builtin headers yt-dlp uses that the API does not declare.
    for key in yt_headers:
        if key in _YTDLP_BUILTIN_HEADERS or key in api_headers:
            continue
        results.warn(channel, f"{label}: http_header '{key}' required by yt-dlp but not declared in api")
def is_page_url(url: str) -> bool:
    """Return True if ``url`` looks like a browseable page.

    A URL is *not* a page when it is not http(s), when it mentions the local
    server (``127.0.0.1`` / ``localhost``), or when its path ends in a known
    media extension. The extension test ignores both the query string and the
    fragment, so ``video.mp4?sig=1`` and ``video.mp4#t=10`` count as media.
    """
    if not url.startswith(("http://", "https://")):
        return False
    if "127.0.0.1" in url or "localhost" in url:
        return False
    # Strip the fragment first (it may itself contain '?'), then the query.
    path = url.lower().split("#", 1)[0].split("?", 1)[0]
    media_exts = (".mp4", ".m3u8", ".ts", ".webm", ".mkv", ".flv", ".avi", ".mov")
    return not path.endswith(media_exts)
def is_media_file_url(url: str) -> bool:
    """Return True if ``url`` points directly at a media file (not a page, not HLS).

    Only the path is inspected: the query string and the fragment are removed
    before the extension test, so ``a.mp4?sig=1`` and ``a.mp4#t=5`` both match.
    ``.m3u8`` is deliberately not in the list.
    """
    # Strip the fragment first (it may itself contain '?'), then the query.
    path = url.lower().split("#", 1)[0].split("?", 1)[0]
    return path.endswith((".mp4", ".webm", ".mkv", ".flv", ".avi", ".mov", ".ts"))
def follow_proxy_redirect(url: str) -> str:
    """If ``url`` is a localhost proxy URL, follow one redirect to get the real URL.

    URLs that do not mention ``127.0.0.1`` / ``localhost`` are returned
    unchanged without any request. For local URLs a single HEAD request is made
    without following redirects; when it answers with a redirect whose
    ``Location`` is non-local, that location is returned. In every other case,
    including request failures, ``url`` itself is returned.
    """
    if "127.0.0.1" not in url and "localhost" not in url:
        return url
    try:
        r = requests.head(url, timeout=HTTP_TIMEOUT, allow_redirects=False)
        if r.status_code in (301, 302, 303, 307, 308):
            loc = r.headers.get("Location", "")
            if loc and "127.0.0.1" not in loc and "localhost" not in loc:
                return loc
    except Exception as e:
        # Best effort: fall back to the original URL, but leave a trace.
        log.debug("follow_proxy_redirect failed for %s: %s", url, e)
    return url
def titles_match(a: str, b: str) -> bool:
    """Loosely compare two titles.

    Both titles are lowercased and stripped. They match when the leading
    ``n`` characters of either one occur anywhere in the other, where ``n`` is
    20 or the length of the shorter title, whichever is smaller. An empty
    title on either side is treated as a match.
    """
    left = a.lower().strip()
    right = b.lower().strip()
    if not (left and right):
        return True
    n = min(20, len(left), len(right))
    return left[:n] in right or right[:n] in left
def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool):
    """Run every per-video health check and record the findings in ``results``.

    Checks, in order:
      1. ``video["url"]`` is present, does not point at the hottub server
         itself, and is reachable (failures on CF-protected hosts are warnings).
      2. ``video["thumb"]`` is reachable; a missing thumb is only a warning.
      3. Every ``video["formats"][i]["url"]`` is reachable when requested with
         the format's declared ``http_headers``.
      4. If ``run_ytdlp`` is true and the API supplied no formats, yt-dlp must
         extract the page URL; title and duration disagreements with the API
         are reported as warnings.
      5. If ``run_ytdlp`` is true, yt-dlp must extract every format URL that is
         not a direct media file, using the format's headers.

    Args:
        video: one item from the ``/api/videos`` response.
        channel_id: channel the item belongs to; used to tag findings.
        results: shared collector for errors and warnings.
        run_ytdlp: when false, steps 4 and 5 are skipped.

    Note: the API-vs-yt-dlp per-format comparison is not done in step 4,
    because that step only runs when the API returned no formats to compare.
    """
    vid_id = video.get("id", "?")
    label = f"id={vid_id}"
    vurl = video.get("url", "")
    thumb = video.get("thumb", "")
    formats: list[dict] = video.get("formats") or []
    url_is_local = bool(vurl) and ("127.0.0.1" in vurl or "localhost" in vurl)

    # video.url must not point to the hottub server itself
    if not vurl:
        results.err(channel_id, f"{label}: missing url")
    elif url_is_local:
        results.err(channel_id, f"{label}: url points to hottub server: {vurl}")
    else:
        ok, code = http_ok(vurl)
        if ok:
            results.info(channel_id, f"{label}: url OK (HTTP {code})")
        elif _is_cf_protected(vurl):
            results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}")
        else:
            results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")

    # video.thumb
    if thumb:
        ok, code = http_ok(thumb)
        if ok:
            results.info(channel_id, f"{label}: thumb OK (HTTP {code})")
        else:
            results.err(channel_id, f"{label}: thumb unreachable HTTP={code}: {thumb}")
    else:
        results.warn(channel_id, f"{label}: no thumb")

    # video.formats[].url with declared http_headers
    for j, fmt in enumerate(formats):
        furl = fmt.get("url", "")
        fheaders: dict[str, str] = fmt.get("http_headers") or {}
        if not furl:
            results.err(channel_id, f"{label} format[{j}]: missing url")
            continue
        ok, code = http_ok(furl, headers=fheaders)
        if ok:
            results.info(channel_id, f"{label} format[{j}]: OK (HTTP {code})")
        else:
            results.err(
                channel_id,
                f"{label} format[{j}]: unreachable HTTP={code}"
                + (f" (headers={list(fheaders.keys())})" if fheaders else "")
                + f": {furl}",
            )

    if not run_ytdlp:
        return

    # yt-dlp info extraction on video.url (page URLs only, skipped when formats are provided)
    ytdlp_url = vurl if vurl and not url_is_local else ""
    if not formats and ytdlp_url and is_page_url(ytdlp_url):
        results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
        yt, stderr = ytdlp_extract(ytdlp_url)
        if yt is None:
            if _is_cf_protected(ytdlp_url):
                results.warn(
                    channel_id,
                    f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)"
                    + (f": {stderr[:200]}" if stderr else ""),
                )
            else:
                results.err(
                    channel_id,
                    f"{label}: yt-dlp failed for {ytdlp_url}"
                    + (f": {stderr[:300]}" if stderr else ""),
                )
        else:
            yt_title = (yt.get("title") or "").strip()
            api_title = (video.get("title") or "").strip()
            # titles_match() treats an empty title on either side as a match.
            if not titles_match(yt_title, api_title):
                results.warn(
                    channel_id,
                    f"{label}: title mismatch — yt-dlp='{yt_title[:50]}' api='{api_title[:50]}'",
                )

            yt_dur = yt.get("duration")
            api_dur = video.get("duration") or 0
            if yt_dur is not None and api_dur and abs(float(yt_dur) - float(api_dur)) > 10:
                results.warn(
                    channel_id,
                    f"{label}: duration mismatch — yt-dlp={yt_dur}s api={api_dur}s",
                )

            yt_fmts = yt.get("formats") or []
            yt_direct = yt.get("url")
            if not yt_fmts and not yt_direct:
                results.err(
                    channel_id,
                    f"{label}: yt-dlp returned no formats/url for {ytdlp_url}",
                )
            else:
                results.info(
                    channel_id,
                    f"{label}: yt-dlp OK — formats={len(yt_fmts)}"
                    + (f" title='{yt_title[:50]}'" if yt_title else ""),
                )

    # yt-dlp on format URLs that are not direct media files
    for j, fmt in enumerate(formats):
        furl = fmt.get("url", "")
        if not furl or is_media_file_url(furl):
            continue
        fheaders = fmt.get("http_headers") or {}
        extra_args: list[str] = []
        for hk, hv in fheaders.items():
            extra_args += ["--add-header", f"{hk}:{hv}"]
        results.info(channel_id, f"{label} format[{j}]: yt-dlp extract {furl}")
        yt, stderr = ytdlp_extract(furl, extra_args=extra_args)
        if yt is None:
            results.err(
                channel_id,
                f"{label} format[{j}]: yt-dlp failed for {furl}"
                + (f": {stderr[:200]}" if stderr else ""),
            )
            continue
        yt_fmts = yt.get("formats") or []
        yt_direct = yt.get("url")
        if not yt_fmts and not yt_direct:
            results.err(
                channel_id,
                f"{label} format[{j}]: yt-dlp returned no formats/url for {furl}",
            )
        else:
            results.info(
                channel_id,
                f"{label} format[{j}]: yt-dlp OK — formats={len(yt_fmts)}",
            )
def check_channel(channel_id: str, channel_name: str, base: str, results: Results, run_ytdlp: bool):
    """Fetch a sample of videos for one channel and check each of them.

    POSTs to ``{base}/api/videos`` asking for page 1 sorted by date with
    ``VIDEOS_TO_SAMPLE`` items. A failed request, a non-200 status, an
    unparseable body or an empty ``items`` list is recorded as an error and
    ends the check. Otherwise up to ``VIDEOS_TO_SAMPLE`` items are passed to
    ``check_video``.
    """
    results.info(channel_id, f"--- start '{channel_name}' ---")

    payload = {
        "channel": channel_id,
        "sort": "date",
        "page": "1",
        "perPage": str(VIDEOS_TO_SAMPLE),
    }
    try:
        resp = requests.post(f"{base}/api/videos", json=payload, timeout=30)
    except Exception as exc:
        results.err(channel_id, f"videos request failed: {exc}")
        return

    if resp.status_code != 200:
        results.err(channel_id, f"videos endpoint returned HTTP {resp.status_code}")
        return

    try:
        data = resp.json()
    except Exception as exc:
        results.err(channel_id, f"could not parse videos response: {exc}")
        return

    items: list[dict] = data.get("items") or []
    if not items:
        results.err(channel_id, "no items returned by /api/videos")
        return

    results.info(channel_id, f"{len(items)} item(s) returned")
    sample = items[:VIDEOS_TO_SAMPLE]
    for video in sample:
        check_video(video, channel_id, results, run_ytdlp)
def main():
    """Command-line entry point.

    Parses arguments, configures logging, determines which channels to check
    (the one given on the command line, or all channels from ``/api/status``),
    runs ``check_channel`` for each — sequentially or in a thread pool — and
    prints a summary.

    Exits with status 1 when ``/api/status`` cannot be used or when any error
    was recorded, and with status 0 otherwise (warnings alone do not fail).
    """
    parser = argparse.ArgumentParser(description="Hottub channel health checker")
    parser.add_argument("channel", nargs="?", help="single channel id to test")
    parser.add_argument("--url", default=DEFAULT_BASE, metavar="BASE_URL",
                        help=f"server base URL (default: {DEFAULT_BASE})")
    parser.add_argument("--no-ytdlp", action="store_true",
                        help="skip yt-dlp extraction checks")
    parser.add_argument("--workers", type=int, default=4,
                        help="parallel channel workers (default: 4)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="show INFO log lines")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.WARNING,
        format="%(asctime)s %(levelname)-7s %(message)s",
        datefmt="%H:%M:%S",
    )
    # Always print ERRORs and WARNINGs; INFO only in verbose mode
    logging.getLogger("check").setLevel(logging.INFO if args.verbose else logging.WARNING)

    base = args.url.rstrip("/")
    run_ytdlp = not args.no_ytdlp
    results = Results()

    if args.channel:
        # Single channel: skip status, go directly to the channel check
        channels = [{"id": args.channel, "name": args.channel}]
    else:
        # No channel specified: check status first, then all channels
        print(f"checking {base}/api/status ...")
        try:
            resp = requests.get(f"{base}/api/status", timeout=15)
        except Exception as e:
            print(f"ERROR: /api/status unreachable: {e}")
            sys.exit(1)
        if resp.status_code != 200:
            print(f"ERROR: /api/status returned HTTP {resp.status_code}")
            sys.exit(1)
        try:
            status_data = resp.json()
        except Exception as e:
            print(f"ERROR: could not parse /api/status response: {e}")
            sys.exit(1)
        channels: list[dict] = status_data.get("channels") or []
        if not channels:
            print("ERROR: no channels in /api/status response")
            sys.exit(1)
        print(f"status OK — {len(channels)} channels")

    # --- per-channel checks ---
    if len(channels) == 1 or args.workers <= 1:
        for ch in channels:
            ch_id = ch["id"]
            # Mirror the thread-pool path: one failing channel must not abort
            # the whole run, it is recorded as an error instead.
            try:
                check_channel(ch_id, ch.get("name", ch_id), base, results, run_ytdlp)
            except Exception as e:
                results.err(ch_id, f"unexpected exception: {e}")
    else:
        with ThreadPoolExecutor(max_workers=args.workers) as pool:
            futs = {
                pool.submit(check_channel, ch["id"], ch.get("name", ch["id"]), base, results, run_ytdlp): ch
                for ch in channels
            }
            for fut in as_completed(futs):
                try:
                    fut.result()
                except Exception as e:
                    ch = futs[fut]
                    results.err(ch["id"], f"unexpected exception: {e}")

    # --- summary ---
    print()
    if not results.errors and not results.warnings:
        print(f"All checks passed ({len(channels)} channel(s) tested).")
        sys.exit(0)
    if results.errors:
        print(f"=== {len(results.errors)} error(s) ===")
        for e in results.errors:
            print(f"  {e}")
    if results.warnings:
        print(f"=== {len(results.warnings)} warning(s) ===")
        for w in results.warnings:
            print(f"  {w}")
    sys.exit(1 if results.errors else 0)


if __name__ == "__main__":
    main()

40
docs/README.md Normal file
View File

@@ -0,0 +1,40 @@
# Hottub Docs
This folder is the fastest handoff path for anyone adding or repairing a channel.
Start here:
1. Read `architecture.md` for the server flow, request lifecycle, and core types.
2. Read `provider-playbook.md` for the exact process to add a new provider or proxy.
3. Use `provider-catalog.md` to find the closest existing implementation to copy.
4. Use `docs/hottubapp/*.html` when you need the client-facing API contract for status, videos, or uploaders.
5. Only then touch `prompts/new-channel.md`; it assumes the docs above exist.
Recommended local workflow:
```bash
cargo check -q
HOT_TUB_PROVIDER=<channel_id> cargo check -q
HOT_TUB_PROVIDER=<channel_id> cargo run --features debug
```
Useful runtime baseline:
```dotenv
DATABASE_URL=hottub.db
RUST_LOG=info
PROXY=0
BURP_URL=http://127.0.0.1:8081
FLARE_URL=http://127.0.0.1:8191/v1
DOMAIN=127.0.0.1:18080
DISCORD_WEBHOOK=
```
Key facts:
- Hottub is a Rust `ntex` server with providers under `src/providers/`.
- `build.rs` controls compile-time provider registration.
- `/api/videos` is the main provider execution path.
- `/proxy/...` exists for sites whose direct media or thumbnails need a redirect/proxy layer.
- Only three providers currently implement `/api/uploaders`: `hsex`, `omgxxx`, and `vjav`.
- Uploader IDs should be namespaced like `<channel>:<site-local-id>` so `/api/uploaders` can route directly.

313
docs/architecture.md Normal file
View File

@@ -0,0 +1,313 @@
# Hottub Architecture
## Purpose
Hottub is a Rust server that exposes Hot Tub compatible endpoints for channel discovery, video search, uploader lookups, and site-specific proxying. Most work in this repo is adding or repairing a provider module under `src/providers/`.
## Top-Level Structure
- `src/main.rs`: server bootstrap, env loading, database pool, shared requester/cache, route mounting.
- `src/api.rs`: `/api/status`, `/api/videos`, `/api/uploaders`, `/api/test`, `/api/proxies`.
- `src/providers/mod.rs`: provider trait, provider registry, build-time provider selection, status decoration, runtime validation, panic/error guards.
- `src/providers/*.rs`: one module per channel/provider.
- `src/proxy.rs`: route table for `/proxy/...`.
- `src/proxies/*.rs`: redirect/media/thumb proxy implementations.
- `src/videos.rs`: request/response payloads, `VideoItem`, `VideoFormat`, `ServerOptions`.
- `src/status.rs`: status/channel/group payloads.
- `src/uploaders.rs`: uploader request/profile payloads.
- `src/util/requester.rs`: outbound HTTP with cookies, optional Burp proxying, Jina fallback, and FlareSolverr fallback.
- `build.rs`: compile-time provider registry generation and single-provider build support.
## Startup Flow
1. `main` loads `.env` and ensures `RUST_LOG` is set.
2. It creates the Diesel SQLite pool from `DATABASE_URL`.
3. It creates a shared `Requester`, enables Burp proxying when `PROXY != 0`, and builds the LRU video cache.
4. It configures provider runtime validation in `providers::configure_runtime_validation`.
5. It spawns a background thread that forces provider initialization via `providers::init_providers_now()`.
6. It starts an `ntex` HTTP server on `0.0.0.0:18080`.
## Runtime Environment
Important environment variables:
- `DATABASE_URL`: required SQLite path.
- `RUST_LOG`: defaults to `warn` if unset.
- `PROXY`: enables Burp proxying when not equal to `0`.
- `BURP_URL`: outbound proxy URL used when `PROXY` is enabled.
- `FLARE_URL`: FlareSolverr endpoint used as the last HTML-fetch fallback.
- `DOMAIN`: used by the `/` redirect target.
- `DISCORD_WEBHOOK`: enables `/api/test` and provider error reporting.
Bundled reference material:
- `docs/hottubapp/📡 Status - Hot Tub Docs.html`
- `docs/hottubapp/🎬 Videos - Hot Tub Docs.html`
- `docs/hottubapp/👤 Uploaders - Hot Tub Docs.html`
Those HTML files are useful when a provider author needs to confirm the expected client payload shape without reading Rust structs first.
## Build-Time Provider Selection
`build.rs` reads `HOT_TUB_PROVIDER` or `HOTTUB_PROVIDER`.
- If unset, every provider in `build.rs` is compiled and registered.
- If set, only that provider is compiled into the binary.
- In a single-provider build, `/api/videos` remaps `"channel": "all"` to the compiled provider.
Generated files in `OUT_DIR` are included by `src/providers/mod.rs`:
- `provider_modules.rs`
- `provider_registry.rs`
- `provider_metadata_fn.rs`
- `provider_selection.rs`
This means adding a new provider always requires updating `build.rs`.
## HTTP Surface
### `/`
Returns a `302` redirect to `hottub://source?url=<DOMAIN-or-request-host>`.
### `/api/status`
Builds the channel list by iterating `ALL_PROVIDERS` and calling `Provider::get_channel`.
Important behavior:
- The `User-Agent` is parsed into `ClientVersion`.
- A provider can hide itself by returning `None`.
- `providers::build_status_response` decorates channels with `groupKey`, top tags, runtime status, and sort order.
- Some heavy status filters are intentionally removed from the client-facing response. The server still accepts them in `/api/videos`.
### `/api/videos`
This is the main provider execution path.
Flow:
1. Parse `VideosRequest`.
2. Normalize `channel`, `sort`, `query`, `page`, and `perPage`.
3. Build `ServerOptions`.
4. If `query` is a full `http://` or `https://` URL, try the `yt-dlp -J` fast path first.
5. Otherwise call `provider.get_videos(...)` through `run_provider_guarded`.
6. For quoted queries like `"teacher"`, apply a literal substring filter after provider fetch.
7. Spawn a background prefetch for the next page.
8. For short videos (`duration <= 120`), populate `preview` from the main URL or first format.
Important behavior:
- Leading `#` is stripped from queries before provider dispatch.
- `"all"` uses `AllProvider` in a normal build, but resolves to the single compiled provider in a single-provider build.
- Older `Hot Tub/38` clients are patched by replacing `video.url` with the last format URL when formats exist.
### `/api/uploaders`
Uploader lookup is optional and provider-specific.
Important behavior:
- At least one of `uploaderId` or `uploaderName` is required.
- If `uploaderId` looks like `channel:id`, the server directly targets that provider.
- Otherwise it scans all providers and returns the best exact-name match.
- Only `fikfap`, `hsex`, `omgxxx`, and `vjav` currently implement `get_uploader`.
- In practice, provider-owned uploader IDs should be namespaced, for example `vjav:12345` or `hsex:author_slug`.
### `/api/test`
Sends a Discord error test if `DISCORD_WEBHOOK` is configured.
### `/api/proxies`
Returns the background-fetched outbound proxy snapshot from `src/util/proxy.rs`.
## Core Data Structures
### `VideosRequest`
Defined in `src/videos.rs`. Common fields used by providers:
- `channel`
- `sort`
- `query`
- `page`
- `perPage`
- `featured`
- `category`
- `sites`
- `all_provider_sites`
- `filter`
- `language`
- `networks`
- `stars`
- `categories`
- `duration`
- `sexuality`
### `ServerOptions`
The server's normalized option bag. Providers should read from this instead of reparsing the raw API request.
Important fields:
- `public_url_base`: needed when generating `/proxy/...` URLs.
- `requester`: the shared request client with cookies/debug trace/proxy state.
- `sort`, `sites`, `filter`, `category`, `language`, `network`, `stars`, `categories`, `duration`, `sexuality`.
### `VideoItem`
Minimum useful fields for a provider:
- `id`
- `title`
- `url`
- `channel`
- `thumb`
- `duration`
High-value optional fields:
- `views`
- `rating`
- `uploader`
- `uploaderUrl`
- `uploaderId`
- `tags`
- `uploadedAt`
- `formats`
- `preview`
- `aspectRatio`
Avoid setting `embed` for new providers unless the site truly needs it.
### `VideoFormat`
Use `formats` when:
- the site returns a better direct media URL than the page URL
- HLS or multiple qualities exist
- extra HTTP headers such as `Referer` are required
Use `http_header` or `add_http_header` when the player endpoint needs request headers.
### `Channel` and `ChannelOption`
Each provider's `get_channel` returns the status metadata exposed by `/api/status`.
Typical option IDs used across the repo:
- `sort`
- `filter`
- `sites`
- `category`
- `language`
- `networks`
- `stars`
- `categories`
Use the same IDs when possible so the server and client behavior stay consistent.
### `UploaderProfile`
If a provider supports `/api/uploaders`, keep the ID routable:
- preferred format: `<channel>:<site-local-id>`
- examples in the repo: `vjav:<user_id>`, `hsex:<author>`, `omgxxx:<kind>:<id>`
This lets `src/api.rs` derive the owning provider immediately.
## Provider Contract
Defined in `src/providers/mod.rs`:
- `async fn get_videos(...) -> Vec<VideoItem>`
- `fn get_channel(clientversion: ClientVersion) -> Option<Channel>`
- `async fn get_uploader(...) -> Result<Option<UploaderProfile>, String>` optional
The server wraps provider execution in:
- `run_provider_guarded` for video paths
- `run_uploader_provider_guarded` for uploader paths
Panics and reported errors trigger runtime validation and optional Discord reporting.
## Runtime Validation and Error Handling
`src/providers/mod.rs` includes a validation subsystem that:
- runs a small sample request against a provider after failures
- checks that enough video items exist
- tries media URLs or format URLs with a `Range` header
- marks repeated failures over time
This means a provider that returns page URLs but no real media/formats may pass visually but still fail operationally.
## Requester Behavior
`src/util/requester.rs` is the standard outbound HTTP layer.
Capabilities:
- shared cookie jar across clones
- optional Burp proxying via `PROXY` and `BURP_URL`
- direct request retries for `429`
- Jina mirror fallback for blocked HTML fetches
- FlareSolverr fallback via `FLARE_URL`
- raw response helpers for media validation and custom headers
Use the shared requester from `ServerOptions` through `requester_or_default`. Do not instantiate a brand-new requester in normal provider fetch paths unless you have a very specific reason.
FlareSolverr note:
- `src/util/flaresolverr.rs` keeps a reusable session pool pattern by rotating a ready session per solve.
- If a provider only works after anti-bot negotiation, the shared requester is the path that benefits from that solved session and cookie state.
## Proxy Subsystem
There are two proxy styles.
### Redirect proxies
These take a provider-specific endpoint and return `302 Location: <resolved-media-url>`.
Examples:
- `/proxy/spankbang/...`
- `/proxy/sxyprn/...`
- `/proxy/pornhd3x/...`
- `/proxy/vjav/...`
### Media or image proxies
These actively fetch media or thumbnails and stream or rewrite the response.
Examples:
- `/proxy/noodlemagazine/...`
- `/proxy/noodlemagazine-thumb/...`
- `/proxy/shooshtime-media/...`
- `/proxy/hanime-cdn/...`
If a site only needs a referer-preserving redirect, use a redirect proxy. If manifests, relative playlist entries, cookies, or binary thumbs need rewriting, use a media/image proxy.
## Best Existing Templates
Use the closest existing provider instead of inventing a new style.
- `src/providers/vjav.rs`: rich API-backed provider with tags, uploader support, and detail enrichment.
- `src/providers/hsex.rs`: HTML scraping with background-loaded filters, uploader support, and direct HLS formats.
- `src/providers/omgxxx.rs`: large filter catalogs and uploader lookup by site/network identity.
- `src/providers/noodlemagazine.rs`: proxied media/thumbs, Jina fallback, and mirrored listing parsing.
- `src/providers/pornhd3x.rs`: complex filter catalogs, detail enrichment, and proxy-generated playback URLs.
- `src/providers/spankbang.rs`: anti-bot handling and a redirect-proxy-based media strategy.
## Important Gotchas
- New providers must export `CHANNEL_METADATA`.
- New providers must be listed in `build.rs` or they will never compile into the registry.
- If a provider returns proxied URLs, it usually also needs `options.public_url_base`.
- Keep filter IDs stable. The `title` is for display; the `id` is what the provider matches on.
- `categories` in `Channel` are not the same as `ChannelOption { id: "categories" }`.
- `/api/status` sanitizes some options away from the client-facing payload. That does not mean the provider option is useless in `/api/videos`.
- If a site needs per-request cookies or a solved user agent, rely on the shared requester.

122
docs/provider-catalog.md Normal file
View File

@@ -0,0 +1,122 @@
# Provider And Proxy Catalog
This is the current implementation inventory as of this snapshot of the repo. Use it to find the nearest existing pattern before adding a new channel.
## Providers
| Provider | Group | `/api/uploaders` | Uses local `/proxy` | Notes |
| --- | --- | --- | --- | --- |
| `all` | `meta-search` | no | no | Aggregates all compiled providers. |
| `allpornstream` | `mainstream-tube` | no | yes | Next.js App Router scraper; extracts cards via `data-thumb-id/href/title/images` attributes; redirect proxy lazy-resolves VOE/DoodStream/StreamTape/FileMoon embeds. |
| `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. |
| `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. |
| `blowjobspro` | `mainstream-tube` | no | no | KVS-style HTML provider with async search pagination and category shortcut routing. |
| `chaturbate` | `live-cams` | no | no | Live cam channel. |
| `clapdat` | `amateur-homemade` | no | yes | Svelte/JSON-hydrated provider using home/recent/trending routes, Meilisearch keyword search, and `/proxy/clapdat/...` redirect playback resolution. |
| `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:<name>`). |
| `fikfap` | `tiktok` | yes | yes (thumbs only) | JSON-API provider for fikfap.com (TikTok-style swipe short clips); anonymous auth via a client-generated `Authorization-Anonymous` UUID header (no real login needed); listing via `GET api.fikfap.com/posts?sort=new\|trending\|random&amount=N&afterId=<lastPostId>` (cursor pagination — page N costs N sequential requests); search via `GET search?q=` (single fixed-size batch, no pagination — page 2+ returns empty); hashtag feeds via `GET hashtags/label/{label}/posts` and creator feeds via `GET profile/username/{user}/posts`, both also cursor-paginated; `tag:`/`hashtag:`/`#` and `user:`/`uploader:` query prefixes route directly; `categories` option exposes a small curated static hashtag list (no full catalog endpoint exists anonymously); `video.url` is the `fikfap.com/post/{id}` page (a client-rendered SPA, not yt-dlp-resolvable on its own); `videoStreamUrl` from the JSON response is sent directly as `formats[0].url` (signed Bunny CDN HLS `.m3u8`, ~24h token expiry) with `httpHeaders: {Referer: https://fikfap.com/}` — Hot Tub clients apply a format's `http_headers` across the whole HLS playback session (manifest, sub-playlists, and segments), so no proxying of the media itself is needed; thumbnails have no per-field header mechanism, so they're proxied via `/proxy/fikfap-thumb/...` to inject the same Referer; `get_uploader` implemented (`fikfap:<username>` IDs) using `GET profile/username/{user}`. |
| `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. |
| `fyptt` | `tiktok` | no | no | HTML scraper for fyptt.to (Beaver Builder/WordPress short-form TikTok-style vertical porn); card selector `.fl-post-grid-post[class*="post-ID"]` with `category-{slug}` CSS class doubling as both listing tag and category-archive route; latest feed `/` (page N: `/page/N/`), search `/?s=query` (page N: `/page/N/?s=query`), category archives at bare top-level slugs like `/tiktok-ass/` (12 hardcoded categories exposed via the `categories` filter option, or via an explicit `cat:`/`category:` query prefix — bare keyword queries always go to WordPress search, never a category archive, because the category names ("sexy", "ass", "tiktok", "live", ...) are also the most common search terms); per-item enrichment fetches the detail page for the JSON-LD `embedURL` (one of three on-site player endpoints: `fypttstr.php`, `fypttjwstr.php`, or `fypttjwstrhls.php`) and `datePublished`, then fetches that embed URL to extract the actual signed `stream.fyptt.to` mp4 or `/hls/*.m3u8` URL (token expires ~2h, no Referer required) for `formats`; thumbnails (`fyptt.to/wp-content/uploads/...webp`) need no proxy; no duration metadata available on listing or detail pages (set to 0); no real uploader/model identity (the `girl-{slug}` CSS class is cosmetic only, not a linkable archive) so `/api/uploaders` is not implemented; `video.url` is the detail page URL (not yt-dlp resolvable directly — the player is sandboxed-iframe-only) so `formats` are populated instead; no proxy needed. |
| `freeuseporn` | `fetish-kink` | no | no | Fetish archive pattern. |
| `hanime` | `hentai-animation` | no | yes | Uses proxied CDN/thumb handling. |
| `heavyfetish` | `fetish-kink` | no | no | Direct media handling. |
| `hentaihaven` | `hentai-animation` | no | no | HLS format builder pattern. |
| `homoxxx` | `gay-male` | no | no | Gay category grouping example. |
| `hqporner` | `studio-network` | no | yes | Uses thumb and redirect proxy helpers. |
| `hsex` | `chinese` | yes | no | Strong template for tags, uploaders, and direct HLS formats. |
| `hypnotube` | `fetish-kink` | no | no | Fetish/tube hybrid. |
| `javtiful` | `jav` | no | no | JAV channel family. |
| `missav` | `jav` | no | no | HLS format pattern. |
| `noodlemagazine` | `mainstream-tube` | no | yes | Best template for media and thumbnail proxying. |
| `okporn` | `mainstream-tube` | no | no | Simple mainstream archive. |
| `okxxx` | `mainstream-tube` | no | no | Mainstream search/archive pattern. |
| `omgxxx` | `studio-network` | yes | no | Best template for sites/networks/stars filter catalogs. |
| `paradisehill` | `mainstream-tube` | no | no | Simple page scraper. |
| `perfectgirls` | `studio-network` | no | no | Studio archive. |
| `perverzija` | `studio-network` | no | no | Multi-format/HLS examples. |
| `pimpbunny` | `onlyfans` | no | yes | Proxy-backed playback and thumbnail handling. |
| `pmvhaven` | `pmv-compilation` | no | no | PMV grouping example. |
| `porn00` | `mainstream-tube` | no | no | Lightweight scraper. |
| `porn4fans` | `onlyfans` | no | no | KVS (Kernel Video Sharing) scraper for porn4fans.com (OnlyFans creator clips); Cloudflare-fronted but serves direct requests (no JS challenge), so the shared requester works without Jina/FlareSolverr and detail-page enrichment is safe; all feeds are fetched as KVS `?mode=async&function=get_block` HTML fragments (cleaner + properly paginated vs the JS-filled full pages), parsed with `scraper` over `div.item` cards (`a.img-wrap.video` href→id/title, `img.thumb` data-webp/src, `div.duration`, `li.video-item.views span`, `li.video-item.model a` for uploader, `div.preview-video[data-src]` preview clip); latest feed is `/onlyfans-videos/` block `custom_list_videos_latest_videos_list` paginated by `from=N` (12/page — note: NOT `/latest-updates/`, which 404s); search is `/search/{dashed-query}/` block `custom_list_videos_videos_list_search_result` with `q={query}&category_ids=&from_videos=N` (24/page); category `/categories/{slug}/` and tag `/tags/{slug}/` share block `custom_list_videos_common_videos_list` (`from=N`, 12/page); model `/models/{slug}/` uses block `custom_list_videos_models_videos_list`; sort maps new→post_date, popular→video_viewed, rated→rating, longest→duration; `cat:`/`category:`, `tag:`, and `model:`/`uploader:`/`pornstar:`/`star:` query prefixes route to the matching archive, and a bare query that exactly matches a background-loaded category title goes to that archive instead of keyword search; background-loads the 55-entry category title→slug map from `/categories/` (`#list_categories_categories_list_items a.item`) for the `categories` filter option (sanitized out of `/api/status` like `stars`/`networks`, but honored in `/api/videos`); `video.url` is the `/video/{id}/{slug}/` page URL (NOT yt-dlp-resolvable — yt-dlp's generic KVS extractor fails on this site's flashvars), so per-card enrichment fetches the detail page and pulls the direct `video_url`/`video_alt_url` flashvars (480p/720p) into `formats` (bounded `buffered(8)` concurrency); KVS `get_file` MP4 URLs come as `…/ID.mp4/?v-acctoken=…` with a trailing slash before the query — the provider strips it to `…/ID.mp4?v-acctoken=…` so the path ends in `.mp4` (health-check/yt-dlp media detection keys off the extension); formats carry a `Referer` header (works with or without it); thumbnails (`/contents/videos_screenshots/…`) need no proxy or referer; uploader name on a card is the OnlyFans handle while the `/models/{slug}/` URL slug is the canonical model name (they legitimately differ — e.g. handle "Blasianflexcouple" at slug `nina-lee`), so `uploader` uses the display handle and `uploaderUrl`/`uploaderId` (`porn4fans:<slug>`) use the slug; no `/api/uploaders` profile, no proxy; note "teen" and similar are compliance-blocked keywords that the site itself returns empty for. |
| `porndish` | `studio-network` | no | yes | Redirect proxy plus thumb proxy usage. |
| `pornhat` | `mainstream-tube` | no | no | Basic tube provider. |
| `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. |
| `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. |
| `pornhub-shorties` | `tiktok` | no | no | Pornhub Shorties vertical short-form clips; parses `JSON_SHORTIES` JS variable embedded in HTML; fields: vkey, title, linkUrl, imageUrl, likeNumber, dislikeNumber, name/profileUrl (uploader), pillsData (tags), trackingTimeWatched.video_duration; pagination via `?page=N`; search via `?search=query`; sort via `?sort=trending\|mostviewed\|top_rated\|hottest`; phncdn thumbnails require `Referer: https://www.pornhub.com/` (served via cdnReferrers in /api/status); yt-dlp resolves `video.url` natively (PornHub extractor); no proxy needed. |
| `pornmz` | `mainstream-tube` | no | no | Mainstream archive. |
| `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. |
| `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. |
| `redtube` | `mainstream-tube` | no | no | Mainstream archive. |
| `rule34gen` | `ai` | no | no | AI group example. |
| `rule34video` | `hentai-animation` | no | no | Hentai group example. |
| `sextb` | `jav` | no | no | JAV family provider. |
| `shooshtime` | `onlyfans` | no | yes | Redirect proxy plus dedicated media route. |
| `spankbang` | `mainstream-tube` | no | yes | Best template for redirect proxy plus anti-bot fetches. |
| `thaiporntv` | `mainstream-tube` | no | yes | Decodes `data-enc` attribute for proxied HLS playback. |
| `supjav` | `jav` | no | yes | JAV/HLS provider; detail page URLs for `video.url`, proxied HLS format URLs via `/proxy/supjav/...`. |
| `sxyprn` | `mainstream-tube` | no | yes | Redirect proxy helper usage. |
| `tnaflix` | `mainstream-tube` | no | no | Mainstream tube provider. |
| `tokyomotion` | `jav` | no | no | JAV/tube hybrid. |
| `viralxxxporn` | `mainstream-tube` | no | no | Basic parser with format extraction. |
| `vjav` | `jav` | yes | no | Best API-style template with uploaders and tag-id lookup maps. |
| `vrporn` | `studio-network` | no | no | Multi-format direct playback. |
| `xfree` | `tiktok` | no | no | Short-form grouping example. |
| `xxdbx` | `onlyfans` | no | no | OnlyFans-like grouping example. |
| `xxthots` | `onlyfans` | no | no | OnlyFans-like metadata example. |
| `yesporn` | `mainstream-tube` | no | no | Preview format examples. |
| `youjizz` | `mainstream-tube` | no | no | Mainstream tube provider. |
| `youporn` | `mainstream-tube` | no | no | Pornhub-network HTML provider with watch-page playback URLs and tag/channel/pornstar shortcuts. |
| `tube8` | `mainstream-tube` | no | yes | Aylo/MindGeek platform scraper; redirect proxy fetches signed `/media/hls/?s=TOKEN` endpoint and returns highest-quality CDN HLS URL; supports tag/category/channel/pornstar shortcut queries. |
| `jable` | `jav` | no | yes | HTML JAV archive scraper; extracts `var hlsUrl` from detail pages; m3u8 format requires Referer + browser User-Agent; proxy route handles HEAD (200 OK) and GET (redirect to watch page) since yt-dlp blocks jable.tv; tag/category/model shortcut queries. |
| `fullporner` | `mainstream-tube` | no | no | HTML scraper for fullporner.com; thumbnail IDs derived from `/thumb/{id}.jpg` URLs and used to build direct `xiaoshenke.net/vid/{id}/720` media redirect URLs (Referer + User-Agent headers required); supports cat:/category:/pornstar:/star: shortcut queries; no proxy needed. |
| `thepornbunny` | `mainstream-tube` | no | yes | KVS-style HTML scraper for thepornbunny.com; 24 items per site page; thumbnails at `https://www.thepornbunny.com/images/thumb/{id}.webp` from `data-original` attribute (no proxy needed); studio exposed as uploader; pornstar names in tags; `/proxy/thepornbunny/{slug}` fetches the video page, extracts `generate_mp4(enc_data, key, rnd, video_id)` args, decrypts `enc_data` via PBKDF2-HMAC-SHA512+AES-256-CBC to get an OK.ru session key, calls `api.ok.ru/fb.do?method=video.get&session_key=KEY&vids=RND` to get signed CDN URLs, and returns 302 to the best-quality okcdn.ru/vkuser.net MP4 URL (no special client headers needed); supports sort: new/popular/rated, 20 hardcoded categories via `categories` option, and tag:/category:/studio:/pornstar: query shortcuts. |
| `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. |
| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. |
| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. |
| `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:` query prefix to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). |
| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
| `wowxxx` | `studio-network` | no | no | HTML scraper for wow.xxx premium aggregator; default feed `/latest-updates/`, page N (N ≥ 2) appends a `/{N}/` suffix (for example `/latest-updates/2/`), search `/search/{query}/relevance/` with the same page suffix; supports `site:`/`studio:`/`network:`/`model:`/`pornstar:`/`tag:`/`cat:` query shortcuts to direct archive routes; list cards expose preview clips (`cast.wow.xxx/preview/*.mp4`), thumbnails (`img.wow.xxx/.../medium@2x/1.jpg`), duration, rating, views, site (as uploader), and model tags; `video.url` is the detail page URL and yt-dlp resolves HTML5 MP4 formats dynamically; no proxy needed. |
## Proxy Routes
### Redirect proxies
These resolve a provider-specific input into a `302 Location`.
- `/proxy/doodstream/{endpoint}*`
- `/proxy/sxyprn/{endpoint}*`
- `/proxy/javtiful/{endpoint}*`
- `/proxy/spankbang/{endpoint}*`
- `/proxy/porndish/{endpoint}*`
- `/proxy/hqporner/{endpoint}*`
- `/proxy/heavyfetish/{endpoint}*`
- `/proxy/vjav/{endpoint}*`
- `/proxy/pornhd3x/{endpoint}*`
- `/proxy/shooshtime/{endpoint}*`
- `/proxy/pimpbunny/{endpoint}*`
- `/proxy/allpornstream/{endpoint}*`
- `/proxy/tube8/{endpoint}*`
- `/proxy/supjav/{endpoint}*`
- `/proxy/jable/{slug}*`
- `/proxy/thepornbunny/{slug}*`
### Media/image proxies
These return binary media or images, sometimes rewriting manifests or forwarding cookies/referers.
- `/proxy/shooshtime-media/{endpoint}*`
- `/proxy/noodlemagazine/{endpoint}*`
- `/proxy/noodlemagazine-thumb/{endpoint}*`
- `/proxy/hanime-cdn/{endpoint}*`
- `/proxy/fikfap-thumb/{endpoint}*`
- `/proxy/hqporner-thumb/{endpoint}*`
- `/proxy/porndish-thumb/{endpoint}*`
- `/proxy/pornhub-thumb/{endpoint}*`
## Best Copy Sources By Problem
- Need uploader support: copy `hsex`, `omgxxx`, or `vjav`.
- Need proxied media: copy `noodlemagazine`.
- Need proxied redirect-only playback: copy `spankbang` or `pornhd3x`.
- Need big background-loaded filter catalogs: copy `pornhd3x` or `omgxxx`.
- Need tag title to site-ID lookup maps: copy `vjav` or `hsex`.

349
docs/provider-playbook.md Normal file
View File

@@ -0,0 +1,349 @@
# New Provider Playbook
This is the implementation checklist for adding a working channel with the least guessing.
## Definition Of Done
A provider is not done when it compiles. It is done when:
1. `/api/status` shows the channel with sensible options and grouping.
2. `/api/videos` returns real items for the default feed.
3. Search works.
4. Pagination works.
5. Thumbnails load.
6. `video.url` or at least one `formats[*].url` resolves to playable media.
7. If the site needs proxying, the `/proxy/...` route works.
8. `HOT_TUB_PROVIDER=<id> cargo check -q` passes.
## Files To Touch
Always:
- `build.rs`
- `src/providers/<channel_id>.rs`
Sometimes:
- `src/proxy.rs`
- `src/proxies/<channel_id>.rs`
- `src/proxies/<channel_id>thumb.rs`
- `prompts/new-channel.md` if you are improving the handoff prompt
- `docs/provider-catalog.md` if you add a new provider or proxy
## Step 1: Pick The Closest Template
Do not start from an empty file.
Choose the nearest match:
- API-first site with tags/uploader metadata: copy `vjav.rs`
- HTML site with background-loaded tags/uploaders: copy `hsex.rs`
- Site with multiple large catalogs like sites/networks/stars: copy `omgxxx.rs`
- Site whose media or thumbs need local proxying: copy `noodlemagazine.rs`, `pornhd3x.rs`, `spankbang.rs`, or `porndish.rs`
- Very simple archive/search site: copy a small provider from `mainstream-tube`
Before writing code, confirm the site shape:
1. home or latest feed URL
2. search URL and page 2 URL
3. detail page URL shape
4. player request or manifest request
5. thumbnail host and whether it needs referer/cookies
6. tag/category/uploader/studio routes if they exist
7. whether the site exposes JSON endpoints that are easier than HTML scraping
Use browser/network tooling for this if needed. Do not guess URL patterns from one page.
## Step 2: Register The Provider
Add the provider to `build.rs`:
- `id`: channel id used by `/api/videos`
- `module`: Rust file name
- `ty`: provider struct name
If this is missing, the server will not discover the provider.
## Step 3: Define Channel Metadata
Every provider should export:
```rust
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "...",
tags: &["...", "...", "..."],
};
```
Pick `group_id` from the existing set in `src/providers/mod.rs`:
- `meta-search`
- `mainstream-tube`
- `tiktok`
- `studio-network`
- `amateur-homemade`
- `onlyfans`
- `chinese`
- `jav`
- `fetish-kink`
- `hentai-animation`
- `ai`
- `gay-male`
- `live-cams`
- `pmv-compilation`
## Step 4: Build The Channel Surface
Implement `build_channel` or equivalent and return it from `get_channel`.
Required:
- `id`
- `name`
- `description`
- `favicon`
- `status`
- `nsfw`
Recommended:
- `cacheDuration: Some(1800)` unless the site is unusually stable
- use standard option IDs like `sort`, `filter`, `sites`, `category`, `stars`, `categories`
- keep options minimal at first; only expose filters that actually work in `get_videos`
The option `id` values matter more than the display `title`.
## Step 5: Model Provider Routing Explicitly
Create a local enum like:
```rust
enum Target {
Latest,
Search { query: String },
Tag { slug: String },
Uploader { id: String },
}
```
Then write one function that resolves `sort`, `query`, `filter`, `sites`, and related options into a `Target`.
This is easier to debug than scattering URL decisions across the provider.
## Step 6: Load Filter Catalogs In The Background If Needed
If the site exposes tags, uploaders, studios, networks, or stars:
- store them in `Arc<RwLock<Vec<FilterOption>>>`
- initialize them with an `All` option
- spawn a background thread in `new()`
- create a tiny Tokio runtime inside that thread
- fill the lists without blocking server startup
Patterns:
- `hsex.rs`
- `omgxxx.rs`
- `pornhd3x.rs`
- `vjav.rs`
If tags or uploaders need stable IDs, keep a lookup map such as:
- `HashMap<String, String>` from title to site ID
- `HashMap<String, String>` from site ID to URL target
Normalize lookup keys to lowercase trimmed strings.
## Step 7: Fetch Pages Through The Shared Requester
In `get_videos`, start with:
```rust
let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
```
Use it for HTML, JSON, and raw media requests.
Why:
- it preserves cookies
- it carries debug trace IDs
- it respects Burp proxying
- it can fall back to Jina or FlareSolverr
## Step 8: Parse Listing Cards First, Then Enrich Only If Needed
Preferred flow:
1. Fetch the archive or search page.
2. Parse a lightweight list of stubs.
3. Return list data directly if enough metadata is already present.
4. Fetch detail pages or JSON endpoints only for fields the card does not expose.
Use bounded concurrency for detail enrichment. Existing providers usually use `futures::stream` with `buffer_unordered`.
## Step 9: Build High-Quality `VideoItem`s
Always fill:
- `id`
- `title`
- `url`
- `channel`
- `thumb`
- `duration`
Fill when available:
- `views`
- `rating`
- `uploader`
- `uploaderUrl`
- `uploaderId`
- `tags`
- `uploadedAt`
- `preview`
- `aspectRatio`
- `formats`
Rules:
- Keep `tags` as a list of displayable titles.
- Keep uploader data as structured fields, not mashed into the title.
- If you support uploader profiles, set `uploaderId` to a namespaced value like `<channel>:<site-local-id>`.
- Do not include `embed` unless the provider truly needs it.
- If direct media exists, prefer `formats` and keep `url` stable.
## Step 10: Decide Whether A Proxy Is Required
Use no proxy when:
- page URLs are enough and the client can resolve media itself
- or direct media URLs already work cleanly
Use a redirect proxy when:
- the provider must turn a detail URL into a resolved media URL
- headers/cookies do not need full response rewriting
Use a media/image proxy when:
- the site requires a referer for every fetch
- thumbnails need cookie-backed access
- manifests contain relative URIs that must be rewritten
- the server must stream binary content itself
If a proxy is needed:
1. add `src/proxies/<id>.rs`
2. wire the route in `src/proxy.rs`
3. generate provider URLs with `build_proxy_url(&options, "<id>", target)`
## Step 11: Implement Search Correctly
Check for three search modes:
1. native site search endpoint
2. tag/uploader shortcut search from preloaded filter catalogs
3. literal client-side substring search after fetch, triggered by quoted queries
Important server behavior:
- `#tag` becomes `tag`
- `"teacher"` becomes a literal post-fetch filter
- raw URL queries may bypass the provider through the `yt-dlp` fast path
Provider guidance:
- if the query matches a known tag/uploader shortcut, prefer the site's direct archive URL instead of generic search
- otherwise fall back to the site's keyword search
## Step 12: Support Pagination Explicitly
Do not assume pagination is `?page=N`.
Confirm:
- archive page 2 URL shape
- search page 2 URL shape
- tag page 2 URL shape
- uploader page 2 URL shape
If the site uses infinite scroll or an XHR endpoint, document that in code comments and hit the underlying endpoint directly.
## Step 13: Only Add `/api/uploaders` When The Site Has Real Uploader Identity
Uploader support is optional. Only implement it when the site exposes stable uploader pages or IDs.
Use `hsex.rs`, `omgxxx.rs`, or `vjav.rs` as the template.
Minimum expectations for `UploaderProfile`:
- stable `id`
- `name`
- `channel`
- `videoCount`
- `totalViews`
Nice to have:
- `avatar`
- `description`
- `videos`
- `layout`
- per-channel stats
## Validation Checklist
Run all of these:
```bash
cargo check -q
HOT_TUB_PROVIDER=<channel_id> cargo check -q
HOT_TUB_PROVIDER=<channel_id> cargo run --features debug
```
Then hit:
```bash
curl -s http://127.0.0.1:18080/api/status \
-H 'User-Agent: Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0' | jq
```
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"<channel_id>","sort":"new","page":1,"perPage":10}' | jq
```
Also verify:
- search query works
- page 2 works
- tag shortcut works if implemented
- uploader shortcut works if implemented
- `yt-dlp '<video.url or first format url>'` resolves media
- thumbnail URL returns an image
- proxy route returns a `302` or working media body, whichever is expected
- if uploaders are implemented, `/api/uploaders` works with both `uploaderId` and `uploaderName`
## Common Failure Modes
- Forgot `build.rs` entry.
- Returned page URLs but no playable media/formats.
- Used a local requester instead of the shared one and lost cookies.
- Built `/proxy/...` URLs without `public_url_base`.
- Put human-readable titles into filter IDs, making routing brittle.
- Added huge option lists to the status response without background loading.
- Implemented search but not search pagination.
- Implemented proxies but forgot to test them independently with `curl -I`.
## Best Reference Matrix
- Rich uploader support: `vjav.rs`, `hsex.rs`, `omgxxx.rs`
- Tag and uploader lookup maps: `vjav.rs`, `hsex.rs`
- Background catalog loading: `hsex.rs`, `omgxxx.rs`, `pornhd3x.rs`
- Redirect proxy: `spankbang.rs` plus `src/proxies/spankbang.rs`
- Manifest or image proxy: `noodlemagazine.rs` plus `src/proxies/noodlemagazine.rs`
- Complex detail enrichment: `pornhd3x.rs`

View File

@@ -1,60 +0,0 @@
# Uploaders Endpoint Plan
## Summary
Implement `POST /api/uploaders` using the Hot Tub uploader profile contract and ship it framework-first. The server will expose shared uploader request/response types, a provider hook for uploader lookup, endpoint routing in `src/api.rs`, and a first real provider implementation in `hsex`.
## Implementation
- Add dedicated uploader API types in `src/uploaders.rs`:
- `UploadersRequest`
- `UploaderProfile`
- `UploaderChannelStat`
- `UploaderVideoRef`
- `UploaderLayoutRow`
- Keep camelCase as the canonical serialized shape.
- Accept documented decode aliases:
- `uploader_id`
- `uploader_name`
- `profile_content`
- `profile_picture_url`
- `video_ids`
- `horizontal_videos`
- Add `POST /api/uploaders` in `src/api.rs`.
- Validate that at least one of `uploaderId` or `uploaderName` is present.
- Return:
- `400` for invalid request
- `404` for no match
- `500` for provider execution failure
- Add `Provider::get_uploader(...)` with a default `Ok(None)` implementation.
- Add a guarded uploader execution helper in `src/providers/mod.rs`.
- Use canonical uploader IDs in the format `<channel>:<provider-local-id>`.
- Implement the first provider-backed uploader profile in `src/providers/hsex.rs`.
## Hsex Strategy
- Resolve uploader lookup by canonical uploader ID or exact uploader name.
- Reuse existing uploader archive discovery and archive page fetching.
- Build uploader profile metadata from uploader archive pages.
- Populate `videos` with `UploaderVideoRef` values derived from existing `VideoItem`s.
- Always return `layout`.
- When `profileContent == true`, return:
- `videos`
- `tapes: []`
- `playlists: []`
- a `"For You"` horizontal row plus the default videos row
- When `profileContent == false`, return metadata and layout only.
## Tests
- Request alias decoding for uploader request fields.
- Response alias decoding for avatar and layout row compatibility fields.
- Endpoint helper tests for request validation and provider routing.
- Hsex uploader ID generation and uploader page parsing coverage.
## Assumptions
- The first ship focuses on the endpoint framework and one real provider implementation.
- Providers without explicit uploader support remain unsupported by `/api/uploaders`.
- Name-based resolution uses exact display-name matching.
- `videoCount` and `totalViews` are best-effort when the upstream site does not expose authoritative profile totals.

View File

@@ -1 +1,128 @@
write a provider for the site "vjav.com". use playwright-mcp to get the index site and get the layout of videos from this. also figure out how searches work and if it has a different layout then. also find out how more videos get loaded/more pages urls. also find if it has video sites like recommended,most views etc and include them. try to find as much information about a video on the site as possible and build the video items with all the information. put tags and uploader into lists so they can be utilized later. on query check if the search already exists in these FilterObject arrays and use them for the url instead (similar to omgxxx). after coding this, test that all provided urls work, yt-dlp can download the video.url, the thumbnails work, searches, tag-/uploader- searches work, more pages work. analyze the provider too and set the channel tags and put it in a group that matches this provider or create a new group where it fits in. do not include the embed field in the videoitems responses. when parsing uploaders, make sure that the uploaders. if unsure about the endpoints, check with the docs dir.
Implement a new Hottub provider for `thaiporntv` at `https://www.thaiporntv.com`.
You are working inside the Hottub Rust server. Your job is to add a functioning provider module that can survive handoff to another model with minimal guesswork. Do not stop at code generation. Carry the work through code, validation, and documentation updates.
Execution order is mandatory:
1. Read the repo docs.
2. Inspect the target site and collect evidence about routes, player/media requests, and pagination.
3. Choose the closest existing provider/proxy as the template.
4. Implement the provider.
5. Validate it end to end.
6. Update docs if the new provider adds a new pattern.
Do not start coding until you know:
- latest/default feed URL
- search URL
- page 2 URL
- detail page URL
- actual media request or manifest URL
- thumbnail behavior
- whether tag/uploader/studio pages exist
- whether the site has a JSON API that is easier than HTML scraping
Read these files first:
1. `docs/README.md`
2. `docs/architecture.md`
3. `docs/provider-playbook.md`
4. `docs/provider-catalog.md`
5. `docs/hottubapp/🎬 Videos - Hot Tub Docs.html`
6. `docs/hottubapp/📡 Status - Hot Tub Docs.html`
7. `docs/hottubapp/👤 Uploaders - Hot Tub Docs.html`
Then inspect the closest existing providers and proxies before coding. Pick the nearest template instead of starting from scratch.
Template selection rules:
- Use `src/providers/vjav.rs` if the target site has JSON APIs, rich tag metadata, or stable uploader identities.
- Use `src/providers/hsex.rs` if the target site is mostly HTML and needs background-loaded tags/uploaders.
- Use `src/providers/omgxxx.rs` if the site exposes multiple large filter catalogs like sites, networks, models, or studios.
- Use `src/providers/noodlemagazine.rs`, `src/providers/pornhd3x.rs`, or `src/providers/spankbang.rs` if media or thumbnails require local `/proxy/...` routes.
Required deliverables:
1. Add a new provider file at `src/providers/<channel_id>.rs`.
2. Register it in `build.rs`.
3. Export `CHANNEL_METADATA` with the correct group.
4. Implement `get_channel` with sane options and descriptions.
5. Implement `get_videos` so the default feed works, search works, and page 2 works.
6. If the site needs proxying, add `src/proxies/<channel_id>.rs` and wire `src/proxy.rs`.
7. Reuse `requester_or_default(&options, CHANNEL_ID, "...")` for outbound requests.
8. Return high-quality `VideoItem`s with the best metadata the site exposes.
9. Do not use `embed` unless the site truly requires it.
10. Update `docs/provider-catalog.md` if you add a new provider or proxy.
Implementation requirements:
- Determine the real site routing for:
- default/latest listing
- search
- page 2 and later
- tag/category shortcuts
- uploader/studio/model shortcuts if the site exposes them
- featured/trending/most-viewed or similar alternate feeds
- Model routing explicitly with a local enum like `Target`.
- If the site exposes tag or uploader IDs, keep a lookup map from normalized display title to site ID/URL target.
- Put tags into `VideoItem.tags`.
- Put uploader name/url/id into `uploader`, `uploaderUrl`, and `uploaderId` when available.
- If uploader support is implemented, use a namespaced `uploaderId` such as `<channel>:<site-local-id>` so `/api/uploaders` can route directly.
- If the query matches a known tag/uploader shortcut, use the direct archive URL instead of generic search.
- If the site exposes real media URLs or HLS manifests, populate `formats`.
- If the video page URL can be directly downloaded by yt-dlp, set `video.url` to the page URL and do not populate `formats`, as yt-dlp will extract formats dynamically.
- If direct playback needs a referer/cookie transform, use a local `/proxy/...` route built with `build_proxy_url(&options, "...", target)`.
- Keep the first version small and reliable. Add extra filters only after the default feed, search, and pagination are working.
Validation requirements:
1. `cargo check -q`
2. `HOT_TUB_PROVIDER=<channel_id> cargo check -q`
3. `HOT_TUB_PROVIDER=<channel_id> cargo run --features debug`
4. Verify `/api/status` exposes the new channel.
5. Verify `/api/videos` returns results for:
- default feed
- search query
- page 2
- at least one tag/uploader shortcut if implemented
6. Verify thumbnails load.
7. Verify `yt-dlp` can resolve `video.url` (if formats are not populated) or one of `formats[*].url` (if formats are populated).
8. If a proxy route exists, verify it directly with `curl -I` or equivalent.
Testing commands to run:
```bash
curl -s http://127.0.0.1:18080/api/status \
-H 'User-Agent: Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0' | jq
```
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-H 'User-Agent: Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0' \
-d '{"channel":"<channel_id>","sort":"new","page":1,"perPage":10}' | jq
```
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"<channel_id>","query":"test","page":1,"perPage":10}' | jq
```
Important Hottub-specific rules:
- Do not invent a new provider style if an existing provider already matches the site shape.
- Do not forget `build.rs`; missing registration means the provider does not exist at runtime.
- Do not create a brand-new requester in normal provider fetches unless you have a strong reason.
- Do not assume page URLs are playable media URLs.
- Do not expose status filters that you did not implement in `get_videos`.
- Do not populate `formats` if the page URL is yt-dlp compatible; instead, set `video.url` to the page URL.
- Do not finish without checking at least one returned media URL with `yt-dlp`.
- Do not claim pagination works unless page 2 was verified.
Completion format:
1. Briefly state which existing provider/proxy you used as the template and why.
2. List the files changed.
3. Report the exact validation commands you ran and whether they passed.
4. Report any residual limitations or site behaviors that still need follow-up.

View File

@@ -599,7 +599,7 @@ async fn videos_post(
}
for video in video_items.iter_mut() {
if video.duration <= 120 {
if video.duration <= 120 && video.duration > 0{
let mut preview_url = video.url.clone();
if let Some(formats) = &video.formats {
if let Some(first) = formats.first() {
@@ -654,6 +654,11 @@ async fn videos_post(
video_items.len()
);
for video in video_items.iter_mut() {
video.id = format!("{}:{}", channel, video.id);
}
// There is a bug in Hottub38 that makes the client error for a 403-url even though formats work fine
if clientversion == ClientVersion::new(38, 0, "Hot%20Tub".to_string()) {
// filter out videos without preview for old clients

View File

@@ -45,6 +45,36 @@ pub fn insert_video(
.execute(conn)
}
/// Replaces any existing rows for `new_id` with a single fresh row.
///
/// The `videos` table is created without a UNIQUE/PRIMARY KEY constraint, so a
/// plain insert would append duplicates and `get_video` (which reads the first
/// match) would keep returning the stalest copy. Delete-then-insert in a
/// transaction keeps a single, up-to-date entry per id so background refreshes
/// actually take effect.
///
/// * `conn` - SQLite connection the transaction runs on.
/// * `new_id` - id whose existing rows are deleted before the insert.
/// * `new_url` - URL stored in the freshly inserted row.
///
/// Returns whatever the final insert's `execute` returns. An error from the
/// delete is propagated with `?` before the insert is attempted (per Diesel's
/// `transaction` contract, returning `Err` from the closure rolls the
/// transaction back).
// Only compiled for full builds or for the single-provider builds listed here.
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "hentaihaven",
hottub_provider = "missav",
hottub_provider = "perverzija",
))]
pub fn upsert_video(
conn: &mut SqliteConnection,
new_id: &str,
new_url: &str,
) -> Result<usize, diesel::result::Error> {
use crate::models::DBVideo;
use crate::schema::videos::dsl::*;
conn.transaction(|conn| {
// Remove every stale copy first; the table itself does not enforce uniqueness.
diesel::delete(videos.filter(id.eq(new_id))).execute(conn)?;
// Insert the single replacement row; its result is the transaction's result.
diesel::insert_into(videos)
.values(DBVideo {
id: new_id.to_string(),
url: new_url.to_string(),
})
.execute(conn)
})
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",

View File

@@ -85,28 +85,32 @@ async fn main() -> std::io::Result<()> {
crate::flow_debug!("http server binding addr=0.0.0.0:18080 workers=8");
web::HttpServer::new(move || {
web::App::new()
.state(pool.clone())
.state(cache.clone())
.state(requester.clone())
.wrap(web::middleware::Logger::default())
.service(web::scope("/api").configure(api::config))
.service(web::scope("/proxy").configure(proxy::config))
.service(
web::resource("/").route(web::get().to(|req: web::HttpRequest| async move {
let host = match std::env::var("DOMAIN") {
Ok(d) => d,
Err(_) => req.connection_info().host().to_string(),
};
let source_forward_header = format!("hottub://source?url={}", host);
web::HttpResponse::Found()
.header("Location", source_forward_header)
.finish()
})),
)
.service(fs::Files::new("/", "static").index_file("index.html"))
let pool = pool.clone();
let cache = cache.clone();
let requester = requester.clone();
async move {
web::App::new()
.state(pool)
.state(cache)
.state(requester)
.middleware(web::middleware::Logger::default())
.service(web::scope("/api").configure(api::config))
.service(web::scope("/proxy").configure(proxy::config))
.service(
web::resource("/").route(web::get().to(|req: web::HttpRequest| async move {
let host = match std::env::var("DOMAIN") {
Ok(d) => d,
Err(_) => req.connection_info().host().to_string(),
};
let source_forward_header = format!("hottub://source?url={}", host);
web::HttpResponse::Found()
.header("Location", source_forward_header)
.finish()
})),
)
.service(fs::Files::new("/", "static").index_file("index.html"))
}
})
.workers(8)
// .bind_openssl(("0.0.0.0", 18080), builder)?
.bind(("0.0.0.0", 18080))?
.run()

View File

@@ -0,0 +1,598 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::DateTime;
use error_chain::error_chain;
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;
/// Grouping metadata for this channel: it is filed under the
/// `mainstream-tube` group with three descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "aggregator", "mixed"],
};
/// Site origin (no trailing slash); every listing and detail URL is built from it.
const BASE_URL: &str = "https://allpornstream.com";
/// Channel id reported in the channel description, stamped on every
/// `VideoItem`, used as the `uploaderId` namespace and passed to
/// `build_proxy_url` / `requester_or_default`.
const CHANNEL_ID: &str = "aps";
/// Desktop Chrome user-agent string sent with listing page requests.
const BROWSER_UA: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
// Generates this module's `Error` / `Result` types via `error_chain`.
// The `foreign_links` entries let `std::io::Error` and `wreq::Error` be
// converted into the local `Error` (for example through `?`).
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// The listing route a request resolves to. Every variant carries the page
/// number; `build_url` only adds a `page` parameter when it is greater than 1.
#[derive(Debug, Clone)]
enum Target {
/// Default feed at the site root.
Latest { page: u32 },
/// Keyword search (`/?search=<percent-encoded query>`).
Search { query: String, page: u32 },
/// Category archive (`/categories/<slug>`).
Category { slug: String, page: u32 },
/// Producer / studio archive (`/producers/<slug>`).
Producer { slug: String, page: u32 },
/// Actor archive (`/actors/<slug>`).
Actor { slug: String, page: u32 },
}
/// Provider for allpornstream.com. It holds no fields of its own; the cache,
/// options and requester are all passed in per call.
#[derive(Debug, Clone)]
pub struct AllPornStreamProvider {}
impl AllPornStreamProvider {
pub fn new() -> Self {
Self {}
}
/// Builds the channel description returned by `get_channel`.
///
/// Two single-select options are exposed: `sort` (only "Latest") and `sites`
/// (a producer selector whose only preset entry is "All"). The client version
/// is accepted but not used.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // Sort selector: the site only offers the latest feed.
    let sort_option = ChannelOption {
        id: "sort".to_owned(),
        title: "Sort".to_owned(),
        description: "Browse the latest feed.".to_owned(),
        systemImage: "list.number".to_owned(),
        colorName: "blue".to_owned(),
        options: vec![FilterOption {
            id: "new".to_owned(),
            title: "Latest".to_owned(),
        }],
        multiSelect: false,
    };

    // Producer selector: only the neutral "All" entry is listed.
    let producer_option = ChannelOption {
        id: "sites".to_owned(),
        title: "Producer".to_owned(),
        description: "Jump directly to a studio or producer page. Use the slug from the URL (e.g. brazzers).".to_owned(),
        systemImage: "building.2".to_owned(),
        colorName: "purple".to_owned(),
        options: vec![FilterOption {
            id: "all".to_owned(),
            title: "All".to_owned(),
        }],
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_owned(),
        name: "All Porn Stream".to_owned(),
        description: "Free HD porn videos aggregated from major studios and independent creators.".to_owned(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=allpornstream.com".to_owned(),
        status: "active".to_owned(),
        categories: Vec::new(),
        options: vec![sort_option, producer_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Turns a routing [`Target`] into the absolute listing URL to fetch.
///
/// Page 1 (or 0) uses the bare route; later pages add a `page` parameter.
/// Search queries are percent-encoded; slugs are inserted as given.
fn build_url(target: &Target) -> String {
    match target {
        // Default feed.
        Target::Latest { page } if *page > 1 => format!("{BASE_URL}/?page={page}"),
        Target::Latest { .. } => BASE_URL.to_string(),

        // Keyword search: the page parameter follows the search parameter.
        Target::Search { query, page } => {
            let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
            let first_page = *page <= 1;
            if first_page {
                format!("{BASE_URL}/?search={encoded}")
            } else {
                format!("{BASE_URL}/?search={encoded}&page={page}")
            }
        }

        // Archive routes share one shape: /<kind>/<slug>[?page=N].
        Target::Category { slug, page } if *page > 1 => {
            format!("{BASE_URL}/categories/{slug}?page={page}")
        }
        Target::Category { slug, .. } => format!("{BASE_URL}/categories/{slug}"),

        Target::Producer { slug, page } if *page > 1 => {
            format!("{BASE_URL}/producers/{slug}?page={page}")
        }
        Target::Producer { slug, .. } => format!("{BASE_URL}/producers/{slug}"),

        Target::Actor { slug, page } if *page > 1 => {
            format!("{BASE_URL}/actors/{slug}?page={page}")
        }
        Target::Actor { slug, .. } => format!("{BASE_URL}/actors/{slug}"),
    }
}
/// Parses a `M:SS` or `H:MM:SS` clock string into a number of seconds.
///
/// Surrounding whitespace is ignored. Returns `0` (meaning "unknown") when:
/// * any component is not a plain unsigned integer (components are never
///   silently dropped, so `"1:xx:30"` is rejected rather than read as `1:30`),
/// * the string has fewer than two or more than three components,
/// * the total does not fit in a `u32`.
fn parse_duration(text: &str) -> u32 {
    let mut parts: Vec<u64> = Vec::with_capacity(3);
    for piece in text.trim().split(':') {
        match piece.parse::<u32>() {
            // Widen to u64 so the multiplications below cannot overflow.
            Ok(value) => parts.push(u64::from(value)),
            Err(_) => return 0,
        }
    }

    let seconds = match parts.as_slice() {
        [m, s] => m * 60 + s,
        [h, m, s] => h * 3600 + m * 60 + s,
        _ => return 0,
    };

    if seconds > u64::from(u32::MAX) {
        0
    } else {
        seconds as u32
    }
}
/// Parses an RFC 3339 timestamp (the `datetime` attribute of a `<time>`
/// element) into Unix seconds.
///
/// Returns `None` when the string is not valid RFC 3339 or when the instant
/// lies before the Unix epoch. A negative timestamp must not be cast to `u64`,
/// because it would wrap around to a huge value far in the future.
fn parse_uploaded_at(dt: &str) -> Option<u64> {
    let seconds = DateTime::parse_from_rfc3339(dt).ok()?.timestamp();
    if seconds < 0 {
        None
    } else {
        Some(seconds as u64)
    }
}
/// Returns the first `https://` URL found in the `data-images` attribute.
///
/// The attribute holds a JSON array of quoted URLs, so the URL is taken to run
/// up to the next double quote. An empty string is returned when there is no
/// `https://` occurrence or no closing quote after it.
fn extract_first_image(data_images: &str) -> String {
    data_images
        .find("https://")
        .map(|start| &data_images[start..])
        .and_then(|tail| tail.find('"').map(|end| tail[..end].to_string()))
        .unwrap_or_default()
}
/// Converts a `-`/`_` separated slug into a space separated title in which
/// the first character of every word is upper-cased (`only-fans` becomes
/// `Only Fans`). Empty segments produced by repeated separators are skipped.
fn slug_to_title(slug: &str) -> String {
    let mut words: Vec<String> = Vec::new();
    for piece in slug.split(['-', '_']) {
        if piece.is_empty() {
            continue;
        }
        let mut rest = piece.chars();
        if let Some(first) = rest.next() {
            // `to_uppercase` may yield several chars; the remainder is kept as written.
            words.push(format!("{}{}", first.to_uppercase(), rest.as_str()));
        }
    }
    words.join(" ")
}
/// Parses a listing page (latest, search or archive) into `VideoItem`s.
///
/// Every element carrying all of `data-thumb-id`, `data-href`, `data-title`
/// and `data-images` is treated as one video card. Cards with an empty id, an
/// empty title or an href that does not start with `/` are skipped.
///
/// * `html` - raw page HTML.
/// * `options` - request options, only used to build the proxy URL.
///
/// Returns the parsed items in document order. Returns an empty vector when
/// no card matches, or when a selector or regex fails to compile.
fn parse_listing(&self, html: &str, options: &ServerOptions) -> Vec<VideoItem> {
let document = Html::parse_document(html);
// One card = one element exposing all four data attributes.
let card_sel = match Selector::parse("[data-thumb-id][data-href][data-title][data-images]")
{
Ok(s) => s,
Err(_) => return vec![],
};
let time_sel = match Selector::parse("time[datetime]") {
Ok(s) => s,
Err(_) => return vec![],
};
let studio_sel = match Selector::parse("[data-ga-category='thumbnail_studio']") {
Ok(s) => s,
Err(_) => return vec![],
};
let actor_sel = match Selector::parse("[data-ga-category='thumbnail_actor']") {
Ok(s) => s,
Err(_) => return vec![],
};
// Duration: a <span> whose class attribute begins with "absolute" and whose
// whole text is a clock string (M:SS or H:MM:SS).
let dur_re = match Regex::new(
r#"<span[^>]*class="absolute[^"]*"[^>]*>(\d+:\d{2}(?::\d{2})?)</span>"#,
) {
Ok(r) => r,
Err(_) => return vec![],
};
// Views: digits between a closing </svg> (the eye icon) and the closing </div>.
// Only plain digit runs match; NOTE(review): abbreviated counts such as "1.2K"
// would not be captured - confirm the site never renders them.
let views_re =
match Regex::new(r"</svg>\s*(\d+)\s*</div>") {
Ok(r) => r,
Err(_) => return vec![],
};
let mut items = Vec::new();
for card in document.select(&card_sel) {
let uuid = match card.value().attr("data-thumb-id") {
Some(v) if !v.is_empty() => v.to_string(),
_ => continue,
};
// Only site-relative hrefs are accepted; they are appended to BASE_URL below.
let href = match card.value().attr("data-href") {
Some(v) if v.starts_with('/') => v.to_string(),
_ => continue,
};
let title = match card.value().attr("data-title") {
Some(v) if !v.is_empty() => v.to_string(),
_ => continue,
};
// The thumbnail is the first https URL in data-images (empty if none is found).
let images_raw = card.value().attr("data-images").unwrap_or_default();
let thumb = Self::extract_first_image(images_raw);
// The regexes run on the card's serialized HTML, not on the whole page.
let card_html = card.html();
// Duration from the overlay span; 0 when the span is missing.
let duration = dur_re
.captures(&card_html)
.and_then(|c| c.get(1))
.map(|m| Self::parse_duration(m.as_str()))
.unwrap_or(0);
// Views from after the eye icon SVG; None when absent or not a valid u32.
let views = views_re
.captures(&card_html)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().parse::<u32>().ok());
// video.url is the stable page URL; the proxy URL goes into formats so the
// client can supply the required Referer header alongside the stream request.
// NOTE(review): the proxy id passed here is CHANNEL_ID ("aps"), while the
// provider catalog lists the redirect route as `/proxy/allpornstream/{endpoint}*`
// - confirm that build_proxy_url and the registered route agree.
let detail_url = format!("{BASE_URL}{href}");
let proxy_target = strip_url_scheme(&detail_url);
let proxy_url = build_proxy_url(options, CHANNEL_ID, &proxy_target);
let mut item = VideoItem::new(
uuid,
title,
detail_url.clone(),
CHANNEL_ID.to_string(),
thumb,
duration,
);
// A format is attached only when a proxy URL could be built.
if !proxy_url.is_empty() {
let mut format =
VideoFormat::new(proxy_url, "auto".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.formats(vec![format]);
}
// Upload date from the first <time datetime="..."> inside the card.
if let Some(time_el) = card.select(&time_sel).next() {
if let Some(dt) = time_el.value().attr("datetime") {
if let Some(ts) = Self::parse_uploaded_at(dt) {
item = item.uploaded_at(ts);
}
}
}
if let Some(v) = views {
item = item.views(v);
}
// Producer / studio: the first studio link in the card becomes the uploader.
if let Some(studio_link) = card.select(&studio_sel).next() {
let label = studio_link
.value()
.attr("aria-label")
.unwrap_or_default();
// "producer: ONLY FANS" -> "ONLY FANS". A label without that prefix yields
// an empty name, in which case no uploader is set.
let raw_name = label
.strip_prefix("producer: ")
.unwrap_or_default()
.trim()
.to_string();
if !raw_name.is_empty() {
let producer_href =
studio_link.value().attr("href").unwrap_or_default();
// NOTE(review): trim_start_matches leaves an href that does not start with
// "/producers/" untouched, so such an href would be used as the slug as-is
// - confirm the site always emits relative /producers/<slug> links.
let slug = producer_href
.trim_start_matches("/producers/")
.to_string();
// Normalize the label's casing: lowercase, hyphenate, then title-case each word.
let display = Self::slug_to_title(&raw_name.to_lowercase().replace(' ', "-"));
item = item.uploader(display.clone());
if !slug.is_empty() {
item = item.uploader_url(format!("{BASE_URL}/producers/{slug}"));
// Namespaced uploader id: "<channel>:<slug>".
item.uploaderId = Some(format!("{CHANNEL_ID}:{slug}"));
}
}
}
// Actors become tags. Links are deduplicated by href, keeping the longest
// (by byte length) non-empty link text seen for each actor.
let mut actor_map: HashMap<String, String> = HashMap::new();
for actor_link in card.select(&actor_sel) {
let actor_href = actor_link
.value()
.attr("href")
.unwrap_or_default()
.to_string();
let text = actor_link
.text()
.collect::<String>()
.trim()
.to_string();
if !actor_href.is_empty() && !text.is_empty() {
actor_map
.entry(actor_href)
.and_modify(|v| {
if text.len() > v.len() {
*v = text.clone();
}
})
.or_insert(text);
}
}
// HashMap iteration order is unspecified, so sort for a stable tag order.
let mut actors: Vec<String> = actor_map.into_values().collect();
actors.sort();
if !actors.is_empty() {
item = item.tags(actors);
}
items.push(item);
}
items
}
/// Resolves the request parameters into the listing route to fetch.
///
/// Precedence:
/// 1. explicit shortcuts in the query: `actor:<slug>`, `producer:<slug>`,
///    `category:<slug>`;
/// 2. any other non-empty query becomes a keyword search;
/// 3. a `sites` option other than `all` selects a producer archive;
/// 4. otherwise the latest feed.
///
/// The query, shortcut slugs and `sites` value are trimmed. Stray whitespace
/// (for example `"actor: riley-reid"`) would otherwise end up inside the URL
/// path. A shortcut prefix without a slug (`"actor:"`) names no archive; it is
/// treated like an empty query instead of requesting `/actors/`.
///
/// `_sort` is accepted for signature compatibility; the site only has one feed
/// order.
fn resolve_target(query: &str, _sort: &str, page: u32, options: &ServerOptions) -> Target {
    let query = query.trim();

    // Prefix -> constructor table for the explicit archive shortcuts.
    let shortcuts: [(&str, fn(String, u32) -> Target); 3] = [
        ("actor:", |slug, page| Target::Actor { slug, page }),
        ("producer:", |slug, page| Target::Producer { slug, page }),
        ("category:", |slug, page| Target::Category { slug, page }),
    ];

    let mut keyword = query;
    for (prefix, make_target) in shortcuts.iter() {
        if let Some(rest) = query.strip_prefix(*prefix) {
            let slug = rest.trim();
            if !slug.is_empty() {
                return make_target(slug.to_string(), page);
            }
            // Bare prefix: nothing to look up and nothing worth searching for.
            keyword = "";
            break;
        }
    }

    // Keyword search
    if !keyword.is_empty() {
        return Target::Search {
            query: keyword.to_string(),
            page,
        };
    }

    // Producer filter from options.sites
    if let Some(sites) = &options.sites {
        let sites = sites.trim();
        if !sites.is_empty() && sites != "all" {
            return Target::Producer {
                slug: sites.to_string(),
                page,
            };
        }
    }

    Target::Latest { page }
}
/// Returns the video items for `target`, served from `cache` when possible.
///
/// A cached entry younger than 300 seconds is returned as-is (a failed
/// `elapsed()` counts as age zero). Otherwise the listing page is requested
/// over HTTP/1.1 with browser-like headers and parsed. An empty response body
/// yields an empty list, and only non-empty parse results are stored in the cache.
///
/// # Errors
/// Returns an error when the HTTP request fails.
async fn fetch_and_parse(
    &self,
    cache: VideoCache,
    target: Target,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = Self::build_url(&target);
    if let Some((time, items)) = cache.get(&url) {
        let age_secs = time.elapsed().unwrap_or_default().as_secs();
        if age_secs < 300 {
            return Ok(items.clone());
        }
    }
    let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_and_parse");
    let headers = vec![
        ("user-agent".to_string(), BROWSER_UA.to_string()),
        (
            "accept".to_string(),
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(),
        ),
        ("accept-language".to_string(), "en-US,en;q=0.5".to_string()),
    ];
    let html = requester
        .get_with_headers(&url, headers, Some(wreq::Version::HTTP_11))
        .await
        .map_err(|e| Error::from(format!("request failed url={url}: {e}")))?;
    if html.is_empty() {
        return Ok(Vec::new());
    }
    let parsed = self.parse_listing(&html, &options);
    if parsed.is_empty() {
        // Empty results are deliberately not cached.
        return Ok(parsed);
    }
    cache.insert(url, parsed.clone());
    Ok(parsed)
}
}
#[async_trait]
impl Provider for AllPornStreamProvider {
    /// Returns one page of videos for the given query/options.
    ///
    /// `page` is parsed as a number; unparsable input falls back to page 1 and
    /// page 0 is clamped to 1 so listing URLs are never built for a "zeroth" page.
    /// `pool` and `per_page` are not used by this provider. Fetch or parse
    /// failures are reported via `report_provider_error` and produce an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        let page = page.parse::<u32>().unwrap_or(1).max(1);
        let query_str = query.unwrap_or_default();
        let target = Self::resolve_target(&query_str, &sort, page, &options);
        match self.fetch_and_parse(cache, target, options).await {
            Ok(items) => items,
            Err(e) => {
                report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await;
                vec![]
            }
        }
    }
    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
// Unit tests for URL building, duration/thumbnail helpers and listing parsing.
// All inputs are inline fixtures; no network access is performed here.
#[cfg(test)]
mod tests {
use super::AllPornStreamProvider;
use crate::videos::ServerOptions;
// Builds a ServerOptions with every field unset except `public_url_base`,
// which points at a local address.
fn make_options() -> ServerOptions {
ServerOptions {
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: Some("http://127.0.0.1:18080".to_string()),
requester: None,
network: None,
stars: None,
categories: None,
duration: None,
sort: None,
sexuality: None,
}
}
// Page 1 of the latest feed is the bare site root; later pages add `?page=N`.
#[test]
fn builds_latest_urls() {
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Latest { page: 1 }),
"https://allpornstream.com"
);
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Latest { page: 2 }),
"https://allpornstream.com/?page=2"
);
}
// Search terms are percent-encoded (space -> %20) and the page is appended from page 2 on.
#[test]
fn builds_search_urls() {
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Search {
query: "brazzers".to_string(),
page: 1
}),
"https://allpornstream.com/?search=brazzers"
);
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Search {
query: "big tits".to_string(),
page: 2
}),
"https://allpornstream.com/?search=big%20tits&page=2"
);
}
// Producer listings live under `/producers/<slug>` with an optional `?page=N`.
#[test]
fn builds_producer_urls() {
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Producer {
slug: "brazzers".to_string(),
page: 1
}),
"https://allpornstream.com/producers/brazzers"
);
assert_eq!(
AllPornStreamProvider::build_url(&super::Target::Producer {
slug: "brazzers".to_string(),
page: 2
}),
"https://allpornstream.com/producers/brazzers?page=2"
);
}
// Both MM:SS and H:MM:SS strings are converted to seconds.
#[test]
fn parses_duration() {
assert_eq!(AllPornStreamProvider::parse_duration("18:42"), 1122);
assert_eq!(AllPornStreamProvider::parse_duration("1:23:45"), 5025);
assert_eq!(AllPornStreamProvider::parse_duration("00:59"), 59);
}
// The first URL of a JSON-array-formatted image list is returned.
#[test]
fn extracts_first_image() {
let input =
r#"["https://example.com/thumb1.jpg","https://example.com/thumb2.jpg"]"#;
assert_eq!(
AllPornStreamProvider::extract_first_image(input),
"https://example.com/thumb1.jpg"
);
}
// End-to-end parse of a single card: id, title, duration, views, thumbnail,
// uploader, actor tags (the longer text wins for a duplicated actor href),
// upload time, page URL and the proxied format with a Referer header.
#[test]
fn parses_cards_from_html() {
let provider = AllPornStreamProvider::new();
let options = make_options();
let html = r#"<!DOCTYPE html><html><body>
<div
data-thumb-id="34a7e37d-7fca-4f30-ad0b-3ab134a00f9f"
data-href="/post/34a7e37d-7fca-4f30-ad0b-3ab134a00f9f/test-video"
data-slug="/post/34a7e37d-7fca-4f30-ad0b-3ab134a00f9f/test-video"
data-title="Test Video Title"
data-images='["https://example.com/thumb.jpg"]'
>
<span class="absolute bottom-2 right-2 z-10">18:42</span>
<a data-ga-category="thumbnail_studio" aria-label="producer: ONLY FANS" href="/producers/only-fans">OF</a>
<a data-ga-category="thumbnail_actor" href="/actors/bonnie-blue">BB</a>
<a data-ga-category="thumbnail_actor" href="/actors/bonnie-blue">Bonnie Blue</a>
<time datetime="2026-05-18T19:06:53.000Z">1 hour ago</time>
<div class="flex items-center gap-1"><svg><path/><path fill-rule="evenodd" clip-rule="evenodd"></path></svg>416</div>
</div>
</body></html>"#;
let items = provider.parse_listing(html, &options);
assert_eq!(items.len(), 1);
let item = &items[0];
assert_eq!(item.id, "34a7e37d-7fca-4f30-ad0b-3ab134a00f9f");
assert_eq!(item.title, "Test Video Title");
assert_eq!(item.duration, 1122);
assert_eq!(item.views, Some(416));
assert!(item.thumb.contains("example.com/thumb.jpg"));
assert_eq!(item.uploader.as_deref(), Some("Only Fans"));
assert!(item.tags.as_ref().unwrap().contains(&"Bonnie Blue".to_string()));
assert!(item.uploadedAt.is_some());
// video.url is the page URL; proxy URL is in formats[0]
assert_eq!(
item.url,
"https://allpornstream.com/post/34a7e37d-7fca-4f30-ad0b-3ab134a00f9f/test-video"
);
let format = &item.formats.as_ref().unwrap()[0];
assert!(format.url.contains("/proxy/aps/"));
assert!(format.url.contains("allpornstream.com/post/34a7e37d"));
let headers = format.http_headers_pairs();
assert!(headers.iter().any(|(k, _)| k.to_lowercase() == "referer"));
}
}

1218
src/providers/archivebate.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,531 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use wreq::Version;
/// Static channel metadata for this provider: its group id and descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "kvs", "blowjob"],
};
// Site root; copied into the provider's `url` field by `new()`.
const BASE_URL: &str = "https://blowjobs.pro";
// Identifier used for the channel id, for created video items and for error reports.
const CHANNEL_ID: &str = "blowjobspro";
// User-Agent header value sent with HTML requests (see `html_headers`).
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
// Accept header value sent with HTML requests (see `html_headers`).
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's `Error`/`Result` types via `error_chain`.
// `std::io::Error` is linked as a foreign error, and a custom `Parse` kind
// carries a message rendered as "parse error: <msg>".
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for blowjobs.pro listings.
///
/// The category state sits behind `Arc<RwLock<..>>` so the background loader
/// started by `new()` can replace it after construction.
#[derive(Debug, Clone)]
pub struct BlowjobsproProvider {
/// Base site URL that listing, search and relative URLs are built from.
url: String,
/// Category filter options exposed in the channel; starts with just "All".
categories: Arc<RwLock<Vec<FilterOption>>>,
/// Normalized category title -> category link (`href`) as scraped from the site.
category_map: Arc<RwLock<HashMap<String, String>>>,
}
/// The kind of listing a request resolves to (see `build_target_url`).
#[derive(Debug, Clone)]
enum Target {
/// `/latest-updates/` archive.
Latest,
/// `/most-popular/` archive.
MostViewed,
/// `/top-rated/` archive.
TopRated,
/// Keyword search under `/search/<path>/`.
Search { query: String },
/// A category archive, addressed by the URL stored in the category map.
Category { url: String },
}
impl BlowjobsproProvider {
/// Creates the provider with only the "All" category and kicks off the
/// background category load before returning.
pub fn new() -> Self {
    let all_option = FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    };
    let categories = Arc::new(RwLock::new(vec![all_option]));
    let category_map = Arc::new(RwLock::new(HashMap::new()));
    let provider = Self {
        url: BASE_URL.to_string(),
        categories,
        category_map,
    };
    provider.spawn_initial_load();
    provider
}
/// Loads the category list on a dedicated OS thread.
///
/// The thread builds its own current-thread Tokio runtime and drives
/// `load_categories` to completion on it. Failures (runtime construction or
/// the load itself) are reported in the background and otherwise ignored.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let shared_categories = Arc::clone(&self.categories);
    let shared_map = Arc::clone(&self.category_map);
    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Ok(runtime) => runtime,
            Err(build_error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.runtime_build",
                    &build_error.to_string(),
                );
                return;
            }
        };
        runtime.block_on(async move {
            let outcome = Self::load_categories(&base_url, shared_categories, shared_map).await;
            if let Err(load_error) = outcome {
                report_provider_error_background(
                    CHANNEL_ID,
                    "load_categories",
                    &load_error.to_string(),
                );
            }
        });
    });
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let categories = self
.categories
.read()
.map(|value| value.clone())
.unwrap_or_default();
Channel {
id: CHANNEL_ID.to_string(),
name: "Blowjobs.pro".to_string(),
description: "Blowjobs.pro KVS listings with latest, most viewed, top rated, search, and category shortcuts."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=blowjobs.pro".to_string(),
status: "active".to_string(),
categories: categories.iter().map(|value| value.title.clone()).collect(),
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse feed ordering.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Latest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Categories".to_string(),
description: "Jump directly to category archives.".to_string(),
systemImage: "square.grid.2x2".to_string(),
colorName: "orange".to_string(),
options: categories,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Decodes HTML entities in `text`; if decoding to a string fails, the input
/// is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Trims `text` and replaces every run of whitespace with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Produces the lookup key for a category title.
///
/// Surrounding whitespace and leading `#` characters are removed, `_` and `-`
/// become spaces, whitespace runs collapse to one space, and ASCII letters are
/// lower-cased.
fn normalize_title(title: &str) -> String {
    let without_hash = title.trim().trim_start_matches('#');
    let spaced: String = without_hash
        .chars()
        .map(|ch| if ch == '_' || ch == '-' { ' ' } else { ch })
        .collect();
    let mut normalized = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
/// Turns a scraped link into an absolute URL.
///
/// Empty input stays empty, absolute `http(s)` URLs pass through,
/// protocol-relative (`//host/..`) links get an `https:` scheme, root-relative
/// links are joined to the site base, and anything else is treated as relative
/// to the site root (a leading `./` is dropped).
fn normalize_url(&self, url: &str) -> String {
    let trimmed = url.trim();
    match trimmed {
        "" => String::new(),
        absolute if absolute.starts_with("http://") || absolute.starts_with("https://") => {
            absolute.to_string()
        }
        scheme_less if scheme_less.starts_with("//") => format!("https:{trimmed}"),
        rooted if rooted.starts_with('/') => format!("{}{}", self.url, trimmed),
        relative => format!("{}/{}", self.url, relative.trim_start_matches("./")),
    }
}
/// Request headers used for HTML fetches: the fixed User-Agent and Accept
/// values plus the supplied `referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ]
    .into_iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Builds the path segment used in search URLs: each whitespace-separated word
/// is percent-encoded (everything non-alphanumeric) and the words are joined with `-`.
fn build_search_path(query: &str) -> String {
    let mut path = String::new();
    for (index, part) in query.split_whitespace().enumerate() {
        if index > 0 {
            path.push('-');
        }
        path.push_str(&utf8_percent_encode(part, NON_ALPHANUMERIC).to_string());
    }
    path
}
/// Returns the URL of page `page` of an archive.
///
/// Pages 0 and 1 map to the archive root (with exactly one trailing slash);
/// later pages append `/<page>/`.
fn build_archive_page_url(archive_url: &str, page: u16) -> String {
    let base = archive_url.trim_end_matches('/');
    match page {
        0 | 1 => base.to_string() + "/",
        _ => format!("{}/{page}/", base),
    }
}
/// Builds the listing URL for `target`.
///
/// The three feed targets and categories are paginated archives; the search
/// URL built here does not include the page number.
fn build_target_url(&self, target: &Target, page: u16) -> String {
    let archive_path = match target {
        Target::Search { query } => {
            let normalized = Self::build_search_path(query);
            return format!("{}/search/{normalized}/", self.url);
        }
        Target::Category { url } => return Self::build_archive_page_url(url, page),
        Target::Latest => "latest-updates",
        Target::MostViewed => "most-popular",
        Target::TopRated => "top-rated",
    };
    Self::build_archive_page_url(&format!("{}/{archive_path}/", self.url), page)
}
/// Decides which listing a request refers to.
///
/// Precedence:
/// 1. A category filter other than empty/"all" that resolves to a known category.
/// 2. A non-empty query: a resolvable `category:<name>` shortcut, then a query
///    that itself matches a category name, otherwise a keyword search.
/// 3. The feed selected by `sort` ("popular", "rated", anything else = latest).
fn target_from_request(
    &self,
    sort: &str,
    query: Option<&str>,
    category: Option<&str>,
) -> Target {
    let requested_category = category.map(str::trim).unwrap_or("");
    let wants_category =
        !(requested_category.is_empty() || requested_category.eq_ignore_ascii_case("all"));
    if wants_category {
        if let Some(url) = self.resolve_category(requested_category) {
            return Target::Category { url };
        }
    }
    if let Some(keyword) = query.map(str::trim).filter(|value| !value.is_empty()) {
        let from_shortcut = keyword
            .strip_prefix("category:")
            .and_then(|name| self.resolve_category(name));
        return match from_shortcut.or_else(|| self.resolve_category(keyword)) {
            Some(url) => Target::Category { url },
            None => Target::Search {
                query: keyword.to_string(),
            },
        };
    }
    match sort {
        "popular" => Target::MostViewed,
        "rated" => Target::TopRated,
        _ => Target::Latest,
    }
}
/// Looks up the stored link for a category name.
///
/// The name is normalized with `normalize_title` first. Returns `None` when the
/// normalized name is empty, unknown, or the map lock is poisoned.
fn resolve_category(&self, value: &str) -> Option<String> {
    let key = Self::normalize_title(value);
    if key.is_empty() {
        return None;
    }
    let map = self.category_map.read().ok()?;
    map.get(&key).cloned()
}
async fn fetch_html(
&self,
options: &ServerOptions,
url: &str,
referer: &str,
) -> Result<String> {
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
requester
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Builds the async block URL used for search result pages after the first.
///
/// Returns `None` for page 0/1 or a blank query. The page number is passed as
/// the `from_videos+from_albums` parameter and the query as `q`.
fn search_page_from_async(&self, query: &str, page: u16) -> Option<String> {
    let query_value = query.trim();
    if page <= 1 || query_value.is_empty() {
        return None;
    }
    let from = page;
    let encoded_query = utf8_percent_encode(query_value, NON_ALPHANUMERIC).to_string();
    let async_url = format!(
        "{}/search/{}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={encoded_query}&category_ids=&sort_by=&from_videos%2Bfrom_albums={from}",
        self.url,
        Self::build_search_path(query_value)
    );
    Some(async_url)
}
/// Fetches the HTML for one page of `target`.
///
/// Search pages after the first are loaded through the async block URL with
/// the search page as referer; everything else uses the regular listing URL
/// with the site root as referer.
async fn fetch_listing_html(
    &self,
    options: &ServerOptions,
    target: &Target,
    page: u16,
) -> Result<String> {
    if let Target::Search { query } = target {
        let async_url = if page > 1 {
            self.search_page_from_async(query, page)
        } else {
            None
        };
        if let Some(async_url) = async_url {
            let referer = format!("{}/search/{}/", self.url, Self::build_search_path(query));
            return self.fetch_html(options, &async_url, &referer).await;
        }
    }
    let page_url = self.build_target_url(target, page);
    self.fetch_html(options, &page_url, &self.url).await
}
/// Extracts video items from a listing page.
///
/// The page is split on `<div class="item">`; within each segment the first
/// video link, `title` attribute, screenshot URL, duration, view count and
/// category links are picked out with regular expressions. Segments without a
/// video link or without a title are skipped.
///
/// Durations that fail to parse or do not fit in `u32` become 0. Tag text is
/// entity-decoded and whitespace-collapsed via `collapse_whitespace`; empty
/// tags are dropped.
///
/// # Errors
/// Returns an error only if one of the regular expressions fails to compile.
fn parse_listing_html(&self, html: &str) -> Result<Vec<VideoItem>> {
    let href_re = Self::regex(r#"href="([^"]+/videos/\d+/[^"]*)""#)?;
    let title_re = Self::regex(r#"title="([^"]+)""#)?;
    let thumb_re = Self::regex(r#"(?:data-original|src)="([^"]+/contents/videos_screenshots/[^"]+)""#)?;
    let duration_re = Self::regex(r#"<div class="duration">\s*([^<]+)\s*</div>"#)?;
    let views_re = Self::regex(r#"<div class="views">\s*([^<]+)\s*</div>"#)?;
    let tag_re = Self::regex(r#"<a href="[^"]*/categories/[^"]*"[^>]*>\s*([^<]+)\s*</a>"#)?;
    let id_re = Self::regex(r"/videos/(\d+)/")?;
    let mut videos = Vec::new();
    // The first chunk precedes the first item card, hence skip(1).
    for segment in html.split("<div class=\"item\">").skip(1) {
        let Some(href_caps) = href_re.captures(segment) else {
            continue;
        };
        let Some(raw_url) = href_caps.get(1).map(|m| m.as_str()) else {
            continue;
        };
        let url = self.normalize_url(raw_url);
        // Fall back to the full URL as id if the numeric id cannot be extracted.
        let id = id_re
            .captures(&url)
            .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
            .unwrap_or_else(|| url.clone());
        let title = title_re
            .captures(segment)
            .and_then(|caps| caps.get(1).map(|m| Self::decode_html(m.as_str())))
            .unwrap_or_default();
        if title.is_empty() {
            continue;
        }
        let thumb = thumb_re
            .captures(segment)
            .and_then(|caps| caps.get(1).map(|m| self.normalize_url(m.as_str())))
            .unwrap_or_default();
        // Checked conversion: negative or oversized values become 0 instead of
        // being silently truncated by an `as` cast.
        let duration = duration_re
            .captures(segment)
            .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
            .and_then(|value| parse_time_to_seconds(&value))
            .and_then(|seconds| u32::try_from(seconds).ok())
            .unwrap_or(0);
        let views = views_re
            .captures(segment)
            .and_then(|caps| caps.get(1).map(|m| m.as_str().trim().to_string()))
            .and_then(|value| parse_abbreviated_number(&value))
            .map(|value| value as u32);
        let tags = tag_re
            .captures_iter(segment)
            .filter_map(|caps| {
                caps.get(1)
                    .map(|m| Self::collapse_whitespace(&Self::decode_html(m.as_str())))
            })
            .filter(|value| !value.is_empty())
            .collect::<Vec<_>>();
        let mut item = VideoItem::new(
            id,
            title,
            url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        item.tags = Some(tags);
        item.views = views;
        videos.push(item);
    }
    Ok(videos)
}
/// Downloads `<url>/categories/` and replaces the shared category list and
/// title-to-link map with what was found there.
///
/// Each category link contributes its `title` attribute (entity-decoded and
/// trimmed) and its `href`. Links without an `href`, with an empty title, or
/// with an empty normalized title are ignored. The option list always starts
/// with "All". A poisoned lock leaves the corresponding shared value untouched.
///
/// # Errors
/// Returns an error when the request fails or the link selector cannot be parsed.
async fn load_categories(
    url: &str,
    categories: Arc<RwLock<Vec<FilterOption>>>,
    category_map: Arc<RwLock<HashMap<String, String>>>,
) -> Result<()> {
    let category_url = format!("{url}/categories/");
    let mut requester = crate::util::requester::Requester::new();
    let html = requester
        .get_with_headers(
            &category_url,
            Self::html_headers(url),
            Some(Version::HTTP_11),
        )
        .await
        .map_err(|error| Error::from(format!("category fetch failed: {error}")))?;
    let doc = Html::parse_document(&html);
    let link_selector = Self::selector("#list_categories_categories_list_items a.item-link")?;
    let mut map = HashMap::new();
    let mut options = Vec::new();
    options.push(FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    });
    for link in doc.select(&link_selector) {
        let element = link.value();
        let Some(href) = element.attr("href") else {
            continue;
        };
        let title = Self::decode_html(element.attr("title").unwrap_or(""))
            .trim()
            .to_string();
        if title.is_empty() {
            continue;
        }
        let normalized = Self::normalize_title(&title);
        if normalized.is_empty() {
            continue;
        }
        map.insert(normalized.clone(), href.to_string());
        options.push(FilterOption {
            id: normalized,
            title,
        });
    }
    match categories.write() {
        Ok(mut slot) => *slot = options,
        Err(_) => {}
    }
    match category_map.write() {
        Ok(mut slot) => *slot = map,
        Err(_) => {}
    }
    Ok(())
}
}
#[async_trait]
impl Provider for BlowjobsproProvider {
    /// Returns one page of videos for the request.
    ///
    /// The page number defaults to 1 when unparsable and is never below 1.
    /// Fetch and parse failures are reported (with the failing stage name) and
    /// produce an empty list. The cache, pool and per-page arguments are unused.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u16>().map_or(1, |value| value.max(1));
        let target = self.target_from_request(
            &sort,
            query.as_deref(),
            options.categories.as_deref(),
        );
        let (stage, message) = match self.fetch_listing_html(&options, &target, page_number).await {
            Ok(html) => match self.parse_listing_html(&html) {
                Ok(videos) => return videos,
                Err(error) => ("get_videos.parse_listing_html", error.to_string()),
            },
            Err(error) => ("get_videos.fetch_listing_html", error.to_string()),
        };
        report_provider_error(CHANNEL_ID, stage, &message).await;
        vec![]
    }
    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}

403
src/providers/camsoda.rs Normal file
View File

@@ -0,0 +1,403 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use scraper::{Html, Selector};
use std::collections::HashSet;
/// Static channel metadata for this provider: its group id and descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "live-cams",
tags: &["cams", "amateur", "recordings", "clips"],
};
// Site root; used for the provider's `url` field and for item/uploader URLs.
const BASE_URL: &str = "https://www.camsoda.com";
// Identifier used for the channel id, video items, uploader ids and error reports.
const CHANNEL_ID: &str = "camsoda";
// Declares this module's `Error`/`Result` types via `error_chain`.
// `std::io::Error` is linked as a foreign error, and a custom `Parse` kind
// carries a message rendered as "parse error: <msg>".
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for CamSoda media listings.
#[derive(Debug, Clone)]
pub struct CamsodaProvider {
/// Base site URL that listing URLs are built from (set from `BASE_URL`).
url: String,
}
/// Which listing page to fetch (see `build_listing_url`).
#[derive(Debug, Clone)]
enum Target {
/// Default listing at /media?page=N
Listing,
/// Model media page at /{username}/media (later pages add ?page=N)
Model { username: String },
}
impl CamsodaProvider {
/// Creates a provider pointed at the default CamSoda base URL.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Builds the static channel description; this channel exposes no categories
/// and no options.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let description = String::from(
        "CamSoda model video clips — recorded amateur cam shows uploaded by performers.",
    );
    let favicon = String::from("https://www.google.com/s2/favicons?sz=64&domain=camsoda.com");
    Channel {
        id: String::from(CHANNEL_ID),
        name: String::from("CamSoda"),
        description,
        premium: false,
        favicon,
        status: String::from("active"),
        categories: Vec::new(),
        options: Vec::new(),
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Chooses the fetch target for an optional query.
///
/// A query of the form `uploader:<name>`, `model:<name>` or `user:<name>` with a
/// non-empty name selects that model's media page (the name is trimmed and
/// lower-cased). Every other query, including none, uses the default listing;
/// no keyword search is issued by this function.
fn pick_target(query: Option<&str>) -> Target {
    const MODEL_PREFIXES: [&str; 3] = ["uploader:", "model:", "user:"];
    let trimmed = query.map(str::trim).unwrap_or("");
    MODEL_PREFIXES
        .iter()
        .filter_map(|prefix| trimmed.strip_prefix(*prefix))
        .map(|name| name.trim().to_lowercase())
        .find(|name| !name.is_empty())
        .map_or(Target::Listing, |username| Target::Model { username })
}
/// Builds the URL for page `page` of `target` (page 0 is treated as page 1).
///
/// The default listing always carries `?page=N`; a model page omits it on page 1.
fn build_listing_url(&self, target: &Target, page: u16) -> String {
    let page = page.max(1);
    let base = &self.url;
    match target {
        Target::Listing => format!("{}/media?page={}", base, page),
        Target::Model { username } if page <= 1 => format!("{}/{}/media", base, username),
        Target::Model { username } => format!("{}/{}/media?page={}", base, username, page),
    }
}
/// Extracts video items from the HTML of a CamSoda media page.
///
/// Only anchors whose `href` has the exact form `/{username}/media/{slug}/{id}`
/// (numeric id) are considered, and each id is emitted once. Within an anchor:
/// - `[class*="media-item-module__title"]` supplies the title
///   (fallback: `CamSoda video <id>`),
/// - `[class*="media-item-module__subtitle"]` of the form `by UPLOADER (MM:SS)`
///   supplies uploader and duration (otherwise no uploader, duration 0),
/// - the first `img` whose `src` contains `media-secure.camsoda.com`, else
///   `livemediahost.com`, supplies the thumbnail (otherwise empty).
///
/// Returns an empty list if any selector or regex fails to compile.
fn parse_html_items(html: &str) -> Vec<VideoItem> {
    let (Ok(anchor_sel), Ok(title_sel), Ok(subtitle_sel), Ok(img_sel)) = (
        Selector::parse(r#"a[href]"#),
        Selector::parse(r#"[class*="media-item-module__title"]"#),
        Selector::parse(r#"[class*="media-item-module__subtitle"]"#),
        Selector::parse(r#"img[src]"#),
    ) else {
        return vec![];
    };
    // Subtitle pattern "by UPLOADER (MM:SS)" and link pattern /{username}/media/{slug}/{id}.
    let (Ok(sub_re), Ok(href_re)) = (
        regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$"),
        regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$"),
    ) else {
        return vec![];
    };
    let document = Html::parse_document(html);
    let mut seen_ids: HashSet<String> = HashSet::new();
    let mut items: Vec<VideoItem> = Vec::new();
    for anchor in document.select(&anchor_sel) {
        let Some(caps) = anchor
            .value()
            .attr("href")
            .and_then(|href| href_re.captures(href))
        else {
            continue;
        };
        let group = |index: usize| caps.get(index).map(|m| m.as_str()).unwrap_or("").to_string();
        let (username, slug, video_id) = (group(1), group(2), group(3));
        // Short-circuiting keeps the id out of the seen set unless both parts are present.
        if video_id.is_empty() || username.is_empty() || !seen_ids.insert(video_id.clone()) {
            continue;
        }
        let title = anchor
            .select(&title_sel)
            .next()
            .map(|el| el.text().collect::<String>().trim().to_string())
            .filter(|text| !text.is_empty())
            .unwrap_or_else(|| format!("CamSoda video {video_id}"));
        let subtitle = anchor
            .select(&subtitle_sel)
            .next()
            .map(|el| el.text().collect::<String>().trim().to_string())
            .unwrap_or_default();
        let (uploader, duration) = match sub_re.captures(&subtitle) {
            Some(sc) => {
                let name = sc
                    .get(1)
                    .map(|m| m.as_str().trim().to_string())
                    .filter(|value| !value.is_empty());
                let seconds = sc
                    .get(2)
                    .and_then(|m| parse_time_to_seconds(m.as_str()))
                    .and_then(|s| u32::try_from(s).ok())
                    .unwrap_or(0);
                (name, seconds)
            }
            None => (None, 0),
        };
        // Prefer the video-specific thumbnail host, then the fallback host.
        let image_sources: Vec<&str> = anchor
            .select(&img_sel)
            .filter_map(|img| img.value().attr("src"))
            .collect();
        let thumb = image_sources
            .iter()
            .copied()
            .find(|src| src.contains("media-secure.camsoda.com"))
            .or_else(|| {
                image_sources
                    .iter()
                    .copied()
                    .find(|src| src.contains("livemediahost.com"))
            })
            .unwrap_or("")
            .to_string();
        let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}");
        let mut item = VideoItem::new(
            video_id,
            title,
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        item.uploader = uploader;
        item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media"));
        item.uploaderId = Some(format!("{CHANNEL_ID}:{username}"));
        items.push(item);
    }
    items
}
async fn fetch_items(
&self,
target: &Target,
page: u16,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
let url = self.build_listing_url(target, page);
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items");
let text = requester
.get(&url, None)
.await
.map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?;
// Guard against CF challenge pages slipping through
if text.contains("cf-browser-verification")
|| text.contains("cf-chl")
|| text.contains("Just a moment")
{
return Err(Error::from("cloudflare challenge page returned".to_string()));
}
let items = Self::parse_html_items(&text);
Ok(items)
}
}
#[async_trait]
impl Provider for CamsodaProvider {
    /// Returns one page of videos for the request.
    ///
    /// The page number defaults to 1 when unparsable and is never below 1. The
    /// cache, pool, sort and per-page arguments are unused. Failures are
    /// reported and produce an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = cache;
        let _ = pool;
        let _ = sort;
        let _ = per_page;
        let page = page.parse::<u16>().map_or(1, |number| number.max(1));
        let trimmed_query = query
            .as_deref()
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let target = Self::pick_target(trimmed_query);
        match self.fetch_items(&target, page, &options).await {
            Ok(items) => items,
            Err(error) => {
                let message = error.to_string();
                report_provider_error(CHANNEL_ID, "get_videos", &message).await;
                Vec::new()
            }
        }
    }
    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
// Unit tests for HTML parsing, target selection and URL building.
// All inputs are inline fixtures; no network access is performed here.
#[cfg(test)]
mod tests {
use super::*;
// Fixture with three video anchors, each holding a title span, a
// "by UPLOADER (MM:SS)" subtitle span and a media-secure thumbnail.
fn sample_html() -> String {
// Simplified version of the HTML returned by Jina (X-Return-Format: html)
r#"<!DOCTYPE html><html><body>
<a href="/lil-asian-jaz/media/torso-ride-begging-for-your-cum/16984249">
<span><span class="media-item-module__title--lrB0Q">Torso ride begging for your cum</span><span class="media-item-module__subtitle--VRwsa">by jazzyj (24:35)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/16984249/16984249.thumb.1777857871.webp" width="320" height="240"></div>
</a>
<a href="/coco-dethick/media/first-ir-bg-show-ft-johnny-love/17009049">
<span><span class="media-item-module__new--uXKvP">new</span><span class="media-item-module__title--lrB0Q">FIRST IR BG SHOW FT JOHNNY LOVE</span><span class="media-item-module__subtitle--VRwsa">by Coco Dethick (44:14)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17009049/17009049_1778619855.thumb.webp" width="320" height="240"></div>
</a>
<a href="/hot-wife-mia/media/ultimate-squirting-video/17112135">
<span><span class="media-item-module__title--lrB0Q">Ultimate squirting video!</span><span class="media-item-module__subtitle--VRwsa">by Hot Wife Mia (02:47)</span></span>
<div><img alt="thumb" src="https://media-secure.camsoda.com/user/videos/17112135/17112135.thumb.1779200000.webp" width="320" height="240"></div>
</a>
</body></html>"#.to_string()
}
// Checks id, title, uploader, duration, URL, thumbnail and uploader links
// for the parsed fixture cards.
#[test]
fn parses_video_cards_from_html() {
let items = CamsodaProvider::parse_html_items(&sample_html());
assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::<Vec<_>>());
let item = &items[0];
assert_eq!(item.id, "16984249");
assert_eq!(item.title, "Torso ride begging for your cum");
assert_eq!(item.uploader.as_deref(), Some("jazzyj"));
assert_eq!(item.duration, 24 * 60 + 35);
assert!(item.url.contains("16984249"), "url should contain id: {}", item.url);
assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb);
assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media"));
assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz"));
let item2 = &items[1];
assert_eq!(item2.id, "17009049");
assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick"));
let item3 = &items[2];
assert_eq!(item3.id, "17112135");
assert_eq!(item3.duration, 2 * 60 + 47);
}
// Feeding the fixture twice must still yield each video id only once.
#[test]
fn deduplicates_items() {
// Same video appears twice
let html = sample_html();
let doubled = format!("{html}\n{html}");
let items = CamsodaProvider::parse_html_items(&doubled);
assert_eq!(items.len(), 3, "should deduplicate to 3 unique items");
}
// No query, an empty query and a plain keyword all map to the default
// listing; "uploader:" and "model:" prefixes map to a model page.
#[test]
fn picks_target_correctly() {
assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing));
assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing));
assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing));
match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) {
Target::Model { username } => assert_eq!(username, "lil-asian-jaz"),
_ => panic!("expected Model target"),
}
match CamsodaProvider::pick_target(Some("model:katt-leya")) {
Target::Model { username } => assert_eq!(username, "katt-leya"),
_ => panic!("expected Model target"),
}
}
// The default listing always carries ?page=N; model pages omit it on page 1.
#[test]
fn builds_listing_urls_correctly() {
let provider = CamsodaProvider::new();
assert_eq!(
provider.build_listing_url(&Target::Listing, 1),
"https://www.camsoda.com/media?page=1"
);
assert_eq!(
provider.build_listing_url(&Target::Listing, 3),
"https://www.camsoda.com/media?page=3"
);
assert_eq!(
provider.build_listing_url(
&Target::Model { username: "lil-asian-jaz".to_string() },
1
),
"https://www.camsoda.com/lil-asian-jaz/media"
);
assert_eq!(
provider.build_listing_url(
&Target::Model { username: "lil-asian-jaz".to_string() },
2
),
"https://www.camsoda.com/lil-asian-jaz/media?page=2"
);
}
}

View File

@@ -38,7 +38,7 @@ impl ChaturbateProvider {
Channel {
id: "chaturbate".to_string(),
name: "WORK IN PROGRESS Chaturbate".to_string(),
name: "Chaturbate".to_string(),
description: "Free Adult Webcams".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=chaturbate.com".to_string(),

552
src/providers/clapdat.rs Normal file
View File

@@ -0,0 +1,552 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use chrono::NaiveDate;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde::Deserialize;
use std::collections::HashSet;
use wreq::Version;
/// Static channel metadata for this provider: its group id and descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "amateur-homemade",
tags: &["amateur", "homemade", "interracial"],
};
// Site root; copied into the provider's `url` field by `new()`.
const BASE_URL: &str = "https://www.clapdat.com";
// Search endpoint on a separate host (its use is not visible in this part of the file).
const SEARCH_URL: &str = "https://search.clapdat.com/indexes/videos/search";
// NOTE(review): presumably the API key sent with requests to SEARCH_URL — confirm it is a
// public, search-only key before keeping it hard-coded in source.
const SEARCH_KEY: &str = "36ce9a190ca0e797debc3f0a2a311749dbd76262c389531c3a37e9dd74ab9df5";
// Identifier used for the channel id.
const CHANNEL_ID: &str = "clapdat";
// Declares this module's `Error`/`Result` types via `error_chain`, linking
// I/O, HTTP client (`wreq`) and JSON (`serde_json`) errors as foreign errors.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
}
}
/// Provider for ClapDat listings.
#[derive(Debug, Clone)]
pub struct ClapdatProvider {
/// Base site URL that listing URLs are built from (set from `BASE_URL`).
url: String,
}
/// The kind of listing a request resolves to (see `resolve_target`).
#[derive(Debug, Clone)]
enum Target {
/// Trending feed; the default when no query is given and no "recent" sort is selected.
Trending,
/// Recent feed, selected by the "recent", "new" or "latest" sort.
Recent,
/// Free-text search for `query`.
Search { query: String },
/// Tag page, from a `tag:<name>` or `#<name>` query.
Tag { slug: String },
/// Uploader page, from a `user:<name>` or `uploader:<name>` query.
User { username: String },
}
/// Minimal record for one video: id, title, page URL, thumbnail and duration.
// NOTE(review): where this is produced/consumed is not visible in this part of the file.
#[derive(Debug, Clone)]
struct StubVideo {
id: String,
title: String,
url: String,
thumb: String,
// presumably seconds — TODO confirm against the code that fills it
duration: u32,
}
/// Deserialized search response body; only the `hits` array is read.
#[derive(Debug, Deserialize)]
struct SearchResponse {
/// Result entries; defaults to an empty list when the field is missing.
#[serde(default)]
hits: Vec<SearchHit>,
}
/// One entry of `SearchResponse::hits`. Every field defaults to an empty
/// string when absent from the JSON.
#[derive(Debug, Deserialize)]
struct SearchHit {
/// Taken from the JSON key `_id`.
#[serde(rename = "_id", default)]
id: String,
#[serde(default)]
title: String,
#[serde(default)]
slug: String,
#[serde(default)]
image: String,
}
impl ClapdatProvider {
/// Creates a provider pointed at the default ClapDat base URL.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Builds the channel descriptor advertised to clients.
///
/// The channel exposes a single single-select "sort" option (trending / recent).
/// The client version is accepted for interface parity but not used.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let sort_choices = [("trending", "Trending"), ("new", "Recent")]
        .iter()
        .map(|(id, title)| FilterOption {
            id: id.to_string(),
            title: title.to_string(),
        })
        .collect();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Trending or latest ClapDat feed.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: sort_choices,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "ClapDat".to_string(),
        description: "ClapDat trending/recent feeds with tag and uploader shortcuts."
            .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=clapdat.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Decides which feed a request addresses.
///
/// Precedence: `tag:`/`#` prefix, then `user:`/`uploader:` prefix, then any
/// non-empty query as a search, and finally the sort value (`recent`, `new`
/// or `latest` select the recent feed, anything else the trending feed).
/// A prefix with an empty value falls through to the next rule.
fn resolve_target(&self, query: &str, sort: &str) -> Target {
    let q = query.trim();
    // Lower-cased, dash-separated form used in ClapDat path segments.
    let slugify = |raw: &str| raw.trim().to_lowercase().replace(' ', "-");

    let tag_value = q.strip_prefix("tag:").or_else(|| q.strip_prefix('#'));
    match tag_value.map(slugify) {
        Some(slug) if !slug.is_empty() => return Target::Tag { slug },
        _ => {}
    }

    let user_value = q
        .strip_prefix("user:")
        .or_else(|| q.strip_prefix("uploader:"));
    match user_value.map(slugify) {
        Some(username) if !username.is_empty() => return Target::User { username },
        _ => {}
    }

    if q.is_empty() {
        return match sort {
            "recent" | "new" | "latest" => Target::Recent,
            _ => Target::Trending,
        };
    }
    Target::Search {
        query: q.to_string(),
    }
}
/// Returns the HTML listing URL for `target` and `page` (pages below 1 count as 1).
///
/// Page 1 of the trending and recent feeds is the home page itself; search
/// has no HTML listing and yields `None`.
fn listing_url(&self, target: &Target, page: u16) -> Option<String> {
    let page = page.max(1);
    let first_page = page == 1;
    let url = match target {
        Target::Search { .. } => return None,
        Target::Trending if first_page => self.url.clone(),
        Target::Trending => format!("{}/trending/{page}", self.url),
        Target::Recent if first_page => self.url.clone(),
        Target::Recent => format!("{}/recent/{page}", self.url),
        Target::Tag { slug } if first_page => format!("{}/tag/{slug}", self.url),
        Target::Tag { slug } => format!("{}/tag/{slug}/{page}", self.url),
        Target::User { username } if first_page => format!("{}/user/{username}", self.url),
        Target::User { username } => format!("{}/user/{username}/{page}", self.url),
    };
    Some(url)
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value).map_err(|error| Error::from(format!("selector `{value}`: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}`: {error}")))
}
/// Decodes HTML entities and collapses every whitespace run (including
/// non-breaking spaces) to a single space, with no leading or trailing space.
/// If entity decoding fails the raw input is used instead.
fn normalize_text(value: &str) -> String {
    let decoded = match decode(value.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => value.to_string(),
    };
    let spaced = decoded.replace('\u{a0}', " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    // Joining whitespace-split words never leaves outer whitespace behind.
    words.join(" ")
}
/// Resolves `value` to an absolute URL.
///
/// * blank input → empty string (previously this produced the site root,
///   which turned empty `src`/`href` attributes into bogus links)
/// * `http://` / `https://` → returned trimmed, otherwise unchanged
/// * protocol-relative `//host/...` → prefixed with `https:`
/// * anything else → joined to the provider base URL with exactly one `/`
fn normalize_url(&self, value: &str) -> String {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        // Nothing to resolve: report "no URL" rather than the home page.
        return String::new();
    }
    if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
        return trimmed.to_string();
    }
    if trimmed.starts_with("//") {
        return format!("https:{trimmed}");
    }
    format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        trimmed.trim_start_matches('/')
    )
}
/// Extracts the video id from a detail URL: the first run of lowercase
/// letters/digits that follows a `-` and is terminated by `/` or the end of
/// the string. Returns `None` when the URL has no such segment.
fn extract_video_id(url: &str) -> Option<String> {
    let pattern = Regex::new(r"-([a-z0-9]+)(?:/|$)").ok()?;
    let captures = pattern.captures(url)?;
    let id = captures.get(1)?;
    Some(id.as_str().to_string())
}
/// Converts a duration label to whole seconds via `parse_time_to_seconds`;
/// returns 0 when the label cannot be parsed or does not fit in a `u32`.
fn parse_duration(value: &str) -> u32 {
    match parse_time_to_seconds(value) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Turns one listing card into a `StubVideo`.
///
/// Returns `None` when the card has no video link, no extractable id, or no
/// `h3` title. The duration comes from the first `span` whose text contains
/// a `:`; the thumbnail is the SECOND image that carries a `src` or
/// `data-src` attribute (the first such image is deliberately skipped).
fn parse_card(
    &self,
    card: &ElementRef<'_>,
    link_sel: &Selector,
    img_sel: &Selector,
) -> Option<StubVideo> {
    let anchor = card.select(link_sel).next()?;
    let url = self.normalize_url(anchor.value().attr("href")?);
    let id = Self::extract_video_id(&url)?;

    let heading_sel = Self::selector("h3").ok()?;
    let title = match card.select(&heading_sel).next() {
        Some(node) => Self::normalize_text(&node.text().collect::<Vec<_>>().join(" ")),
        None => String::new(),
    };
    if title.is_empty() {
        return None;
    }

    let span_sel = Self::selector("span").ok()?;
    let duration_text = card
        .select(&span_sel)
        .map(|node| Self::normalize_text(&node.text().collect::<Vec<_>>().join(" ")))
        .find(|text| text.contains(':'))
        .unwrap_or_default();

    let thumb = match card
        .select(img_sel)
        .filter_map(|img| {
            img.value()
                .attr("src")
                .or_else(|| img.value().attr("data-src"))
        })
        // Index 1: skip the first usable image and take the second.
        .nth(1)
    {
        Some(value) => self.normalize_url(value),
        None => String::new(),
    };

    let duration = Self::parse_duration(&duration_text);
    Some(StubVideo {
        id,
        title,
        url,
        thumb,
        duration,
    })
}
/// Parses every `div.video-card` in a listing page into stubs, keeping only
/// the first card seen for each video id and preserving document order.
fn parse_listing_html(&self, html: &str) -> Result<Vec<StubVideo>> {
    let doc = Html::parse_document(html);
    let card_sel = Self::selector("div.video-card")?;
    let link_sel = Self::selector("a[href*='/video/']")?;
    let img_sel = Self::selector("img")?;

    let mut seen = HashSet::new();
    let stubs: Vec<StubVideo> = doc
        .select(&card_sel)
        .filter_map(|card| self.parse_card(&card, &link_sel, &img_sel))
        // `insert` returns false for an id that was already emitted.
        .filter(|stub| seen.insert(stub.id.clone()))
        .collect();
    Ok(stubs)
}
/// Parses the video cards inside the first `section#{section_id}` of the home
/// page, de-duplicated by video id. A missing section yields an empty list.
fn parse_home_section_html(&self, html: &str, section_id: &str) -> Result<Vec<StubVideo>> {
    let doc = Html::parse_document(html);
    let section_sel = Self::selector(&format!("section#{section_id}"))?;
    let card_sel = Self::selector("div.video-card")?;
    let link_sel = Self::selector("a[href*='/video/']")?;
    let img_sel = Self::selector("img")?;

    let Some(section) = doc.select(&section_sel).next() else {
        return Ok(Vec::new());
    };

    let mut seen = HashSet::new();
    let stubs: Vec<StubVideo> = section
        .select(&card_sel)
        .filter_map(|card| self.parse_card(&card, &link_sel, &img_sel))
        // `insert` returns false for an id that was already emitted.
        .filter(|stub| seen.insert(stub.id.clone()))
        .collect();
    Ok(stubs)
}
/// Browser-like request headers for HTML pages; the referer is the site root.
fn html_headers(&self) -> Vec<(String, String)> {
    let fixed = [
        ("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"),
        ("accept-language", "en-US,en;q=0.8"),
        ("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"),
    ];
    let mut headers: Vec<(String, String)> = fixed
        .iter()
        .map(|(name, value)| (name.to_string(), value.to_string()))
        .collect();
    headers.push(("referer".to_string(), self.url.clone()));
    headers
}
async fn fetch_html(&self, options: &ServerOptions, url: &str) -> Result<String> {
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
requester
.get_with_headers(url, self.html_headers(), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Runs a free-text search against the JSON search endpoint.
///
/// `page` is 1-based and converted to an item offset (`(page - 1) * per_page`).
/// Hits lacking an id, slug or title are dropped. Search hits carry no
/// duration, so it is reported as 0.
///
/// Thumbnails are resolved through `normalize_url`, matching what listing
/// cards do, so relative or protocol-relative image paths become absolute.
/// A blank image stays an empty string.
///
/// # Errors
/// Returns an error when the request fails or the body is not valid JSON of
/// the expected shape.
async fn search_videos(
    &self,
    options: &ServerOptions,
    query: &str,
    page: u16,
    per_page: usize,
) -> Result<Vec<StubVideo>> {
    let mut requester = requester_or_default(options, CHANNEL_ID, "search_videos");
    let offset = page.saturating_sub(1) as usize * per_page;
    let query_encoded =
        url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>();
    let search_url = format!("{SEARCH_URL}?q={query_encoded}&limit={per_page}&offset={offset}");
    let auth_header = format!("Bearer {SEARCH_KEY}");
    let headers = vec![
        ("accept".to_string(), "application/json".to_string()),
        ("authorization".to_string(), auth_header),
        (
            "user-agent".to_string(),
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
        ),
    ];
    let text = requester
        .get_with_headers(&search_url, headers, Some(Version::HTTP_11))
        .await
        .map_err(|error| Error::from(format!("search request failed: {error}")))?;
    let parsed: SearchResponse = serde_json::from_str(&text)?;
    Ok(parsed
        .hits
        .into_iter()
        .filter_map(|hit| {
            let slug = hit.slug.trim();
            if hit.id.is_empty() || slug.is_empty() || hit.title.trim().is_empty() {
                return None;
            }
            // Guard the blank case here so the result does not depend on how
            // `normalize_url` treats empty input.
            let image = hit.image.trim();
            let thumb = if image.is_empty() {
                String::new()
            } else {
                self.normalize_url(image)
            };
            Some(StubVideo {
                title: Self::normalize_text(&hit.title),
                url: format!("{}/video/{}", self.url, slug),
                thumb,
                duration: 0,
                id: hit.id,
            })
        })
        .collect())
}
/// Scrapes optional metadata from a video detail page.
///
/// Returns `(tags, uploader, uploader_url, uploader_id, uploaded_at)`:
/// * `tags` — text of every `/tag/…` link, normalized, blanks dropped
/// * `uploader*` — taken from the first `/user/{slug}` link that wraps an
///   image followed by a `<p>` name; all three are `None` if there is no
///   such link or the name is blank
/// * `uploaded_at` — Unix timestamp (midnight UTC) of a `Mon D, YYYY` date
///   found in the `p.s-1he9h8j` element, if present and parseable
fn extract_detail_metadata(
    &self,
    html: &str,
) -> (
    Vec<String>,
    Option<String>,
    Option<String>,
    Option<String>,
    Option<u64>,
) {
    let uploader_pair =
        Self::regex(r#"<a href="/user/([^"]+)"[^>]*>[^<]*<img[^>]*>\s*<p[^>]*>([^<]+)</p>"#)
            .ok()
            .and_then(|re| re.captures(html))
            .and_then(|caps| {
                let slug = caps.get(1)?.as_str().to_string();
                let name = Self::normalize_text(caps.get(2)?.as_str());
                if name.is_empty() {
                    None
                } else {
                    Some((name, slug))
                }
            });
    let (uploader, uploader_url, uploader_id) = match uploader_pair {
        Some((name, slug)) => (
            Some(name),
            Some(format!("{}/user/{}", self.url, slug)),
            Some(format!("{CHANNEL_ID}:{}", slug)),
        ),
        None => (None, None, None),
    };

    let date_text = Self::regex(r#"<p class="s-1he9h8j">([A-Za-z]{3}\s+\d{1,2},\s+\d{4})</p>"#)
        .ok()
        .and_then(|re| re.captures(html))
        .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()));
    let uploaded_at = date_text
        .and_then(|value| NaiveDate::parse_from_str(&value, "%b %e, %Y").ok())
        .and_then(|date| date.and_hms_opt(0, 0, 0))
        // Negative timestamps (pre-1970) do not fit in u64 and are dropped.
        .and_then(|dt| u64::try_from(dt.and_utc().timestamp()).ok());

    let tags = match Self::regex(r#"<a href="/tag/[^"]+"[^>]*>([^<]+)</a>"#) {
        Ok(re) => re
            .captures_iter(html)
            .filter_map(|caps| caps.get(1).map(|m| Self::normalize_text(m.as_str())))
            .filter(|t| !t.is_empty())
            .collect::<Vec<_>>(),
        Err(_) => Vec::new(),
    };

    (tags, uploader, uploader_url, uploader_id, uploaded_at)
}
async fn enrich_video(&self, options: &ServerOptions, stub: StubVideo) -> VideoItem {
let proxy_url = build_proxy_url(&options, CHANNEL_ID, &strip_url_scheme(&stub.url));
let mut item = VideoItem::new(
stub.id,
stub.title,
proxy_url,
CHANNEL_ID.to_string(),
stub.thumb,
stub.duration,
);
if let Ok(detail_html) = self.fetch_html(options, &stub.url).await {
let (tags, uploader, uploader_url, uploader_id, uploaded_at) =
self.extract_detail_metadata(&detail_html);
if !tags.is_empty() {
item.tags = Some(tags);
}
if let Some(value) = uploader {
item = item.uploader(value);
}
if let Some(value) = uploader_url {
item = item.uploader_url(value);
}
if let Some(value) = uploader_id {
item.uploaderId = Some(value);
}
if let Some(value) = uploaded_at {
item.uploadedAt = Some(value);
}
}
item
}
}
#[async_trait]
impl Provider for ClapdatProvider {
/// Returns one page of ClapDat videos.
///
/// `page` defaults to 1 and `per_page` to 20 (clamped to 1..=60) when they do
/// not parse. A blank `sort` falls back to `options.sort`, then to
/// `"trending"`. Searches use the JSON endpoint; every other target is
/// scraped from HTML (page 1 of trending/recent from the matching home-page
/// section). Failures are reported via `report_provider_error` and produce
/// an empty list. Each stub is then enriched from its detail page, one
/// request at a time.
async fn get_videos(
    &self,
    _cache: crate::util::cache::VideoCache,
    _pool: DbPool,
    sort: String,
    query: Option<String>,
    page: String,
    per_page: String,
    options: ServerOptions,
) -> Vec<VideoItem> {
    let page_num = page.parse::<u16>().unwrap_or(1).max(1);
    let per_page_num = per_page.parse::<usize>().unwrap_or(20).clamp(1, 60);

    let sort_is_blank = sort.trim().is_empty();
    let sort_value = if sort_is_blank {
        options.sort.as_deref().unwrap_or("trending").to_string()
    } else {
        sort
    };
    let query_value = query.unwrap_or_default();
    let target = self.resolve_target(&query_value, &sort_value);

    let stubs = if let Target::Search { query } = &target {
        let found = self
            .search_videos(&options, query, page_num, per_page_num)
            .await;
        match found {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "search_videos", &error.to_string()).await;
                vec![]
            }
        }
    } else {
        let Some(url) = self.listing_url(&target, page_num) else {
            return vec![];
        };
        match self.fetch_html(&options, &url).await {
            Err(error) => {
                report_provider_error(CHANNEL_ID, "fetch_html", &error.to_string()).await;
                vec![]
            }
            Ok(html) => {
                // Page 1 of the two feeds lives in a section of the home page.
                let on_first_page = page_num == 1;
                let parsed = if on_first_page && matches!(target, Target::Trending) {
                    self.parse_home_section_html(&html, "trending-videos")
                } else if on_first_page && matches!(target, Target::Recent) {
                    self.parse_home_section_html(&html, "recent-videos")
                } else {
                    self.parse_listing_html(&html)
                };
                match parsed {
                    Ok(items) => items,
                    Err(error) => {
                        report_provider_error(
                            CHANNEL_ID,
                            "parse_listing_html",
                            &error.to_string(),
                        )
                        .await;
                        vec![]
                    }
                }
            }
        }
    };

    let mut output = Vec::with_capacity(stubs.len());
    for stub in stubs.into_iter().take(per_page_num) {
        let item = self.enrich_video(&options, stub).await;
        output.push(item);
    }
    output
}
/// Returns the ClapDat channel descriptor; always `Some`.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
}

643
src/providers/eporner.rs Normal file
View File

@@ -0,0 +1,643 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
use std::{collections::HashMap, thread, vec};
use wreq::Version;
/// Static metadata for the EPorner provider: the group id it is filed under and its tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed", "search"],
};
/// Site root (no trailing slash) that all relative paths are resolved against.
const BASE_URL: &str = "https://www.eporner.com";
/// Channel identifier used for video items, uploader ids and error reports.
const CHANNEL_ID: &str = "eporner";
/// `User-Agent` header value sent with HTML requests.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent with HTML requests.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Generates this module's `Error`/`ErrorKind`/`Result` types.
// I/O errors convert automatically; `Parse` is a custom kind carrying a message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
// Static category list — eporner categories are stable.
// Each entry is `(slug, label)`: the slug is the path segment appended to
// `/cat/` and the option id shown to clients; the label is the display title
// and is also matched (case-insensitively) against bare search queries.
const CATEGORIES: &[(&str, &str)] = &[
("4k-porn", "4K Ultra HD"),
("60fps", "60 FPS"),
("amateur", "Amateur"),
("anal", "Anal"),
("asian", "Asian"),
("asmr", "ASMR"),
("bbw", "BBW"),
("bdsm", "BDSM"),
("big-ass", "Big Ass"),
("big-dick", "Big Dick"),
("big-tits", "Big Tits"),
("bisexual", "Bisexual"),
("blonde", "Blonde"),
("blowjob", "Blowjob"),
("bondage", "Bondage"),
("brunette", "Brunette"),
("bukkake", "Bukkake"),
("creampie", "Creampie"),
("cumshot", "Cumshot"),
("double-penetration", "Double Penetration"),
("ebony", "Ebony"),
("fat", "Fat"),
("fetish", "Fetish"),
("fisting", "Fisting"),
("footjob", "Footjob"),
("for-women", "For Women"),
("gay", "Gay"),
("group-sex", "Group Sex"),
("handjob", "Handjob"),
("hardcore", "Hardcore"),
("hd-1080p", "HD 1080p"),
("hentai", "Hentai"),
("homemade", "Homemade"),
("hotel", "Hotel"),
("indian", "Indian"),
("interracial", "Interracial"),
("japanese", "Japanese"),
("latina", "Latina"),
("lesbians", "Lesbian"),
("lingerie", "Lingerie"),
("massage", "Massage"),
("masturbation", "Masturbation"),
("mature", "Mature"),
("milf", "MILF"),
("nurse", "Nurse"),
("office", "Office"),
("orgy", "Orgy"),
("outdoor", "Outdoor"),
("petite", "Petite"),
("pornstar", "Pornstar"),
("pov-porn", "POV"),
("public", "Public"),
("redhead", "Redhead"),
("shemale", "Shemale"),
("small-tits", "Small Tits"),
("squirt", "Squirt"),
("striptease", "Striptease"),
("teens", "Teen"),
("threesome", "Threesome"),
("toys", "Toys"),
("uncategorized", "Uncategorized"),
("uniform", "Uniform"),
("vintage", "Vintage"),
("vr-porn", "VR Porn"),
("webcam", "Webcam"),
];
/// What a request resolves to; `target_url` maps each variant to a listing URL.
#[derive(Debug, Clone)]
enum Target {
// Site root listing.
Latest,
// `/most-viewed` listing.
MostViewed,
// `/top-rated` listing.
TopRated,
// `/best-videos` listing.
BestVideos,
// Free-text query; served from the `/tag/{slug}` listing.
Search(String),
// Absolute base URL of a category, tag, pornstar or profile archive.
Archive(String),
}
/// Provider for eporner.com listings.
#[derive(Debug, Clone)]
pub struct EpornerProvider {
// Normalized pornstar name or slug → canonical archive URL.
// Filled once by a background thread started in `new`; empty until that load finishes.
pornstar_map: Arc<RwLock<HashMap<String, String>>>,
}
impl EpornerProvider {
/// Creates the provider with an empty pornstar map and starts the
/// background load that fills it.
pub fn new() -> Self {
    let pornstar_map = Arc::new(RwLock::new(HashMap::new()));
    let provider = Self { pornstar_map };
    provider.spawn_initial_load();
    provider
}
/// Loads the pornstar list on a dedicated OS thread that runs its own
/// current-thread Tokio runtime. The thread is detached; a runtime build
/// failure or a load failure is reported in the background and otherwise
/// ignored.
fn spawn_initial_load(&self) {
    let shared_map = Arc::clone(&self.pornstar_map);
    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Ok(r) => r,
            Err(e) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.runtime_build",
                    &e.to_string(),
                );
                return;
            }
        };
        runtime.block_on(async move {
            let outcome = Self::load_pornstars(Arc::clone(&shared_map)).await;
            if let Err(e) = outcome {
                report_provider_error_background(CHANNEL_ID, "load_pornstars", &e.to_string());
            }
        });
    });
}
/// Builds the channel descriptor advertised to clients: a "sort" option
/// (latest / most viewed / top rated / best) and a "categories" option made
/// of an "All" entry followed by every entry of `CATEGORIES`.
/// The client version is accepted for interface parity but not used.
fn build_channel(&self, _cv: ClientVersion) -> Channel {
    let to_option = |(id, title): (&str, &str)| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };

    let cat_options: Vec<FilterOption> = std::iter::once(("all", "All"))
        .chain(CATEGORIES.iter().copied())
        .map(to_option)
        .collect();
    let sort_options: Vec<FilterOption> = [
        ("new", "Latest"),
        ("popular", "Most Viewed"),
        ("rated", "Top Rated"),
        ("best", "Best Videos"),
    ]
    .iter()
    .copied()
    .map(to_option)
    .collect();

    let sort_choice = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse EPorner ranking feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: sort_options,
        multiSelect: false,
    };
    let category_choice = ChannelOption {
        id: "categories".to_string(),
        title: "Categories".to_string(),
        description: "Browse an EPorner category archive.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "orange".to_string(),
        options: cat_options,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "EPorner".to_string(),
        description:
            "EPorner — 5M+ free HD porn videos with latest, most viewed, top rated, category, tag, and pornstar routing."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=eporner.com".to_string(),
        status: "active".to_string(),
        categories: CATEGORIES.iter().map(|&(_, label)| label.to_string()).collect(),
        options: vec![sort_choice, category_choice],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned as is.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Collapses every whitespace run to one space and drops leading/trailing whitespace.
fn collapse_ws(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Returns an element's text: text nodes joined with spaces, whitespace
/// collapsed, then HTML entities decoded.
fn text_of(el: &ElementRef<'_>) -> String {
    let joined = el.text().collect::<Vec<_>>().join(" ");
    let collapsed = Self::collapse_ws(&joined);
    Self::decode_html(&collapsed)
}
/// Canonical lookup key: trimmed, leading `#` characters removed, `_` and `-`
/// treated as spaces, whitespace collapsed, ASCII-lowercased.
fn normalize_key(s: &str) -> String {
    let unprefixed = s.trim().trim_start_matches('#');
    let spaced: String = unprefixed
        .chars()
        .map(|ch| if ch == '_' || ch == '-' { ' ' } else { ch })
        .collect();

    let mut key = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key.make_ascii_lowercase();
    key
}
/// Resolves `path` to an absolute URL.
///
/// * blank input → empty string (previously this produced `BASE_URL/`, so
///   callers could not tell a missing link from the home page and their
///   `is_empty()` guards never fired)
/// * `http://` / `https://` → returned trimmed, otherwise unchanged
/// * protocol-relative `//host/...` → prefixed with `https:`
/// * root-relative or bare paths → joined to `BASE_URL`
fn normalize_url(path: &str) -> String {
    let path = path.trim();
    if path.is_empty() {
        // Nothing to resolve: report "no URL" rather than the site root.
        return String::new();
    }
    if path.starts_with("http://") || path.starts_with("https://") {
        return path.to_string();
    }
    if path.starts_with("//") {
        return format!("https:{path}");
    }
    if path.starts_with('/') {
        return format!("{BASE_URL}{path}");
    }
    format!("{BASE_URL}/{path}")
}
/// Request headers for HTML pages: fixed user agent and accept values plus
/// the given referer.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ]
    .iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Builds a paginated listing URL from `base` (trailing slashes ignored):
/// pages 0 and 1 → `{base}/`, page N → `{base}/{N}/`.
fn page_url(base: &str, page: u16) -> String {
    let base = base.trim_end_matches('/');
    match page {
        0 | 1 => format!("{base}/"),
        _ => format!("{base}/{page}/"),
    }
}
/// Maps a target and page number to the listing URL to fetch.
///
/// Searches are served from the tag listing: the query is trimmed, spaces
/// become dashes and the result is ASCII-lowercased.
fn target_url(target: &Target, page: u16) -> String {
    let listing_base = match target {
        Target::Latest => BASE_URL.to_string(),
        Target::MostViewed => format!("{BASE_URL}/most-viewed"),
        Target::TopRated => format!("{BASE_URL}/top-rated"),
        Target::BestVideos => format!("{BASE_URL}/best-videos"),
        Target::Search(q) => {
            let slug = q.trim().replace(' ', "-").to_ascii_lowercase();
            format!("{BASE_URL}/tag/{slug}")
        }
        Target::Archive(url) => url.clone(),
    };
    Self::page_url(&listing_base, page)
}
async fn fetch_html(requester: &mut Requester, url: &str) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
}
/// Converts a duration label to whole seconds via `parse_time_to_seconds`;
/// returns 0 when the label cannot be parsed or does not fit in a `u32`.
fn parse_duration(text: &str) -> u32 {
    match parse_time_to_seconds(text) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Parses a view-count label by removing the words "views"/"view", commas
/// and spaces, then handing the remainder to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    // Strip the plural before the singular so "views" does not leave an "s".
    let without_plural = text.replace("views", "");
    let without_word = without_plural.replace("view", "");
    let compact = without_word.replace([',', ' '], "");
    parse_abbreviated_number(compact.trim())
}
/// Parses a percentage label into a 0–1 style fraction: all ASCII digits in
/// `text` are concatenated, read as a number and divided by 100.
/// Returns `None` when the text contains no digits.
fn parse_rating_pct(text: &str) -> Option<f32> {
    let mut digits = String::new();
    for ch in text.chars() {
        if ch.is_ascii_digit() {
            digits.push(ch);
        }
    }
    match digits.parse::<f32>() {
        Ok(value) => Some(value / 100.0),
        Err(_) => None,
    }
}
/// Parses every video card (`div.mb[data-id]`) of a listing page.
///
/// A card is skipped when it has no `data-id`, no title link, a blank
/// `href`, or no title. Thumbnail, duration, rating, views and uploader are
/// optional and default to empty / 0 / unset.
///
/// The blank-link check is made on the raw `href`: checking the normalized
/// URL instead is unreliable because normalization may map a blank value to
/// the site root, which would let a link-less card through.
///
/// # Errors
/// Returns an error only if one of the built-in selectors fails to parse.
fn parse_list_page(html: &str) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let card_sel = Self::selector("div.mb[data-id]")?;
    // NOTE(review): this selector requires `src`, so the `data-src` fallback
    // below can only apply to images that carry both attributes — confirm
    // whether lazy-loaded thumbnails need `img[data-src]` as well.
    let img_sel = Self::selector("div.mbimg a img[src]")?;
    let link_sel = Self::selector("p.mbtit a[href], div.mbtit a[href]")?;
    let dur_sel = Self::selector("span.mbtim")?;
    let rate_sel = Self::selector("span.mbrate")?;
    let views_sel = Self::selector("span.mbvie")?;
    let uploader_sel = Self::selector("span.mb-uploader a[href]")?;
    let mut items = Vec::new();
    for card in document.select(&card_sel) {
        let id = match card.value().attr("data-id") {
            Some(v) if !v.is_empty() => v.to_string(),
            _ => continue,
        };
        let Some(link) = card.select(&link_sel).next() else {
            continue;
        };
        let href = link.value().attr("href").unwrap_or_default().trim();
        if href.is_empty() {
            // No usable target: do not emit an item pointing at the home page.
            continue;
        }
        let page_url = Self::normalize_url(href);
        if page_url.is_empty() {
            continue;
        }
        // Prefer the link's `title` attribute, fall back to its visible text.
        let title = link
            .value()
            .attr("title")
            .map(Self::decode_html)
            .filter(|v| !v.trim().is_empty())
            .unwrap_or_else(|| Self::text_of(&link));
        if title.is_empty() {
            continue;
        }
        let thumb = card
            .select(&img_sel)
            .next()
            .and_then(|el| el.value().attr("src").or_else(|| el.value().attr("data-src")))
            .map(Self::normalize_url)
            .unwrap_or_default();
        let duration = card
            .select(&dur_sel)
            .next()
            .map(|el| Self::parse_duration(&Self::text_of(&el)))
            .unwrap_or(0);
        let rating = card
            .select(&rate_sel)
            .next()
            .and_then(|el| Self::parse_rating_pct(&Self::text_of(&el)));
        let views = card
            .select(&views_sel)
            .next()
            .and_then(|el| Self::parse_views(&Self::text_of(&el)));
        let uploader_el = card.select(&uploader_sel).next();
        let uploader_name = uploader_el.as_ref().map(|el| Self::text_of(el));
        let uploader_url = uploader_el
            .and_then(|el| el.value().attr("href").map(Self::normalize_url));
        let mut item = VideoItem::new(
            id,
            title.trim().to_string(),
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        if let Some(r) = rating {
            item.rating = Some(r);
        }
        if let Some(v) = views {
            item.views = Some(v);
        }
        if let Some(name) = uploader_name.filter(|n| !n.is_empty()) {
            item.uploader = Some(name);
        }
        if let Some(url) = uploader_url.filter(|u| !u.is_empty()) {
            // The uploader id is the last path segment of the profile URL.
            let uploader_id = url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default()
                .to_string();
            if !uploader_id.is_empty() {
                item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
            }
            item.uploaderUrl = Some(url);
        }
        items.push(item);
    }
    Ok(items)
}
/// Fetches `/pornstar-list/` and records every pornstar link in
/// `pornstar_map` under two keys — the normalized display name and the
/// normalized slug — both pointing at the canonical archive URL.
///
/// Links outside `/pornstar/`, with an empty slug or with no name are
/// skipped. If the lock cannot be taken for writing the entry is dropped.
///
/// # Errors
/// Returns an error when the page cannot be fetched or the selector fails to parse.
async fn load_pornstars(pornstar_map: Arc<RwLock<HashMap<String, String>>>) -> Result<()> {
    let mut requester = Requester::new();
    let url = format!("{BASE_URL}/pornstar-list/");
    let html = Self::fetch_html(&mut requester, &url).await?;
    let document = Html::parse_document(&html);
    let sel = Self::selector("a[href*=\"/pornstar/\"]")?;
    let prefix = format!("{BASE_URL}/pornstar/");

    for el in document.select(&sel) {
        let full = Self::normalize_url(el.value().attr("href").unwrap_or_default());
        if !full.starts_with(&prefix) {
            continue;
        }
        // The slug is the last path segment of the profile link.
        let slug = match full.trim_end_matches('/').rsplit('/').next() {
            Some(segment) if !segment.is_empty() => segment.to_string(),
            _ => continue,
        };
        let name = match el.value().attr("title").map(Self::decode_html) {
            Some(title) if !title.trim().is_empty() => title,
            _ => Self::text_of(&el),
        };
        if name.is_empty() {
            continue;
        }
        let canonical = format!("{BASE_URL}/pornstar/{slug}");
        if let Ok(mut map) = pornstar_map.write() {
            map.insert(Self::normalize_key(&name), canonical.clone());
            map.insert(Self::normalize_key(&slug), canonical);
        }
    }
    Ok(())
}
/// Returns the archive URL of the first category whose label or slug equals
/// `query` after key normalization, or `None` when nothing matches.
fn lookup_category(query: &str) -> Option<String> {
    let normalized = Self::normalize_key(query);
    CATEGORIES
        .iter()
        .find(|&&(slug, label)| {
            Self::normalize_key(label) == normalized || Self::normalize_key(slug) == normalized
        })
        .map(|&(slug, _)| format!("{BASE_URL}/cat/{slug}"))
}
/// Resolves a non-empty query to a target.
///
/// Precedence: an explicit `kind:value` prefix (`cat`/`category`, `tag`,
/// `pornstar`/`star`, `uploader`/`profile`) with a non-empty value, then a
/// category whose name matches the whole query, then a known pornstar, and
/// finally a plain search. Leading `@` characters are ignored.
fn resolve_query_target(&self, query: &str) -> Target {
    let trimmed = query.trim().trim_start_matches('@');

    if let Some((kind, value)) = trimmed.split_once(':') {
        let value = value.trim().replace(' ', "-").to_ascii_lowercase();
        if !value.is_empty() {
            let kind = kind.trim().to_ascii_lowercase();
            let archive = match kind.as_str() {
                "cat" | "category" => Some(format!("{BASE_URL}/cat/{value}")),
                "tag" => Some(format!("{BASE_URL}/tag/{value}")),
                "pornstar" | "star" => Some(format!("{BASE_URL}/pornstar/{value}")),
                "uploader" | "profile" => Some(format!("{BASE_URL}/profile/{value}")),
                // Unknown prefix: treat the whole text as an ordinary query.
                _ => None,
            };
            if let Some(url) = archive {
                return Target::Archive(url);
            }
        }
    }

    // Check category name
    if let Some(url) = Self::lookup_category(trimmed) {
        return Target::Archive(url);
    }

    // Check pornstar map (skipped if the lock is poisoned)
    let normalized = Self::normalize_key(trimmed);
    if let Ok(map) = self.pornstar_map.read() {
        if let Some(url) = map.get(&normalized) {
            return Target::Archive(url.clone());
        }
    }

    Target::Search(trimmed.to_string())
}
/// Maps a sort id (trimmed, case-insensitive) to a ranking feed; unknown
/// values select the latest feed.
fn resolve_sort_target(sort: &str) -> Target {
    let key = sort.trim().to_ascii_lowercase();
    let key = key.as_str();
    if matches!(key, "popular" | "viewed" | "most_viewed") {
        Target::MostViewed
    } else if matches!(key, "rated" | "rating" | "top") {
        Target::TopRated
    } else if key == "best" {
        Target::BestVideos
    } else {
        Target::Latest
    }
}
/// Resolves the target when there is no query: a selected category (any
/// value other than empty or `"all"`) wins over the sort feed. A category
/// value starting with `http` is used as the archive URL verbatim.
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
    match options.categories.as_deref() {
        Some(cat) if cat != "all" && !cat.is_empty() => {
            let url = if cat.starts_with("http") {
                cat.to_string()
            } else {
                format!("{BASE_URL}/cat/{cat}")
            };
            Target::Archive(url)
        }
        _ => Self::resolve_sort_target(sort),
    }
}
/// Fetches and parses one listing page, with a 300-second cache keyed by
/// URL and page size.
///
/// A failed request or an empty body is reported and yields `Ok(vec![])`;
/// only a parse failure is returned as `Err`. Empty results are not cached.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = Self::target_url(&target, page);
    let cache_key = format!("{url}#per={per_page}");
    if let Some((ts, cached)) = cache.get(&cache_key) {
        let age_secs = ts.elapsed().unwrap_or_default().as_secs();
        if age_secs < 300 {
            return Ok(cached.clone());
        }
    }

    let mut requester =
        requester_or_default(&options, CHANNEL_ID, "eporner.fetch_target.missing_requester");
    let fetched = Self::fetch_html(&mut requester, &url).await;
    let html = match fetched {
        Ok(body) if !body.trim().is_empty() => body,
        Ok(_) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_target.empty",
                &format!("url={url}"),
            )
            .await;
            return Ok(vec![]);
        }
        Err(e) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_target.request",
                &format!("url={url}; error={e}"),
            )
            .await;
            return Ok(vec![]);
        }
    };

    let items = self.parse_list_page_limited(&html, per_page)?;
    if !items.is_empty() {
        cache.insert(cache_key, items.clone());
    }
    Ok(items)
}
/// Parses a listing page and keeps at most `limit` items (a limit of 0 is
/// treated as 1).
fn parse_list_page_limited(&self, html: &str, limit: usize) -> Result<Vec<VideoItem>> {
    let mut items = Self::parse_list_page(html)?;
    items.truncate(limit.max(1));
    Ok(items)
}
}
#[async_trait]
impl Provider for EpornerProvider {
/// Returns one page of EPorner videos.
///
/// `page` defaults to 1 and `per_page` to 10 (clamped to 1..=60) when they do
/// not parse. A non-blank query is resolved with `resolve_query_target`;
/// otherwise the category option and sort decide. Errors are reported and
/// produce an empty list.
async fn get_videos(
    &self,
    cache: VideoCache,
    _pool: DbPool,
    sort: String,
    query: Option<String>,
    page: String,
    per_page: String,
    options: ServerOptions,
) -> Vec<VideoItem> {
    let page = page.parse::<u16>().unwrap_or(1).max(1);
    let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);

    let keywords = query
        .as_deref()
        .map(str::trim)
        .filter(|q| !q.is_empty());
    let target = match keywords {
        Some(q) => self.resolve_query_target(q),
        None => self.resolve_option_target(&options, &sort),
    };

    let outcome = self.fetch_target(cache, target, page, per_page, options).await;
    match outcome {
        Ok(items) => items,
        Err(e) => {
            report_provider_error(
                CHANNEL_ID,
                "get_videos",
                &format!("sort={sort}; page={page}; error={e}"),
            )
            .await;
            vec![]
        }
    }
}
/// Returns the EPorner channel descriptor; always `Some`.
fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(cv);
    Some(channel)
}
}

384
src/providers/erome.rs Normal file
View File

@@ -0,0 +1,384 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::parse_abbreviated_number;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use url::form_urlencoded;
use wreq::Version;
/// Static metadata for the EroMe provider: the group id it is filed under and its tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "amateur-homemade",
tags: &["amateur", "albums", "homemade"],
};
/// Site root; `EromeProvider::new` stores it as the base for every URL it builds.
const BASE_URL: &str = "https://www.erome.com";
/// Channel identifier for this provider.
const CHANNEL_ID: &str = "erome";
// Generates this module's `Error`/`ErrorKind`/`Result` types.
// The foreign links let `?` convert I/O and HTTP-client errors into `Error`.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// Provider for erome.com album listings.
#[derive(Debug, Clone)]
pub struct EromeProvider {
// Site root without a trailing slash (set from `BASE_URL` in `new`).
url: String,
}
/// What a request resolves to; `build_url_for_target` maps each variant to a URL.
#[derive(Debug, Clone)]
enum Target {
// `/explore` feed.
ExploreHot,
// `/explore/new` feed.
ExploreNew,
// `/search` with the given query; `order_new` adds the `o=new` parameter.
Search { query: String, order_new: bool },
// Posts tab (`?t=posts`) of the uploader profile at `/{slug}`.
UploaderPosts { slug: String },
}
impl EromeProvider {
/// Creates a provider rooted at the public EroMe site (`BASE_URL`).
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Builds the channel descriptor advertised to clients: a "sort" option
/// (new / hot) and a "sites" option labelled "Uploader" whose only preset
/// entry is "All". The client version is accepted for interface parity but
/// not used.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse EroMe hot or new feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![choice("new", "New"), choice("hot", "Hot")],
        multiSelect: false,
    };
    let uploader_option = ChannelOption {
        id: "sites".to_string(),
        title: "Uploader".to_string(),
        description: "Jump directly to an uploader profile slug.".to_string(),
        systemImage: "person.crop.square".to_string(),
        colorName: "purple".to_string(),
        options: vec![choice("all", "All")],
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "EroMe".to_string(),
        description:
            "EroMe album feed with hot/new routing, search, and uploader profile shortcuts."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=erome.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option, uploader_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Parses a CSS selector; `None` when it is invalid.
fn selector(value: &str) -> Option<Selector> {
    match Selector::parse(value) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// Compiles a regular expression; `None` when it is invalid.
fn regex(value: &str) -> Option<Regex> {
    match Regex::new(value) {
        Ok(compiled) => Some(compiled),
        Err(_) => None,
    }
}
/// Decodes HTML entities and collapses every whitespace run (including
/// non-breaking spaces) to a single space, with no leading or trailing space.
/// If entity decoding fails the raw input is used instead.
fn normalize_text(value: &str) -> String {
    let decoded = match decode(value.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => value.to_string(),
    };
    let spaced = decoded.replace('\u{a0}', " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    // Joining whitespace-split words never leaves outer whitespace behind.
    words.join(" ")
}
/// Resolves `value` to an absolute URL: blank → empty string, `http(s)://`
/// → trimmed and otherwise unchanged, `//host/...` → `https:` prefixed,
/// anything else → joined to the provider base URL with exactly one `/`.
fn normalize_url(&self, value: &str) -> String {
    let trimmed = value.trim();
    match trimmed {
        "" => String::new(),
        _ if trimmed.starts_with("http://") || trimmed.starts_with("https://") => {
            trimmed.to_string()
        }
        _ if trimmed.starts_with("//") => format!("https:{trimmed}"),
        _ => format!(
            "{}/{}",
            self.url.trim_end_matches('/'),
            trimmed.trim_start_matches('/')
        ),
    }
}
/// Extracts the album id — the alphanumeric run following the first `/a/` —
/// from an album URL, or `None` when the URL has no such segment.
fn extract_album_guid_from_url(value: &str) -> Option<String> {
    let pattern = Self::regex(r#"/a/([A-Za-z0-9]+)"#)?;
    let captures = pattern.captures(value)?;
    let guid = captures.get(1)?;
    Some(guid.as_str().to_string())
}
/// Extracts an uploader slug from a `uploader:` / `user:` query
/// (prefix matched case-insensitively).
///
/// The remainder is lower-cased, stripped of leading `@` characters and
/// reduced to ASCII letters, digits, `_` and `-`. Returns `None` when the
/// query has no such prefix or nothing is left after filtering.
fn uploader_slug_from_query(query: &str) -> Option<String> {
    let lowered = query.trim().to_ascii_lowercase();
    let remainder = ["uploader:", "user:"]
        .iter()
        .find_map(|prefix| lowered.strip_prefix(*prefix))?;

    let slug: String = remainder
        .trim()
        .trim_start_matches('@')
        .chars()
        .filter(|c| c.is_ascii_alphanumeric() || matches!(*c, '_' | '-'))
        .collect();

    if slug.is_empty() {
        None
    } else {
        Some(slug)
    }
}
/// Decides which feed a request addresses.
///
/// Precedence: an uploader chosen through `options.sites` (anything other
/// than blank or "all"), then an `uploader:`/`user:` query, then a plain
/// search, and finally the explore feed. A sort of `new` or `latest`
/// selects newest-first for both search and explore.
fn resolve_target(&self, query: &str, options: &ServerOptions, sort: &str) -> Target {
    let wants_new = matches!(sort, "new" | "latest");

    let site = options.sites.as_deref().map(str::trim).unwrap_or("");
    if !site.is_empty() && !site.eq_ignore_ascii_case("all") {
        let slug: String = site
            .trim_start_matches('@')
            .chars()
            .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
            .collect();
        if !slug.is_empty() {
            return Target::UploaderPosts { slug };
        }
    }

    let query = query.trim();
    if query.is_empty() {
        return if wants_new {
            Target::ExploreNew
        } else {
            Target::ExploreHot
        };
    }
    match Self::uploader_slug_from_query(query) {
        Some(slug) => Target::UploaderPosts { slug },
        None => Target::Search {
            query: query.to_string(),
            order_new: wants_new,
        },
    }
}
/// Builds the listing URL for `target` and `page` (pages below 1 count as 1).
///
/// Explore feeds omit the `page` parameter on page 1; search and uploader
/// URLs always carry it. Query parameters are form-urlencoded.
fn build_url_for_target(&self, target: &Target, page: u16) -> String {
    let page = page.max(1);
    let page_text = page.to_string();
    // Serializes key/value pairs in the given order.
    let encode = |pairs: &[(&str, &str)]| {
        let mut serializer = form_urlencoded::Serializer::new(String::new());
        for (key, value) in pairs {
            serializer.append_pair(key, value);
        }
        serializer.finish()
    };

    match target {
        Target::ExploreHot if page == 1 => format!("{}/explore", self.url),
        Target::ExploreHot => format!("{}/explore?page={page}", self.url),
        Target::ExploreNew if page == 1 => format!("{}/explore/new", self.url),
        Target::ExploreNew => format!("{}/explore/new?page={page}", self.url),
        Target::Search { query, order_new } => {
            let mut pairs: Vec<(&str, &str)> = vec![("q", query.as_str())];
            if *order_new {
                pairs.push(("o", "new"));
            }
            pairs.push(("page", page_text.as_str()));
            format!("{}/search?{}", self.url, encode(&pairs))
        }
        Target::UploaderPosts { slug } => {
            let pairs = [("t", "posts"), ("page", page_text.as_str())];
            format!("{}/{}?{}", self.url, slug, encode(&pairs))
        }
    }
}
/// Returns the normalised text of the first descendant of `parent` matching
/// `selector`, or an empty string when nothing matches.
///
/// Text fragments are joined with single spaces before being passed through
/// `Self::normalize_text`.
fn text_from_selector(parent: &ElementRef<'_>, selector: &Selector) -> String {
    match parent.select(selector).next() {
        Some(node) => {
            let fragments: Vec<_> = node.text().collect();
            Self::normalize_text(&fragments.join(" "))
        }
        None => String::new(),
    }
}
/// Converts one `div.album` listing card into a `VideoItem`.
///
/// Returns `None` when a selector cannot be built, the card has no album
/// link, the link carries no album id, or no title can be found.
fn parse_video_item(&self, card: &ElementRef<'_>) -> Option<VideoItem> {
    let link_selector = Self::selector("a.album-link[href]")?;
    let title_selector = Self::selector("a.album-title")?;
    let thumb_selector = Self::selector("img.album-thumbnail")?;
    let user_selector = Self::selector("span.album-user")?;
    let views_selector = Self::selector("span.album-bottom-views")?;

    let anchor = card.select(&link_selector).next()?;
    let album_url = self.normalize_url(anchor.value().attr("href")?);
    let album_id = Self::extract_album_guid_from_url(&album_url)?;

    // Prefer the visible title; fall back to the link's `title` attribute.
    let visible_title = Self::text_from_selector(card, &title_selector);
    let final_title = if visible_title.is_empty() {
        anchor
            .value()
            .attr("title")
            .map(Self::normalize_text)
            .unwrap_or_default()
    } else {
        visible_title
    };
    if final_title.is_empty() {
        return None;
    }

    // First thumbnail image that exposes either `src` or `data-rotate-src`.
    let thumb = card
        .select(&thumb_selector)
        .find_map(|img| {
            let attrs = img.value();
            attrs.attr("src").or_else(|| attrs.attr("data-rotate-src"))
        })
        .map(|raw| self.normalize_url(raw))
        .unwrap_or_default();

    let uploader = Self::text_from_selector(card, &user_selector);
    let uploader_slug: String = uploader
        .trim()
        .trim_start_matches('@')
        .chars()
        .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
        .collect();

    // Commas become dots and spaces are dropped before the abbreviated count
    // is handed to the shared parser.
    let views_text = Self::text_from_selector(card, &views_selector);
    let compact_views = views_text.replace(',', ".").replace(' ', "");
    let views = parse_abbreviated_number(compact_views.as_str());

    let mut item = VideoItem::new(
        album_id,
        final_title,
        album_url,
        CHANNEL_ID.to_string(),
        thumb,
        0,
    );
    if let Some(count) = views {
        item = item.views(count);
    }
    if !uploader.is_empty() {
        item = item.uploader(uploader);
    }
    if !uploader_slug.is_empty() {
        item = item.uploader_url(format!("{}/{}?t=posts", self.url, uploader_slug));
        item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_slug}"));
    }
    Some(item)
}
/// Parses a listing page and returns every `div.album` card that converts
/// into a `VideoItem`; cards that fail to parse are skipped.
fn parse_listing(&self, html: &str) -> Vec<VideoItem> {
    let Some(card_selector) = Self::selector("div.album") else {
        return Vec::new();
    };
    let document = Html::parse_document(html);
    let mut items = Vec::new();
    for card in document.select(&card_selector) {
        if let Some(item) = self.parse_video_item(&card) {
            items.push(item);
        }
    }
    items
}
/// Request headers sent with HTML page fetches: a fixed accept, language and
/// user-agent set, plus the supplied `referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    const ACCEPT: &str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
    const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";

    let pairs = [
        ("accept", ACCEPT),
        ("accept-language", "en-US,en;q=0.8"),
        ("user-agent", USER_AGENT),
        ("referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
async fn fetch_page(&self, options: &ServerOptions, url: &str) -> Result<String> {
let mut requester = requester_or_default(options, CHANNEL_ID, "get_videos");
requester
.get_with_headers(url, Self::html_headers(&format!("{}/explore", self.url)), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
}
#[async_trait]
impl Provider for EromeProvider {
    /// Resolves the request to a listing, fetches it, and parses the cards.
    ///
    /// An empty `sort` falls back to `options.sort`, then to "new". An
    /// unparsable `page` falls back to 1. Fetch failures are reported through
    /// `report_provider_error` and produce an empty list.
    async fn get_videos(
        &self,
        _cache: crate::util::cache::VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let sort_value = if !sort.is_empty() {
            sort
        } else {
            options.sort.as_deref().unwrap_or("new").to_string()
        };
        let query_value = query.unwrap_or_default();
        let page_value: u16 = page.parse().unwrap_or(1);

        let target = self.resolve_target(&query_value, &options, &sort_value);
        let url = self.build_url_for_target(&target, page_value);

        let html = match self.fetch_page(&options, &url).await {
            Ok(html) => html,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos.fetch", &error.to_string()).await;
                return vec![];
            }
        };
        self.parse_listing(&html)
    }

    /// Returns this provider's channel description.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}

748
src/providers/fikfap.rs Normal file
View File

@@ -0,0 +1,748 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::uploaders::{UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef};
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::DateTime;
use error_chain::error_chain;
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use serde::Deserialize;
/// Static channel metadata: assigns this provider to the "tiktok" group and
/// lists its descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["shortform", "onlyfans", "swipe", "amateur"],
};
// Site origin: used for public post/user URLs and for the Referer header.
const BASE_URL: &str = "https://fikfap.com";
// Origin of the JSON API that every fetch in this file goes through.
const API_BASE: &str = "https://api.fikfap.com";
// Channel identifier; also the prefix of uploader ids (`fikfap:<username>`).
const CHANNEL_ID: &str = "fikfap";
// Page size used when the caller's `per_page` cannot be parsed.
const DEFAULT_PER_PAGE: usize = 20;
// Upper bound applied to the caller's `per_page`.
const MAX_PER_PAGE: usize = 40;
// FikFap pagination is cursor-based (`afterId`), not page-number based, so reaching
// page N requires walking N sequential requests from the start of the feed.
// This constant caps how many sequential requests a single page lookup may make.
const MAX_PAGE_WALK: u16 = 25;
// A small curated set of well-known FikFap hashtags. The site exposes a hashtag
// catalog only through a randomized "discover" sample, so there is no stable full
// catalog to background-load; any hashtag label works directly as a routing target.
// These labels become both the channel's category titles and its "categories" options.
const CURATED_HASHTAGS: &[&str] = &[
"anal",
"ass",
"milf",
"threesome",
"blonde",
"brunette",
"redhead",
"natural",
"hardcore",
"lingerie",
"masturbation",
"cumshot",
"squirting",
"creampie",
"bbc",
"gonewild",
"blowjob",
"doggystyle",
"lesbian",
"deepthroat",
];
// Declares this module's error types through the `error_chain!` macro; the
// `Error` and `Result` names used throughout this file presumably come from
// here (nothing else in view defines them).
// - `Json` is a foreign link for `serde_json::Error`, which is what lets
//   `serde_json::from_str(..)?` propagate in the fetch helpers.
// - `Request` is a message-carrying error kind displayed as "request error: <msg>".
error_chain! {
foreign_links {
Json(serde_json::Error);
}
errors {
Request(msg: String) {
description("request error")
display("request error: {}", msg)
}
}
}
/// Ordering modes of the main post feed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FeedSort {
    New,
    Trending,
    Random,
}

impl FeedSort {
    /// Value sent as the `sort` query parameter for this mode.
    fn api_value(self) -> &'static str {
        match self {
            Self::New => "new",
            Self::Trending => "trending",
            Self::Random => "random",
        }
    }

    /// Maps a client sort id (trimmed, ASCII case-insensitive) to a feed mode.
    ///
    /// "trending"/"hot" select `Trending`; "random"/"foryou"/"for_you" select
    /// `Random`; anything else, including the empty string, selects `New`.
    fn from_sort_id(value: &str) -> Self {
        let id = value.trim().to_ascii_lowercase();
        if matches!(id.as_str(), "trending" | "hot") {
            Self::Trending
        } else if matches!(id.as_str(), "random" | "foryou" | "for_you") {
            Self::Random
        } else {
            Self::New
        }
    }
}
/// What a video request resolves to.
#[derive(Debug, Clone)]
enum Target {
// The main feed in the given sort mode.
Feed(FeedSort),
// Posts under a hashtag label (already normalized by the caller).
Hashtag(String),
// Posts of one creator, identified by username.
User(String),
// Free-text search; only the first page yields results.
Search(String),
}
/// One post as deserialized from API JSON (camelCase keys).
///
/// `post_id` is required; every other field falls back to its default when
/// the key is absent.
#[derive(Debug, Deserialize, Clone, Default)]
#[serde(rename_all = "camelCase")]
struct Post {
// Also used as the pagination cursor (`afterId`).
post_id: i64,
#[serde(default)]
// Used as the video title when non-blank.
label: String,
#[serde(default)]
views_count: u32,
#[serde(default)]
// NOTE(review): presumably seconds — confirm against the API.
duration: Option<u32>,
#[serde(default)]
// Posts with a blank stream URL are dropped by `build_video_item`.
video_stream_url: String,
#[serde(default)]
thumbnail_stream_url: String,
#[serde(default)]
// Parsed as RFC 3339; preferred over `created_at` for the upload time.
published_at: Option<String>,
#[serde(default)]
created_at: String,
#[serde(default)]
author: Author,
#[serde(default)]
hashtags: Vec<HashtagEntry>,
}
/// Author block embedded in a `Post` (camelCase JSON keys); both fields
/// default when absent.
#[derive(Debug, Deserialize, Clone, Default)]
#[serde(rename_all = "camelCase")]
struct Author {
#[serde(default)]
username: String,
#[serde(default)]
is_verified: bool,
}
/// One entry of a post's `hashtags` array; only the label is read.
#[derive(Debug, Deserialize, Clone, Default)]
struct HashtagEntry {
#[serde(default)]
label: String,
}
/// Body of the search endpoint response; only the `posts` array is read, and
/// it defaults to empty when absent.
#[derive(Debug, Deserialize, Clone, Default)]
struct SearchResponse {
#[serde(default)]
posts: Vec<Post>,
}
/// Creator profile as deserialized from the profile endpoint (camelCase JSON
/// keys); every field defaults when absent.
#[derive(Debug, Deserialize, Clone, Default)]
#[serde(rename_all = "camelCase")]
struct UserProfile {
#[serde(default)]
// A blank username makes `fetch_user_profile` treat the profile as missing.
username: String,
#[serde(default)]
count_posts: u64,
#[serde(default)]
count_total_views: u64,
#[serde(default)]
is_verified: bool,
#[serde(default)]
description: Option<String>,
#[serde(default)]
// Used as the uploader avatar, routed through the thumbnail proxy.
thumbnail_url: String,
}
/// FikFap provider.
#[derive(Debug, Clone)]
pub struct FikfapProvider {
// Random identifier generated once in `new` and sent as the
// `Authorization-Anonymous` header on every API request.
anon_id: String,
}
impl FikfapProvider {
/// Creates a provider with a freshly generated anonymous id.
pub fn new() -> Self {
    let anon_id = Self::generate_anon_id();
    Self { anon_id }
}
/// Generates a random identifier formatted like a version-4 UUID.
///
/// Sixteen random bytes are drawn; the high nibble of byte 6 is forced to `4`
/// and the top two bits of byte 8 to `10`, then the bytes are rendered as
/// lowercase hex in 8-4-4-4-12 groups.
fn generate_anon_id() -> String {
    let mut raw = [0u8; 16];
    rand::fill(&mut raw);
    raw[6] = 0x40 | (raw[6] & 0x0f);
    raw[8] = 0x80 | (raw[8] & 0x3f);
    let [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] = raw;
    format!(
        "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
        b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15
    )
}
/// Headers attached to every API request: the site Referer, this provider's
/// anonymous id, and a flag marking the session as logged out.
fn anon_headers(&self) -> Vec<(String, String)> {
    let referer = format!("{BASE_URL}/");
    Vec::from([
        ("Referer".to_string(), referer),
        ("Authorization-Anonymous".to_string(), self.anon_id.clone()),
        ("IsLoggedIn".to_string(), "false".to_string()),
    ])
}
/// Builds the channel description shown to clients: a sort selector
/// (latest / trending / for-you) and a hashtag selector populated from
/// `CURATED_HASHTAGS`.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // One pass over the curated labels yields both the plain category titles
    // and the matching filter options.
    let mut category_titles = Vec::with_capacity(CURATED_HASHTAGS.len());
    let mut category_options = Vec::with_capacity(CURATED_HASHTAGS.len());
    for label in CURATED_HASHTAGS {
        let title = Self::title_case(label);
        category_titles.push(title.clone());
        category_options.push(FilterOption {
            id: label.to_string(),
            title,
        });
    }

    let sort_choices = [("new", "Latest"), ("trending", "Trending"), ("random", "For You")]
        .iter()
        .map(|&(id, title)| FilterOption {
            id: id.to_string(),
            title: title.to_string(),
        })
        .collect::<Vec<_>>();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse FikFap by latest, trending, or a randomized For You feed."
            .to_string(),
        systemImage: "arrow.up.arrow.down".to_string(),
        colorName: "blue".to_string(),
        options: sort_choices,
        multiSelect: false,
    };
    let hashtag_option = ChannelOption {
        id: "categories".to_string(),
        title: "Hashtags".to_string(),
        description: "Open a FikFap hashtag feed directly.".to_string(),
        systemImage: "tag".to_string(),
        colorName: "orange".to_string(),
        options: category_options,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "FikFap".to_string(),
        description:
            "FikFap swipe-style short clips with direct HLS playback, hashtag browsing, and creator pages."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=fikfap.com".to_string(),
        status: "active".to_string(),
        categories: category_titles,
        options: vec![sort_option, hashtag_option],
        nsfw: true,
        cacheDuration: Some(60),
    }
}
/// Uppercases the first character of `label` and leaves the rest untouched.
/// An empty label yields an empty string.
fn title_case(label: &str) -> String {
    let mut rest = label.chars();
    let mut out = String::with_capacity(label.len());
    if let Some(first) = rest.next() {
        // `to_uppercase` may expand to several characters.
        out.extend(first.to_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
/// Canonicalises a hashtag label: surrounding whitespace and leading `#`
/// characters are removed, ASCII letters are lowercased, and every space
/// character is dropped.
fn normalize_hashtag(value: &str) -> String {
    value
        .trim()
        .trim_start_matches('#')
        .chars()
        .filter(|c| *c != ' ')
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
/// Interprets a free-text query, recognising routing shortcuts.
///
/// * `tag:` / `hashtag:` / `hash:` followed by a value -> hashtag feed
/// * `user:` / `uploader:` / `creator:` followed by a value -> creator feed
/// * a leading `#` -> hashtag feed
/// * anything else -> search for the trimmed query
///
/// Shortcut keywords are matched case-insensitively. A hashtag shortcut whose
/// label is empty after normalisation (for example `tag:#` or `##`) is not
/// treated as a hashtag, since that would request an empty label from the
/// API; such queries fall through to search.
fn resolve_query_target(query: &str) -> Target {
    let trimmed = query.trim();
    if let Some((kind, value)) = trimmed.split_once(':') {
        let value = value.trim();
        if !value.is_empty() {
            match kind.trim().to_ascii_lowercase().as_str() {
                "tag" | "hashtag" | "hash" => {
                    let label = Self::normalize_hashtag(value);
                    if !label.is_empty() {
                        return Target::Hashtag(label);
                    }
                }
                "user" | "uploader" | "creator" => {
                    return Target::User(value.to_string());
                }
                // Unknown prefix: treat the whole query as ordinary text.
                _ => {}
            }
        }
    }
    if let Some(hashtag) = trimmed.strip_prefix('#') {
        let label = Self::normalize_hashtag(hashtag);
        if !label.is_empty() {
            return Target::Hashtag(label);
        }
    }
    Target::Search(trimmed.to_string())
}
/// Derives a hashtag target from the `categories` option, if one is selected.
///
/// Returns `None` when the option is absent, blank, equal to "all"
/// (compared after trimming, ASCII case-insensitively), or when nothing is
/// left of the label after normalisation.
fn resolve_option_target(options: &ServerOptions) -> Option<Target> {
    let category = options.categories.as_deref()?.trim();
    if category.is_empty() || category.eq_ignore_ascii_case("all") {
        return None;
    }
    let label = Self::normalize_hashtag(category);
    (!label.is_empty()).then(|| Target::Hashtag(label))
}
/// Chooses the request target: a non-blank query wins, then the category
/// option, and finally the main feed in the requested sort mode.
fn pick_target(query: Option<&str>, sort: &str, options: &ServerOptions) -> Target {
    match query.filter(|text| !text.trim().is_empty()) {
        Some(text) => Self::resolve_query_target(text),
        None => Self::resolve_option_target(options)
            .unwrap_or_else(|| Target::Feed(FeedSort::from_sort_id(sort))),
    }
}
/// Parses an RFC 3339 timestamp into Unix seconds.
///
/// Returns `None` for blank or unparsable input; timestamps before the epoch
/// are reported as 0.
fn parse_timestamp(value: &str) -> Option<u64> {
    let text = value.trim();
    if text.is_empty() {
        return None;
    }
    let parsed = DateTime::parse_from_rfc3339(text).ok()?;
    Some(parsed.timestamp().max(0) as u64)
}
/// Rewrites a thumbnail URL to go through the "fikfap-thumb" proxy endpoint.
///
/// Returns `None` when `thumb_url` is blank; otherwise the scheme is stripped
/// and the remainder is handed to `build_proxy_url`.
fn proxied_thumb(thumb_url: &str, options: &ServerOptions) -> Option<String> {
    Some(thumb_url.trim())
        .filter(|url| !url.is_empty())
        .map(|url| {
            let without_scheme = strip_url_scheme(url);
            build_proxy_url(options, "fikfap-thumb", &without_scheme)
        })
}
/// Converts an API post into a `VideoItem`.
///
/// Returns `None` for posts without a stream URL. The title falls back to
/// "FikFap post <id>" when the label is blank, and the upload time prefers
/// `published_at` over `created_at`.
fn build_video_item(&self, post: Post, options: &ServerOptions) -> Option<VideoItem> {
    if post.video_stream_url.trim().is_empty() {
        return None;
    }

    let label = post.label.trim();
    let title = if label.is_empty() {
        format!("FikFap post {}", post.post_id)
    } else {
        label.to_string()
    };
    let thumb = match Self::proxied_thumb(&post.thumbnail_stream_url, options) {
        Some(proxied) => proxied,
        None => post.thumbnail_stream_url.clone(),
    };
    let uploaded_at = post
        .published_at
        .as_deref()
        .filter(|value| !value.trim().is_empty())
        .and_then(Self::parse_timestamp)
        .or_else(|| Self::parse_timestamp(&post.created_at));
    let tags: Vec<String> = post
        .hashtags
        .iter()
        .filter(|entry| !entry.label.trim().is_empty())
        .map(|entry| entry.label.clone())
        .collect();

    // The CDN requires a Referer on every request in the HLS session (master
    // playlist, sub-playlists, and segments). Hot Tub clients apply a format's
    // `http_headers` to the whole HLS playback session, not just the initial
    // manifest fetch, so the documented `httpHeaders` mechanism is sufficient
    // here without needing to reverse-proxy the media ourselves.
    let hls = VideoFormat::m3u8(
        post.video_stream_url.clone(),
        "auto".to_string(),
        "hls".to_string(),
    )
    .http_header("Referer".to_string(), format!("{BASE_URL}/"));

    let mut item = VideoItem::new(
        post.post_id.to_string(),
        title,
        format!("{BASE_URL}/post/{}", post.post_id),
        CHANNEL_ID.to_string(),
        thumb,
        post.duration.unwrap_or(0),
    );
    item.views = Some(post.views_count);
    item.uploadedAt = uploaded_at;

    let author = &post.author;
    if !author.username.trim().is_empty() {
        item.uploader = Some(author.username.clone());
        item.uploaderUrl = Some(format!("{BASE_URL}/user/{}", author.username));
        item.uploaderId = Some(format!("{CHANNEL_ID}:{}", author.username));
    }
    item.verified = Some(author.is_verified);
    item.tags = if tags.is_empty() { None } else { Some(tags) };
    item.formats = Some(vec![hls]);
    Some(item)
}
async fn fetch_posts_page(
&self,
target_path: &str,
sort: Option<&str>,
after_id: Option<i64>,
amount: usize,
options: &ServerOptions,
) -> Result<Vec<Post>> {
let mut url = format!("{API_BASE}/{target_path}?amount={amount}");
if let Some(sort) = sort {
url.push_str(&format!("&sort={sort}"));
}
if let Some(after_id) = after_id {
url.push_str(&format!("&afterId={after_id}"));
}
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_posts_page");
let text = requester
.get_with_headers(&url, self.anon_headers(), None)
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))?;
Ok(serde_json::from_str(&text)?)
}
async fn fetch_cursor_page(
&self,
target_path: &str,
sort: Option<&str>,
page: u16,
per_page: usize,
options: &ServerOptions,
) -> Result<Vec<Post>> {
let page = page.max(1).min(MAX_PAGE_WALK);
let mut after_id: Option<i64> = None;
let mut items = Vec::new();
for _ in 0..page {
let batch = self
.fetch_posts_page(target_path, sort, after_id, per_page, options)
.await?;
if batch.is_empty() {
items = Vec::new();
break;
}
after_id = batch.last().map(|post| post.post_id);
items = batch;
}
Ok(items)
}
async fn fetch_search(&self, query: &str, options: &ServerOptions) -> Result<Vec<Post>> {
let encoded_query: String = url::form_urlencoded::byte_serialize(query.as_bytes()).collect();
let url = format!("{API_BASE}/search?q={encoded_query}");
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_search");
let text = requester
.get_with_headers(&url, self.anon_headers(), None)
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))?;
let response: SearchResponse = serde_json::from_str(&text)?;
Ok(response.posts)
}
/// Fetches the raw posts for `target` at the given page.
///
/// Feed, hashtag and creator targets go through the cursor walk; hashtag
/// feeds are always requested with `sort=new` and creator feeds without a
/// sort parameter. Search is served from a single request.
async fn fetch_target_posts(
    &self,
    target: &Target,
    page: u16,
    per_page: usize,
    options: &ServerOptions,
) -> Result<Vec<Post>> {
    let (path, sort): (String, Option<&str>) = match target {
        Target::Search(query) => {
            // FikFap's search endpoint returns a single fixed-size batch with no
            // cursor or page parameter, so later pages have nothing new to offer.
            return if page > 1 {
                Ok(Vec::new())
            } else {
                self.fetch_search(query, options).await
            };
        }
        Target::Feed(feed_sort) => ("posts".to_string(), Some(feed_sort.api_value())),
        Target::Hashtag(label) => {
            let encoded = utf8_percent_encode(label, NON_ALPHANUMERIC).to_string();
            (format!("hashtags/label/{encoded}/posts"), Some("new"))
        }
        Target::User(username) => {
            let encoded = utf8_percent_encode(username, NON_ALPHANUMERIC).to_string();
            (format!("profile/username/{encoded}/posts"), None)
        }
    };
    self.fetch_cursor_page(&path, sort, page, per_page, options)
        .await
}
/// Fetches the posts for `target` and converts them to `VideoItem`s, dropping
/// posts that `build_video_item` rejects.
async fn fetch_target_items(
    &self,
    target: Target,
    page: u16,
    per_page: usize,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    let posts = self.fetch_target_posts(&target, page, per_page, options).await?;
    let mut items = Vec::new();
    for post in posts {
        if let Some(item) = self.build_video_item(post, options) {
            items.push(item);
        }
    }
    Ok(items)
}
/// Looks up a creator profile by username.
///
/// This is best-effort: a failed request, an undecodable body, or a profile
/// with a blank username all yield `Ok(None)` rather than an error.
async fn fetch_user_profile(
    &self,
    username: &str,
    options: &ServerOptions,
) -> Result<Option<UserProfile>> {
    let encoded = utf8_percent_encode(username, NON_ALPHANUMERIC).to_string();
    let url = format!("{API_BASE}/profile/username/{encoded}");
    let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_user_profile");
    let Ok(body) = requester.get_with_headers(&url, self.anon_headers(), None).await else {
        return Ok(None);
    };
    let profile = serde_json::from_str::<UserProfile>(&body)
        .ok()
        .filter(|profile| !profile.username.trim().is_empty());
    Ok(profile)
}
/// Assembles the uploader profile for `username`.
///
/// Returns `Ok(None)` when the profile cannot be fetched. When
/// `profile_content` is set, the creator's first 24 posts are attached as
/// video references; a non-blank `query` keeps only posts whose title or tags
/// contain it (ASCII case-insensitive substring match).
///
/// The query is trimmed before matching: previously only the emptiness check
/// trimmed it, so a query such as `"foo "` could never match.
///
/// # Errors
/// Propagates failures from fetching the creator's posts.
async fn build_uploader_profile(
    &self,
    username: &str,
    query: Option<&str>,
    profile_content: bool,
    options: &ServerOptions,
) -> Result<Option<UploaderProfile>> {
    let Some(profile) = self.fetch_user_profile(username, options).await? else {
        return Ok(None);
    };
    let canonical_id = format!("{CHANNEL_ID}:{}", profile.username);
    let mut videos = None;
    if profile_content {
        let items = self
            .fetch_target_items(Target::User(profile.username.clone()), 1, 24, options)
            .await?;
        let needle = query
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .map(str::to_ascii_lowercase);
        let refs = items
            .iter()
            .filter(|item| {
                // Without a query every post is kept.
                let Some(needle) = needle.as_deref() else {
                    return true;
                };
                let haystack = format!(
                    "{} {}",
                    item.title,
                    item.tags.as_ref().map(|values| values.join(" ")).unwrap_or_default()
                )
                .to_ascii_lowercase();
                haystack.contains(needle)
            })
            .map(|item| UploaderVideoRef::from_video_item(item, &profile.username, &canonical_id))
            .collect::<Vec<_>>();
        videos = Some(refs);
    }
    Ok(Some(UploaderProfile {
        id: canonical_id,
        name: profile.username.clone(),
        url: Some(format!("{BASE_URL}/user/{}", profile.username)),
        channel: Some(CHANNEL_ID.to_string()),
        verified: profile.is_verified,
        videoCount: profile.count_posts,
        totalViews: profile.count_total_views,
        channels: Some(vec![UploaderChannelStat {
            channel: CHANNEL_ID.to_string(),
            videoCount: profile.count_posts,
            firstSeenAt: None,
            lastSeenAt: None,
        }]),
        avatar: Self::proxied_thumb(&profile.thumbnail_url, options),
        description: profile.description.clone(),
        bio: profile.description,
        videos,
        tapes: Some(vec![]),
        playlists: Some(vec![]),
        layout: Some(vec![UploaderLayoutRow::videos(Some("Posts".to_string()))]),
    }))
}
}
#[async_trait]
impl Provider for FikfapProvider {
    /// Resolves the request to a target and returns its videos.
    ///
    /// `page` falls back to 1 and `per_page` to `DEFAULT_PER_PAGE` when they
    /// cannot be parsed; `per_page` is clamped to `1..=MAX_PER_PAGE`. Failures
    /// are reported through `report_provider_error` and yield an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = cache;
        let _ = pool;
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let per_page = per_page
            .parse::<usize>()
            .unwrap_or(DEFAULT_PER_PAGE)
            .clamp(1, MAX_PER_PAGE);
        let normalized_query = query
            .as_deref()
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let target = Self::pick_target(normalized_query, &sort, &options);
        match self.fetch_target_items(target, page, per_page, &options).await {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }

    /// Returns this provider's channel description.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }

    /// Resolves an uploader profile from either a `fikfap:<username>` id or a
    /// plain uploader name.
    ///
    /// The id takes precedence when it carries this channel's prefix and a
    /// non-blank username. Otherwise the name is used, so an id such as
    /// `"fikfap:"` no longer hides a usable `uploader_name`. Returns
    /// `Ok(None)` when neither source yields a username.
    async fn get_uploader(
        &self,
        cache: VideoCache,
        pool: DbPool,
        uploader_id: Option<String>,
        uploader_name: Option<String>,
        query: Option<String>,
        profile_content: bool,
        options: ServerOptions,
    ) -> std::result::Result<Option<UploaderProfile>, String> {
        let _ = cache;
        let _ = pool;
        let from_id = uploader_id
            .as_deref()
            .and_then(|id| id.strip_prefix(CHANNEL_ID)?.strip_prefix(':'))
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let from_name = uploader_name
            .as_deref()
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let Some(username) = from_id.or(from_name) else {
            return Ok(None);
        };
        self.build_uploader_profile(username, query.as_deref(), profile_content, &options)
            .await
            .map_err(|error| error.to_string())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Query shortcuts (`tag:`, `#`, `user:`) route to the matching target and
    /// plain text routes to search.
    #[test]
    fn resolves_query_shortcuts() {
        let target = FikfapProvider::resolve_query_target("tag:Big Tits");
        let Target::Hashtag(label) = &target else {
            panic!("expected hashtag target, got {target:?}");
        };
        assert_eq!(label, "bigtits");

        let target = FikfapProvider::resolve_query_target("#blonde");
        let Target::Hashtag(label) = &target else {
            panic!("expected hashtag target, got {target:?}");
        };
        assert_eq!(label, "blonde");

        let target = FikfapProvider::resolve_query_target("user:AdultPrime");
        let Target::User(username) = &target else {
            panic!("expected user target, got {target:?}");
        };
        assert_eq!(username, "AdultPrime");

        let target = FikfapProvider::resolve_query_target("blonde teen");
        let Target::Search(query) = &target else {
            panic!("expected search target, got {target:?}");
        };
        assert_eq!(query, "blonde teen");
    }

    /// Known sort ids map to their mode; unknown ids fall back to `New`.
    #[test]
    fn maps_sort_ids() {
        let cases = [
            ("new", FeedSort::New),
            ("trending", FeedSort::Trending),
            ("random", FeedSort::Random),
            ("unknown", FeedSort::New),
        ];
        for (id, expected) in cases {
            assert_eq!(FeedSort::from_sort_id(id), expected);
        }
    }

    /// A representative API payload deserializes, ignoring unknown author keys.
    #[test]
    fn parses_post_payload() {
        let json = r#"{
"postId": 123,
"label": "Test post",
"viewsCount": 42,
"duration": 12,
"videoStreamUrl": "https://vz-x.b-cdn.net/abc/playlist.m3u8",
"thumbnailStreamUrl": "https://vz-x.b-cdn.net/abc/thumbnail.jpg",
"publishedAt": "2026-01-01T00:00:00.000Z",
"createdAt": "2026-01-01T00:00:00.000Z",
"author": {
"username": "creator",
"isVerified": true,
"countPosts": 10,
"countTotalViews": 100,
"thumbnailUrl": "https://example.com/avatar.jpg",
"description": null
},
"hashtags": [{"label": "blonde"}]
}"#;
        let post: Post = serde_json::from_str(json).expect("parses");
        assert_eq!(post.post_id, 123);
        assert_eq!(post.author.username, "creator");
        assert_eq!(post.hashtags.len(), 1);
    }
}

View File

@@ -202,7 +202,7 @@ impl FreeusepornProvider {
.await
.map_err(|error| format!("search submit failed url={search_url}; error={error}"))?;
Ok(response.uri().to_string().trim_end_matches('/').to_string())
Ok(response.url().to_string().trim_end_matches('/').to_string())
}
fn build_formats(&self, id: &str) -> Vec<VideoFormat> {

496
src/providers/fullporner.rs Normal file
View File

@@ -0,0 +1,496 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use scraper::{Html, Selector};
/// Static channel metadata: assigns this provider to the "mainstream-tube"
/// group and lists its descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["full length", "hd", "free", "mainstream"],
};
// Site origin used for listing and watch-page URLs.
const BASE_URL: &str = "https://fullporner.com";
// Host that direct video URLs are built against; also sent as the playback Referer.
const MEDIA_HOST: &str = "https://xiaoshenke.net";
const CHANNEL_ID: &str = "fullporner";
// Page size used when the caller's `per_page` cannot be parsed.
const DEFAULT_PER_PAGE: usize = 32;
// User-Agent attached to every video format's HTTP headers.
const BROWSER_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
// Declares this module's error types through the `error_chain!` macro; the
// `Error` and `Result` names used throughout this file presumably come from
// here (nothing else in view defines them).
// - `Io` and `Url` are foreign links for `std::io::Error` and `url::ParseError`.
// - `Parse` is a message-carrying error kind displayed as "parse error: <msg>".
error_chain! {
foreign_links {
Io(std::io::Error);
Url(url::ParseError);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// FullPorner provider. Holds no state; its helpers are associated functions.
#[derive(Debug, Clone)]
pub struct FullpornerProvider;
/// A listing to load, together with its page number.
#[derive(Debug, Clone)]
enum Target {
// Home listing (`/home/<page>`).
Latest { page: u32 },
// Search results for `query`.
Search { query: String, page: u32 },
// Category listing; `slug` is placed in the URL path as-is.
Category { slug: String, page: u32 },
// Pornstar listing; `slug` is placed in the URL path as-is.
Pornstar { slug: String, page: u32 },
}
/// Data scraped from one listing card before conversion to a `VideoItem`.
#[derive(Debug, Clone)]
struct CardStub {
// Path component after `/watch/`; doubles as the item id.
slug: String,
title: String,
// Thumbnail URL from the card image's `data-src` (empty if missing).
thumb: String,
// Id extracted from the thumbnail file name; the direct video URL is built
// from it, so `None` means no playable format is attached.
thumb_id: Option<String>,
// Duration in seconds (0 when it could not be parsed).
duration: u32,
// Parsed from the `.create` element's text as an integer.
// NOTE(review): presumably a Unix timestamp — confirm against the site markup.
uploaded_at: Option<u64>,
}
impl FullpornerProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel description shown to clients; the only option is a
/// sort selector with a single "Latest" entry.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let latest = FilterOption {
        id: "new".to_string(),
        title: "Latest".to_string(),
    };
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse FullPorner by newest videos.".to_string(),
        systemImage: "arrow.up.arrow.down".to_string(),
        colorName: "blue".to_string(),
        options: vec![latest],
        multiSelect: false,
    };
    Channel {
        id: CHANNEL_ID.to_string(),
        name: "FullPorner".to_string(),
        description: "Full-length free HD porn videos. Latest uploads, categories, and pornstar browsing.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=fullporner.com".to_string(),
        status: "active".to_string(),
        categories: Vec::new(),
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Builds the listing URL for `target`.
///
/// The home and search listings always carry the page number; category and
/// pornstar listings omit it for pages 0 and 1.
fn build_listing_url(target: &Target) -> String {
    match target {
        Target::Latest { page } => format!("{BASE_URL}/home/{page}"),
        Target::Search { query, page } => {
            let encoded = url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>();
            format!("{BASE_URL}/search?q={encoded}&p={page}")
        }
        Target::Category { slug, page } if *page <= 1 => format!("{BASE_URL}/category/{slug}"),
        Target::Category { slug, page } => format!("{BASE_URL}/category/{slug}/{page}"),
        Target::Pornstar { slug, page } if *page <= 1 => format!("{BASE_URL}/pornstar/{slug}"),
        Target::Pornstar { slug, page } => format!("{BASE_URL}/pornstar/{slug}/{page}"),
    }
}
/// Chooses the listing for a request.
///
/// A blank or missing query selects the home listing. `cat:` / `category:`
/// prefixes select a category, `pornstar:` / `star:` select a pornstar (the
/// remainder is trimmed and used as the slug); any other text is a search.
/// Prefixes are matched case-sensitively.
fn pick_target(query: Option<&str>, page: u32) -> Target {
    let Some(q) = query.map(str::trim).filter(|q| !q.is_empty()) else {
        return Target::Latest { page };
    };

    let category = q
        .strip_prefix("cat:")
        .or_else(|| q.strip_prefix("category:"));
    if let Some(slug) = category {
        return Target::Category {
            slug: slug.trim().to_string(),
            page,
        };
    }

    let pornstar = q
        .strip_prefix("pornstar:")
        .or_else(|| q.strip_prefix("star:"));
    if let Some(slug) = pornstar {
        return Target::Pornstar {
            slug: slug.trim().to_string(),
            page,
        };
    }

    Target::Search {
        query: q.to_string(),
        page,
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` failed: {e}")))
}
// Extract the thumb_id from a thumbnail URL like
// https://imgs.xiaoshenke.net/thumb/3195337.jpg → "3195337"
// https://imgs.xiaoshenke.net/thumb/6a0d75fb4182b.jpg → "6a0d75fb4182b"
//
// Only the last path segment is inspected. It must end in `.jpg` and the part
// before the extension must be non-empty and purely ASCII alphanumeric;
// anything else (for example names containing `_`) yields `None`.
fn extract_thumb_id(thumb_url: &str) -> Option<String> {
    let file_name = thumb_url.rsplit('/').next()?;
    let stem = file_name.strip_suffix(".jpg")?;
    let is_plain_id = !stem.is_empty() && stem.chars().all(|c| c.is_ascii_alphanumeric());
    is_plain_id.then(|| stem.to_string())
}
/// Scrapes every `.video-card` on a listing page into a `CardStub`.
///
/// Cards without a `/watch/<slug>` link are skipped, as are cards whose slug
/// was already seen earlier on the page.
///
/// # Errors
/// Fails only when one of the CSS selectors cannot be compiled.
fn parse_listing_page(html: &str) -> Result<Vec<CardStub>> {
    let document = Html::parse_document(html);
    let card_sel = Self::selector(".video-card")?;
    let img_sel = Self::selector("img[data-src]")?;
    let time_sel = Self::selector(".time")?;
    let title_sel = Self::selector(".video-title a")?;
    let create_sel = Self::selector(".create")?;
    let link_sel = Self::selector("a[href]")?;

    let mut seen = std::collections::HashSet::new();
    let mut stubs = Vec::new();
    for card in document.select(&card_sel) {
        // The slug comes from the first link that points at /watch/.
        let Some(slug) = card.select(&link_sel).find_map(|anchor| {
            anchor
                .value()
                .attr("href")?
                .strip_prefix("/watch/")
                .map(str::to_string)
        }) else {
            continue;
        };
        if slug.is_empty() || !seen.insert(slug.clone()) {
            continue;
        }

        let image = card.select(&img_sel).next();
        // Protocol-relative thumbnail URLs are upgraded to https.
        let thumb = match image.and_then(|img| img.value().attr("data-src")) {
            Some(src) if src.starts_with("//") => format!("https:{src}"),
            Some(src) => src.to_string(),
            None => String::new(),
        };

        // Title: prefer .video-title a text, fall back to img alt, then the slug.
        let heading = card
            .select(&title_sel)
            .next()
            .map(|el| el.text().collect::<String>().trim().to_string())
            .filter(|text| !text.is_empty());
        let title = match heading {
            Some(text) => text,
            None => image
                .and_then(|img| img.value().attr("alt"))
                .filter(|alt| !alt.is_empty())
                .map(str::to_string)
                .unwrap_or_else(|| slug.clone()),
        };

        let duration_text = match card.select(&time_sel).next() {
            Some(el) => el.text().collect::<String>().trim().to_string(),
            None => String::new(),
        };
        let duration = parse_time_to_seconds(&duration_text)
            .and_then(|seconds| u32::try_from(seconds).ok())
            .unwrap_or(0);

        let uploaded_at = card
            .select(&create_sel)
            .next()
            .and_then(|el| el.text().collect::<String>().trim().parse::<u64>().ok());

        let thumb_id = Self::extract_thumb_id(&thumb);
        stubs.push(CardStub {
            slug,
            title,
            thumb,
            thumb_id,
            duration,
            uploaded_at,
        });
    }
    Ok(stubs)
}
/// Builds the single 720p MP4 format for a video, addressed by its thumbnail
/// id on the media host, with the Referer and User-Agent headers attached.
fn build_format(thumb_id: &str) -> VideoFormat {
    let mut format = VideoFormat::new(
        format!("{MEDIA_HOST}/vid/{thumb_id}/720"),
        "720p".to_string(),
        "mp4".to_string(),
    );
    format.add_http_header("Referer".to_string(), format!("{MEDIA_HOST}/"));
    format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
    format
}
fn stub_to_item(stub: CardStub) -> VideoItem {
let url = format!("{BASE_URL}/watch/{}", stub.slug);
let formats = stub.thumb_id.as_deref().map(|id| vec![Self::build_format(id)]);
let mut item = VideoItem::new(
stub.slug,
stub.title,
url,
CHANNEL_ID.to_string(),
stub.thumb,
stub.duration,
);
item.formats = formats;
item.uploadedAt = stub.uploaded_at;
item.aspectRatio = Some(16.0 / 9.0);
item
}
async fn fetch_page(
target: &Target,
per_page: usize,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
let url = Self::build_listing_url(target);
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_page");
let html = requester
.get(&url, None)
.await
.map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?;
let stubs = Self::parse_listing_page(&html)?;
let items = stubs
.into_iter()
.take(per_page)
.map(Self::stub_to_item)
.collect();
Ok(items)
}
}
#[async_trait]
impl Provider for FullpornerProvider {
    /// Resolves the request to a listing and returns its videos.
    ///
    /// `page` falls back to 1 and `per_page` to `DEFAULT_PER_PAGE` when they
    /// cannot be parsed; `per_page` is clamped to `1..=48`. The sort argument
    /// is ignored. Failures are reported through `report_provider_error` and
    /// yield an empty list.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        _sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u32>().unwrap_or(1).max(1);
        let limit = per_page
            .parse::<usize>()
            .unwrap_or(DEFAULT_PER_PAGE)
            .clamp(1, 48);
        let normalized_query: Option<String> = query
            .as_deref()
            .map(str::trim)
            .filter(|q| !q.is_empty())
            .map(ToOwned::to_owned);

        let target = Self::pick_target(normalized_query.as_deref(), page_number);
        let outcome = Self::fetch_page(&target, limit, &options).await;
        match outcome {
            Ok(items) => items,
            Err(e) => {
                report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await;
                vec![]
            }
        }
    }

    /// Returns this provider's channel description.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Listing URL the provider builds for `target`.
    fn listing_url(target: Target) -> String {
        FullpornerProvider::build_listing_url(&target)
    }

    /// Thumbnail id the provider extracts from `thumb_url`.
    fn thumb_id(thumb_url: &str) -> Option<String> {
        FullpornerProvider::extract_thumb_id(thumb_url)
    }

    #[test]
    fn builds_latest_url() {
        assert_eq!(
            listing_url(Target::Latest { page: 1 }),
            "https://fullporner.com/home/1"
        );
    }

    #[test]
    fn builds_search_url() {
        let target = Target::Search {
            query: "teen".to_string(),
            page: 2,
        };
        assert_eq!(
            listing_url(target),
            "https://fullporner.com/search?q=teen&p=2"
        );
    }

    #[test]
    fn builds_category_url_page1() {
        let target = Target::Category {
            slug: "anal".to_string(),
            page: 1,
        };
        assert_eq!(listing_url(target), "https://fullporner.com/category/anal");
    }

    #[test]
    fn builds_category_url_page2() {
        let target = Target::Category {
            slug: "anal".to_string(),
            page: 2,
        };
        assert_eq!(
            listing_url(target),
            "https://fullporner.com/category/anal/2"
        );
    }

    #[test]
    fn builds_pornstar_url() {
        let target = Target::Pornstar {
            slug: "mia-malkova".to_string(),
            page: 1,
        };
        assert_eq!(
            listing_url(target),
            "https://fullporner.com/pornstar/mia-malkova"
        );
    }

    #[test]
    fn extracts_numeric_thumb_id() {
        let found = thumb_id("https://imgs.xiaoshenke.net/thumb/3195337.jpg");
        assert_eq!(found, Some("3195337".to_string()));
    }

    #[test]
    fn extracts_hex_thumb_id() {
        let found = thumb_id("https://imgs.xiaoshenke.net/thumb/6a0d75fb4182b.jpg");
        assert_eq!(found, Some("6a0d75fb4182b".to_string()));
    }

    #[test]
    fn rejects_old_thumb_format() {
        // Legacy layout: /imgs/28/54/ff64a89399f570b_main.jpg has an underscore
        // in the file name and lives under a different path.
        let found = thumb_id("https://imgs.xiaoshenke.net/imgs/28/54/ff64a89399f570b_main.jpg");
        assert_eq!(found, None);
    }

    #[test]
    fn picks_cat_target_from_query_prefix() {
        let picked = FullpornerProvider::pick_target(Some("cat:anal"), 1);
        let Target::Category { slug, page } = &picked else {
            panic!("expected Category, got {:?}", picked);
        };
        assert_eq!(slug.as_str(), "anal");
        assert_eq!(*page, 1);
    }

    #[test]
    fn picks_pornstar_target_from_query_prefix() {
        let picked = FullpornerProvider::pick_target(Some("pornstar:mia-malkova"), 1);
        let Target::Pornstar { slug, page } = &picked else {
            panic!("expected Pornstar, got {:?}", picked);
        };
        assert_eq!(slug.as_str(), "mia-malkova");
        assert_eq!(*page, 1);
    }

    #[test]
    fn parse_listing_page_extracts_cards() {
        // Minimal page holding exactly one video card.
        let html = r#"<!DOCTYPE html><html><body>
<div class="video-card">
<div class="video-card-image">
<a class="popout" href="/watch/abc123">
<img src="" class="lazy" data-src="https://imgs.xiaoshenke.net/thumb/9876543.jpg" alt="Test Video Title">
</a>
<div class="time">01:30:00</div>
</div>
<div class="video-card-body">
<div class="video-title">
<a href="/watch/abc123">Test Video Title</a>
</div>
<div class="video-view">
<span class="create">1779289876</span>
</div>
</div>
</div>
</body></html>"#;
        let stubs = FullpornerProvider::parse_listing_page(html).unwrap();
        assert_eq!(stubs.len(), 1);

        let card = &stubs[0];
        assert_eq!(card.slug, "abc123");
        assert_eq!(card.title, "Test Video Title");
        // 01:30:00 expressed in seconds.
        assert_eq!(card.duration, 5400);
        assert_eq!(card.thumb_id.as_deref(), Some("9876543"));
        assert_eq!(card.uploaded_at, Some(1779289876));
    }
}

527
src/providers/fyptt.rs Normal file
View File

@@ -0,0 +1,527 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::DateTime;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use ntex::http::Version;
use regex::Regex;
use scraper::{Html, Selector};
// Static channel metadata for this provider: its group id and tag list.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["tiktok", "vertical", "shorts"],
};
// Site root; every listing URL is built from it and it is the default Referer.
const BASE_URL: &str = "https://fyptt.to";
// Channel id used for the channel descriptor, video items and error reports.
const CHANNEL_ID: &str = "fyptt";
// Page size used when the caller's `per_page` value does not parse.
const DEFAULT_PER_PAGE: usize = 24;
// Maximum number of cards enriched (detail + player fetch) at the same time.
const ENRICH_CONCURRENCY: usize = 6;
// Known categories as `(url slug, display title)` pairs. They feed the channel's
// category filter and the slug <-> title lookups.
const CATEGORIES: &[(&str, &str)] = &[
("tiktok-nudes", "Nudes"),
("tiktok-porn", "TikTok"),
("tiktok-boobs", "Boobs"),
("instagram-porn", "Instagram"),
("tiktok-sex", "Sex"),
("nsfw-tiktok", "NSFW"),
("tiktok-xxx", "XXX"),
("tiktok-ass", "Ass"),
("tiktok-pussy", "Pussy"),
("tiktok-live", "Live"),
("tiktok-thots", "Thots"),
("sexy-tiktok", "Sexy"),
];
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `foreign_links` wraps I/O and URL-parse errors; `Parse` is a custom variant
// carrying a message. Most failures in this file are built from plain strings
// via `Error::from(format!(..))`.
error_chain! {
foreign_links {
Io(std::io::Error);
Url(url::ParseError);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Stateless provider for fyptt.to; all scraping helpers are associated functions.
#[derive(Debug, Clone)]
pub struct FypttProvider;
/// Which listing a request resolves to.
#[derive(Debug, Clone)]
enum Target {
/// The front-page feed.
Latest,
/// Keyword search; `query` is URL-encoded when the listing URL is built.
Search { query: String },
/// A category archive; `slug` is inserted into the URL path as-is.
Category { slug: String },
}
/// One card scraped from a listing page, before the detail page is fetched.
#[derive(Debug, Clone)]
struct CardStub {
/// Suffix of the card's `post-…` CSS class.
id: String,
/// Text of the card's title link, or the id when that text is blank.
title: String,
/// `href` of the card's title link (the detail page).
url: String,
/// `src` of the first image in the card; empty when there is none.
thumb: String,
/// Slug captured from a `category-…` CSS class on the card, if any.
category_slug: Option<String>,
}
impl FypttProvider {
/// Creates the provider. It is a unit struct, so there is nothing to configure.
pub fn new() -> Self {
Self
}
/// Builds the channel descriptor shown to clients.
///
/// The channel exposes a single single-select "Category" option whose
/// choices mirror `CATEGORIES` (slug as id, display title as title). The
/// client version is currently ignored.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let category_choices = CATEGORIES
        .iter()
        .map(|&(slug, title)| FilterOption {
            id: slug.to_string(),
            title: title.to_string(),
        })
        .collect();

    let category_filter = ChannelOption {
        id: "categories".to_string(),
        title: "Category".to_string(),
        description: "Browse FYPTT by content category.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "purple".to_string(),
        options: category_choices,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "FYPTT".to_string(),
        description: "FYPTT.to short-form vertical TikTok/Reels/Shorts-style porn videos with category browsing and direct playback.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=fyptt.to".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![category_filter],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` failed: {error}")))
}
/// Resolves a category slug or display title to its canonical slug.
///
/// The input is trimmed and lower-cased, then compared ASCII-case-insensitively
/// against every known slug and title. Returns `None` when nothing matches.
fn category_slug_for(query: &str) -> Option<&'static str> {
    let needle = query.trim().to_lowercase();
    for &(slug, title) in CATEGORIES {
        let matches_slug = slug.eq_ignore_ascii_case(&needle);
        if matches_slug || title.eq_ignore_ascii_case(&needle) {
            return Some(slug);
        }
    }
    None
}
/// Decides which listing a free-text query should resolve to.
///
/// * `None` or a blank query -> [`Target::Latest`].
/// * `cat:<x>` / `category:<x>` -> [`Target::Category`]; `<x>` may be a known
///   slug or display title (mapped to the canonical slug) or any other slug,
///   which is passed through trimmed.
/// * Anything else -> [`Target::Search`] with the trimmed query.
///
/// A prefix with nothing after it (`"cat:"`, `"category:  "`) falls back to
/// [`Target::Latest`]; an empty slug would otherwise build the malformed
/// listing URL `https://fyptt.to//`.
fn pick_target(query: Option<&str>) -> Target {
    let Some(q) = query.map(str::trim).filter(|q| !q.is_empty()) else {
        return Target::Latest;
    };

    // Only an explicit `cat:`/`category:` prefix routes to a category
    // archive. Bare category-name words ("sexy", "ass", "tiktok", ...)
    // are far more common as real search terms on this site, so they
    // must fall through to keyword search rather than being hijacked.
    let Some(raw_slug) = q
        .strip_prefix("cat:")
        .or_else(|| q.strip_prefix("category:"))
    else {
        return Target::Search {
            query: q.to_string(),
        };
    };

    let slug = raw_slug.trim();
    if slug.is_empty() {
        // No category was actually named; treat it as "no filter".
        return Target::Latest;
    }

    // Prefer the canonical slug when the text names a known category.
    let slug = Self::category_slug_for(slug).unwrap_or(slug);
    Target::Category {
        slug: slug.to_string(),
    }
}
/// Builds the listing URL for `target` at the given 1-based `page`.
///
/// Page 0 is treated as page 1. The first page uses the bare archive URL;
/// later pages insert a `/page/{n}/` segment. Search queries are
/// form-URL-encoded; category slugs are inserted verbatim.
fn build_listing_url(target: &Target, page: u16) -> String {
    let page = page.max(1);
    let first_page = page <= 1;
    match target {
        Target::Latest if first_page => format!("{BASE_URL}/"),
        Target::Latest => format!("{BASE_URL}/page/{page}/"),
        Target::Search { query } => {
            let encoded: String =
                url::form_urlencoded::byte_serialize(query.as_bytes()).collect();
            match first_page {
                true => format!("{BASE_URL}/?s={encoded}"),
                false => format!("{BASE_URL}/page/{page}/?s={encoded}"),
            }
        }
        Target::Category { slug } if first_page => format!("{BASE_URL}/{slug}/"),
        Target::Category { slug } => format!("{BASE_URL}/{slug}/page/{page}/"),
    }
}
/// Request headers sent with every HTML fetch: a fixed desktop-Chrome
/// `User-Agent` followed by the supplied `Referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
    let pairs: [(&str, &str); 2] = [("User-Agent", USER_AGENT), ("Referer", referer)];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}
async fn fetch_html(options: &ServerOptions, url: &str, referer: &str) -> Result<String> {
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html");
requester
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
fn parse_listing_page(html: &str) -> Result<Vec<CardStub>> {
let document = Html::parse_document(html);
let card_sel = Self::selector(".fl-post-grid-post")?;
let link_sel = Self::selector(".fl-post-grid-title a[href]")?;
let img_sel = Self::selector("img[src]")?;
let class_regex = Regex::new(r"category-([a-z0-9-]+)")
.map_err(|error| Error::from(format!("regex failed: {error}")))?;
let mut stubs = Vec::new();
let mut seen_ids = std::collections::HashSet::new();
for card in document.select(&card_sel) {
let class_attr = card.value().attr("class").unwrap_or_default();
let id = class_attr
.split_whitespace()
.find_map(|c| c.strip_prefix("post-"))
.map(str::to_string)
.unwrap_or_default();
if id.is_empty() || !seen_ids.insert(id.clone()) {
continue;
}
let link = card.select(&link_sel).next();
let url = link
.and_then(|a| a.value().attr("href"))
.map(str::to_string)
.unwrap_or_default();
if url.is_empty() {
continue;
}
let title = link
.map(|a| a.text().collect::<Vec<_>>().join(""))
.map(|t| t.trim().to_string())
.filter(|t| !t.is_empty())
.unwrap_or_else(|| id.clone());
let thumb = card
.select(&img_sel)
.next()
.and_then(|img| img.value().attr("src"))
.map(str::to_string)
.unwrap_or_default();
let category_slug = class_regex
.captures(class_attr)
.and_then(|caps| caps.get(1))
.map(|m| m.as_str().to_string());
stubs.push(CardStub {
id,
title,
url,
thumb,
category_slug,
});
}
Ok(stubs)
}
/// Fetches a listing page (with the site root as Referer) and parses its cards.
async fn fetch_listing(url: &str, options: &ServerOptions) -> Result<Vec<CardStub>> {
    let body = Self::fetch_html(options, url, BASE_URL).await?;
    let stubs = Self::parse_listing_page(&body)?;
    Ok(stubs)
}
/// Pulls the `embedURL` value out of the page's embedded JSON.
///
/// Looks for the first literal `"embedURL":"`, takes everything up to the next
/// double quote, and un-escapes JSON `\/` sequences. Returns `None` when the
/// marker or the closing quote is missing.
fn extract_embed_url(html: &str) -> Option<String> {
    const MARKER: &str = "\"embedURL\":\"";
    let (_, after_marker) = html.split_once(MARKER)?;
    let (escaped, _) = after_marker.split_once('"')?;
    Some(escaped.replace("\\/", "/"))
}
/// Reads the `datePublished` value from the page's embedded JSON and returns
/// it as Unix seconds.
///
/// Looks for the first literal `"datePublished":"`, takes everything up to the
/// next double quote, un-escapes JSON `\/`, and parses the result as RFC 3339.
/// Returns `None` when the marker or closing quote is missing, the value is
/// not valid RFC 3339, or the instant lies before the Unix epoch (a negative
/// timestamp cannot be represented as `u64` and would otherwise wrap to a
/// huge value).
fn extract_uploaded_at(html: &str) -> Option<u64> {
    const MARKER: &str = "\"datePublished\":\"";
    let start = html.find(MARKER)? + MARKER.len();
    let rest = &html[start..];
    let end = rest.find('"')?;
    let raw = rest[..end].replace("\\/", "/");
    let seconds = DateTime::parse_from_rfc3339(&raw).ok()?.timestamp();
    if seconds < 0 {
        return None;
    }
    // Non-negative i64 always fits in u64, so this cast is lossless.
    Some(seconds as u64)
}
/// Finds the first `https://stream.fyptt.to/…` URL in the player markup.
///
/// The URL runs from the prefix up to the first character that cannot be
/// part of it in the surrounding markup: a single or double quote, `<`, `>`,
/// a backslash, or any whitespace. Without the extra terminators an unquoted
/// or unterminated occurrence would swallow the rest of the document into the
/// returned "URL". If no terminator follows, the URL extends to the end of
/// the input. Returns `None` when the prefix does not occur.
fn extract_stream_url(html: &str) -> Option<String> {
    const PREFIX: &str = "https://stream.fyptt.to/";
    let start = html.find(PREFIX)?;
    let rest = &html[start..];
    let end = rest
        .find(|c: char| matches!(c, '"' | '\'' | '<' | '>' | '\\') || c.is_whitespace())
        .unwrap_or(rest.len());
    Some(rest[..end].to_string())
}
/// Returns the display title for an exact (case-sensitive) category slug.
fn category_title(slug: &str) -> Option<&'static str> {
    for &(known_slug, title) in CATEGORIES {
        if known_slug == slug {
            return Some(title);
        }
    }
    None
}
async fn enrich_card(stub: CardStub, options: &ServerOptions) -> Result<VideoItem> {
let detail_html = Self::fetch_html(options, &stub.url, BASE_URL).await?;
let embed_url = Self::extract_embed_url(&detail_html)
.ok_or_else(|| Error::from(format!("no embedURL found in {}", stub.url)))?;
let uploaded_at = Self::extract_uploaded_at(&detail_html);
let player_html = Self::fetch_html(options, &embed_url, &stub.url).await?;
let stream_url = Self::extract_stream_url(&player_html)
.ok_or_else(|| Error::from(format!("no stream url found via {embed_url}")))?;
let format = if stream_url.contains(".m3u8") {
VideoFormat::m3u8(stream_url, "auto".to_string(), "m3u8".to_string())
} else {
VideoFormat::new(stream_url, "auto".to_string(), "mp4".to_string())
};
let mut item = VideoItem::new(
stub.id,
stub.title,
stub.url,
CHANNEL_ID.to_string(),
stub.thumb,
0,
);
item.formats = Some(vec![format]);
item.uploadedAt = uploaded_at;
item.aspectRatio = Some(9.0 / 16.0);
if let Some(tag) = stub
.category_slug
.as_deref()
.and_then(Self::category_title)
{
item.tags = Some(vec![tag.to_string()]);
}
Ok(item)
}
async fn fetch_page(
target: Target,
page: u16,
per_page: usize,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
let url = Self::build_listing_url(&target, page);
let stubs = Self::fetch_listing(&url, options).await?;
let limited: Vec<_> = stubs.into_iter().take(per_page).collect();
let options = options.clone();
let items = stream::iter(limited.into_iter().map(|stub| {
let options = options.clone();
async move {
match Self::enrich_card(stub, &options).await {
Ok(item) => Some(item),
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"fetch_page.enrich_card",
&error.to_string(),
);
None
}
}
}
}))
.buffer_unordered(ENRICH_CONCURRENCY)
.filter_map(async move |v| v)
.collect::<Vec<_>>()
.await;
Ok(items)
}
}
#[async_trait]
impl Provider for FypttProvider {
    /// Returns one page of videos.
    ///
    /// `page` and `per_page` arrive as strings: an unparsable `page` becomes 1,
    /// an unparsable `per_page` becomes `DEFAULT_PER_PAGE`, and the page size is
    /// clamped to `1..=48`. A non-blank `categories` option takes precedence
    /// over the query and always selects a category archive (known slugs and
    /// titles are canonicalised, anything else is used as the slug verbatim).
    /// Otherwise the query decides the target. Failures are reported through
    /// `report_provider_error` and produce an empty list.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        _sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = match page.parse::<u16>() {
            Ok(value) => value.max(1),
            Err(_) => 1,
        };
        let limit = match per_page.parse::<usize>() {
            Ok(value) => value.clamp(1, 48),
            Err(_) => DEFAULT_PER_PAGE.clamp(1, 48),
        };

        // Whitespace-only values count as absent for both inputs.
        let search_terms = match query.as_deref().map(str::trim) {
            Some(text) if !text.is_empty() => Some(text),
            _ => None,
        };
        let category_filter = match options.categories.as_deref().map(str::trim) {
            Some(text) if !text.is_empty() => Some(text),
            _ => None,
        };

        let target = if let Some(cat) = category_filter {
            let slug = Self::category_slug_for(cat).unwrap_or(cat);
            Target::Category {
                slug: slug.to_string(),
            }
        } else {
            Self::pick_target(search_terms)
        };

        let failure = match Self::fetch_page(target, page_number, limit, &options).await {
            Ok(items) => return items,
            Err(error) => error,
        };
        report_provider_error(CHANNEL_ID, "get_videos", &failure.to_string()).await;
        Vec::new()
    }

    /// Returns the channel descriptor for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Listing URL the provider builds for `target` at `page`.
    fn listing_url(target: Target, page: u16) -> String {
        FypttProvider::build_listing_url(&target, page)
    }

    /// Category target for the `tiktok-ass` slug.
    fn ass_category() -> Target {
        Target::Category {
            slug: "tiktok-ass".to_string(),
        }
    }

    #[test]
    fn builds_listing_urls() {
        assert_eq!(listing_url(Target::Latest, 1), "https://fyptt.to/");
        assert_eq!(listing_url(Target::Latest, 2), "https://fyptt.to/page/2/");

        let two_words = Target::Search {
            query: "thot ass".to_string(),
        };
        assert_eq!(listing_url(two_words, 1), "https://fyptt.to/?s=thot+ass");

        let one_word = Target::Search {
            query: "thot".to_string(),
        };
        assert_eq!(listing_url(one_word, 2), "https://fyptt.to/page/2/?s=thot");

        assert_eq!(listing_url(ass_category(), 1), "https://fyptt.to/tiktok-ass/");
        assert_eq!(
            listing_url(ass_category(), 2),
            "https://fyptt.to/tiktok-ass/page/2/"
        );
    }

    #[test]
    fn picks_category_target_from_prefix() {
        let picked = FypttProvider::pick_target(Some("cat:tiktok-ass"));
        let Target::Category { slug } = &picked else {
            panic!("expected Category, got {:?}", picked);
        };
        assert_eq!(slug.as_str(), "tiktok-ass");
    }

    #[test]
    fn category_name_word_routes_to_search_not_category() {
        // "Boobs"/"sexy"/"tiktok" are category names but also common search
        // terms; a bare query must search, not hijack to the category archive.
        for word in ["Boobs", "sexy", "tiktok", "ass"] {
            let picked = FypttProvider::pick_target(Some(word));
            let Target::Search { query } = &picked else {
                panic!("expected Search for {word:?}, got {:?}", picked);
            };
            assert_eq!(query.as_str(), word);
        }
    }

    #[test]
    fn picks_search_target_for_unknown_query() {
        let picked = FypttProvider::pick_target(Some("amateur"));
        let Target::Search { query } = &picked else {
            panic!("expected Search, got {:?}", picked);
        };
        assert_eq!(query.as_str(), "amateur");
    }

    #[test]
    fn extracts_embed_url_from_json_ld() {
        let html = r#"{"@type":"VideoObject","embedURL":"https:\/\/fyptt.to\/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug","name":"Title"}"#;
        let found = FypttProvider::extract_embed_url(html);
        assert_eq!(
            found.as_deref(),
            Some("https://fyptt.to/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug")
        );
    }

    #[test]
    fn extracts_uploaded_at_from_json_ld() {
        let html = r#"{"datePublished":"2026-05-29T13:06:45+00:00"}"#;
        let found = FypttProvider::extract_uploaded_at(html);
        assert_eq!(found, Some(1780060005));
    }

    #[test]
    fn extracts_mp4_stream_url() {
        let html = r#"<source src="https://stream.fyptt.to/1jyYHhjN.mp4?token=AAA&expires=111" type="video/mp4" />"#;
        let found = FypttProvider::extract_stream_url(html);
        assert_eq!(
            found.as_deref(),
            Some("https://stream.fyptt.to/1jyYHhjN.mp4?token=AAA&expires=111")
        );
    }

    #[test]
    fn extracts_hls_stream_url() {
        let html = r#"<source src="https://stream.fyptt.to/hls/J2KKe8ya.m3u8?token=BBB&expires=222" type="application/x-mpegURL" />"#;
        let found = FypttProvider::extract_stream_url(html);
        assert_eq!(
            found.as_deref(),
            Some("https://stream.fyptt.to/hls/J2KKe8ya.m3u8?token=BBB&expires=222")
        );
    }

    #[test]
    fn parses_listing_cards() {
        // One card carrying the full set of classes seen on the real site.
        let html = r#"<div class="fl-post-grid-post fl-post-grid-image-above-title fl-post-columns-post fl-post-align-default post-23135 post type-post status-publish format-standard has-post-thumbnail hentry category-nsfw-tiktok girl-fernandamotafarhat"><div class="fl-post-grid-image"> <a href="https://fyptt.to/23135/slug/"><img src="https://fyptt.to/thumb.webp" /></a></div><div class="fl-post-grid-text"><h3 class="fl-post-grid-title"><a href="https://fyptt.to/23135/slug/" title="Some Title">Some Title</a></h3></div></div>"#;
        let stubs = FypttProvider::parse_listing_page(html).unwrap();
        assert_eq!(stubs.len(), 1);

        let card = &stubs[0];
        assert_eq!(card.id, "23135");
        assert_eq!(card.title, "Some Title");
        assert_eq!(card.url, "https://fyptt.to/23135/slug/");
        assert_eq!(card.thumb, "https://fyptt.to/thumb.webp");
        assert_eq!(card.category_slug.as_deref(), Some("nsfw-tiktok"));
    }
}

View File

@@ -174,99 +174,47 @@ impl HanimeProvider {
}
}
async fn get_video_item(
&self,
hit: HanimeSearchResult,
pool: DbPool,
options: ServerOptions,
) -> Result<VideoItem> {
let mut conn = match pool.get() {
Ok(conn) => conn,
Err(e) => {
report_provider_error("hanime", "get_video_item.pool_get", &e.to_string()).await;
return Err(Error::from("Failed to get DB connection"));
}
};
let db_result = db::get_video(
&mut conn,
format!(
"https://h.freeanimehentai.net/api/v8/video?id={}&",
hit.slug.clone()
),
);
drop(conn);
let id = hit.id.to_string();
let title = hit.name;
let thumb = crate::providers::build_proxy_url(
&options,
"hanime-cdn",
&crate::providers::strip_url_scheme(&hit.cover_url),
);
let duration = (hit.duration_in_ms / 1000) as u32; // Convert ms to seconds
let channel = "hanime".to_string(); // Placeholder, adjust as needed
match db_result {
Ok(Some(video_url)) => {
if video_url != "https://streamable.cloud/hls/stream.m3u8" {
return Ok(VideoItem::new(
id,
title,
video_url.clone(),
channel,
thumb,
duration,
)
.tags(hit.tags)
.uploader(hit.brand)
.views(hit.views as u32)
.rating((hit.likes as f32 / (hit.likes + hit.dislikes) as f32) * 100 as f32)
.aspect_ratio(0.68)
.formats(vec![videos::VideoFormat::new(
video_url.clone(),
"1080".to_string(),
"m3u8".to_string(),
)]));
} else {
match pool.get() {
Ok(mut conn) => {
let _ = db::delete_video(
&mut conn,
format!(
"https://h.freeanimehentai.net/api/v8/video?id={}&",
hit.slug.clone()
),
);
}
Err(e) => {
report_provider_error_background(
"hanime",
"get_video_item.delete_video.pool_get",
&e.to_string(),
);
}
}
}
}
Ok(None) => (),
Err(e) => {
println!("Error fetching video from database: {}", e);
// return Err(format!("Error fetching video from database: {}", e).into());
}
}
let url = format!(
"https://cached.freeanimehentai.net/api/v8/guest/videos/{}/manifest",
id
);
fn db_key(slug: &str) -> String {
format!("https://h.freeanimehentai.net/api/v8/video?id={slug}&")
}
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let payload = json!({
"width": 571, "height": 703, "ab": "kh" }
/// Assembles a hanime `VideoItem` with a single 1080p HLS format.
///
/// `video_url` is used both as the item URL and as the format URL. The
/// rating is the share of likes among all votes, as a percentage.
///
/// When a video has no votes at all, no rating is set: computing
/// `likes / (likes + dislikes)` would be `0 / 0`, i.e. `NaN`. A view count
/// above `u32::MAX` is saturated instead of being silently truncated by the
/// narrowing cast.
fn build_video_item(
    id: String,
    title: String,
    video_url: String,
    channel: String,
    thumb: String,
    duration: u32,
    tags: Vec<String>,
    brand: String,
    views: u64,
    likes: u64,
    dislikes: u64,
) -> VideoItem {
    // Clamp first so the narrowing cast below cannot lose information.
    let views = views.min(u64::from(u32::MAX)) as u32;
    let total_votes = likes.saturating_add(dislikes);

    let item = VideoItem::new(id, title, video_url.clone(), channel, thumb, duration)
        .tags(tags)
        .uploader(brand)
        .views(views);

    // Only attach a rating when there is at least one vote to base it on.
    let item = if total_votes == 0 {
        item
    } else {
        item.rating((likes as f32 / total_votes as f32) * 100_f32)
    };

    item.aspect_ratio(0.68)
        .formats(vec![videos::VideoFormat::new(
            video_url,
            "1080".to_string(),
            "m3u8".to_string(),
        )])
}
async fn fetch_stream_url(&self, id: &str, slug: &str, options: &ServerOptions) -> Result<String> {
let manifest_url = format!(
"https://cached.freeanimehentai.net/api/v8/guest/videos/{id}/manifest"
);
let mut requester =
crate::providers::requester_or_default(options, module_path!(), "missing_requester");
let payload = json!({ "width": 571, "height": 703, "ab": "kh" });
let _ = requester
.post_json(
&format!(
"https://cached.freeanimehentai.net/api/v8/hentai_videos/{}/play",
hit.slug
"https://cached.freeanimehentai.net/api/v8/hentai_videos/{slug}/play"
),
&payload,
vec![
@@ -274,11 +222,11 @@ impl HanimeProvider {
("Referer".to_string(), "https://hanime.tv/".to_string()),
],
)
.await; // Initial request to set cookies
.await;
ntex::time::sleep(ntex::time::Seconds(1)).await;
let text = requester
.get_raw_with_headers(
&url,
&manifest_url,
vec![
("Origin".to_string(), "https://hanime.tv".to_string()),
("Referer".to_string(), "https://hanime.tv/".to_string()),
@@ -288,77 +236,97 @@ impl HanimeProvider {
.map_err(|e| {
report_provider_error_background(
"hanime",
"get_video_item.get_raw_with_headers",
"fetch_stream_url.get_raw_with_headers",
&e.to_string(),
);
Error::from(format!("Failed to fetch manifest response: {e}"))
Error::from(format!("Failed to fetch manifest: {e}"))
})?
.text()
.await
.map_err(|e| {
report_provider_error_background(
"hanime",
"get_video_item.response_text",
"fetch_stream_url.response_text",
&e.to_string(),
);
Error::from(format!("Failed to decode manifest response body: {e}"))
Error::from(format!("Failed to decode manifest body: {e}"))
})?;
if text.contains("Unautho") {
println!("Fetched video details for {}: {}", title, text);
return Err(Error::from("Unauthorized"));
}
let urls = text
let urls_section = text
.split("streams")
.nth(1)
.ok_or_else(|| Error::from("Missing streams section in manifest"))?;
let mut url_vec = vec![];
for el in urls.split("\"url\":\"").collect::<Vec<&str>>() {
let url = el
.split("\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default();
let mut url_vec = vec![];
for el in urls_section.split("\"url\":\"") {
let url = el.split('"').next().unwrap_or_default();
if !url.is_empty() && url.contains("m3u8") {
url_vec.push(url.to_string());
}
}
let first_url = url_vec
.first()
.cloned()
.ok_or_else(|| Error::from("No stream URL found in manifest"))?;
match pool.get() {
Ok(mut conn) => {
let _ = db::insert_video(
&mut conn,
&format!(
"https://h.freeanimehentai.net/api/v8/video?id={}&",
hit.slug.clone()
),
&first_url,
);
url_vec
.into_iter()
.next()
.ok_or_else(|| Error::from("No stream URL found in manifest"))
}
async fn get_video_item(
&self,
hit: HanimeSearchResult,
pool: DbPool,
options: ServerOptions,
) -> Result<VideoItem> {
let id = hit.id.to_string();
let title = hit.name;
let thumb = crate::providers::build_proxy_url(
&options,
"hanime-cdn",
&crate::providers::strip_url_scheme(&hit.cover_url),
);
let duration = (hit.duration_in_ms / 1000) as u32;
let channel = "hanime".to_string();
let db_key = Self::db_key(&hit.slug);
match self.fetch_stream_url(&id, &hit.slug, &options).await {
Ok(stream_url) => {
if let Ok(mut conn) = pool.get() {
let _ = db::insert_video(&mut conn, &db_key, &stream_url);
}
return Ok(Self::build_video_item(
id, title, stream_url, channel, thumb, duration,
hit.tags, hit.brand, hit.views, hit.likes, hit.dislikes,
));
}
Err(e) => {
report_provider_error_background(
"hanime",
"get_video_item.insert_video.pool_get",
&e.to_string(),
);
report_provider_error_background("hanime", "get_video_item.fetch_stream_url", &e.to_string());
}
}
Ok(
VideoItem::new(id, title, first_url.clone(), channel, thumb, duration)
.tags(hit.tags)
.uploader(hit.brand)
.views(hit.views as u32)
.rating((hit.likes as f32 / (hit.likes + hit.dislikes) as f32) * 100 as f32)
.formats(vec![videos::VideoFormat::new(
first_url,
"1080".to_string(),
"m3u8".to_string(),
)]),
)
// API failed — fall back to DB
let db_result = pool.get().ok().and_then(|mut conn| {
db::get_video(&mut conn, db_key.clone()).ok().flatten()
});
match db_result {
Some(video_url) if video_url != "https://streamable.cloud/hls/stream.m3u8" => {
Ok(Self::build_video_item(
id, title, video_url, channel, thumb, duration,
hit.tags, hit.brand, hit.views, hit.likes, hit.dislikes,
))
}
Some(_) => {
if let Ok(mut conn) = pool.get() {
let _ = db::delete_video(&mut conn, db_key);
}
Err(Error::from("Stale DB entry and API unavailable"))
}
None => Err(Error::from("API unavailable and no DB fallback")),
}
}
async fn get(

View File

@@ -11,13 +11,13 @@ use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use url::Url;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
@@ -169,11 +169,10 @@ impl HeavyfetishProvider {
Channel {
id: "heavyfetish".to_string(),
name: "HeavyFetish".to_string(),
description:
"HeavyFetish videos, categories, tags, models, and uploader archives.".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=heavyfetish.com"
description: "HeavyFetish videos, categories, tags, models, and uploader archives."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=heavyfetish.com".to_string(),
status: "active".to_string(),
categories: categories.iter().map(|value| value.title.clone()).collect(),
options: vec![
@@ -321,11 +320,7 @@ impl HeavyfetishProvider {
}
fn parse_percent(text: &str) -> Option<f32> {
text.trim()
.trim_end_matches('%')
.trim()
.parse::<f32>()
.ok()
text.trim().trim_end_matches('%').trim().parse::<f32>().ok()
}
fn sort_param(sort: &str) -> Option<&'static str> {
@@ -354,6 +349,31 @@ impl HeavyfetishProvider {
.to_string()
}
/// Derives a video id from a video page URL.
///
/// The id is the path segment right after the first `videos` segment when
/// that segment is non-empty; otherwise it is the last path segment (which is
/// empty for URLs ending in `/`). Returns an empty string when the URL cannot
/// be parsed or has no path segments.
fn video_id_from_page_url(page_url: &str) -> String {
    let parsed_url = match Url::parse(page_url) {
        Ok(value) => value,
        Err(_) => return String::new(),
    };
    let segments: Vec<&str> = match parsed_url.path_segments() {
        Some(parts) => parts.collect(),
        None => Vec::new(),
    };

    let after_videos = segments
        .iter()
        .position(|segment| *segment == "videos")
        .and_then(|index| segments.get(index + 1))
        .filter(|primary| !primary.is_empty());

    match after_videos {
        Some(primary) => (*primary).to_string(),
        None => segments.last().copied().unwrap_or_default().to_string(),
    }
}
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
if item.id.is_empty() || item.title.is_empty() {
return;
@@ -414,7 +434,11 @@ impl HeavyfetishProvider {
let selector = Self::selector("a[href]")?;
for element in document.select(&selector) {
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
let href = element
.value()
.attr("href")
.unwrap_or_default()
.trim_end_matches('/');
if !href.starts_with(&format!("{base_url}/tags/")) {
continue;
}
@@ -462,7 +486,11 @@ impl HeavyfetishProvider {
let document = Html::parse_document(&html);
for element in document.select(&selector) {
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
let href = element
.value()
.attr("href")
.unwrap_or_default()
.trim_end_matches('/');
let title = element
.value()
.attr("title")
@@ -505,7 +533,11 @@ impl HeavyfetishProvider {
let document = Html::parse_document(&html);
for element in document.select(&selector) {
let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/');
let href = element
.value()
.attr("href")
.unwrap_or_default()
.trim_end_matches('/');
if !href.starts_with(&format!("{base_url}/members/")) {
continue;
}
@@ -586,11 +618,17 @@ impl HeavyfetishProvider {
}
}
fn match_filter(options: &[FilterOption], query: &str, kind: TargetKind) -> Option<QueryTarget> {
fn match_filter(
options: &[FilterOption],
query: &str,
kind: TargetKind,
) -> Option<QueryTarget> {
let normalized_query = Self::normalize_title(query);
options
.iter()
.find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query)
.find(|value| {
value.id != "all" && Self::normalize_title(&value.title) == normalized_query
})
.map(|value| QueryTarget {
kind,
value: value.id.clone(),
@@ -679,26 +717,8 @@ impl HeavyfetishProvider {
}
}
fn list_container<'a>(&self, document: &'a Html) -> Result<Option<ElementRef<'a>>> {
for selector_text in [
"#list_videos_fetish_videos_list_items",
"#list_videos_videos_list_search_result_items",
"#list_videos_common_videos_list_items",
"#list_videos_uploaded_videos_items",
] {
let selector = Self::selector(selector_text)?;
if let Some(element) = document.select(&selector).next() {
return Ok(Some(element));
}
}
Ok(None)
}
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
let Some(container) = self.list_container(&document)? else {
return Ok(vec![]);
};
let card_selector = Self::selector("div.item.hf-video-item")?;
let link_selector = Self::selector("a[href*=\"/videos/\"]")?;
@@ -710,19 +730,13 @@ impl HeavyfetishProvider {
let mut items = Vec::new();
for card in container.select(&card_selector) {
for card in document.select(&card_selector) {
let Some(link) = card.select(&link_selector).next() else {
continue;
};
let href = link.value().attr("href").unwrap_or_default();
let page_url = self.normalize_url(href);
let id = page_url
.trim_end_matches('/')
.split('/')
.nth_back(1)
.unwrap_or_default()
.to_string();
let id = Self::video_id_from_page_url(&page_url);
if id.is_empty() || page_url.is_empty() {
continue;
@@ -857,7 +871,7 @@ impl HeavyfetishProvider {
let Some(url) = seen.get(quality) else {
continue;
};
let format = VideoFormat::new(url.clone(), quality.to_string(), "mp4".to_string())
let format = VideoFormat::new(page_url.to_string().clone(), quality.to_string(), "mp4".to_string())
.format_id(quality.to_string())
.http_header("Referer".to_string(), page_url.to_string())
.http_header("User-Agent".to_string(), BROWSER_UA.to_string());
@@ -895,8 +909,7 @@ impl HeavyfetishProvider {
let info_span_selector = Self::selector(".block-details .info .item span")?;
let category_selector = Self::selector(".block-details .info a[href*=\"/categories/\"]")?;
let tag_selector = Self::selector(".block-details .info a[href*=\"/tags/\"]")?;
let model_selector =
Self::selector(".block-details .info a[href*=\"/fetish-models/\"]")?;
let model_selector = Self::selector(".block-details .info a[href*=\"/fetish-models/\"]")?;
if let Some(title) = Self::extract_js_value(&flashvars, &title_regex) {
if !title.is_empty() {
@@ -904,11 +917,6 @@ impl HeavyfetishProvider {
}
}
let formats = self.build_formats(html, page_url)?;
if !formats.is_empty() {
item = item.formats(formats);
}
let uploader_link = document.select(&uploader_selector).next();
let uploader = uploader_link
.as_ref()
@@ -1079,8 +1087,11 @@ impl HeavyfetishProvider {
}
}
let mut requester =
requester_or_default(options, "heavyfetish", "fetch_items_for_url.requester_missing");
let mut requester = requester_or_default(
options,
"heavyfetish",
"fetch_items_for_url.requester_missing",
);
let html = requester
.get(&url, None)
.await
@@ -1091,20 +1102,11 @@ impl HeavyfetishProvider {
return Ok(vec![]);
}
let limited_items = list_items
let items = list_items
.into_iter()
.take(per_page_limit.max(1))
.collect::<Vec<_>>();
let items = stream::iter(limited_items.into_iter().map(|item| {
let provider = self.clone();
let options = options.clone();
async move { provider.enrich_video(item, &options).await }
}))
.buffer_unordered(4)
.collect::<Vec<_>>()
.await;
if !items.is_empty() {
cache.insert(url, items.clone());
}
@@ -1258,4 +1260,20 @@ mod tests {
Some("https://heavyfetish.com/list-preview.mp4")
);
}
#[test]
fn extracts_video_id_from_slug_only_layout() {
let id = HeavyfetishProvider::video_id_from_page_url(
"https://heavyfetish.com/videos/mistress-gaia-silky-feet-sniffer/",
);
assert_eq!(id, "mistress-gaia-silky-feet-sniffer");
}
#[test]
fn extracts_video_id_from_legacy_numeric_layout() {
let id = HeavyfetishProvider::video_id_from_page_url(
"https://heavyfetish.com/videos/120660/example/",
);
assert_eq!(id, "120660");
}
}

View File

@@ -2,19 +2,55 @@ use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::{DbPool, db};
use async_trait::async_trait;
use base64::{Engine as _, engine::general_purpose::STANDARD};
use error_chain::error_chain;
use futures::future::join_all;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use serde::Deserialize;
use std::collections::HashSet;
use std::sync::{Arc, Mutex, OnceLock, RwLock};
use std::vec;
use titlecase::Titlecase;
use wreq::Version;
use wreq_util::Emulation;
// How long a cached listing/search entry is considered usable at all.
const HARD_TTL_SECS: u64 = 60 * 60 * 24;
// Past this age we still answer instantly from cache/DB but trigger a
// background refresh so the next request gets fresh data / renewed signed URLs.
const SOFT_TTL_SECS: u64 = 60 * 60;
/// Player configuration obtained by decoding the `x-secure-token` value found
/// on the player page (see `decode_secure_token`).
#[derive(Debug, Deserialize)]
struct PlayerSecureConfig {
/// Opaque value forwarded to the player API as form field `a`.
en: String,
/// Opaque value forwarded to the player API as form field `b`.
iv: String,
/// Base of the player API endpoint; `api.php` is appended to it. May be
/// protocol-relative (`//host/...`), in which case `https:` is prepended.
uri: String,
}
/// One playable source entry from the player API response.
#[derive(Debug, Deserialize)]
struct PlayerApiSource {
/// Media URL; used as the URL of the resulting `VideoFormat`.
src: String,
/// Quality label. Defaults to an empty string when the field is absent, in
/// which case the format quality falls back to `"auto"`.
#[serde(default)]
label: String,
}
/// `data` payload of the player API response.
#[derive(Debug, Deserialize, Default)]
struct PlayerApiData {
/// Available sources; defaults to an empty list when the field is absent.
/// Only the first entry is consumed when building a format.
#[serde(default)]
sources: Vec<PlayerApiSource>,
}
/// Top-level JSON body returned by the player API (`api.php`).
#[derive(Debug, Deserialize)]
struct PlayerApiResponse {
/// Success flag; a `false` value is treated as an error by the caller.
status: bool,
/// Payload with the sources; `None` when the field is absent.
#[serde(default)]
data: Option<PlayerApiData>,
}
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
@@ -96,22 +132,30 @@ impl HentaihavenProvider {
) -> Result<Vec<VideoItem>> {
let _ = sort;
let video_url = format!("{}/hentai/page/{}/", self.url, page);
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 * 24 {
return Ok(items.clone());
} else {
items.clone()
}
}
None => {
vec![]
}
};
// Fast path: a usable in-memory entry exists. Answer immediately; once it
// is older than the soft TTL, kick a background refresh so the next caller
// sees fresher data without anyone waiting on it now.
if let Some((time, items)) = cache.get(&video_url) {
let age = time.elapsed().unwrap_or_default().as_secs();
if age < HARD_TTL_SECS && !items.is_empty() {
if age >= SOFT_TTL_SECS {
let requester = crate::providers::requester_or_default(
&options,
module_path!(),
"missing_requester",
);
self.spawn_refresh(requester, pool, cache, video_url, None, false);
}
return Ok(items);
}
}
// Fetch the listing page (a single cheap request) to learn which episode
// URLs belong on this page and in what order.
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, Some(Version::HTTP_2)).await {
let text = match Self::get_with_retry(&mut requester, &video_url, 3).await {
Ok(text) => text,
Err(e) => {
crate::providers::report_provider_error(
@@ -120,19 +164,40 @@ impl HentaihavenProvider {
&format!("url={video_url}; error={e}"),
)
.await;
return Ok(old_items);
return Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default());
}
};
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester, pool.clone())
.await;
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
let urls = Self::parse_listing_urls(&text);
if urls.is_empty() {
return Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default());
}
Ok(video_items)
// Serve whatever we have already resolved (from the DB) right away, then
// refresh the entire listing in the background.
let db_items = Self::items_from_db(&urls, &pool);
if !db_items.is_empty() {
cache.insert(video_url.clone(), db_items.clone());
self.spawn_refresh(requester, pool, cache, video_url, Some(urls), false);
return Ok(db_items);
}
// Cold start: nothing cached for any item yet, resolve synchronously this
// one time so the first ever request is not empty.
let items = self.resolve_urls(urls, &requester, pool).await;
if !items.is_empty() {
cache.insert(video_url.clone(), items.clone());
return Ok(items);
}
Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default())
}
async fn query(
@@ -143,25 +208,29 @@ impl HentaihavenProvider {
options: ServerOptions,
pool: DbPool,
) -> Result<Vec<VideoItem>> {
let video_url = format!("{}/?s={}", self.url, query.replace(" ", "+"),);
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 * 24 {
return Ok(items.clone());
} else {
let _ = cache.check().await;
return Ok(items.clone());
if page > 1 {
return Ok(vec![]);
}
let video_url = format!("{}/?s={}", self.url, query.replace(" ", "+"));
if let Some((time, items)) = cache.get(&video_url) {
let age = time.elapsed().unwrap_or_default().as_secs();
if age < HARD_TTL_SECS && !items.is_empty() {
if age >= SOFT_TTL_SECS {
let requester = crate::providers::requester_or_default(
&options,
module_path!(),
"missing_requester",
);
self.spawn_refresh(requester, pool, cache, video_url, None, true);
}
return Ok(items);
}
None => {
vec![]
}
};
}
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, Some(Version::HTTP_2)).await {
let text = match Self::get_with_retry(&mut requester, &video_url, 3).await {
Ok(text) => text,
Err(e) => {
crate::providers::report_provider_error(
@@ -170,105 +239,78 @@ impl HentaihavenProvider {
&format!("url={video_url}; error={e}"),
)
.await;
return Ok(old_items);
return Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default());
}
};
if page > 1 {
return Ok(vec![]);
let urls = Self::parse_search_urls(&text);
if urls.is_empty() {
return Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default());
}
let video_items: Vec<VideoItem> = self
.get_video_items_from_html_search(text.clone(), &mut requester, pool)
.await;
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
let db_items = Self::items_from_db(&urls, &pool);
if !db_items.is_empty() {
cache.insert(video_url.clone(), db_items.clone());
self.spawn_refresh(requester, pool, cache, video_url, Some(urls), true);
return Ok(db_items);
}
Ok(video_items)
let items = self.resolve_urls(urls, &requester, pool).await;
if !items.is_empty() {
cache.insert(video_url.clone(), items.clone());
return Ok(items);
}
Ok(cache
.get(&video_url)
.map(|(_, items)| items)
.unwrap_or_default())
}
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
pool: DbPool,
) -> Vec<VideoItem> {
/// Pull the first link target out of an HTML fragment.
///
/// Looks for the first `a href="` marker in `seg` and returns the text that
/// follows it up to the next double quote. Returns `None` when the fragment
/// contains no such marker.
fn extract_segment_url(seg: &str) -> Option<String> {
    let after_marker = seg.split("a href=\"").nth(1)?;
    let target = after_marker.split('"').next()?;
    Some(target.to_owned())
}
/// Extract the ordered list of episode page URLs from a listing page.
fn parse_listing_urls(html: &str) -> Vec<String> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
let block = match html
.split("previouspostslink")
.next()
.and_then(|s| {
s.split("vraven_manga_list").nth(1).or_else(|| {
s.find(r#"<div class="page-content-listing item-big_thumbnail">"#)
.map(|idx| &s[idx..])
})
let block = match html.split("previouspostslink").next().and_then(|s| {
s.split("vraven_manga_list").nth(1).or_else(|| {
s.find(r#"<div class="page-content-listing item-big_thumbnail">"#)
.map(|idx| &s[idx..])
})
{
}) {
Some(b) => b,
None => {
eprint!("Hentai Haven Provider: Failed to get block from html");
let e = Error::from(ErrorKind::Parse("html".into()));
send_discord_error_report(
e.to_string(),
Some(format_error_chain(&e)),
Some("Hentai Haven Provider"),
Some(&format!("Failed to get block from html:\n```{html}\n```")),
file!(),
line!(),
module_path!(),
)
.await;
crate::providers::report_provider_error_background(
"hentaihaven",
"parse_listing.block",
"Failed to get block from listing html",
);
return vec![];
}
};
let futures = block
block
.split("id=\"manga-item-")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), pool.clone(), requester.clone()));
join_all(futures)
.await
.into_iter()
.inspect(|r| {
if let Err(e) = r {
eprint!("Hentai Haven Provider: Failed to get video item:{}\n", e);
// Prepare data to move into the background task
let msg = e.to_string();
let chain = format_error_chain(&e);
// Spawn the report into the background - NO .await here
tokio::spawn(async move {
let _ = send_discord_error_report(
msg,
Some(chain),
Some("Hentai Haven Provider"),
Some("Failed to get video item"),
file!(), // Note: these might report the utility line
line!(), // better to hardcode or pass from outside
module_path!(),
)
.await;
});
}
})
.filter_map(Result::ok)
.filter_map(Self::extract_segment_url)
.collect()
}
async fn get_video_items_from_html_search(
&self,
html: String,
requester: &mut Requester,
pool: DbPool,
) -> Vec<VideoItem> {
/// Extract the ordered list of result URLs from a search page.
fn parse_search_urls(html: &str) -> Vec<String> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
let block = match html
.split("<footer")
.next()
@@ -276,106 +318,187 @@ impl HentaihavenProvider {
{
Some(b) => b,
None => {
eprint!("Hentai Haven Provider: Failed to get block from html");
let e = Error::from(ErrorKind::Parse("html".into()));
send_discord_error_report(
e.to_string(),
Some(format_error_chain(&e)),
Some("Hentai Haven Provider"),
Some(&format!("Failed to get block from html:\n```{html}\n```")),
file!(),
line!(),
module_path!(),
)
.await;
crate::providers::report_provider_error_background(
"hentaihaven",
"parse_search.block",
"Failed to get block from search html",
);
return vec![];
}
};
let futures = block
block
.split("c-tabs-item__content col-6 col-md-12")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), pool.clone(), requester.clone()));
join_all(futures)
.await
.into_iter()
.inspect(|r| {
if let Err(e) = r {
eprint!("Hentai Haven Provider: Failed to get video item:{}\n", e);
// Prepare data to move into the background task
let msg = e.to_string();
let chain = format_error_chain(&e);
// Spawn the report into the background - NO .await here
tokio::spawn(async move {
let _ = send_discord_error_report(
msg,
Some(chain),
Some("Hentai Haven Provider"),
Some("Failed to get video item"),
file!(), // Note: these might report the utility line
line!(), // better to hardcode or pass from outside
module_path!(),
)
.await;
});
}
})
.filter_map(Result::ok)
.filter_map(Self::extract_segment_url)
.collect()
}
async fn get_video_item(
&self,
seg: String,
pool: DbPool,
mut requester: Requester,
) -> Result<VideoItem> {
let video_url = seg
.split("a href=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse("video url\n\n{seg}".into()))?
.to_string();
/// Build a response from already-resolved items stored in the DB, preserving
/// the order of `urls`. Items not yet in the DB are simply skipped.
fn items_from_db(urls: &[String], pool: &DbPool) -> Vec<VideoItem> {
let mut conn = match pool.get() {
Ok(conn) => conn,
Err(e) => {
let msg = format!("DB pool error: {}", e);
send_discord_error_report(
msg.clone(),
None,
Some("Hentai Haven Provider"),
Some("get_video_item.pool_get"),
file!(),
line!(),
module_path!(),
)
.await;
return Err(msg.into());
}
Err(_) => return vec![],
};
let db_result = db::get_video(&mut conn, video_url.clone());
drop(conn);
match db_result {
Ok(Some(video)) => {
let video_item = VideoItem::from(video);
match video_item {
Ok(item) => return Ok(item),
urls.iter()
.filter_map(|url| match db::get_video(&mut conn, url.clone()) {
Ok(Some(json)) => VideoItem::from(json).ok(),
_ => None,
})
.collect()
}
/// Resolve each episode page URL into a full `VideoItem`, persisting every
/// success to the DB. On failure we fall back to any stored copy so a
/// transient error does not drop the item from the page.
async fn resolve_urls(
&self,
urls: Vec<String>,
requester: &Requester,
pool: DbPool,
) -> Vec<VideoItem> {
stream::iter(urls.into_iter().map(|url| {
let provider = self.clone();
let mut req = requester.clone();
let pool = pool.clone();
async move {
match provider.fetch_video_item(&url, &mut req).await {
Ok(item) => {
if let Ok(mut conn) = pool.get() {
let new_len = item.formats.as_ref().map_or(0, |f| f.len());
let old_item = db::get_video(&mut conn, url.clone())
.ok()
.flatten()
.and_then(|json| VideoItem::from(json).ok());
let old_len = old_item
.as_ref()
.and_then(|o| o.formats.as_ref())
.map_or(0, |f| f.len());
if new_len >= old_len {
let _ = db::upsert_video(
&mut conn,
&url,
&serde_json::to_string(&item).unwrap_or_default(),
);
Some(item)
} else {
// A partial refresh resolved fewer episodes than we
// already have (likely a transient outage) — keep the
// richer stored copy rather than degrading it.
old_item.or(Some(item))
}
} else {
Some(item)
}
}
Err(e) => {
eprint!("Failed to convert video from DB result: {}\n", e);
eprintln!("Hentai Haven Provider: Failed to resolve {url}: {e}");
if let Ok(mut conn) = pool.get() {
if let Ok(Some(cached)) = db::get_video(&mut conn, url.clone()) {
if let Ok(item) = VideoItem::from(cached) {
return Some(item);
}
}
}
None
}
}
}
Ok(None) => {
// continue to fetch and parse the video
}
Err(e) => {
eprint!("Database error: {}\n", e);
// continue to fetch and parse the video even if there's a DB error
}
}))
.buffered(2)
.filter_map(|item| async move { item })
.collect::<Vec<_>>()
.await
}
/// Process-wide set of listing/search keys whose background refresh is
/// currently running, used so that two refreshes for the same page never run
/// concurrently. The set is created lazily on first access.
fn refresh_in_flight() -> &'static Mutex<HashSet<String>> {
    static IN_FLIGHT: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
    IN_FLIGHT.get_or_init(Mutex::default)
}
fn try_begin_refresh(key: &str) -> bool {
match Self::refresh_in_flight().lock() {
Ok(mut set) => set.insert(key.to_string()),
Err(_) => false,
}
let html = requester
.get(&video_url, Some(Version::HTTP_2))
}
/// Remove `key` from the in-flight set so a later refresh of the same page can
/// start. Does nothing when the set's lock cannot be acquired.
fn end_refresh(key: &str) {
    let Ok(mut in_flight) = Self::refresh_in_flight().lock() else {
        return;
    };
    in_flight.remove(key);
}
/// Spawn a non-blocking refresh of a listing/search page. `urls` may be
/// supplied when the caller already fetched the listing; otherwise the
/// refresh re-fetches it itself.
fn spawn_refresh(
&self,
requester: Requester,
pool: DbPool,
cache: VideoCache,
key: String,
urls: Option<Vec<String>>,
search: bool,
) {
if !Self::try_begin_refresh(&key) {
return;
}
let provider = self.clone();
tokio::spawn(async move {
provider
.refresh(requester, pool, cache, key.clone(), urls, search)
.await;
Self::end_refresh(&key);
});
}
/// Re-resolve every item of a listing/search page and store the result in
/// `cache` under `key`.
///
/// When `urls` is `None`, the page at `key` is fetched (up to 3 attempts) and
/// parsed with the search or listing parser depending on `search`; a failed
/// fetch is reported in the background and ends the refresh. The cache is left
/// untouched when no URLs are found or when nothing could be resolved.
async fn refresh(
    &self,
    mut requester: Requester,
    pool: DbPool,
    cache: VideoCache,
    key: String,
    urls: Option<Vec<String>>,
    search: bool,
) {
    let urls = if let Some(known) = urls {
        known
    } else {
        let text = match Self::get_with_retry(&mut requester, &key, 3).await {
            Ok(text) => text,
            Err(e) => {
                crate::providers::report_provider_error_background(
                    "hentaihaven",
                    "refresh.request",
                    &format!("url={key}; error={e}"),
                );
                return;
            }
        };
        if search {
            Self::parse_search_urls(&text)
        } else {
            Self::parse_listing_urls(&text)
        }
    };
    if urls.is_empty() {
        return;
    }

    let items = self.resolve_urls(urls, &requester, pool).await;
    if items.is_empty() {
        return;
    }
    cache.insert(key, items);
}
async fn fetch_video_item(
&self,
video_url: &str,
requester: &mut Requester,
) -> Result<VideoItem> {
let html = Self::get_with_retry(requester, video_url, 3)
.await
.map_err(|e| Error::from(format!("Failed to fetch video page: {}", e)))?;
@@ -383,7 +506,7 @@ impl HentaihavenProvider {
.split("<h1>")
.nth(1)
.and_then(|s| s.split("</h1>").next())
.ok_or_else(|| ErrorKind::Parse(format!("video title\n\n{seg}").into()))?
.ok_or_else(|| ErrorKind::Parse(format!("video title: {video_url}")))?
.trim()
.to_string();
title = decode(title.as_bytes())
@@ -394,7 +517,7 @@ impl HentaihavenProvider {
.split('/')
.nth(4)
.and_then(|s| s.split('.').next())
.ok_or_else(|| ErrorKind::Parse("video id\n\n{seg}".into()))?
.ok_or_else(|| ErrorKind::Parse(format!("video id: {video_url}")))?
.to_string();
let thumb = html
.split("og:image\" content=\"")
@@ -446,7 +569,6 @@ impl HentaihavenProvider {
.and_then(|s| s.split(" Total").nth(0))
.map(|s| s.trim().parse::<u32>().unwrap_or(0))
.unwrap_or(0);
let mut formats = vec![];
let episode_block = html
.split("manga-chapters-holder")
.nth(1)
@@ -454,84 +576,250 @@ impl HentaihavenProvider {
.split("vraven_read")
.nth(0)
.unwrap_or_default();
for episode in episode_block.split("wp-manga-chapter").skip(1) {
let ep_thumbnail = episode
.split(" src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or_default();
let episode_title = episode
.split("<div>")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or_default()
.trim()
.to_string();
let episode_id = ep_thumbnail.split('/').nth(5).unwrap_or_default();
let episode_url = format!(
"https://master-lengs.org/api/v3/hh/{}/master.m3u8",
episode_id
);
let format = VideoFormat::new(episode_url, "1080p".to_string(), "m3u8".to_string())
.format_id(episode_title.clone())
.http_header("Connection".to_string(), "keep-alive".to_string())
.http_header(
"User-Agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
.to_string(),
)
.http_header(
"Accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(),
)
.http_header("Accept-Language".to_string(), "en-US,en;q=0.5".to_string())
.http_header(
"Accept-Encoding".to_string(),
"gzip, deflate, br".to_string(),
)
.http_header("Sec-Fetch-Mode".to_string(), "navigate".to_string())
.http_header("Origin".to_string(), self.url.clone())
.format_note(episode_title.clone());
formats.push(format);
}
let episodes: Vec<(String, String)> = episode_block
.split("wp-manga-chapter")
.skip(1)
.filter_map(|episode| {
let href = episode
.split("a href=\"")
.nth(1)
.and_then(|s| s.split('"').next())?
.to_string();
let title = episode
.split("<div>")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or_default()
.trim()
.to_string();
Some((title, href))
})
.collect();
let formats: Vec<VideoFormat> = stream::iter(episodes.into_iter().map(|(title, href)| {
let requester = requester.clone();
let provider = self.clone();
async move { provider.resolve_episode_format(title, href, requester).await }
}))
.buffered(1)
.filter_map(|result| async move {
match result {
Ok(format) => Some(format),
Err(e) => {
eprintln!("Hentai Haven Provider: Failed to resolve episode format: {e}");
None
}
}
})
.collect::<Vec<_>>()
.await;
if formats.is_empty() {
let e = Error::from(format!("No formats found for video URL: {}", video_url));
return Err(e);
return Err(Error::from(format!("No formats found for video URL: {}", video_url)));
}
if formats.len() > 1 {
title = format!("{} ({} Episodes)", title, formats.len());
}
let video_item =
VideoItem::new(id, title, video_url.clone(), "hentaihaven".into(), thumb, 0)
Ok(
VideoItem::new(id, title, video_url.to_string(), "hentaihaven".into(), thumb, 0)
.formats(formats)
.tags(tags)
.views(views)
.aspect_ratio(0.715);
.aspect_ratio(0.715),
)
}
match pool.get() {
Ok(mut conn) => {
let _ = db::insert_video(
&mut conn,
&video_url,
&serde_json::to_string(&video_item).unwrap_or_default(),
);
}
Err(e) => {
send_discord_error_report(
format!("DB pool error: {}", e),
None,
Some("Hentai Haven Provider"),
Some("get_video_item.insert_video.pool_get"),
file!(),
line!(),
module_path!(),
async fn resolve_episode_format(
&self,
title: String,
href: String,
mut requester: Requester,
) -> Result<VideoFormat> {
let episode_html = Self::get_with_retry(&mut requester, &href, 4)
.await
.map_err(|e| Error::from(format!("Failed to fetch episode page {href}: {e}")))?;
let player_url = episode_html
.split("iframe src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.map(|s| s.replace("&amp;", "&"))
.ok_or_else(|| ErrorKind::Parse(format!("player iframe url: {href}")))?;
let player_html = Self::get_with_retry(&mut requester, &player_url, 4)
.await
.map_err(|e| Error::from(format!("Failed to fetch player page {player_url}: {e}")))?;
let token = player_html
.split("x-secure-token\" content=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse(format!("secure token: {href}")))?;
let config = Self::decode_secure_token(token)?;
let api_base = if config.uri.starts_with("//") {
format!("https:{}", config.uri)
} else {
config.uri.clone()
};
let api_url = format!("{api_base}api.php");
let body = Self::build_player_api_body(&config.en, &config.iv);
let text = Self::post_ajax_with_retry(
&api_url,
&body,
vec![
("Content-Type", "application/x-www-form-urlencoded"),
("Accept", "*/*"),
("Accept-Language", "en-US,en;q=0.5"),
("Referer", player_url.as_str()),
("Origin", self.url.as_str()),
("Sec-Fetch-Dest", "empty"),
("Sec-Fetch-Mode", "cors"),
("Sec-Fetch-Site", "same-origin"),
("X-Requested-With", "XMLHttpRequest"),
],
4,
)
.await
.map_err(|e| Error::from(format!("Failed to call player api {api_url}: {e}")))?;
let api_response: PlayerApiResponse = serde_json::from_str(&text)
.map_err(|e| Error::from(format!("Failed to parse player api body {api_url}: {e}")))?;
if !api_response.status {
return Err(Error::from(format!("player api returned status=false for {href}")));
}
let source = api_response
.data
.and_then(|d| d.sources.into_iter().next())
.ok_or_else(|| ErrorKind::Parse(format!("no sources in player api response: {href}")))?;
let quality = if source.label.trim().is_empty() {
"auto".to_string()
} else {
source.label.to_ascii_lowercase()
};
Ok(
VideoFormat::new(source.src, quality, "m3u8".to_string())
.format_id(title.clone())
.http_header(
"User-Agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
.to_string(),
)
.await;
.http_header("Referer".to_string(), self.url.clone())
.http_header("Origin".to_string(), self.url.clone())
.format_note(title),
)
}
/// GET `url` over HTTP/2, making up to `attempts` tries.
///
/// Each try after the first is preceded by a pause that starts at 500 ms and
/// doubles per retry, capped at 4 s. Returns the first successful body, or the
/// stringified error of the last failed try (an empty string when `attempts`
/// is 0, because no request is made).
async fn get_with_retry(
    requester: &mut Requester,
    url: &str,
    attempts: u32,
) -> std::result::Result<String, String> {
    let mut failure = String::new();
    for attempt in 0..attempts {
        // `checked_sub` is `None` only for the very first try, which runs without delay.
        if let Some(retry_index) = attempt.checked_sub(1) {
            let delay = std::time::Duration::from_millis(500u64 << retry_index.min(3));
            tokio::time::sleep(delay).await;
        }
        failure = match requester.get(url, Some(Version::HTTP_2)).await {
            Ok(body) => return Ok(body),
            Err(err) => err.to_string(),
        };
    }
    Err(failure)
}
Ok(video_item)
/// Shared HTTP client used for the player API POST requests.
///
/// Built on first use and reused afterwards. The client is configured with
/// certificate verification turned off and the Chrome 137 emulation profile.
/// Panics if the client cannot be built.
fn ajax_client() -> &'static wreq::Client {
    static CLIENT: OnceLock<wreq::Client> = OnceLock::new();
    CLIENT.get_or_init(|| {
        let builder = wreq::Client::builder()
            .cert_verification(false)
            .emulation(Emulation::Chrome137);
        builder
            .build()
            .expect("Failed to build hentaihaven AJAX client")
    })
}
/// POST `body` to `url` over HTTP/2 with the given `headers`, making up to
/// `attempts` tries through the shared AJAX client.
///
/// Each try after the first is preceded by a pause that starts at 500 ms and
/// doubles per retry, capped at 4 s. A try succeeds only when the response
/// status is a success status and its body can be read; the body text is then
/// returned. Otherwise the last failure (transport error, body read error, or
/// `status <code>`) is returned as a string.
async fn post_ajax_with_retry(
    url: &str,
    body: &str,
    headers: Vec<(&str, &str)>,
    attempts: u32,
) -> std::result::Result<String, String> {
    let mut failure = String::new();
    for attempt in 0..attempts {
        // `checked_sub` is `None` only for the very first try, which runs without delay.
        if let Some(retry_index) = attempt.checked_sub(1) {
            let delay = std::time::Duration::from_millis(500u64 << retry_index.min(3));
            tokio::time::sleep(delay).await;
        }

        let base = Self::ajax_client()
            .post(url)
            .version(Version::HTTP_2)
            .body(body.to_string());
        let request = headers
            .iter()
            .fold(base, |req, (name, value)| req.header(*name, *value));

        failure = match request.send().await {
            Err(err) => err.to_string(),
            Ok(response) => {
                let status = response.status();
                match response.text().await {
                    Ok(text) if status.is_success() => return Ok(text),
                    Ok(_) => format!("status {status}"),
                    Err(err) => err.to_string(),
                }
            }
        };
    }
    Err(failure)
}
/// Build the URL-encoded form body for the player API call: the fixed
/// `action` field followed by `en` as field `a` and `iv` as field `b`.
fn build_player_api_body(en: &str, iv: &str) -> String {
    url::form_urlencoded::Serializer::new(String::new())
        .append_pair("action", "zarat_get_data_player_ajax")
        .append_pair("a", en)
        .append_pair("b", iv)
        .finish()
}
/// Decode the player's secure token into a `PlayerSecureConfig`.
///
/// An optional `sha512-` prefix is dropped, then three rounds of
/// "ROT13, then base64-decode" are applied, and the resulting text is parsed
/// as JSON. Fails when any base64 layer or the final JSON is invalid.
fn decode_secure_token(token: &str) -> Result<PlayerSecureConfig> {
    let mut data = token.strip_prefix("sha512-").unwrap_or(token).to_string();
    for _ in 0..3 {
        data = Self::decode_base64_layer(&Self::rot13(&data))?;
    }
    serde_json::from_str(&data)
        .map_err(|e| Error::from(format!("Failed to parse secure token json: {e}")))
}
/// Base64-decode one layer of the token and return it as UTF-8 text.
///
/// Surrounding whitespace is trimmed and `=` padding is appended until the
/// length is a multiple of four before decoding with the standard alphabet.
fn decode_base64_layer(value: &str) -> Result<String> {
    let trimmed = value.trim();
    // Number of `=` needed to reach the next multiple of four (0 when already aligned).
    let missing = (4 - trimmed.len() % 4) % 4;
    let padded = format!("{trimmed}{}", "=".repeat(missing));
    let bytes = STANDARD
        .decode(padded)
        .map_err(|e| Error::from(format!("base64 decode failed: {e}")))?;
    String::from_utf8(bytes).map_err(|e| Error::from(format!("utf8 decode failed: {e}")))
}
/// Apply ROT13 to `input`: ASCII letters are rotated 13 places within their
/// own case; every other character is passed through unchanged.
fn rot13(input: &str) -> String {
    /// Rotate an ASCII letter by 13 within the alphabet starting at `base`.
    fn rotate(letter: char, base: u8) -> char {
        (((letter as u8 - base + 13) % 26) + base) as char
    }

    input
        .chars()
        .map(|c| {
            if c.is_ascii_uppercase() {
                rotate(c, b'A')
            } else if c.is_ascii_lowercase() {
                rotate(c, b'a')
            } else {
                c
            }
        })
        .collect()
}
}

View File

@@ -177,15 +177,18 @@ impl HomoxxxProvider {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
let trimmed = query.trim().trim_start_matches('@');
if let Some((kind, value)) = trimmed.split_once(':') {
let kind = kind.trim().to_ascii_lowercase();
let value = value.trim().replace(' ', "-");
if !value.is_empty()
&& matches!(
kind.as_str(),
"models" | "pornstars" | "stars" | "channels" | "categories" | "tags"
)
{
video_url = format!("{}/{}/{}/", self.url, kind, value);
}
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {

View File

@@ -9,7 +9,6 @@ use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{FuturesUnordered, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
@@ -194,9 +193,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -242,9 +239,7 @@ impl HqpornerProvider {
.await
.map_err(|e| Error::from(format!("Request failed: {}", e)))?;
let video_items = self
.get_video_items_from_html(text, &mut requester, &options)
.await;
let video_items = self.get_video_items_from_html(text, &options).await;
if !video_items.is_empty() {
cache.insert(video_url, video_items.clone());
}
@@ -254,7 +249,6 @@ impl HqpornerProvider {
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
options: &ServerOptions,
) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
@@ -273,63 +267,14 @@ impl HqpornerProvider {
})
.unwrap_or_default();
// Limit concurrent detail-page requests to reduce transient connect errors.
let mut in_flight = FuturesUnordered::new();
let mut iter = raw_videos.into_iter();
let mut items = Vec::new();
const MAX_IN_FLIGHT: usize = 6;
loop {
while in_flight.len() < MAX_IN_FLIGHT {
let Some(seg) = iter.next() else {
break;
};
in_flight.push(self.get_video_item(seg, requester.clone(), options));
}
let Some(result) = in_flight.next().await else {
break;
};
match result {
Ok(item)
if item
.formats
.as_ref()
.map(|formats| !formats.is_empty())
.unwrap_or(false) =>
{
items.push(item);
}
Ok(_) => {}
Err(e) => {
let msg = e.to_string();
let chain = format_error_chain(&e);
tokio::spawn(async move {
let _ = send_discord_error_report(
msg,
Some(chain),
Some("Hqporner Provider"),
None,
file!(),
line!(),
module_path!(),
)
.await;
});
}
}
}
items
raw_videos
.into_iter()
.filter_map(|seg| self.get_video_item(seg, options).ok())
.collect()
}
async fn get_video_item(
&self,
seg: String,
mut requester: Requester,
options: &ServerOptions,
) -> Result<VideoItem> {
let video_url = format!(
fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result<VideoItem> {
let detail_url = format!(
"{}{}",
self.url,
seg.split("<a href=\"")
@@ -348,7 +293,7 @@ impl HqpornerProvider {
.unwrap_or_else(|_| title_raw.to_string())
.titlecase();
let id = video_url
let id = detail_url
.split('/')
.nth(4)
.and_then(|s| s.split('.').next())
@@ -378,173 +323,49 @@ impl HqpornerProvider {
.unwrap_or_default();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let (tags, formats) = self.extract_media(&video_url, &mut requester).await?;
Ok(
VideoItem::new(id, title, video_url, "hqporner".into(), thumb, duration)
.formats(formats)
.tags(tags),
)
}
async fn extract_media(
&self,
url: &str,
requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>)> {
let mut formats = vec![];
let mut tags = vec![];
let headers = vec![("Referer".to_string(), "https://hqporner.com/".into())];
let mut text = match self
.fetch_text_with_retries(requester, url, &headers, 3)
.await
{
Ok(text) => text,
Err(primary_err) => {
if url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
self.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.map_err(|fallback_err| {
Error::from(format!(
"Request failed: primary={primary_err}; fallback={fallback_err}"
))
})?
} else {
return Err(Error::from(format!("Request failed: {}", primary_err)));
}
}
};
if text.is_empty() && url.contains("://hqporner.com/") {
let fallback_url = url.replace("://hqporner.com/", "://www.hqporner.com/");
text = self
.fetch_text_with_retries(requester, &fallback_url, &headers, 3)
.await
.unwrap_or_default();
}
if text.contains("Why do I see it?") {
return Ok((tags, formats));
}
// Extract Stars & Tags
if let Some(stars_block) = text
.split("icon fa-star-o")
.nth(1)
.and_then(|s| s.split("</li>").next())
{
for star_el in stars_block.split("href=\"/actress/").skip(1) {
let id = star_el.split('"').next().unwrap_or("").to_string();
let name = star_el
.split("\">")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or("")
.to_string();
if !name.is_empty() {
tags.push(name.clone());
Self::push_unique(&self.stars, FilterOption { id, title: name });
}
}
}
// Player / Video Extraction
let player_url = format!(
"https:{}",
text.split("url: '/blocks/altplayer.php?i=")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or("No player link")?
let stripped_detail_url = crate::providers::strip_url_scheme(&detail_url);
let proxied_url = crate::providers::build_proxy_url(
options,
"hqporner",
&stripped_detail_url,
);
let response_text = match self
.fetch_text_with_retries(requester, &player_url, &headers, 2)
.await
{
Ok(text) => text,
Err(e) => {
let err = format!("altplayer request failed: {e}");
send_discord_error_report(
err.clone(),
None,
Some("Hqporner Provider"),
Some(&player_url),
file!(),
line!(),
module_path!(),
)
.await;
return Ok((tags, formats));
}
let quality_target = |quality: &str| -> String {
format!("{stripped_detail_url}/__quality__/{quality}")
};
let text2 = response_text;
let formats = vec![
VideoFormat::new(
crate::providers::build_proxy_url(options, "hqporner", &quality_target("1080")),
"1080p".to_string(),
"mp4".to_string(),
)
.format_id("1080p".to_string())
.format_note("1080p Full HD".to_string()),
VideoFormat::new(
crate::providers::build_proxy_url(options, "hqporner", &quality_target("720")),
"720p".to_string(),
"mp4".to_string(),
)
.format_id("720p".to_string())
.format_note("720p HD".to_string()),
VideoFormat::new(
crate::providers::build_proxy_url(options, "hqporner", &quality_target("360")),
"360p".to_string(),
"mp4".to_string(),
)
.format_id("360p".to_string())
.format_note("360p".to_string()),
];
// Check for error response
if text2.starts_with("ERR:") {
return Ok((tags, formats));
}
let video_element = text2
.split("<video ")
.nth(2)
.and_then(|s| s.split("</video>").next())
.ok_or(format!("No video element\n{player_url}\n{text2}"))?;
for source in video_element.split("<source ").skip(1) {
let title = source
.split("title=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
.to_string();
let quality = title.split(' ').next().unwrap_or("HD").to_string();
let media_url = format!(
"https:{}",
source
.split("src=\\\"")
.nth(1)
.and_then(|s| s.split("\\\"").next())
.unwrap_or("")
);
formats.push(
VideoFormat::new(media_url, quality, "mp4".into())
.format_id(title.clone())
.format_note(title),
);
}
Ok((tags, formats))
}
async fn fetch_text_with_retries(
&self,
requester: &mut Requester,
url: &str,
headers: &[(String, String)],
max_attempts: u8,
) -> std::result::Result<String, String> {
let mut last_err = String::new();
for attempt in 1..=max_attempts {
match requester.get_raw_with_headers(url, headers.to_vec()).await {
Ok(resp) => match resp.text().await {
Ok(text) => return Ok(text),
Err(e) => {
last_err =
format!("text read failed (attempt {attempt}/{max_attempts}): {e}");
}
},
Err(e) => {
last_err = format!("request failed (attempt {attempt}/{max_attempts}): {e}");
}
}
if attempt < max_attempts {
tokio::time::sleep(std::time::Duration::from_millis(250 * attempt as u64)).await;
}
}
Err(last_err)
Ok(VideoItem::new(
id,
title,
proxied_url,
"hqporner".into(),
thumb,
duration,
)
.formats(formats))
}
}

696
src/providers/jable.rs Normal file
View File

@@ -0,0 +1,696 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, build_proxy_url, report_provider_error, report_provider_error_background, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::NaiveDate;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use regex::Regex;
use scraper::{Html, Selector};
/// Static channel metadata for this provider: the group it is listed under
/// (`"jav"`) and the tags attached to the channel.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "jav",
tags: &["jav", "asian", "uncensored"],
};
/// Site origin; every listing URL is built from it and it is sent as the
/// `Referer` on playback formats.
const BASE_URL: &str = "https://jable.tv";
/// Channel id used for the `Channel`, for created `VideoItem`s and for error reports.
const CHANNEL_ID: &str = "jable";
/// Page size used when the caller's `per_page` string does not parse as a number.
const DEFAULT_PER_PAGE: usize = 24;
/// Upper bound on detail-page fetches that run at the same time in `fetch_page`.
const ENRICH_CONCURRENCY: usize = 6;
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// Foreign I/O, JSON and URL-parse errors convert automatically; `Parse`
// carries a free-form message for scraping failures.
error_chain! {
foreign_links {
Io(std::io::Error);
Json(serde_json::Error);
Url(url::ParseError);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Stateless provider for jable.tv; all scraping helpers are associated
/// functions on this unit struct.
#[derive(Debug, Clone)]
pub struct JableProvider;
/// Which listing a request should be served from; `build_listing_url` maps
/// each variant to a site path.
#[derive(Debug, Clone)]
enum Target {
/// `/latest-updates/{page}/`
Latest,
/// `/hot/{page}/`
Hot,
/// `/search/?q=...` (page 1) or `/search/{page}/?q=...`
Search { query: String },
/// `/tags/{slug}/{page}/`
Tag { slug: String },
/// `/categories/{slug}/{page}/`
Category { slug: String },
/// `/models/{id}/{page}/`
Model { id: String },
}
/// Data scraped from one listing card before the detail page is fetched.
#[derive(Debug, Clone)]
struct CardStub {
// Last path segment of `url`.
id: String,
// Card title text; falls back to `id` when the card has no title.
title: String,
// `href` of the card's first link, exactly as found in the HTML.
url: String,
// `data-src` of the card image (empty string when missing).
thumb: String,
// `data-preview` of the card image, when present.
preview: Option<String>,
// Duration in seconds parsed from the `.label` text; 0 when not parseable.
duration: u32,
// View count parsed from the `.sub-title` markup, when present.
views: Option<u32>,
}
impl JableProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel descriptor shown to clients: identity, favicon and
/// the two single-select options ("sort" and "language").
///
/// The client version is currently ignored.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // Small constructor so each selectable choice reads as one line.
    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse Jable by newest or hottest videos.".to_string(),
        systemImage: "arrow.up.arrow.down".to_string(),
        colorName: "blue".to_string(),
        options: vec![choice("new", "Latest"), choice("hot", "Hot")],
        multiSelect: false,
    };

    let language_option = ChannelOption {
        id: "language".to_string(),
        title: "Language".to_string(),
        description: "Interface language for titles, categories, and navigation.".to_string(),
        systemImage: "globe".to_string(),
        colorName: "green".to_string(),
        options: vec![
            choice("en", "English"),
            choice("zh", "Chinese"),
            choice("jp", "Japanese"),
        ],
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "Jable".to_string(),
        description: "Jable.TV JAV archive with latest, trending, tag, and model browsing plus direct HLS playback.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=jable.tv".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option, language_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Maps the requested interface language to one of the supported codes.
///
/// Returns `"zh"` or `"jp"` when requested explicitly; anything else,
/// including no language at all, resolves to `"en"`.
fn resolve_lang(options: &ServerOptions) -> &'static str {
    match options.language.as_deref() {
        Some("zh") => "zh",
        Some("jp") => "jp",
        _ => "en",
    }
}
/// Appends `lang=<lang>` to `url`, using `&` when the URL already has a
/// query string and `?` otherwise.
fn lang_param(url: &str, lang: &str) -> String {
    let separator = if url.contains('?') { '&' } else { '?' };
    format!("{url}{separator}lang={lang}")
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Builds the absolute listing URL for `target` at `page`, with the
/// language parameter appended.
///
/// `page` is clamped to at least 1. Search is the only target whose first
/// page has no page segment in the path; the query is form-urlencoded.
fn build_listing_url(target: &Target, page: u16, lang: &str) -> String {
    let page_no = page.max(1);
    let path = match target {
        Target::Latest => format!("latest-updates/{page_no}/"),
        Target::Hot => format!("hot/{page_no}/"),
        Target::Search { query } => {
            let encoded =
                url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>();
            if page_no > 1 {
                format!("search/{page_no}/?q={encoded}")
            } else {
                format!("search/?q={encoded}")
            }
        }
        Target::Tag { slug } => format!("tags/{slug}/{page_no}/"),
        Target::Category { slug } => format!("categories/{slug}/{page_no}/"),
        Target::Model { id } => format!("models/{id}/{page_no}/"),
    };
    Self::lang_param(&format!("{BASE_URL}/{path}"), lang)
}
/// Decides which listing to serve.
///
/// A non-blank query wins: the prefixes `tag:`, `cat:` and `model:` select
/// the matching archive (the remainder is used verbatim), and any other
/// text becomes a search. Without a query, sort `"hot"` selects the hot
/// listing and everything else the latest listing.
fn pick_target(query: Option<&str>, options: &ServerOptions) -> Target {
    let requested = query.map(str::trim).filter(|text| !text.is_empty());
    if let Some(text) = requested {
        return if let Some(slug) = text.strip_prefix("tag:") {
            Target::Tag { slug: slug.to_string() }
        } else if let Some(slug) = text.strip_prefix("cat:") {
            Target::Category { slug: slug.to_string() }
        } else if let Some(id) = text.strip_prefix("model:") {
            Target::Model { id: id.to_string() }
        } else {
            Target::Search { query: text.to_string() }
        };
    }
    match options.sort.as_deref() {
        Some("hot") => Target::Hot,
        _ => Target::Latest,
    }
}
/// Extracts a view count by keeping only the ASCII digits of `text`.
///
/// Returns `None` when there are no digits or the number does not fit in
/// a `u32`.
fn parse_views_text(text: &str) -> Option<u32> {
    let mut digits = String::new();
    for ch in text.chars() {
        if ch.is_ascii_digit() {
            digits.push(ch);
        }
    }
    digits.parse().ok()
}
/// Parses a `YYYY-MM-DD` date (surrounding whitespace ignored) into the
/// Unix timestamp of midnight UTC on that day.
///
/// Returns `None` when the text is not a valid date in that format.
fn parse_uploaded_at(text: &str) -> Option<u64> {
    let date = NaiveDate::parse_from_str(text.trim(), "%Y-%m-%d").ok()?;
    let midnight = date.and_hms_opt(0, 0, 0)?;
    Some(midnight.and_utc().timestamp() as u64)
}
/// Scrapes a listing page into one `CardStub` per `.video-img-box` card.
///
/// Cards without a link, without a derivable id, or whose id was already
/// seen on this page are skipped. Errors only when one of the fixed
/// selectors or regexes fails to compile.
fn parse_listing_page(html: &str) -> Result<Vec<CardStub>> {
let document = Html::parse_document(html);
let card_sel = Self::selector(".video-img-box")?;
let link_sel = Self::selector("a[href]")?;
let img_sel = Self::selector("img[data-src]")?;
let label_sel = Self::selector(".label")?;
let title_sel = Self::selector(".title a[href]")?;
let sub_sel = Self::selector(".sub-title")?;
// Matches `H:MM:SS` or `M:SS` inside the label text.
let duration_regex = Self::regex(r"(\d+:\d{2}:\d{2}|\d+:\d{2})")?;
// Captures the first whitespace-free token after the element containing `icon-eye`.
// NOTE(review): `\S+` stops at the first whitespace, so a count rendered with
// space separators would be truncated; confirm against live markup.
let views_regex = Self::regex(r"icon-eye[^>]*>[^<]*</[^>]+>\s*(\S+)")?;
let mut stubs = Vec::new();
let mut seen_ids = std::collections::HashSet::new();
for card in document.select(&card_sel) {
// The first link in the card is taken as the video URL.
let url = card
.select(&link_sel)
.next()
.and_then(|a| a.value().attr("href"))
.map(str::to_string)
.unwrap_or_default();
if url.is_empty() {
continue;
}
// The id is the last path segment of the URL, ignoring a trailing slash.
let id = url
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
// `insert` returns false for a duplicate, which drops repeated cards.
if id.is_empty() || !seen_ids.insert(id.clone()) {
continue;
}
let img_el = card.select(&img_sel).next();
let thumb = img_el
.and_then(|img| img.value().attr("data-src"))
.map(str::to_string)
.unwrap_or_default();
let preview = img_el
.and_then(|img| img.value().attr("data-preview"))
.map(str::to_string);
let raw_label = card
.select(&label_sel)
.next()
.map(|el| el.text().collect::<Vec<_>>().join(""))
.unwrap_or_default();
// Any failure along the way (no match, unparseable, out of range) yields 0.
let duration = duration_regex
.find(&raw_label)
.and_then(|m| parse_time_to_seconds(m.as_str()))
.and_then(|s| u32::try_from(s).ok())
.unwrap_or_default();
// Fall back to the id so every stub has a non-empty title.
let title = card
.select(&title_sel)
.next()
.map(|a| a.text().collect::<Vec<_>>().join("").trim().to_string())
.filter(|t| !t.is_empty())
.unwrap_or_else(|| id.clone());
// Views are read from the raw inner HTML because the regex keys on the icon markup.
let views = card.select(&sub_sel).next().and_then(|sub| {
let sub_html = sub.inner_html();
views_regex
.captures(&sub_html)
.and_then(|caps| caps.get(1))
.and_then(|m| Self::parse_views_text(m.as_str()))
});
stubs.push(CardStub {
id,
title,
url,
thumb,
preview,
duration,
views,
});
}
Ok(stubs)
}
async fn fetch_listing(
url: &str,
options: &ServerOptions,
) -> Result<Vec<CardStub>> {
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_listing");
let html = requester
.get(url, None)
.await
.map_err(|e| Error::from(format!("listing fetch failed for {url}: {e}")))?;
Self::parse_listing_page(&html)
}
/// Returns the detail-page URL for a card: the card's URL with the
/// language parameter appended.
fn detail_url(stub_url: &str, lang: &str) -> String {
Self::lang_param(stub_url, lang)
}
/// Pulls the HLS playlist URL out of the inline `var hlsUrl = '...'`
/// assignment.
///
/// Returns `None` when the assignment is missing, the closing quote is
/// missing, or the trimmed value is not an absolute http(s) URL.
fn extract_hls_url(html: &str) -> Option<String> {
    const MARKER: &str = "var hlsUrl = '";
    let (_, after_marker) = html.split_once(MARKER)?;
    let (raw, _) = after_marker.split_once('\'')?;
    let candidate = raw.trim();
    let is_absolute = candidate.starts_with("http://") || candidate.starts_with("https://");
    is_absolute.then(|| candidate.to_string())
}
/// Collects the trimmed text of every `.tags a` link on a detail page,
/// dropping blanks and repeats while keeping first-seen order.
fn extract_detail_tags(html: &str) -> Vec<String> {
    let Ok(tag_sel) = Selector::parse(".tags a") else {
        return Vec::new();
    };
    let document = Html::parse_document(html);
    let mut seen = std::collections::HashSet::new();
    document
        .select(&tag_sel)
        .map(|link| link.text().collect::<String>().trim().to_string())
        .filter(|label| !label.is_empty() && seen.insert(label.clone()))
        .collect()
}
/// Collects `(name, href)` pairs for every `.models a.model` link on a
/// detail page.
///
/// The name comes from a nested `span[title]`, then the link's own
/// `title` attribute, then the link's trimmed text. Links with an empty
/// href or an empty name are skipped.
fn extract_model_info(html: &str) -> Vec<(String, String)> {
    let (Ok(model_sel), Ok(span_sel)) = (
        Selector::parse(".models a.model"),
        Selector::parse("span[title]"),
    ) else {
        return Vec::new();
    };
    let document = Html::parse_document(html);
    let mut models = Vec::new();
    for anchor in document.select(&model_sel) {
        let Some(href) = anchor.value().attr("href").filter(|h| !h.is_empty()) else {
            continue;
        };
        let titled = anchor
            .select(&span_sel)
            .next()
            .and_then(|span| span.value().attr("title"))
            .or_else(|| anchor.value().attr("title"));
        let name = match titled {
            Some(title) => title.to_string(),
            None => anchor.text().collect::<String>().trim().to_string(),
        };
        if name.is_empty() {
            continue;
        }
        models.push((name, href.to_string()));
    }
    models
}
/// Extracts the release date that follows the `上市於 ` label on a detail
/// page and converts it to a Unix timestamp (midnight UTC).
///
/// The text considered runs from the label to the next `<`, capped at 20
/// bytes. Returns `None` when the label is absent or the text is not a
/// `YYYY-MM-DD` date.
///
/// NOTE(review): only the Chinese label is recognised; pages requested
/// with `lang=en` or `lang=jp` may render a different label — confirm.
fn extract_uploaded_at(html: &str) -> Option<u64> {
    const LABEL: &str = "上市於 ";
    let (_, rest) = html.split_once(LABEL)?;
    let field = rest.split('<').next().unwrap_or(rest);
    // Cap the slice at 20 bytes, but back off to a character boundary:
    // slicing through a multi-byte character would panic.
    let mut end = field.len().min(20);
    while !field.is_char_boundary(end) {
        end -= 1;
    }
    Self::parse_uploaded_at(field[..end].trim())
}
/// Reads the view count from a detail page: the first `.info-header .mr-3`
/// element whose text, with spaces and non-breaking spaces removed, parses
/// as a `u32`.
fn extract_views_detail(html: &str) -> Option<u32> {
    let sel = Selector::parse(".info-header .mr-3").ok()?;
    let document = Html::parse_document(html);
    document.select(&sel).find_map(|span| {
        let compact: String = span
            .text()
            .flat_map(str::chars)
            .filter(|ch| *ch != '\u{a0}' && *ch != ' ')
            .collect();
        compact.parse::<u32>().ok()
    })
}
/// Fetches the detail page for one listing card and builds the final
/// `VideoItem` with an HLS format, tags, model info, views and upload date.
///
/// Errors when the detail page cannot be fetched or contains no `hlsUrl`.
///
/// NOTE(review): `proxy_video_url` is never read in this body; the item
/// URL is `stub.url`. Confirm whether the proxy URL was meant to be used.
async fn enrich_card(
stub: CardStub,
options: &ServerOptions,
proxy_video_url: String,
lang: &str,
) -> Result<VideoItem> {
let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_card");
let detail_url = Self::detail_url(&stub.url, lang);
let html = requester
.get(&detail_url, None)
.await
.map_err(|e| Error::from(format!("detail fetch failed for {}: {e}", stub.url)))?;
let hls_url = Self::extract_hls_url(&html)
.ok_or_else(|| Error::from(format!("no hlsUrl found in {}", stub.url)))?;
// Single adaptive HLS format; Referer and User-Agent headers are attached to it.
let mut format = VideoFormat::m3u8(hls_url.clone(), "auto".to_string(), "m3u8".to_string());
format.add_http_header("Referer".to_string(), format!("{BASE_URL}/"));
format.add_http_header(
"User-Agent".to_string(),
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
);
let tags = Self::extract_detail_tags(&html);
let models = Self::extract_model_info(&html);
let uploaded_at = Self::extract_uploaded_at(&html);
// Prefer the detail-page count; fall back to the one scraped from the listing.
let views = Self::extract_views_detail(&html).or(stub.views);
// Only the first listed model is used as the uploader.
let (uploader, uploader_url) = models
.into_iter()
.next()
.map(|(name, url)| (Some(name), Some(url)))
.unwrap_or((None, None));
// Uploader id is the channel id plus the last path segment of the model URL.
let model_id = uploader_url.as_deref().and_then(|url| {
url.trim_end_matches('/').rsplit('/').next().map(|s| format!("{CHANNEL_ID}:{s}"))
});
let mut item = VideoItem::new(
stub.id,
stub.title,
stub.url,
CHANNEL_ID.to_string(),
stub.thumb,
stub.duration,
);
item.formats = Some(vec![format]);
item.preview = stub.preview;
item.views = views;
item.uploadedAt = uploaded_at;
// Aspect ratio is hard-coded to 16:9, not read from the page.
item.aspectRatio = Some(16.0 / 9.0);
if !tags.is_empty() {
item.tags = Some(tags);
}
item.uploader = uploader;
item.uploaderUrl = uploader_url;
item.uploaderId = model_id;
Ok(item)
}
/// Fetches one listing page for `target` and enriches up to `per_page`
/// cards with detail-page data.
///
/// Detail pages are fetched with at most `ENRICH_CONCURRENCY` requests in
/// flight. Results are returned in listing order, so the site's "latest"
/// and "hot" ordering survives enrichment. A card whose enrichment fails
/// is reported in the background and dropped; only a failure of the
/// listing request itself makes this function return `Err`.
async fn fetch_page(
    target: Target,
    page: u16,
    per_page: usize,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    let lang = Self::resolve_lang(options);
    let url = Self::build_listing_url(&target, page, lang);
    let stubs = Self::fetch_listing(&url, options).await?;

    let items = stream::iter(stubs.into_iter().take(per_page).map(|stub| {
        // Each task owns its own copy of the options so the future is self-contained.
        let options = options.clone();
        let proxy_url = build_proxy_url(&options, CHANNEL_ID, &stub.id);
        async move {
            match Self::enrich_card(stub, &options, proxy_url, lang).await {
                Ok(item) => Some(item),
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "fetch_page.enrich_card",
                        &error.to_string(),
                    );
                    None
                }
            }
        }
    }))
    // `buffered` (unlike `buffer_unordered`) yields results in input order
    // while still running up to ENRICH_CONCURRENCY futures concurrently.
    .buffered(ENRICH_CONCURRENCY)
    .filter_map(|item| async move { item })
    .collect::<Vec<_>>()
    .await;

    Ok(items)
}
}
#[async_trait]
impl Provider for JableProvider {
/// Provider entry point: resolves the request into a listing target,
/// fetches that page and returns the enriched videos.
///
/// `page` defaults to 1 and `per_page` to `DEFAULT_PER_PAGE` when they do
/// not parse; `per_page` is clamped to 1..=48. Any failure is reported
/// and results in an empty list rather than an error.
async fn get_videos(
    &self,
    _cache: VideoCache,
    _pool: DbPool,
    sort: String,
    query: Option<String>,
    page: String,
    per_page: String,
    options: ServerOptions,
) -> Vec<VideoItem> {
    let page_no = page.parse::<u16>().unwrap_or(1).max(1);
    let page_size = per_page
        .parse::<usize>()
        .unwrap_or(DEFAULT_PER_PAGE)
        .clamp(1, 48);

    // Blank queries are treated the same as no query at all.
    let wanted_query = query
        .as_deref()
        .map(str::trim)
        .filter(|text| !text.is_empty());

    // The explicit `sort` argument overrides whatever the options carried,
    // but only for target selection; fetching uses the original options.
    let mut sorted_options = options.clone();
    sorted_options.sort = Some(sort);
    let target = Self::pick_target(wanted_query, &sorted_options);

    match Self::fetch_page(target, page_no, page_size, &options).await {
        Ok(items) => items,
        Err(error) => {
            report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
            Vec::new()
        }
    }
}
/// Returns the channel descriptor; always `Some` for this provider.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
Some(self.build_channel(clientversion))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn builds_listing_urls() {
assert_eq!(
JableProvider::build_listing_url(&Target::Latest, 1, "en"),
"https://jable.tv/latest-updates/1/?lang=en"
);
assert_eq!(
JableProvider::build_listing_url(&Target::Latest, 3, "jp"),
"https://jable.tv/latest-updates/3/?lang=jp"
);
assert_eq!(
JableProvider::build_listing_url(&Target::Hot, 2, "en"),
"https://jable.tv/hot/2/?lang=en"
);
assert_eq!(
JableProvider::build_listing_url(
&Target::Search {
query: "nurse".to_string()
},
1,
"en"
),
"https://jable.tv/search/?q=nurse&lang=en"
);
assert_eq!(
JableProvider::build_listing_url(
&Target::Search {
query: "nurse".to_string()
},
2,
"zh"
),
"https://jable.tv/search/2/?q=nurse&lang=zh"
);
assert_eq!(
JableProvider::build_listing_url(
&Target::Tag {
slug: "creampie".to_string()
},
1,
"en"
),
"https://jable.tv/tags/creampie/1/?lang=en"
);
}
#[test]
fn lang_param_appended_correctly() {
assert_eq!(
JableProvider::lang_param("https://jable.tv/latest-updates/1/", "en"),
"https://jable.tv/latest-updates/1/?lang=en"
);
assert_eq!(
JableProvider::lang_param("https://jable.tv/search/?q=nurse", "zh"),
"https://jable.tv/search/?q=nurse&lang=zh"
);
}
#[test]
fn resolve_lang_defaults_to_en() {
let opts = ServerOptions {
language: None,
sort: None, featured: None, category: None, sites: None,
filter: None, public_url_base: None, requester: None,
network: None, stars: None, categories: None, duration: None,
sexuality: None,
};
assert_eq!(JableProvider::resolve_lang(&opts), "en");
let opts_jp = ServerOptions { language: Some("jp".to_string()), ..opts };
assert_eq!(JableProvider::resolve_lang(&opts_jp), "jp");
let opts_zh = ServerOptions { language: Some("zh".to_string()), ..opts_jp };
assert_eq!(JableProvider::resolve_lang(&opts_zh), "zh");
}
#[test]
fn extracts_hls_url_from_script() {
let html = r#"<script>
var hlsUrl = 'https://asf-doc.mushroomtrack.com/hls/TOKEN/1234/59000/59222/59222.m3u8';
var tagUrl = 'https://example.com/ad';
</script>"#;
assert_eq!(
JableProvider::extract_hls_url(html).as_deref(),
Some("https://asf-doc.mushroomtrack.com/hls/TOKEN/1234/59000/59222/59222.m3u8")
);
}
// The expected value 1778716800 is 2026-05-14T00:00:00Z as a Unix timestamp.
#[test]
fn extracts_uploaded_at() {
let html = r#"<span class="inactive-color">上市於 2026-05-14</span>"#;
let ts = JableProvider::extract_uploaded_at(html);
assert!(ts.is_some());
assert_eq!(ts.unwrap(), 1778716800);
}
// NOTE(review): despite the name, this does not exercise duration parsing.
// The fixture has no `.video-img-box` card, so the test only checks that a
// bare label produces no stubs.
#[test]
fn parses_duration_from_label() {
let html = r#"<div class="absolute-bottom-right"><span class="label">2:32:18</span></div>"#;
let stubs = JableProvider::parse_listing_page(html).unwrap_or_default();
assert!(stubs.is_empty());
}
#[test]
fn picks_target_from_sort() {
let opts = ServerOptions {
sort: Some("hot".to_string()),
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: None,
requester: None,
network: None,
stars: None,
categories: None,
duration: None,
sexuality: None,
};
match JableProvider::pick_target(None, &opts) {
Target::Hot => {}
other => panic!("expected Hot, got {:?}", other),
}
}
#[test]
fn picks_tag_target_from_query_prefix() {
let opts = ServerOptions {
sort: None,
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: None,
requester: None,
network: None,
stars: None,
categories: None,
duration: None,
sexuality: None,
};
match JableProvider::pick_target(Some("tag:creampie"), &opts) {
Target::Tag { slug } => assert_eq!(slug, "creampie"),
other => panic!("expected Tag, got {:?}", other),
}
}
}

View File

@@ -1,12 +1,12 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::providers::{Provider, build_proxy_url, strip_url_scheme};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
@@ -81,20 +81,16 @@ impl JavtifulProvider {
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "newest".into(),
title: "Newest".into(),
id: "relevance".into(),
title: "Relevance".into(),
},
FilterOption {
id: "top rated".into(),
title: "Top Rated".into(),
id: "latest".into(),
title: "Latest".into(),
},
FilterOption {
id: "most viewed".into(),
title: "Most Viewed".into(),
},
FilterOption {
id: "top favorites".into(),
title: "Top Favorites".into(),
id: "popular".into(),
title: "Popular".into(),
},
],
multiSelect: false,
@@ -120,11 +116,12 @@ impl JavtifulProvider {
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let sort_string = match sort {
"top rated" => "/sort=top_rated",
"most viewed" => "/sort=most_viewed",
"latest" => "sort=latest&",
"popular" => "sort=popular&",
_ => "",
};
let video_url = format!("{}/videos{}?page={}", self.url, sort_string, page);
let video_url = format!("{}/videos?{}page={}", self.url, sort_string, page);
println!("Fetching URL: {}", video_url);
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
@@ -154,7 +151,7 @@ impl JavtifulProvider {
};
if page > 1
&& !text.contains(&format!(
"<li class=\"page-item active\"><span class=\"page-link\">{}</span>",
"<a class=\"front-pagination-link is-active\" href=\"/videos\" aria-current=\"page\">{}</a>",
page
))
{
@@ -180,12 +177,12 @@ impl JavtifulProvider {
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let sort_string = match options.sort.as_deref().unwrap_or("") {
"top rated" => "/sort=top_rated",
"most viewed" => "/sort=most_viewed",
"latest" => "sort=latest&",
"popular" => "sort=popular&",
_ => "",
};
let video_url = format!(
"{}/search/videos{}?search_query={}&page={}",
"{}/search?{}q={}&page={}",
self.url,
sort_string,
query.replace(" ", "+"),
@@ -222,7 +219,7 @@ impl JavtifulProvider {
};
if page > 1
&& !text.contains(&format!(
"<li class=\"page-item active\"><span class=\"page-link\">{}</span>",
"<a class=\"front-pagination-link is-active\" href=\"/videos\" aria-current=\"page\">{}</a>",
page
))
{
@@ -250,10 +247,11 @@ impl JavtifulProvider {
return vec![];
}
let block = match html.split("pagination ").next().and_then(|s| {
s.split("row row-cols-1 row-cols-sm-2 row-cols-lg-3 row-cols-xl-4")
.nth(1)
}) {
let block = match html
.split("front-pagination")
.next()
.and_then(|s| s.split("front-video-grid").nth(1))
{
Some(b) => b,
None => {
eprint!("Javtiful Provider: Failed to get block from html");
@@ -273,9 +271,9 @@ impl JavtifulProvider {
};
let futures = block
.split("card ")
.split("\"front-video-card\"")
.skip(1)
.filter(|seg| !seg.contains("SPONSOR"))
.filter(|seg| !seg.contains("front-ad-card"))
.map(|el| self.get_video_item(el.to_string(), requester.clone(), options));
join_all(futures)
@@ -313,42 +311,55 @@ impl JavtifulProvider {
mut requester: Requester,
options: &ServerOptions,
) -> Result<VideoItem> {
let video_url = seg
.split(" href=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse("video url\n\n{seg}".into()))?
.to_string();
let video_url = format!(
"{}{}",
self.url,
seg.split(" href=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse(format!("video url\n\n{seg}")))?
.to_string()
);
let mut title = seg
.split(" alt=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse(format!("video title\n\n{seg}").into()))?
.trim()
.to_string();
let mut title = match seg.contains("front-video-title") {
true => seg
.split("front-video-title")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse(format!("video title\n\n{seg}")))?
.trim()
.to_string(),
false => seg
.split("alt=\"")
.nth(1)
.and_then(|s| s.split('\"').next())
.ok_or_else(|| ErrorKind::Parse(format!("video title\n\n{seg}")))?
.trim()
.to_string(),
};
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.titlecase();
let id = video_url
.split('/')
.nth(5)
.and_then(|s| s.split('.').next())
.ok_or_else(|| ErrorKind::Parse("video id\n\n{seg}".into()))?
.to_string();
let thumb_block = seg
.split("<img ")
.split("/video/")
.filter(|s| !s.is_empty())
.nth(1)
.ok_or_else(|| ErrorKind::Parse("thumb block\n\n{seg}".into()))?;
.ok_or_else(|| ErrorKind::Parse(format!("video id\n\n{seg}")))?
.to_string();
let thumb_block = seg.split("<img ").nth(1);
let thumb = thumb_block
.split("data-src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let thumb = match thumb_block {
Some(block) => format!("{}{}", self.url,block
.split("data-front-lazy-src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string()),
None => "".to_string(),
};
let mut preview = seg
.split("data-trailer=\"")
.nth(1)
@@ -356,26 +367,23 @@ impl JavtifulProvider {
.unwrap_or("")
.to_string();
let raw_duration = seg
.split("label-duration\">")
.split("class=\"front-duration-tag\">")
.nth(1)
.and_then(|s| s.split('<').next())
.unwrap_or("")
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let (tags, mut formats, views) = self
.extract_media(&video_url, &mut requester, options)
.await?;
let (tags, views) = self.extract_media(&video_url, &mut requester).await?;
if preview.len() == 0 {
preview = format!("https://trailers.jav.si/preview/{id}.mp4");
}
if formats.is_empty() && !preview.is_empty() {
let mut format = VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), video_url.clone());
formats.push(format);
}
let video_item = VideoItem::new(id, title, video_url, "javtiful".into(), thumb, duration)
.formats(formats)
let proxy_url = build_proxy_url(
options,
"javtiful",
&strip_url_scheme(video_url.clone().as_str()),
);
let video_item = VideoItem::new(id, title, proxy_url, "javtiful".into(), thumb, duration)
.tags(tags)
.preview(preview)
.views(views);
@@ -386,8 +394,7 @@ impl JavtifulProvider {
&self,
url: &str,
requester: &mut Requester,
options: &ServerOptions,
) -> Result<(Vec<String>, Vec<VideoFormat>, u32)> {
) -> Result<(Vec<String>, u32)> {
let text = requester
.get(url, Some(Version::HTTP_2))
.await
@@ -432,56 +439,7 @@ impl JavtifulProvider {
.and_then(|s| s.replace(".", "").parse::<u32>().ok())
.unwrap_or(0);
let quality = "1080p".to_string();
let mut formats = Vec::new();
let video_id = url
.split("/video/")
.nth(1)
.and_then(|value| value.split('/').next())
.unwrap_or("")
.trim();
let token = text
.split("data-csrf-token=\"")
.nth(1)
.and_then(|value| value.split('"').next())
.unwrap_or("")
.trim();
if !video_id.is_empty() && !token.is_empty() {
let form = wreq::multipart::Form::new()
.text("video_id", video_id.to_string())
.text("pid_c", "".to_string())
.text("token", token.to_string());
if let Ok(response) = requester
.post_multipart(
"https://javtiful.com/ajax/get_cdn",
form,
vec![("Referer".to_string(), url.to_string())],
Some(Version::HTTP_11),
)
.await
{
let payload = response.text().await.unwrap_or_default();
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&payload) {
if let Some(cdn_url) = json.get("playlists").and_then(|value| value.as_str()) {
if !cdn_url.trim().is_empty() {
let mut format = VideoFormat::new(
cdn_url.to_string(),
quality.clone(),
"m3u8".into(),
);
format.add_http_header("Referer".to_string(), url.to_string());
formats.push(format);
}
}
}
}
}
let _ = options;
Ok((tags, formats, views))
Ok((tags, views))
}
}

View File

@@ -12,8 +12,10 @@ use crate::videos::VideoItem;
use async_trait::async_trait;
use diesel::r2d2;
use error_chain::error_chain;
use futures::future::join_all;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Version;
@@ -41,15 +43,58 @@ error_chain! {
#[derive(Debug, Clone)]
pub struct MissavProvider {
url: String,
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl MissavProvider {
pub fn new() -> Self {
MissavProvider {
url: "https://missav.ws".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
fn normalize_key(value: &str) -> String {
value
.trim()
.to_ascii_lowercase()
.replace(['_', '-'], " ")
.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
}
fn humanize_slug(value: &str) -> String {
value
.trim_matches('/')
.replace('-', " ")
.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
}
fn insert_tag_mapping(&self, key: &str, path_or_url: &str) {
let normalized = Self::normalize_key(key);
if normalized.is_empty() || path_or_url.trim().is_empty() {
return;
}
if let Ok(mut map) = self.tag_map.write() {
map.insert(normalized, path_or_url.trim().to_string());
}
}
fn resolve_query_url(&self, query: &str, page: u8, sort: &str) -> Option<String> {
let normalized = Self::normalize_key(query);
let mapped = self.tag_map.read().ok()?.get(&normalized)?.clone();
let separator = if mapped.contains('?') { "&" } else { "?" };
let mut url = format!("{mapped}{separator}page={page}");
if !sort.is_empty() {
url.push_str("&sort=");
url.push_str(sort);
}
Some(url)
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "missav".to_string(),
@@ -248,10 +293,13 @@ impl MissavProvider {
if !sort.is_empty() {
sort = format!("&sort={}", sort);
}
let url_str = format!(
let mut url_str = format!(
"{}/{}/search/{}?page={}{}",
self.url, language, search_string, page, sort
);
if let Some(mapped_url) = self.resolve_query_url(query, page, &sort.replace("&sort=", "")) {
url_str = mapped_url;
}
if let Some((time, items)) = cache.get(&url_str) {
if time.elapsed().unwrap_or_default().as_secs() < 3600 {
@@ -309,14 +357,16 @@ impl MissavProvider {
}
}
let futures = urls
.into_iter()
.map(|url| self.get_video_item(url, pool.clone(), requester.clone()));
join_all(futures)
.await
.into_iter()
.filter_map(Result::ok)
.collect()
stream::iter(urls.into_iter().map(|url| {
let pool = pool.clone();
let requester = requester.clone();
let provider = self.clone();
async move { provider.get_video_item(url, pool, requester).await }
}))
.buffer_unordered(4)
.filter_map(|result| async move { result.ok() })
.collect::<Vec<_>>()
.await
}
async fn get_video_item(
@@ -325,35 +375,57 @@ impl MissavProvider {
pool: DbPool,
mut requester: Requester,
) -> Result<VideoItem> {
// 1. Database Check
{
let mut conn = pool
.get()
.map_err(|e| Error::from(format!("Pool error: {}", e)))?;
if let Ok(Some(entry)) = db::get_video(&mut conn, url_str.clone()) {
if let Ok(video_item) = serde_json::from_str::<VideoItem>(entry.as_str()) {
return Ok(video_item);
// 1. Attempt live fetch
let fetched = self.fetch_video_item(&url_str, &mut requester).await;
match fetched {
Ok(video_item) => {
// Store/override in DB
if let Ok(mut conn) = pool.get() {
let _ = db::insert_video(
&mut conn,
&url_str,
&serde_json::to_string(&video_item).unwrap_or_default(),
);
}
Ok(video_item)
}
Err(e) => {
// 2. Fall back to DB
if let Ok(mut conn) = pool.get() {
if let Ok(Some(entry)) = db::get_video(&mut conn, url_str.clone()) {
if let Ok(mut video_item) = serde_json::from_str::<VideoItem>(entry.as_str()) {
video_item.url = url_str.clone();
return Ok(video_item);
}
}
}
Err(e)
}
}
}
// 2. Fetch Page
async fn fetch_video_item(
&self,
url_str: &str,
requester: &mut Requester,
) -> Result<VideoItem> {
let vid = requester
.get(&url_str, Some(Version::HTTP_2))
.get(url_str, Some(Version::HTTP_2))
.await
.unwrap_or_else(|e| {
.map_err(|e| {
eprintln!("Error fetching Missav URL {}: {}", url_str, e);
let _ = send_discord_error_report(
e.to_string(),
None,
Some(&url_str),
Some(url_str),
None,
file!(),
line!(),
module_path!(),
);
"".to_string()
});
Error::from(e.to_string())
})?;
// Helper closure to extract content between two strings
let extract = |html: &str, start_tag: &str, end_tag: &str| -> Option<String> {
@@ -384,28 +456,63 @@ impl MissavProvider {
let id = url_str.split('/').last().ok_or("No ID found")?.to_string();
// 3. Extract Tags (Generic approach to avoid repetitive code)
// Extract Tags
let mut tags = vec![];
for (label, prefix) in [
("Actress:", "@actress"),
("Actor:", "@actor"),
("Maker:", "@maker"),
("Genre:", "@genre"),
for (label, route_kind) in [
("Actress:", "actress"),
("Actor:", "actor"),
("Maker:", "maker"),
("Genre:", "genre"),
] {
let marker = format!("<span>{}</span>", label);
if let Some(section) = extract(&vid, &marker, "</div>") {
for part in section.split("class=\"text-nord13 font-medium\">").skip(1) {
if let Some(val) = part.split('<').next() {
let clean = val.trim();
if !clean.is_empty() {
tags.push(format!("{}:{}", prefix, clean));
for anchor in section.split("<a ").skip(1) {
let href = anchor
.split("href=\"")
.nth(1)
.and_then(|value| value.split('"').next())
.unwrap_or_default()
.to_string();
let title = anchor
.split("class=\"text-nord13 font-medium\">")
.nth(1)
.and_then(|value| value.split('<').next())
.map(str::trim)
.unwrap_or_default()
.to_string();
if !title.is_empty() {
tags.push(title.clone());
if !href.is_empty() {
let full_url = if href.starts_with("http://") || href.starts_with("https://") {
href.clone()
} else {
format!("{}{}", self.url, href)
};
self.insert_tag_mapping(&title, &full_url);
let slug = href
.trim_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
if !slug.is_empty() {
self.insert_tag_mapping(&slug, &full_url);
self.insert_tag_mapping(
&format!("{route_kind}:{}", slug),
&full_url,
);
self.insert_tag_mapping(
&format!("{route_kind}:{}", Self::humanize_slug(&slug)),
&full_url,
);
}
}
}
}
}
}
// 4. Extract Video URL (The m3u8 logic)
// Extract Video URL (m3u8)
let video_url = (|| {
let parts_str = vid.split("m3u8").nth(1)?.split("https").next()?;
let mut parts: Vec<&str> = parts_str.split('|').collect();
@@ -425,7 +532,7 @@ impl MissavProvider {
let mut format = VideoFormat::new(video_url.clone(), "auto".to_string(), "m3u8".to_string());
format.add_http_header("Referer".to_string(), "https://missav.ws/".to_string());
let video_item = VideoItem::new(id, title, video_url, "missav".to_string(), thumb, duration)
let video_item = VideoItem::new(id, title, url_str.to_string(), "missav".to_string(), thumb, duration)
.formats(vec![format])
.tags(tags)
.preview(format!(
@@ -433,15 +540,6 @@ impl MissavProvider {
url_str.split('/').last().unwrap_or_default()
));
// 5. Cache to DB
if let Ok(mut conn) = pool.get() {
let _ = db::insert_video(
&mut conn,
&url_str,
&serde_json::to_string(&video_item).unwrap_or_default(),
);
}
Ok(video_item)
}
}

View File

@@ -11,7 +11,9 @@ use std::time::{Duration, Instant};
use crate::{
DbPool,
api::ClientVersion,
status::{Channel, ChannelGroup, ChannelView, FilterOption, Status, StatusResponse},
status::{
Channel, ChannelGroup, ChannelOption, ChannelView, FilterOption, Status, StatusResponse,
},
uploaders::UploaderProfile,
util::{cache::VideoCache, discord::send_discord_error_report, requester::Requester},
videos::{FlexibleNumber, ServerOptions, VideoItem, VideosRequest},
@@ -35,13 +37,10 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
m
});
const CHANNEL_STATUS_ERROR: &str = "error";
const VALIDATION_RESULTS_REQUIRED: usize = 5;
const VALIDATION_MIN_SUCCESS: usize = 1;
const VALIDATION_COOLDOWN: Duration = Duration::from_secs(3600);
const VALIDATION_MEDIA_TIMEOUT: Duration = Duration::from_secs(100);
const VALIDATION_ERROR_RETEST_INTERVAL: Duration = VALIDATION_COOLDOWN;
const VALIDATION_FAILURES_FOR_ERROR: u8 = 5;
#[derive(Clone)]
struct ProviderValidationContext {
@@ -62,7 +61,6 @@ static PROVIDER_VALIDATION_INFLIGHT: Lazy<DashSet<String>> = Lazy::new(DashSet::
static PROVIDER_VALIDATION_LAST_RUN: Lazy<DashMap<String, Instant>> = Lazy::new(DashMap::new);
static PROVIDER_VALIDATION_FAILURE_STATE: Lazy<DashMap<String, ValidationFailureState>> =
Lazy::new(DashMap::new);
static PROVIDER_ERROR_REVALIDATION_STARTED: OnceLock<()> = OnceLock::new();
fn validation_client_version() -> ClientVersion {
ClientVersion::new(22, 'c' as u32, "Hot%20Tub".to_string())
@@ -392,37 +390,6 @@ fn record_validation_failure(provider_id: &str, now: Instant) -> u8 {
1
}
/// Spawns a background task that periodically re-schedules validation for
/// every provider whose runtime status is currently the error status.
///
/// Only the first call has an effect: later calls find the start marker
/// already set and return immediately.
fn start_periodic_error_revalidation() {
    let already_started = PROVIDER_ERROR_REVALIDATION_STARTED.set(()).is_err();
    if already_started {
        return;
    }

    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(VALIDATION_ERROR_RETEST_INTERVAL);
        loop {
            ticker.tick().await;

            // Collect the ids first so the status map is not being iterated
            // while validations are scheduled.
            let mut errored_providers = Vec::new();
            for entry in PROVIDER_RUNTIME_STATUS.iter() {
                if entry.value().as_str() == CHANNEL_STATUS_ERROR {
                    errored_providers.push(entry.key().clone());
                }
            }

            for provider_id in errored_providers {
                schedule_provider_validation(
                    &provider_id,
                    "periodic_retest",
                    "provider currently marked as error",
                );
            }
        }
    });
}
pub fn configure_runtime_validation(
pool: DbPool,
cache: VideoCache,
@@ -434,9 +401,7 @@ pub fn configure_runtime_validation(
cache,
requester,
})
.map_err(|_| "provider validation context already configured")?;
start_periodic_error_revalidation();
Ok(())
.map_err(|_| "provider validation context already configured")
}
pub fn current_provider_channel_status(provider_id: &str) -> Option<String> {
@@ -484,19 +449,13 @@ pub fn schedule_provider_validation(provider_id: &str, context: &str, msg: &str)
match validation_result {
Ok(()) => {
reset_validation_failure_state(&provider_id);
PROVIDER_RUNTIME_STATUS.remove(&provider_id);
}
Err(_validation_error) => {
let failures = record_validation_failure(&provider_id, Instant::now());
if failures >= VALIDATION_FAILURES_FOR_ERROR {
PROVIDER_RUNTIME_STATUS
.insert(provider_id.clone(), CHANNEL_STATUS_ERROR.to_string());
}
let _failure_count = record_validation_failure(&provider_id, Instant::now());
crate::flow_debug!(
"provider validation failed provider={} failures={} threshold={} error={}",
"provider validation failed provider={} failures={} error={}",
&provider_id,
failures,
VALIDATION_FAILURES_FOR_ERROR,
_failure_count,
crate::util::flow_debug::preview(&_validation_error, 160)
);
}
@@ -683,6 +642,7 @@ pub fn requester_or_default(
}
}
#[allow(dead_code)]
pub fn strip_url_scheme(url: &str) -> String {
url.strip_prefix("https://")
.or_else(|| url.strip_prefix("http://"))
@@ -691,6 +651,7 @@ pub fn strip_url_scheme(url: &str) -> String {
.to_string()
}
#[allow(dead_code)]
pub fn build_proxy_url(options: &ServerOptions, proxy: &str, target: &str) -> String {
let target = target.trim_start_matches('/');
let base = options
@@ -699,6 +660,10 @@ pub fn build_proxy_url(options: &ServerOptions, proxy: &str, target: &str) -> St
.unwrap_or("")
.trim_end_matches('/');
if target.starts_with("http://") || target.starts_with("https://") {
return format!("/proxy/{target}");
}
if base.is_empty() {
format!("/proxy/{proxy}/{target}")
} else {
@@ -720,6 +685,7 @@ fn channel_group_title(group_id: &str) -> &'static str {
"onlyfans" => "OnlyFans",
"chinese" => "Chinese",
"jav" => "JAV",
"thai" => "Thai",
"fetish-kink" => "Fetish & Kink",
"hentai-animation" => "Hentai & Animation",
"ai" => "AI",
@@ -732,7 +698,7 @@ fn channel_group_title(group_id: &str) -> &'static str {
fn channel_group_system_image(group_id: &str) -> Option<&'static str> {
match group_id {
"jav" | "chinese" => Some("globe"),
"jav" | "chinese" | "thai" => Some("globe"),
_ => None,
}
}
@@ -746,16 +712,36 @@ fn channel_group_order(group_id: &str) -> usize {
"onlyfans" => 4,
"chinese" => 5,
"jav" => 6,
"fetish-kink" => 7,
"hentai-animation" => 8,
"ai" => 9,
"gay-male" => 10,
"live-cams" => 11,
"pmv-compilation" => 12,
"thai" => 7,
"fetish-kink" => 8,
"hentai-animation" => 9,
"ai" => 10,
"gay-male" => 11,
"live-cams" => 12,
"pmv-compilation" => 13,
_ => 99,
}
}
/// Decides whether a channel option is left out of the status payload.
///
/// An option is hidden when its id (trimmed, ASCII-lowercased) is `stars`,
/// `networks` or `categories`, when its title contains `models`,
/// `pornstars`, `genres` or `networks`, or when it is the `filter` option
/// with a title containing `tags`.
fn should_hide_status_option(option: &ChannelOption) -> bool {
    const HIDDEN_TITLE_FRAGMENTS: [&str; 4] = ["models", "pornstars", "genres", "networks"];

    let id = option.id.trim().to_ascii_lowercase();
    if matches!(id.as_str(), "stars" | "networks" | "categories") {
        return true;
    }

    let title = option.title.trim().to_ascii_lowercase();
    let title_is_heavy = HIDDEN_TITLE_FRAGMENTS
        .iter()
        .any(|fragment| title.contains(*fragment));

    title_is_heavy || (id == "filter" && title.contains("tags"))
}
/// Returns `options` with every entry flagged by
/// `should_hide_status_option` removed; the remaining entries keep their
/// original order.
fn sanitize_status_options(options: Vec<ChannelOption>) -> Vec<ChannelOption> {
    let mut visible = options;
    visible.retain(|option| !should_hide_status_option(option));
    visible
}
pub fn decorate_channel(channel: Channel) -> ChannelView {
let metadata = channel_metadata_for(&channel.id);
let runtime_status = current_provider_channel_status(&channel.id);
@@ -770,8 +756,8 @@ pub fn decorate_channel(channel: Channel) -> ChannelView {
premium: channel.premium,
favicon: channel.favicon,
status: runtime_status.unwrap_or(channel.status),
categories: channel.categories,
options: channel.options,
categories: vec![],
options: sanitize_status_options(channel.options),
nsfw: channel.nsfw,
groupKey: metadata.map(|value| value.group_id.to_string()),
sortOrder: None,
@@ -884,6 +870,7 @@ pub fn build_status_response(status: Status) -> StatusResponse {
nsfw: status.nsfw,
categories: status.categories,
options: status.options,
cdnReferrers: status.cdnReferrers,
filtersFooter: status.filtersFooter,
}
}
@@ -987,6 +974,79 @@ mod tests {
}
}
#[test]
fn decorate_channel_strips_heavy_status_filters() {
    /// Builds a `ChannelOption` with an empty description and exactly one
    /// child `FilterOption`.
    fn option_with_child(
        id: &str,
        title: &str,
        system_image: &str,
        color_name: &str,
        child: (&str, &str),
        multi_select: bool,
    ) -> ChannelOption {
        ChannelOption {
            id: id.to_string(),
            title: title.to_string(),
            description: String::new(),
            systemImage: system_image.to_string(),
            colorName: color_name.to_string(),
            options: vec![FilterOption {
                id: child.0.to_string(),
                title: child.1.to_string(),
            }],
            multiSelect: multi_select,
        }
    }

    let mut channel = base_channel("status-clean");
    channel.categories = vec!["Drama".to_string(), "Action".to_string()];
    channel.options = vec![
        option_with_child("sort", "Sort", "arrow.up.arrow.down", "blue", ("new", "Newest"), false),
        option_with_child("categories", "Genres", "square.grid.2x2", "orange", ("drama", "Drama"), true),
        option_with_child("filter", "Tags", "tag", "green", ("tag", "Tag"), true),
        option_with_child("stars", "Models", "person.2", "pink", ("model", "Model"), true),
        option_with_child("networks", "Networks", "network", "purple", ("network", "Network"), true),
    ];

    let decorated = decorate_channel(channel);

    // Categories are dropped entirely and only the "sort" option survives.
    assert!(decorated.categories.is_empty());
    assert_eq!(decorated.options.len(), 1);
    assert_eq!(decorated.options[0].id, "sort");
}
fn test_db_pool() -> DbPool {
let unique = SystemTime::now()
.duration_since(UNIX_EPOCH)
@@ -1258,22 +1318,6 @@ mod tests {
PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
// Records VALIDATION_FAILURES_FOR_ERROR failures for one provider, each
// timestamped one VALIDATION_COOLDOWN after the previous, and checks that the
// count returned by the last call equals VALIDATION_FAILURES_FOR_ERROR.
#[test]
fn validation_failure_threshold_matches_channel_error_policy() {
let provider_id = "hsex";
// Start from a clean slate: the failure state is a process-wide map.
PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
let now = Instant::now();
let mut counted = 0;
for step in 0..VALIDATION_FAILURES_FOR_ERROR {
counted =
record_validation_failure(provider_id, now + VALIDATION_COOLDOWN * step as u32);
}
assert_eq!(counted, VALIDATION_FAILURES_FOR_ERROR);
// Remove the entry again so other tests using this id are unaffected.
PROVIDER_VALIDATION_FAILURE_STATE.remove(provider_id);
}
#[test]
fn builds_group_index() {
PROVIDER_RUNTIME_STATUS.remove("all");
@@ -1357,6 +1401,12 @@ mod tests {
.expect("chinese group present");
assert_eq!(chinese_group["systemImage"], "globe");
let cdn_referrers = json["cdnReferrers"].as_array().expect("cdnReferrers array");
assert_eq!(cdn_referrers.len(), 1);
assert_eq!(cdn_referrers[0]["hostContains"], "phncdn");
assert_eq!(cdn_referrers[0]["referer"], "https://www.pornhub.com/");
assert_eq!(cdn_referrers[0]["origin"], "https://www.pornhub.com");
let pimpbunny_channel = channels
.iter()
.find(|channel| channel["id"] == "pimpbunny")
@@ -1367,14 +1417,6 @@ mod tests {
);
}
// With a runtime error status stored for the provider id, the decorated
// channel must report that status.
#[test]
fn runtime_error_status_overrides_channel_status() {
PROVIDER_RUNTIME_STATUS.insert("hsex".to_string(), CHANNEL_STATUS_ERROR.to_string());
let channel = decorate_channel(base_channel("hsex"));
assert_eq!(channel.status, CHANNEL_STATUS_ERROR);
// Clear the global entry so it does not affect other tests.
PROVIDER_RUNTIME_STATUS.remove("hsex");
}
#[ntex::test]
#[ignore = "live network sweep across all providers"]
async fn api_videos_returns_working_media_urls_for_all_channels() {

View File

@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::env;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Client;
use wreq_util::Emulation;
@@ -31,14 +33,66 @@ error_chain! {
#[derive(Debug, Clone)]
pub struct OkxxxProvider {
url: String,
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl OkxxxProvider {
pub fn new() -> Self {
OkxxxProvider {
url: "https://ok.xxx".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Produces the canonical lookup key for a tag: ASCII-lowercased, with `_`
/// and `-` treated as spaces and every whitespace run collapsed to a single
/// space (no leading or trailing space).
fn normalize_key(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut key = String::with_capacity(lowered.len());
    let words = lowered
        .split(|c: char| c == '_' || c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key
}
/// Turns a URL slug into display text: surrounding `/` are stripped, `-`
/// becomes a word break and whitespace runs collapse to one space. Letter
/// case is left untouched.
fn humanize_slug(value: &str) -> String {
    let mut words: Vec<&str> = Vec::new();
    for piece in value.trim_matches('/').split('-') {
        words.extend(piece.split_whitespace());
    }
    words.join(" ")
}
/// Remembers that a tag resolves to the site path `kind/slug`.
///
/// The path is stored under the normalized slug and, when non-empty, under
/// the normalized `title` (the slug is used when `title` is `None`).
/// Nothing is stored if the slug is empty after trimming or if the map's
/// write lock cannot be acquired.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }

    let path = format!("{kind}/{slug}");
    let mut map = match self.tag_map.write() {
        Ok(guard) => guard,
        Err(_) => return,
    };

    map.insert(Self::normalize_key(slug), path.clone());
    let title_key = Self::normalize_key(title.unwrap_or(slug));
    if !title_key.is_empty() {
        map.insert(title_key, path);
    }
}
/// Maps a search query to a listing path, if it names a known tag.
///
/// A query of the form `kind:value` (leading `@` ignored) with kind `sites`
/// or `models` is routed directly to `kind/value`, with spaces in the value
/// turned into `-`. Otherwise the normalized query is looked up in the tag
/// map. Returns `None` when neither applies.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let trimmed = query.trim().trim_start_matches('@');

    let explicit = trimmed.split_once(':').and_then(|(kind, raw_value)| {
        let kind = kind.trim().to_ascii_lowercase();
        let value = raw_value.trim().trim_matches('/').replace(' ', "-");
        let routable = matches!(kind.as_str(), "sites" | "models");
        (routable && !value.is_empty()).then(|| format!("{kind}/{value}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let normalized = Self::normalize_key(trimmed);
    if normalized.is_empty() {
        return None;
    }
    let map = self.tag_map.read().ok()?;
    map.get(&normalized).cloned()
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "okxxx".to_string(),
@@ -177,16 +231,8 @@ impl OkxxxProvider {
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -405,7 +451,8 @@ impl OkxxxProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@sites:{}", tag));
self.insert_tag_mapping("sites", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -425,7 +472,8 @@ impl OkxxxProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@models:{}", tag));
self.insert_tag_mapping("models", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -883,11 +883,35 @@ impl OmgxxxProvider {
) -> Result<Vec<VideoItem>> {
let mut search_type = "search";
let mut search_string = query.to_string().to_ascii_lowercase().trim().to_string();
let trimmed = query.trim().trim_start_matches('@').to_ascii_lowercase();
if let Some((kind, raw)) = trimmed.split_once(':') {
let candidate = raw.trim().replace(' ', "-");
if !candidate.is_empty() {
match kind.trim() {
"models" | "model" | "stars" => {
search_type = "models";
search_string = candidate;
}
"sites" | "site" => {
search_type = "sites";
search_string = candidate;
}
"networks" | "network" => {
search_type = "networks";
search_string = candidate;
}
_ => {}
}
}
}
match self.stars.read() {
Ok(stars) => {
if let Some(star) = stars
.iter()
.find(|s| s.title.to_ascii_lowercase() == search_string)
.find(|s| {
s.title.eq_ignore_ascii_case(&search_string)
|| s.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "models";
search_string = star.id.clone();
@@ -901,7 +925,10 @@ impl OmgxxxProvider {
Ok(sites) => {
if let Some(site) = sites
.iter()
.find(|s| s.title.to_ascii_lowercase() == search_string)
.find(|s| {
s.title.eq_ignore_ascii_case(&search_string)
|| s.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "sites";
search_string = site.id.clone();
@@ -911,6 +938,23 @@ impl OmgxxxProvider {
report_provider_error_background("omgxxx", "query.sites_read", &e.to_string());
}
}
match self.networks.read() {
Ok(networks) => {
if let Some(network) = networks
.iter()
.find(|n| {
n.title.eq_ignore_ascii_case(&search_string)
|| n.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "networks";
search_string = network.id.clone();
}
}
Err(e) => {
report_provider_error_background("omgxxx", "query.networks_read", &e.to_string());
}
}
let mut video_url = format!("{}/{}/{}/{}/", self.url, search_type, search_string, page);
video_url = video_url.replace(" ", "+");
// Check our Video Cache. If the result is younger than 1 hour, we return it.

View File

@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::env;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Client;
use wreq_util::Emulation;
@@ -31,14 +33,66 @@ error_chain! {
#[derive(Debug, Clone)]
pub struct PerfectgirlsProvider {
url: String,
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PerfectgirlsProvider {
pub fn new() -> Self {
PerfectgirlsProvider {
url: "https://www.perfectgirls.xxx".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Builds the lookup key used by the tag map: ASCII letters lowercased,
/// `_` and `-` mapped to spaces, whitespace runs squeezed to one space.
fn normalize_key(value: &str) -> String {
    let spaced: String = value
        .trim()
        .chars()
        .map(|c| match c {
            '_' | '-' => ' ',
            other => other.to_ascii_lowercase(),
        })
        .collect();
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Converts a slug to a human-readable label by dropping surrounding `/`,
/// splitting on `-` and whitespace, and joining the words with one space.
fn humanize_slug(value: &str) -> String {
    value
        .trim_matches('/')
        .split(|c: char| c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
/// Records `kind/slug` as the target path for a tag.
///
/// Keys are the normalized slug and the normalized title (falling back to
/// the slug when `title` is `None`); an empty title key is skipped. An
/// empty slug or an unavailable write lock makes this a no-op.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }

    let path = format!("{kind}/{slug}");
    let mut map = match self.tag_map.write() {
        Ok(guard) => guard,
        Err(_) => return,
    };

    map.insert(Self::normalize_key(slug), path.clone());
    let title_key = Self::normalize_key(title.unwrap_or(slug));
    if !title_key.is_empty() {
        map.insert(title_key, path);
    }
}
/// Resolves a query to a listing path.
///
/// `channels:<value>` and `pornstars:<value>` (optionally prefixed with `@`)
/// are routed directly, with spaces in the value replaced by `-`. Any other
/// query is normalized and looked up in the tag map; `None` means the query
/// is not a known tag.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let trimmed = query.trim().trim_start_matches('@');

    let explicit = trimmed.split_once(':').and_then(|(kind, raw_value)| {
        let kind = kind.trim().to_ascii_lowercase();
        let value = raw_value.trim().trim_matches('/').replace(' ', "-");
        let routable = matches!(kind.as_str(), "channels" | "pornstars");
        (routable && !value.is_empty()).then(|| format!("{kind}/{value}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let normalized = Self::normalize_key(trimmed);
    if normalized.is_empty() {
        return None;
    }
    let map = self.tag_map.read().ok()?;
    map.get(&normalized).cloned()
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "perfectgirls".to_string(),
@@ -177,16 +231,8 @@ impl PerfectgirlsProvider {
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -407,7 +453,8 @@ impl PerfectgirlsProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@channels:{}", tag));
self.insert_tag_mapping("channels", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -427,7 +474,8 @@ impl PerfectgirlsProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@pornstars:{}", tag));
self.insert_tag_mapping("pornstars", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -13,6 +13,8 @@ use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use wreq::Client;
use wreq::Version;
use wreq_util::Emulation;
@@ -40,11 +42,13 @@ struct PerverzijaDbEntry {
#[derive(Debug, Clone)]
pub struct PerverzijaProvider {
url: String,
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PerverzijaProvider {
pub fn new() -> Self {
PerverzijaProvider {
url: "https://tube.perverzija.com/".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
@@ -143,6 +147,104 @@ impl PerverzijaProvider {
title.trim().to_string()
}
/// Returns the prefix of `haystack` that ends just before the earliest
/// occurrence of any of `end_markers`; the whole string if none occurs.
fn clip_at_first<'a>(haystack: &'a str, end_markers: &[&str]) -> &'a str {
    let cut = end_markers
        .iter()
        .filter_map(|marker| haystack.find(*marker))
        .min()
        .unwrap_or(haystack.len());
    &haystack[..cut]
}
/// Limits a listing segment to a single item by cutting it at the first
/// marker that closes the item or opens the next one.
fn listing_item_scope(haystack: &str) -> &str {
    const ITEM_END_MARKERS: [&str; 4] = ["</article>", "</li>", "<article ", "video-item post"];
    Self::clip_at_first(haystack, &ITEM_END_MARKERS)
}
/// Extracts the markup that follows `label` on a detail page, stopping at
/// the first marker that ends the metadata row. Returns an empty string
/// when `label` is absent.
fn detail_meta_section<'a>(text: &'a str, label: &str) -> &'a str {
    const SECTION_END_MARKERS: [&str; 6] = [
        "</div>",
        "</p>",
        "<strong>",
        "<div class=\"related",
        "<section",
        "<aside",
    ];
    let after_label = text.split(label).nth(1).unwrap_or("");
    Self::clip_at_first(after_label, &SECTION_END_MARKERS)
}
/// Appends the trimmed `value` to `tags` unless it is empty or already
/// present (compared ASCII-case-insensitively).
fn push_unique(tags: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let already_present = tags
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !candidate.is_empty() && !already_present {
        tags.push(candidate.to_owned());
    }
}
/// Collects the `href` value of every `<a href="...">` in `section`, in
/// document order. Only anchors whose first attribute is `href` match.
fn parse_href_values(section: &str) -> Vec<String> {
    let mut hrefs = Vec::new();
    for chunk in section.split("<a href=\"").skip(1) {
        if let Some(value) = chunk.split('"').next() {
            hrefs.push(value.to_owned());
        }
    }
    hrefs
}
/// Canonical tag-map key: trims, lowercases ASCII letters, treats `_` and
/// `-` as spaces and joins the remaining words with a single space.
fn normalize_key(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut key = String::with_capacity(lowered.len());
    let words = lowered
        .split(|c: char| c == '_' || c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key
}
/// Renders a slug as display text: strips surrounding `/`, breaks words at
/// `-` and whitespace, and joins them with single spaces. Case is kept.
fn humanize_slug(value: &str) -> String {
    let mut words: Vec<&str> = Vec::new();
    for piece in value.trim_matches('/').split('-') {
        words.extend(piece.split_whitespace());
    }
    words.join(" ")
}
/// Stores the path `kind/slug` in the tag map under the normalized slug and
/// under the normalized title (the slug stands in when `title` is `None`).
///
/// Does nothing when the trimmed slug is empty or the write lock cannot be
/// taken; an empty normalized title is not inserted.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }

    let path = format!("{kind}/{slug}");
    let mut map = match self.tag_map.write() {
        Ok(guard) => guard,
        Err(_) => return,
    };

    map.insert(Self::normalize_key(slug), path.clone());
    let title_key = Self::normalize_key(title.unwrap_or(slug));
    if !title_key.is_empty() {
        map.insert(title_key, path);
    }
}
/// Translates a query into a `kind/slug` path when it refers to a tag.
///
/// Explicit `studio:`, `stars:`, `tag:` and `genre:` queries (an optional
/// leading `@` is ignored) are routed directly, with spaces in the value
/// replaced by `-`. Other queries are normalized and looked up in the tag
/// map. Returns `None` when no path is known.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let trimmed = query.trim().trim_start_matches('@');

    let explicit = trimmed.split_once(':').and_then(|(kind, raw_value)| {
        let kind = kind.trim().to_ascii_lowercase();
        let value = raw_value.trim().trim_matches('/').replace(' ', "-");
        let routable = matches!(kind.as_str(), "studio" | "stars" | "tag" | "genre");
        (routable && !value.is_empty()).then(|| format!("{kind}/{value}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let normalized = Self::normalize_key(trimmed);
    if normalized.is_empty() {
        return None;
    }
    let map = self.tag_map.read().ok()?;
    map.get(&normalized).cloned()
}
async fn get(
&self,
cache: VideoCache,
@@ -212,13 +314,8 @@ impl PerverzijaProvider {
url_str = format!("{}?s={}", self.url, search_string);
}
if query.starts_with("@studio:") {
let studio_name = query.replace("@studio:", "");
url_str = format!("{}studio/{}/page/{}/", self.url, studio_name, page);
query_parse = false;
} else if query.starts_with("@stars:") {
let stars_name = query.replace("@stars:", "");
url_str = format!("{}stars/{}/page/{}/", self.url, stars_name, page);
if let Some(path) = self.resolve_query_path(query) {
url_str = format!("{}/{}/page/{}/", self.url.trim_end_matches('/'), path, page);
query_parse = false;
}
url_str = url_str.replace("page/1/", "");
@@ -292,7 +389,8 @@ impl PerverzijaProvider {
return vec![];
}
for video_segment in raw_videos {
for raw_video_segment in raw_videos {
let video_segment = Self::listing_item_scope(raw_video_segment);
let title = Self::extract_title(video_segment);
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
@@ -370,15 +468,17 @@ impl PerverzijaProvider {
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
let slug = studio
.split("/\"")
.collect::<Vec<&str>>()
.first()
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &slug, None);
Self::push_unique(
&mut tags,
Self::humanize_slug(&slug),
);
}
}
@@ -396,7 +496,8 @@ impl PerverzijaProvider {
.unwrap_or_default()
.to_string();
if !tag_name.is_empty() {
tags.push(format!("@stars:{}", tag_name));
self.insert_tag_mapping("stars", &tag_name, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_name));
}
}
}
@@ -407,7 +508,7 @@ impl PerverzijaProvider {
if token.starts_with("tag-") {
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
if !tag_name.is_empty() {
tags.push(tag_name.replace("-", " ").to_string());
Self::push_unique(&mut tags, tag_name.replace("-", " ").to_string());
}
}
}
@@ -579,88 +680,58 @@ impl PerverzijaProvider {
url_str = "!".to_string()
}
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
let mut tags: Vec<String> = Vec::new();
let studios_parts = text
.split("<strong>Studio: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
);
}
}
if text.contains("<strong>Stars: </strong>") {
let stars_parts: Vec<&str> = text
.split("<strong>Stars: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in stars_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
}
let studios_section = Self::detail_meta_section(&text, "<strong>Studio: </strong>");
for href in Self::parse_href_values(studios_section) {
if href.starts_with("https://tube.perverzija.com/studio/") {
let studio_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &studio_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&studio_slug));
}
}
let tags_parts: Vec<&str> = text
.split("<strong>Tags: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in tags_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
let stars_section = Self::detail_meta_section(&text, "<strong>Stars: </strong>");
for href in Self::parse_href_values(stars_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
}
}
let tags_section = if text.contains("<strong>Tags: </strong>") {
Self::detail_meta_section(&text, "<strong>Tags: </strong>")
} else {
Self::detail_meta_section(&text, "<strong>Genres: </strong>")
};
for href in Self::parse_href_values(tags_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/tag/") {
let tag_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/tag/", "");
self.insert_tag_mapping("tag", &tag_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/genre/") {
let genre_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/genre/", "");
self.insert_tag_mapping("genre", &genre_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&genre_slug));
}
}

View File

@@ -178,30 +178,9 @@ impl PimpbunnyProvider {
}
}
/// Returns `true` only for `https` URLs on `pimpbunny.com` or
/// `www.pimpbunny.com` whose path starts with
/// `/contents/videos_screenshots/`. Unparseable input yields `false`.
fn is_allowed_thumb_url(url: &str) -> bool {
    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };
    let host_allowed = matches!(
        parsed.host_str(),
        Some("pimpbunny.com" | "www.pimpbunny.com")
    );
    parsed.scheme() == "https"
        && host_allowed
        && parsed.path().starts_with("/contents/videos_screenshots/")
}
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
if thumb.is_empty() || !Self::is_allowed_thumb_url(thumb) {
return thumb.to_string();
}
crate::providers::build_proxy_url(
options,
"pimpbunny-thumb",
&crate::providers::strip_url_scheme(thumb),
)
let _ = options;
thumb.to_string()
}
fn is_allowed_detail_url(url: &str) -> bool {
@@ -820,11 +799,14 @@ impl PimpbunnyProvider {
let formats = vec![
VideoFormat::new(proxy_url.clone(), "auto".into(), "video/mp4".into())
.format_id("auto".into())
.format_note("proxied".into()),
.format_note("proxied".into())
.http_header("Referer".to_string(), video_url.clone())
.http_header("Accept-Language".to_string(), "en-US,en;q=0.9".to_string())
,
];
Ok(
VideoItem::new(id, title, proxy_url, "pimpbunny".into(), thumb, duration)
VideoItem::new(id, title, video_url, "pimpbunny".into(), thumb, duration)
.formats(formats)
.preview(preview)
.views(views),
@@ -911,7 +893,7 @@ mod tests {
assert_eq!(
proxied,
"https://example.com/proxy/pimpbunny-thumb/pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg"
"https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg"
);
}
@@ -990,6 +972,15 @@ mod tests {
assert_eq!(items[0].duration, 754);
assert_eq!(items[0].views, Some(1200));
assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
assert_eq!(
items[0].formats.as_ref().and_then(|formats| formats.first()).map(
|format| format.http_headers_pairs().contains(&(
"Referer".to_string(),
"https://pimpbunny.com/videos/example-video/".to_string(),
))
),
Some(true)
);
}
#[test]

View File

@@ -175,6 +175,16 @@ impl PmvhavenProvider {
}
}
// Search API returns hlsMasterPlaylistUrl instead of videoUrl/key.
let hls_url = video
.get("hlsMasterPlaylistUrl")
.and_then(|v| v.as_str())
.unwrap_or("")
.trim();
if Self::is_direct_media_url(hls_url) {
return Some(hls_url.to_string());
}
None
}

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,8 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -27,14 +29,66 @@ error_chain! {
#[derive(Debug, Clone)]
pub struct PornhatProvider {
url: String,
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PornhatProvider {
pub fn new() -> Self {
PornhatProvider {
url: "https://www.pornhat.com".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Normalizes text into a tag-map key: ASCII-lowercased, `_`/`-` turned
/// into spaces, and all whitespace runs reduced to a single space.
fn normalize_key(value: &str) -> String {
    let spaced: String = value
        .trim()
        .chars()
        .map(|c| match c {
            '_' | '-' => ' ',
            other => other.to_ascii_lowercase(),
        })
        .collect();
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Makes a slug presentable: removes surrounding `/`, splits on `-` and
/// whitespace, and joins the words with one space without changing case.
fn humanize_slug(value: &str) -> String {
    value
        .trim_matches('/')
        .split(|c: char| c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
/// Associates a tag with the site path `kind/slug`.
///
/// Two keys are written: the normalized slug and, if non-empty, the
/// normalized title (or the slug again when `title` is `None`). The call is
/// a no-op for an empty slug or when the write lock is unavailable.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }

    let path = format!("{kind}/{slug}");
    let mut map = match self.tag_map.write() {
        Ok(guard) => guard,
        Err(_) => return,
    };

    map.insert(Self::normalize_key(slug), path.clone());
    let title_key = Self::normalize_key(title.unwrap_or(slug));
    if !title_key.is_empty() {
        map.insert(title_key, path);
    }
}
/// Finds the listing path a query should be routed to, if any.
///
/// `sites:<value>` / `models:<value>` queries (leading `@` optional) map
/// straight to `kind/value` with spaces replaced by `-`; anything else is
/// normalized and looked up in the tag map. `None` means the query is not a
/// known tag.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let trimmed = query.trim().trim_start_matches('@');

    let explicit = trimmed.split_once(':').and_then(|(kind, raw_value)| {
        let kind = kind.trim().to_ascii_lowercase();
        let value = raw_value.trim().trim_matches('/').replace(' ', "-");
        let routable = matches!(kind.as_str(), "sites" | "models");
        (routable && !value.is_empty()).then(|| format!("{kind}/{value}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let normalized = Self::normalize_key(trimmed);
    if normalized.is_empty() {
        return None;
    }
    let map = self.tag_map.read().ok()?;
    map.get(&normalized).cloned()
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "pornhat".to_string(),
@@ -127,16 +181,8 @@ impl PornhatProvider {
) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -296,7 +342,8 @@ impl PornhatProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@sites:{}", tag));
self.insert_tag_mapping("sites", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -316,7 +363,8 @@ impl PornhatProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@models:{}", tag));
self.insert_tag_mapping("models", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -1,16 +1,21 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::collections::HashSet;
use std::sync::{Arc, RwLock};
use std::thread;
use url::Url;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
@@ -18,10 +23,15 @@ pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
tags: &["mainstream", "studio", "general"],
};
const BASE_URL: &str = "https://www.pornhub.com";
const CHANNEL_ID: &str = "pornhub";
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
Url(url::ParseError);
}
errors {
Parse(msg: String) {
@@ -34,20 +44,133 @@ error_chain! {
/// Provider for pornhub.com listings.
#[derive(Debug, Clone)]
pub struct PornhubProvider {
// Site base URL used to build listing URLs.
url: String,
// Lowercased title or slug -> creator info; shared between clones through the `Arc`.
tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
}
/// Which kind of listing page a request targets; `parse_listing_page` uses it to
/// pick the container element to read video cards from.
#[derive(Debug, Clone, Copy)]
enum ListingScope {
// Plain `/video` chart pages.
Browse,
// `/video/search` result pages.
Search,
// A channel / pornstar / model / user video page.
Creator,
}
/// The creator categories a query can resolve to; each maps to a URL path
/// segment via `path_segment`.
#[derive(Debug, Clone, Copy)]
enum QueryTargetKind {
Channel,
Pornstar,
Model,
User,
}
/// A query that has been resolved to a specific creator page.
#[derive(Debug, Clone)]
struct QueryTarget {
// Creator category, which decides the URL path segment.
kind: QueryTargetKind,
// URL slug of the creator within that category.
slug: String,
}
/// A creator entry stored in the provider's tag map.
#[derive(Debug, Clone)]
struct TagInfo {
// Creator category.
kind: QueryTargetKind,
// URL slug of the creator.
slug: String,
// Display title as scraped from the link text.
title: String,
}
impl QueryTargetKind {
fn path_segment(self) -> &'static str {
match self {
Self::Channel => "channels",
Self::Pornstar => "pornstar",
Self::Model => "model",
Self::User => "users",
}
}
}
impl PornhubProvider {
pub fn new() -> Self {
Self {
url: "https://www.pornhub.com".to_string(),
let provider = Self {
url: BASE_URL.to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
};
provider.spawn_initial_load();
provider
}
/// Starts a detached OS thread that fills the tag map in the background.
///
/// The thread builds its own single-threaded Tokio runtime and drives `load_tags`
/// to completion. A load failure is ignored; failure to build the runtime panics
/// inside the spawned thread.
fn spawn_initial_load(&self) {
    let shared_tags = Arc::clone(&self.tag_map);
    let base_url = self.url.clone();
    thread::spawn(move || {
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();
        let warm_up = async {
            // Best effort: an empty map only means queries fall back to other routes.
            let _ = Self::load_tags(&base_url, shared_tags).await;
        };
        runtime.block_on(warm_up);
    });
}
/// Loads the creator listings for every kind, one after another, into `tag_map`.
///
/// Stops at the first kind whose load fails and returns that error; kinds loaded
/// before the failure stay in the map.
// NOTE(review): the segments passed here ("channel", "user") differ from
// `QueryTargetKind::path_segment` ("channels", "users") — confirm against the site.
async fn load_tags(
    base_url: &str,
    tag_map: Arc<RwLock<HashMap<String, TagInfo>>>,
) -> Result<()> {
    let kinds = [
        ("channel", QueryTargetKind::Channel),
        ("pornstar", QueryTargetKind::Pornstar),
        ("model", QueryTargetKind::Model),
        ("user", QueryTargetKind::User),
    ];
    for (segment, kind) in kinds {
        Self::load_kind(base_url, segment, kind, &tag_map).await?;
    }
    Ok(())
}
/// Fetches `{base_url}/{path_segment}/top` and records every creator link found on it.
///
/// Each anchor whose `href` starts with `/{path_segment}/` and that has both a
/// non-empty slug and non-empty link text is stored in `tag_map` twice: under its
/// lowercased title and under its lowercased slug.
///
/// # Errors
/// Returns a `Parse` error when the HTTP request fails or the selector is invalid.
///
/// # Panics
/// Panics if the tag-map lock is poisoned.
async fn load_kind(
    base_url: &str,
    path_segment: &str,
    kind: QueryTargetKind,
    tag_map: &Arc<RwLock<HashMap<String, TagInfo>>>,
) -> Result<()> {
    let listing_url = format!("{}/{}/top", base_url, path_segment);
    let mut requester = crate::util::requester::Requester::new();
    let body = requester
        .get(&listing_url, None)
        .await
        .map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?;
    let document = Html::parse_document(&body);
    let anchors = Self::selector(&format!("a[href^='/{}/']", path_segment))?;
    for anchor in document.select(&anchors) {
        let Some(href) = anchor.attr("href") else {
            continue;
        };
        let Some(slug) = Self::slug_from_url(href, path_segment) else {
            continue;
        };
        let title = anchor.text().collect::<String>().trim().to_string();
        if title.is_empty() || slug.is_empty() {
            continue;
        }
        let info = TagInfo { kind, slug, title };
        let mut map = tag_map.write().unwrap();
        map.insert(info.title.to_ascii_lowercase(), info.clone());
        map.insert(info.slug.to_ascii_lowercase(), info);
    }
    Ok(())
}
/// Extracts the creator slug from a link of the form `/{path_segment}/{slug}[/...]`.
///
/// Accepts site-relative hrefs (`/model/foo`), protocol-relative URLs
/// (`//host/model/foo`) and absolute URLs (`https://host/model/foo`). Any query
/// string or fragment is ignored.
///
/// Returns `None` when the first path segment is not `path_segment` or when no
/// non-empty slug follows it.
fn slug_from_url(url: &str, path_segment: &str) -> Option<String> {
    let trimmed = url.trim();

    // Only treat "://" as a scheme separator when what precedes it looks like a
    // scheme; otherwise a relative href carrying a URL in its query string
    // ("/model/foo?next=http://x") would be misread as absolute.
    let after_authority_marker = match trimmed.split_once("://") {
        Some((scheme, rest))
            if !scheme.is_empty()
                && scheme
                    .chars()
                    .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '+' | '-' | '.')) =>
        {
            Some(rest)
        }
        _ => trimmed.strip_prefix("//"),
    };

    // Skip the host part of absolute URLs; relative hrefs are already a path.
    let path = match after_authority_marker {
        Some(rest) => rest.find('/').map_or("", |idx| &rest[idx..]),
        None => trimmed,
    };
    let path = path
        .split(|ch: char| ch == '?' || ch == '#')
        .next()
        .unwrap_or("");

    let mut segments = path.strip_prefix('/').unwrap_or(path).split('/');
    if segments.next() != Some(path_segment) {
        return None;
    }
    segments
        .next()
        .filter(|slug| !slug.is_empty())
        .map(str::to_string)
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "pornhub".to_string(),
id: CHANNEL_ID.to_string(),
name: "Pornhub".to_string(),
description: "Pornhub Free Videos".to_string(),
description: "Pornhub listings with creator queries and direct HLS playback links."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
status: "active".to_string(),
@@ -55,13 +178,13 @@ impl PornhubProvider {
options: vec![ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Sort the Videos".to_string(),
description: "Browse Pornhub charts by sort order.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "mr".to_string(),
title: "Most Recent".to_string(),
title: "Featured Recently".to_string(),
},
FilterOption {
id: "mv".to_string(),
@@ -71,6 +194,10 @@ impl PornhubProvider {
id: "tr".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "ht".to_string(),
title: "Hottest".to_string(),
},
FilterOption {
id: "lg".to_string(),
title: "Longest".to_string(),
@@ -87,225 +214,434 @@ impl PornhubProvider {
}
}
async fn get(
&self,
cache: VideoCache,
page: u8,
sort: &str,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let video_url = format!("{}/video?o={}&page={}", self.url, sort, page);
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value).map_err(|error| {
Error::from(ErrorKind::Parse(format!(
"selector parse failed for {value}: {error}"
)))
})
}
let old_items = match cache.get(&video_url) {
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
/// Returns the element's text content with text nodes separated by spaces and all
/// whitespace runs collapsed to a single space.
fn text_of(element: &ElementRef<'_>) -> String {
    let joined = element.text().collect::<Vec<_>>().join(" ");
    let words: Vec<&str> = joined.split_whitespace().collect();
    words.join(" ")
}
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
/// Decodes HTML entities in `value`; if decoding fails the input is returned unchanged.
fn decode_html(value: &str) -> String {
    match decode(value.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => value.to_string(),
    }
}
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
/// Converts a possibly relative link into an absolute URL string.
///
/// Values that already parse as a URL are returned in their parsed form; anything
/// else is resolved against `BASE_URL`. Blank input, or input that cannot be
/// resolved, yields an empty string.
fn normalize_url(&self, value: &str) -> String {
    let candidate = value.trim();
    if candidate.is_empty() {
        return String::new();
    }
    match Url::parse(candidate) {
        Ok(absolute) => absolute.to_string(),
        Err(_) => {
            let joined = Url::parse(BASE_URL).and_then(|base| base.join(candidate));
            match joined {
                Ok(resolved) => resolved.to_string(),
                Err(_) => String::new(),
            }
        }
    }
}
let video_items = self.get_video_items_from_html(text, "<ul id=\"video");
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
Ok(video_items)
/// Maps a requested sort value onto one of the supported sort codes.
///
/// Matching ignores surrounding whitespace and ASCII case. `mv`, `tr`, `ht`, `lg`
/// and `cm` are passed through; every other value (including `mr`, `new` and
/// anything containing `date`) becomes the default `mr`.
fn normalize_sort(sort: &str) -> &'static str {
    const PASSTHROUGH: [&str; 5] = ["mv", "tr", "ht", "lg", "cm"];
    let wanted = sort.trim().to_ascii_lowercase();
    for code in PASSTHROUGH {
        if code == wanted {
            return code;
        }
    }
    "mr"
}
async fn query(
&self,
cache: VideoCache,
page: u8,
query: &str,
sort: &str,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let mut split_string = "<ul id=\"video";
let search_string = query.to_lowercase().trim().replace(' ', "+");
fn parse_query_target(&self, query: &str) -> Option<QueryTarget> {
let normalized = query.trim().to_ascii_lowercase();
if let Some(info) = self.tag_map.read().unwrap().get(&normalized) {
return Some(QueryTarget {
kind: info.kind,
slug: info.slug.clone(),
});
}
// Fallback to kind:slug without @
let trimmed = query.trim();
let (kind_str, raw_slug) = trimmed.split_once(':')?;
let slug = raw_slug
.trim()
.trim_matches('/')
.replace(' ', "-")
.to_ascii_lowercase();
if slug.is_empty() {
return None;
}
let mut video_url = format!(
"{}/video/search?search={}&page={}",
self.url, search_string, page
let kind = match kind_str.trim().to_ascii_lowercase().as_str() {
"channel" | "channels" => QueryTargetKind::Channel,
"pornstar" | "pornstars" => QueryTargetKind::Pornstar,
"model" | "models" => QueryTargetKind::Model,
"user" | "users" => QueryTargetKind::User,
_ => return None,
};
Some(QueryTarget { kind, slug })
}
/// Builds the URL of a browse (`/video`) page.
///
/// The default sort `mr` is expressed by omitting the `o` parameter; every other
/// normalized sort code is passed as `o=<code>`.
fn build_browse_url(&self, page: u8, sort: &str) -> String {
    match Self::normalize_sort(sort) {
        "mr" => format!("{}/video?page={page}", self.url),
        order => format!("{}/video?o={order}&page={page}", self.url),
    }
}
fn build_creator_url(&self, page: u8, sort: &str, target: &QueryTarget) -> String {
let mut url = format!(
"{}/{}/{}/videos?page={page}",
self.url,
target.kind.path_segment(),
target.slug
);
if query.starts_with('@') {
let mut parts = query[1..].split(':');
let a = parts.next().unwrap_or("");
let b = parts.next().unwrap_or("");
video_url = format!(
"{}/{}/{}/videos?page={}",
self.url,
a,
b.replace(' ', "-"),
page
);
let mapped_sort = match target.kind {
QueryTargetKind::Channel => match Self::normalize_sort(sort) {
"mv" => Some("vi"),
"tr" => Some("ra"),
_ => None,
},
_ => match Self::normalize_sort(sort) {
"mv" => Some("mv"),
"tr" => Some("tr"),
"lg" => Some("lg"),
_ => None,
},
};
if query.contains("@model") || query.contains("@pornstar") {
split_string = "mostRecentVideosSection";
}
if query.contains("@channels") {
split_string = "<ul class=\"videos row-5-thumbs";
}
if let Some(order) = mapped_sort {
url.push_str("&o=");
url.push_str(order);
}
video_url.push_str(match (query.contains("@channels"), sort) {
(true, "mv") => "&o=vi",
(true, "tr") => "&o=ra",
(false, "mv") => "&o=mv",
(false, "tr") => "&o=tr",
(false, "lg") => "&o=lg",
_ => "",
});
url
}
let old_items = match cache.get(&video_url) {
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
fn build_listing_request(
&self,
page: u8,
sort: &str,
query: Option<&str>,
) -> (String, ListingScope) {
match query.map(str::trim).filter(|value| !value.is_empty()) {
Some(query) => {
if let Some(target) = self.parse_query_target(query) {
(
self.build_creator_url(page, sort, &target),
ListingScope::Creator,
)
} else {
let encoded = query.to_ascii_lowercase().replace(' ', "+");
(
format!("{}/video/search?search={encoded}&page={page}", self.url),
ListingScope::Search,
)
}
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, split_string);
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
Ok(video_items)
None => (self.build_browse_url(page, sort), ListingScope::Browse),
}
}
fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> {
let content = match html.split(split_string).nth(1) {
Some(c) => c,
None => return vec![],
fn parse_listing_page(&self, html: &str, scope: ListingScope) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
let item_selector = Self::selector("li.pcVideoListItem")?;
let container_selectors = match scope {
ListingScope::Browse => vec!["#videoCategory"],
ListingScope::Search => vec!["#videoSearchResult"],
ListingScope::Creator => vec!["#showAllChanelVideos", "#mostRecentVideosSection"],
};
let content = content.split("Porn in German").next().unwrap_or("");
for selector_text in container_selectors {
let container_selector = Self::selector(selector_text)?;
if let Some(container) = document.select(&container_selector).next() {
if container.select(&item_selector).next().is_some() {
return self.parse_listing_items(container);
}
}
}
Err(ErrorKind::Parse(format!("missing listing container for scope {scope:?}")).into())
}
fn parse_listing_items(&self, container: ElementRef<'_>) -> Result<Vec<VideoItem>> {
let item_selector = Self::selector("li.pcVideoListItem")?;
let link_selector = Self::selector("a[href*=\"/view_video.php\"]")?;
let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?;
let image_selector = Self::selector("img")?;
let duration_selector = Self::selector(".duration")?;
let views_selector = Self::selector(".views var, .views")?;
let rating_selector =
Self::selector(".value, .rating, .ratingInfo, .percent, .ratingPercent")?;
let tag_link_selector = Self::selector(
"a[href*=\"/categories/\"], a[href*=\"/video/search\"], a[href*=\"/pornstar/\"], a[href*=\"/model/\"], a[href*=\"/channels/\"], a[href*=\"/users/\"]",
)?;
let uploader_selector = Self::selector(
".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]",
)?;
let verified_selector = Self::selector(".verified-icon, .channel-icon")?;
let mut items = Vec::new();
let mut seen_ids = HashSet::new();
for seg in content
.split("class=\"pcVideoListItem ")
.skip(1)
.filter(|s| !s.contains("wrapVideoBlock"))
{
let url_part = seg
.split("<a href=\"")
.nth(1)
.or_else(|| seg.split("data-video-vkey=\"").nth(1))
.and_then(|s| s.split('"').next());
let video_url = match url_part {
Some(u) if !u.is_empty() && u != "javascript:void(0)" => {
format!("{}{}", self.url, u)
}
_ => continue,
for card in container.select(&item_selector) {
let Some(link) = card.select(&link_selector).next() else {
continue;
};
let mut title = seg
.split("\" title=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let href = link.value().attr("href").unwrap_or_default();
let page_url = self.normalize_url(href);
if page_url.is_empty() || !page_url.contains("/view_video.php") {
continue;
}
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let id = match seg
.split("data-video-id=\"")
.nth(1)
.and_then(|s| s.split('"').next())
{
Some(id) => id.to_string(),
None => continue,
let id = card
.value()
.attr("data-video-vkey")
.filter(|value| !value.trim().is_empty())
.map(ToOwned::to_owned)
.or_else(|| Self::viewkey_from_url(&page_url))
.or_else(|| {
card.value()
.attr("data-video-id")
.filter(|value| !value.trim().is_empty())
.map(ToOwned::to_owned)
});
let Some(id) = id else {
continue;
};
if !seen_ids.insert(id.clone()) {
continue;
}
let raw_duration = seg
.split("duration")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.unwrap_or("0:00");
let title = link
.value()
.attr("title")
.filter(|value| !value.trim().is_empty())
.map(Self::decode_html)
.or_else(|| {
card.select(&title_selector)
.next()
.map(|value| Self::decode_html(&Self::text_of(&value)))
})
.unwrap_or_default();
if title.is_empty() {
continue;
}
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
let image = card.select(&image_selector).next();
let thumb = image
.as_ref()
.and_then(|value| {
value
.value()
.attr("src")
.or_else(|| value.value().attr("data-mediumthumb"))
.or_else(|| value.value().attr("data-path"))
.or_else(|| value.value().attr("data-src"))
})
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let views = seg
.split("iews\">")
.filter_map(|p| p.split("<var>").nth(1))
let duration = card
.select(&duration_selector)
.next()
.and_then(|v| v.split('<').next())
.and_then(|v| parse_abbreviated_number(v))
.unwrap_or(0);
.map(|value| Self::text_of(&value))
.and_then(|value| parse_time_to_seconds(&value))
.unwrap_or(0) as u32;
let thumb = seg
.split("src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let views = card.select(&views_selector).find_map(|value| {
let text = Self::text_of(&value);
parse_abbreviated_number(&text)
.or_else(|| parse_abbreviated_number(text.replace("views", "").trim()))
});
let rating = card.select(&rating_selector).find_map(|value| {
let text = Self::text_of(&value);
let cleaned = text
.trim()
.trim_end_matches('%')
.replace(',', "")
.replace(' ', "");
cleaned.parse::<f32>().ok()
});
let (tag, uploader) = if seg.contains("videoUploaderBlock") {
let href = seg
.split("videoUploaderBlock")
.nth(1)
.and_then(|s| s.split("href=\"").nth(1))
.and_then(|s| s.split('"').next())
.unwrap_or("");
let uploader_link = card.select(&uploader_selector).next();
let uploader = uploader_link
.as_ref()
.map(|value| Self::decode_html(&Self::text_of(value)))
.filter(|value| !value.is_empty());
let uploader_url = uploader_link
.and_then(|value| value.value().attr("href"))
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty());
let parts: Vec<&str> = href.split('/').collect();
if let (Some(kind), Some(name)) = (parts.get(1), parts.get(2)) {
(
Some(format!("@{}:{}", kind, name.replace('-', " "))),
Some((*name).to_string()),
)
} else {
(None, None)
}
} else {
(None, None)
};
let mut item =
VideoItem::new(id, title, page_url, CHANNEL_ID.to_string(), thumb, duration);
item.views = views;
let preview_url = image
.and_then(|value| value.value().attr("data-mediabook"))
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty());
item.preview = preview_url.clone();
// if preview_url.is_some() {
// let mut format = VideoFormat::new(
// item.url.clone(),
// "preview".to_string(),
// "video/mp4".to_string(),
// );
// format.add_http_header("Referer".to_string(), item.url.clone());
// item.formats = Some(vec![format]);
// }
item.verified = card.select(&verified_selector).next().map(|_| true);
item.uploader = uploader.clone();
item.uploaderUrl = uploader_url.clone();
item.uploaderId = uploader_url
.as_deref()
.and_then(Self::uploader_identity_from_url);
item.rating = rating;
let mut item = VideoItem::new(id, title, video_url, "pornhub".into(), thumb, duration);
if views > 0 {
item = item.views(views);
let mut tags = Vec::new();
if let Some(tag) = uploader_url
.as_deref()
.and_then(|url| self.query_tag_from_uploader_url(url))
{
Self::push_unique(&mut tags, tag);
}
if let Some(t) = tag {
item = item.tags(vec![t]);
for tag_link in card.select(&tag_link_selector) {
let tag = Self::decode_html(&Self::text_of(&tag_link));
Self::push_unique(&mut tags, tag);
}
if let Some(u) = uploader {
item = item.uploader(u);
if !tags.is_empty() {
item.tags = Some(tags);
}
items.push(item);
}
items
Ok(items)
}
/// Returns the value of the first `viewkey` query parameter of an absolute URL,
/// or `None` when the URL does not parse or has no such parameter.
fn viewkey_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    for (key, value) in parsed.query_pairs() {
        if key == "viewkey" {
            return Some(value.into_owned());
        }
    }
    None
}
/// Derives a namespaced uploader id, `"{CHANNEL_ID}:{kind}:{slug}"`, from the first
/// two path segments of an absolute uploader URL.
///
/// Returns `None` when the URL does not parse, has fewer than two path segments,
/// or either segment is empty.
fn uploader_identity_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let mut parts = parsed.path_segments()?;
    let kind = parts.next()?.trim_matches('/');
    let slug = parts.next()?.trim_matches('/');
    let complete = !kind.is_empty() && !slug.is_empty();
    complete.then(|| format!("{CHANNEL_ID}:{kind}:{slug}"))
}
/// Produces the tag text for an uploader, given the uploader's absolute URL.
///
/// If the URL's slug (second path segment, lowercased) is present in the tag map
/// the stored title is used; otherwise the slug with `-` replaced by spaces.
/// Returns `None` when the URL does not parse or lacks two non-empty segments.
///
/// # Panics
/// Panics if the tag-map lock is poisoned.
fn query_tag_from_uploader_url(&self, url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let mut parts = parsed.path_segments()?;
    let kind = parts.next()?.trim_matches('/');
    let slug = parts.next()?.trim_matches('/');
    if kind.is_empty() || slug.is_empty() {
        return None;
    }
    let known_title = self
        .tag_map
        .read()
        .unwrap()
        .get(&slug.to_ascii_lowercase())
        .map(|info| info.title.clone());
    Some(known_title.unwrap_or_else(|| slug.replace('-', " ")))
}
/// Appends the trimmed `value` to `values` unless it is blank or an entry equal to
/// it (ignoring ASCII case) is already present.
fn push_unique(values: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !candidate.is_empty() && !already_present {
        values.push(candidate.to_string());
    }
}
/// Fetches one listing page (browse, search or creator) and returns its videos.
///
/// Cached results younger than five minutes are returned without a request.
/// On a request failure, a parse failure, or an empty parse result, the stale
/// cached items (possibly an empty list) are returned instead; request and parse
/// failures are also reported through `report_provider_error`. A successful,
/// non-empty result replaces the cache entry for the URL.
///
/// # Panics
/// Panics if the tag-map lock is poisoned.
async fn fetch_listing(
&self,
cache: VideoCache,
page: u8,
sort: &str,
query: Option<&str>,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
// Queries may name a creator, so make one attempt to fill an empty tag map
// first. The load result is ignored; while the map stays empty this runs again
// on every query.
if query.is_some() && self.tag_map.read().unwrap().is_empty() {
let _ = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await;
}
let (video_url, scope) = self.build_listing_request(page, sort, query);
// Fresh entry (< 60 * 5 seconds old): serve it. Otherwise keep the stale items
// as a fallback for the failure paths below.
let old_items = match cache.get(&video_url) {
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_listing.requester");
let text = match requester.get(&video_url, None).await {
Ok(text) => text,
Err(error) => {
report_provider_error(
CHANNEL_ID,
"fetch_listing.request",
&format!("url={video_url}; error={error}"),
)
.await;
return Ok(old_items);
}
};
let items = match self.parse_listing_page(&text, scope) {
Ok(items) => items,
Err(error) => {
report_provider_error(
CHANNEL_ID,
"fetch_listing.parse",
&format!("url={video_url}; error={error}"),
)
.await;
return Ok(old_items);
}
};
// An empty page is treated like a failure so stale results are not discarded.
if items.is_empty() {
return Ok(old_items);
}
cache.remove(&video_url);
cache.insert(video_url, items.clone());
Ok(items)
}
}
@@ -325,29 +661,167 @@ impl Provider for PornhubProvider {
let _ = per_page;
let page = page.parse::<u8>().unwrap_or(1);
let mut sort = match sort.as_str() {
"mv" => "mv",
"tr" => "tr",
"cm" => "cm",
"lg" => "lg",
_ => "mr",
};
if sort.contains("date") {
sort = "mr".into();
let sort = Self::normalize_sort(&sort).to_string();
match self
.fetch_listing(cache, page, &sort, query.as_deref(), options)
.await
{
Ok(items) => items,
Err(error) => {
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
vec![]
}
}
let res = match query {
Some(q) => self.query(cache, page, &q, &sort, options).await,
None => self.get(cache, page, &sort, options).await,
};
res.unwrap_or_else(|e| {
eprintln!("PornhubProvider error: {e}");
vec![]
})
}
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
Some(self.build_channel(clientversion))
}
}
// Unit tests for query resolution and listing-page parsing.
// NOTE(review): every test calls `PornhubProvider::new()`, which starts the
// background tag load; results that depend on the tag map being empty or holding
// only the inserted entries could be affected if that load populates it — confirm.
#[cfg(test)]
mod tests {
use super::*;
// Explicit `kind:slug` queries resolve to a creator target; slugs are lowercased
// and spaces become hyphens. A bare keyword is not a creator target.
#[test]
fn parses_creator_queries() {
let provider = PornhubProvider::new();
let target = provider
.parse_query_target("channels:Brazzers")
.expect("channel target should parse");
assert!(matches!(target.kind, QueryTargetKind::Channel));
assert_eq!(target.slug, "brazzers");
let target = provider
.parse_query_target("pornstar:Alex Mack")
.expect("pornstar target should parse");
assert!(matches!(target.kind, QueryTargetKind::Pornstar));
assert_eq!(target.slug, "alex-mack");
assert!(provider.parse_query_target("teacher").is_none());
}
// Entries in the tag map are found both by slug and by (case-insensitive) title.
#[test]
fn resolves_query_from_tag_map_by_id_or_title() {
let provider = PornhubProvider::new();
{
let mut map = provider.tag_map.write().unwrap();
let info = TagInfo {
kind: QueryTargetKind::Channel,
slug: "mature-4k".to_string(),
title: "Mature 4K".to_string(),
};
map.insert("mature-4k".to_string(), info.clone());
map.insert("mature 4k".to_string(), info);
}
let by_id = provider
.parse_query_target("mature-4k")
.expect("id lookup should resolve");
assert!(matches!(by_id.kind, QueryTargetKind::Channel));
assert_eq!(by_id.slug, "mature-4k");
let by_title = provider
.parse_query_target("Mature 4K")
.expect("title lookup should resolve");
assert!(matches!(by_title.kind, QueryTargetKind::Channel));
assert_eq!(by_title.slug, "mature-4k");
}
// A browse card yields id, uploader, uploader URL, views, duration, preview and
// an uploader-derived tag; the non-video `li` is skipped.
#[test]
fn parses_browse_listing_cards() {
let provider = PornhubProvider::new();
let html = r#"
<ul id="videoCategory" class="nf-videos videos search-video-thumbs">
<li class="sniperModeEngaged"></li>
<li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
data-video-id="466705435"
data-video-vkey="67ed937c986b1">
<a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
<img src="https://example.com/thumb.jpg"
data-mediabook="https://example.com/preview.webm" />
<div class="marker-overlays"><var class="duration">12:18</var></div>
<div class="videoUploaderBlock">
<div class="usernameWrap">
<a href="/model/honeycore">Honeycore</a>
</div>
</div>
<div class="videoDetailsBlock">
<span class="views"><var>199K</var> views</span>
</div>
</li>
</ul>
"#;
let items = provider
.parse_listing_page(html, ListingScope::Browse)
.expect("browse listing should parse");
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "67ed937c986b1");
assert_eq!(items[0].uploader.as_deref(), Some("Honeycore"));
assert_eq!(
items[0].uploaderUrl.as_deref(),
Some("https://www.pornhub.com/model/honeycore")
);
assert_eq!(items[0].views, Some(199000));
assert_eq!(items[0].duration, 738);
assert_eq!(
items[0].preview.as_deref(),
Some("https://example.com/preview.webm")
);
assert!(items[0].tags.as_ref().is_some_and(|values| {
values
.iter()
.any(|value| value.eq_ignore_ascii_case("honeycore"))
}));
}
// Thumb (from `data-src`), preview, views, rating and tag links are all read from
// the listing card itself.
#[test]
fn parses_listing_metadata_without_detail_fetch() {
let provider = PornhubProvider::new();
let html = r#"
<ul id="videoCategory" class="nf-videos videos search-video-thumbs">
<li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
data-video-id="466705435"
data-video-vkey="67ed937c986b1">
<a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
<img data-src="https://example.com/thumb.jpg"
data-mediabook="https://example.com/preview.webm" />
<div class="marker-overlays"><var class="duration">12:18</var></div>
<div class="videoDetailsBlock">
<span class="views"><var>199K</var> views</span>
<span class="value">95%</span>
</div>
<a href="/categories/anal">Anal</a>
<a href="/pornstar/jane-doe">Jane Doe</a>
</li>
</ul>
"#;
let items = provider
.parse_listing_page(html, ListingScope::Browse)
.expect("browse listing should parse");
assert_eq!(items.len(), 1);
assert_eq!(items[0].thumb, "https://example.com/thumb.jpg");
assert_eq!(
items[0].preview.as_deref(),
Some("https://example.com/preview.webm")
);
assert_eq!(items[0].views, Some(199000));
assert_eq!(items[0].rating, Some(95.0));
assert!(
items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "Anal"))
);
assert!(
items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "Jane Doe"))
);
}
}

View File

@@ -0,0 +1,566 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use serde::Deserialize;
/// Static grouping and tag metadata for the PH Shorties channel.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["shorts", "pornhub", "vertical"],
};
/// Site root that all Shorties URLs are built from.
const BASE_URL: &str = "https://www.pornhub.com";
/// Channel identifier; also used as the namespace prefix of uploader ids.
const CHANNEL_ID: &str = "ph-shorties";
// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
// Errors from other crates that convert into `Error` automatically.
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
}
errors {
// Raised when page content does not have the expected shape.
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for Pornhub's "Shorties" short-clip feed.
#[derive(Debug, Clone)]
pub struct PornhubShortiesProvider {
// Site base URL; initialised from `BASE_URL`.
url: String,
}
/// What a Shorties request asks for; consumed by `build_url`.
#[derive(Debug, Clone)]
enum Target {
// The unfiltered feed in the given (normalized) sort order.
Home { sort: String },
// Feed filtered by a search term, in the given (normalized) sort order.
Search { query: String, sort: String },
}
/// One entry of the `JSON_SHORTIES` array embedded in the Shorties page.
///
/// JSON keys are the camelCase form of the field names. `vkey`, `videoTitle` and
/// `linkUrl` are required; the counters and `pillsData` fall back to their
/// defaults when absent; the remaining fields are optional.
#[derive(Debug, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
struct ShortieItem {
    vkey: String,
    video_title: String,
    link_url: String,
    image_url: Option<String>,
    #[serde(default)]
    like_number: u64,
    #[serde(default)]
    dislike_number: u64,
    name: Option<String>,
    profile_url: Option<String>,
    entity_id: Option<u64>,
    entity_type: Option<String>,
    tracking_time_watched: Option<TrackingTimeWatched>,
    #[serde(default)]
    pills_data: Vec<PillData>,
    badges: Option<Badges>,
}
/// The `trackingTimeWatched` object of a shortie; only the duration is read.
#[derive(Debug, Deserialize, Clone)]
struct TrackingTimeWatched {
    /// Value of the `video_duration` key; `0` when the key is absent.
    #[serde(default)]
    video_duration: u32,
}
#[derive(Debug, Deserialize, Clone)]
struct PillData {
#[serde(rename = "name")]
name: String,
}
/// The `badges` object of a shortie.
#[derive(Debug, Deserialize, Clone)]
struct Badges {
    /// Value of the `verified` key; `false` when the key is absent.
    #[serde(default)]
    verified: bool,
}
impl PornhubShortiesProvider {
/// Creates a provider rooted at `BASE_URL`.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
/// Chooses between the home feed and a search from the optional query.
///
/// A missing or whitespace-only query selects `Target::Home`; otherwise the
/// trimmed query selects `Target::Search`. The sort value is normalized either way.
fn build_target(sort: &str, query: Option<&str>) -> Target {
    let sort = Self::normalize_sort(sort).to_string();
    let term = query.map(str::trim).unwrap_or("");
    if term.is_empty() {
        Target::Home { sort }
    } else {
        Target::Search {
            query: term.to_string(),
            sort,
        }
    }
}
/// Maps a requested sort value (or one of its aliases) onto a canonical sort name.
///
/// Matching ignores surrounding whitespace and ASCII case; unrecognised values
/// fall back to `new`.
fn normalize_sort(sort: &str) -> &'static str {
    // (accepted spelling, canonical value)
    const ALIASES: [(&str, &str); 9] = [
        ("trending", "trending"),
        ("popular", "mostviewed"),
        ("mv", "mostviewed"),
        ("mostviewed", "mostviewed"),
        ("top_rated", "top_rated"),
        ("tr", "top_rated"),
        ("toprated", "top_rated"),
        ("hottest", "hottest"),
        ("ht", "hottest"),
    ];
    let key = sort.trim().to_ascii_lowercase();
    for (alias, canonical) in ALIASES {
        if alias == key {
            return canonical;
        }
    }
    "new"
}
fn build_url(base: &str, target: &Target, page: u8) -> String {
match target {
Target::Home { sort } => {
if sort == "new" {
if page <= 1 {
format!("{base}/shorties")
} else {
format!("{base}/shorties?page={page}")
}
} else {
if page <= 1 {
format!("{base}/shorties?sort={sort}")
} else {
format!("{base}/shorties?sort={sort}&page={page}")
}
}
}
Target::Search { query, sort } => {
let encoded = query.replace(' ', "+");
if sort == "new" {
if page <= 1 {
format!("{base}/shorties?search={encoded}")
} else {
format!("{base}/shorties?search={encoded}&page={page}")
}
} else {
if page <= 1 {
format!("{base}/shorties?search={encoded}&sort={sort}")
} else {
format!("{base}/shorties?search={encoded}&sort={sort}&page={page}")
}
}
}
}
}
/// Extracts and deserializes the `JSON_SHORTIES` array embedded in the page HTML.
///
/// The array is taken to start at the first `[` after the first occurrence of
/// the `JSON_SHORTIES` marker and to end at its matching `]`.
///
/// # Errors
/// Returns a `Parse` error when the marker or the opening bracket is missing,
/// when no matching closing bracket is found, or when the extracted text does
/// not deserialize into `Vec<ShortieItem>`.
fn extract_json_shorties(html: &str) -> Result<Vec<ShortieItem>> {
// The page embeds: JSON_SHORTIES = insertAfterNthPosition([{...}, ...], AD_POSITION, ...)
// We locate the array by finding the '[' after JSON_SHORTIES and matching brackets.
let marker = "JSON_SHORTIES";
let start = html.find(marker).ok_or_else(|| {
Error::from(ErrorKind::Parse(
"JSON_SHORTIES marker not found in HTML".to_string(),
))
})?;
// `find` on the sub-slice is relative to `start`, so add it back to get an
// offset into `html`.
let arr_start = html[start..].find('[').ok_or_else(|| {
Error::from(ErrorKind::Parse(
"JSON_SHORTIES array open bracket not found".to_string(),
))
})? + start;
// Walk the HTML to find the matching closing bracket.
let bytes = html.as_bytes();
let mut depth: i32 = 0;
let mut in_string = false;
let mut escape_next = false;
let mut arr_end = arr_start;
for (offset, &b) in bytes[arr_start..].iter().enumerate() {
// The byte after a backslash inside a string is consumed verbatim, so an
// escaped quote does not end the string.
if escape_next {
escape_next = false;
continue;
}
if b == b'\\' && in_string {
escape_next = true;
continue;
}
if b == b'"' {
in_string = !in_string;
continue;
}
// Brackets inside string literals do not count towards nesting.
if in_string {
continue;
}
match b {
b'[' => depth += 1,
b']' => {
depth -= 1;
if depth == 0 {
arr_end = arr_start + offset;
break;
}
}
_ => {}
}
}
// Non-zero depth means the input ended before the outer array was closed.
if depth != 0 {
return Err(
ErrorKind::Parse("JSON_SHORTIES array bracket mismatch".to_string()).into(),
);
}
// Both ends are ASCII bracket bytes, so this inclusive slice falls on character
// boundaries.
let raw = &html[arr_start..=arr_end];
let items: Vec<ShortieItem> = serde_json::from_str(raw).map_err(|e| {
Error::from(ErrorKind::Parse(format!("JSON_SHORTIES parse error: {e}")))
})?;
Ok(items)
}
/// Converts one parsed shortie entry into a [`VideoItem`].
///
/// Returns `None` when the entry has an empty `vkey`, a blank title, or a link that
/// is empty or does not contain `/view_video.php`.
fn item_to_video(item: ShortieItem) -> Option<VideoItem> {
    if item.vkey.is_empty() {
        return None;
    }
    let title = item.video_title.trim().to_string();
    if title.is_empty() {
        return None;
    }
    let url = item.link_url.trim().to_string();
    if url.is_empty() || !url.contains("/view_video.php") {
        return None;
    }

    // Thumbnail URLs may still carry JSON-style escaped slashes.
    let thumb = item
        .image_url
        .unwrap_or_default()
        .replace("\\/", "/")
        .trim()
        .to_string();
    let duration = item
        .tracking_time_watched
        .map_or(0, |tracking| tracking.video_duration);

    let mut video = VideoItem::new(
        item.vkey,
        title,
        url,
        CHANNEL_ID.to_string(),
        thumb,
        duration,
    );

    // Rating is the like share, in percent, of all votes.
    let votes = item.like_number + item.dislike_number;
    if votes > 0 {
        video.rating = Some((item.like_number as f32 / votes as f32) * 100.0);
    }

    // Uploader details are only filled in when a non-empty name is present.
    if let Some(uploader_name) = item.name.filter(|n| !n.is_empty()) {
        video.uploader = Some(uploader_name);
        let profile = item
            .profile_url
            .map(|u| u.replace("\\/", "/"))
            .filter(|u| !u.is_empty());
        if let Some(profile_url) = profile {
            video.uploaderUrl = Some(profile_url);
            // Namespaced uploader id: `<channel>:<kind>:<entity id>`.
            if let (Some(entity), Some(entity_id)) =
                (item.entity_type.as_deref(), item.entity_id)
            {
                let kind = match entity {
                    "Channel" => "channels",
                    "Pornstar" => "pornstar",
                    // "Mpp", "Model" and anything unrecognised map to "model".
                    _ => "model",
                };
                video.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{entity_id}"));
            }
        }
    }

    // Tags come from the pills data; an empty list leaves `tags` untouched.
    let tags: Vec<String> = item.pills_data.into_iter().map(|pill| pill.name).collect();
    if !tags.is_empty() {
        video.tags = Some(tags);
    }

    // `verified` is `Some(true)` only when the badge says so, otherwise `None`.
    video.verified = item.badges.and_then(|badges| badges.verified.then_some(true));

    Some(video)
}
/// Describes this provider as a [`Channel`]: static metadata plus a single
/// single-select "sort" option. The client version is currently ignored.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // Small constructor to keep the option list readable.
    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse Pornhub Shorties by sort order.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "orange".to_string(),
        options: vec![
            choice("new", "New"),
            choice("trending", "Trending"),
            choice("mostviewed", "Most Viewed"),
            choice("top_rated", "Top Rated"),
            choice("hottest", "Hottest"),
        ],
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "PH Shorties".to_string(),
        description: "Pornhub Shorties — vertical short-form porn clips.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Fetches one listing page and converts it into [`VideoItem`]s.
///
/// A cache entry younger than five minutes is returned directly. An older entry is
/// kept as a fallback and returned whenever the request fails, the embedded JSON
/// cannot be extracted, or no usable item is produced. Request and parse failures
/// are reported through `report_provider_error` and never surface as `Err`.
async fn fetch_videos(
    &self,
    cache: VideoCache,
    page: u8,
    sort: &str,
    query: Option<&str>,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let target = Self::build_target(sort, query);
    let fetch_url = Self::build_url(&self.url, &target, page);

    // Fresh cache hit short-circuits; a stale entry becomes the fallback.
    let stale_items = match cache.get(&fetch_url) {
        Some((stamp, cached)) => {
            if stamp.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                return Ok(cached.clone());
            }
            cached.clone()
        }
        None => vec![],
    };

    let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_videos");
    let headers = vec![
        ("Referer".to_string(), format!("{}/shorties", self.url)),
        (
            "Accept".to_string(),
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(),
        ),
        ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
    ];
    let body = match requester.get_with_headers(&fetch_url, headers, None).await {
        Ok(body) => body,
        Err(error) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_videos.request",
                &format!("url={fetch_url}; error={error}"),
            )
            .await;
            return Ok(stale_items);
        }
    };

    let shorties = match Self::extract_json_shorties(&body) {
        Ok(parsed) => parsed,
        Err(error) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_videos.parse",
                &format!("url={fetch_url}; error={error}"),
            )
            .await;
            return Ok(stale_items);
        }
    };

    let videos: Vec<VideoItem> = shorties
        .into_iter()
        .filter_map(Self::item_to_video)
        .collect();
    if videos.is_empty() {
        return Ok(stale_items);
    }

    // Replace whatever was cached for this URL with the fresh result.
    cache.remove(&fetch_url);
    cache.insert(fetch_url, videos.clone());
    Ok(videos)
}
}
#[async_trait]
impl Provider for PornhubShortiesProvider {
    /// Returns the videos for the requested sort/query/page.
    ///
    /// `pool` and `per_page` are unused by this provider. `page` is parsed as `u8`:
    /// a value that is not a number falls back to page 1, while a number too large
    /// for `u8` yields an empty list instead of silently wrapping back to page 1
    /// (which would hand a paginating client the first page again).
    /// Errors from `fetch_videos` are reported and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        let page = match page.parse::<u8>() {
            Ok(page) => page,
            // Beyond the range this provider can address: there is nothing to show.
            Err(error) if matches!(error.kind(), std::num::IntErrorKind::PosOverflow) => {
                return vec![];
            }
            Err(_) => 1,
        };
        match self
            .fetch_videos(cache, page, &sort, query.as_deref(), options)
            .await
        {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                vec![]
            }
        }
    }

    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
// Unit tests for the Pornhub Shorties provider: JSON extraction/conversion,
// URL building and sort normalisation.
#[cfg(test)]
mod tests {
use super::*;
// Feeds a minimal page containing one JSON_SHORTIES entry through
// `extract_json_shorties` and `item_to_video` and checks the mapped fields.
#[test]
fn parses_json_shorties_from_html() {
let html = r#"
var JSON_SHORTIES = insertAfterNthPosition([
{
"videoId": 123456,
"vkey": "abcdef1234567",
"videoTitle": "Test Short Video",
"favoriteInfo": "1K",
"likeInfo": "2K",
"likeNumber": 2000,
"dislikeNumber": 100,
"isHD": true,
"linkUrl": "https:\/\/www.pornhub.com\/view_video.php?viewkey=abcdef1234567",
"shortieUrl": "https:\/\/www.pornhub.com\/shorties\/abcdef1234567",
"embedUrl": "",
"imageUrl": "https:\/\/example.com\/thumb.jpg",
"mediaPriority": "hls",
"mediaDefinitions": [],
"isFavorite": false,
"isReported": false,
"isSubscribed": false,
"userVoteDetail": -1,
"trackingTimeWatched": {"video_duration": 45},
"pillsData": [{"name": "Amateur", "slug": "amateur", "type": "category"}],
"badges": {"verified": true, "premium": false, "award": false},
"name": "TestUser",
"profileUrl": "https:\/\/www.pornhub.com\/model\/testuser",
"entityType": "Mpp",
"entityId": 9876543
}
], AD_POSITION, {});
"#;
let items = PornhubShortiesProvider::extract_json_shorties(html)
.expect("should parse JSON_SHORTIES");
assert_eq!(items.len(), 1);
assert_eq!(items[0].vkey, "abcdef1234567");
assert_eq!(items[0].video_title, "Test Short Video");
assert_eq!(
items[0].link_url,
"https://www.pornhub.com/view_video.php?viewkey=abcdef1234567"
);
assert_eq!(items[0].like_number, 2000);
assert_eq!(items[0].dislike_number, 100);
assert_eq!(
items[0]
.tracking_time_watched
.as_ref()
.map(|t| t.video_duration),
Some(45)
);
assert_eq!(items[0].pills_data[0].name, "Amateur");
assert_eq!(items[0].name.as_deref(), Some("TestUser"));
let video = PornhubShortiesProvider::item_to_video(items[0].clone())
.expect("should convert to VideoItem");
assert_eq!(video.id, "abcdef1234567");
assert_eq!(video.duration, 45);
assert!(video.rating.is_some());
// 2000 likes out of 2100 votes is roughly 95.24 %.
assert!((video.rating.unwrap() - 95.23).abs() < 0.1);
assert_eq!(video.uploader.as_deref(), Some("TestUser"));
assert!(video.tags.as_ref().is_some_and(|t| t.contains(&"Amateur".to_string())));
}
// The default "new" sort adds no `sort` parameter; only pages above 1 add `page`.
#[test]
fn build_url_home_new() {
let url = PornhubShortiesProvider::build_url(
BASE_URL,
&Target::Home {
sort: "new".to_string(),
},
1,
);
assert_eq!(url, "https://www.pornhub.com/shorties");
let url2 = PornhubShortiesProvider::build_url(
BASE_URL,
&Target::Home {
sort: "new".to_string(),
},
2,
);
assert_eq!(url2, "https://www.pornhub.com/shorties?page=2");
}
// A first-page search with the default sort only carries the `search` parameter.
#[test]
fn build_url_search() {
let url = PornhubShortiesProvider::build_url(
BASE_URL,
&Target::Search {
query: "teen".to_string(),
sort: "new".to_string(),
},
1,
);
assert_eq!(url, "https://www.pornhub.com/shorties?search=teen");
}
// Known names pass through, aliases are mapped, unknown values fall back to "new".
#[test]
fn normalize_sort_variants() {
assert_eq!(PornhubShortiesProvider::normalize_sort("new"), "new");
assert_eq!(PornhubShortiesProvider::normalize_sort("trending"), "trending");
assert_eq!(PornhubShortiesProvider::normalize_sort("popular"), "mostviewed");
assert_eq!(PornhubShortiesProvider::normalize_sort("unknown"), "new");
}
}

987
src/providers/porntrex.rs Normal file
View File

@@ -0,0 +1,987 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use wreq::Version;
/// Static metadata for the PornTrex channel: the group it belongs to and its
/// descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "mixed", "hd"],
};
/// Site root used to build every listing, search, category and tag URL.
const BASE_URL: &str = "https://www.porntrex.com";
/// Channel identifier; also used as the prefix of uploader ids and in error reports.
const CHANNEL_ID: &str = "porntrex";
/// `User-Agent` header value sent with every HTML request (see `html_headers`).
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent with every HTML request (see `html_headers`).
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `std::io::Error` converts automatically via the `Io` foreign link; `Parse`
// carries a free-form message for parse failures.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for porntrex.com listings, searches, categories and tags.
///
/// Cloning is cheap with respect to the catalogs: `categories` and `tag_map` are
/// shared through `Arc`, so every clone sees the same loaded data.
#[derive(Debug, Clone)]
pub struct PorntrexProvider {
/// Site base URL (initialised from `BASE_URL`).
url: String,
/// Category filter options; seeded with an "All" entry and extended by
/// `load_categories`. Each loaded entry's `id` is the category archive URL.
categories: Arc<RwLock<Vec<FilterOption>>>,
/// Maps a normalized tag title or slug to its canonical tag archive URL
/// (filled by `load_tags`).
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
/// What a request should fetch; turned into a URL by `build_target_url`.
#[derive(Debug, Clone)]
enum Target {
/// The `/latest-updates/` feed.
Latest,
/// The `/most-popular/` feed.
Popular,
/// The `/top-rated/` feed.
TopRated,
/// A site search for the contained query (`/search/<query>/`).
Search(String),
/// An explicit archive URL (category or tag page) and how it is paged.
Archive {
url: String,
page_mode: PageMode,
},
}
/// How pagination is applied to an archive target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PageMode {
/// The page number is put into the request URL (`build_target_url`).
SitePaged,
/// The first page of the archive is always requested and `fetch_target` pages
/// through the parsed items locally with skip/take.
LocalSlice,
}
impl PorntrexProvider {
/// Creates a provider pointed at `BASE_URL` with an "All" category and an empty
/// tag map, then starts the background catalog load.
pub fn new() -> Self {
    let all_categories = FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    };
    let provider = Self {
        url: BASE_URL.to_string(),
        categories: Arc::new(RwLock::new(vec![all_categories])),
        tag_map: Arc::new(RwLock::new(HashMap::new())),
    };
    provider.spawn_initial_load();
    provider
}
/// Loads categories and tags on a dedicated OS thread.
///
/// The thread builds its own current-thread Tokio runtime and runs both loaders
/// on it. Every failure (runtime construction or either loader) is reported via
/// `report_provider_error_background`; nothing is returned to the caller.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let shared_categories = Arc::clone(&self.categories);
    let shared_tags = Arc::clone(&self.tag_map);

    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Ok(runtime) => runtime,
            Err(error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.runtime_build",
                    &error.to_string(),
                );
                return;
            }
        };

        runtime.block_on(async move {
            // Categories first, then tags; a failure in one does not skip the other.
            let categories_result = Self::load_categories(&base_url, shared_categories).await;
            if let Err(error) = categories_result {
                report_provider_error_background(
                    CHANNEL_ID,
                    "load_categories",
                    &error.to_string(),
                );
            }
            let tags_result = Self::load_tags(&base_url, shared_tags).await;
            if let Err(error) = tags_result {
                report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string());
            }
        });
    });
}
/// Describes this provider as a [`Channel`] with a "sort" option and a
/// "categories" option built from the currently loaded category list.
///
/// If the category lock is poisoned the list is treated as empty. The client
/// version is currently ignored.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let loaded_categories = match self.categories.read() {
        Ok(guard) => guard.clone(),
        Err(_) => Vec::new(),
    };
    let category_titles = loaded_categories
        .iter()
        .map(|entry| entry.title.clone())
        .collect();

    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse PornTrex ranking feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            choice("new", "Latest"),
            choice("popular", "Most Viewed"),
            choice("rated", "Top Rated"),
        ],
        multiSelect: false,
    };
    let category_option = ChannelOption {
        id: "categories".to_string(),
        title: "Categories".to_string(),
        description: "Browse a PornTrex category archive.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "orange".to_string(),
        options: loaded_categories,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "PornTrex".to_string(),
        description:
            "PornTrex videos with latest, most viewed, top rated, category, and tag-aware search routing."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=porntrex.com".to_string(),
        status: "active".to_string(),
        categories: category_titles,
        options: vec![sort_option, category_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Trims `text` and reduces every run of whitespace to a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Returns the element's text: all text nodes joined with spaces, whitespace
/// collapsed, then HTML entities decoded.
fn text_of(element: &ElementRef<'_>) -> String {
    let fragments: Vec<_> = element.text().collect();
    let joined = fragments.join(" ");
    let collapsed = Self::collapse_whitespace(&joined);
    Self::decode_html(&collapsed)
}
/// Produces the comparison key for a title or slug: surrounding whitespace and
/// leading `#` characters are removed, `_`, `-` and whitespace all act as word
/// separators, words are joined by single spaces, and ASCII letters are lowercased.
fn normalize_title(title: &str) -> String {
    let stripped = title.trim().trim_start_matches('#');
    let words = stripped
        .split(|c: char| c == '_' || c == '-' || c.is_whitespace())
        .filter(|word| !word.is_empty());

    let mut normalized = String::with_capacity(stripped.len());
    for word in words {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
/// Turns a possibly relative link into an absolute URL.
///
/// * blank input → empty string
/// * `http://` / `https://` → returned as-is (trimmed)
/// * `//host/...` → prefixed with `https:`
/// * `/path` → appended to the provider base URL
/// * anything else → joined to the base URL with `/`, after dropping leading `./`
fn normalize_url(&self, url: &str) -> String {
    let candidate = url.trim();
    if candidate.is_empty() {
        String::new()
    } else if candidate.starts_with("http://") || candidate.starts_with("https://") {
        candidate.to_string()
    } else if candidate.starts_with("//") {
        format!("https:{candidate}")
    } else if candidate.starts_with('/') {
        format!("{}{}", self.url, candidate)
    } else {
        format!("{}/{}", self.url, candidate.trim_start_matches("./"))
    }
}
/// Builds the request headers for an HTML page fetch: `User-Agent`, `Accept`
/// and the given `Referer`, in that order.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs = [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Converts a search query into the site's path form: each whitespace-separated
/// word is percent-encoded (everything non-alphanumeric) and the words are joined
/// with `-`.
fn build_search_path(query: &str) -> String {
    let mut encoded_words: Vec<String> = Vec::new();
    for word in query.split_whitespace() {
        encoded_words.push(utf8_percent_encode(word, NON_ALPHANUMERIC).to_string());
    }
    encoded_words.join("-")
}
/// Returns the URL of `page` within an archive.
///
/// Pages `0` and `1` are the archive root with exactly one trailing slash; later
/// pages append `/<page>/` to the root.
fn build_archive_page_url(archive_url: &str, page: u16) -> String {
    let root = archive_url.trim_end_matches('/');
    match page {
        0 | 1 => format!("{root}/"),
        _ => format!("{root}/{page}/"),
    }
}
/// Convenience constructor for [`Target::Archive`] from an archive URL and the
/// paging mode to use for it.
fn archive_target(url: String, page_mode: PageMode) -> Target {
Target::Archive { url, page_mode }
}
/// Resolves `target` and `page` to the URL that should be requested.
///
/// Feeds, searches and site-paged archives carry the page number in the URL.
/// `LocalSlice` archives always resolve to their first page, regardless of `page`.
fn build_target_url(&self, target: &Target, page: u16) -> String {
    let (archive_url, effective_page) = match target {
        Target::Latest => (format!("{}/latest-updates/", self.url), page),
        Target::Popular => (format!("{}/most-popular/", self.url), page),
        Target::TopRated => (format!("{}/top-rated/", self.url), page),
        Target::Search(query) => (
            format!("{}/search/{}/", self.url, Self::build_search_path(query)),
            page,
        ),
        Target::Archive {
            url,
            page_mode: PageMode::SitePaged,
        } => (url.clone(), page),
        Target::Archive {
            url,
            page_mode: PageMode::LocalSlice,
        } => (url.clone(), 1),
    };
    Self::build_archive_page_url(&archive_url, effective_page)
}
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Extracts the single path segment that follows `prefix` in `href`.
///
/// Returns `None` when `href` does not start with `prefix`, when nothing follows
/// the prefix, or when more than one path segment follows it.
fn slug_remainder(href: &str, prefix: &str) -> Option<String> {
    href.trim()
        .trim_end_matches('/')
        .strip_prefix(prefix)
        .map(|rest| rest.trim_matches('/'))
        .filter(|rest| !rest.is_empty() && !rest.contains('/'))
        .map(str::to_string)
}
/// Appends `item` to the shared category list unless it is incomplete or a duplicate.
///
/// An item is skipped when its id or title is empty, when an entry with the same id
/// exists, or when an existing entry has the same normalized title. A poisoned lock
/// silently drops the item.
fn push_category(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    if item.id.is_empty() || item.title.is_empty() {
        return;
    }
    let Ok(mut entries) = target.write() else {
        return;
    };
    let wanted = Self::normalize_title(&item.title);
    let already_known = entries
        .iter()
        .any(|entry| entry.id == item.id || Self::normalize_title(&entry.title) == wanted);
    if !already_known {
        entries.push(item);
    }
}
/// Records `href` under the normalized form of `title` in the shared tag map,
/// overwriting any previous mapping for that key.
///
/// Nothing is stored when the normalized title or `href` is empty, or when the
/// lock is poisoned.
fn insert_tag_mapping(target: &Arc<RwLock<HashMap<String, String>>>, title: &str, href: &str) {
    let key = Self::normalize_title(title);
    if key.is_empty() || href.is_empty() {
        return;
    }
    let Ok(mut mappings) = target.write() else {
        return;
    };
    mappings.insert(key, href.to_string());
}
/// Downloads `<base_url>/categories/` and adds every category link to `categories`.
///
/// Only `a.item[href]` links pointing at exactly one segment below the categories
/// root are considered. The title comes from the `title` attribute when it is
/// non-blank, otherwise from the link text. Each entry's id is the link with exactly
/// one trailing slash.
///
/// # Errors
/// Fails when the page cannot be fetched or the selector cannot be parsed.
async fn load_categories(
    base_url: &str,
    categories: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    // The listing URL doubles as the prefix every category link must start with.
    let listing_url = format!("{base_url}/categories/");
    let mut requester = Requester::new();
    let html = Self::fetch_html(&mut requester, &listing_url, &listing_url).await?;
    let document = Html::parse_document(&html);
    let anchors = Self::selector("a.item[href]")?;

    for anchor in document.select(&anchors) {
        let href = anchor.value().attr("href").unwrap_or_default();
        if Self::slug_remainder(href, &listing_url).is_none() {
            continue;
        }
        let raw_title = match anchor.value().attr("title").map(Self::decode_html) {
            Some(attr_title) if !attr_title.trim().is_empty() => attr_title,
            _ => Self::text_of(&anchor),
        };
        let title = raw_title.trim().to_string();
        if title.is_empty() {
            continue;
        }
        let id = format!("{}/", href.trim_end_matches('/'));
        Self::push_category(&categories, FilterOption { id, title });
    }
    Ok(())
}
/// Downloads `<base_url>/tags/` and records every tag link in `tag_map`.
///
/// Each tag is stored twice, keyed by its visible title and by its URL slug, both
/// pointing at the tag link with exactly one trailing slash.
///
/// # Errors
/// Fails when the page cannot be fetched or the selector cannot be parsed.
async fn load_tags(base_url: &str, tag_map: Arc<RwLock<HashMap<String, String>>>) -> Result<()> {
    // The listing URL doubles as the prefix every tag link must start with.
    let listing_url = format!("{base_url}/tags/");
    let mut requester = Requester::new();
    let html = Self::fetch_html(&mut requester, &listing_url, &listing_url).await?;
    let document = Html::parse_document(&html);
    let anchors = Self::selector("div.list-tags a[href]")?;

    for anchor in document.select(&anchors) {
        let href = anchor.value().attr("href").unwrap_or_default();
        let slug = match Self::slug_remainder(href, &listing_url) {
            Some(slug) => slug,
            None => continue,
        };
        let label = Self::text_of(&anchor);
        if label.is_empty() {
            continue;
        }
        let canonical = format!("{}/", href.trim_end_matches('/'));
        for key in [label.as_str(), slug.as_str()] {
            Self::insert_tag_mapping(&tag_map, key, &canonical);
        }
    }
    Ok(())
}
/// Parses a duration string into whole seconds; returns `0` when the text cannot
/// be parsed or the value does not fit into `u32`.
fn parse_duration(text: &str) -> u32 {
    match parse_time_to_seconds(text) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Parses a view counter such as the text of a views badge.
///
/// The words `views`/`view`, commas and spaces are removed before the remainder is
/// handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let without_label = text.replace("views", "").replace("view", "");
    let compact: String = without_label
        .chars()
        .filter(|c| *c != ',' && *c != ' ')
        .collect();
    parse_abbreviated_number(compact.trim())
}
/// Parses a rating from free text by keeping only ASCII digits and `.` and reading
/// the result as `f32`. Returns `None` when what remains is not a valid number.
fn parse_rating(text: &str) -> Option<f32> {
    let mut numeric = String::new();
    for ch in text.chars() {
        if ch.is_ascii_digit() || ch == '.' {
            numeric.push(ch);
        }
    }
    numeric.parse().ok()
}
/// Parses a listing page into [`VideoItem`]s, one per video card.
///
/// A card is skipped when it has no `/video/` link, when no id can be determined,
/// or when no title can be found. Field sources:
/// * id — `data-item-id`, else the second-to-last path segment of the video URL
/// * thumbnail — cover image `data-src`, else `src`
/// * title — title link's `title` attribute, else its text, else the cover's `alt`
/// * duration / views / rating — the respective badges, when present
///
/// # Errors
/// Fails only when one of the selectors cannot be parsed.
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let cards = Self::selector("div.video-preview-screen.video-item.thumb-item")?;
    let video_links = Self::selector("a[href*=\"/video/\"]")?;
    let titled_links =
        Self::selector("p.inf a[href*=\"/video/\"], a[title][href*=\"/video/\"]")?;
    let covers = Self::selector("img.cover")?;
    let duration_badges = Self::selector("div.durations")?;
    let view_badges = Self::selector("div.viewsthumb")?;
    let rating_badges = Self::selector("ul.list-unstyled li.pull-right")?;

    let mut videos = Vec::new();
    for card in document.select(&cards) {
        let Some(anchor) = card.select(&video_links).next() else {
            continue;
        };
        let page_url = self.normalize_url(anchor.value().attr("href").unwrap_or_default());
        if page_url.is_empty() {
            continue;
        }

        let id = match card.value().attr("data-item-id") {
            Some(raw) if !raw.is_empty() => raw.to_string(),
            // Fall back to the second-to-last segment of the video URL.
            _ => page_url
                .trim_end_matches('/')
                .split('/')
                .nth_back(1)
                .unwrap_or_default()
                .to_string(),
        };
        if id.is_empty() {
            continue;
        }

        let cover = card.select(&covers).next();
        let thumb_src = cover.and_then(|img| {
            img.value()
                .attr("data-src")
                .or_else(|| img.value().attr("src"))
        });
        let thumb = match thumb_src {
            Some(src) => self.normalize_url(src),
            None => String::new(),
        };

        // Preferred title source: a dedicated title link, else any titled video link.
        let title_anchor = card.select(&titled_links).next().or_else(|| {
            card.select(&video_links)
                .find(|candidate| candidate.value().attr("title").is_some())
        });
        let anchor_title = title_anchor.and_then(|node| match node.value().attr("title") {
            Some(attr) => Some(Self::decode_html(attr)),
            None => {
                let text = Self::text_of(&node);
                if text.is_empty() {
                    None
                } else {
                    Some(text)
                }
            }
        });
        let title = match anchor_title {
            Some(value) if !value.trim().is_empty() => value,
            // Last resort: the cover image's alt text.
            _ => cover
                .and_then(|img| img.value().attr("alt"))
                .map(Self::decode_html)
                .unwrap_or_default(),
        };
        if title.is_empty() {
            continue;
        }

        let duration = card
            .select(&duration_badges)
            .next()
            .map_or(0, |node| Self::parse_duration(&Self::text_of(&node)));
        let views = card
            .select(&view_badges)
            .next()
            .and_then(|node| Self::parse_views(&Self::text_of(&node)));
        let rating = card
            .select(&rating_badges)
            .next()
            .and_then(|node| Self::parse_rating(&Self::text_of(&node)));

        let mut video = VideoItem::new(
            id,
            title.trim().to_string(),
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        video.views = views;
        video.rating = rating;
        videos.push(video);
    }
    Ok(videos)
}
/// Extracts the direct video URLs from the player configuration embedded in a
/// detail page.
///
/// Up to four `video_url` / `video_alt_url*` entries are read together with their
/// `*_text` labels. Duplicate URLs are skipped; a missing or blank label becomes
/// `"mp4"`. The ` HD` / ` FHD` suffix is stripped for the format name and id, while
/// the untouched label is kept as the format note.
///
/// # Errors
/// Fails only when one of the generated regular expressions cannot be compiled.
fn parse_format_urls(html: &str) -> Result<Vec<VideoFormat>> {
    const KEYS: [(&str, &str); 4] = [
        ("video_url", "video_url_text"),
        ("video_alt_url", "video_alt_url_text"),
        ("video_alt_url2", "video_alt_url2_text"),
        ("video_alt_url3", "video_alt_url3_text"),
    ];

    let mut known_urls = std::collections::HashSet::new();
    let mut formats = Vec::new();
    for (url_key, label_key) in KEYS {
        let url_pattern = Self::regex(&format!(r#"{url_key}:\s*'([^']+)'"#))?;
        let label_pattern = Self::regex(&format!(r#"{label_key}:\s*'([^']*)'"#))?;

        let raw_url = match url_pattern.captures(html).and_then(|caps| caps.get(1)) {
            Some(found) => found.as_str(),
            None => continue,
        };
        // The player config may escape slashes JSON-style.
        let url = raw_url.replace("\\/", "/");
        if !known_urls.insert(url.clone()) {
            continue;
        }

        let raw_label = label_pattern
            .captures(html)
            .and_then(|caps| caps.get(1))
            .map(|found| found.as_str().trim().to_string());
        let label = match raw_label {
            Some(text) if !text.is_empty() => text,
            _ => "mp4".to_string(),
        };
        let quality = label
            .replace(" HD", "")
            .replace(" FHD", "")
            .trim()
            .to_string();

        formats.push(
            VideoFormat::new(url, quality.clone(), "mp4".to_string())
                .format_id(quality.clone())
                .format_note(label),
        );
    }
    Ok(formats)
}
fn parse_aspect_ratio(html: &str) -> Result<Option<f32>> {
let width_re = Self::regex(r#"player_width:\s*'([0-9.]+)'"#)?;
let height_re = Self::regex(r#"player_height:\s*'([0-9.]+)'"#)?;
let Some(width) = width_re
.captures(html)
.and_then(|value| value.get(1))
.and_then(|value| value.as_str().parse::<f32>().ok())
else {
return Ok(None);
};
let Some(height) = height_re
.captures(html)
.and_then(|value| value.get(1))
.and_then(|value| value.as_str().parse::<f32>().ok())
else {
return Ok(None);
};
Ok((height > 0.0).then_some(width / height))
}
/// Gathers category and tag names for a detail page.
///
/// Sources, in order: category links, tag links, then the comma-separated
/// `video_categories` and `video_tags` values of the embedded player configuration.
/// Entries whose normalized form is empty are dropped, and only the first entry per
/// normalized form is kept, preserving the original order.
///
/// # Errors
/// Fails only when a selector or regular expression cannot be compiled.
fn collect_tags(document: &Html, html: &str) -> Result<Vec<String>> {
    let category_links =
        Self::selector("div.items-holder.js-categories a[href*=\"/categories/\"]")?;
    let tag_links = Self::selector("div.item a[href*=\"/tags/\"]")?;

    let mut candidates: Vec<String> = Vec::new();
    for links in [&category_links, &tag_links] {
        candidates.extend(
            document
                .select(links)
                .map(|node| Self::text_of(&node))
                .filter(|text| !text.is_empty()),
        );
    }

    for pattern in [r#"video_categories:\s*'([^']*)'"#, r#"video_tags:\s*'([^']*)'"#] {
        let compiled = Self::regex(pattern)?;
        let Some(list) = compiled.captures(html).and_then(|caps| caps.get(1)) else {
            continue;
        };
        candidates.extend(
            list.as_str()
                .split(',')
                .map(str::trim)
                .map(Self::decode_html)
                .filter(|entry| !entry.is_empty()),
        );
    }

    // De-duplicate on the normalized form while keeping first-seen spelling and order.
    let mut seen = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for candidate in candidates {
        let key = Self::normalize_title(&candidate);
        if key.is_empty() || key == "-" {
            continue;
        }
        if seen.insert(key) {
            unique.push(candidate);
        }
    }
    Ok(unique)
}
/// Enriches a listing item with data from its detail page.
///
/// * the title is replaced by the first non-empty `h1`
/// * uploader name, URL and namespaced id are taken from the member link
/// * views and duration are filled from the stat spans only when still missing
/// * tags are replaced when any are found
/// * the aspect ratio is filled when still missing
///
/// # Errors
/// Fails when a selector or regular expression used along the way cannot be compiled.
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
    let document = Html::parse_document(html);
    let heading = Self::selector("h1")?;
    let member_link =
        Self::selector("div.info-block div.block-user div.username a[href*=\"/members/\"]")?;
    let stat_spans = Self::selector("div.info-block div.item span")?;

    let heading_text = document
        .select(&heading)
        .next()
        .map(|node| Self::text_of(&node));
    if let Some(title) = heading_text {
        if !title.is_empty() {
            item.title = title;
        }
    }

    if let Some(anchor) = document.select(&member_link).next() {
        let name = Self::text_of(&anchor);
        if !name.is_empty() {
            item.uploader = Some(name);
        }
        let profile_url = match anchor.value().attr("href") {
            Some(href) => self.normalize_url(href),
            None => String::new(),
        };
        if !profile_url.is_empty() {
            // The uploader id is the last path segment of the profile URL.
            let member = profile_url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default()
                .to_string();
            item.uploaderUrl = Some(profile_url);
            if !member.is_empty() {
                item.uploaderId = Some(format!("{CHANNEL_ID}:{member}"));
            }
        }
    }

    // Each stat span is tried as a view count and as a duration until both are set.
    for node in document.select(&stat_spans) {
        let stat = Self::text_of(&node);
        if item.views.is_none() {
            item.views = Self::parse_views(&stat);
        }
        if item.duration == 0 {
            let seconds = Self::parse_duration(&stat);
            if seconds > 0 {
                item.duration = seconds;
            }
        }
    }

    let tags = Self::collect_tags(&document, html)?;
    if !tags.is_empty() {
        item.tags = Some(tags);
    }

    // let formats = Self::parse_format_urls(html)?;
    // if !formats.is_empty() {
    // item.formats = Some(formats);
    // }

    if item.aspectRatio.is_none() {
        item.aspectRatio = Self::parse_aspect_ratio(html)?;
    }
    Ok(item)
}
/// Fetches the item's detail page and merges its data into the item.
///
/// This never fails: when the page cannot be fetched or parsed, the problem is
/// reported in the background and the original item is returned unchanged.
async fn enrich_item(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
    let mut requester =
        requester_or_default(options, CHANNEL_ID, "porntrex.enrich_item.missing_requester");

    let fetched = Self::fetch_html(&mut requester, &item.url, &item.url).await;
    let (stage, error) = match fetched {
        Ok(html) => match self.apply_detail_video(item.clone(), &html) {
            Ok(enriched) => return enriched,
            Err(error) => ("enrich_item.apply_detail_video", error),
        },
        Err(error) => ("enrich_item.fetch_html", error),
    };

    report_provider_error_background(
        CHANNEL_ID,
        stage,
        &format!("url={}; error={error}", item.url),
    );
    item
}
/// Maps a sort name (case-insensitive, surrounding whitespace ignored) to a feed.
///
/// `popular`/`viewed`/`most_viewed` select the popular feed, `rated`/`rating`/`top`
/// the top-rated feed, and anything else the latest feed.
fn resolve_sort_target(sort: &str) -> Target {
    const POPULAR_ALIASES: [&str; 3] = ["popular", "viewed", "most_viewed"];
    const TOP_RATED_ALIASES: [&str; 3] = ["rated", "rating", "top"];

    let key = sort.trim().to_ascii_lowercase();
    if POPULAR_ALIASES.contains(&key.as_str()) {
        Target::Popular
    } else if TOP_RATED_ALIASES.contains(&key.as_str()) {
        Target::TopRated
    } else {
        Target::Latest
    }
}
/// Chooses the target for a request without a search query.
///
/// When the client selected a category whose value is a URL on this provider's
/// site, that archive is used (site-paged); otherwise the sort feed is used.
///
/// The category value comes from the client and is later fetched server-side, so
/// it must start with the base URL *followed by `/`*. A bare `starts_with(base)`
/// check would also accept look-alike hosts such as
/// `https://www.porntrex.com.evil.tld/` or `https://www.porntrex.com@evil.tld/`.
/// The `"all"` placeholder never matches the prefix and falls through to the
/// sort feed.
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
    let trusted_prefix = format!("{}/", self.url.trim_end_matches('/'));
    match options.categories.as_deref() {
        Some(category) if category.starts_with(&trusted_prefix) => {
            Self::archive_target(category.to_string(), PageMode::SitePaged)
        }
        _ => Self::resolve_sort_target(sort),
    }
}
/// Looks up a loaded category whose normalized title equals the normalized query
/// and returns its id (the category archive URL).
///
/// The `"all"` placeholder is never returned; a poisoned lock yields `None`.
fn lookup_category_target(&self, query: &str) -> Option<String> {
    let wanted = Self::normalize_title(query);
    let entries = self.categories.read().ok()?;
    let found = entries
        .iter()
        .filter(|entry| entry.id != "all")
        .find(|entry| Self::normalize_title(&entry.title) == wanted)
        .map(|entry| entry.id.clone());
    found
}
/// Decides what a free-text query should fetch.
///
/// Resolution order:
/// 1. an explicit `tag:`/`tags:` or `category:`/`categories:` prefix with a
///    non-empty value → the matching archive (tags are sliced locally, categories
///    are site-paged)
/// 2. a loaded category with the same normalized title → that category archive
/// 3. a loaded tag with the same normalized title or slug → that tag archive
/// 4. otherwise → a site search for the query
///
/// Surrounding whitespace and leading `@` characters are ignored.
fn resolve_query_target(&self, query: &str) -> Target {
    let cleaned = query.trim().trim_start_matches('@');

    if let Some((prefix, rest)) = cleaned.split_once(':') {
        let slug = rest.trim().trim_matches('/').replace(' ', "-");
        let routed = match prefix.trim().to_ascii_lowercase().as_str() {
            "tag" | "tags" => Some(("tags", PageMode::LocalSlice)),
            "category" | "categories" => Some(("categories", PageMode::SitePaged)),
            _ => None,
        };
        if !slug.is_empty() {
            if let Some((section, mode)) = routed {
                return Self::archive_target(
                    format!("{}/{section}/{slug}/", self.url),
                    mode,
                );
            }
        }
    }

    if let Some(category_url) = self.lookup_category_target(cleaned) {
        return Self::archive_target(category_url, PageMode::SitePaged);
    }

    let key = Self::normalize_title(cleaned);
    let tag_url = match self.tag_map.read() {
        Ok(mappings) => mappings.get(&key).cloned(),
        Err(_) => None,
    };
    match tag_url {
        Some(url) => Self::archive_target(url, PageMode::LocalSlice),
        None => Target::Search(cleaned.to_string()),
    }
}
/// Reports whether the catalogs still look unloaded: at most the seeded category
/// entry is present, or the tag map is empty. A poisoned lock counts as empty.
fn catalogs_need_refresh(&self) -> bool {
    let known_categories = self.categories.read().map_or(0, |list| list.len());
    let known_tags = self.tag_map.read().map_or(0, |map| map.len());
    known_categories <= 1 || known_tags == 0
}
/// Reloads categories and then tags into the shared catalogs.
///
/// Failures are reported in the background; a failed category load does not
/// prevent the tag load.
async fn refresh_catalogs(&self) {
    let categories_outcome =
        Self::load_categories(&self.url, Arc::clone(&self.categories)).await;
    if let Some(error) = categories_outcome.err() {
        report_provider_error_background(
            CHANNEL_ID,
            "refresh_catalogs.categories",
            &error.to_string(),
        );
    }

    let tags_outcome = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await;
    if let Some(error) = tags_outcome.err() {
        report_provider_error_background(
            CHANNEL_ID,
            "refresh_catalogs.tags",
            &error.to_string(),
        );
    }
}
/// Fetches, parses, pages and enriches the videos for `target`.
///
/// * A cache entry younger than five minutes is returned directly; an older one is
///   kept as the fallback for request failures, empty responses and empty listings.
/// * `SitePaged` targets take the first `per_page_limit` items of the requested
///   site page; `LocalSlice` targets take the `page`-th slice of `per_page_limit`
///   items from the single page that was fetched.
/// * The selected items are enriched from their detail pages, at most four at a
///   time, and returned **in listing order**.
///
/// The cache key always includes `per_page_limit`, because the cached list is
/// truncated to it; without it a later request asking for more items per page
/// would be served the shorter cached list.
///
/// # Errors
/// Only a failure to compile the listing selectors is returned as `Err`; network
/// problems are reported and answered with the fallback list.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let page_mode = match &target {
        Target::Archive { page_mode, .. } => *page_mode,
        _ => PageMode::SitePaged,
    };
    let source_url = self.build_target_url(&target, page);
    let cache_key = match page_mode {
        PageMode::SitePaged => format!("{source_url}#per_page={per_page_limit}"),
        // LocalSlice always fetches the same URL, so the page must be part of the key.
        PageMode::LocalSlice => format!("{source_url}#page={page}&per_page={per_page_limit}"),
    };

    let old_items = match cache.get(&cache_key) {
        Some((time, items)) => {
            if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                return Ok(items.clone());
            }
            items.clone()
        }
        None => vec![],
    };

    let mut requester =
        requester_or_default(&options, CHANNEL_ID, "porntrex.fetch_target.missing_requester");
    let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await {
        Ok(value) => value,
        Err(error) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_target.request",
                &format!("url={source_url}; error={error}"),
            )
            .await;
            return Ok(old_items);
        }
    };
    if html.trim().is_empty() {
        report_provider_error(
            CHANNEL_ID,
            "fetch_target.empty_response",
            &format!("url={source_url}"),
        )
        .await;
        return Ok(old_items);
    }

    let items = self.parse_list_videos(&html)?;
    if items.is_empty() {
        return Ok(old_items);
    }

    let take = per_page_limit.max(1);
    let skip = match page_mode {
        PageMode::SitePaged => 0,
        PageMode::LocalSlice => page.saturating_sub(1) as usize * take,
    };
    let limited_items: Vec<VideoItem> = items.into_iter().skip(skip).take(take).collect();
    if limited_items.is_empty() {
        // Only reachable for LocalSlice: the requested slice lies past the end.
        cache.insert(cache_key, vec![]);
        return Ok(vec![]);
    }

    // `buffered` (unlike `buffer_unordered`) yields results in input order, so the
    // site's ranking is preserved while still running four detail fetches at once.
    let enriched = stream::iter(limited_items.into_iter().map(|item| {
        let provider = self.clone();
        let options = options.clone();
        async move { provider.enrich_item(item, &options).await }
    }))
    .buffered(4)
    .collect::<Vec<_>>()
    .await;

    cache.remove(&cache_key);
    cache.insert(cache_key, enriched.clone());
    Ok(enriched)
}
}
#[async_trait]
impl Provider for PorntrexProvider {
    /// Returns the videos for the requested sort/query/page.
    ///
    /// `page` defaults to 1 and is never below 1; `per_page` defaults to 10 and is
    /// clamped to 1..=60. A non-blank query is resolved via `resolve_query_target`;
    /// if it resolves to a plain search while the catalogs still look unloaded,
    /// they are refreshed once and the query is resolved again. Without a query the
    /// selected category or sort feed is used. Errors are reported and turned into
    /// an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let per_page_limit = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);

        let search_text = query
            .as_deref()
            .map(str::trim)
            .filter(|text| !text.is_empty());
        let target = if let Some(text) = search_text {
            let mut resolved = self.resolve_query_target(text);
            if matches!(resolved, Target::Search(_)) && self.catalogs_need_refresh() {
                self.refresh_catalogs().await;
                resolved = self.resolve_query_target(text);
            }
            resolved
        } else {
            self.resolve_option_target(&options, &sort)
        };

        let outcome = self
            .fetch_target(cache, target, page, per_page_limit, options)
            .await;
        match outcome {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.fetch_target",
                    &format!("sort={sort}; page={page}; error={error}"),
                )
                .await;
                vec![]
            }
        }
    }

    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A multi-word search becomes a dash-joined path with the page appended.
    #[test]
    fn builds_search_url() {
        let target = Target::Search("adriana chechik".to_string());
        let url = PorntrexProvider::new().build_target_url(&target, 2);
        assert_eq!(url, "https://www.porntrex.com/search/adriana-chechik/2/");
    }

    /// An explicit `tag:` prefix routes to the tag archive in local-slice mode.
    #[test]
    fn resolves_tag_prefix() {
        let resolved = PorntrexProvider::new().resolve_query_target("tag:blowjob");
        let Target::Archive { url, page_mode } = resolved else {
            panic!("expected archive target");
        };
        assert_eq!(url, "https://www.porntrex.com/tags/blowjob/");
        assert_eq!(page_mode, PageMode::LocalSlice);
    }

    /// Local-slice archives always request the archive root, whatever the page.
    #[test]
    fn builds_local_slice_archive_url_without_numeric_page() {
        let archive = PorntrexProvider::archive_target(
            "https://www.porntrex.com/tags/anal-creampie/".to_string(),
            PageMode::LocalSlice,
        );
        let url = PorntrexProvider::new().build_target_url(&archive, 3);
        assert_eq!(url, "https://www.porntrex.com/tags/anal-creampie/");
    }
}

View File

@@ -8,8 +8,6 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
// use std::sync::{Arc, RwLock};
// use std::thread;
use std::vec;
error_chain! {
@@ -19,6 +17,12 @@ error_chain! {
}
}
/// Static channel metadata for this provider: the group it is listed under
/// (`mainstream-tube`) and its tags (`tube`, `mainstream`).
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "mainstream"],
};
#[derive(Debug, Clone)]
pub struct PornxpProvider {
url: String,
@@ -26,7 +30,7 @@ pub struct PornxpProvider {
impl PornxpProvider {
pub fn new() -> Self {
let provider = PornxpProvider {
url: "https://pornxp.me".to_string(),
url: "https://pornxp.ph".to_string(),
};
provider
}
@@ -38,7 +42,7 @@ impl PornxpProvider {
name: "PornXP".to_string(),
description: "For Those Who Know The Difference".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornxp.me".to_string(),
favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornxp.ph".to_string(),
status: "active".to_string(),
categories: vec![],
options: vec![
@@ -185,12 +189,12 @@ impl PornxpProvider {
let id = video_url.split("/").collect::<Vec<&str>>()[4].to_string();
let thumb = match video_segment.contains("<img class=\"item_img lazy\""){
true => format!("https:{}", video_segment.split("<img ").collect::<Vec<&str>>()[1]
true => format!("{}{}", self.url,video_segment.split("<img ").collect::<Vec<&str>>()[1]
.split("data-src=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string()),
false => format!("https:{}", video_segment.split("<img ").collect::<Vec<&str>>()[1]
false => format!("{}{}", self.url, video_segment.split("<img ").collect::<Vec<&str>>()[1]
.split("src=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]

View File

@@ -144,11 +144,99 @@ impl RedtubeProvider {
Ok(video_items)
}
/// Returns the slice of `text` that sits between the first occurrence of `start`
/// and the first occurrence of `end` that follows it.
///
/// Yields `None` when `start` is absent, or when `end` does not occur after it.
/// The returned slice borrows from `text`.
fn extract_between<'a>(&self, text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    let (_, after_start) = text.split_once(start)?;
    let (inner, _) = after_start.split_once(end)?;
    Some(inner)
}
/// Extracts video cards from the `videos_grid` list of a listing page.
///
/// Only the text after the first `videos_grid` marker is scanned, stopping at
/// whichever comes first: the next `</ul>`, the next `videos_grid` marker, or the
/// end of the input. Every `<li id="` chunk in that region becomes one item; chunks
/// without a non-empty `data-video-id` are skipped.
///
/// Per card:
/// - title: the `title="…"` attribute found after `video-title-wrapper`, else after
///   `class="video-title-text`, else an `<a title="…"`; HTML entities are decoded.
/// - thumb: `data-src`, falling back to `data-o_thumb`; `&amp;` is replaced by `&`.
/// - duration / views: parsed from their `<span>` text, 0 when parsing yields nothing.
/// - preview: `data-mediabook`, with `&amp;` replaced by `&`.
///
/// Returns an empty vector when the page has no `videos_grid` marker.
fn parse_video_grid_items(&self, html: &str) -> Vec<VideoItem> {
    let Some(after_marker) = html.split("videos_grid").nth(1) else {
        return Vec::new();
    };
    let grid = after_marker.split("</ul>").next().unwrap_or_default();

    grid.split("<li id=\"")
        // The first chunk is whatever precedes the first card.
        .skip(1)
        .filter_map(|card| {
            let id = self
                .extract_between(card, "data-video-id=\"", "\"")
                .map(str::trim)
                .filter(|value| !value.is_empty())?
                .to_string();

            let wrapper_title = card
                .split("video-title-wrapper")
                .nth(1)
                .and_then(|tail| self.extract_between(tail, "title=\"", "\""));
            let title = wrapper_title
                .or_else(|| {
                    card.split("class=\"video-title-text")
                        .nth(1)
                        .and_then(|tail| self.extract_between(tail, "title=\"", "\""))
                })
                .or_else(|| self.extract_between(card, "<a title=\"", "\""))
                .unwrap_or_default()
                .trim()
                .to_string();
            // Keep the raw text if entity decoding does not yield a valid string.
            let title = decode(title.as_bytes()).to_string().unwrap_or(title);

            let thumb = match self.extract_between(card, "data-src=\"", "\"") {
                Some(src) => src,
                None => self
                    .extract_between(card, "data-o_thumb=\"", "\"")
                    .unwrap_or_default(),
            }
            .replace("&amp;", "&");

            let duration_text = self
                .extract_between(
                    card,
                    "<span class=\"video-properties tm_video_duration\">",
                    "</span>",
                )
                .unwrap_or_default()
                .trim()
                .to_string();
            let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;

            let views_text = self
                .extract_between(card, "<span class='info-views'>", "</span>")
                .unwrap_or_default()
                .trim()
                .to_string();
            let views = parse_abbreviated_number(&views_text).unwrap_or(0) as u32;

            let preview = self
                .extract_between(card, "data-mediabook=\"", "\"")
                .unwrap_or_default()
                .replace("&amp;", "&");

            let video_url = format!("{}/{}", self.url, id);
            Some(
                VideoItem::new(id, title, video_url, "redtube".to_string(), thumb, duration)
                    .views(views)
                    .preview(preview),
            )
        })
        .collect()
}
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
}
let card_items = self.parse_video_grid_items(&html);
if !card_items.is_empty() {
return card_items;
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html
.split("<script type=\"application/ld+json\">")
@@ -220,117 +308,7 @@ impl RedtubeProvider {
println!("HTML is empty");
return vec![];
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html
.split("videos_grid")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default();
let videos = video_listing_content
.split("<li id=\"tags_videos_")
.collect::<Vec<&str>>()[1..]
.to_vec();
for vid in videos {
// for (i, c) in vid.split("\n").enumerate() {
// println!("{}: {}", i, c);
// }
let id = vid
.split("data-video-id=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string();
let video_url = format!("{}/{}", self.url, id);
let title = vid
.split(" <a title=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string();
let thumb = vid
.split("<img")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split(" data-src=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string();
let raw_duration = vid
.split("<span class=\"video-properties tm_video_duration\">")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</span>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let views_str = vid
.split("<span class='info-views'>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</span>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string();
let views = parse_abbreviated_number(&views_str).unwrap_or(0) as u32;
let preview = vid
.split("<img")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split(" data-mediabook=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string();
let video_item =
VideoItem::new(id, title, video_url, "redtube".to_string(), thumb, duration)
.views(views)
.preview(preview);
items.push(video_item);
}
return items;
self.parse_video_grid_items(&html)
}
}
@@ -376,3 +354,63 @@ impl Provider for RedtubeProvider {
Some(self.build_channel(clientversion))
}
}
// Unit tests for `parse_video_grid_items` against small hand-written HTML fixtures.
#[cfg(test)]
mod tests {
use super::RedtubeProvider;
// Browse-style card (`<li id="browse_…">`) whose thumbnail is in `data-src`.
// Expects `&amp;` in the title and in the preview URL to come back as `&`,
// "2:17:57" to become 8277 seconds and "981K" to become 981000 views.
#[test]
fn parse_video_grid_items_handles_browse_cards() {
let provider = RedtubeProvider::new();
let html = r#"
<ul id="block_browse" class="videos_grid">
<li id="browse_195840661" data-video-id="195840661">
<a data-testid="plw_video_thumbnail_link" href="/195840661" data-video-id="195840661">
<img data-src="https://cdn.example/thumb.jpg" data-mediabook="https://cdn.example/preview.mp4?x=1&amp;y=2">
</a>
<a class="video-title-text js-pop tm_video_title " title="Stepmoms &amp; More"></a>
<span class="video-properties tm_video_duration">2:17:57</span>
<span class='info-views'>981K</span>
</li>
</ul>
"#;
let items = provider.parse_video_grid_items(html);
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "195840661");
assert_eq!(items[0].title, "Stepmoms & More");
assert_eq!(items[0].url, "https://www.redtube.com/195840661");
assert_eq!(items[0].thumb, "https://cdn.example/thumb.jpg");
assert_eq!(
items[0].preview.as_deref(),
Some("https://cdn.example/preview.mp4?x=1&y=2")
);
assert_eq!(items[0].duration, 8277);
assert_eq!(items[0].views, Some(981000));
}
// Tags-style card (`<li id="tags_videos_…">`) whose thumbnail is only available in
// `data-o_thumb`. Expects that attribute to be used as the thumb, "13:06" to become
// 786 seconds and "51.2K" to become 51200 views.
#[test]
fn parse_video_grid_items_handles_tags_cards() {
let provider = RedtubeProvider::new();
let html = r#"
<div><ul class="videos_grid">
<li id="tags_videos_42785231" data-video-id="42785231">
<a data-testid="plw_video_thumbnail_link" href="/42785231" data-video-id="42785231">
<img data-o_thumb="https://cdn.example/thumb2.jpg" data-mediabook="https://cdn.example/p2.mp4">
</a>
<a class="video-title-text js-pop tm_video_title " title="Title 2"></a>
<span class="video-properties tm_video_duration">13:06</span>
<span class='info-views'>51.2K</span>
</li>
</ul></div>
"#;
let items = provider.parse_video_grid_items(html);
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "42785231");
assert_eq!(items[0].url, "https://www.redtube.com/42785231");
assert_eq!(items[0].thumb, "https://cdn.example/thumb2.jpg");
assert_eq!(items[0].duration, 786);
assert_eq!(items[0].views, Some(51200));
}
}

View File

@@ -172,8 +172,8 @@ impl SextbProvider {
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let genres = self.genres.read().map(|value| value.clone()).unwrap_or_default();
let studios = self.studios.read().map(|value| value.clone()).unwrap_or_default();
let actresses = self
let _studios = self.studios.read().map(|value| value.clone()).unwrap_or_default();
let _actresses = self
.actresses
.read()
.map(|value| value.clone())
@@ -220,59 +220,6 @@ impl SextbProvider {
],
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Section".to_string(),
description: "Browse one of the main site sections.".to_string(),
systemImage: "square.grid.2x2".to_string(),
colorName: "green".to_string(),
options: vec![
FilterOption {
id: "censored".to_string(),
title: "Censored".to_string(),
},
FilterOption {
id: "uncensored".to_string(),
title: "Uncensored".to_string(),
},
FilterOption {
id: "subtitle".to_string(),
title: "Subtitle".to_string(),
},
FilterOption {
id: "amateur".to_string(),
title: "Amateur".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Genres".to_string(),
description: "Browse direct genre archive pages.".to_string(),
systemImage: "tag.fill".to_string(),
colorName: "orange".to_string(),
options: genres,
multiSelect: false,
},
ChannelOption {
id: "sites".to_string(),
title: "Studios".to_string(),
description: "Browse direct studio archive pages.".to_string(),
systemImage: "building.2.fill".to_string(),
colorName: "purple".to_string(),
options: studios,
multiSelect: false,
},
ChannelOption {
id: "stars".to_string(),
title: "Actresses".to_string(),
description: "Browse direct actress archive pages.".to_string(),
systemImage: "person.crop.square".to_string(),
colorName: "pink".to_string(),
options: actresses,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),

View File

@@ -13,7 +13,6 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::NaiveDate;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
@@ -644,7 +643,7 @@ impl ShooshtimeProvider {
target.push_str(&quality.replace(' ', "%20"));
}
build_proxy_url(options, "shooshtime", &target)
build_proxy_url(options, "shooshtime-media", &target)
}
fn search_sort_param(sort: &str) -> Option<&'static str> {
@@ -1157,7 +1156,11 @@ impl ShooshtimeProvider {
}
let proxied_url = self.proxied_video(options, page_url, None);
if !proxied_url.is_empty() {
item.url = proxied_url;
item.url = page_url.to_string();
formats.push(
VideoFormat::new(proxied_url, "Best".to_string(), "mp4".to_string())
.format_id("best".to_string()),
);
}
if !formats.is_empty() {
item = item.formats(formats);
@@ -1194,40 +1197,6 @@ impl ShooshtimeProvider {
Ok(item)
}
/// Loads the item's page (`item.url`) and applies `apply_detail_video` to the result.
///
/// Uses the requester carried in `options` when present, otherwise a fresh `Requester`.
/// This never fails: if the page request errors, the item is returned unchanged; if
/// `apply_detail_video` errors, the copy of the item taken beforehand is returned.
/// Both failures are reported through `report_provider_error_background`.
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
    let page_url = item.url.clone();
    // `apply_detail_video` takes the item by value, so keep a copy to fall back on.
    let untouched = item.clone();
    let mut requester = options.requester.clone().unwrap_or_else(Requester::new);

    let html = match requester.get(&page_url, None).await {
        Ok(body) => body,
        Err(error) => {
            report_provider_error_background(
                "shooshtime",
                "enrich_video.request",
                &format!("url={}; error={error}", page_url),
            );
            return item;
        }
    };

    self.apply_detail_video(item, &html, &page_url, options)
        .unwrap_or_else(|error| {
            report_provider_error_background(
                "shooshtime",
                "enrich_video.parse",
                &format!("url={}; error={error}", page_url),
            );
            untouched
        })
}
async fn fetch_items_for_url(
&self,
cache: VideoCache,
@@ -1256,23 +1225,12 @@ impl ShooshtimeProvider {
}
};
let list_videos = self.parse_list_videos(&html)?;
if list_videos.is_empty() {
let items = self.parse_list_videos(&html)?;
if items.is_empty() {
return Ok(vec![]);
}
let items = stream::iter(list_videos.into_iter().map(|video| {
let provider = self.clone();
let options = options.clone();
async move { provider.enrich_video(video, &options).await }
}))
.buffer_unordered(6)
.collect::<Vec<_>>()
.await;
if !items.is_empty() {
cache.insert(url, items.clone());
}
cache.insert(url, items.clone());
Ok(items)
}
@@ -1422,7 +1380,7 @@ mod tests {
assert_eq!(
provider.proxied_video(&options, "https://shooshtime.com/videos/example/123/", None,),
"https://example.com/proxy/shooshtime/shooshtime.com/videos/example/123/"
"https://example.com/proxy/shooshtime-media/shooshtime.com/videos/example/123/"
);
assert_eq!(
provider.proxied_video(
@@ -1430,7 +1388,7 @@ mod tests {
"https://shooshtime.com/videos/example/123/",
Some("720p"),
),
"https://example.com/proxy/shooshtime/shooshtime.com/videos/example/123/__quality__/720p"
"https://example.com/proxy/shooshtime-media/shooshtime.com/videos/example/123/__quality__/720p"
);
}
}

View File

@@ -10,8 +10,8 @@ use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use serde_json::Value;
use std::process::Command;
use std::time::Duration;
use url::form_urlencoded::byte_serialize;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -143,7 +143,38 @@ impl SpankbangProvider {
}
fn request_headers(&self) -> Vec<(String, String)> {
vec![("Referer".to_string(), format!("{}/", self.url))]
vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), format!("{}/", self.url)),
]
}
fn is_cloudflare_block(text: &str) -> bool {
@@ -153,168 +184,222 @@ impl SpankbangProvider {
|| lowercase.contains("cloudflare ray id")
}
fn fallback_items_from_ytdlp(&self, page_url: &str, limit: usize) -> Vec<VideoItem> {
let output = match Command::new("yt-dlp")
.arg("-J")
.arg("--flat-playlist")
.arg("--extractor-args")
.arg("generic:impersonate=chrome")
fn fetch_items_with_curl_cffi(&self, page_url: &str, proxy_base_url: &str) -> Vec<VideoItem> {
crate::flow_debug!(
"trace={} spankbang curl_cffi fetch start url={}",
"none",
crate::util::flow_debug::preview(page_url, 120)
);
let output = match Command::new("python3")
.arg("-c")
.arg(
r#"from curl_cffi import requests
from bs4 import BeautifulSoup
import json
import sys
url = sys.argv[1]
r = requests.get(url, impersonate='chrome124', timeout=45, headers={'Referer': 'https://spankbang.com/'})
if r.status_code >= 400:
raise SystemExit(2)
soup = BeautifulSoup(r.text, 'html.parser')
cards = soup.select('[data-testid="video-list"] [data-testid="video-item"]')
if not cards:
cards = soup.select('[data-testid="video-item"]')
items = []
for card in cards:
vid = (card.get('data-id') or '').strip()
link = card.select_one('a[href*="/video/"]')
if not vid or link is None:
continue
href = (link.get('href') or '').strip()
if not href:
continue
img = card.select_one('picture img, img')
title_anchor = card.select_one('p a[title], a[title]')
duration = card.select_one('[data-testid="video-item-length"]')
views = card.select_one('[data-testid="views"]')
uploader = card.select_one('[data-testid="video-info-with-badge"] a[data-testid="title"]')
preview = card.select_one('video source[data-src]')
items.append({
'id': vid,
'href': href,
'title': (title_anchor.get('title') if title_anchor else '') or (img.get('alt') if img else ''),
'thumb': ((img.get('src') if img else '') or (img.get('data-src') if img else '') or '').strip(),
'preview': (preview.get('data-src') if preview else '') or '',
'duration': duration.get_text(' ', strip=True) if duration else '',
'views': views.get_text(' ', strip=True) if views else '',
'uploader': uploader.get_text(' ', strip=True) if uploader else '',
'uploader_href': (uploader.get('href') if uploader else '') or '',
})
sys.stdout.write(json.dumps(items))
"#,
)
.arg(page_url)
.output()
{
Ok(output) if output.status.success() => output,
_ => return vec![],
Ok(output) => {
let stderr = String::from_utf8_lossy(&output.stderr);
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.fetch.status",
&format!(
"url={page_url}; status={}; stderr={}",
output.status,
crate::util::flow_debug::preview(&stderr, 300)
),
);
return vec![];
}
Err(e) => {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.fetch.spawn",
&format!("url={page_url}; error={e}"),
);
return vec![];
}
};
let payload: serde_json::Value = match serde_json::from_slice(&output.stdout) {
Ok(payload) => payload,
Err(_) => return vec![],
};
let payload = String::from_utf8(output.stdout).ok();
if payload.as_deref().unwrap_or("").trim().is_empty() {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.fetch.empty",
&format!("url={page_url}"),
);
return vec![];
}
crate::flow_debug!(
"trace={} spankbang curl_cffi fetch ok url={} bytes={}",
"none",
crate::util::flow_debug::preview(page_url, 120),
payload.as_deref().unwrap_or("").len()
);
let entries = match payload.get("entries").and_then(|value| value.as_array()) {
Some(entries) => entries,
None => return vec![],
let items_json: Value = match serde_json::from_str(payload.as_deref().unwrap_or("")) {
Ok(value) => value,
Err(e) => {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.parse.json",
&format!("url={page_url}; error={e}"),
);
return vec![];
}
};
let Some(entries) = items_json.as_array() else {
return vec![];
};
let mut items = Vec::new();
for (index, entry) in entries.iter().take(limit).enumerate() {
let Some(url) = entry.get("url").and_then(|value| value.as_str()) else {
continue;
};
if !(url.starts_with("https://") || url.starts_with("http://")) {
continue;
}
for entry in entries {
let id = entry
.get("id")
.and_then(|value| value.as_str())
.filter(|value| !value.is_empty())
.map(ToOwned::to_owned)
.unwrap_or_else(|| format!("spankbang-fallback-{}", index + 1));
.unwrap_or("")
.trim()
.to_string();
let href = entry
.get("href")
.and_then(|value| value.as_str())
.unwrap_or("")
.trim()
.to_string();
if id.is_empty() || href.is_empty() {
continue;
}
let detail_url = self.normalize_url(&href);
let title = entry
.get("title")
.and_then(|value| value.as_str())
.filter(|value| !value.is_empty())
.map(Self::decode_html)
.unwrap_or_else(|| format!("SpankBang Video {}", index + 1));
.unwrap_or_default();
if title.is_empty() {
continue;
}
let thumb = entry
.get("thumbnail")
.get("thumb")
.and_then(|value| value.as_str())
.unwrap_or("")
.to_string();
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let preview = entry
.get("preview")
.and_then(|value| value.as_str())
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let duration = entry
.get("duration")
.and_then(|value| value.as_u64())
.and_then(|value| u32::try_from(value).ok())
.and_then(|value| value.as_str())
.map(Self::parse_duration)
.unwrap_or(0);
let format_kind = if url.contains(".m3u8") {
"m3u8"
} else {
"video/mp4"
};
let mut format = VideoFormat::new(url.to_string(), "auto".to_string(), format_kind.to_string());
if let Some(headers) = entry.get("http_headers").and_then(|value| value.as_object()) {
for (key, value) in headers {
if let Some(value) = value.as_str() {
format.add_http_header(key.to_string(), value.to_string());
}
}
}
if entry
.get("http_headers")
.and_then(|value| value.as_object())
.is_none()
{
format.add_http_header("Referer".to_string(), format!("{}/", self.url));
}
let views = entry
.get("views")
.and_then(|value| value.as_str())
.and_then(parse_abbreviated_number);
let mut item = VideoItem::new(
id,
title,
url.to_string(),
self.proxy_url(proxy_base_url, &href),
"spankbang".to_string(),
thumb,
duration,
)
.formats(vec![format]);
if let Some(views) = entry
.get("view_count")
.and_then(|value| value.as_u64())
.and_then(|value| u32::try_from(value).ok())
{
);
if let Some(views) = views {
item = item.views(views);
}
if let Some(uploader) = entry
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.preview(preview).formats(vec![format]);
}
let uploader = entry
.get("uploader")
.and_then(|value| value.as_str())
.filter(|value| !value.is_empty())
{
item = item.uploader(uploader.to_string());
.map(Self::decode_html)
.unwrap_or_default();
if !uploader.is_empty() {
item = item.uploader(uploader);
}
let uploader_href = entry
.get("uploader_href")
.and_then(|value| value.as_str())
.unwrap_or("")
.trim();
if !uploader_href.is_empty() {
let uploader_url = self.normalize_url(uploader_href);
if !uploader_url.is_empty() {
item = item.uploader_url(uploader_url);
}
}
items.push(item);
}
items
}
async fn fallback_items_with_working_media(
&self,
page_url: &str,
options: &ServerOptions,
) -> Vec<VideoItem> {
let fallback_items = self.fallback_items_from_ytdlp(page_url, 72);
if fallback_items.is_empty() {
if items.is_empty() {
crate::providers::report_provider_error_background(
"spankbang",
"curl_cffi.parse.empty",
&format!("url={page_url}"),
);
return vec![];
}
let mut requester = requester_or_default(
options,
"spankbang",
"spankbang.fallback_items_with_working_media.missing_requester",
crate::flow_debug!(
"trace={} spankbang curl_cffi parsed url={} items={}",
"none",
crate::util::flow_debug::preview(page_url, 120),
items.len()
);
let mut working_items = Vec::new();
for item in fallback_items {
let format_headers = item
.formats
.as_ref()
.and_then(|formats| formats.first())
.map(|format| format.http_headers_pairs())
.unwrap_or_default();
let media_url = item
.formats
.as_ref()
.and_then(|formats| formats.first())
.map(|format| format.url.clone())
.unwrap_or_else(|| item.url.clone());
if media_url.is_empty() {
continue;
}
let mut headers = format_headers;
if !headers
.iter()
.any(|(key, _)| key.eq_ignore_ascii_case("range"))
{
headers.push(("Range".to_string(), "bytes=0-2047".to_string()));
}
let is_working = match requester
.get_raw_with_headers_timeout(&media_url, headers, Some(Duration::from_secs(20)))
.await
{
Ok(response) => response.status().is_success(),
Err(_) => false,
};
if is_working {
working_items.push(item);
}
}
working_items
items
}
fn build_query_url(&self, query: &str, page: u32, sort: &str) -> String {
@@ -512,8 +597,11 @@ impl SpankbangProvider {
item = item.rating(rating);
}
if let Some(preview) = preview {
let mut format =
VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.preview(preview).formats(vec![format]);
}
@@ -594,7 +682,6 @@ impl SpankbangProvider {
}
None => vec![],
};
let mut requester =
requester_or_default(&options, "spankbang", "spankbang.get.missing_requester");
let text = match requester
@@ -609,13 +696,12 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
@@ -628,13 +714,12 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
@@ -646,17 +731,17 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
let looks_like_html = text.to_ascii_lowercase().contains("<html");
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let video_items = self.get_video_items_from_html(text, proxy_base_url);
if !video_items.is_empty() {
@@ -665,6 +750,18 @@ impl SpankbangProvider {
return Ok(video_items);
}
report_provider_error(
"spankbang",
"get.parse_empty",
&format!("url={video_url}; looks_like_html={looks_like_html}"),
)
.await;
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
Ok(old_items)
}
@@ -686,7 +783,6 @@ impl SpankbangProvider {
}
None => vec![],
};
let mut requester =
requester_or_default(&options, "spankbang", "spankbang.query.missing_requester");
let text = match requester
@@ -701,13 +797,12 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
@@ -720,13 +815,12 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
@@ -738,17 +832,17 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
return Ok(old_items);
}
let looks_like_html = text.to_ascii_lowercase().contains("<html");
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let video_items = self.get_video_items_from_html(text, proxy_base_url);
if !video_items.is_empty() {
@@ -757,6 +851,18 @@ impl SpankbangProvider {
return Ok(video_items);
}
report_provider_error(
"spankbang",
"query.parse_empty",
&format!("url={video_url}; looks_like_html={looks_like_html}"),
)
.await;
let curl_cffi_items = self.fetch_items_with_curl_cffi(&video_url, proxy_base_url);
if !curl_cffi_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), curl_cffi_items.clone());
return Ok(curl_cffi_items);
}
Ok(old_items)
}
}
@@ -857,7 +963,38 @@ mod tests {
);
assert_eq!(
provider.request_headers(),
vec![("Referer".to_string(), "https://spankbang.com/".to_string())]
vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
);
}

View File

@@ -1,6 +1,9 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background};
use crate::providers::{
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
@@ -13,7 +16,7 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde::Deserialize;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::process::Command;
use std::sync::{Arc, RwLock};
use std::thread;
@@ -181,14 +184,6 @@ impl SupjavProvider {
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
let uploaders = self
.uploaders
.read()
.map(|value| value.clone())
.unwrap_or_default();
let stars = self.stars.read().map(|value| value.clone()).unwrap_or_default();
Channel {
id: CHANNEL_ID.to_string(),
name: "SupJav".to_string(),
@@ -231,43 +226,6 @@ impl SupjavProvider {
],
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Category".to_string(),
description: "Browse a SupJav category archive directly.".to_string(),
systemImage: "line.horizontal.3.decrease.circle".to_string(),
colorName: "green".to_string(),
options: self.categories.clone(),
multiSelect: false,
},
ChannelOption {
id: "sites".to_string(),
title: "Makers".to_string(),
description: "Browse maker archives and reuse them as uploader targets."
.to_string(),
systemImage: "building.2".to_string(),
colorName: "purple".to_string(),
options: uploaders,
multiSelect: false,
},
ChannelOption {
id: "stars".to_string(),
title: "Cast".to_string(),
description: "Browse cast archives directly.".to_string(),
systemImage: "person.2".to_string(),
colorName: "pink".to_string(),
options: stars,
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Genres".to_string(),
description: "Browse a genre/tag archive directly.".to_string(),
systemImage: "tag.fill".to_string(),
colorName: "orange".to_string(),
options: tags,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
@@ -454,12 +412,54 @@ impl SupjavProvider {
Ok(url.to_string())
}
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
let normalized_query = Self::normalize_title(query);
options
.iter()
.find(|value| Self::normalize_title(&value.title) == normalized_query)
.map(|value| value.id.clone())
/// Adds every lookup alias of every option to `lookup`, mapping alias -> option id.
///
/// Aliases that are already present keep their existing target, so options
/// registered earlier take precedence over later ones.
fn extend_filter_lookup(lookup: &mut HashMap<String, String>, options: &[FilterOption]) {
    let aliases = options.iter().flat_map(|candidate| {
        Self::filter_lookup_keys(candidate)
            .into_iter()
            .map(move |alias| (alias, candidate))
    });
    for (alias, candidate) in aliases {
        lookup.entry(alias).or_insert_with(|| candidate.id.clone());
    }
}
/// Builds the alias -> archive id table used to resolve free-text queries.
///
/// Sources are merged in the order uploaders, stars, categories, tags; because
/// existing aliases are never overwritten, that order is also the precedence.
/// A source whose lock cannot be read is skipped.
fn resolve_filter_lookup(&self) -> HashMap<String, String> {
    let mut table = HashMap::new();

    if let Ok(guard) = self.uploaders.read() {
        Self::extend_filter_lookup(&mut table, guard.as_slice());
    }

    if let Ok(guard) = self.stars.read() {
        Self::extend_filter_lookup(&mut table, guard.as_slice());
    }

    // Categories are registered before tags so a category wins over a same-named tag.
    Self::extend_filter_lookup(&mut table, &self.categories);

    if let Ok(guard) = self.tags.read() {
        Self::extend_filter_lookup(&mut table, guard.as_slice());
    }

    table
}
/// Returns the sorted, de-duplicated lookup aliases for one filter option.
///
/// The aliases are the normalized title, the normalized last path segment of
/// the id (dashes read as spaces), and that segment without a trailing
/// `-jav` when something remains after stripping it.
fn filter_lookup_keys(option: &FilterOption) -> Vec<String> {
    // Slugs are compared like titles: dashes become spaces before normalizing.
    let alias_from_slug = |raw: &str| Self::normalize_title(&raw.replace('-', " "));

    let mut aliases = vec![Self::normalize_title(&option.title)];

    let last_segment = option
        .id
        .trim_end_matches('/')
        .rsplit('/')
        .next()
        .unwrap_or_default()
        .trim();

    if !last_segment.is_empty() {
        aliases.push(alias_from_slug(last_segment));
        match last_segment.strip_suffix("-jav") {
            Some(stem) if !stem.is_empty() => aliases.push(alias_from_slug(stem)),
            _ => {}
        }
    }

    aliases.sort();
    aliases.dedup();
    aliases
}
fn resolve_option_target(&self, options: &ServerOptions) -> Option<String> {
@@ -481,23 +481,10 @@ impl SupjavProvider {
}
fn resolve_query_target(&self, query: &str) -> Option<String> {
if let Ok(uploaders) = self.uploaders.read() {
if let Some(target) = Self::match_filter(&uploaders, query) {
return self.normalize_archive_target(&target);
}
}
if let Ok(stars) = self.stars.read() {
if let Some(target) = Self::match_filter(&stars, query) {
return self.normalize_archive_target(&target);
}
}
if let Ok(tags) = self.tags.read() {
if let Some(target) = Self::match_filter(&tags, query) {
return self.normalize_archive_target(&target);
}
}
Self::match_filter(&self.categories, query)
.and_then(|target| self.normalize_archive_target(&target))
let normalized_query = Self::normalize_title(query);
self.resolve_filter_lookup()
.get(&normalized_query)
.and_then(|target| self.normalize_archive_target(target))
}
fn filters_need_loading(&self) -> bool {
@@ -1000,11 +987,14 @@ print(json.dumps({
let base_url =
Url::parse(master_url).map_err(|error| Error::from(format!("invalid master url: {error}")))?;
let mut formats = vec![
VideoFormat::new(master_url.to_string(), "auto".to_string(), "m3u8".to_string())
.format_note("master".to_string())
.format_id("master".to_string()),
];
let mut formats = vec![VideoFormat::new(
master_url.to_string(),
"auto".to_string(),
"m3u8".to_string(),
)
.format_note("master".to_string())
.format_id("master".to_string())
.http_header("Referer".to_string(), BASE_URL.to_string())];
let resolution_regex = Self::regex(r#"RESOLUTION=(\d+)x(\d+)"#)?;
let bandwidth_regex = Self::regex(r#"BANDWIDTH=(\d+)"#)?;
let mut lines = response.text.lines();
@@ -1049,7 +1039,8 @@ print(json.dumps({
height
.map(|value| format!("hls-{value}p"))
.unwrap_or_else(|| "hls-variant".to_string()),
);
)
.http_header("Referer".to_string(), BASE_URL.to_string());
if let Some(bandwidth) = bandwidth {
format = format.format_note(format!("{quality} ({bandwidth}bps)"));
}
@@ -1277,8 +1268,7 @@ print(json.dumps({
item.tags = Some(parsed_tags);
}
if let Some((master_url, formats)) = self.resolve_player(page_url, &players).await? {
item.url = master_url;
if let Some((_master_url, formats)) = self.resolve_player(page_url, &players).await? {
if !formats.is_empty() {
item.formats = Some(formats);
}
@@ -1549,6 +1539,13 @@ print(json.dumps({
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
if self.resolve_option_target(&options).is_none() && sort == "new" {
let home_url = self.build_home_url(page)?;
let home_items = self
.fetch_items_for_url(cache.clone(), home_url, sort, per_page_limit)
.await?;
if !home_items.is_empty() {
return Ok(home_items);
}
return self.aggregate_latest(page, per_page_limit).await;
}
@@ -1581,7 +1578,7 @@ print(json.dumps({
let items = self
.fetch_items_for_url(cache, url, sort, per_page_limit)
.await?;
if exact_target.is_some() || items.len() >= 5 || !items.is_empty() {
if !items.is_empty() {
return Ok(items);
}
@@ -1609,6 +1606,7 @@ impl Provider for SupjavProvider {
let _ = pool;
let page = page.parse::<u16>().unwrap_or(1);
let per_page_limit = per_page.parse::<usize>().unwrap_or(24);
let rewrite_options = options.clone();
let result = match query {
Some(query) if !query.trim().is_empty() => {
@@ -1619,7 +1617,25 @@ impl Provider for SupjavProvider {
};
match result {
Ok(videos) => videos,
Ok(mut videos) => {
for video in &mut videos {
if let Some(formats) = video.formats.as_mut() {
for format in formats {
if format.url.starts_with("/proxy/supjav/")
|| format.url.contains("/proxy/supjav/")
{
continue;
}
format.url = build_proxy_url(
&rewrite_options,
CHANNEL_ID,
&strip_url_scheme(&format.url),
);
}
}
}
videos
}
Err(error) => {
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
vec![]
@@ -1647,13 +1663,23 @@ mod tests {
fn test_provider() -> SupjavProvider {
SupjavProvider {
url: BASE_URL.to_string(),
categories: vec![FilterOption {
id: format!("{BASE_URL}/category/censored-jav"),
title: "Censored JAV".to_string(),
}],
categories: vec![
FilterOption {
id: format!("{BASE_URL}/category/censored-jav"),
title: "Censored JAV".to_string(),
},
FilterOption {
id: format!("{BASE_URL}/category/uncensored-jav"),
title: "Uncensored JAV".to_string(),
},
],
tags: Arc::new(RwLock::new(vec![FilterOption {
id: "/tag/creampie".to_string(),
title: "Creampie".to_string(),
},
FilterOption {
id: "/tag/uncensored-jav".to_string(),
title: "Uncensored JAV".to_string(),
}])),
uploaders: Arc::new(RwLock::new(vec![])),
stars: Arc::new(RwLock::new(vec![])),
@@ -1666,6 +1692,18 @@ mod tests {
assert_eq!(SupjavProvider::strip_count_suffix("Censored JAV"), "Censored JAV");
}
#[test]
fn filter_lookup_stores_title_and_slug_aliases() {
    // Both the full title and the slug with its "-jav" suffix removed must be aliases.
    let keys = SupjavProvider::filter_lookup_keys(&FilterOption {
        id: format!("{BASE_URL}/category/uncensored-jav"),
        title: "Uncensored JAV".to_string(),
    });

    for expected in ["uncensored jav", "uncensored"] {
        assert!(keys.iter().any(|value| value == expected));
    }
}
#[test]
fn builds_archive_page_url_with_query() {
assert_eq!(
@@ -1756,6 +1794,50 @@ mod tests {
);
}
#[test]
fn resolves_category_queries_with_or_without_jav_suffix() {
    // The query must resolve to the category archive with or without "JAV".
    let provider = test_provider();

    for query in ["Uncensored JAV", "Uncensored"] {
        assert_eq!(
            provider.resolve_query_target(query).as_deref(),
            Some("https://supjav.com/category/uncensored-jav")
        );
    }
}
#[test]
fn category_lookup_wins_over_same_named_tag() {
    // The fixture has a category and a tag that share title and slug;
    // every alias must point at the category archive.
    let lookup = test_provider().resolve_filter_lookup();

    for alias in ["uncensored jav", "uncensored"] {
        assert_eq!(
            lookup.get(alias).map(String::as_str),
            Some("https://supjav.com/category/uncensored-jav")
        );
    }
}
#[test]
fn item_query_matching_uses_full_query_text() {
    // An item tagged "Uncensored" matches that exact query but not the longer "Uncensored JAV".
    let mut sample = VideoItem::new(
        String::from("abc"),
        String::from("Sample"),
        String::from("https://supjav.com/sample"),
        CHANNEL_ID.to_string(),
        String::new(),
        0,
    );
    sample.tags = Some(vec![String::from("Uncensored")]);

    let matches_exact = SupjavProvider::item_matches_query(&sample, "Uncensored");
    let matches_longer = SupjavProvider::item_matches_query(&sample, "Uncensored JAV");

    assert!(matches_exact);
    assert!(!matches_longer);
}
fn test_db_pool() -> DbPool {
let unique = SystemTime::now()
.duration_since(UNIX_EPOCH)
@@ -1815,10 +1897,15 @@ mod tests {
"supjav items must not serialize embed"
);
assert!(
first.url.contains(".m3u8"),
"expected direct m3u8 url, got {}",
first.url.starts_with(BASE_URL),
"expected supjav page url, got {}",
first.url
);
let formats = first.formats.as_ref().expect("supjav item should have formats");
let master_format = formats
.iter()
.find(|f| f.url.contains(".m3u8") || f.url.contains("/proxy/supjav/"))
.expect("formats should contain a proxied m3u8 url");
let mut requester = Requester::new();
let thumb_response = requester
@@ -1836,7 +1923,7 @@ mod tests {
);
let ytdlp = Command::new("yt-dlp")
.args(["--no-warnings", "--simulate", "--skip-download", &first.url])
.args(["--no-warnings", "--simulate", "--skip-download", &master_format.url])
.output()
.expect("yt-dlp should run");
assert!(

View File

@@ -5,9 +5,11 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::format_error_chain;
use crate::util::discord::send_discord_error_report;
use crate::util::hoster_proxy::{proxy_name_for_url, rewrite_hoster_url};
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::ServerOptions;
use crate::videos::VideoFormat;
use crate::videos::VideoItem;
use async_trait::async_trait;
use error_chain::error_chain;
@@ -154,7 +156,7 @@ impl SxyprnProvider {
}
};
let text = match requester.get(&url_str, None).await {
let text = match requester.get_with_headers(&url_str, vec![("Referer".to_string(), self.url.clone())],None).await {
Ok(text) => text,
Err(e) => {
crate::providers::report_provider_error(
@@ -198,7 +200,6 @@ impl SxyprnProvider {
}
Ok(video_items)
}
async fn query(
&self,
cache: VideoCache,
@@ -297,16 +298,16 @@ impl SxyprnProvider {
}
// take content before "<script async"
let before_script = html
.split("<script async")
.next()
.ok_or_else(|| ErrorKind::Parse("missing '<script async' split point".into()))?;
let main_content = html
.split("main_content")
.nth(1)
.ok_or_else(|| ErrorKind::Parse("missing 'main_content' split point".into()))?;
// split into video segments (skip the first chunk)
let raw_videos: Vec<&str> = before_script.split("post_el_small'").skip(1).collect();
let raw_videos: Vec<&str> = main_content.split("post_el_small'").skip(1).collect();
if raw_videos.is_empty() {
return Err(ErrorKind::Parse("no 'post_el_small\\'' segments found".into()).into());
return Err(ErrorKind::Parse("no 'post_el_small\'' segments found".into()).into());
}
let mut items = Vec::new();
@@ -317,7 +318,7 @@ impl SxyprnProvider {
.split("/post/")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract /post/ url".into()))?
.ok_or_else(|| ErrorKind::Parse(format!("failed to extract /post/ url")))?
.to_string();
let video_url =
@@ -331,6 +332,12 @@ impl SxyprnProvider {
.and_then(|s| s.split("</div>").next())
.ok_or_else(|| ErrorKind::Parse("failed to extract title_parts".into()))?;
let title_links: Vec<String> = video_segment
.split("href='https://")
.skip(1)
.filter_map(|part| part.split("'").next().map(|u| u.to_string()))
.collect();
let document = Html::parse_document(title_parts);
let selector = Selector::parse("*")
.map_err(|e| ErrorKind::Parse(format!("selector parse failed: {e}")))?;
@@ -353,13 +360,61 @@ impl SxyprnProvider {
.replace('\n', "")
.replace(" + ", " ")
.replace(" ", " ")
.replace("\\", "")
.trim()
.to_string();
// De-duplicate repeated titles
let words: Vec<&str> = title.split_whitespace().collect();
if words.len() > 1 {
for pattern_len in (1..=words.len() / 2).rev() {
let pattern = &words[0..pattern_len];
let mut all_match = true;
let mut idx = pattern_len;
while idx < words.len() {
let end = std::cmp::min(idx + pattern_len, words.len());
if &words[idx..end] != &pattern[0..(end - idx)] {
all_match = false;
break;
}
idx += pattern_len;
}
if all_match && words.len() % pattern_len == 0 {
title = pattern.join(" ");
break;
}
}
}
if title.to_ascii_lowercase().starts_with("new ") {
title = title[4..].to_string();
}
// Extract tags from title (words starting with #)
let mut tags = Vec::new();
let words: Vec<&str> = title.split_whitespace().collect();
let mut cleaned_words = Vec::new();
for word in words {
let raw_tag = word
.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
.to_string();
if raw_tag.starts_with('#') && raw_tag.len() > 1 {
let tag = raw_tag[1..].to_string();
if !tags.contains(&tag) {
tags.push(tag);
}
} else {
cleaned_words.push(word.to_string());
}
}
// Reconstruct title without tags
title = cleaned_words.join(" ");
// id (DON'T index [6])
let id = video_url
.split('/')
@@ -376,7 +431,7 @@ impl SxyprnProvider {
.nth(1)
.and_then(|s| s.split("data-src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract thumb".into()))?;
.unwrap_or("");
let thumb = format!("https:{thumb_path}");
@@ -416,36 +471,87 @@ impl SxyprnProvider {
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
// stream urls (your filter condition looks suspicious; leaving as-is)
let stream_urls = video_segment
.split("extlink_icon extlink")
.filter_map(|part| {
part.split("href='")
.last()
.and_then(|s| s.split('\'').next())
.map(|u| u.to_string())
})
.filter(|url| url.starts_with("https://lulustream."))
.collect::<Vec<String>>();
// stream urls - collect both lulustream and vidara.so URLs
let mut formats = vec![];
let video_item_url = stream_urls.first().cloned().unwrap_or_else(|| {
crate::providers::build_proxy_url(options, "sxyprn", &format!("post/{}", id))
});
// Add sxyprn format
let sxyprn_url = format!(
"{}/proxy/sxyprn/post/{}",
options.public_url_base.as_deref().unwrap_or(""),
id
);
formats.push(
VideoFormat::new(sxyprn_url.clone(), "auto".to_string(), "mp4".to_string())
.format_note(sxyprn_url.split("/").nth(4).unwrap_or("sxyprn").to_string()),
);
let doodstream_urls: Vec<String> = title_links
.iter()
.filter(|url| proxy_name_for_url(url).as_deref() == Some("doodstream"))
.map(|url| rewrite_hoster_url(options, url))
.collect();
for dood_url in doodstream_urls {
formats.push(
VideoFormat::new(dood_url.clone(), "auto".to_string(), "doodstream".to_string())
.format_note("doodstream".to_string())
.format_id("doodstream".to_string())
.http_header("Referer".to_string(), "https://sxyprn.com/".to_string()),
);
}
// let lulustream_urls: Vec<String> = title_links
// .iter()
// .filter(|url| proxy_name_for_url(url).as_deref() == Some("lulustream"))
// .map(|url| rewrite_hoster_url(options, url))
// .collect();
// for lulustream_url in lulustream_urls {
// formats.push(
// VideoFormat::m3u8(
// lulustream_url.clone(),
// "auto".to_string(),
// "m3u8".to_string(),
// )
// .format_note("lulustream".to_string())
// .format_id("lulustream".to_string()),
// );
// }
// Also collect and transform vidara.so URLs to proxy format and add as formats
let vidara_urls: Vec<String> = title_links
.iter()
.filter(|url| proxy_name_for_url(url).as_deref() == Some("vidara"))
.map(|url| rewrite_hoster_url(options, url))
.collect();
for vidara_url in vidara_urls {
formats.push(
VideoFormat::m3u8(vidara_url.clone(), "1080".to_string(), "m3u8".to_string())
.format_note(vidara_url.split("/").nth(4).unwrap_or("vidara").to_string())
.format_id("vidara".to_string())
.http_header("Referer".to_string(), "https://sxyprn.com/".to_string()),
);
}
let mut video_item = VideoItem::new(
id,
id.clone(),
title,
video_item_url,
format!("https://sxyprn.com/post/{}", url.clone()),
"sxyprn".to_string(),
thumb,
duration,
)
.views(views.parse::<u32>().unwrap_or(0));
.views(views.parse::<u32>().unwrap_or(0))
.formats(formats);
if let Some(p) = preview {
video_item = video_item.preview(p);
// Add tags if any were found
if !tags.is_empty() {
video_item.tags = Some(tags);
}
if preview.is_some() {
video_item.preview = preview;
}
items.push(video_item);
}

762
src/providers/thaiporntv.rs Normal file
View File

@@ -0,0 +1,762 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::requester::Requester;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use base64::{engine::general_purpose, Engine};
use chrono::{DateTime, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::sync::{Arc, RwLock};
use std::time::Duration as StdDuration;
use std::{thread, vec};
use tokio::time::timeout;
use url::Url;
use wreq::Version;
/// Static channel metadata for this provider: the group id it is filed under
/// and the descriptive tags attached to the channel.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "thai",
tags: &["thai", "asian", "amateur"],
};
// Error types for this provider, generated by `error_chain!`:
// - `Io` wraps `std::io::Error` as a foreign link.
// - `Parse` carries a message and displays as "parse error: <msg>".
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Site origin; seeds the provider's `url` and is sent as the `Referer` on decoded stream formats.
const BASE_URL: &str = "https://www.thaiporntv.com";
/// Identifier used for the channel id, each item's channel field, and error reports.
const CHANNEL_ID: &str = "thaiporntv";
/// `User-Agent` header value sent with HTML page requests.
const USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";
/// `Accept` header value sent with HTML page requests.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
/// Host prefix used to build the preview GIF URL of a listing card.
const CDN_BASE: &str = "https://web.techvids.top";
/// Provider for the THAIPornTV site.
///
/// Cloning is cheap for the tag list: clones share it through the `Arc`.
#[derive(Debug, Clone)]
pub struct ThaipornTvProvider {
// Base URL that listing, tag and relative URLs are resolved against.
url: String,
// Shared tag options; seeded with "All" and extended by the background tag load.
tags: Arc<RwLock<Vec<FilterOption>>>,
}
/// Site-wide listing orderings; each variant maps to one archive path segment
/// (`recent`, `popular`, `commented`, `rated`, `longest`) when a URL is built.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArchiveMode {
Latest,
Popular,
Commented,
Rated,
Longest,
}
/// What a request resolves to: a site-wide archive listing or a single tag's listing.
#[derive(Debug, Clone)]
enum Target {
// Site-wide listing in the given ordering.
Archive(ArchiveMode),
// Listing of one tag, identified by the last path segment of its URL.
Tag {
slug: String,
},
}
impl ThaipornTvProvider {
/// Creates the provider with only the "All" pseudo-tag and starts the
/// background load that fills in the real tag list.
pub fn new() -> Self {
    let all_option = FilterOption {
        id: String::from("all"),
        title: String::from("All"),
    };
    let this = Self {
        url: String::from(BASE_URL),
        tags: Arc::new(RwLock::new(vec![all_option])),
    };
    this.spawn_initial_load();
    this
}
/// Loads the tag list on a separate OS thread.
///
/// The thread builds its own current-thread Tokio runtime and drives
/// `load_tags` on it. Neither failure mode is returned to the caller: a
/// runtime build error or a tag loading error is only reported through
/// `report_provider_error_background`.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let shared_tags = Arc::clone(&self.tags);

    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();

        let rt = match built {
            Ok(rt) => rt,
            Err(build_error) => {
                report_provider_error_background(
                    CHANNEL_ID,
                    "spawn_initial_load.runtime_build",
                    &build_error.to_string(),
                );
                return;
            }
        };

        rt.block_on(async move {
            let outcome = Self::load_tags(&base_url, Arc::clone(&shared_tags)).await;
            if let Err(load_error) = outcome {
                report_provider_error_background(
                    CHANNEL_ID,
                    "load_tags",
                    &load_error.to_string(),
                );
            }
        });
    });
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default();
Channel {
id: CHANNEL_ID.to_string(),
name: "THAIPornTV".to_string(),
description: "Free Thai Porn Videos & Asian Sex Tube.".to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=thaiporntv.com".to_string(),
status: "active".to_string(),
categories: tags.iter().map(|value| value.title.clone()).collect(),
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Sort the videos".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Most Recent".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "commented".to_string(),
title: "Most Commented".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
FilterOption {
id: "longest".to_string(),
title: "Longest".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "filter".to_string(),
title: "Tags".to_string(),
description: "Video tags.".to_string(),
systemImage: "tag.fill".to_string(),
colorName: "green".to_string(),
options: tags,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
/// Trims `text` and replaces every run of whitespace with a single space.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Decodes HTML entities in `text`, turns non-breaking spaces into regular
/// spaces, and trims the result. If decoding fails the input is used as-is.
fn decode_html_entities(text: &str) -> String {
    let decoded = match decode(text.as_bytes()).to_string() {
        Ok(value) => value,
        Err(_) => text.to_string(),
    };
    let without_nbsp = decoded.replace('\u{a0}', " ");
    without_nbsp.trim().to_string()
}
/// Resolves `value` against the provider's base URL.
///
/// Values that already start with `http://` or `https://` are returned
/// unchanged; anything else is joined to the base with exactly one `/`.
fn absolute_url(&self, value: &str) -> String {
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| value.starts_with(*scheme));

    if already_absolute {
        value.to_string()
    } else {
        let base = self.url.trim_end_matches('/');
        let path = value.trim_start_matches('/');
        format!("{}/{}", base, path)
    }
}
/// Returns the request headers used for HTML page fetches: the given
/// referer plus fixed User-Agent, Accept and Accept-Language values.
fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
    let pairs = [
        ("Referer", referer),
        ("User-Agent", USER_AGENT),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
async fn fetch_html(
&self,
requester: &mut Requester,
url: &str,
referer: &str,
) -> Result<String> {
requester
.get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Appends `item` to the shared list unless it is incomplete or a duplicate.
///
/// An item is ignored when its id or title is empty, when an existing entry
/// has the same id or the same title, or when the write lock cannot be taken.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    if item.id.is_empty() || item.title.is_empty() {
        return;
    }

    let Ok(mut entries) = target.write() else {
        return;
    };

    let already_known = entries
        .iter()
        .any(|known| known.id == item.id || known.title == item.title);
    if !already_known {
        entries.push(item);
    }
}
/// Fetches the site's `/tags/` page and adds every tag card found to `tags`.
///
/// A tag card is an `a.group` link whose href contains `/tags/`. Links to the
/// tag index itself, pagination links, and cards without an `h2` title are
/// skipped. The href is stored verbatim as the option id.
///
/// # Errors
/// Fails when the page cannot be fetched or a selector cannot be parsed.
async fn load_tags(base_url: &str, tags: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
    let mut requester = Requester::new();
    // A throwaway provider instance gives access to the `fetch_html` method.
    let provider = Self {
        url: base_url.to_string(),
        tags: Arc::clone(&tags),
    };

    let listing_url = format!("{}/tags/", base_url);
    let referer = format!("{}/", base_url);
    let html = provider
        .fetch_html(&mut requester, &listing_url, &referer)
        .await?;

    let document = Html::parse_document(&html);
    let a_selector = Self::selector("a.group[href*='/tags/']")?;
    let h2_selector = Self::selector("h2")?;

    let discovered = document.select(&a_selector).filter_map(|anchor| {
        let href = anchor.value().attr("href")?;

        // The tag index and its pagination links are not tags themselves.
        if href.ends_with("/tags/") || href.contains("/page/") {
            return None;
        }

        // The visible tag name lives in the card's first <h2>.
        let title = anchor
            .select(&h2_selector)
            .next()
            .map(|heading| Self::collapse_whitespace(&heading.text().collect::<String>()))
            .unwrap_or_default();
        if title.is_empty() {
            return None;
        }

        Some(FilterOption {
            id: href.to_string(),
            title,
        })
    });

    for option in discovered {
        Self::push_unique(&tags, option);
    }

    Ok(())
}
/// Maps a sort id from the channel options to an archive mode.
/// Unknown ids (including "new") fall back to `ArchiveMode::Latest`.
fn archive_from_sort(sort: &str) -> ArchiveMode {
    const KNOWN: [(&str, ArchiveMode); 4] = [
        ("popular", ArchiveMode::Popular),
        ("commented", ArchiveMode::Commented),
        ("rated", ArchiveMode::Rated),
        ("longest", ArchiveMode::Longest),
    ];

    for (name, mode) in KNOWN {
        if name == sort {
            return mode;
        }
    }
    ArchiveMode::Latest
}
/// Decides which listing a request should use.
///
/// Precedence: a query that names a known tag, then a `filter` option that
/// names a known tag, then the site-wide archive chosen by `sort`. A query
/// that matches no tag does not by itself select anything.
fn resolve_target(&self, options: &ServerOptions, sort: &str, query: Option<&str>) -> Target {
    query
        .and_then(|text| self.find_tag_target_in_options(text))
        .or_else(|| {
            options
                .filter
                .as_deref()
                .and_then(|selected| self.find_tag_target_in_options(selected))
        })
        .unwrap_or_else(|| Target::Archive(Self::archive_from_sort(sort)))
}
/// Looks up `value` among the loaded tags and converts the first match into a target.
///
/// A tag matches when its id equals `value` ignoring ASCII case, or when its
/// trimmed, lowercased title equals the trimmed, lowercased `value`.
/// Returns `None` when nothing matches, when the lock cannot be read, or
/// when the matched id is not a tag URL (e.g. the "all" entry).
fn find_tag_target_in_options(&self, value: &str) -> Option<Target> {
    let wanted_title = value.trim().to_lowercase();
    let known_tags = self.tags.read().ok()?;

    for candidate in known_tags.iter() {
        let id_matches = candidate.id.eq_ignore_ascii_case(value);
        if id_matches || candidate.title.trim().to_lowercase() == wanted_title {
            // Only the first match is considered, even if it yields no target.
            return self.target_from_filter_id(&candidate.id);
        }
    }
    None
}
fn target_from_filter_id(&self, id: &str) -> Option<Target> {
if id.contains("/tags/") {
let url = Url::parse(&self.absolute_url(id)).ok()?;
let segments: Vec<_> = url
.path_segments()?
.filter(|s| !s.is_empty())
.collect();
let slug = segments.last()?.to_string();
if slug == "tags" {
return None;
}
return Some(Target::Tag { slug });
}
None
}
/// Builds the listing URL for `target` at the given page number.
fn build_url_for_target(&self, target: &Target, page: u32) -> String {
    match *target {
        Target::Tag { ref slug } => self.build_tag_url(slug, page),
        Target::Archive(mode) => self.build_archive_url(mode, page),
    }
}
/// Builds the URL of a site-wide archive listing.
///
/// Pages 0 and 1 use the bare section URL; later pages append `page/<n>/`.
fn build_archive_url(&self, mode: ArchiveMode, page: u32) -> String {
    let section = match mode {
        ArchiveMode::Latest => "recent",
        ArchiveMode::Popular => "popular",
        ArchiveMode::Commented => "commented",
        ArchiveMode::Rated => "rated",
        ArchiveMode::Longest => "longest",
    };

    let mut listing = format!("{}/{}/", self.url, section);
    if page > 1 {
        listing.push_str(&format!("page/{}/", page));
    }
    listing
}
/// Builds the URL of a tag listing.
///
/// Every non-alphanumeric byte of the slug is percent-encoded. Pages 0 and 1
/// use the bare tag URL; later pages append `page/<n>/`.
fn build_tag_url(&self, slug: &str, page: u32) -> String {
    let mut listing = format!(
        "{}/tags/{}/",
        self.url,
        utf8_percent_encode(slug, NON_ALPHANUMERIC)
    );
    if page > 1 {
        listing.push_str(&format!("page/{}/", page));
    }
    listing
}
fn decode_data_enc(encoded_data: &str) -> Result<Vec<VideoFormat>> {
let cleaned_data = encoded_data.replace("-", "+").replace("_", "/");
let padding = (4 - cleaned_data.len() % 4) % 4;
let padded_data = format!("{}{}", cleaned_data, "=".repeat(padding));
let decoded_bytes = general_purpose::STANDARD.decode(&padded_data)
.map_err(|e| Error::from(format!("Base64 decode failed: {e}")))?;
if decoded_bytes.is_empty() {
return Err(Error::from("Decoded bytes are empty"));
}
let key = decoded_bytes[0];
let decrypted_bytes: Vec<u8> = decoded_bytes[1..]
.iter()
.map(|&b| b ^ key)
.collect();
let json_str = String::from_utf8(decrypted_bytes)
.map_err(|e| Error::from(format!("UTF-8 decode failed: {e}")))?;
let formats_json: serde_json::Value = serde_json::from_str(&json_str)
.map_err(|e| Error::from(format!("JSON parse failed: {e}")))?;
let Some(formats_array) = formats_json.as_array() else {
return Err(Error::from("JSON is not an array"));
};
let mut formats = Vec::new();
for format_val in formats_array {
let Some(u) = format_val.get("u").and_then(|v| v.as_str()) else { continue; };
let Some(q) = format_val.get("q").and_then(|v| v.as_str()) else { continue; };
let mut format = VideoFormat::new(
u.to_string(),
q.to_string(),
"application/x-mpegURL".to_string(),
);
format.add_http_header("Referer".to_string(), BASE_URL.to_string());
formats.push(format);
}
Ok(formats)
}
fn parse_card(&self, card: ElementRef<'_>) -> Option<VideoItem> {
// Selectors for the Tailwind-based redesign
let playthumb_sel = Self::selector("a.playthumb").ok()?;
let img_sel = Self::selector("img").ok()?;
let title_sel = Self::selector("a.text-brand-pink").ok()?;
let tag_sel = Self::selector("a[href*='/tags/']").ok()?;
let date_sel = Self::selector("span.ml-auto").ok()?;
let link = card.select(&playthumb_sel).next()?;
let href = link.value().attr("href")?;
let data_id = link.value().attr("data-id").unwrap_or("");
// ID: numeric part from data-id (xn88-39688 → 39688) or from URL
let id = if !data_id.is_empty() {
data_id.rsplit('-').next().unwrap_or(data_id).to_string()
} else {
let re = Regex::new(r"-(\d+)/$").unwrap();
re.captures(href)?.get(1)?.as_str().to_string()
};
let url = if href.starts_with("http") {
href.to_string()
} else {
self.absolute_url(href)
};
let thumb = card.select(&img_sel).next()
.and_then(|e| e.value().attr("src"))
.map(|s| if s.starts_with("http") { s.to_string() } else { self.absolute_url(s) })
.unwrap_or_default();
// Preview GIF from CDN
let preview = if !data_id.is_empty() {
Some(format!("{CDN_BASE}/2/4/7/9/preview/{data_id}_preview.gif"))
} else {
None
};
let title = card.select(&title_sel).next()
.map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
.filter(|t| !t.is_empty())
.or_else(|| {
link.value().attr("aria-label")
.map(|s| {
let s = s.strip_prefix("Watch ").unwrap_or(s);
let s = s.strip_suffix(" video").unwrap_or(s);
Self::decode_html_entities(s)
})
})
.unwrap_or_default();
// Duration is in a font-mono div inside the thumbnail overlay
let card_html = card.html();
let dur_re = Regex::new(r"font-mono[^>]+>(\d+:\d+(?::\d+)?)<").unwrap();
let duration_text = dur_re.captures(&card_html)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.unwrap_or_default();
let duration = parse_duration_mm_ss(&duration_text);
// Views from the fa-eye span
let views_re = Regex::new(r"fa-eye[^>]+></i>\s*(\d[\d,]*)").unwrap();
let views = views_re.captures(&card_html)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().replace(',', "").parse::<u32>().ok());
// Upload date from the ml-auto span
let uploaded_at = card.select(&date_sel).next()
.map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
.and_then(|s| NaiveDate::parse_from_str(s.trim(), "%d %b %Y").ok())
.and_then(|date| {
date.and_hms_opt(0, 0, 0)
.map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc).timestamp() as u64)
});
// Tags from /tags/ links in the card (these are simple text-only links in cards)
let tags: Vec<String> = card.select(&tag_sel)
.map(|e| Self::collapse_whitespace(&e.text().collect::<String>()))
.filter(|s| !s.is_empty())
.collect();
let mut item = VideoItem::new(
id,
title,
url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
if let Some(views) = views { item = item.views(views); }
if let Some(uploaded_at) = uploaded_at { item = item.uploaded_at(uploaded_at); }
if let Some(preview) = preview { item = item.preview(preview); }
if !tags.is_empty() { item = item.tags(tags); }
Some(item)
}
/// Parses a listing page and returns every card that converts into a video item.
///
/// Cards are `div.group` elements that are not ad containers; cards that
/// cannot be parsed are silently skipped.
///
/// # Errors
/// Fails only when the card selector cannot be built.
fn get_video_items_from_html(&self, html: String) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(&html);
    // Cards use class "group flex flex-col"; ad cards additionally have "ad-container"
    let card_selector = Self::selector("div.group:not(.ad-container)")?;

    let items = document
        .select(&card_selector)
        .filter_map(|card| self.parse_card(card))
        .collect();
    Ok(items)
}
async fn enrich_video(&self, mut item: VideoItem, options: &ServerOptions) -> Result<VideoItem> {
let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video");
let detail_fetch = timeout(
StdDuration::from_secs(6),
self.fetch_html(&mut requester, &item.url, &item.url),
)
.await
.map_err(|_| Error::from(format!("detail request timed out for {}", item.url)))??;
let document = Html::parse_document(&detail_fetch);
let video_element_selector = Self::selector("video[data-enc]")?;
if let Some(video_element) = document.select(&video_element_selector).next() {
if let Some(data_enc) = video_element.value().attr("data-enc") {
match Self::decode_data_enc(data_enc) {
Ok(formats) => {
item.formats = Some(formats);
},
Err(e) => {
report_provider_error_background(
CHANNEL_ID,
"decode_data_enc",
&format!("url={}; error={}", item.url, e),
);
}
}
}
}
Ok(item)
}
/// Fetches one listing URL, optionally enriches each item from its detail page, and
/// caches the result under the listing URL.
///
/// * A cache entry younger than five minutes is returned as-is.
/// * The listing request is capped at ten seconds.
/// * At most `per_page_limit` items (minimum 1) are kept.
/// * With `enrich_details` set, detail pages are fetched with up to four requests in
///   flight. Items whose enrichment fails are reported in the background and dropped.
///
/// The returned items keep the order in which they appear on the listing page.
async fn fetch_items_for_url(
    &self,
    cache: VideoCache,
    url: String,
    per_page_limit: usize,
    enrich_details: bool,
    options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
    if let Some((time, items)) = cache.get(&url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
    }

    let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url");
    let html = timeout(
        StdDuration::from_secs(10),
        self.fetch_html(&mut requester, &url, &url),
    )
    .await
    .map_err(|_| Error::from(format!("list request timed out for {url}")))??;

    let list_items = self.get_video_items_from_html(html)?;
    if list_items.is_empty() {
        return Ok(vec![]);
    }

    let limited_items = list_items
        .into_iter()
        .take(per_page_limit.max(1))
        .collect::<Vec<_>>();

    if !enrich_details {
        cache.insert(url, limited_items.clone());
        return Ok(limited_items);
    }

    // `buffered` (unlike `buffer_unordered`) yields results in input order, so the
    // listing keeps the site's ordering while still running four requests at once.
    let items = stream::iter(limited_items.into_iter().map(|item| {
        let provider = self.clone();
        let options = options.clone();
        async move { provider.enrich_video(item, &options).await }
    }))
    .buffered(4)
    .collect::<Vec<_>>()
    .await;

    let mut final_items = Vec::with_capacity(items.len());
    for item_result in items {
        match item_result {
            Ok(item) => final_items.push(item),
            Err(e) => {
                report_provider_error_background(CHANNEL_ID, "enrich_video_stream", &e.to_string())
            }
        }
    }

    // Do not cache an empty page: every enrichment failed, so let the next call retry.
    if !final_items.is_empty() {
        cache.insert(url, final_items.clone());
    }
    Ok(final_items)
}
/// Resolves the request into a listing URL and fetches its items.
///
/// Detail-page enrichment is only requested for the first page (`page <= 1`).
async fn get(
    &self,
    cache: VideoCache,
    page: u32,
    sort: &str,
    query: Option<&str>,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let target = self.resolve_target(&options, sort, query);
    let listing_url = self.build_url_for_target(&target, page);
    let enrich_details = page <= 1;
    self.fetch_items_for_url(cache, listing_url, per_page_limit, enrich_details, &options)
        .await
}
}
/// Parse "SS", "MM:SS" or "HH:MM:SS" into total seconds.
///
/// Returns `0` when the text is not a well-formed duration: any component that is
/// not a non-negative integer, or more than three components. Components are never
/// dropped, so "1:xx:30" is rejected rather than being read as "1:30".
/// The arithmetic saturates at `u32::MAX` instead of overflowing.
fn parse_duration_mm_ss(text: &str) -> u32 {
    // Collecting into `Option<Vec<_>>` fails as a whole if a single part is invalid.
    let parts: Option<Vec<u32>> = text
        .split(':')
        .map(|part| part.trim().parse::<u32>().ok())
        .collect();
    let parts = match parts {
        Some(parts) => parts,
        None => return 0,
    };
    let (hours, minutes, seconds) = match parts.as_slice() {
        [h, m, s] => (*h, *m, *s),
        [m, s] => (0, *m, *s),
        [s] => (0, 0, *s),
        _ => return 0,
    };
    hours
        .saturating_mul(3600)
        .saturating_add(minutes.saturating_mul(60))
        .saturating_add(seconds)
}
#[async_trait]
impl Provider for ThaipornTvProvider {
    /// Provider entry point: parses the paging arguments, delegates to `get`, and
    /// turns any error into a reported failure plus an empty result.
    ///
    /// `page` defaults to 1 and `per_page` to 30 when they are not valid numbers.
    /// A blank query is treated as no query at all.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // The database pool is not used by this provider.
        let _ = pool;
        let page_number: u32 = page.parse().unwrap_or(1);
        let per_page_limit: usize = per_page.parse().unwrap_or(30);
        let query_ref = query.as_deref().filter(|text| !text.trim().is_empty());

        let outcome = self
            .get(cache, page_number, &sort, query_ref, per_page_limit, options)
            .await;
        match outcome {
            Ok(videos) => videos,
            Err(error) => {
                report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                Vec::new()
            }
        }
    }

    /// Returns the channel descriptor for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
// Unit tests for the ThaiPornTV provider: URL builders, the `data-enc` decoder and the
// duration parser. The live-network test at the end is `#[ignore]`d by default.
#[cfg(test)]
mod tests {
use super::*;
use crate::util::cache::VideoCache;
use crate::util::requester::Requester;
// Shared constructor so each test starts from a fresh provider.
fn provider() -> ThaipornTvProvider {
ThaipornTvProvider::new()
}
// Page 1 of an archive has no `/page/N/` suffix; later pages do.
#[test]
fn builds_archive_urls() {
let provider = provider();
assert_eq!(
provider.build_archive_url(ArchiveMode::Latest, 1),
"https://www.thaiporntv.com/recent/"
);
assert_eq!(
provider.build_archive_url(ArchiveMode::Latest, 2),
"https://www.thaiporntv.com/recent/page/2/"
);
assert_eq!(
provider.build_archive_url(ArchiveMode::Popular, 1),
"https://www.thaiporntv.com/popular/"
);
assert_eq!(
provider.build_archive_url(ArchiveMode::Popular, 3),
"https://www.thaiporntv.com/popular/page/3/"
);
}
// Tag slugs are percent-encoded: the hyphen in "thai-massage" becomes `%2D`.
#[test]
fn builds_tag_urls() {
let provider = provider();
assert_eq!(
provider.build_tag_url("blowjob", 1),
"https://www.thaiporntv.com/tags/blowjob/"
);
assert_eq!(
provider.build_tag_url("thai-massage", 2),
"https://www.thaiporntv.com/tags/thai%2Dmassage/page/2/"
);
}
// A known `data-enc` payload must decode to exactly one 480p HLS format that
// carries the site as Referer.
#[test]
fn decodes_data_enc_correctly() {
let encoded = "C1BwKX4pMSljf397eDEkJHxuaSV_bmhjfWJveCV_ZHskZjh-MyQ6PT4zVD8zO3slZjh-MyknKXopMSk_Mzt7KScpYykxbWpneG52Vg";
let formats = ThaipornTvProvider::decode_data_enc(encoded).unwrap();
assert_eq!(formats.len(), 1);
assert_eq!(formats[0].url, "https://web.techvids.top/m3u8/1658_480p.m3u8");
// Verify format fields via JSON serialization (quality and http_headers are private)
let json = serde_json::to_value(&formats[0]).unwrap();
assert_eq!(json["quality"], "480p");
assert_eq!(json["http_headers"]["Referer"], BASE_URL);
}
// Covers both the "MM:SS" and the "HH:MM:SS" shapes.
#[test]
fn parses_duration() {
assert_eq!(parse_duration_mm_ss("50:47"), 3047);
assert_eq!(parse_duration_mm_ss("1:05:30"), 3930);
assert_eq!(parse_duration_mm_ss("12:04"), 724);
}
// Live smoke test against the real site; ignored unless run explicitly
// (e.g. `cargo test -- --ignored`).
#[tokio::test]
#[ignore]
async fn fetches_and_parses_archive() {
let provider = provider();
let options = ServerOptions {
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: Some("http://127.0.0.1:18080".to_string()),
requester: Some(Requester::new()),
network: None,
stars: None,
categories: None,
duration: None,
sort: Some("new".to_string()),
sexuality: None,
};
let videos = provider.get(VideoCache::new(), 1, "new", None, 10, options).await.unwrap();
assert!(!videos.is_empty());
}
}

View File

@@ -0,0 +1,556 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
// Static channel metadata: the group this provider is listed under and its tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "studio", "exclusive"],
};
// Site origin without a trailing slash; relative links and archive URLs are built on it.
const BASE_URL: &str = "https://www.thepornbunny.com";
// Identifier used for the channel id, error reports, proxy URLs and uploader ids.
const CHANNEL_ID: &str = "thepornbunny";
// User-Agent header sent with every HTML request (see `html_headers`).
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
// Accept header sent with every HTML request (see `html_headers`).
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's `Error`, `ErrorKind` and `Result` types via `error_chain`.
// `std::io::Error` is linked as a foreign error; `Parse` carries a free-form message.
// Most errors in this file are built from plain strings with `Error::from(String)`.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Stateless provider for thepornbunny.com; all helpers are associated functions.
#[derive(Debug, Clone)]
pub struct ThepornbunnyProvider;
/// What a request resolves to; `build_target_url` turns it into a paged listing URL.
#[derive(Debug, Clone)]
enum Target {
/// The `/videos` listing.
Latest,
/// The `/most-viewed` listing.
Popular,
/// The `/top-rated` listing.
TopRated,
/// Free-text search; the query is turned into a hyphenated slug under `/search/`.
Search(String),
/// An absolute archive base URL (category, tag, studio or pornstar page).
Archive(String),
}
// All 20 categories exposed on thepornbunny.com/categories/
// Each entry is `(slug, display name)`: the slug is used in `/category/{slug}` URLs,
// the display name is shown to clients and matched against bare search queries.
static CATEGORIES: &[(&str, &str)] = &[
("anal", "Anal"),
("asian", "Asian"),
("big-ass", "Big Ass"),
("big-tits", "Big Tits"),
("black", "Black"),
("cheating", "Cheating"),
("creampie", "Creampie"),
("deep-throat", "Deep Throat"),
("geek-nerd", "Geek / Nerd"),
("gym", "Gym"),
("latina", "Latina"),
("lesbian", "Lesbian"),
("massage", "Massage"),
("milf", "MILF"),
("outdoors", "Outdoors"),
("red-head", "Red Head"),
("shower", "Shower"),
("squirt", "Squirt"),
("teen18", "Teen18+"),
("threesome", "Threesome"),
];
impl ThepornbunnyProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel descriptor shown to clients: a sort selector plus a category
/// selector whose option ids are the absolute category archive URLs.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    // "All" comes first, followed by one entry per known category.
    let mut category_filters = vec![FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    }];
    category_filters.extend(CATEGORIES.iter().map(|(slug, name)| FilterOption {
        id: format!("{BASE_URL}/category/{slug}/"),
        title: name.to_string(),
    }));

    let sort_filters: Vec<FilterOption> = [
        ("new", "Latest"),
        ("popular", "Most Viewed"),
        ("rated", "Top Rated"),
    ]
    .iter()
    .map(|(id, title)| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    })
    .collect();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse PornBunny feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: sort_filters,
        multiSelect: false,
    };
    let category_option = ChannelOption {
        id: "categories".to_string(),
        title: "Categories".to_string(),
        description: "Browse a PornBunny category archive.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "orange".to_string(),
        options: category_filters,
        multiSelect: false,
    };
    let category_names: Vec<String> = CATEGORIES
        .iter()
        .map(|(_, name)| name.to_string())
        .collect();

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "ThePornBunny".to_string(),
        description: "PornBunny HD porn videos with latest, top-rated, most-viewed, category, tag, studio, and search routing.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=thepornbunny.com"
            .to_string(),
        status: "active".to_string(),
        categories: category_names,
        options: vec![sort_option, category_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
/// Returns the element's text with every run of whitespace collapsed to one space
/// and no leading or trailing whitespace. Text nodes are separated by a space.
fn text_of(element: &ElementRef<'_>) -> String {
    let words: Vec<&str> = element
        .text()
        .flat_map(|chunk| chunk.split_whitespace())
        .collect();
    words.join(" ")
}
/// Turns a link found in the page into an absolute URL.
///
/// * blank input gives an empty string;
/// * `http://` / `https://` URLs are returned trimmed but otherwise untouched;
/// * protocol-relative `//host/...` gets an `https:` scheme;
/// * everything else is resolved against `BASE_URL` (leading `./` removed).
fn normalize_url(url: &str) -> String {
    let candidate = url.trim();
    if candidate.is_empty() {
        return String::new();
    }

    let is_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| candidate.starts_with(*scheme));
    if is_absolute {
        candidate.to_string()
    } else if candidate.starts_with("//") {
        format!("https:{candidate}")
    } else if candidate.starts_with('/') {
        format!("{BASE_URL}{candidate}")
    } else {
        format!("{BASE_URL}/{}", candidate.trim_start_matches("./"))
    }
}
/// Request headers for HTML pages: fixed User-Agent and Accept plus the given Referer.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ]
    .iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Builds the URL of one page of an archive.
///
/// Page 0 or 1 gives `{base}/`; page N gives `{base}/{N}/`. Trailing slashes on
/// `base` are ignored, so the result always has exactly one trailing slash.
fn build_archive_page_url(base: &str, page: u16) -> String {
    let root = base.trim_end_matches('/');
    if page > 1 {
        format!("{root}/{page}/")
    } else {
        format!("{root}/")
    }
}
/// Turns a free-text query into a search slug: ASCII-lowercased words joined with
/// hyphens, e.g. "big ass" → "big-ass". Any whitespace run counts as one separator.
fn build_search_slug(query: &str) -> String {
    let lowered = query.to_ascii_lowercase();
    let words: Vec<&str> = lowered.split_whitespace().collect();
    words.join("-")
}
/// Maps a target and page number to the listing URL to fetch.
fn build_target_url(target: &Target, page: u16) -> String {
    // Work out the unpaged base first; paging is identical for every target.
    let base = match target {
        Target::Latest => format!("{BASE_URL}/videos"),
        Target::Popular => format!("{BASE_URL}/most-viewed"),
        Target::TopRated => format!("{BASE_URL}/top-rated"),
        Target::Search(query) => {
            format!("{BASE_URL}/search/{}", Self::build_search_slug(query))
        }
        Target::Archive(url) => url.clone(),
    };
    Self::build_archive_page_url(&base, page)
}
async fn fetch_html(requester: &mut Requester, url: &str, referer: &str) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11))
.await
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
}
/// Parses a duration label into seconds; unparsable or out-of-range values give 0.
fn parse_duration(text: &str) -> u32 {
    match parse_time_to_seconds(text) {
        Some(seconds) => u32::try_from(seconds).unwrap_or(0),
        None => 0,
    }
}
/// Parses a view-count label by handing the trimmed text to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let label = text.trim();
    parse_abbreviated_number(label)
}
/// Extracts a rating from a label such as "95%".
///
/// Every ASCII digit and '.' in the text is kept (in order) and the result is parsed
/// as `f32`; `None` is returned when what remains is not a valid number.
fn parse_rating(text: &str) -> Option<f32> {
    let mut numeric = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '.' || ch.is_ascii_digit() {
            numeric.push(ch);
        }
    }
    numeric.parse().ok()
}
/// Parses a listing page into video items.
///
/// Each `div.th.item` card must provide a description link with an href and text, and
/// a lazy-loaded thumbnail with a `data-original` attribute; cards missing any of
/// these are skipped. Duration, rating, views, studio and pornstar links are optional.
/// Every item gets a single format pointing at the proxy URL built from the page slug.
fn parse_list_videos(html: &str, options: &ServerOptions) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
// Selectors are compiled once per page, not once per card.
let card_sel = Self::selector("div.th.item")?;
let link_sel = Self::selector("a.th-description")?;
let img_sel = Self::selector("img.lazy-load[data-original]")?;
let duration_sel = Self::selector("span.th-length")?;
let rating_sel = Self::selector("span.th-rating")?;
let views_sel = Self::selector("span.th-views")?;
let source_sel = Self::selector("a.th-source")?;
let model_sel = Self::selector("div.th-models a[href*=\"/pornstar/\"]")?;
let mut items = Vec::new();
for card in document.select(&card_sel) {
// The description link supplies both the page URL and the title.
let Some(link) = card.select(&link_sel).next() else {
continue;
};
let href = link.value().attr("href").unwrap_or_default();
let page_url = Self::normalize_url(href);
if page_url.is_empty() {
continue;
}
let title = Self::text_of(&link);
if title.is_empty() {
continue;
}
let Some(img) = card.select(&img_sel).next() else {
continue;
};
let data_original = img.value().attr("data-original").unwrap_or_default();
let thumb = Self::normalize_url(data_original);
// Video ID from /images/thumb/{id}.webp
let id = data_original
.trim_end_matches(".webp")
.rsplit('/')
.next()
.filter(|s| !s.is_empty())
.unwrap_or_default()
.to_string();
if id.is_empty() {
continue;
}
// Optional metadata: a missing element leaves the default (0 / None).
let duration = card
.select(&duration_sel)
.next()
.map(|el| Self::parse_duration(&Self::text_of(&el)))
.unwrap_or(0);
let rating = card
.select(&rating_sel)
.next()
.and_then(|el| Self::parse_rating(&Self::text_of(&el)));
let views = card
.select(&views_sel)
.next()
.and_then(|el| Self::parse_views(&Self::text_of(&el)));
// The studio ("source") link doubles as the uploader.
let studio_el = card.select(&source_sel).next();
let uploader_name = studio_el
.map(|el| Self::text_of(&el))
.filter(|s| !s.is_empty());
let uploader_url = studio_el
.and_then(|el| el.value().attr("href"))
.map(|href| Self::normalize_url(href))
.filter(|s| !s.is_empty());
// Uploader id is "{CHANNEL_ID}:{last path segment of the studio URL}".
let uploader_id = uploader_url.as_deref().and_then(|url| {
let slug = url.trim_end_matches('/').rsplit('/').next()?;
if slug.is_empty() {
None
} else {
Some(format!("{CHANNEL_ID}:{slug}"))
}
});
// Pornstar names become tags
let tags: Vec<String> = card
.select(&model_sel)
.map(|el| Self::text_of(&el))
.filter(|s| !s.is_empty())
.collect();
// Last path segment of the page URL, passed to the proxy URL builder.
let slug = page_url
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or("")
.to_string();
let proxy_url = build_proxy_url(options, CHANNEL_ID, &slug);
// NOTE(review): quality "1080" / "mp4" are fixed labels here, not read from the page.
let format = VideoFormat::new(proxy_url, "1080".to_string(), "mp4".to_string());
let mut item = VideoItem::new(
id,
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
duration,
);
item.formats = Some(vec![format]);
item.rating = rating;
item.views = views;
item.uploader = uploader_name;
item.uploaderUrl = uploader_url;
item.uploaderId = uploader_id;
if !tags.is_empty() {
item.tags = Some(tags);
}
items.push(item);
}
Ok(items)
}
/// Maps a sort id (case-insensitive, surrounding whitespace ignored) to a listing.
/// Unknown ids fall back to the latest listing.
fn resolve_sort_target(sort: &str) -> Target {
    let key = sort.trim().to_ascii_lowercase();
    if matches!(key.as_str(), "popular" | "viewed" | "most_viewed") {
        Target::Popular
    } else if matches!(key.as_str(), "rated" | "rating" | "top") {
        Target::TopRated
    } else {
        Target::Latest
    }
}
/// Resolves a query-less request: an explicit category archive wins over `sort`.
///
/// The `categories` option is client-supplied and is later fetched by the server, so
/// it is only accepted when it is a URL *on this site*. The check uses the origin
/// plus `/` as prefix: a bare `starts_with(BASE_URL)` would also accept look-alike
/// hosts such as `https://www.thepornbunny.com.evil.tld/`. Anything else, including
/// the "all" placeholder, falls back to the sort listing.
fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target {
    let site_prefix = format!("{BASE_URL}/");
    match options.categories.as_deref() {
        Some(cat) if cat.starts_with(site_prefix.as_str()) => Target::Archive(cat.to_string()),
        _ => Self::resolve_sort_target(sort),
    }
}
/// Resolves a non-empty search query to a target.
///
/// Resolution order:
/// 1. `kind:value` shortcuts (`tag:`, `category:`/`cat:`, `studio:`,
///    `pornstar:`/`star:`/`model:`) go to the matching archive;
/// 2. a query equal to a category name or slug (hyphens read as spaces) goes to
///    that category archive;
/// 3. anything else is a free-text search.
///
/// Leading `@` characters and surrounding whitespace are ignored.
fn resolve_query_target(query: &str) -> Target {
    let trimmed = query.trim().trim_start_matches('@');

    // Prefix shortcuts: tag:blonde, studio:rk-prime, pornstar:mia-molotov, category:anal
    if let Some((kind, value)) = trimmed.split_once(':') {
        let slug = value
            .trim()
            .replace(' ', "-")
            .trim_matches('/')
            .to_lowercase();
        if !slug.is_empty() {
            let section = match kind.trim().to_ascii_lowercase().as_str() {
                "tag" => Some("tag"),
                "category" | "cat" => Some("category"),
                "studio" => Some("studio"),
                "pornstar" | "star" | "model" => Some("pornstar"),
                _ => None,
            };
            if let Some(section) = section {
                return Target::Archive(format!("{BASE_URL}/{section}/{slug}"));
            }
        }
    }

    // Direct category name match
    let normalized = trimmed.to_ascii_lowercase();
    let matched = CATEGORIES.iter().find(|(slug, name)| {
        name.to_ascii_lowercase() == normalized || slug.replace('-', " ") == normalized
    });
    match matched {
        Some((slug, _)) => Target::Archive(format!("{BASE_URL}/category/{slug}")),
        None => Target::Search(trimmed.to_string()),
    }
}
/// Fetches, parses and caches one listing page, returning at most `per_page_limit`
/// items (minimum 1).
///
/// A cache entry younger than 300 seconds is served directly. Request failures and
/// empty responses are reported and give an empty list, not an error; only a
/// parse failure is returned as `Err`. The cache stores the full parsed page, so the
/// limit is applied again on every read.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page_limit: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let source_url = Self::build_target_url(&target, page);
    let limit = per_page_limit.max(1);

    if let Some((stored_at, cached)) = cache.get(&source_url) {
        let age_secs = stored_at.elapsed().unwrap_or_default().as_secs();
        if age_secs < 300 {
            return Ok(cached.into_iter().take(limit).collect());
        }
    }

    let mut requester =
        requester_or_default(&options, CHANNEL_ID, "thepornbunny.fetch_target");
    let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await {
        Ok(body) => body,
        Err(error) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_target.request",
                &format!("url={source_url}; error={error}"),
            )
            .await;
            return Ok(Vec::new());
        }
    };

    if html.trim().is_empty() {
        report_provider_error(
            CHANNEL_ID,
            "fetch_target.empty",
            &format!("url={source_url}"),
        )
        .await;
        return Ok(Vec::new());
    }

    let items = Self::parse_list_videos(&html, &options)?;
    if items.is_empty() {
        return Ok(Vec::new());
    }

    // Clone only the items that are actually returned; the cache takes the originals.
    let limited: Vec<VideoItem> = items.iter().take(limit).cloned().collect();
    cache.remove(&source_url);
    cache.insert(source_url, items);
    Ok(limited)
}
}
#[async_trait]
impl Provider for ThepornbunnyProvider {
    /// Provider entry point: resolves the request to a target, fetches it, and turns
    /// any error into a reported failure plus an empty result.
    ///
    /// `page` defaults to 1 (and is at least 1); `per_page` defaults to 10 and is
    /// clamped to 1..=60. A non-blank query takes precedence over the options.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page: u16 = page.parse().unwrap_or(1).max(1);
        let per_page_limit: usize = per_page.parse().unwrap_or(10).clamp(1, 60);

        let search_text = query.as_deref().map(str::trim).filter(|q| !q.is_empty());
        let target = match search_text {
            Some(text) => Self::resolve_query_target(text),
            None => Self::resolve_option_target(&options, &sort),
        };

        match self
            .fetch_target(cache, target, page, per_page_limit, options)
            .await
        {
            Ok(items) => items,
            Err(error) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos",
                    &format!("sort={sort}; page={page}; error={error}"),
                )
                .await;
                Vec::new()
            }
        }
    }

    /// Returns the channel descriptor for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
// Unit tests for URL building and query resolution; none of them touch the network.
#[cfg(test)]
mod tests {
use super::*;
// Page 1 of the latest listing has no page number.
#[test]
fn builds_latest_url_page1() {
assert_eq!(
ThepornbunnyProvider::build_target_url(&Target::Latest, 1),
"https://www.thepornbunny.com/videos/"
);
}
// Later pages append `/{page}/`.
#[test]
fn builds_latest_url_page2() {
assert_eq!(
ThepornbunnyProvider::build_target_url(&Target::Latest, 2),
"https://www.thepornbunny.com/videos/2/"
);
}
// Search words are joined with hyphens.
#[test]
fn builds_search_url() {
assert_eq!(
ThepornbunnyProvider::build_target_url(&Target::Search("big tits".to_string()), 1),
"https://www.thepornbunny.com/search/big-tits/"
);
}
// `tag:` shortcut resolves to the tag archive (no trailing slash at this stage).
#[test]
fn resolves_tag_prefix() {
match ThepornbunnyProvider::resolve_query_target("tag:blonde") {
Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/tag/blonde"),
_ => panic!("expected archive target"),
}
}
// `studio:` shortcut resolves to the studio archive.
#[test]
fn resolves_studio_prefix() {
match ThepornbunnyProvider::resolve_query_target("studio:rk-prime") {
Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/studio/rk-prime"),
_ => panic!("expected archive target"),
}
}
// A bare query equal to a category name resolves to that category archive.
#[test]
fn resolves_category_name() {
match ThepornbunnyProvider::resolve_query_target("Anal") {
Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/category/anal"),
_ => panic!("expected archive target"),
}
}
}

892
src/providers/tikporn.rs Normal file
View File

@@ -0,0 +1,892 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDateTime, Utc};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
use regex::Regex;
use serde::Deserialize;
use std::sync::{Arc, RwLock};
// Static channel metadata: the group this provider is listed under and its tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["shorts", "vertical", "tiktok"],
};
// Site origin; the provider's `url` field is initialised from it.
const BASE_URL: &str = "https://tik.porn";
// Origin of the apiv2 JSON API that serves the browse feeds.
const API_BASE: &str = "https://apiv2.tik.porn";
// Identifier used for the channel id, video items and uploader ids.
const CHANNEL_ID: &str = "tikporn";
// User-Agent header sent by `fetch_text`.
const BROWSER_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36";
// apiv2 listing endpoints return ~10 rows per call regardless of `limit`; step the
// offset by this so pages stay contiguous (no skipped or duplicated rows).
const FEED_PAGE_SIZE: u32 = 10;
// Declares this module's `Error`, `ErrorKind` and `Result` types via `error_chain`.
// I/O, HTTP client, JSON and regex errors are linked as foreign errors so they can be
// propagated with `?`; `Parse` carries a free-form message.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
Regex(regex::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider for tik.porn.
///
/// `url` is the site origin used to build page and `_next/data` URLs. `build_id` holds
/// the last Next.js build id seen in a `__NEXT_DATA__` payload; it lives behind an
/// `Arc`, so clones of the provider share it.
#[derive(Debug, Clone)]
pub struct TikpornProvider {
url: String,
build_id: Arc<RwLock<Option<String>>>,
}
/// What a request resolves to.
#[derive(Debug, Clone)]
enum Target {
/// Default browse feed, served from the apiv2 JSON API (paginates by offset).
Feed { kind: FeedKind },
/// Keyword search, served from the Next.js `/?s=` SSR payload.
Search { query: String },
/// Tag/action/model archive, served from the Next.js SSR payload.
Archive { path: String },
}
/// Which browse feed a `Target::Feed` refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FeedKind {
/// Newest uploads first (`/getrecentvideos`); deeper pages fall back to popular
/// because the recent endpoint is a live feed and does not honor `offset`.
Newest,
/// Most popular shorts (`/videos/popular`); honors `offset` for clean paging.
Popular,
}
// ---- apiv2 listing rows (https://apiv2.tik.porn/{getrecentvideos,videos/popular}) ----
// Every field is `#[serde(default)]`, so a row with missing keys still deserializes
// and simply carries empty/zero values for what is absent.
/// Top-level apiv2 listing response: the rows live under `data`.
#[derive(Debug, Deserialize, Default)]
struct ApiResponse {
#[serde(default)]
data: Vec<ApiVideo>,
}
/// One video row of an apiv2 listing.
#[derive(Debug, Deserialize, Default)]
struct ApiVideo {
#[serde(default)]
video_id: u64,
// `duration` and `view_count` go through `de_u32`, which coerces sentinel or
// malformed values to 0 instead of failing the whole response.
#[serde(default, deserialize_with = "de_u32")]
duration: u32,
#[serde(default, deserialize_with = "de_u32")]
view_count: u32,
#[serde(default)]
published: String,
#[serde(default)]
video_date: String,
#[serde(default)]
action_name: String,
#[serde(default)]
producer_name: Option<String>,
#[serde(default)]
producer_slug: Option<String>,
#[serde(default)]
pornstars: Vec<ApiEntity>,
#[serde(default)]
tags: Vec<ApiEntity>,
// Several thumbnail/poster variants are delivered per row.
#[serde(default)]
thumbnail_url: String,
#[serde(default)]
medium_thumb: String,
#[serde(default)]
small_thumb: String,
#[serde(default)]
poster_url: String,
// Stream URLs are optional in the payload.
#[serde(default)]
hls_url: Option<String>,
#[serde(default)]
mp4_url: Option<String>,
#[serde(default)]
video_text: ApiVideoText,
}
/// A named entity (pornstar or tag) attached to an apiv2 row.
#[derive(Debug, Deserialize, Default)]
struct ApiEntity {
#[serde(default)]
name: String,
#[serde(default)]
slug: String,
}
/// Text block of an apiv2 row; only `meta_title` is read.
#[derive(Debug, Deserialize, Default)]
struct ApiVideoText {
#[serde(default)]
meta_title: ApiLocalizedText,
}
/// Localized text container; only the `default` entry is read.
#[derive(Debug, Deserialize, Default)]
struct ApiLocalizedText {
#[serde(default)]
default: ApiTextValue,
}
/// Leaf of the text hierarchy: the actual string.
#[derive(Debug, Deserialize, Default)]
struct ApiTextValue {
#[serde(default)]
text: String,
}
/// Lenient `u32` deserializer for Tik.Porn payloads.
///
/// The site sends sentinel `-1`/`null` values and occasionally numbers as strings in
/// integer fields; a strict parse would abort the whole list on one such value.
/// Non-negative integers are clamped to `u32::MAX`, numeric strings are parsed after
/// trimming, and everything else (negatives, floats, null, other types) becomes `0`.
fn de_u32<'de, D>(deserializer: D) -> std::result::Result<u32, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let raw = serde_json::Value::deserialize(deserializer)?;
    let coerced = match raw {
        serde_json::Value::Number(number) => {
            let non_negative = match number.as_u64() {
                Some(value) => Some(value),
                None => number
                    .as_i64()
                    .filter(|value| *value >= 0)
                    .map(|value| value as u64),
            };
            match non_negative {
                Some(value) => value.min(u64::from(u32::MAX)) as u32,
                None => 0,
            }
        }
        serde_json::Value::String(text) => text.trim().parse::<u32>().unwrap_or(0),
        _ => 0,
    };
    Ok(coerced)
}
/// The `__NEXT_DATA__` JSON embedded in an SSR page: the build id plus the page props.
#[derive(Debug, Deserialize, Clone, Default)]
struct NextData {
#[serde(default, rename = "buildId")]
build_id: String,
#[serde(default)]
props: NextProps,
}
/// Wrapper around `pageProps`; also used to parse `_next/data/...json` responses.
#[derive(Debug, Deserialize, Clone, Default)]
struct NextProps {
#[serde(default, rename = "pageProps")]
page_props: PageProps,
}
/// Page props of interest: `videos` is read for archive pages and
/// `initialVideoResults` for search pages (see `collect_videos`).
#[derive(Debug, Deserialize, Clone, Default)]
struct PageProps {
#[serde(default)]
videos: Option<VideoList>,
#[serde(default, rename = "initialVideoResults")]
initial_video_results: Option<VideoList>,
}
/// A list of videos together with its pagination state.
#[derive(Debug, Deserialize, Clone, Default)]
struct VideoList {
#[serde(default)]
data: Vec<TikVideo>,
#[serde(default)]
pagination: Pagination,
}
/// Pagination state of a `VideoList`; only the `hasMore` flag is read.
#[derive(Debug, Deserialize, Clone, Default)]
struct Pagination {
#[serde(default, rename = "hasMore")]
has_more: bool,
}
/// One video as it appears in the Next.js SSR payload.
///
/// Every field is `#[serde(default)]`; an `id` of 0 therefore means "missing" and such
/// videos are dropped by `dedupe` / `video_to_item`.
#[derive(Debug, Deserialize, Clone, Default)]
struct TikVideo {
#[serde(default)]
id: u64,
// Site-relative page path; when blank, `/video/{id}` is used instead.
#[serde(default, rename = "basePath")]
base_path: String,
#[serde(default)]
texts: Texts,
#[serde(default)]
action: Option<Entity>,
// Candidate uploaders, in the order `best_uploader` tries them:
// creator, pornstars, producer, user.
#[serde(default)]
producer: Option<Entity>,
#[serde(default)]
pornstars: Vec<Entity>,
#[serde(default)]
user: Option<Entity>,
#[serde(default)]
creator: Vec<Entity>,
#[serde(default)]
tags: Vec<Entity>,
// `source` is used as the preview when its MIME type is a video type.
#[serde(default)]
source: Option<Source>,
#[serde(default)]
sources: Vec<Source>,
// Thumbnail fallbacks (see `best_thumb`): metadata.thumbnail, thumbnails.md,
// thumbnails.sm, then poster.
#[serde(default)]
poster: String,
#[serde(default)]
thumbnails: Thumbnails,
// Numeric fields go through `de_u32`, which coerces malformed values to 0.
#[serde(default, deserialize_with = "de_u32")]
duration: u32,
#[serde(default, deserialize_with = "de_u32")]
likes: u32,
#[serde(default, deserialize_with = "de_u32")]
views: u32,
#[serde(default)]
metadata: Metadata,
}
/// Text blocks of a video; only the `video` entry is read.
#[derive(Debug, Deserialize, Clone, Default)]
struct Texts {
#[serde(default)]
video: TextValue,
}
/// A text in two forms; `best_title` prefers `parsed_text` over `text`.
#[derive(Debug, Deserialize, Clone, Default)]
struct TextValue {
#[serde(default)]
text: String,
#[serde(default)]
parsed_text: String,
}
/// A named entity (action, tag, pornstar, producer, user or creator) with its page path.
#[derive(Debug, Deserialize, Clone, Default)]
struct Entity {
#[serde(default)]
id: u64,
#[serde(default)]
name: String,
#[serde(default)]
slug: String,
#[serde(default, rename = "basePath")]
base_path: String,
}
/// A media source: URL plus MIME type (JSON key `type`).
#[derive(Debug, Deserialize, Clone, Default)]
struct Source {
#[serde(default)]
src: String,
#[serde(default, rename = "type")]
mime_type: String,
}
/// Thumbnail URLs keyed `sm` and `md` in the payload.
#[derive(Debug, Deserialize, Clone, Default)]
struct Thumbnails {
#[serde(default)]
sm: String,
#[serde(default)]
md: String,
}
/// Page metadata: preferred title and thumbnail plus the upload date string.
#[derive(Debug, Deserialize, Clone, Default)]
struct Metadata {
#[serde(default)]
title: String,
#[serde(default)]
thumbnail: String,
#[serde(default, rename = "uploadDate")]
upload_date: String,
}
impl TikpornProvider {
/// Creates a provider pointed at `BASE_URL` with no Next.js build id known yet.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    let build_id = Arc::new(RwLock::new(None));
    Self { url, build_id }
}
/// Builds the channel descriptor shown to clients; the only option is the sort
/// selector with "Newest" and "Trending".
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let sort_choices = vec![
        FilterOption {
            id: "new".to_string(),
            title: "Newest".to_string(),
        },
        FilterOption {
            id: "trending".to_string(),
            title: "Trending".to_string(),
        },
    ];
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Newest uploads or most popular shorts. Search and tag:/action:/model: shortcuts are also supported.".to_string(),
        systemImage: "sparkles.tv".to_string(),
        colorName: "pink".to_string(),
        options: sort_choices,
        multiSelect: false,
    };
    let categories: Vec<String> = ["Shorts", "Vertical", "TikTok-style"]
        .iter()
        .map(|label| label.to_string())
        .collect();

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "Tik Porn".to_string(),
        description: "Tik.Porn short-form vertical videos with search, tag/action/model shortcuts, and direct CDN formats.".to_string(),
        premium: false,
        favicon: "https://tik.porn/favicon.ico".to_string(),
        status: "active".to_string(),
        categories,
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(900),
    }
}
/// Resolves a request to a target.
///
/// No (or blank) query gives the browse feed chosen by `sort`; a recognised
/// `prefix:value` shortcut gives an archive; any other query is a keyword search.
fn resolve_target(query: Option<&str>, sort: &str) -> Target {
    let cleaned = query.map(str::trim).filter(|value| !value.is_empty());
    match cleaned {
        None => Target::Feed {
            kind: Self::feed_kind(sort),
        },
        Some(text) => match Self::prefixed_archive(text) {
            Some(path) => Target::Archive { path },
            None => Target::Search {
                query: text.to_string(),
            },
        },
    }
}
/// Maps a sort id (case-insensitive, surrounding whitespace ignored) to a feed.
///
/// Anything that is not one of the popularity aliases is treated as "newest"; this
/// includes "date", which the server uses when the client omits `sort`.
fn feed_kind(sort: &str) -> FeedKind {
    let key = sort.trim().to_ascii_lowercase();
    let wants_popular = matches!(
        key.as_str(),
        "trending" | "popular" | "hot" | "best" | "views" | "most-viewed"
    );
    if wants_popular {
        FeedKind::Popular
    } else {
        FeedKind::Newest
    }
}
fn prefixed_archive(query: &str) -> Option<String> {
let (prefix, value) = query.split_once(':')?;
let value = value.trim();
if value.is_empty() {
return None;
}
match prefix.trim().to_ascii_lowercase().as_str() {
"tag" => Some(format!("/tag/{}", Self::slugify(value))),
"action" | "category" | "cat" => Some(format!("/action/{}", Self::slugify(value))),
"model" | "pornstar" | "creator" | "uploader" | "studio" | "site" => {
Some(format!("/{}", Self::slugify(value)))
}
_ => None,
}
}
/// Turns free text into a URL slug: ASCII-lowercased alphanumeric runs joined by
/// single hyphens. Every other character (including non-ASCII) acts as a separator,
/// and leading/trailing separators are dropped.
fn slugify(value: &str) -> String {
    let lowered = value.trim().trim_matches('/').to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .collect();
    words.join("-")
}
/// Builds the SSR page URL for a target.
///
/// The page number is only added (`page=N`) from page 2 on. Search queries are
/// percent-encoded with `NON_ALPHANUMERIC`.
fn page_url(&self, target: &Target, page: u8) -> String {
    let root = &self.url;
    let needs_page_param = page > 1;
    match target {
        // Feed targets are served from the apiv2 API, not the SSR page URL.
        Target::Feed { .. } => format!("{root}/"),
        Target::Search { query } => {
            let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
            let mut url = format!("{root}/?s={encoded}");
            if needs_page_param {
                url.push_str(&format!("&page={page}"));
            }
            url
        }
        Target::Archive { path } => {
            let path = path.trim_start_matches('/');
            let mut url = format!("{root}/{path}");
            if needs_page_param {
                url.push_str(&format!("?page={page}"));
            }
            url
        }
    }
}
async fn fetch_page_props(
&self,
requester: &mut crate::util::requester::Requester,
target: &Target,
page: u8,
) -> Result<PageProps> {
if let Target::Search { query } = target {
if let Some(build_id) = self.current_build_id() {
let encoded = utf8_percent_encode(query, NON_ALPHANUMERIC).to_string();
let data_url = if page <= 1 {
format!("{}/_next/data/{build_id}/search.json?s={encoded}", self.url)
} else {
format!(
"{}/_next/data/{build_id}/search.json?s={encoded}&page={page}",
self.url
)
};
if let Ok(body) = Self::fetch_text(requester, &data_url, &self.url).await {
if let Ok(next) = serde_json::from_str::<NextProps>(&body) {
return Ok(next.page_props);
}
}
}
}
let page_url = self.page_url(target, page);
let body = Self::fetch_text(requester, &page_url, &self.url).await?;
let next = Self::extract_next_data(&body)?;
if !next.build_id.is_empty() {
self.set_build_id(next.build_id);
}
Ok(next.props.page_props)
}
async fn fetch_text(
requester: &mut crate::util::requester::Requester,
url: &str,
referer: &str,
) -> Result<String> {
requester
.get_with_headers(
url,
vec![
("Referer".to_string(), referer.to_string()),
("User-Agent".to_string(), BROWSER_UA.to_string()),
(
"Accept".to_string(),
"text/html,application/json;q=0.9,*/*;q=0.8".to_string(),
),
],
None,
)
.await
.map_err(|error| Error::from(ErrorKind::Parse(error.to_string())))
}
fn extract_next_data(html: &str) -> Result<NextData> {
let regex = Regex::new(
r#"<script id="__NEXT_DATA__" type="application/json">(?s:(.*?))</script>"#,
)?;
let captures = regex.captures(html).ok_or_else(|| {
Error::from(ErrorKind::Parse(
"__NEXT_DATA__ script not found".to_string(),
))
})?;
let raw = captures.get(1).map(|value| value.as_str()).unwrap_or("");
serde_json::from_str::<NextData>(raw).map_err(Error::from)
}
/// Returns a copy of the remembered Next.js build id, or `None` when none has been
/// stored yet or the lock is poisoned.
fn current_build_id(&self) -> Option<String> {
    match self.build_id.read() {
        Ok(guard) => (*guard).clone(),
        Err(_) => None,
    }
}
/// Remembers the given Next.js build id; silently does nothing if the lock is poisoned.
fn set_build_id(&self, build_id: String) {
    let Ok(mut slot) = self.build_id.write() else {
        return;
    };
    *slot = Some(build_id);
}
/// Picks the video list that belongs to the target out of the page props and returns
/// the de-duplicated videos together with the list's `has_more` flag.
///
/// Searches read `initial_video_results`, archives read `videos`. A missing list, or
/// a feed target, gives an empty list and `false`.
fn collect_videos(target: &Target, props: PageProps) -> (Vec<TikVideo>, bool) {
    let listing = match target {
        // Feed targets never reach the SSR collector.
        Target::Feed { .. } => None,
        Target::Search { .. } => props.initial_video_results,
        Target::Archive { .. } => props.videos,
    };
    match listing {
        Some(list) => (Self::dedupe(list.data), list.pagination.has_more),
        None => (Vec::new(), false),
    }
}
/// Drops videos without an id (`id == 0`) and repeated ids, keeping the first
/// occurrence of each id and the original order.
fn dedupe(items: Vec<TikVideo>) -> Vec<TikVideo> {
    let mut seen = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for item in items {
        if item.id == 0 {
            continue;
        }
        if seen.insert(item.id) {
            unique.push(item);
        }
    }
    unique
}
/// Converts one SSR video into a `VideoItem`.
///
/// Returns `None` when the video has no id, no usable title or no usable thumbnail.
/// Views, upload date, uploader, tags, formats and preview are only set when the
/// payload provides them. Any positive like count is reported as a rating of 100.
fn video_to_item(video: TikVideo) -> Option<VideoItem> {
    if video.id == 0 {
        return None;
    }

    // Fall back to the canonical `/video/{id}` path when the payload has none.
    let page_path = match video.base_path.trim() {
        "" => format!("/video/{}", video.id),
        _ => video.base_path.clone(),
    };
    let page_url = Self::absolute_url(&page_path);

    let title = Self::best_title(&video);
    let thumb = Self::best_thumb(&video);
    if title.is_empty() || thumb.is_empty() {
        return None;
    }

    let mut item = VideoItem::new(
        video.id.to_string(),
        title,
        page_url.clone(),
        CHANNEL_ID.to_string(),
        thumb,
        video.duration,
    );

    if video.views > 0 {
        item.views = Some(video.views);
    }
    if video.likes > 0 {
        item.rating = Some(100.0);
    }
    if let Some(uploaded_at) = Self::parse_uploaded_at(&video.metadata.upload_date) {
        item.uploadedAt = Some(uploaded_at);
    }

    if let Some(entity) = Self::best_uploader(&video) {
        // Prefer the numeric id as key; fall back to the slug when there is none.
        let key = if entity.id > 0 {
            entity.id.to_string()
        } else {
            entity.slug.clone()
        };
        item.uploader = Some(entity.name.clone());
        item.uploaderUrl = Some(Self::absolute_url(&entity.base_path));
        item.uploaderId = Some(format!(
            "{CHANNEL_ID}:{}:{}",
            Self::entity_kind(&entity.base_path),
            key
        ));
    }

    // Tags: action name, tag names and pornstar names, sorted and de-duplicated.
    let mut tags: Vec<String> = video
        .action
        .iter()
        .chain(video.tags.iter())
        .chain(video.pornstars.iter())
        .map(|entity| entity.name.trim())
        .filter(|name| !name.is_empty())
        .map(str::to_string)
        .collect();
    tags.sort();
    tags.dedup();
    if !tags.is_empty() {
        item.tags = Some(tags);
    }

    let formats = Self::formats(&video, &page_url);
    if !formats.is_empty() {
        item.formats = Some(formats);
    }

    // The primary source doubles as the preview when it is a video with a URL.
    let preview_source = video
        .source
        .as_ref()
        .filter(|source| source.mime_type.contains("video/") && !source.src.is_empty());
    if let Some(source) = preview_source {
        item.preview = Some(source.src.clone());
    }

    Some(item)
}
/// Chooses a display title: the metadata title, else the parsed text, else
/// the raw text. The result is HTML-entity decoded, stripped of the
/// ` | Tik.Porn` suffix and of `{{` / `}}` markers, and whitespace-collapsed.
fn best_title(video: &TikVideo) -> String {
    let preferred = [
        video.metadata.title.as_str(),
        video.texts.video.parsed_text.as_str(),
    ];
    let raw = preferred
        .into_iter()
        .find(|candidate| !candidate.trim().is_empty())
        .unwrap_or(video.texts.video.text.as_str());
    let decoded = match decode(raw.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => raw.to_string(),
    };
    let cleaned = decoded
        .replace(" | Tik.Porn", "")
        .replace("{{", "")
        .replace("}}", "");
    let words: Vec<&str> = cleaned.split_whitespace().collect();
    words.join(" ")
}
/// Returns the first non-blank thumbnail among the metadata thumbnail, the
/// medium thumbnail and the small thumbnail; falls back to the poster
/// (which may itself be empty).
fn best_thumb(video: &TikVideo) -> String {
    let candidates = [
        &video.metadata.thumbnail,
        &video.thumbnails.md,
        &video.thumbnails.sm,
    ];
    for candidate in candidates {
        if !candidate.trim().is_empty() {
            return candidate.clone();
        }
    }
    video.poster.clone()
}
/// Finds the entity to credit as uploader, checking creator, pornstars,
/// producer and user in that order. Only entities with both a non-blank
/// name and a non-blank base path qualify.
fn best_uploader(video: &TikVideo) -> Option<Entity> {
    let candidates = video
        .creator
        .iter()
        .chain(video.pornstars.iter())
        .chain(video.producer.iter())
        .chain(video.user.iter());
    for entity in candidates {
        if entity.name.trim().is_empty() || entity.base_path.trim().is_empty() {
            continue;
        }
        return Some(entity.clone());
    }
    None
}
/// Classifies an entity by its site path: `/tag/…` is `"tag"`, `/action/…`
/// is `"action"`, and anything else is `"profile"`.
fn entity_kind(path: &str) -> &'static str {
    for (prefix, kind) in [("/tag/", "tag"), ("/action/", "action")] {
        if path.starts_with(prefix) {
            return kind;
        }
    }
    "profile"
}
/// Builds playable formats from the video's source list followed by its
/// primary source.
///
/// Sources with a blank URL, or that are neither HLS (`mpegURL` mime or
/// `.m3u8` URL) nor MP4 (`mp4` mime or `.mp4` URL), are skipped. Every format
/// carries `Referer` (the page URL passed in) and `User-Agent` headers.
fn formats(video: &TikVideo, referer: &str) -> Vec<VideoFormat> {
    video
        .sources
        .iter()
        .chain(video.source.iter())
        .filter_map(|source| {
            let url = source.src.trim();
            if url.is_empty() {
                return None;
            }
            let is_hls = source.mime_type.contains("mpegURL") || url.contains(".m3u8");
            let is_mp4 = source.mime_type.contains("mp4") || url.contains(".mp4");
            // HLS takes precedence when both checks match.
            let mut format = if is_hls {
                VideoFormat::m3u8(url.to_string(), "hls".to_string(), "m3u8".to_string())
            } else if is_mp4 {
                VideoFormat::new(url.to_string(), "direct".to_string(), "mp4".to_string())
            } else {
                return None;
            };
            format.add_http_header("Referer".to_string(), referer.to_string());
            format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
            Some(format)
        })
        .collect()
}
/// Parses an RFC 3339 timestamp into Unix seconds (UTC).
///
/// Returns `None` for unparseable input or for instants before the epoch.
fn parse_uploaded_at(value: &str) -> Option<u64> {
    let parsed = DateTime::parse_from_rfc3339(value).ok()?;
    let seconds = parsed.with_timezone(&Utc).timestamp();
    u64::try_from(seconds).ok()
}
/// Returns `value` unchanged when it already starts with `http://` or
/// `https://`; otherwise joins it onto `BASE_URL` with exactly one slash.
fn absolute_url(value: &str) -> String {
    let already_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| value.starts_with(*scheme));
    if already_absolute {
        return value.to_string();
    }
    let relative = value.trim_start_matches('/');
    format!("{BASE_URL}/{relative}")
}
/// Loads a search or archive page from the Next.js SSR `__NEXT_DATA__`
/// payload and converts its videos into items; records that cannot be
/// converted are dropped.
async fn fetch_ssr(
    &self,
    requester: &mut crate::util::requester::Requester,
    target: &Target,
    page: u8,
) -> Result<Vec<VideoItem>> {
    let props = self.fetch_page_props(requester, target, page).await?;
    // The pagination flag is not needed by this caller.
    let (videos, _) = Self::collect_videos(target, props);
    let mut items = Vec::new();
    for video in videos {
        if let Some(item) = Self::video_to_item(video) {
            items.push(item);
        }
    }
    Ok(items)
}
/// The default browse feed is served by the apiv2 JSON API, which returns fully
/// hydrated rows (signed HLS/MP4, thumbnails, title, tags, pornstars).
///
/// `page` is 1-based; page `0` is treated like page `1` so the offset
/// arithmetic can never underflow.
///
/// # Errors
/// Propagates the request error from `fetch_text` and the `serde_json` error
/// when the body is not a valid `ApiResponse`.
async fn fetch_feed(
    &self,
    requester: &mut crate::util::requester::Requester,
    kind: FeedKind,
    page: u8,
) -> Result<Vec<VideoItem>> {
    // `/getrecentvideos` is a live feed that ignores `offset`, so only page 1 uses
    // it; deeper "newest" pages continue through the popular feed, which paginates.
    let (path, offset) = match (kind, page) {
        (FeedKind::Newest, 0 | 1) => ("getrecentvideos", 0),
        // `p >= 2` here, so the subtraction cannot underflow.
        (FeedKind::Newest, p) => ("videos/popular", u32::from(p - 2) * FEED_PAGE_SIZE),
        (FeedKind::Popular, p) => (
            "videos/popular",
            u32::from(p.saturating_sub(1)) * FEED_PAGE_SIZE,
        ),
    };
    let url = format!("{API_BASE}/{path}?limit={FEED_PAGE_SIZE}&offset={offset}");
    let body = Self::fetch_text(requester, &url, &format!("{}/", self.url)).await?;
    let response = serde_json::from_str::<ApiResponse>(&body)?;
    Ok(response
        .data
        .into_iter()
        .filter_map(Self::api_video_to_item)
        .collect())
}
fn api_video_to_item(video: ApiVideo) -> Option<VideoItem> {
if video.video_id == 0 {
return None;
}
let page_url = format!("{BASE_URL}/video/{}", video.video_id);
let title = {
let cleaned = Self::clean_title(&video.video_text.meta_title.default.text);
if cleaned.is_empty() {
video.action_name.trim().to_string()
} else {
cleaned
}
};
let thumb = [
&video.medium_thumb,
&video.small_thumb,
&video.thumbnail_url,
&video.poster_url,
]
.into_iter()
.map(|value| value.trim())
.find(|value| !value.is_empty())
.unwrap_or("")
.to_string();
if title.is_empty() || thumb.is_empty() {
return None;
}
let mut item = VideoItem::new(
video.video_id.to_string(),
title,
page_url,
CHANNEL_ID.to_string(),
thumb,
video.duration,
);
if video.view_count > 0 {
item.views = Some(video.view_count);
}
let uploaded = if video.published.trim().is_empty() {
video.video_date.as_str()
} else {
video.published.as_str()
};
if let Some(timestamp) = Self::parse_api_date(uploaded) {
item.uploadedAt = Some(timestamp);
}
// Uploader: prefer the studio/producer, then the first named pornstar.
let uploader = match (video.producer_name.as_deref(), video.producer_slug.as_deref()) {
(Some(name), Some(slug)) if !name.trim().is_empty() && !slug.trim().is_empty() => {
Some((name.trim().to_string(), slug.trim().to_string()))
}
_ => video
.pornstars
.iter()
.find(|entity| !entity.name.trim().is_empty() && !entity.slug.trim().is_empty())
.map(|entity| (entity.name.trim().to_string(), entity.slug.trim().to_string())),
};
if let Some((name, slug)) = uploader {
item.uploader = Some(name);
item.uploaderUrl = Some(format!("{BASE_URL}/{slug}"));
item.uploaderId = Some(format!("{CHANNEL_ID}:profile:{slug}"));
}
let mut tags = Vec::new();
if !video.action_name.trim().is_empty() {
tags.push(video.action_name.trim().to_string());
}
for entity in video.tags.iter().chain(video.pornstars.iter()) {
if !entity.name.trim().is_empty() {
tags.push(entity.name.trim().to_string());
}
}
tags.sort();
tags.dedup();
if !tags.is_empty() {
item.tags = Some(tags);
}
let referer = format!("{BASE_URL}/");
let mut formats = Vec::new();
if let Some(hls) = video
.hls_url
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
let mut format =
VideoFormat::m3u8(hls.to_string(), "hls".to_string(), "m3u8".to_string());
format.add_http_header("Referer".to_string(), referer.clone());
format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
formats.push(format);
}
if let Some(mp4) = video
.mp4_url
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
let mut format =
VideoFormat::new(mp4.to_string(), "direct".to_string(), "mp4".to_string());
format.add_http_header("Referer".to_string(), referer);
format.add_http_header("User-Agent".to_string(), BROWSER_UA.to_string());
formats.push(format);
}
if !formats.is_empty() {
item.formats = Some(formats);
}
Some(item)
}
/// Normalizes an API title: trims it, decodes HTML entities, removes the
/// ` | Tik.Porn` suffix and collapses whitespace runs to single spaces.
fn clean_title(raw: &str) -> String {
    let source = raw.trim();
    let decoded = match decode(source.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => source.to_string(),
    };
    let stripped = decoded.replace(" | Tik.Porn", "");
    let words: Vec<&str> = stripped.split_whitespace().collect();
    words.join(" ")
}
/// Parses a `YYYY-MM-DD HH:MM:SS` string, interpreted as UTC, into Unix
/// seconds. Returns `None` for unparseable input or pre-epoch instants.
fn parse_api_date(value: &str) -> Option<u64> {
    let naive = NaiveDateTime::parse_from_str(value.trim(), "%Y-%m-%d %H:%M:%S").ok()?;
    let seconds = naive.and_utc().timestamp();
    u64::try_from(seconds).ok()
}
}
#[async_trait]
impl Provider for TikpornProvider {
    /// Resolves the request to a target and fetches one page of videos.
    ///
    /// Feed targets go through the JSON feed API, everything else through the
    /// SSR payload. The cache and database pool arguments are unused. Any
    /// error is reported and turned into an empty list.
    async fn get_videos(
        &self,
        _cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Unparseable page numbers fall back to 1; 0 is raised to 1.
        let page = page.parse::<u8>().map_or(1, |number| number.max(1));
        let target = Self::resolve_target(query.as_deref(), &sort);
        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");

        let outcome = if let Target::Feed { kind } = &target {
            self.fetch_feed(&mut requester, *kind, page).await
        } else {
            self.fetch_ssr(&mut requester, &target, page).await
        };

        let error = match outcome {
            Ok(items) => return items,
            Err(error) => error,
        };
        report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
        Vec::new()
    }

    /// Returns this provider's channel description.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}

601
src/providers/tube8.rs Normal file
View File

@@ -0,0 +1,601 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, build_proxy_url, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use url::form_urlencoded;
/// Static grouping metadata for the Tube8 provider: the group it is listed
/// under and its descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "studio", "search"],
};
/// Site origin (no trailing slash); used as the provider's base URL and as
/// the `Referer` for HTML requests.
const BASE_URL: &str = "https://www.tube8.com";
/// Channel identifier used for the channel id, error reports, proxy URLs and
/// uploader-id namespacing.
const CHANNEL_ID: &str = "tube8";
/// Scraping provider for Tube8 listing pages.
#[derive(Debug, Clone)]
pub struct Tube8Provider {
// Base URL all listing URLs are built from; `new()` sets it to `BASE_URL`.
url: String,
}
/// The listing a request resolves to; `build_url` maps each variant to a URL.
#[derive(Debug, Clone)]
enum Target {
// Default listing (home page, then `/newest/page/N/`).
Latest,
// Most-viewed listing.
MostViewed,
// Top-rated listing.
TopRated,
// Free-text search; `query` is URL-encoded when the URL is built.
Search { query: String },
// `/porntags/<slug>/` archive.
Tag { slug: String },
// `/cat/<slug>/` archive.
Category { slug: String },
// `/channel/<slug>/` archive.
Channel { slug: String },
// `/pornstar/<slug>/` archive.
Pornstar { slug: String },
}
impl Tube8Provider {
/// Creates a provider rooted at `BASE_URL`.
pub fn new() -> Self {
    let url = String::from(BASE_URL);
    Self { url }
}
fn build_channel(&self, _cv: ClientVersion) -> Channel {
Channel {
id: CHANNEL_ID.to_string(),
name: "Tube8".to_string(),
description:
"Tube8 mainstream tube with latest, most-viewed, top-rated, search, and tag/channel/pornstar shortcuts. Playback uses a signed HLS proxy."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=tube8.com".to_string(),
status: "active".to_string(),
categories: vec![],
options: vec![ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse Tube8 by sort order.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Newest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
],
multiSelect: false,
}],
nsfw: true,
cacheDuration: Some(1800),
}
}
/// Parses a CSS selector, returning `None` when it is invalid.
fn selector(value: &str) -> Option<Selector> {
    match Selector::parse(value) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// Decodes HTML entities (keeping the input as-is if decoding fails) and
/// collapses every whitespace run into a single space, with no leading or
/// trailing whitespace.
fn normalize_text(value: &str) -> String {
    let decoded = match decode(value.as_bytes()).to_string() {
        Ok(text) => text,
        Err(_) => value.to_string(),
    };
    // Joining whitespace-split words cannot leave outer whitespace behind.
    let words: Vec<&str> = decoded.split_whitespace().collect();
    words.join(" ")
}
/// Turns a possibly relative link into an absolute URL.
///
/// Blank input yields an empty string, absolute `http(s)` URLs pass through
/// trimmed, protocol-relative `//…` links get an `https:` scheme, and
/// everything else is joined onto the provider base URL with one slash.
fn normalize_url(&self, value: &str) -> String {
    let link = value.trim();
    match link {
        "" => String::new(),
        _ if link.starts_with("http://") || link.starts_with("https://") => link.to_string(),
        _ if link.starts_with("//") => format!("https:{link}"),
        _ => {
            let base = self.url.trim_end_matches('/');
            let path = link.trim_start_matches('/');
            format!("{}/{}", base, path)
        }
    }
}
/// Request headers sent with HTML page fetches: a Firefox user agent,
/// browser-style `Accept` / `Accept-Language`, and the site root as `Referer`.
fn html_headers() -> Vec<(String, String)> {
    let referer = format!("{BASE_URL}/");
    let pairs = [
        (
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer.as_str()),
    ];
    pairs
        .iter()
        .map(|(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Resolves a request into a [`Target`].
///
/// A non-blank query wins over `sort`: a leading `tag:`, `category:` /
/// `cat:`, `channel:`, or `pornstar:` / `model:` prefix (case-insensitive)
/// with a non-empty remainder selects the matching archive, the remainder
/// being lower-cased with spaces turned into `-`. Any other query is a search
/// using the original text. Without a query, `sort` chooses the listing and
/// unknown values mean latest.
fn target_from_request(query: Option<&str>, sort: &str) -> Target {
    let Some(q) = query.map(str::trim).filter(|v| !v.is_empty()) else {
        return match sort.trim().to_ascii_lowercase().as_str() {
            "popular" | "viewed" | "most_viewed" | "mv" => Target::MostViewed,
            "rated" | "top" | "top_rated" | "tr" => Target::TopRated,
            _ => Target::Latest,
        };
    };

    let shortcuts: [(&str, fn(String) -> Target); 6] = [
        ("tag:", |slug: String| Target::Tag { slug }),
        ("category:", |slug: String| Target::Category { slug }),
        ("cat:", |slug: String| Target::Category { slug }),
        ("channel:", |slug: String| Target::Channel { slug }),
        ("pornstar:", |slug: String| Target::Pornstar { slug }),
        ("model:", |slug: String| Target::Pornstar { slug }),
    ];
    let lower = q.to_ascii_lowercase();
    for (prefix, build) in shortcuts {
        let Some(rest) = lower.strip_prefix(prefix) else {
            continue;
        };
        let slug = rest.trim().replace(' ', "-");
        // A bare prefix with nothing after it is treated as a plain search.
        if !slug.is_empty() {
            return build(slug);
        }
    }
    Target::Search { query: q.to_string() }
}
/// Builds the listing URL for `target` at the given 1-based `page`.
///
/// Sort listings switch to a `/…/page/N/` path from page 2 onwards; search
/// and slug archives append a `page` query parameter instead.
fn build_url(&self, target: &Target, page: u16) -> String {
    let root = self.url.as_str();
    let beyond_first = page > 1;

    // Sort listings: a dedicated first-page path, then `/<paged>/page/N/`.
    let listing = |first: &str, paged: &str| {
        if beyond_first {
            format!("{root}/{paged}/page/{page}/")
        } else {
            format!("{root}/{first}")
        }
    };
    // Slug archives: `/<section>/<slug>/` with `?page=N` from page 2.
    let archive = |section: &str, slug: &str| {
        if beyond_first {
            format!("{root}/{section}/{slug}/?page={page}")
        } else {
            format!("{root}/{section}/{slug}/")
        }
    };

    match target {
        // Page 1 is the home page.
        Target::Latest => listing("", "newest"),
        Target::MostViewed => listing("mostviewed.html/", "most-viewed"),
        Target::TopRated => listing("top.html/", "top"),
        Target::Search { query } => {
            let encoded: String = form_urlencoded::byte_serialize(query.as_bytes()).collect();
            if beyond_first {
                format!("{root}/searches.html/?q={encoded}&page={page}")
            } else {
                format!("{root}/searches.html/?q={encoded}")
            }
        }
        Target::Tag { slug } => archive("porntags", slug.as_str()),
        Target::Category { slug } => archive("cat", slug.as_str()),
        Target::Channel { slug } => archive("channel", slug.as_str()),
        Target::Pornstar { slug } => archive("pornstar", slug.as_str()),
    }
}
/// Returns the normalized text content of `node`, or an empty string when
/// there is no node.
fn text_of(node: Option<ElementRef<'_>>) -> String {
    match node {
        Some(element) => Self::normalize_text(&element.text().collect::<String>()),
        None => String::new(),
    }
}
/// Parses a Tube8 listing page into video items.
///
/// Each `article.video-box.js_video-box` card becomes one item. Cards without
/// a `data-video-id` or without a title are skipped. The watch URL is built
/// from the provider base URL, and the only format offered is a proxy URL
/// produced by `build_proxy_url`.
///
/// Returns an empty list when the card selector cannot be parsed or the page
/// contains no usable cards.
fn parse_items(&self, html: &str, options: &ServerOptions) -> Vec<VideoItem> {
    let document = Html::parse_document(html);
    let Some(card_sel) = Self::selector("article.video-box.js_video-box") else {
        return vec![];
    };
    let link_sel = Self::selector("a[data-testid='plw_video_thumbnail_link']");
    let title_sel = Self::selector("a.video-title-text span");
    let thumb_sel = Self::selector("img.thumb-image");
    let duration_sel = Self::selector(".tm_video_duration span");
    let views_sel = Self::selector("span.info-views");
    let uploader_sel = Self::selector("a.author-title-text");
    let performer_sel = Self::selector("a.channel-performer");
    let mut items = Vec::new();
    for card in document.select(&card_sel) {
        let id = card
            .value()
            .attr("data-video-id")
            .map(|v| v.to_string())
            .filter(|v| !v.is_empty())
            .unwrap_or_default();
        if id.is_empty() {
            continue;
        }
        // Title - prefer aria-label on article; fall back to title link span
        let title = card
            .value()
            .attr("aria-label")
            .map(Self::normalize_text)
            .filter(|v| !v.is_empty())
            .or_else(|| {
                title_sel
                    .as_ref()
                    .and_then(|s| card.select(s).next())
                    .map(|v| Self::normalize_text(&v.text().collect::<String>()))
                    .filter(|v| !v.is_empty())
            })
            .unwrap_or_default();
        if title.is_empty() {
            continue;
        }
        // Thumbnail - lazy-load attributes first, plain `src` last
        let thumb = thumb_sel
            .as_ref()
            .and_then(|s| card.select(s).next())
            .and_then(|v| {
                v.value()
                    .attr("data-src")
                    .or_else(|| v.value().attr("data-poster"))
                    .or_else(|| v.value().attr("src"))
            })
            .map(|v| self.normalize_url(v))
            .unwrap_or_default();
        // Preview flipbook clip from the thumbnail link's data-mediabook
        let preview = link_sel
            .as_ref()
            .and_then(|s| card.select(s).next())
            .and_then(|v| v.value().attr("data-mediabook"))
            .map(|v| v.replace("&amp;", "&"))
            .filter(|v| !v.is_empty() && !v.starts_with("data:"));
        // Duration
        let duration_text =
            Self::text_of(duration_sel.as_ref().and_then(|s| card.select(s).next()));
        let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;
        // Views (first span.info-views) and rating (second span.info-views, contains "XX%")
        let all_views: Vec<_> = views_sel
            .as_ref()
            .map(|s| card.select(s).collect())
            .unwrap_or_default();
        let views = all_views
            .first()
            .map(|v| Self::normalize_text(&v.text().collect::<String>()))
            .and_then(|t| parse_abbreviated_number(&t))
            .map(|v| v as u32);
        let rating = all_views
            .get(1)
            .map(|v| {
                Self::normalize_text(&v.text().collect::<String>()).replace('%', "")
            })
            .and_then(|v| v.parse::<f32>().ok());
        // Uploader link is looked up once and reused for both name and href.
        let uploader_link = uploader_sel
            .as_ref()
            .and_then(|s| card.select(s).next());
        // Uploader name from article data attribute (most reliable); link text as fallback
        let uploader_name = card
            .value()
            .attr("data-uploader-name")
            .map(Self::normalize_text)
            .filter(|v| !v.is_empty())
            .or_else(|| {
                uploader_link
                    .map(|v| Self::normalize_text(&v.text().collect::<String>()))
                    .filter(|v| !v.is_empty())
            });
        let uploader_href = uploader_link
            .and_then(|v| v.value().attr("href"))
            .map(|v| self.normalize_url(v));
        // Namespaced uploader ID from article attribute
        let uploader_id = card
            .value()
            .attr("data-uploader-id")
            .filter(|v| !v.is_empty())
            .map(|v| format!("{CHANNEL_ID}:{v}"));
        // Performer tags from channel-performer links (case-insensitive de-dup)
        let mut tags: Vec<String> = Vec::new();
        if let Some(sel) = &performer_sel {
            for p in card.select(sel) {
                let t = Self::normalize_text(&p.text().collect::<String>());
                if !t.is_empty()
                    && !tags.iter().any(|x: &String| x.eq_ignore_ascii_case(&t))
                {
                    tags.push(t);
                }
            }
        }
        // Proxy URL resolves signed HLS via our redirect proxy
        let proxy_url = build_proxy_url(options, CHANNEL_ID, &id);
        // Watch URL goes through the provider base URL like every other link.
        let page_url = self.normalize_url(&format!("/porn-video/{id}/"));
        let mut item = VideoItem::new(
            id.clone(),
            title,
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        item.views = views;
        if let Some(r) = rating {
            item = item.rating(r);
        }
        if let Some(name) = uploader_name {
            item = item.uploader(name);
        }
        if let Some(url) = uploader_href {
            item.uploaderUrl = Some(url);
        }
        if let Some(uid) = uploader_id {
            item.uploaderId = Some(uid);
        }
        if let Some(p) = preview {
            item = item.preview(p);
        }
        if !tags.is_empty() {
            item = item.tags(tags);
        }
        item = item.formats(vec![
            VideoFormat::m3u8(proxy_url, "auto".to_string(), "tube8".to_string())
                .ext("mp4".to_string())
                .protocol("m3u8_native".to_string())
                .video_ext("mp4".to_string())
        ]);
        items.push(item);
    }
    items
}
}
#[async_trait]
impl Provider for Tube8Provider {
    /// Fetches one listing page, using the cache keyed by the page URL.
    ///
    /// Entries younger than 300 seconds are returned directly. Otherwise the
    /// page is fetched and parsed; on a request failure or an empty parse the
    /// stale cached items (possibly none) are returned instead.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _db_pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Unparseable page numbers fall back to 1; 0 is raised to 1.
        let page_number = page.parse::<u16>().map_or(1, |number| number.max(1));
        let target = Self::target_from_request(query.as_deref(), &sort);
        let url = self.build_url(&target, page_number);

        let mut stale_items: Vec<VideoItem> = vec![];
        if let Some((time, items)) = cache.get(&url) {
            if time.elapsed().unwrap_or_default().as_secs() < 300 {
                return items.clone();
            }
            stale_items = items.clone();
        }

        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
        let response = requester
            .get_with_headers(&url, Self::html_headers(), None)
            .await;
        let text = match response {
            Ok(body) => body,
            Err(e) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.request",
                    &format!("url={url}; error={e}"),
                )
                .await;
                return stale_items;
            }
        };

        let fresh_items = self.parse_items(&text, &options);
        if fresh_items.is_empty() {
            return stale_items;
        }
        cache.remove(&url);
        cache.insert(url, fresh_items.clone());
        fresh_items
    }

    /// Returns this provider's channel description.
    fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(cv);
        Some(channel)
    }
}
// Unit tests for request resolution, URL building and listing-card parsing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a provider rooted at the default base URL.
    fn provider() -> Tube8Provider {
        Tube8Provider::new()
    }

    /// Without a query, the sort id selects the listing.
    #[test]
    fn resolves_sort_targets() {
        assert!(matches!(
            Tube8Provider::target_from_request(None, "new"),
            Target::Latest
        ));
        assert!(matches!(
            Tube8Provider::target_from_request(None, "popular"),
            Target::MostViewed
        ));
        assert!(matches!(
            Tube8Provider::target_from_request(None, "rated"),
            Target::TopRated
        ));
    }

    /// Prefixed queries route to archives; spaces in the slug become dashes.
    #[test]
    fn resolves_prefix_shortcuts() {
        assert!(matches!(
            Tube8Provider::target_from_request(Some("tag:lesbian"), "new"),
            Target::Tag { slug } if slug == "lesbian"
        ));
        assert!(matches!(
            Tube8Provider::target_from_request(Some("channel:brazzers"), "new"),
            Target::Channel { slug } if slug == "brazzers"
        ));
        assert!(matches!(
            Tube8Provider::target_from_request(Some("pornstar:mia khalifa"), "new"),
            Target::Pornstar { slug } if slug == "mia-khalifa"
        ));
        assert!(matches!(
            Tube8Provider::target_from_request(Some("cat:teens"), "new"),
            Target::Category { slug } if slug == "teens"
        ));
    }

    /// Page 1 of the latest listing is the home page; later pages are paged paths.
    #[test]
    fn builds_latest_pages() {
        let p = provider();
        assert_eq!(p.build_url(&Target::Latest, 1), "https://www.tube8.com/");
        assert_eq!(
            p.build_url(&Target::Latest, 2),
            "https://www.tube8.com/newest/page/2/"
        );
    }

    /// Search queries are form-encoded and paged via a `page` parameter.
    #[test]
    fn builds_search_pages() {
        let p = provider();
        let t = Target::Search { query: "teen creampie".to_string() };
        assert_eq!(
            p.build_url(&t, 1),
            "https://www.tube8.com/searches.html/?q=teen+creampie"
        );
        assert_eq!(
            p.build_url(&t, 2),
            "https://www.tube8.com/searches.html/?q=teen+creampie&page=2"
        );
    }

    /// A representative card yields id, title, thumb, duration, views,
    /// rating, uploader, performer tag and preview.
    #[test]
    fn parses_video_cards() {
        let p = provider();
        let html = r#"
<article class="video-box pc js_video-box js-pop"
data-video-id="12345"
data-uploader-id="999"
data-uploader-name="TestChannel"
aria-label="Test Video Title">
<a href="/porn-video/12345/" data-testid="plw_video_thumbnail_link"
data-mediabook="https://ev-ph.t8cdn.com/videos/test_fb.mp4?validfrom=1&validto=2">
<div class="thumb-image-container">
<img class="thumb-image js_lazy"
data-src="https://ei-ph.t8cdn.com/videos/test/thumb.jpg"
src="data:image/png;base64,xxx"/>
</div>
<div class="video-properties">
<div class="video-duration tm_video_duration"><span>08:30</span></div>
</div>
</a>
<div class="thumb-info-wrapper">
<a class="author-title-text" href="/channel/testchannel/">TestChannel</a>
<a class="channel-performer" href="/pornstar/jane-doe/">Jane Doe</a>
<span class='info-views'>12.3K</span>
<span class='info-views'>87%</span>
</div>
</article>
"#;
        let opts = ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: None,
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        };
        let items = p.parse_items(html, &opts);
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.id, "12345");
        assert_eq!(item.title, "Test Video Title");
        assert_eq!(item.thumb, "https://ei-ph.t8cdn.com/videos/test/thumb.jpg");
        assert_eq!(item.duration, 510);
        assert_eq!(item.views, Some(12300));
        assert_eq!(item.rating, Some(87.0));
        assert_eq!(item.uploader.as_deref(), Some("TestChannel"));
        assert!(item
            .tags
            .as_ref()
            .is_some_and(|t| t.iter().any(|v| v == "Jane Doe")));
        assert!(item
            .preview
            .as_ref()
            .is_some_and(|p| p.contains("_fb.mp4")));
    }
}

View File

@@ -5,7 +5,7 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
@@ -260,6 +260,14 @@ impl ViralxxxpornProvider {
format!("{}/{}", self.url, url.trim_start_matches("./"))
}
fn normalize_video_item_url(&self, url: &str) -> String {
let normalized = self.normalize_url(url);
if normalized.contains("/videos/") {
return normalized.replacen("/videos/", "/video/", 1);
}
normalized
}
fn extract_id_from_url(url: &str) -> String {
let parts = url
.trim_end_matches('/')
@@ -353,7 +361,7 @@ impl ViralxxxpornProvider {
let href = captures
.name("href")
.map(|m| self.normalize_url(m.as_str()))
.map(|m| self.normalize_video_item_url(m.as_str()))
.unwrap_or_default();
let body = captures
.name("body")
@@ -402,13 +410,7 @@ impl ViralxxxpornProvider {
item = item.views(views);
}
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
item = item.preview(preview);
}
items.push(item);
}
@@ -450,7 +452,7 @@ impl ViralxxxpornProvider {
else {
continue;
};
let video_url = self.normalize_url(&video_url_raw);
let video_url = self.normalize_video_item_url(&video_url_raw);
let id = Self::extract_id_from_url(&video_url);
if id.is_empty() {
continue;
@@ -506,13 +508,7 @@ impl ViralxxxpornProvider {
item = item.views(views);
}
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
item = item.preview(preview);
}
items.push(item);
}
@@ -616,11 +612,12 @@ mod tests {
assert_eq!(items[0].title, "Sample & Title");
assert_eq!(
items[0].url,
"https://viralxxxporn.com/videos/336186/sample-video/"
"https://viralxxxporn.com/video/336186/sample-video/"
);
assert_eq!(items[0].thumb, "https://cdn.example/thumb.jpg");
assert_eq!(items[0].duration, 754);
assert_eq!(items[0].views, Some(1_200_000));
assert!(items[0].formats.is_none());
}
#[test]
@@ -645,6 +642,7 @@ mod tests {
assert_eq!(items[0].thumb, "https://cdn.example.com/thumb.jpg");
assert_eq!(items[0].duration, 780);
assert_eq!(items[0].views, Some(29_000));
assert!(items[0].formats.is_none());
}
#[test]

1747
src/providers/vjav.rs Normal file

File diff suppressed because it is too large Load Diff

443
src/providers/wowxxx.rs Normal file
View File

@@ -0,0 +1,443 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
/// Static grouping metadata for the WOW.XXX provider: the group it is listed
/// under and its descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "studio-network",
tags: &["premium", "studios", "networks", "models"],
};
/// Site origin (no trailing slash) that every relative link is resolved against.
const BASE_URL: &str = "https://www.wow.xxx";
/// Channel identifier used for the channel id.
const CHANNEL_ID: &str = "wowxxx";
/// `User-Agent` header value sent with HTML requests.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent with HTML requests.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's error types via `error_chain!`: I/O errors are
// linked in as a foreign error, and `Parse` is a custom error kind that
// carries a message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// The listing a request resolves to; `target_url` maps each variant to a URL.
#[derive(Debug, Clone)]
enum Target {
// `/latest-updates` listing (the default).
Latest,
// `/most-popular/all` listing.
MostPopular,
// `/top-rated/all` listing.
TopRated,
// Free-text search; holds the normalized query.
Search(String),
// Direct archive page; holds the archive's absolute base URL.
Archive(String),
}
/// Scraping provider for WOW.XXX. It is a unit struct with no fields.
#[derive(Debug, Clone)]
pub struct WowxxxProvider;
impl WowxxxProvider {
pub fn new() -> Self {
Self
}
fn build_channel(&self, _cv: ClientVersion) -> Channel {
Channel {
id: CHANNEL_ID.to_string(),
name: "WOW.XXX".to_string(),
description:
"Premium studio aggregator with latest updates, search, and direct archive routing for sites/networks/models/tags/categories."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=wow.xxx".to_string(),
status: "active".to_string(),
categories: vec![],
options: vec![ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse WOW.XXX feeds.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Latest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "rated".to_string(),
title: "Top Rated".to_string(),
},
],
multiSelect: false,
}],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned
/// unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text with whitespace collapsed to single spaces
/// (text nodes are separated by a space) and HTML entities decoded.
fn text_of(el: &ElementRef<'_>) -> String {
    // Splitting each text node into words and joining everything with one
    // space gives the same result as joining the nodes first.
    let words: Vec<&str> = el.text().flat_map(str::split_whitespace).collect();
    let collapsed = words.join(" ");
    Self::decode_html(&collapsed)
}
fn normalize_url(path: &str) -> String {
let path = path.trim();
if path.starts_with("http://") || path.starts_with("https://") {
return path.to_string();
}
if path.starts_with("//") {
return format!("https:{path}");
}
if path.starts_with('/') {
return format!("{BASE_URL}{path}");
}
format!("{BASE_URL}/{path}")
}
/// Extracts the path component that follows the first `/<segment>/` in `url`.
///
/// Returns `None` when the segment is absent or nothing non-blank follows it.
fn slug_from_url(url: &str, segment: &str) -> Option<String> {
    let marker = format!("/{segment}/");
    let start = url.find(marker.as_str())? + marker.len();
    let tail = url[start..].trim_matches('/');
    let slug = tail.split('/').next()?.trim();
    if slug.is_empty() {
        return None;
    }
    Some(slug.to_string())
}
/// Cleans a search query: trims it, removes leading `#` characters, and
/// collapses every run of `/` into a single slash.
fn normalize_search_query(query: &str) -> String {
    let stripped = query.trim().trim_start_matches('#');
    let mut cleaned = String::with_capacity(stripped.len());
    for ch in stripped.chars() {
        // Skip a slash that directly follows another slash.
        if ch == '/' && cleaned.ends_with('/') {
            continue;
        }
        cleaned.push(ch);
    }
    cleaned
}
/// Appends pagination to a listing base URL: pages 0 and 1 map to `<base>/`,
/// later pages to `<base>/<page>/`. Trailing slashes on `base` are ignored.
fn page_url(base: &str, page: u16) -> String {
    let root = base.trim_end_matches('/');
    match page {
        0 | 1 => format!("{root}/"),
        number => format!("{root}/{number}/"),
    }
}
/// Builds the listing URL for `target` at the given page. Search queries are
/// form-encoded and ordered by relevance; archives page off their stored
/// base URL.
fn target_url(target: &Target, page: u16) -> String {
    let base = match target {
        Target::Latest => format!("{BASE_URL}/latest-updates"),
        Target::MostPopular => format!("{BASE_URL}/most-popular/all"),
        Target::TopRated => format!("{BASE_URL}/top-rated/all"),
        Target::Search(query) => {
            let encoded: String =
                url::form_urlencoded::byte_serialize(query.trim().as_bytes()).collect();
            format!("{BASE_URL}/search/{encoded}/relevance")
        }
        Target::Archive(base) => base.clone(),
    };
    Self::page_url(&base, page)
}
/// Maps a prefixed query such as `site:foo` or `model:bar` to an archive URL.
///
/// Recognized prefixes (case-insensitive) are `site:` / `studio:`,
/// `network:`, `model:` / `pornstar:`, `tag:`, and `cat:` / `category:`.
/// The prefix must be at the very start of the trimmed query, so a plain
/// search that merely contains one of these words followed by a colon
/// (e.g. `hashtag:foo`) is not hijacked into an archive route.
///
/// Returns `None` when no prefix matches or the value after it is empty.
fn resolve_query_target(query: &str) -> Option<Target> {
    const PREFIXES: [(&str, &str); 8] = [
        ("site:", "sites"),
        ("studio:", "sites"),
        ("network:", "networks"),
        ("model:", "models"),
        ("pornstar:", "models"),
        ("tag:", "tags"),
        ("cat:", "categories"),
        ("category:", "categories"),
    ];
    let lower = query.trim().to_ascii_lowercase();
    PREFIXES.iter().find_map(|(prefix, segment)| {
        // Anchor at the start of the query; mid-string matches are ignored.
        let value = lower.strip_prefix(*prefix)?;
        let slug = value.trim().trim_matches('/');
        if slug.is_empty() {
            None
        } else {
            Some(Target::Archive(format!("{BASE_URL}/{segment}/{slug}/")))
        }
    })
}
/// Resolves a request into a [`Target`]. A non-blank query wins over `sort`:
/// prefixed shortcuts become archives, anything else a normalized search.
/// Without a query, `sort` picks the listing and unknown values mean latest.
fn resolve_target(query: Option<&str>, sort: &str) -> Target {
    let non_blank = query.map(str::trim).filter(|text| !text.is_empty());
    if let Some(text) = non_blank {
        return match Self::resolve_query_target(text) {
            Some(shortcut) => shortcut,
            None => Target::Search(Self::normalize_search_query(text)),
        };
    }
    if sort == "popular" {
        Target::MostPopular
    } else if sort == "rated" {
        Target::TopRated
    } else {
        Target::Latest
    }
}
/// Request headers sent with HTML page fetches: the Firefox user agent,
/// browser-style `Accept` / `Accept-Language`, and the given `Referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer),
    ]
    .iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
async fn fetch_html(
requester: &mut crate::util::requester::Requester,
url: &str,
) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(BASE_URL), Some(Version::HTTP_11))
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
}
/// Parses a duration label into seconds, ignoring the `Full Video` / `Full`
/// badge text. Unparseable labels yield `0`.
fn parse_duration(duration_text: &str) -> u32 {
    // Remove the longer label first so no stray " Video" is left behind.
    let without_labels = duration_text.replace("Full Video", "").replace("Full", "");
    let cleaned = without_labels.trim().to_string();
    match parse_time_to_seconds(&cleaned) {
        Some(seconds) => seconds as u32,
        None => 0 as u32,
    }
}
/// Extracts the video cards from a listing page.
///
/// Every `div.list-videos div.item` element is inspected. A card is skipped
/// when it has no `a.thumb_img` link with a non-empty `href`, when no id can
/// be derived, or when no thumbnail URL is found. Rating, preview, uploader
/// name, uploader URL and uploader id are only set when present.
///
/// # Errors
/// Fails only if one of the CSS selectors or the rating regex cannot be built.
fn parse_video_items(html: &str) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
let item_sel = Self::selector("div.list-videos div.item")?;
let a_sel = Self::selector("a.thumb_img")?;
let title_sel = Self::selector("a.thumb_title strong.title, a.thumb_title")?;
let img_sel = Self::selector("div.thumb__img img")?;
let thumb_wrap_sel = Self::selector("div.thumb__img")?;
let duration_sel = Self::selector("span.duration")?;
let rating_sel = Self::selector("div.rating")?;
let views_sel = Self::selector("div.views")?;
let site_sel = Self::selector("a.models__item.thumb_cs")?;
let model_sel = Self::selector("a.models__item.thumb_model")?;
let id_sel = Self::selector("span.js-favourites[data-object_id]")?;
// First run of one to three digits in the rating text.
let rating_re = Regex::new(r"(\d{1,3})").map_err(|e| Error::from(e.to_string()))?;
let mut out = vec![];
for item in document.select(&item_sel) {
// The thumbnail anchor carries the video page link; no anchor, no card.
let Some(anchor) = item.select(&a_sel).next() else {
continue;
};
let href = anchor.value().attr("href").unwrap_or("").trim();
if href.is_empty() {
continue;
}
let video_url = Self::normalize_url(href);
// Id: the favourites widget's `data-object_id`, else the last path
// segment of the video URL.
let id = item
.select(&id_sel)
.next()
.and_then(|el| el.value().attr("data-object_id"))
.map(|v| v.to_string())
.or_else(|| {
let tail = video_url.trim_end_matches('/').split('/').next_back()?;
if tail.is_empty() {
None
} else {
Some(tail.to_string())
}
})
.unwrap_or_default();
if id.is_empty() {
continue;
}
// Title: text of the first element matching the title selector, else the
// anchor's `title` attribute, else the literal "Untitled".
let title = item
.select(&title_sel)
.next()
.map(|el| Self::text_of(&el))
.filter(|value| !value.is_empty())
.unwrap_or_else(|| {
anchor
.value()
.attr("title")
.map(Self::decode_html)
.unwrap_or_else(|| "Untitled".to_string())
});
// Thumbnail: `data-src` is preferred over `src`.
let thumb = item
.select(&img_sel)
.next()
.and_then(|img| {
img.value()
.attr("data-src")
.or_else(|| img.value().attr("src"))
})
.map(Self::normalize_url)
.unwrap_or_default();
if thumb.is_empty() {
continue;
}
// Optional preview URL taken from the thumbnail wrapper's `data-preview`.
let preview = item
.select(&thumb_wrap_sel)
.next()
.and_then(|el| el.value().attr("data-preview"))
.map(Self::normalize_url)
.filter(|value| !value.is_empty());
// Duration and views default to 0 when missing or unparseable.
let duration = item
.select(&duration_sel)
.next()
.map(|el| Self::parse_duration(&Self::text_of(&el)))
.unwrap_or(0);
let views = item
.select(&views_sel)
.next()
.map(|el| Self::text_of(&el))
.and_then(|v| parse_abbreviated_number(v.trim()))
.unwrap_or(0);
// Rating stays `None` unless the rating text contains digits.
let rating = item
.select(&rating_sel)
.next()
.map(|el| Self::text_of(&el))
.and_then(|raw| {
rating_re
.captures(&raw)
.and_then(|caps| caps.get(1))
.and_then(|m| m.as_str().parse::<f32>().ok())
});
// Uploader fields all come from the first `a.models__item.thumb_cs` link.
let uploader_name = item
.select(&site_sel)
.next()
.map(|el| Self::text_of(&el))
.filter(|value| !value.is_empty());
let uploader_url = item
.select(&site_sel)
.next()
.and_then(|el| el.value().attr("href"))
.map(Self::normalize_url);
// Uploader id is built from the slug `slug_from_url` returns for "sites".
let uploader_id = uploader_url
.as_deref()
.and_then(|url| Self::slug_from_url(url, "sites"))
.map(|slug| format!("{CHANNEL_ID}:site:{slug}"));
// Tags are the non-empty texts of the `a.models__item.thumb_model` links.
let mut tags = vec![];
for model in item.select(&model_sel) {
let value = Self::text_of(&model);
if !value.is_empty() {
tags.push(value);
}
}
let mut video = VideoItem::new(
id,
title,
video_url,
CHANNEL_ID.to_string(),
thumb,
duration,
)
.views(views)
.tags(tags);
if let Some(r) = rating {
video = video.rating(r);
}
if let Some(preview) = preview {
video = video.preview(preview);
}
if let Some(uploader) = uploader_name {
video = video.uploader(uploader);
}
if let Some(url) = uploader_url {
video = video.uploader_url(url);
}
if let Some(uid) = uploader_id {
video.uploaderId = Some(uid);
}
out.push(video);
}
Ok(out)
}
/// Resolves the request to a listing URL, downloads it and parses the cards.
///
/// `_db` and `_cache` are accepted but not used by this method.
///
/// # Errors
/// Propagates fetch failures and selector/regex construction failures.
async fn get(
    &self,
    _db: DbPool,
    _cache: VideoCache,
    page: u16,
    sort: &str,
    query: Option<String>,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let listing_url = Self::target_url(&Self::resolve_target(query.as_deref(), sort), page);
    let mut client = requester_or_default(&options, CHANNEL_ID, "get_videos");
    let body = Self::fetch_html(&mut client, &listing_url).await?;
    Self::parse_video_items(&body)
}
}
#[async_trait]
impl Provider for WowxxxProvider {
    /// Returns one page of videos.
    ///
    /// `page` is parsed as a number; unparseable values and `0` become page 1.
    /// `_per_page` is ignored. Failures are reported through
    /// `report_provider_error` and produce an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        db: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u16>().map_or(1, |n| n.max(1));
        let failure = match self
            .get(db, cache, page_number, &sort, query, options)
            .await
        {
            Ok(videos) => return videos,
            Err(e) => e.to_string(),
        };
        report_provider_error(CHANNEL_ID, "get_videos", &failure).await;
        Vec::new()
    }

    /// Returns the channel descriptor built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}

629
src/providers/xhamster.rs Normal file
View File

@@ -0,0 +1,629 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
/// Grouping metadata exported for this provider: its channel group id and
/// descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "tube", "hd", "general"],
};
/// Site root that every listing URL and relative link is resolved against.
const BASE_URL: &str = "https://xhamster.com";
/// Channel id used for the channel descriptor, video items, uploader ids and
/// error reports.
const CHANNEL_ID: &str = "xhamster";
/// `User-Agent` header value sent with page requests.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0";
/// `Accept` header value sent with page requests.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's error types through the `error_chain!` macro; the
// `Result` and `Error` names used by the provider methods come from here.
// `std::io::Error` is linked as a foreign error, and `Parse` is a custom kind
// carrying a message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Static `(slug, label)` category table. xHamster has 600+ categories; this
/// list covers only the mainstream ones.
///
/// The slug is the path segment placed after `/categories/`; the label is the
/// title shown in the channel's category filter. Bare search queries are also
/// matched against both columns.
const CATEGORIES: &[(&str, &str)] = &[
("18-year-old", "18 Year Old"),
("amateur", "Amateur"),
("anal", "Anal"),
("asian", "Asian"),
("bbw", "BBW"),
("bdsm", "BDSM"),
("big-ass", "Big Ass"),
("big-tits", "Big Tits"),
("bisexual", "Bisexual"),
("blonde", "Blonde"),
("blowjob", "Blowjob"),
("bondage", "Bondage"),
("brunette", "Brunette"),
("creampie", "Creampie"),
("cumshot", "Cumshot"),
("ebony", "Ebony"),
("fetish", "Fetish"),
("gay", "Gay"),
("granny", "Granny"),
("hardcore", "Hardcore"),
("hentai", "Hentai"),
("homemade", "Homemade"),
("indian", "Indian"),
("interracial", "Interracial"),
("japanese", "Japanese"),
("latina", "Latina"),
("lesbian", "Lesbian"),
("massage", "Massage"),
("masturbation", "Masturbation"),
("mature", "Mature"),
("milf", "MILF"),
("old-young", "Old & Young"),
("orgasm", "Orgasm"),
("pov", "POV"),
("public", "Public"),
("russian", "Russian"),
("shemale", "Shemale"),
("small-tits", "Small Tits"),
("squirt", "Squirt"),
("teen", "Teen"),
("threesome", "Threesome"),
("toys", "Toys"),
("vintage", "Vintage"),
("webcam", "Webcam"),
];
/// The listing a request resolves to; `target_url` turns each variant into a URL.
#[derive(Debug, Clone)]
enum Target {
/// The `/newest` feed.
Newest,
/// The `/most-viewed` feed.
MostViewed,
/// The `/best` feed.
Best,
/// Keyword search for the contained query text.
Search(String),
/// Category archive identified by its slug (`/categories/{slug}`).
Category(String),
/// Channel archive identified by its slug (`/channels/{slug}`).
Channel(String),
}
/// Provider for xhamster.com listings. The struct carries no fields.
#[derive(Debug, Clone)]
pub struct XhamsterProvider;
impl XhamsterProvider {
pub fn new() -> Self {
Self
}
fn build_channel(&self, _cv: ClientVersion) -> Channel {
let mut cat_options = vec![FilterOption {
id: "all".to_string(),
title: "All".to_string(),
}];
for (slug, label) in CATEGORIES {
cat_options.push(FilterOption {
id: slug.to_string(),
title: label.to_string(),
});
}
Channel {
id: CHANNEL_ID.to_string(),
name: "xHamster".to_string(),
description:
"xHamster — free porn with newest, most viewed, category, channel, and search routing."
.to_string(),
premium: false,
favicon: "https://www.google.com/s2/favicons?sz=64&domain=xhamster.com".to_string(),
status: "active".to_string(),
categories: CATEGORIES
.iter()
.map(|(_, label)| label.to_string())
.collect(),
options: vec![
ChannelOption {
id: "sort".to_string(),
title: "Sort".to_string(),
description: "Browse xHamster by sort order.".to_string(),
systemImage: "list.number".to_string(),
colorName: "blue".to_string(),
options: vec![
FilterOption {
id: "new".to_string(),
title: "Newest".to_string(),
},
FilterOption {
id: "popular".to_string(),
title: "Most Viewed".to_string(),
},
FilterOption {
id: "best".to_string(),
title: "Best".to_string(),
},
],
multiSelect: false,
},
ChannelOption {
id: "categories".to_string(),
title: "Categories".to_string(),
description: "Browse an xHamster category archive.".to_string(),
systemImage: "square.grid.2x2".to_string(),
colorName: "orange".to_string(),
options: cat_options,
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned
/// unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text content with every run of whitespace collapsed
/// to a single space and no leading or trailing whitespace.
fn text_of(el: &ElementRef<'_>) -> String {
    let mut words: Vec<&str> = Vec::new();
    for fragment in el.text() {
        // Text nodes are treated as whitespace-separated from one another.
        words.extend(fragment.split_whitespace());
    }
    words.join(" ")
}
/// Produces a comparison key: `-` and `_` count as spaces, whitespace runs
/// collapse to one space, the ends are trimmed and ASCII letters are lowercased.
fn normalize_key(s: &str) -> String {
    let spaced: String = s
        .chars()
        .map(|c| if c == '-' || c == '_' { ' ' } else { c })
        .collect();
    let mut key = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key.make_ascii_lowercase();
    key
}
/// Builds the listing URL for `target` at the given 1-based `page`.
///
/// Pages `0` and `1` map to the bare listing URL; later pages append
/// `?page={page}`.
///
/// Search text is trimmed and form-encoded before being placed in the path:
/// spaces become `+`, and every byte outside `A-Z a-z 0-9 - _ . *` is
/// percent-encoded. Characters such as `&`, `#`, `?`, `/` or non-ASCII text
/// therefore can no longer truncate or restructure the URL. Category and
/// channel slugs are inserted as given.
fn target_url(target: &Target, page: u16) -> String {
    /// Serializes `query` the way `application/x-www-form-urlencoded` does.
    fn encode_search(query: &str) -> String {
        let mut encoded = String::with_capacity(query.len());
        for byte in query.bytes() {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'*' => {
                    encoded.push(char::from(byte));
                }
                b' ' => encoded.push('+'),
                _ => encoded.push_str(&format!("%{byte:02X}")),
            }
        }
        encoded
    }

    let base = match target {
        Target::Newest => format!("{BASE_URL}/newest"),
        Target::MostViewed => format!("{BASE_URL}/most-viewed"),
        Target::Best => format!("{BASE_URL}/best"),
        Target::Search(q) => format!("{BASE_URL}/search/{}", encode_search(q.trim())),
        Target::Category(slug) => format!("{BASE_URL}/categories/{slug}"),
        Target::Channel(slug) => format!("{BASE_URL}/channels/{slug}"),
    };
    if page <= 1 {
        base
    } else {
        format!("{base}?page={page}")
    }
}
/// Parses a view-count label such as `"1.2M views"`.
///
/// The words `views` / `view`, commas and spaces are removed, and what is left
/// is handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let mut digits = text.replace("views", "");
    digits = digits.replace("view", "");
    digits.retain(|c| c != ',' && c != ' ');
    parse_abbreviated_number(digits.trim())
}
/// Builds the request headers for page fetches, in the order `User-Agent`,
/// `Accept`, `Referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs = [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
/// Extracts the video cards from an xHamster listing page.
///
/// Every `div[data-video-type="video"]` element is inspected. A card is
/// skipped when it lacks a non-empty `data-video-id`, a thumb link with a
/// non-blank `href`, or a usable title. Thumbnail, duration, views, preview
/// and uploader details are optional.
///
/// Links are resolved against the site root: absolute `http(s)://` URLs are
/// kept, protocol-relative `//host/...` URLs get `https:`, and every other
/// form is appended to `BASE_URL`. A blank uploader `href` is treated as
/// "no uploader URL" rather than being turned into the site root.
///
/// # Errors
/// Fails only if one of the CSS selectors cannot be built.
fn parse_list_page(html: &str) -> Result<Vec<VideoItem>> {
    /// Resolves a (trimmed, non-empty) href against the site root.
    fn absolute_url(href: &str) -> String {
        let href = href.trim();
        if href.starts_with("https://") || href.starts_with("http://") {
            href.to_string()
        } else if href.starts_with("//") {
            format!("https:{href}")
        } else if href.starts_with('/') {
            format!("{BASE_URL}{href}")
        } else {
            format!("{BASE_URL}/{href}")
        }
    }

    let document = Html::parse_document(html);
    let card_sel = Self::selector("div[data-video-type=\"video\"]")?;
    let thumb_link_sel = Self::selector("a[data-role=\"thumb-link\"]")?;
    let img_sel = Self::selector("img[data-role=\"thumb-preview-img\"]")?;
    let dur_sel = Self::selector("div[data-role=\"video-duration\"]")?;
    let title_sel = Self::selector("a.video-thumb-info__name")?;
    let uploader_name_sel = Self::selector("a.video-uploader__name")?;
    let uploader_link_sel = Self::selector("a[data-role=\"video-uploader-link\"]")?;
    let views_sel = Self::selector("div.video-thumb-views")?;
    let mut items = Vec::new();
    for card in document.select(&card_sel) {
        let id = match card.value().attr("data-video-id") {
            Some(v) if !v.is_empty() => v.to_string(),
            _ => continue,
        };
        let Some(thumb_link) = card.select(&thumb_link_sel).next() else {
            continue;
        };
        let href = thumb_link.value().attr("href").unwrap_or_default().trim();
        if href.is_empty() {
            continue;
        }
        let page_url = absolute_url(href);
        let preview = thumb_link
            .value()
            .attr("data-previewvideo")
            .or_else(|| thumb_link.value().attr("data-previewvideo-fallback"))
            .map(str::to_string);
        // `srcset` (the smaller thumb) is preferred; `src` is the fallback.
        let thumb = card
            .select(&img_sel)
            .next()
            .and_then(|el| {
                el.value()
                    .attr("srcset")
                    .or_else(|| el.value().attr("src"))
            })
            // A srcset entry may carry a descriptor such as " 1w"; keep only
            // the first whitespace-delimited token.
            .map(|v| v.split_whitespace().next().unwrap_or(v).to_string())
            .unwrap_or_default();
        let duration = card
            .select(&dur_sel)
            .next()
            .map(|el| Self::text_of(&el))
            .and_then(|text| parse_time_to_seconds(&text))
            .and_then(|v| u32::try_from(v).ok())
            .unwrap_or(0);
        // Title: the info link's `title` attribute, else the thumb link's
        // `aria-label`; blank values count as missing in both cases.
        let title = card
            .select(&title_sel)
            .next()
            .and_then(|el| el.value().attr("title"))
            .map(Self::decode_html)
            .filter(|v| !v.trim().is_empty())
            .or_else(|| {
                thumb_link
                    .value()
                    .attr("aria-label")
                    .map(Self::decode_html)
                    .filter(|v| !v.trim().is_empty())
            })
            .unwrap_or_default();
        if title.is_empty() {
            continue;
        }
        let uploader = card
            .select(&uploader_name_sel)
            .next()
            .map(|el| Self::decode_html(&Self::text_of(&el)))
            .filter(|v| !v.is_empty());
        // Reject blank hrefs BEFORE resolving them; once resolved the URL is
        // never empty, so a later emptiness check could not catch them.
        let uploader_url = card
            .select(&uploader_link_sel)
            .next()
            .and_then(|el| el.value().attr("href"))
            .map(str::trim)
            .filter(|href| !href.is_empty())
            .map(absolute_url);
        let views = card
            .select(&views_sel)
            .next()
            .and_then(|el| Self::parse_views(&Self::text_of(&el)));
        // Uploader id is `{channel}:{kind}:{slug}`, where the slug is the last
        // path segment and the kind is inferred from the URL path.
        let uploader_id = uploader_url.as_deref().and_then(|url| {
            let slug = url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default();
            if slug.is_empty() {
                return None;
            }
            let kind = if url.contains("/channels/") {
                "channel"
            } else if url.contains("/pornstars/") {
                "pornstar"
            } else {
                "creator"
            };
            Some(format!("{CHANNEL_ID}:{kind}:{slug}"))
        });
        let mut item = VideoItem::new(
            id,
            title,
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        item.views = views;
        item.preview = preview;
        item.uploader = uploader;
        item.uploaderUrl = uploader_url;
        if uploader_id.is_some() {
            item.uploaderId = uploader_id;
        }
        items.push(item);
    }
    Ok(items)
}
/// Decides what a free-text query refers to.
///
/// 1. `cat:` / `category:` and `channel:` / `channels:` prefixes (kind matched
///    case-insensitively) route to that archive; the value is trimmed,
///    spaces become `-`, and it is lowercased.
/// 2. Otherwise a query equal (after `normalize_key`) to a known category
///    label or slug routes to that category.
/// 3. Anything else is a keyword search for the trimmed query.
fn resolve_query_target(&self, query: &str) -> Target {
    let trimmed = query.trim();
    let shortcut = trimmed.split_once(':').and_then(|(kind, value)| {
        let slug = value.trim().replace(' ', "-").to_ascii_lowercase();
        if slug.is_empty() {
            return None;
        }
        match kind.trim().to_ascii_lowercase().as_str() {
            "cat" | "category" => Some(Target::Category(slug)),
            "channel" | "channels" => Some(Target::Channel(slug)),
            _ => None,
        }
    });
    if let Some(target) = shortcut {
        return target;
    }
    let wanted = Self::normalize_key(trimmed);
    CATEGORIES
        .iter()
        .find(|(slug, label)| {
            Self::normalize_key(label) == wanted || Self::normalize_key(slug) == wanted
        })
        .map(|(slug, _)| Target::Category(slug.to_string()))
        .unwrap_or_else(|| Target::Search(trimmed.to_string()))
}
/// Maps a sort id (trimmed, ASCII case-insensitive) to a feed:
/// `popular` / `viewed` / `most_viewed` / `mostviewed` → most viewed,
/// `best` → best, everything else → newest.
fn resolve_sort_target(sort: &str) -> Target {
    let key = sort.trim().to_ascii_lowercase();
    if key == "best" {
        Target::Best
    } else if matches!(
        key.as_str(),
        "popular" | "viewed" | "most_viewed" | "mostviewed"
    ) {
        Target::MostViewed
    } else {
        Target::Newest
    }
}
/// Resolves a query-less request: a selected category other than `all` (and
/// not empty) wins; otherwise the sort id chooses the feed.
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
    match options.categories.as_deref() {
        Some(cat) if !cat.is_empty() && cat != "all" => Target::Category(cat.to_string()),
        _ => Self::resolve_sort_target(sort),
    }
}
async fn fetch_target(
&self,
cache: VideoCache,
target: Target,
page: u16,
per_page: usize,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let url = Self::target_url(&target, page);
let cache_key = format!("{url}#per={per_page}");
if let Some((ts, cached)) = cache.get(&cache_key) {
if ts.elapsed().unwrap_or_default().as_secs() < 300 {
return Ok(cached.clone());
}
}
let mut requester =
requester_or_default(&options, CHANNEL_ID, "xhamster.fetch_target");
let html = requester
.get_with_headers(&url, Self::html_headers(&url), None)
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))?;
if html.trim().is_empty() {
return Err(Error::from(format!("empty response for {url}")));
}
let all = Self::parse_list_page(&html)?;
let items: Vec<VideoItem> = all.into_iter().take(per_page.max(1)).collect();
if !items.is_empty() {
cache.insert(cache_key, items.clone());
}
Ok(items)
}
}
#[async_trait]
impl Provider for XhamsterProvider {
    /// Returns one page of videos.
    ///
    /// `page` falls back to 1 when unparseable or zero; `per_page` falls back
    /// to 10 and is clamped to 1..=60. A non-blank query is resolved through
    /// `resolve_query_target`; otherwise the category option and sort id
    /// decide. Failures are reported and produce an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page_number = page.parse::<u16>().map_or(1, |n| n.max(1));
        let page_size = per_page.parse::<usize>().map_or(10, |n| n.clamp(1, 60));
        let keywords = query
            .as_deref()
            .map(str::trim)
            .filter(|text| !text.is_empty());
        let target = match keywords {
            Some(text) => self.resolve_query_target(text),
            None => self.resolve_option_target(&options, &sort),
        };
        let failure = match self
            .fetch_target(cache, target, page_number, page_size, options)
            .await
        {
            Ok(items) => return items,
            Err(e) => e.to_string(),
        };
        report_provider_error(CHANNEL_ID, "get_videos", &failure).await;
        Vec::new()
    }

    /// Returns the channel descriptor built by `build_channel`.
    fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(cv);
        Some(channel)
    }
}
// Unit tests for URL building, query routing and listing-card parsing.
#[cfg(test)]
mod tests {
use super::*;
// Page 1 yields the bare URL, later pages append `?page=N`; one case per
// target variant that the assertions below cover.
#[test]
fn parses_target_url_pagination() {
assert_eq!(
XhamsterProvider::target_url(&Target::Newest, 1),
"https://xhamster.com/newest"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Newest, 2),
"https://xhamster.com/newest?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::MostViewed, 3),
"https://xhamster.com/most-viewed?page=3"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Search("big ass".to_string()), 1),
"https://xhamster.com/search/big+ass"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Category("amateur".to_string()), 2),
"https://xhamster.com/categories/amateur?page=2"
);
assert_eq!(
XhamsterProvider::target_url(&Target::Channel("vip4k".to_string()), 1),
"https://xhamster.com/channels/vip4k"
);
}
// A bare query that equals a category slug or label routes to that category.
#[test]
fn resolves_category_by_label_and_slug() {
let p = XhamsterProvider::new();
assert!(matches!(
p.resolve_query_target("amateur"),
Target::Category(s) if s == "amateur"
));
assert!(matches!(
p.resolve_query_target("Big Ass"),
Target::Category(s) if s == "big-ass"
));
assert!(matches!(
p.resolve_query_target("Old & Young"),
Target::Category(s) if s == "old-young"
));
}
// `cat:` and `channel:` prefixes route to the matching archive.
#[test]
fn resolves_explicit_shortcuts() {
let p = XhamsterProvider::new();
assert!(matches!(
p.resolve_query_target("cat:milf"),
Target::Category(s) if s == "milf"
));
assert!(matches!(
p.resolve_query_target("channel:vip4k"),
Target::Channel(s) if s == "vip4k"
));
}
// Queries that match no shortcut and no category become a keyword search.
#[test]
fn falls_through_to_search() {
let p = XhamsterProvider::new();
assert!(matches!(
p.resolve_query_target("some unknown query"),
Target::Search(_)
));
}
// Parses a single hand-written listing card and checks every extracted field.
#[test]
fn parses_listing_card() {
let p = XhamsterProvider::new();
let html = r#"
<html><body>
<div data-video-type="video" data-video-id="12345678">
<a data-role="thumb-link"
href="https://xhamster.com/videos/test-video-xh12345"
data-previewvideo="https://thumb-v1.xhcdn.com/a/abc/012/345/678/526x298.t.mp4"
aria-label="Test Video Title">
<img data-role="thumb-preview-img"
src="https://ic-vt-nss.xhcdn.com/a/abc/012/345/678/1280x720.1.jpg"
srcset="https://ic-vt-nss.xhcdn.com/a/xyz/012/345/678/526x298.1.jpg"/>
<div data-role="video-duration">
<div>10:30</div>
</div>
</a>
<div class="container-f146a video-thumb-info">
<a class="video-thumb-info__name" title="Test Video Title"
href="https://xhamster.com/videos/test-video-xh12345">
Test Video Title
</a>
<div class="video-thumb-uploader">
<div class="video-uploader-data">
<a data-role="video-uploader-link"
href="https://xhamster.com/channels/testchannel">TestChannel</a>
<a class="video-uploader__name"
href="https://xhamster.com/channels/testchannel">TestChannel</a>
<div class="video-thumb-views">1.2M views</div>
</div>
</div>
</div>
</div>
</body></html>
"#;
let items = XhamsterProvider::parse_list_page(html).expect("parse should succeed");
assert_eq!(items.len(), 1);
let item = &items[0];
assert_eq!(item.id, "12345678");
assert_eq!(item.title, "Test Video Title");
assert_eq!(
item.url,
"https://xhamster.com/videos/test-video-xh12345"
);
// Either thumbnail source is accepted here.
assert!(item.thumb.contains("526x298") || item.thumb.contains("1280x720"));
// "10:30" is expected to parse as 630 seconds.
assert_eq!(item.duration, 630);
assert_eq!(item.views, Some(1200000));
assert_eq!(item.uploader.as_deref(), Some("TestChannel"));
assert_eq!(
item.uploaderUrl.as_deref(),
Some("https://xhamster.com/channels/testchannel")
);
assert_eq!(
item.uploaderId.as_deref(),
Some("xhamster:channel:testchannel")
);
assert_eq!(
item.preview.as_deref(),
Some("https://thumb-v1.xhcdn.com/a/abc/012/345/678/526x298.t.mp4")
);
}
}

492
src/providers/xnxx.rs Normal file
View File

@@ -0,0 +1,492 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, requester_or_default,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
/// Grouping metadata exported for this provider: its channel group id and
/// descriptive tags.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed", "search"],
};
/// Site root that every listing URL and relative link is resolved against.
const BASE_URL: &str = "https://www.xnxx.com";
/// Channel id used for the channel descriptor, video items, uploader ids and
/// error reports.
const CHANNEL_ID: &str = "xnxx";
/// `User-Agent` header value sent with page requests.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0";
/// `Accept` header value sent with page requests.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Declares this module's error types through the `error_chain!` macro; the
// `Result` and `Error` names used by the provider methods come from here.
// `std::io::Error` is linked as a foreign error, and `Parse` is a custom kind
// carrying a message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// The listing a request resolves to; `target_url` turns each variant into a URL.
#[derive(Debug, Clone)]
enum Target {
// Most-viewed global feed (`/hits`) — the best "default" xnxx has
Hits,
// Keyword search (also covers tag shortcuts since /search/{term} works for both)
Search(String),
}
/// Provider for xnxx.com listings. The struct carries no fields.
#[derive(Debug, Clone)]
pub struct XnxxProvider;
impl XnxxProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel descriptor: identity fields, an empty category list and
/// a single-select sort filter. The client version argument is not consulted.
fn build_channel(&self, _cv: ClientVersion) -> Channel {
    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse XNXX ranking feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            choice("popular", "Most Viewed"),
            choice("new", "Latest (Most Viewed)"),
        ],
        multiSelect: false,
    };
    Channel {
        id: CHANNEL_ID.to_string(),
        name: "XNXX".to_string(),
        description: "XNXX — 10M+ free HD porn videos with keyword search, tag routing, and a most-viewed global feed.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=xnxx.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` failed: {e}")))
}
/// Decodes HTML entities in `text`; if decoding fails the input is returned
/// unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text with whitespace runs collapsed to single spaces
/// and HTML entities decoded.
fn text_of(el: &ElementRef<'_>) -> String {
    let mut words: Vec<&str> = Vec::new();
    for chunk in el.text() {
        // Text nodes are treated as whitespace-separated from one another.
        words.extend(chunk.split_whitespace());
    }
    Self::decode_html(&words.join(" "))
}
/// Resolves a link against the site root.
///
/// Absolute `http(s)://` URLs are returned as given (trimmed),
/// protocol-relative `//…` links get `https:`, root-relative `/…` links get
/// `BASE_URL`, and anything else is appended to `BASE_URL/`.
fn normalize_url(path: &str) -> String {
    match path.trim() {
        p if p.starts_with("http://") || p.starts_with("https://") => p.to_string(),
        p if p.starts_with("//") => format!("https:{p}"),
        p if p.starts_with('/') => format!("{BASE_URL}{p}"),
        p => format!("{BASE_URL}/{p}"),
    }
}
/// Appends the site's zero-based page index to `base`.
///
/// Trailing slashes on `base` are dropped. Pages `0` and `1` return the bare
/// base; page `N` (N ≥ 2) returns `{base}/{N-1}`.
fn page_url(base: &str, page: u16) -> String {
    let root = base.trim_end_matches('/');
    match page.checked_sub(1) {
        Some(offset) if offset > 0 => format!("{root}/{offset}"),
        _ => root.to_string(),
    }
}
/// Builds the listing URL for `target` at the given 1-based `page`.
///
/// `Hits` maps to `/hits`; `Search` maps to `/search/{slug}`. The slug is the
/// query split on whitespace and re-joined with `-` (so runs of spaces do not
/// produce `--`), ASCII-lowercased, with every byte outside
/// `a-z 0-9 - _ . ~` percent-encoded. Characters such as `/`, `?`, `#`, `%`
/// or non-ASCII text therefore can no longer alter the URL structure.
/// Paging is delegated to `page_url`.
fn target_url(target: &Target, page: u16) -> String {
    /// Turns free text into a single, safely escaped path segment.
    fn search_slug(query: &str) -> String {
        let mut slug = String::with_capacity(query.len());
        for word in query.split_whitespace() {
            if !slug.is_empty() {
                slug.push('-');
            }
            for byte in word.bytes() {
                match byte {
                    b'A'..=b'Z' => slug.push(char::from(byte.to_ascii_lowercase())),
                    b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                        slug.push(char::from(byte));
                    }
                    _ => slug.push_str(&format!("%{byte:02X}")),
                }
            }
        }
        slug
    }

    match target {
        Target::Hits => Self::page_url(&format!("{BASE_URL}/hits"), page),
        Target::Search(q) => {
            Self::page_url(&format!("{BASE_URL}/search/{}", search_slug(q)), page)
        }
    }
}
/// Builds the request headers used for HTML page fetches, in the order
/// `User-Agent`, `Accept`, `Accept-Language`, `Referer`.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs = [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_string(), value.to_string()))
        .collect()
}
async fn fetch_html(
requester: &mut crate::util::requester::Requester,
url: &str,
) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
}
fn parse_duration_mins(text: &str) -> u32 {
// Matches patterns like "16min", "23min", "1h20min", "1h"
let re_hm = Regex::new(r"(\d+)h\s*(\d+)?min").ok();
let re_h = Regex::new(r"(\d+)h(?:our)?s?").ok();
let re_m = Regex::new(r"(\d+)\s*min").ok();
let re_s = Regex::new(r"(\d+)\s*sec").ok();
let text = text.trim();
if let Some(re) = re_hm.as_ref() {
if let Some(cap) = re.captures(text) {
let h: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
let m: u32 = cap.get(2).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
return h * 3600 + m * 60;
}
}
if let Some(re) = re_h.as_ref() {
if let Some(cap) = re.captures(text) {
let h: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
return h * 3600;
}
}
if let Some(re) = re_m.as_ref() {
if let Some(cap) = re.captures(text) {
let m: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
return m * 60;
}
}
if let Some(re) = re_s.as_ref() {
if let Some(cap) = re.captures(text) {
let s: u32 = cap.get(1).and_then(|m| m.as_str().parse().ok()).unwrap_or(0);
return s;
}
}
0
}
/// Parses a view count from text like `"471.4M "`: only the first
/// whitespace-delimited token is used, with trailing commas removed, and it is
/// handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let first_token = match text.split_whitespace().next() {
        Some(token) => token,
        None => "",
    };
    parse_abbreviated_number(first_token.trim_end_matches(','))
}
/// Parses a percentage label into a fraction (`"98%"` → `0.98`).
///
/// When the text contains `%`, the number directly in front of the first `%`
/// is used, including an optional decimal part, so `"98.5%"` yields `0.985`
/// and digits elsewhere in the text are ignored. When there is no `%`, all
/// ASCII digits in the text are read as one integer percentage (the previous
/// behaviour).
///
/// Returns `None` when no number can be read.
fn parse_rating_pct(text: &str) -> Option<f32> {
    let Some(percent_at) = text.find('%') else {
        let digits: String = text.chars().filter(char::is_ascii_digit).collect();
        return digits.parse::<f32>().ok().map(|v| v / 100.0);
    };
    let head = text[..percent_at].trim_end();
    // Everything after the last non-numeric character is the number.
    let prefix = head.trim_end_matches(|c: char| c.is_ascii_digit() || c == '.');
    let number = head[prefix.len()..].trim_start_matches('.');
    number.parse::<f32>().ok().map(|v| v / 100.0)
}
/// Parses the video cards of an xnxx listing page, stopping once `limit`
/// items have been collected.
///
/// Two card layouts are handled:
/// - Format A (search pages): the outer div has a `data-eid` attribute.
/// - Format B (hits/browse pages): the outer div has a `data-video` JSON
///   attribute.
///
/// A `div.thumb-block` card is skipped when it has no link containing
/// `/video-`, no id segment after `video-`, no `img[data-src]` thumbnail, or
/// no non-blank `title` attribute on a link to the video.
///
/// # Errors
/// Fails only if one of the CSS selectors cannot be built.
fn parse_listing(html: &str, limit: usize) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let card_sel = Self::selector("div.thumb-block")?;
    let link_sel = Self::selector("a[href]")?;
    let img_sel = Self::selector("img[data-src]")?;
    let uploader_sel = Self::selector(".uploader a")?;
    let metadata_sel = Self::selector("div.metadata, p.metadata")?;
    let views_right_sel = Self::selector("span.right")?;
    let duration_left_sel = Self::selector("span.left")?;
    let superfluous_sel = Self::selector("span.superfluous")?;
    // Title: either `a.title[title]` (hits) or `p a[title]` (search)
    let title_sel = Self::selector("a.title[title], p a[title], a[title][href]")?;
    // Compiled once per page rather than once per card. If it ever failed to
    // compile, the `data-video` lookup is simply skipped.
    let data_video_id_re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok();
    let is_video_href = |href: Option<&str>| href.map(|h| h.contains("/video-")).unwrap_or(false);

    let mut items = Vec::new();
    for card in document.select(&card_sel) {
        // The first link that points at a /video- page identifies the card.
        let Some(video_link) = card
            .select(&link_sel)
            .find(|el| is_video_href(el.value().attr("href")))
        else {
            continue;
        };
        let href = video_link.value().attr("href").unwrap_or_default();
        // `normalize_url` always yields a non-empty URL, so no emptiness check
        // is needed here.
        let page_url = Self::normalize_url(href);

        // eid: the path segment after `video-` in /video-{eid}/{slug}.
        let eid = href
            .trim_matches('/')
            .split('/')
            .find(|s| s.starts_with("video-"))
            .and_then(|s| s.strip_prefix("video-"))
            .unwrap_or_default();
        if eid.is_empty() {
            continue;
        }

        // Id preference: `data-id`, then the "id" number inside the
        // `data-video` JSON, then the eid.
        let numeric_id = card
            .value()
            .attr("data-id")
            .map(str::to_string)
            .filter(|s| !s.is_empty())
            .or_else(|| {
                let payload = card.value().attr("data-video")?;
                let caps = data_video_id_re.as_ref()?.captures(payload)?;
                caps.get(1).map(|m| m.as_str().to_string())
            })
            .unwrap_or_else(|| eid.to_string());

        // Thumbnail
        let thumb = card
            .select(&img_sel)
            .next()
            .and_then(|el| el.value().attr("data-src").map(str::to_string))
            .unwrap_or_default();
        if thumb.is_empty() {
            continue;
        }

        // Title: the `title` attribute of a link pointing to the video.
        let title = card
            .select(&title_sel)
            .find(|el| is_video_href(el.value().attr("href")))
            .and_then(|el| el.value().attr("title").map(Self::decode_html))
            .filter(|t| !t.trim().is_empty());
        let Some(title) = title else { continue };

        // Uploader
        let uploader_el = card.select(&uploader_sel).next();
        let uploader_name = uploader_el
            .as_ref()
            .map(|el| Self::text_of(el))
            .filter(|s| !s.is_empty());
        let uploader_href =
            uploader_el.and_then(|el| el.value().attr("href").map(Self::normalize_url));

        // Metadata: views in span.right, rating in span.superfluous.
        // Duration: span.left (hits) or loose text inside the block (search).
        let (duration, views, rating) = match card.select(&metadata_sel).next() {
            Some(meta) => {
                // Rating: first .superfluous text that contains '%'.
                let rating = meta
                    .select(&superfluous_sel)
                    .map(|el| Self::text_of(&el))
                    .find(|t| t.contains('%'))
                    .and_then(|t| Self::parse_rating_pct(&t));
                // Views: text of span.right (number before the eye icon).
                let views = meta
                    .select(&views_right_sel)
                    .next()
                    .map(|el| Self::text_of(&el))
                    .and_then(|t| Self::parse_views(&t));
                // Duration: span.left first; if that gives nothing, scan the
                // whole metadata text.
                let duration = meta
                    .select(&duration_left_sel)
                    .next()
                    .map(|el| Self::text_of(&el))
                    .map(|t| Self::parse_duration_mins(&t))
                    .filter(|&d| d > 0)
                    .unwrap_or_else(|| Self::parse_duration_mins(&Self::text_of(&meta)));
                (duration, views, rating)
            }
            None => (0, None, None),
        };

        let mut item = VideoItem::new(
            numeric_id,
            title.trim().to_string(),
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        if let Some(v) = views {
            item.views = Some(v);
        }
        if let Some(r) = rating {
            item.rating = Some(r);
        }
        if let Some(name) = uploader_name {
            item.uploader = Some(name);
        }
        if let Some(url) = uploader_href.filter(|u| !u.is_empty()) {
            // Uploader id is the last path segment of the uploader URL.
            let uploader_id = url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default()
                .to_string();
            if !uploader_id.is_empty() {
                item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
            }
            item.uploaderUrl = Some(url);
        }
        items.push(item);
        if items.len() >= limit {
            break;
        }
    }
    Ok(items)
}
/// Turns a free-text query into a search target.
///
/// The query is trimmed and leading `@` characters are dropped. A `tag:`,
/// `cat:` or `category:` prefix (kind matched case-insensitively) with a
/// non-blank value searches for that trimmed value; every other query is
/// searched as is.
fn resolve_query_target(query: &str) -> Target {
    let text = query.trim().trim_start_matches('@');
    let term = match text.split_once(':') {
        Some((kind, value))
            if !value.trim().is_empty()
                && matches!(
                    kind.trim().to_ascii_lowercase().as_str(),
                    "tag" | "cat" | "category"
                ) =>
        {
            value.trim()
        }
        _ => text,
    };
    Target::Search(term.to_string())
}
/// Fetches and parses one listing page, with a short-lived cache in front.
///
/// The cache key is the listing URL plus `#per={per_page}`. A cached entry
/// younger than 300 seconds is returned as is. A failed request or a blank
/// body is reported through `report_provider_error` and yields `Ok` with an
/// empty list. Non-empty results are stored in the cache.
///
/// # Errors
/// Only parsing (selector construction) can produce an `Err`.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = Self::target_url(&target, page);
    let cache_key = format!("{url}#per={per_page}");
    if let Some((stored_at, hit)) = cache.get(&cache_key) {
        let age_secs = stored_at.elapsed().unwrap_or_default().as_secs();
        if age_secs < 300 {
            return Ok(hit.clone());
        }
    }
    let mut client =
        requester_or_default(&options, CHANNEL_ID, "xnxx.fetch_target.missing_requester");
    let fetched = Self::fetch_html(&mut client, &url)
        .await
        .map_err(|e| format!("url={url}; error={e}"));
    let body = match fetched {
        Ok(text) => text,
        Err(detail) => {
            report_provider_error(CHANNEL_ID, "fetch_target.request", &detail).await;
            return Ok(Vec::new());
        }
    };
    if body.trim().is_empty() {
        report_provider_error(CHANNEL_ID, "fetch_target.empty", &format!("url={url}")).await;
        return Ok(Vec::new());
    }
    let items = Self::parse_listing(&body, per_page)?;
    if !items.is_empty() {
        cache.insert(cache_key, items.clone());
    }
    Ok(items)
}
}
#[async_trait]
impl Provider for XnxxProvider {
    /// Returns one page of videos for the given query, or the `Hits` listing
    /// when the query is absent or blank.
    ///
    /// `page` falls back to 1 when unparsable and is never below 1;
    /// `per_page` falls back to 10 and is clamped to `1..=60`. Errors from
    /// `fetch_target` are reported and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
        // `sort` never influences the target: xnxx has no chronological
        // listing, so every sort value resolves to Hits (or to the search).
        // It is kept only for the error report below.
        let target = match query {
            Some(q) if !q.trim().is_empty() => Self::resolve_query_target(q.trim()),
            _ => Target::Hits,
        };
        match self.fetch_target(cache, target, page, per_page, options).await {
            Ok(items) => items,
            Err(e) => {
                crate::providers::report_provider_error(
                    CHANNEL_ID,
                    "get_videos",
                    &format!("sort={sort}; page={page}; error={e}"),
                )
                .await;
                vec![]
            }
        }
    }

    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(cv))
    }
}

652
src/providers/xvideos.rs Normal file
View File

@@ -0,0 +1,652 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use chrono::{Datelike, Local, Months};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use wreq::Version;
/// Static registration metadata for the XVideos provider.
// NOTE(review): how `group_id` and `tags` are consumed is not visible here — confirm against `crate::providers`.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed", "search"],
};
/// Site origin; every relative path and listing URL is built on top of it.
const BASE_URL: &str = "https://www.xvideos.com";
/// Channel identifier used in error reports, video items and uploader ids.
const CHANNEL_ID: &str = "xvideos";
/// `User-Agent` header value sent with every HTML request.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
/// `Accept` header value sent with every HTML request.
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Generates this module's `Error`, `ErrorKind` and `Result` types.
// `std::io::Error` converts automatically; `Parse` carries a free-form message.
error_chain! {
foreign_links {
Io(std::io::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
// Known categories as `(slug, label)` pairs.
// The slug has the form `{SiteName}-{ID}` as returned by the /c listing and is
// appended to `{BASE_URL}/c/` to form the archive URL; the label is the title
// shown in the channel's category filter and category list.
const CATEGORIES: &[(&str, &str)] = &[
("AI-239", "AI"),
("Amateur-65", "Amateur"),
("Anal-12", "Anal"),
("Arab-159", "Arab"),
("Asian_Woman-32", "Asian"),
("ASMR-229", "ASMR"),
("Ass-14", "Ass"),
("bbw-51", "BBW"),
("Bi_Sexual-62", "Bi"),
("Big_Ass-24", "Big Ass"),
("Big_Cock-34", "Big Cock"),
("Big_Tits-23", "Big Tits"),
("Black_Woman-30", "Black"),
("Blonde-20", "Blonde"),
("Blowjob-15", "Blowjob"),
("Brunette-25", "Brunette"),
("Cam_Porn-58", "Cam Porn"),
("Creampie-40", "Creampie"),
("Cuckold-237", "Cuckold"),
("Cumshot-18", "Cumshot"),
("Femdom-235", "Femdom"),
("Fisting-165", "Fisting"),
("Fucked_Up_Family-81", "Step Family"),
("Gangbang-69", "Gangbang"),
("Gapes-167", "Gapes"),
("Indian-89", "Indian"),
("Interracial-27", "Interracial"),
("Latina-16", "Latina"),
("Lesbian-26", "Lesbian"),
("Lingerie-83", "Lingerie"),
("Mature-38", "Mature"),
("Milf-19", "MILF"),
("Oiled-22", "Oiled"),
("Redhead-31", "Redhead"),
("Solo_and_Masturbation-33", "Solo"),
("Squirting-56", "Squirting"),
("Stockings-28", "Stockings"),
("Teen-13", "Teen"),
];
/// Listing a request resolves to; `target_url` turns each variant into a URL.
#[derive(Debug, Clone)]
enum Target {
/// Site front page, then `/new/{n}` for later pages.
Latest,
/// Best-of-month archive for the previous calendar month.
Best,
/// Keyword search (`/?k=...`); holds the raw, unencoded query.
Search(String),
/// Any paginated archive (category, tag, uploader); holds its base URL.
Archive(String),
}
/// Stateless provider for xvideos.com listings.
#[derive(Debug, Clone)]
pub struct XvideosProvider;
impl XvideosProvider {
pub fn new() -> Self {
Self
}
/// Builds the channel description: a sort filter (`new` / `best`) and a
/// single-select category filter whose first entry is `all`, followed by one
/// entry per `CATEGORIES` row keyed by slug. The client version is ignored.
fn build_channel(&self, _cv: ClientVersion) -> Channel {
    let cat_options: Vec<FilterOption> = std::iter::once(FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    })
    .chain(CATEGORIES.iter().map(|(slug, label)| FilterOption {
        id: slug.to_string(),
        title: label.to_string(),
    }))
    .collect();

    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse XVideos ranking feeds.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            FilterOption {
                id: "new".to_string(),
                title: "Latest".to_string(),
            },
            FilterOption {
                id: "best".to_string(),
                title: "Best of Month".to_string(),
            },
        ],
        multiSelect: false,
    };
    let category_option = ChannelOption {
        id: "categories".to_string(),
        title: "Categories".to_string(),
        description: "Browse an XVideos category archive.".to_string(),
        systemImage: "square.grid.2x2".to_string(),
        colorName: "orange".to_string(),
        options: cat_options,
        multiSelect: false,
    };

    Channel {
        id: CHANNEL_ID.to_string(),
        name: "XVideos".to_string(),
        description:
            "XVideos — one of the world's largest free porn sites with latest, best-of-month, category, tag, and keyword search."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=xvideos.com".to_string(),
        status: "active".to_string(),
        categories: CATEGORIES.iter().map(|(_, label)| label.to_string()).collect(),
        options: vec![sort_option, category_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|e| Error::from(format!("selector `{value}` parse failed: {e}")))
}
/// Decodes HTML entities in `text`; returns the input unchanged when the
/// decoded bytes cannot be turned into a string.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text with text nodes joined by single spaces, all
/// whitespace runs collapsed, and HTML entities decoded.
fn text_of(el: &ElementRef<'_>) -> String {
    let joined = el.text().collect::<Vec<&str>>().join(" ");
    let collapsed = joined.split_whitespace().collect::<Vec<&str>>().join(" ");
    Self::decode_html(&collapsed)
}
/// Canonical comparison key for category names and slugs: trimmed, leading
/// `#` removed, `_` and `-` treated as spaces, whitespace runs collapsed to a
/// single space, ASCII-lowercased.
fn normalize_key(s: &str) -> String {
    let stripped = s.trim().trim_start_matches('#');
    let spaced: String = stripped
        .chars()
        .map(|c| if matches!(c, '_' | '-') { ' ' } else { c })
        .collect();
    let mut key = spaced.split_whitespace().collect::<Vec<_>>().join(" ");
    key.make_ascii_lowercase();
    key
}
/// Turns a link found on the site into an absolute URL.
///
/// Absolute `http(s)` URLs pass through, protocol-relative `//host/...` gets
/// `https:`, and anything else is resolved against `BASE_URL`.
fn normalize_url(path: &str) -> String {
    let path = path.trim();
    let is_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| path.starts_with(*scheme));
    if is_absolute {
        path.to_string()
    } else if path.starts_with("//") {
        format!("https:{path}")
    } else if path.starts_with('/') {
        format!("{BASE_URL}{path}")
    } else {
        format!("{BASE_URL}/{path}")
    }
}
/// Request headers for HTML pages: the fixed user agent and accept values
/// plus the caller-supplied referer.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        ("User-Agent", FIREFOX_UA),
        ("Accept", HTML_ACCEPT),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer),
    ]
    .into_iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// URL of the best-of-month archive, `{BASE_URL}/best/YYYY-MM`.
///
/// `/best` always redirects to the previous calendar month's archive, so the
/// month is computed from the local date minus one month (falling back to the
/// current date if that subtraction is not representable).
fn best_base_url() -> String {
    let today = Local::now();
    let prev = match today.checked_sub_months(Months::new(1)) {
        Some(earlier) => earlier,
        None => today,
    };
    format!("{BASE_URL}/best/{}-{:02}", prev.year(), prev.month())
}
/// Builds the listing URL for `target` at the 1-based `page`.
///
/// Page 0 or 1 is the unsuffixed first page. Later pages use the site's
/// zero-based offset, i.e. page 2 is `/new/1`, `&p=1` or `{archive}/1`.
fn target_url(target: &Target, page: u16) -> String {
    // `None` for the first page, otherwise the site's page index (page - 1).
    let later = (page > 1).then(|| page - 1);
    match target {
        Target::Latest => match later {
            None => format!("{BASE_URL}/"),
            Some(n) => format!("{BASE_URL}/new/{}", n),
        },
        Target::Best => {
            let base = Self::best_base_url();
            match later {
                None => base,
                Some(n) => format!("{base}/{}", n),
            }
        }
        Target::Search(q) => {
            let encoded: String =
                url::form_urlencoded::byte_serialize(q.trim().as_bytes()).collect();
            match later {
                None => format!("{BASE_URL}/?k={encoded}"),
                Some(n) => format!("{BASE_URL}/?k={encoded}&p={}", n),
            }
        }
        Target::Archive(base_url) => {
            let base = base_url.trim_end_matches('/');
            match later {
                None => base.to_string(),
                Some(n) => format!("{base}/{}", n),
            }
        }
    }
}
async fn fetch_html(
requester: &mut crate::util::requester::Requester,
url: &str,
) -> Result<String> {
requester
.get_with_headers(url, Self::html_headers(url), Some(Version::HTTP_11))
.await
.map_err(|e| Error::from(format!("request failed for {url}: {e}")))
}
/// Parses a duration label into seconds.
///
/// Accepted forms: `MM:SS`, `HH:MM:SS`, and word-based labels such as
/// `"21 min"`, `"1h20min"`, `"2h"`, `"45sec"`. Any component that cannot be
/// parsed counts as 0, so unrecognised text yields 0. Arithmetic saturates at
/// `u32::MAX` instead of overflowing on absurd inputs.
fn parse_duration(text: &str) -> u32 {
    let text = text.trim();
    let to_secs = |h: u32, m: u32, s: u32| -> u32 {
        h.saturating_mul(3600)
            .saturating_add(m.saturating_mul(60))
            .saturating_add(s)
    };
    let field = |part: &str| -> u32 { part.trim().parse().unwrap_or(0) };

    // Colon-separated MM:SS and HH:MM:SS.
    let parts: Vec<&str> = text.split(':').collect();
    match parts.as_slice() {
        &[m, s] => return to_secs(0, field(m), field(s)),
        &[h, m, s] => return to_secs(field(h), field(m), field(s)),
        _ => {}
    }

    // Word-based: the number is the run of ASCII digits directly before the
    // unit, ignoring whitespace between the two.
    let low = text.to_ascii_lowercase();
    let extract_num = |needle: &str| -> u32 {
        low.find(needle)
            .and_then(|i| {
                let before = low[..i].trim_end();
                // Count trailing digit bytes rather than computing
                // `index_of_last_non_digit + 1`: that offset lands inside a
                // multi-byte character (e.g. a non-breaking space) and makes
                // the slice below panic. ASCII digits are always one byte, so
                // `len - digits` is a valid char boundary.
                let digits = before
                    .bytes()
                    .rev()
                    .take_while(u8::is_ascii_digit)
                    .count();
                before[before.len() - digits..].parse().ok()
            })
            .unwrap_or(0)
    };
    to_secs(extract_num("h"), extract_num("min"), extract_num("sec"))
}
/// Parses a view counter such as `"877.3k Views"` or `"1.2M Views"`.
///
/// The words `Views`, `views`, `View` and any commas are removed before the
/// remainder is handed to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let mut cleaned = text.to_string();
    for noise in ["Views", "views", "View", ","] {
        cleaned = cleaned.replace(noise, "");
    }
    parse_abbreviated_number(cleaned.trim())
}
/// Parses a listing page into at most `limit` video items.
///
/// Each `div.thumb-block` card is one candidate. A card is skipped when it
/// has no anchor whose href contains `/video.`, no `video.<eid>` path
/// segment, no lazy-loaded thumbnail (`img[data-src]`), or no title. Two
/// markup variants are handled (homepage vs. best-of-month page); see the
/// per-field comments.
///
/// Returns `Err` only when one of the CSS selectors fails to compile.
fn parse_listing(html: &str, limit: usize) -> Result<Vec<VideoItem>> {
    let document = Html::parse_document(html);
    let card_sel = Self::selector("div.thumb-block")?;
    let img_sel = Self::selector("img[data-src]")?;
    let link_sel = Self::selector("a[href]")?;
    // Homepage format: p.title a[title="…"]; best-page format: div.title a[href]
    let title_attr_sel = Self::selector("p.title a[title], a.title[title]")?;
    let title_text_sel = Self::selector("div.title a[href]")?;
    // Homepage uploader: p.metadata a span.name; best-page: div.video-metadata a.name
    let uploader_a_sel =
        Self::selector("p.metadata a[href], div.video-metadata a.name[href]")?;
    let dur_sel = Self::selector(".thumb-under span.duration")?;
    // Homepage views: text near "Views" in p.metadata
    let metadata_sel = Self::selector("p.metadata")?;
    // Best-page views: span.views-count
    let views_count_sel = Self::selector("span.views-count")?;

    // Compiled once per page rather than once per card. `None` (pattern
    // failed to compile) just disables the corresponding data-video fallback.
    let id_re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok();
    let preview_re = Regex::new(r#""previewVideo"\s*:\s*"([^"]+)""#).ok();

    let mut items = Vec::new();
    for card in document.select(&card_sel) {
        // Find the anchor whose href contains /video.
        let video_link = card.select(&link_sel).find(|el| {
            el.value()
                .attr("href")
                .map(|h| h.contains("/video."))
                .unwrap_or(false)
        });
        let Some(video_link) = video_link else {
            continue;
        };
        let href = video_link.value().attr("href").unwrap_or_default();
        let page_url = Self::normalize_url(href);
        if page_url.is_empty() {
            continue;
        }

        // eid: path segment starting with "video." e.g. "video.ohedfck8b21"
        let eid = href
            .split('/')
            .find(|s| s.starts_with("video."))
            .and_then(|s| s.strip_prefix("video."))
            .unwrap_or_default()
            .to_string();
        if eid.is_empty() {
            continue;
        }

        // Numeric id: try data-id attr, then data-video JSON
        // ({"id":57049413,...}), then fall back to the eid.
        let video_id = card
            .value()
            .attr("data-id")
            .filter(|s| !s.is_empty())
            .map(str::to_string)
            .or_else(|| {
                let dv = card.value().attr("data-video")?;
                id_re
                    .as_ref()?
                    .captures(dv)?
                    .get(1)
                    .map(|m| m.as_str().to_string())
            })
            .unwrap_or_else(|| eid.clone());

        // Thumbnail (lazy-loaded in data-src)
        let thumb = card
            .select(&img_sel)
            .next()
            .and_then(|el| el.value().attr("data-src"))
            .map(str::to_string)
            .unwrap_or_default();
        if thumb.is_empty() {
            continue;
        }

        // Preview video clip.
        // Homepage: data-pvv on the img; best page: "previewVideo" in data-video JSON.
        let preview = card
            .select(&img_sel)
            .next()
            .and_then(|el| el.value().attr("data-pvv"))
            .map(str::to_string)
            .filter(|s| !s.is_empty())
            .or_else(|| {
                let dv = card.value().attr("data-video")?;
                preview_re
                    .as_ref()?
                    .captures(dv)
                    .and_then(|c| c.get(1))
                    // JSON escapes forward slashes as `\/`.
                    .map(|m| m.as_str().replace("\\/", "/"))
            });

        // Title: prefer title attr (homepage), fall back to text (best page)
        let title = card
            .select(&title_attr_sel)
            .next()
            .and_then(|el| el.value().attr("title").map(Self::decode_html))
            .filter(|t| !t.trim().is_empty())
            .or_else(|| {
                card.select(&title_text_sel)
                    .next()
                    .map(|el| Self::text_of(&el))
                    .filter(|t| !t.trim().is_empty())
            });
        let Some(title) = title else {
            continue;
        };

        // Duration from span.duration inside .thumb-under
        let duration = card
            .select(&dur_sel)
            .next()
            .map(|el| Self::parse_duration(&Self::text_of(&el)))
            .unwrap_or(0);

        // Uploader: pick the first non-video anchor in the metadata area.
        // Skip anchors that link to a video page.
        let uploader_el = card.select(&uploader_a_sel).find(|el| {
            !el.value()
                .attr("href")
                .map(|h| h.contains("/video."))
                .unwrap_or(false)
        });
        let uploader_name = uploader_el
            .map(|el| Self::text_of(&el))
            .filter(|s| !s.is_empty());
        let uploader_url = uploader_el
            .and_then(|el| el.value().attr("href").map(Self::normalize_url))
            .filter(|u| !u.is_empty());

        // Views: best page has span.views-count; homepage has text near "Views"
        let views = card
            .select(&views_count_sel)
            .next()
            .map(|el| Self::text_of(&el))
            .and_then(|t| Self::parse_views(&t))
            .or_else(|| {
                card.select(&metadata_sel).next().and_then(|meta| {
                    let text = Self::text_of(&meta);
                    // ASCII lowercasing keeps byte offsets, so `idx` is valid
                    // for slicing the original text as well.
                    let low = text.to_ascii_lowercase();
                    low.find("views").and_then(|idx| {
                        text[..idx]
                            .split_whitespace()
                            .last()
                            .and_then(|w| Self::parse_views(w))
                    })
                })
            });

        let mut item = VideoItem::new(
            video_id,
            title.trim().to_string(),
            page_url,
            CHANNEL_ID.to_string(),
            thumb,
            duration,
        );
        if let Some(v) = views {
            item.views = Some(v);
        }
        if let Some(p) = preview {
            item.preview = Some(p);
        }
        if let Some(name) = uploader_name {
            item.uploader = Some(name);
        }
        if let Some(url) = uploader_url {
            // Uploader id is the last path segment of the uploader URL.
            let uploader_id = url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or_default()
                .to_string();
            if !uploader_id.is_empty() {
                item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_id}"));
            }
            item.uploaderUrl = Some(url);
        }

        items.push(item);
        if items.len() >= limit {
            break;
        }
    }
    Ok(items)
}
/// Finds the category whose label or slug matches `query` after
/// `normalize_key` and returns its archive URL (`{BASE_URL}/c/{slug}`).
fn lookup_category(query: &str) -> Option<String> {
    let wanted = Self::normalize_key(query);
    CATEGORIES
        .iter()
        .find(|(slug, label)| {
            Self::normalize_key(label) == wanted || Self::normalize_key(slug) == wanted
        })
        .map(|(slug, _)| format!("{BASE_URL}/c/{slug}"))
}
/// Maps a raw user query onto a listing [`Target`].
///
/// Leading `@` characters are dropped. Recognised `kind:value` prefixes
/// (case-insensitive kind, non-empty value):
/// - `tag:` → `/tags/{value}` with spaces as `-`, lowercased;
/// - `cat:` / `category:` → a known category's archive, otherwise
///   `/c/{value}` with spaces as `_`;
/// - `uploader:` / `channel:` / `profile:` → `/{value}` with spaces as `_`,
///   lowercased.
///
/// A bare query that matches a known category name also resolves to that
/// category's archive; everything else becomes a keyword search.
fn resolve_query_target(query: &str) -> Target {
    let trimmed = query.trim().trim_start_matches('@');
    let prefixed = trimmed
        .split_once(':')
        .map(|(kind, value)| (kind.trim().to_ascii_lowercase(), value.trim()))
        .filter(|(_, value)| !value.is_empty());

    if let Some((kind, value)) = prefixed {
        let archive = match kind.as_str() {
            "tag" => {
                let slug = value.replace(' ', "-").to_ascii_lowercase();
                Some(format!("{BASE_URL}/tags/{slug}"))
            }
            "cat" | "category" => Some(Self::lookup_category(value).unwrap_or_else(|| {
                let slug = value.replace(' ', "_");
                format!("{BASE_URL}/c/{slug}")
            })),
            "uploader" | "channel" | "profile" => {
                let slug = value.replace(' ', "_").to_ascii_lowercase();
                Some(format!("{BASE_URL}/{slug}"))
            }
            _ => None,
        };
        if let Some(url) = archive {
            return Target::Archive(url);
        }
    }

    // Category name lookup
    match Self::lookup_category(trimmed) {
        Some(url) => Target::Archive(url),
        None => Target::Search(trimmed.to_string()),
    }
}
/// Picks the listing when no query was given: a selected category (anything
/// other than `all` or empty) wins and is used verbatim as the `/c/` slug;
/// otherwise sort `best` / `top` selects the best-of-month feed and every
/// other value the latest feed.
fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target {
    match options.categories.as_deref() {
        Some(cat) if cat != "all" && !cat.is_empty() => {
            return Target::Archive(format!("{BASE_URL}/c/{cat}"));
        }
        _ => {}
    }
    let wants_best = matches!(
        sort.trim().to_ascii_lowercase().as_str(),
        "best" | "top"
    );
    if wants_best {
        Target::Best
    } else {
        Target::Latest
    }
}
/// Fetches one listing page for `target` and parses it into video items.
///
/// The result is cached under the page URL plus `per_page`; an entry younger
/// than 300 seconds is served without a request. A failed request or an
/// empty body is reported through `report_provider_error` and yields an empty
/// list; only a parse failure is propagated as `Err`. Empty results are never
/// cached.
async fn fetch_target(
    &self,
    cache: VideoCache,
    target: Target,
    page: u16,
    per_page: usize,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = Self::target_url(&target, page);
    let cache_key = format!("{url}#per={per_page}");
    match cache.get(&cache_key) {
        Some((ts, cached)) if ts.elapsed().unwrap_or_default().as_secs() < 300 => {
            return Ok(cached.clone());
        }
        _ => {}
    }

    let mut requester = requester_or_default(
        &options,
        CHANNEL_ID,
        "xvideos.fetch_target.missing_requester",
    );
    let fetched = Self::fetch_html(&mut requester, &url).await;
    let html = match fetched {
        Err(e) => {
            report_provider_error(
                CHANNEL_ID,
                "fetch_target.request",
                &format!("url={url}; error={e}"),
            )
            .await;
            return Ok(vec![]);
        }
        Ok(body) => body,
    };
    if html.trim().is_empty() {
        report_provider_error(CHANNEL_ID, "fetch_target.empty", &format!("url={url}")).await;
        return Ok(vec![]);
    }

    let items = Self::parse_listing(&html, per_page)?;
    if !items.is_empty() {
        cache.insert(cache_key, items.clone());
    }
    Ok(items)
}
}
#[async_trait]
impl Provider for XvideosProvider {
    /// Returns one page of videos.
    ///
    /// A non-blank query is resolved by `resolve_query_target`; otherwise the
    /// category option and `sort` decide the listing. `page` falls back to 1
    /// and is never below 1; `per_page` falls back to 10 and is clamped to
    /// `1..=60`. Errors from `fetch_target` are reported and turned into an
    /// empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let page = page.parse::<u16>().unwrap_or(1).max(1);
        let per_page = per_page.parse::<usize>().unwrap_or(10).clamp(1, 60);
        let keyword = query.as_deref().map(str::trim).filter(|q| !q.is_empty());
        let target = match keyword {
            Some(q) => Self::resolve_query_target(q),
            None => Self::resolve_option_target(&options, &sort),
        };
        let outcome = self
            .fetch_target(cache, target, page, per_page, options)
            .await;
        match outcome {
            Ok(items) => items,
            Err(e) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos",
                    &format!("sort={sort}; page={page}; error={e}"),
                )
                .await;
                vec![]
            }
        }
    }

    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, cv: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(cv))
    }
}

View File

@@ -322,13 +322,7 @@ impl XxthotsProvider {
)
.views(views);
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), video_url.clone());
video_item = video_item.preview(preview).formats(vec![format]);
video_item = video_item.preview(preview);
}
items.push(video_item);
}

View File

@@ -405,7 +405,7 @@ impl YespornProvider {
)));
}
let canonical_url = response.uri().to_string();
let canonical_url = response.url().to_string();
let body = response
.text()
.await
@@ -949,10 +949,12 @@ impl YespornProvider {
duration,
);
if let Some(preview) = preview {
let mut format =
VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
// let mut format =
// VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
// format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview)
// .formats(vec![format])
;
}
if let Some(views) = views {
item = item.views(views);

587
src/providers/youporn.rs Normal file
View File

@@ -0,0 +1,587 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::thread;
use url::{Url, form_urlencoded};
use wreq::Version;
/// Static registration metadata for the YouPorn provider.
// NOTE(review): how `group_id` and `tags` are consumed is not visible here — confirm against `crate::providers`.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "studio", "search"],
};
/// Site origin; the provider's `url` field is initialised from it.
const BASE_URL: &str = "https://www.youporn.com";
/// Channel identifier used in error reports, video items and uploader ids.
const CHANNEL_ID: &str = "youporn";
// Generates this module's `Error`, `ErrorKind` and `Result` types with
// automatic conversions from I/O and `wreq` request errors.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// Provider for youporn.com listings.
#[derive(Debug, Clone)]
pub struct YoupornProvider {
// Site origin all listing URLs are built from.
url: String,
// Lowercased link title -> target, collected from the site's front page by a
// background thread started in `new`; empty until that load succeeds.
shortcuts: Arc<RwLock<HashMap<String, Target>>>,
}
/// Listing a request resolves to; `build_url` turns each variant into a URL.
/// The `sort` fields are carried along but currently add nothing to the URL,
/// because `sort_suffix` always returns an empty string.
#[derive(Debug, Clone)]
enum Target {
/// Site front page.
Latest { sort: String },
/// Keyword search (`/search/?query=...`).
Search { query: String },
/// Tag archive (`/porntags/{slug}/`).
Tag { slug: String, sort: String },
/// Channel archive (`/channel/{slug}/`).
Channel { slug: String, sort: String },
/// Pornstar archive (`/pornstar/{slug}/`).
Pornstar { slug: String, sort: String },
/// Amateur archive (`/amateur/{slug}/`).
Amateur { slug: String, sort: String },
}
impl YoupornProvider {
/// Creates the provider with an empty shortcut table and starts the
/// background load that fills it from the site's front page.
pub fn new() -> Self {
    let shortcuts = Arc::new(RwLock::new(HashMap::new()));
    let provider = Self {
        url: BASE_URL.to_string(),
        shortcuts,
    };
    provider.spawn_initial_load();
    provider
}
/// Loads the shortcut table on a dedicated OS thread.
///
/// The thread runs its own single-threaded tokio runtime, fetches the front
/// page once and replaces the shared map with the links found there. Any
/// failure (runtime build, request, poisoned lock) silently leaves the map as
/// it was; there is no retry.
fn spawn_initial_load(&self) {
    let shortcuts = Arc::clone(&self.shortcuts);
    let url = self.url.clone();
    thread::spawn(move || {
        let Ok(rt) = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        else {
            return;
        };
        rt.block_on(async move {
            let mut requester = crate::util::requester::Requester::new();
            let Ok(html) = requester.get(&url, None).await else {
                return;
            };
            let map = Self::collect_shortcuts(&html);
            if let Ok(mut guard) = shortcuts.write() {
                *guard = map;
            }
        });
    });
}
/// Builds the channel description. The only option is a single-entry sort
/// filter (`new`); no categories are advertised. The client version is
/// ignored.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Latest feed ordering.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![FilterOption {
            id: "new".to_string(),
            title: "Most Recent".to_string(),
        }],
        multiSelect: false,
    };
    Channel {
        id: CHANNEL_ID.to_string(),
        name: "YouPorn".to_string(),
        description:
            "YouPorn listings with search, tag/channel shortcuts, and watch-page playback URLs."
                .to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=youporn.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Compiles a CSS selector, returning `None` when it does not parse.
fn selector(value: &str) -> Option<Selector> {
    match Selector::parse(value) {
        Ok(compiled) => Some(compiled),
        Err(_) => None,
    }
}
/// Decodes HTML entities (falling back to the raw input if decoding fails)
/// and collapses every whitespace run to a single space.
fn normalize_text(value: &str) -> String {
    let decoded = decode(value.as_bytes())
        .to_string()
        .unwrap_or_else(|_| value.to_string());
    let collapsed = decoded.split_whitespace().collect::<Vec<_>>().join(" ");
    collapsed.trim().to_string()
}
/// Turns a link found on the site into an absolute URL.
///
/// Blank input yields an empty string, absolute `http(s)` URLs pass through,
/// protocol-relative `//host/...` gets `https:`, and anything else is joined
/// to the provider's base URL with exactly one `/` between them.
fn normalize_url(&self, value: &str) -> String {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        return String::new();
    }
    let absolute = trimmed.starts_with("http://") || trimmed.starts_with("https://");
    if absolute {
        trimmed.to_string()
    } else if trimmed.starts_with("//") {
        format!("https:{trimmed}")
    } else {
        let base = self.url.trim_end_matches('/');
        let relative = trimmed.trim_start_matches('/');
        format!("{}/{}", base, relative)
    }
}
/// Canonical sort id. Only `new` is supported, so the requested value is
/// deliberately ignored.
fn normalized_sort(sort: &str) -> &'static str {
    _ = sort;
    "new"
}
/// URL path suffix for a sort id. Always empty: the requested value is
/// deliberately ignored.
fn sort_suffix(sort: &str) -> &'static str {
    _ = sort;
    ""
}
/// Query-string suffix selecting `page`; empty for the first page (0 or 1).
fn page_suffix(page: u8) -> String {
    match page {
        0 | 1 => String::new(),
        _ => format!("?page={page}"),
    }
}
/// Request headers for HTML pages: fixed accept / language / no-cache /
/// user-agent values plus the caller-supplied referer, in that order.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    [
        (
            "accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        ),
        ("accept-language", "en-US,en;q=0.7"),
        ("cache-control", "no-cache"),
        ("pragma", "no-cache"),
        (
            "user-agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        ),
        ("referer", referer),
    ]
    .into_iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Maps a raw user query onto a listing [`Target`].
///
/// Resolution order:
/// 1. blank query → latest feed;
/// 2. `tag:` / `channel:` / `pornstar:` / `amateur:` prefix (case-insensitive)
///    with a non-empty value → that archive, slug lowercased with spaces
///    as `-`;
/// 3. the lowercased, whitespace-collapsed query matches a front-page
///    shortcut title → that shortcut's archive;
/// 4. otherwise a keyword search with the trimmed query.
fn target_from_query(&self, query: &str, sort: &str) -> Target {
    let q = query.trim();
    let sort = Self::normalized_sort(sort).to_string();
    if q.is_empty() {
        return Target::Latest { sort };
    }

    let lower = q.to_ascii_lowercase();
    const PREFIXES: [(&str, &str); 4] = [
        ("tag:", "tag"),
        ("channel:", "channel"),
        ("pornstar:", "pornstar"),
        ("amateur:", "amateur"),
    ];
    let explicit = PREFIXES.iter().find_map(|(prefix, kind)| {
        let slug = lower.strip_prefix(*prefix)?.trim().replace(' ', "-");
        (!slug.is_empty()).then_some((*kind, slug))
    });
    if let Some((kind, slug)) = explicit {
        return match kind {
            "tag" => Target::Tag { slug, sort },
            "channel" => Target::Channel { slug, sort },
            "pornstar" => Target::Pornstar { slug, sort },
            _ => Target::Amateur { slug, sort },
        };
    }

    let shortcut_key = lower.split_whitespace().collect::<Vec<_>>().join(" ");
    if let Ok(guard) = self.shortcuts.read() {
        if let Some(target) = guard.get(&shortcut_key) {
            // Re-stamp the stored shortcut with the sort of this request.
            return match target {
                Target::Tag { slug, .. } => Target::Tag {
                    slug: slug.clone(),
                    sort,
                },
                Target::Channel { slug, .. } => Target::Channel {
                    slug: slug.clone(),
                    sort,
                },
                Target::Pornstar { slug, .. } => Target::Pornstar {
                    slug: slug.clone(),
                    sort,
                },
                Target::Amateur { slug, .. } => Target::Amateur {
                    slug: slug.clone(),
                    sort,
                },
                _ => target.clone(),
            };
        }
    }

    Target::Search {
        query: q.to_string(),
    }
}
/// Builds the listing URL for `target` at the 1-based `page`.
///
/// Archives use `{base}/{section}/{slug}/` followed by the sort and page
/// suffixes; search uses `/search/?query=...` with `&page=N` from page 2 on.
fn build_url(&self, target: &Target, page: u8) -> String {
    let base = &self.url;
    match target {
        Target::Latest { sort } => {
            let sorting = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/{}{}", base, sorting, paging)
        }
        Target::Search { query } => {
            let encoded: String = form_urlencoded::byte_serialize(query.as_bytes()).collect();
            if page <= 1 {
                format!("{}/search/?query={encoded}", self.url)
            } else {
                format!("{}/search/?query={encoded}&page={page}", self.url)
            }
        }
        Target::Tag { slug, sort } => {
            let sorting = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/porntags/{}/{}{}", base, slug.trim_matches('/'), sorting, paging)
        }
        Target::Channel { slug, sort } => {
            let sorting = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/channel/{}/{}{}", base, slug.trim_matches('/'), sorting, paging)
        }
        Target::Pornstar { slug, sort } => {
            let sorting = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/pornstar/{}/{}{}", base, slug.trim_matches('/'), sorting, paging)
        }
        Target::Amateur { slug, sort } => {
            let sorting = Self::sort_suffix(sort);
            let paging = Self::page_suffix(page);
            format!("{}/amateur/{}/{}{}", base, slug.trim_matches('/'), sorting, paging)
        }
    }
}
/// Scans a page for links into the tag, channel, pornstar and amateur
/// archives and maps each link's lowercased, whitespace-normalised text to
/// the matching [`Target`].
///
/// Links with empty text or an empty slug are ignored; when two links share
/// a title, the later one wins. Every stored target carries sort `"new"`.
fn collect_shortcuts(html: &str) -> HashMap<String, Target> {
    let mut map = HashMap::new();
    let document = Html::parse_document(html);
    let Some(link_selector) = Self::selector("a[href]") else {
        return map;
    };

    for link in document.select(&link_selector) {
        let Some(href) = link.value().attr("href") else {
            continue;
        };
        let title = Self::normalize_text(&link.text().collect::<String>()).to_ascii_lowercase();
        if title.is_empty() {
            continue;
        }

        // Absolute links are reduced to their path; relative ones are used as-is.
        let is_absolute = href.starts_with("http://") || href.starts_with("https://");
        let path = if is_absolute {
            Url::parse(href)
                .ok()
                .map(|u| u.path().to_string())
                .unwrap_or_default()
        } else {
            href.to_string()
        };

        // The four archive prefixes are mutually exclusive, so the first
        // match is the only match.
        let matched = ["/porntags/", "/channel/", "/pornstar/", "/amateur/"]
            .iter()
            .enumerate()
            .find_map(|(idx, prefix)| {
                path.strip_prefix(*prefix)
                    .map(|rest| (idx, rest.trim_matches('/').to_string()))
            });
        let Some((idx, slug)) = matched else {
            continue;
        };
        if slug.is_empty() {
            continue;
        }

        let target = match idx {
            0 => Target::Tag {
                slug,
                sort: "new".to_string(),
            },
            1 => Target::Channel {
                slug,
                sort: "new".to_string(),
            },
            2 => Target::Pornstar {
                slug,
                sort: "new".to_string(),
            },
            _ => Target::Amateur {
                slug,
                sort: "new".to_string(),
            },
        };
        map.insert(title, target);
    }
    map
}
/// Normalised text of an optional element; empty when the element is absent.
fn text_of(node: Option<ElementRef<'_>>) -> String {
    match node {
        Some(el) => Self::normalize_text(&el.text().collect::<String>()),
        None => String::new(),
    }
}
/// Parses a listing page into video items.
///
/// Each `article.video-box.js_video-box` element is one candidate. A card is
/// skipped when its first matching link does not point at a `/watch/` page or
/// when no video id can be determined. All other fields are best-effort and
/// fall back to empty / zero / absent values. Returns an empty list when the
/// card selector itself cannot be compiled.
fn parse_items(&self, html: &str) -> Vec<VideoItem> {
let document = Html::parse_document(html);
let Some(card_selector) = Self::selector("article.video-box.js_video-box") else {
return vec![];
};
// The remaining selectors are optional: a selector that fails to compile
// simply disables the field it feeds.
let link_selector = Self::selector("a[data-testid='plw_video_thumbnail_link'], a.video-box-image, a.video-title-text");
let title_selector = Self::selector("a.video-title-text");
let thumb_selector = Self::selector("img");
let duration_selector = Self::selector(".tm_video_duration");
let views_selector = Self::selector("span.info-views");
let uploader_selector = Self::selector("a.author-title-text");
let tag_selector = Self::selector("a.bubble-porntag");
let mut items = Vec::new();
for card in document.select(&card_selector) {
let link_node = link_selector
.as_ref()
.and_then(|s| card.select(s).next());
let href = link_node
.and_then(|v| v.value().attr("href"))
.unwrap_or_default();
// Only cards that link to a watch page are videos.
if !href.contains("/watch/") {
continue;
}
// Id: `data-video-id` on the card, else the path segment after `/watch/`.
let id = card
.value()
.attr("data-video-id")
.map(|v| v.to_string())
.or_else(|| {
href.split("/watch/")
.nth(1)
.and_then(|v| v.split('/').next())
.map(|v| v.to_string())
})
.unwrap_or_default();
if id.is_empty() {
continue;
}
// Title: prefer the `title` attribute, fall back to the link text.
let title = title_selector
.as_ref()
.and_then(|s| card.select(s).next())
.map(|v| {
let from_title = v.value().attr("title").unwrap_or_default();
if from_title.is_empty() {
Self::normalize_text(&v.text().collect::<String>())
} else {
Self::normalize_text(from_title)
}
})
.unwrap_or_default();
// Thumbnail: first <img>, trying `data-original`, `data-src`, then `src`.
let thumb = thumb_selector
.as_ref()
.and_then(|s| card.select(s).next())
.and_then(|v| {
v.value()
.attr("data-original")
.or_else(|| v.value().attr("data-src"))
.or_else(|| v.value().attr("src"))
})
.map(|v| self.normalize_url(v))
.unwrap_or_default();
let duration_text = Self::text_of(duration_selector.as_ref().and_then(|s| card.select(s).next()));
let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;
// The first `span.info-views` is read as the view count ...
let view_text = views_selector
.as_ref()
.and_then(|s| card.select(s).next())
.map(|v| Self::normalize_text(&v.text().collect::<String>()))
.unwrap_or_default();
let views = parse_abbreviated_number(&view_text).unwrap_or(0) as u32;
// ... and the second one as a percentage rating.
let rating = views_selector
.as_ref()
.and_then(|s| card.select(s).nth(1))
.map(|v| Self::normalize_text(&v.text().collect::<String>()).replace('%', ""))
.and_then(|v| v.parse::<f32>().ok());
let uploader_node = uploader_selector.as_ref().and_then(|s| card.select(s).next());
let uploader_name = uploader_node
.as_ref()
.map(|v| Self::normalize_text(&v.text().collect::<String>()))
.unwrap_or_default();
let uploader_href = uploader_node
.and_then(|v| v.value().attr("href"))
.map(|v| self.normalize_url(v));
let uploader_id = card
.value()
.attr("data-uploader-id")
.map(|v| format!("{CHANNEL_ID}:{v}"));
// Preview clip URL lives in `data-mediabook` on the link; `&amp;` is unescaped by hand.
let preview = link_node
.and_then(|v| v.value().attr("data-mediabook"))
.map(|v| v.replace("&amp;", "&"));
let mut tags = Vec::new();
if let Some(sel) = &tag_selector {
for tag in card.select(sel) {
let title = Self::normalize_text(&tag.text().collect::<String>());
if !title.is_empty() {
tags.push(title);
}
}
}
let mut item = VideoItem::new(
id,
title,
self.normalize_url(href),
CHANNEL_ID.to_string(),
thumb,
duration,
)
.views(views);
if let Some(value) = rating {
item = item.rating(value);
}
if !uploader_name.is_empty() {
item = item.uploader(uploader_name);
}
if let Some(value) = uploader_href {
item.uploaderUrl = Some(value);
}
if let Some(value) = uploader_id {
item.uploaderId = Some(value);
}
if let Some(value) = preview {
item = item.preview(value);
}
if !tags.is_empty() {
item = item.tags(tags);
}
items.push(item);
}
items
}
}
#[async_trait]
impl Provider for YoupornProvider {
    /// Returns one page of videos for the query (or the latest feed when the
    /// query is absent or blank).
    ///
    /// Cached results younger than five minutes are returned directly. On a
    /// request failure or an empty parse, the stale cached items (if any) are
    /// returned instead of an empty list. `page` is parsed as `u8`, so an
    /// unparsable or out-of-range value falls back to page 1. `_per_page` is
    /// ignored.
    async fn get_videos(
        &self,
        cache: VideoCache,
        _db_pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let query = query.unwrap_or_default();
        let page = page.parse::<u8>().unwrap_or(1);
        let target = self.target_from_query(&query, &sort);
        let video_url = self.build_url(&target, page);

        // Fresh cache entry: done. Stale entry: keep it as a fallback.
        let mut old_items = vec![];
        if let Some((time, items)) = cache.get(&video_url) {
            if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                return items.clone();
            }
            old_items = items.clone();
        }

        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
        let referer = format!("{}/", self.url.trim_end_matches('/'));
        let response = requester
            .get_with_headers(&video_url, Self::html_headers(&referer), Some(Version::HTTP_11))
            .await;
        let text = match response {
            Err(e) => {
                report_provider_error(
                    CHANNEL_ID,
                    "get_videos.request",
                    &format!("url={video_url}; error={e}"),
                )
                .await;
                return old_items;
            }
            Ok(text) => text,
        };

        let items = self.parse_items(&text);
        if items.is_empty() {
            return old_items;
        }
        cache.remove(&video_url);
        cache.insert(video_url, items.clone());
        items
    }

    /// Returns the channel description built by `build_channel`.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}

View File

@@ -0,0 +1,222 @@
use std::sync::Arc;
use ntex::web::{self, HttpRequest};
use wreq::cookie::Jar;
use wreq::redirect::Policy;
use wreq_util::Emulation;
use crate::providers::strip_url_scheme;
use crate::util::hoster_proxy::proxy_name_for_url;
use crate::util::requester::Requester;
/// Site origin; sent as the `referer` header by `request_headers`.
const BASE_URL: &str = "https://allpornstream.com";
/// `user-agent` header value sent by `request_headers`.
const BROWSER_UA: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";
/// Stateless proxy helper for allpornstream.com detail pages.
#[derive(Debug, Clone)]
pub struct AllPornStreamProxy {}
impl AllPornStreamProxy {
pub fn new() -> Self {
Self {}
}
/// Builds an HTTP client that emulates Chrome 120, keeps cookies in a fresh
/// jar and follows redirects with the default policy. Returns `None` when
/// the client cannot be built.
// NOTE(review): TLS certificate verification is disabled here — confirm this is intentional.
fn build_chrome_client() -> Option<wreq::Client> {
    let cookie_jar = Arc::new(Jar::default());
    let builder = wreq::Client::builder()
        .cert_verification(false)
        .emulation(Emulation::Chrome120)
        .cookie_provider(cookie_jar)
        .redirect(Policy::default());
    builder.build().ok()
}
/// Validates a proxied endpoint and returns it as an absolute https URL.
///
/// Leading slashes are dropped, a missing scheme defaults to `https://`, and
/// a leading `http://` is upgraded to `https://`. Only the scheme prefix is
/// rewritten: an `http://` that appears later in the URL (for example inside
/// a query parameter) is left untouched.
///
/// Returns `None` for blank input, unparsable URLs, hosts other than
/// `allpornstream.com` / `www.allpornstream.com`, and paths that do not start
/// with `/post/`.
fn normalize_detail_url(endpoint: &str) -> Option<String> {
    let value = endpoint.trim().trim_start_matches('/');
    if value.is_empty() {
        return None;
    }
    let detail_url = match value.strip_prefix("http://") {
        Some(rest) => format!("https://{rest}"),
        None if value.starts_with("https://") => value.to_string(),
        None => format!("https://{value}"),
    };
    let parsed = url::Url::parse(&detail_url).ok()?;
    let host = parsed.host_str()?;
    if !(host == "allpornstream.com" || host == "www.allpornstream.com") {
        return None;
    }
    if !parsed.path().starts_with("/post/") {
        return None;
    }
    Some(detail_url)
}
/// Header set sent with every detail-page request: accept, accept-language,
/// user-agent and referer, in that order.
fn request_headers() -> Vec<(String, String)> {
    let pairs: [(&str, &str); 4] = [
        (
            "accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        ),
        ("accept-language", "en-US,en;q=0.5"),
        ("user-agent", BROWSER_UA),
        ("referer", BASE_URL),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}
/// Ranks a hoster URL by preference; a lower value is preferred.
///
/// The first matching substring in the table decides the rank; URLs that match
/// nothing get the lowest preference (`4`).
fn host_rank(url: &str) -> u8 {
    const PREFERENCE: [(&str, u8); 5] = [
        ("voe.sx", 0),
        ("dood", 1),
        ("streamtape", 2),
        ("filemoon", 3),
        ("moonplayer", 3),
    ];
    PREFERENCE
        .iter()
        .find(|&&(needle, _)| url.contains(needle))
        .map_or(4, |&(_, rank)| rank)
}
/// Picks the preferred hoster URL from the `video_urls.link` array embedded in the page.
///
/// The page carries escaped JSON inside script tags, shaped like
/// `\"video_urls\":{\"link\":[[\"PROVIDER\",\"https://...\"],...]`. Every `https://`
/// URL inside that array is collected and the one with the lowest `host_rank`
/// is returned (the first one wins on ties).
///
/// Returns `None` when the marker is missing, the link array is empty, or no
/// URL is found. All fallback slice offsets are clamped to character
/// boundaries so malformed or non-ASCII markup cannot cause a panic.
fn extract_stream_url(html: &str) -> Option<String> {
    // Largest char boundary that is `<= limit`; slicing at it can never split a
    // multi-byte character.
    fn clamp_to_boundary(text: &str, limit: usize) -> usize {
        let mut index = limit.min(text.len());
        while !text.is_char_boundary(index) {
            index -= 1;
        }
        index
    }

    // RSC data uses \" for quotes.
    let needle = r#"\"video_urls\":{\"link\":["#;
    let pos = html.find(needle)?;
    let after = &html[pos + needle.len()..];

    // If the link array is empty ([]) there are no known hosting entries.
    // Do not scan further — the iframe section that follows contains embed
    // URLs for unknown providers (e.g. mydaddy.cc) that we cannot use.
    if after.starts_with(']') {
        return None;
    }

    // Capture up to the closing ]] of the link array; without one, look at no
    // more than roughly the first 2000 bytes.
    let array_end = after
        .find("]]")
        .unwrap_or_else(|| clamp_to_boundary(after, 2000));
    let array_str = &after[..array_end];

    // Extract all https:// URLs from within the array slice.
    let mut candidates: Vec<String> = Vec::new();
    let mut remaining = array_str;
    while let Some(start) = remaining.find("https://") {
        let url_str = &remaining[start..];
        // A URL ends at the escaped quote, or at a bare quote as a fallback.
        let terminator = url_str.find("\\\"").or_else(|| url_str.find('"'));
        let end = terminator.unwrap_or_else(|| clamp_to_boundary(url_str, 300));
        candidates.push(url_str[..end].to_string());

        // Step over the one-byte ASCII terminator when there is one; an
        // unterminated URL was cut at a boundary, so resume exactly there.
        let next = if terminator.is_some() { end + 1 } else { end };
        if next >= url_str.len() {
            break;
        }
        remaining = &url_str[next..];
    }

    candidates.into_iter().min_by_key(|u| Self::host_rank(u))
}
}
impl crate::proxies::Proxy for AllPornStreamProxy {
/// Resolves an allpornstream detail URL to the preferred hoster URL found on the page.
///
/// Returns an empty string when the URL is rejected by `normalize_detail_url`,
/// the client cannot be built, the request fails or has a non-success status,
/// the body is empty, or no hoster link is found. The shared `_requester` is
/// not used; the page is fetched with the client from `build_chrome_client`.
async fn get_video_url(&self, url: String, _requester: web::types::State<Requester>) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
let Some(client) = Self::build_chrome_client() else {
return String::new();
};
let mut request = client.get(&detail_url);
for (key, value) in Self::request_headers() {
request = request.header(key, value);
}
let Ok(response) = request.send().await else {
return String::new();
};
if !response.status().is_success() {
return String::new();
}
// A body read error is treated the same as an empty body.
let html = response.text().await.unwrap_or_default();
if html.is_empty() {
return String::new();
}
Self::extract_stream_url(&html).unwrap_or_default()
}
}
/// Route handler for `/proxy/allpornstream/{endpoint}*`.
///
/// Fetches the allpornstream detail page, extracts the embedded hoster URL, rewrites
/// it to the corresponding local proxy URL (e.g. `/proxy/doodstream/…`), and returns
/// a 302 redirect. This lets the client resolve the final stream URL through the correct
/// per-hoster proxy rather than hitting the raw hoster URL directly.
///
/// Response mapping as implemented below:
/// - `BadRequest` when the endpoint is rejected by `normalize_detail_url`.
/// - `InternalServerError` when the HTTP client cannot be built.
/// - `BadGateway` when the upstream request fails, is non-success, has an empty
///   body, contains no hoster URL, or the doodstream extraction yields nothing.
/// - `Found` with a `Location` header otherwise.
pub async fn serve(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let Some(detail_url) = AllPornStreamProxy::normalize_detail_url(&endpoint) else {
return Ok(web::HttpResponse::BadRequest().finish());
};
let Some(client) = AllPornStreamProxy::build_chrome_client() else {
return Ok(web::HttpResponse::InternalServerError().finish());
};
let mut request = client.get(&detail_url);
for (key, value) in AllPornStreamProxy::request_headers() {
request = request.header(key, value);
}
let response = match request.send().await {
Ok(r) if r.status().is_success() => r,
_ => return Ok(web::HttpResponse::BadGateway().finish()),
};
let html = match response.text().await {
Ok(h) if !h.is_empty() => h,
_ => return Ok(web::HttpResponse::BadGateway().finish()),
};
let Some(hoster_url) = AllPornStreamProxy::extract_stream_url(&html) else {
return Ok(web::HttpResponse::BadGateway().finish());
};
// For doodstream URLs call the proxy's Chrome extraction directly so
// the /d/ → /e/ normalisation and Cloudflare bypass happen server-side.
if proxy_name_for_url(&hoster_url) == Some("doodstream") {
// Fall back to the raw hoster URL when normalisation rejects it.
let embed_url = crate::proxies::doodstream::DoodstreamProxy::normalize_embed_url(&hoster_url)
.unwrap_or(hoster_url.clone());
let Some(cdn_url) = crate::proxies::doodstream::DoodstreamProxy::try_chrome_extraction(&embed_url).await else {
return Ok(web::HttpResponse::BadGateway().finish());
};
return Ok(web::HttpResponse::Found()
.header("Location", cdn_url)
.finish());
}
// For other known hosters redirect to the corresponding local proxy.
let redirect_url = match proxy_name_for_url(&hoster_url) {
Some(proxy_name) => {
// Build the redirect against the scheme and host this request arrived on.
let ci = req.connection_info();
format!(
"{}://{}/proxy/{}/{}",
ci.scheme(),
ci.host(),
proxy_name,
strip_url_scheme(&hoster_url)
)
}
// No matching local proxy: the client is redirected to the hoster URL as extracted.
None => hoster_url,
};
Ok(web::HttpResponse::Found()
.header("Location", redirect_url)
.finish())
}

321
src/proxies/archivebate.rs Normal file
View File

@@ -0,0 +1,321 @@
use std::time::Duration as StdDuration;
use ntex::web;
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
/// Stateless proxy that resolves archivebate.com `/watch/` pages to a mixdrop media URL.
#[derive(Debug, Clone)]
pub struct ArchivebateProxy {}
impl ArchivebateProxy {
/// Creates the proxy; the type carries no state.
pub fn new() -> Self {
Self {}
}
/// Converts a proxy endpoint into an allowed archivebate detail URL.
///
/// Whitespace and leading slashes are dropped and `https://` is prepended when
/// no scheme is present. Returns `None` for empty input or when
/// `is_allowed_detail_url` rejects the result.
fn normalize_detail_request(endpoint: &str) -> Option<String> {
    let trimmed = endpoint.trim().trim_start_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    let has_scheme = trimmed.starts_with("http://") || trimmed.starts_with("https://");
    let detail_url = if has_scheme {
        trimmed.to_owned()
    } else {
        format!("https://{trimmed}")
    };

    if Self::is_allowed_detail_url(&detail_url) {
        Some(detail_url)
    } else {
        None
    }
}
/// Accepts only `https` URLs on `archivebate.com` / `www.archivebate.com`
/// whose path starts with `/watch/`.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let host_allowed = matches!(
        parsed.host_str(),
        Some("archivebate.com" | "www.archivebate.com")
    );
    parsed.scheme() == "https" && host_allowed && parsed.path().starts_with("/watch/")
}
/// Returns the lower-cased host of `url`, or `None` when it does not parse or has no host.
fn host_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?;
    Some(host.to_ascii_lowercase())
}
/// True when the URL's host contains `mixdrop` or `m1xdrop`.
fn is_mixdrop_host(url: &str) -> bool {
    match Self::host_from_url(url) {
        Some(host) => ["mixdrop", "m1xdrop"]
            .iter()
            .any(|&marker| host.contains(marker)),
        None => false,
    }
}
/// Browser-like header set for HTML requests, with the caller-supplied referer first.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    let pairs: [(&str, &str); 4] = [
        ("Referer", referer),
        ("User-Agent", FIREFOX_UA),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    pairs
        .iter()
        .map(|&(name, value)| (name.to_owned(), value.to_owned()))
        .collect()
}
/// Returns the `src` attribute of the first `<iframe src=…>` in the document, if any.
fn first_iframe_source_from_html(html: &str) -> Option<String> {
    let iframe_selector = Selector::parse("iframe[src]").ok()?;
    let parsed = Html::parse_document(html);
    let first_iframe = parsed.select(&iframe_selector).next()?;
    let src = first_iframe.value().attr("src")?;
    Some(src.to_owned())
}
/// Returns the trimmed, non-empty `value` of the first `<input name='fid'>` element.
fn download_fid_from_detail_html(html: &str) -> Option<String> {
    let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
    let parsed = Html::parse_document(html);
    let input = parsed.select(&fid_selector).next()?;
    let fid = input.value().attr("value")?.trim();
    if fid.is_empty() {
        None
    } else {
        Some(fid.to_owned())
    }
}
/// Rewrites a mixdrop `/e/{id}` or `/f/{id}` URL to its embed form `/e/{id}`.
///
/// Scheme and host are kept as given. Returns `None` when the URL does not
/// parse, the host is not a mixdrop host, the first path segment is neither
/// `e` nor `f` (case-insensitive), or the media id is missing.
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?;

    let lowered = host.to_ascii_lowercase();
    let is_mixdrop = lowered.contains("mixdrop") || lowered.contains("m1xdrop");
    if !is_mixdrop {
        return None;
    }

    let mut parts = parsed.path_segments()?.filter(|part| !part.is_empty());
    let kind = parts.next()?;
    if !(kind.eq_ignore_ascii_case("e") || kind.eq_ignore_ascii_case("f")) {
        return None;
    }

    let media_id = parts.next()?.trim();
    if media_id.is_empty() {
        return None;
    }
    Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
}
/// Trims the value and gives protocol-relative URLs (`//host/...`) an `https:` scheme.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    match candidate.strip_prefix("//") {
        Some(rest) => format!("https://{rest}"),
        None => candidate.to_owned(),
    }
}
/// Finds the `MDCore.wurl = "…"` assignment in a mixdrop embed page.
///
/// The raw HTML is searched first; if the assignment is not present there, the
/// packed `eval(function(p,a,c,k,e,d)…)` script is unpacked and searched
/// instead. Protocol-relative results are given an `https:` scheme.
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
    let wurl_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
    let find_wurl = |text: &str| -> Option<String> {
        let captures = wurl_regex.captures(text)?;
        let raw = captures.get(1)?.as_str();
        Some(Self::normalize_possible_protocol_relative(raw))
    };

    if let Some(direct) = find_wurl(html) {
        return Some(direct);
    }
    let unpacked = Self::parse_mixin_packed_eval(html)?;
    find_wurl(&unpacked)
}
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
let eval_regex = Regex::new(
r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
)
.ok()?;
let captures = eval_regex.captures(html)?;
let payload_raw = captures.name("payload")?.as_str();
let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
if !(2..=36).contains(&radix) {
return None;
}
let payload = Self::unescape_js_single_quoted(payload_raw);
let tokens_raw = captures.name("tokens")?.as_str();
let tokens = tokens_raw.split('|').collect::<Vec<_>>();
let mut unpacked = payload;
for index in (0..count).rev() {
let Some(token) = tokens.get(index) else {
continue;
};
if token.is_empty() {
continue;
}
let key = Self::to_radix(index, radix);
let pattern = format!(r"\b{}\b", regex::escape(&key));
let re = Regex::new(&pattern).ok()?;
unpacked = re.replace_all(&unpacked, *token).into_owned();
}
Some(unpacked)
}
/// Decodes backslash escapes of a JavaScript single-quoted string body.
///
/// `\n`, `\r` and `\t` become the control characters; any other escaped
/// character stands for itself. A trailing lone backslash is dropped.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut pending_escape = false;
    for ch in value.chars() {
        if pending_escape {
            decoded.push(match ch {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            });
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            decoded.push(ch);
        }
    }
    decoded
}
/// Formats `value` in the given radix using digits `0-9a-z`.
///
/// Callers are expected to pass a radix in `2..=36`; zero is rendered as `"0"`.
fn to_radix(mut value: usize, radix: u32) -> String {
    const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
    if value == 0 {
        return String::from("0");
    }
    let base = radix as usize;
    // Digits come out least-significant first, so reverse once at the end.
    let mut least_significant_first: Vec<char> = Vec::new();
    while value != 0 {
        least_significant_first.push(char::from(DIGITS[value % base]));
        value /= base;
    }
    least_significant_first.into_iter().rev().collect()
}
/// Makes a URL absolute: full URLs pass through, `//host/…` gets `https:`, and
/// anything else is resolved against `https://archivebate.com/`.
fn absolute_url(value: &str) -> String {
    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|&scheme| value.starts_with(scheme));
    if has_scheme {
        value.to_owned()
    } else if let Some(rest) = value.strip_prefix("//") {
        format!("https://{rest}")
    } else {
        format!("https://archivebate.com/{}", value.trim_start_matches('/'))
    }
}
/// Fetches a mixdrop embed page and extracts the media URL from it.
///
/// The request is sent with `detail_url` as the referer and an 8 second
/// timeout value. Returns `None` when the request fails, the status is not a
/// success, the body cannot be read, or no media URL is found in the page.
async fn resolve_mixdrop_media_from_embed(
detail_url: &str,
embed_url: &str,
requester: &mut Requester,
) -> Option<String> {
let response = requester
.get_raw_with_headers_timeout(
embed_url,
Self::html_headers(detail_url),
Some(StdDuration::from_secs(8)),
)
.await
.ok()?;
if !response.status().is_success() {
return None;
}
let html = response.text().await.ok()?;
Self::extract_mixdrop_media_url(&html)
}
}
impl crate::proxies::Proxy for ArchivebateProxy {
/// Resolves an archivebate watch page to a direct mixdrop media URL.
///
/// Two sources are tried in order: the first iframe on the page (when it points
/// at a mixdrop host), then the `fid` input value rewritten to a mixdrop embed
/// URL. Returns an empty string when the URL is not allowed, the page cannot
/// be fetched, or neither source yields a media URL.
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
let Some(detail_url) = Self::normalize_detail_request(&url) else {
return String::new();
};
let mut requester = requester.get_ref().clone();
// A failed request collapses to an empty body and is handled below.
let detail_html = requester
.get_with_headers(
&detail_url,
Self::html_headers("https://archivebate.com/"),
Some(Version::HTTP_11),
)
.await
.unwrap_or_default();
if detail_html.is_empty() {
return String::new();
}
// First attempt: the embedded player iframe.
if let Some(iframe_url) = Self::first_iframe_source_from_html(&detail_html).map(|value| Self::absolute_url(&value)) {
if Self::is_mixdrop_host(&iframe_url) {
if let Some(media_url) =
Self::resolve_mixdrop_media_from_embed(&detail_url, &iframe_url, &mut requester).await
{
return media_url;
}
}
}
// Second attempt: the `fid` form value, which is treated as a download URL here.
if let Some(download_fid) = Self::download_fid_from_detail_html(&detail_html).map(|value| Self::absolute_url(&value)) {
if let Some(embed_url) = Self::mixdrop_embed_url_from_download_url(&download_fid) {
if let Some(media_url) =
Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester).await
{
return media_url;
}
}
}
String::new()
}
}
// Unit tests for endpoint normalisation and packed-script extraction.
#[cfg(test)]
mod tests {
use super::ArchivebateProxy;
// A scheme-less endpoint gains `https://`.
#[test]
fn normalizes_detail_request() {
let detail = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456");
assert_eq!(detail.as_deref(), Some("https://archivebate.com/watch/123456"));
}
// Paths outside `/watch/` are rejected.
#[test]
fn rejects_non_watch_paths() {
assert!(ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test").is_none());
}
// The media URL is only present after unpacking the p,a,c,k,e,d script.
#[test]
fn extracts_mixdrop_wurl_from_packed_eval() {
let html = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//o230m5y6z.3.4/5/6.7?8=9&a=b";',12,12,'|MDCore|wurl|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;
let extracted = ArchivebateProxy::extract_mixdrop_media_url(html)
.expect("expected extracted media url");
assert_eq!(
extracted,
"https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
);
}
}

113
src/proxies/clapdat.rs Normal file
View File

@@ -0,0 +1,113 @@
use ntex::web;
use regex::Regex;
use crate::util::requester::Requester;
const BASE_URL: &str = "https://www.clapdat.com";
/// Stateless proxy that resolves clapdat.com `/video/` pages to a media URL.
#[derive(Debug, Clone)]
pub struct ClapdatProxy {}
impl ClapdatProxy {
/// Creates the proxy; the type carries no state.
pub fn new() -> Self {
Self {}
}
/// Turns a proxy endpoint into a canonical `https://` clapdat detail URL.
///
/// Leading slashes and surrounding whitespace are stripped, a missing scheme is
/// filled in as `https://`, and a leading `http://` is upgraded to `https://`.
/// Only the scheme prefix is rewritten: an `http://` that appears later in the
/// URL (for example inside a query parameter) is left untouched.
///
/// Returns `None` for empty input, unparseable URLs, hosts other than
/// `clapdat.com` / `www.clapdat.com`, or paths outside `/video/`.
fn normalize_detail_url(endpoint: &str) -> Option<String> {
    let value = endpoint.trim().trim_start_matches('/');
    if value.is_empty() {
        return None;
    }

    let detail_url = if value.starts_with("https://") {
        value.to_string()
    } else if let Some(rest) = value.strip_prefix("http://") {
        // Upgrade the scheme only; never touch later occurrences of "http://".
        format!("https://{rest}")
    } else {
        format!("https://{value}")
    };

    let parsed = url::Url::parse(&detail_url).ok()?;
    let host = parsed.host_str()?;
    if host != "www.clapdat.com" && host != "clapdat.com" {
        return None;
    }
    if !parsed.path().starts_with("/video/") {
        return None;
    }
    Some(detail_url)
}
/// Decodes clapdat's obfuscated `file` value into raw bytes.
///
/// When the input is longer than 209 bytes, bytes `19..209` are cut out first.
/// The remainder is reduced to standard base64 alphabet characters, padded
/// with `=` to a multiple of four, and base64-decoded.
///
/// Returns `None` when nothing is left after filtering, when decoding fails,
/// or when a cut offset falls inside a multi-byte character. The input comes
/// from scraped HTML, so it must never be able to panic the slicing.
fn clapdat_decode(input: &str) -> Option<Vec<u8>> {
    let compact = if input.len() > 209 {
        // `str::get` returns `None` instead of panicking on a non-boundary offset.
        format!("{}{}", input.get(..19)?, input.get(209..)?)
    } else {
        input.to_string()
    };
    let cleaned: String = compact
        .chars()
        .filter(|c| c.is_ascii_alphanumeric() || *c == '+' || *c == '/')
        .collect();
    if cleaned.is_empty() {
        return None;
    }
    let mut padded = cleaned;
    while padded.len() % 4 != 0 {
        padded.push('=');
    }
    base64::Engine::decode(&base64::engine::general_purpose::STANDARD, padded.as_bytes()).ok()
}
/// Builds the media URL from the player config's `file_domain:"…"` and `file:"…"` entries.
///
/// The `file` value is decoded with `clapdat_decode`, each decoded byte is
/// mapped to a `char`, and the result is joined onto the domain as an
/// `https://` URL. Returns `None` if either entry is missing, decoding fails,
/// or the decoded path is empty.
fn extract_media_url(html: &str) -> Option<String> {
    let capture_first = |pattern: &str| -> Option<String> {
        let re = Regex::new(pattern).ok()?;
        let value = re.captures(html)?.get(1)?.as_str().trim().to_string();
        Some(value)
    };

    let domain = capture_first(r#"file_domain:"([^"]+)""#)?;
    let encoded = capture_first(r#"file:"([^"]+)""#)?;

    let path: String = Self::clapdat_decode(&encoded)?
        .into_iter()
        .map(char::from)
        .collect();
    if path.is_empty() {
        return None;
    }
    Some(format!("https://{}/{}", domain, path.trim_start_matches('/')))
}
}
impl crate::proxies::Proxy for ClapdatProxy {
/// Resolves a clapdat video page to its media URL.
///
/// Returns an empty string when the URL is rejected by `normalize_detail_url`,
/// the page cannot be fetched (or is empty), or no media URL can be extracted.
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
let mut requester = requester.get_ref().clone();
// Browser-like headers with the site itself as referer.
let headers = vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8".to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.8".to_string()),
(
"user-agent".to_string(),
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
),
("referer".to_string(), BASE_URL.to_string()),
];
// A failed request collapses to an empty body and is handled below.
let html = requester
.get_with_headers(&detail_url, headers, Some(wreq::Version::HTTP_11))
.await
.unwrap_or_default();
if html.is_empty() {
return String::new();
}
Self::extract_media_url(&html).unwrap_or_default()
}
}

View File

@@ -1,6 +1,11 @@
use std::sync::Arc;
use ntex::web;
use regex::{Captures, Regex};
use url::Url;
use wreq::cookie::Jar;
use wreq::redirect::Policy;
use wreq_util::Emulation;
use crate::util::requester::Requester;
@@ -12,14 +17,39 @@ impl DoodstreamProxy {
Self {}
}
/// Convert any doodstream URL variant to the embed-player URL (`/e/{id}`).
/// Handles `/d/` (download page) and pass-through for already-correct `/e/` paths.
/// Returns `None` if the URL does not parse, has no host, or the host is not an
/// allowed doodstream host.
///
/// The result is rebuilt as `https://{host}{path}`: the original scheme is
/// replaced by `https`, and only host and path are carried over (query string
/// and fragment are not included).
pub(crate) fn normalize_embed_url(url: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
let host = parsed.host_str()?;
if !Self::is_allowed_host(host) {
return None;
}
let path = parsed.path();
// Replace /d/ with /e/; everything else stays as-is
let new_path = if let Some(id) = path.strip_prefix("/d/") {
format!("/e/{id}")
} else {
path.to_string()
};
Some(format!("https://{host}{new_path}"))
}
fn normalize_detail_url(endpoint: &str) -> Option<String> {
let normalized = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
let url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint.trim().to_string()
} else {
format!("https://{}", endpoint.trim_start_matches('/'))
};
Self::is_allowed_detail_url(&normalized).then_some(normalized)
// is_allowed_detail_url checks host + path prefix (/t/, /e/, /d/)
if !Self::is_allowed_detail_url(&url) {
return None;
}
// Normalise /d/ (download page) to /e/ (embed player)
Some(Self::normalize_embed_url(&url).unwrap_or(url))
}
fn is_allowed_host(host: &str) -> bool {
@@ -31,6 +61,8 @@ impl DoodstreamProxy {
| "www.trailerhg.xyz"
| "streamhg.com"
| "www.streamhg.com"
| "doodstream.com"
| "www.doodstream.com"
)
}
@@ -242,6 +274,22 @@ impl DoodstreamProxy {
Some(format!("{origin}{relative}"))
}
/// Produces `len` pseudo-random ASCII alphanumeric characters.
///
/// The generator is a 64-bit linear congruential step seeded from the
/// sub-second nanoseconds of the current system time; it is not suitable for
/// anything security-sensitive.
fn random_alphanumeric(len: usize) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    let mut state = since_epoch.subsec_nanos() as u64;

    let mut out = String::with_capacity(len);
    for _ in 0..len {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Use the high bits of the state to pick the character.
        let pick = (state >> 33) as usize % ALPHABET.len();
        out.push(ALPHABET[pick] as char);
    }
    out
}
fn compose_pass_md5_media_url(pass_md5_url: &str, response_body: &str) -> Option<String> {
let raw = response_body
.trim()
@@ -262,32 +310,45 @@ impl DoodstreamProxy {
format!("{}://{}{}", parsed.scheme(), host, raw)
};
let query = Url::parse(pass_md5_url)
.ok()
.and_then(|url| url.query().map(str::to_string));
if let Some(query) = query {
let parsed = Url::parse(pass_md5_url).ok()?;
if let Some(query) = parsed.query().map(str::to_string) {
// Old format: token and expiry are in the pass_md5 query string
if !query.is_empty() && !media_url.contains("token=") {
let separator = if media_url.contains('?') { '&' } else { '?' };
media_url.push(separator);
media_url.push_str(&query);
}
} else {
// New doodstream format: token is the last path segment, base URL needs
// a random 10-char suffix and expiry appended (mirrors makePlay() in the player JS)
let token = parsed.path_segments()?.last()?.to_string();
if !token.is_empty() {
let rand = Self::random_alphanumeric(10);
let expiry = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis();
media_url.push_str(&rand);
media_url.push_str(&format!("?token={token}&expiry={expiry}"));
}
}
Some(Self::sanitize_media_url(&media_url))
}
async fn resolve_stream_from_pass_md5(
detail_url: &str,
page_url: &str,
html: &str,
requester: &mut Requester,
) -> Option<String> {
let pass_md5_url = Self::extract_pass_md5_url(html, detail_url).or_else(|| {
let pass_md5_url = Self::extract_pass_md5_url(html, page_url).or_else(|| {
Self::unpack_packer(html)
.and_then(|unpacked| Self::extract_pass_md5_url(&unpacked, detail_url))
.and_then(|unpacked| Self::extract_pass_md5_url(&unpacked, page_url))
})?;
let headers = vec![
("Referer".to_string(), detail_url.to_string()),
("Referer".to_string(), page_url.to_string()),
("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
("Accept".to_string(), "*/*".to_string()),
];
@@ -299,19 +360,96 @@ impl DoodstreamProxy {
}
}
impl DoodstreamProxy {
// Cloudflare on playmogo.com (where doodstream.com redirects) requires a Chrome TLS
// fingerprint. Firefox136 (the default Requester emulation) gets 403. Chrome120 bypasses it.
//
// Builds a client with the Chrome 120 emulation profile, a fresh cookie jar and the
// default redirect policy; returns `None` when the builder fails.
fn build_chrome_client() -> Option<wreq::Client> {
let jar = Arc::new(Jar::default());
wreq::Client::builder()
// NOTE(review): `false` is passed to `cert_verification`, which presumably
// disables TLS certificate validation — confirm this is intentional.
.cert_verification(false)
.emulation(Emulation::Chrome120)
.cookie_provider(jar)
.redirect(Policy::default())
.build()
.ok()
}
/// Attempts to resolve a doodstream page to a media URL using the Chrome-emulating client.
///
/// Steps: fetch `detail_url`; try `extract_stream_url` on the HTML; otherwise
/// locate the pass_md5 URL (in the raw or unpacked HTML), request it with the
/// final page URL as referer, and compose the media URL from the response body.
/// Returns `None` as soon as any step fails or answers with a non-success status.
pub(crate) async fn try_chrome_extraction(detail_url: &str) -> Option<String> {
let client = Self::build_chrome_client()?;
// No version override — let Chrome120 emulation negotiate HTTP/2 via ALPN,
// which Cloudflare requires for Chrome fingerprints (HTTP/1.1 gets 403)
let mut request = client.get(detail_url);
for (key, value) in Self::request_headers(detail_url) {
request = request.header(key, value);
}
let response = request.send().await.ok()?;
if !response.status().is_success() {
return None;
}
// The URL reported by the response is used for every later step, so
// relative lookups and the referer follow any redirect that happened.
let effective_url = response.url().to_string();
let html = response.text().await.ok()?;
if let Some(url) = Self::extract_stream_url(&html) {
return Some(url);
}
let pass_md5_url = Self::extract_pass_md5_url(&html, &effective_url).or_else(|| {
Self::unpack_packer(&html)
.and_then(|unpacked| Self::extract_pass_md5_url(&unpacked, &effective_url))
})?;
// Same client as above, so the cookie jar is shared with the page request.
let pm_response = client
.get(&pass_md5_url)
.header("Referer", &effective_url)
.header("X-Requested-With", "XMLHttpRequest")
.header("Accept", "*/*")
.send()
.await
.ok()?;
if !pm_response.status().is_success() {
return None;
}
let pm_body = pm_response.text().await.ok()?;
Self::compose_pass_md5_media_url(&pass_md5_url, &pm_body)
}
}
impl crate::proxies::Proxy for DoodstreamProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
// Chrome120 emulation bypasses Cloudflare on playmogo.com (doodstream's redirect target)
if let Some(cdn_url) = Self::try_chrome_extraction(&detail_url).await {
return cdn_url;
}
// Fallback: standard Requester (Firefox136 + FlareSolverr)
let mut requester = requester.get_ref().clone();
let html = match requester
.get_with_headers(&detail_url, Self::request_headers(&detail_url), None)
let (html, effective_url) = match requester
.get_raw_with_headers(&detail_url, Self::request_headers(&detail_url))
.await
{
Ok(text) => text,
Err(_) => return String::new(),
Ok(response) if response.status().is_success() => {
let effective_url = response.url().to_string();
match response.text().await {
Ok(text) => (text, effective_url),
Err(_) => return String::new(),
}
}
_ => {
let html = match requester
.get_with_headers(&detail_url, Self::request_headers(&detail_url), None)
.await
{
Ok(text) => text,
Err(_) => return String::new(),
};
(html, detail_url.clone())
}
};
if let Some(url) = Self::extract_stream_url(&html) {
@@ -319,7 +457,7 @@ impl crate::proxies::Proxy for DoodstreamProxy {
}
if let Some(url) =
Self::resolve_stream_from_pass_md5(&detail_url, &html, &mut requester).await
Self::resolve_stream_from_pass_md5(&effective_url, &html, &mut requester).await
{
return url;
}
@@ -340,6 +478,9 @@ mod tests {
assert!(DoodstreamProxy::is_allowed_detail_url(
"https://trailerhg.xyz/e/ttdc7a6qpskt"
));
assert!(DoodstreamProxy::is_allowed_detail_url(
"https://doodstream.com/e/31xp1rqt975g"
));
assert!(!DoodstreamProxy::is_allowed_detail_url(
"http://turboplayers.xyz/t/69bdfb21cc640"
));
@@ -390,6 +531,27 @@ mod tests {
);
}
// Covers the pass_md5 URL shape that has no query string: the token is taken
// from the last path segment.
#[test]
fn composes_media_url_from_pass_md5_response_new_format() {
// New doodstream format: token in path, no query string, base URL needs
// random suffix + ?token=TOKEN&expiry=TIMESTAMP appended
let pass_md5_url =
"https://playmogo.com/pass_md5/263443276-hash/wyr3joknzwbzdhufty55banc";
let body =
"https://mx273o.cloudatacdn.com/u5kj6mn5xpa3sdgge7d24z/vgz4woi6uq~";
let result = DoodstreamProxy::compose_pass_md5_media_url(pass_md5_url, body)
.expect("should produce a URL");
assert!(result.starts_with(body), "must start with base URL");
assert!(
result.contains("?token=wyr3joknzwbzdhufty55banc&expiry="),
"must contain token and expiry"
);
// random suffix is 10 chars between base URL and '?'
let suffix_start = body.len();
let query_start = result.find('?').unwrap();
assert_eq!(query_start - suffix_start, 10, "random suffix must be 10 chars");
}
#[test]
fn extracts_relative_pass_md5_url() {
let html = r#"

View File

@@ -0,0 +1,65 @@
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use crate::util::requester::Requester;
const REFERER: &str = "https://fikfap.com/";
/// Rebuilds the upstream image URL from the `endpoint` path parameter.
///
/// Leading slashes are dropped and `https://` is prepended when no scheme is
/// present. The request's own query string is appended unless it is empty or
/// the URL already contains a `?`.
fn endpoint_to_image_url(req: &HttpRequest) -> String {
    let path = req.match_info().query("endpoint").trim_start_matches('/');
    let has_scheme = path.starts_with("http://") || path.starts_with("https://");
    let mut target = if has_scheme {
        path.to_owned()
    } else {
        format!("https://{path}")
    };

    let query = req.query_string();
    if query.is_empty() || target.contains('?') {
        return target;
    }
    target.push('?');
    target.push_str(query);
    target
}
/// Fetches an image from upstream with the fikfap referer and relays it to the client.
///
/// The upstream status code is mirrored, `Content-Type` and `Content-Length`
/// are copied when they are valid strings, and the whole body is buffered
/// before it is returned. A request error yields `NotFound`; a body read
/// error yields a bad-gateway error.
///
/// NOTE(review): the target URL is taken from the request path and no host
/// restriction is visible in this function, so any URL appears to be fetched
/// on behalf of the caller — confirm the route or `Requester` restricts targets.
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let image_url = endpoint_to_image_url(&req);
let upstream = match requester
.get_ref()
.clone()
.get_raw_with_headers(
image_url.as_str(),
vec![("Referer".to_string(), REFERER.to_string())],
)
.await
{
Ok(response) => response,
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
};
let status = upstream.status();
// Headers are cloned because reading the body consumes the response.
let headers = upstream.headers().clone();
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if let Some(ct) = headers.get(CONTENT_TYPE) {
if let Ok(ct_str) = ct.to_str() {
resp.set_header(CONTENT_TYPE, ct_str);
}
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}

112
src/proxies/hanimethumb.rs Normal file
View File

@@ -0,0 +1,112 @@
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use scraper::{Html, Selector};
use crate::util::requester::Requester;
/// Maps an endpoint to a hanime.tv page URL.
///
/// Absolute `http(s)://` URLs pass through unchanged, `hanime.tv/...` gains an
/// `https://` scheme, and anything else is treated as a video slug under
/// `/videos/hentai/`. Leading slashes are ignored.
fn normalize_page_url(endpoint: &str) -> String {
    let path = endpoint.trim_start_matches('/');
    let is_absolute = path.starts_with("http://") || path.starts_with("https://");
    match (is_absolute, path.starts_with("hanime.tv/")) {
        (true, _) => path.to_owned(),
        (false, true) => format!("https://{path}"),
        (false, false) => format!("https://hanime.tv/videos/hentai/{path}"),
    }
}
/// Downloads the page and returns the `src` of the first
/// `div.hvpi-cover-container img.hvpi-cover` element.
///
/// Returns `None` when the request or body read fails, the element is absent,
/// or it has no `src` attribute. The response status is not inspected.
async fn fetch_cover_url(page_url: &str, requester: &Requester) -> Option<String> {
let html = requester
.clone()
.get_raw_with_headers(
page_url,
vec![("Referer".to_string(), "https://hanime.tv/".to_string())],
)
.await
.ok()?
.text()
.await
.ok()?;
let doc = Html::parse_document(&html);
let selector = Selector::parse("div.hvpi-cover-container img.hvpi-cover").ok()?;
let img = doc.select(&selector).next()?;
img.value().attr("src").map(str::to_string)
}
/// Resolves a hanime page to its cover image and relays that image to the client.
///
/// The page URL comes from the `endpoint` path parameter via `normalize_page_url`.
/// `NotFound` is returned when no cover URL is found or the image request
/// fails; a body read error yields a bad-gateway error. Otherwise the upstream
/// status, `Content-Type` and `Content-Length` are mirrored and the fully
/// buffered body is returned.
///
/// NOTE(review): `normalize_page_url` passes absolute `http(s)://` endpoints
/// through unchanged, so the page fetched here is not limited to hanime.tv —
/// confirm whether that is intended.
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let page_url = normalize_page_url(&endpoint);
let cover_url = match fetch_cover_url(&page_url, requester.get_ref()).await {
Some(url) => url,
None => return Ok(web::HttpResponse::NotFound().finish()),
};
let upstream = match requester
.get_ref()
.clone()
.get_raw_with_headers(
&cover_url,
vec![("Referer".to_string(), "https://hanime.tv/".to_string())],
)
.await
{
Ok(response) => response,
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
};
let status = upstream.status();
// Headers are cloned because reading the body consumes the response.
let headers = upstream.headers().clone();
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if let Some(ct) = headers.get(CONTENT_TYPE) {
if let Ok(ct_str) = ct.to_str() {
resp.set_header(CONTENT_TYPE, ct_str);
}
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}
// Unit tests for the three input shapes accepted by `normalize_page_url`.
#[cfg(test)]
mod tests {
use super::normalize_page_url;
// A bare slug is placed under /videos/hentai/.
#[test]
fn slug_becomes_full_url() {
assert_eq!(
normalize_page_url("reika-wa-karei-na-boku-no-joou-3"),
"https://hanime.tv/videos/hentai/reika-wa-karei-na-boku-no-joou-3"
);
}
// An absolute URL is returned unchanged.
#[test]
fn full_url_passes_through() {
assert_eq!(
normalize_page_url("https://hanime.tv/videos/hentai/reika-wa-karei-na-boku-no-joou-3"),
"https://hanime.tv/videos/hentai/reika-wa-karei-na-boku-no-joou-3"
);
}
// A scheme-less hanime.tv URL gains https://.
#[test]
fn hanime_tv_host_gets_scheme() {
assert_eq!(
normalize_page_url("hanime.tv/videos/hentai/some-slug"),
"https://hanime.tv/videos/hentai/some-slug"
);
}
}

171
src/proxies/heavyfetish.rs Normal file
View File

@@ -0,0 +1,171 @@
use std::collections::HashMap;
use ntex::web;
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use crate::util::requester::Requester;
/// Stateless proxy that resolves heavyfetish.com `/videos/` pages to a media URL.
#[derive(Debug, Clone)]
pub struct HeavyfetishProxy {}
impl HeavyfetishProxy {
/// Creates the proxy; the type carries no state.
pub fn new() -> Self {
Self {}
}
/// Converts a proxy endpoint into an allowed heavyfetish detail URL.
///
/// Whitespace and leading slashes are dropped and `https://` is prepended when
/// no scheme is present. Returns `None` for empty input or when
/// `is_allowed_detail_url` rejects the result.
fn normalize_detail_url(endpoint: &str) -> Option<String> {
    let trimmed = endpoint.trim().trim_start_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    let has_scheme = trimmed.starts_with("http://") || trimmed.starts_with("https://");
    let detail_url = if has_scheme {
        trimmed.to_owned()
    } else {
        format!("https://{trimmed}")
    };

    if Self::is_allowed_detail_url(&detail_url) {
        Some(detail_url)
    } else {
        None
    }
}
/// Accepts only `https` URLs on `heavyfetish.com` / `www.heavyfetish.com`
/// whose path starts with `/videos/`.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let host_allowed = matches!(
        parsed.host_str(),
        Some("heavyfetish.com" | "www.heavyfetish.com")
    );
    parsed.scheme() == "https" && host_allowed && parsed.path().starts_with("/videos/")
}
/// Cleans up a URL taken from page script or markup.
///
/// JSON-style `\/` escapes are undone; protocol-relative URLs get `https:`,
/// root-relative paths are resolved against `https://heavyfetish.com`, and a
/// leading `http://` is upgraded to `https://`. Blank input yields an empty string.
fn normalize_url(raw: &str) -> String {
    let unescaped = raw.trim().replace("\\/", "/");
    if let Some(rest) = unescaped.strip_prefix("//") {
        return format!("https://{rest}");
    }
    if unescaped.starts_with('/') {
        return format!("https://heavyfetish.com{unescaped}");
    }
    if let Some(rest) = unescaped.strip_prefix("http://") {
        return format!("https://{rest}");
    }
    unescaped
}
/// Infers a quality label from the URL text, checking from highest to lowest;
/// falls back to `480p` when no known label occurs.
fn quality_from_url(url: &str) -> String {
    const KNOWN_LABELS: [&str; 7] = ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"];
    KNOWN_LABELS
        .iter()
        .copied()
        .find(|&label| url.contains(label))
        .unwrap_or("480p")
        .to_string()
}
/// Numeric score of a quality label: all ASCII digits in the label read as one
/// decimal number, or `0` when there are none or the number overflows `u32`.
fn quality_score(label: &str) -> u32 {
    label
        .chars()
        .filter_map(|c| c.to_digit(10))
        .try_fold(0u32, |acc, digit| acc.checked_mul(10)?.checked_add(digit))
        .unwrap_or(0)
}
/// Compiles `value` as a regular expression, returning `None` when it is invalid.
fn regex(value: &str) -> Option<Regex> {
Regex::new(value).ok()
}
/// Returns capture group 1 of `regex` in `block`, with `\/` and `\'` escapes undone.
fn extract_js_value(block: &str, regex: &Regex) -> Option<String> {
    let captures = regex.captures(block)?;
    let raw = captures.get(1)?.as_str();
    Some(raw.replace("\\/", "/").replace("\\'", "'"))
}
/// Parses `value` as a CSS selector, returning `None` when it is invalid.
fn selector(value: &str) -> Option<Selector> {
Selector::parse(value).ok()
}
/// Picks the highest-quality media URL available on a detail page.
///
/// Candidates come from two places: the `flashvars` object in the page script
/// (`video_alt_url2`, `video_alt_url`, `video_url`, in that order) and the
/// `/get_file/` links inside `#download_popup`. Each candidate is stored under
/// its quality label, and the first URL recorded for a label is kept. The URL
/// whose label has the greatest `quality_score` is returned; `None` when no
/// candidate was found or the flashvars regex cannot be built.
fn extract_source_url(html: &str) -> Option<String> {
let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
// Builds a regex matching `key: '...'` with escaped single quotes allowed in the value.
let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
// quality label -> URL
let mut seen = HashMap::<String, String>::new();
if let Some(flashvars) = flashvars_regex
.captures(html)
.and_then(|value| value.get(1))
.map(|value| value.as_str().to_string())
{
for key in ["video_alt_url2", "video_alt_url", "video_url"] {
let Some(url_regex) = value_regex(key) else {
continue;
};
let Some(text_regex) = value_regex(&format!("{key}_text")) else {
continue;
};
let Some(url) = Self::extract_js_value(&flashvars, &url_regex) else {
continue;
};
let normalized = Self::normalize_url(&url);
if normalized.is_empty() {
continue;
}
// Prefer the explicit `{key}_text` label; otherwise infer it from the URL.
let quality = Self::extract_js_value(&flashvars, &text_regex)
.filter(|value| !value.is_empty())
.unwrap_or_else(|| Self::quality_from_url(&normalized));
seen.entry(quality).or_insert(normalized);
}
}
let document = Html::parse_document(html);
let Some(download_selector) = Self::selector("#download_popup a[href*='/get_file/']")
else {
// Without a usable selector only the flashvars candidates are ranked.
return seen
.iter()
.max_by_key(|(quality, _)| Self::quality_score(quality))
.map(|(_, url)| url.clone());
};
for element in document.select(&download_selector) {
let href = element.value().attr("href").unwrap_or_default();
let normalized = Self::normalize_url(href);
if normalized.is_empty() {
continue;
}
// Download links carry no label, so the quality is inferred from the URL.
let quality = Self::quality_from_url(&normalized);
seen.entry(quality).or_insert(normalized);
}
seen.iter()
.max_by_key(|(quality, _)| Self::quality_score(quality))
.map(|(_, url)| url.clone())
}
}
impl crate::proxies::Proxy for HeavyfetishProxy {
    /// Resolves a heavyfetish detail-page endpoint to a direct video URL.
    ///
    /// Returns an empty string when the endpoint is rejected, the page cannot
    /// be fetched (or is empty), or no source URL is found in it.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let detail_url = match Self::normalize_detail_url(&url) {
            Some(value) => value,
            None => return String::new(),
        };

        let mut client = requester.get_ref().clone();
        match client.get(&detail_url, None).await {
            Ok(html) if !html.is_empty() => Self::extract_source_url(&html).unwrap_or_default(),
            // Request failure and empty body are both treated as "no result".
            _ => String::new(),
        }
    }
}

341
src/proxies/hqporner.rs Normal file
View File

@@ -0,0 +1,341 @@
use ntex::web;
use regex::Regex;
use std::collections::HashMap;
use url::Url;
use crate::util::requester::Requester;
/// Stateless proxy that resolves hqporner.com detail pages to direct video
/// file URLs (see its `Proxy` implementation).
#[derive(Debug, Clone)]
pub struct HqpornerProxy {}
impl HqpornerProxy {
pub fn new() -> Self {
Self {}
}
/// Splits a proxy endpoint into the detail-page URL and an optional requested
/// quality.
///
/// The endpoint may carry a `/__quality__/<N>[p][/]` suffix; `<N>` is parsed as
/// `u16` and becomes the second tuple element (`None` when absent or not a
/// number). The detail part gets an `https://` prefix unless it already has an
/// explicit `http://`/`https://` scheme. Returns `None` for empty input or when
/// the URL fails `is_allowed_detail_url`.
fn normalize_detail_request(endpoint: &str) -> Option<(String, Option<u16>)> {
    let trimmed = endpoint.trim().trim_start_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    // Everything before the first marker is the page; the rest is the quality.
    let mut pieces = trimmed.splitn(2, "/__quality__/");
    let detail_part = pieces.next().unwrap_or(trimmed);
    let quality = pieces.next().and_then(|raw| {
        raw.trim()
            .trim_end_matches('/')
            .trim_end_matches('p')
            .parse::<u16>()
            .ok()
    });

    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| detail_part.starts_with(*scheme));
    let detail_url = if has_scheme {
        detail_part.to_owned()
    } else {
        format!("https://{}", detail_part.trim_start_matches('/'))
    };

    if Self::is_allowed_detail_url(&detail_url) {
        Some((detail_url, quality))
    } else {
        None
    }
}
/// Allowlist check for detail pages: the URL must parse, use `https`, point at
/// `hqporner.com` (with or without `www.`) and have a path under `/hdporn/`.
fn is_allowed_detail_url(url: &str) -> bool {
    Url::parse(url).is_ok_and(|parsed| {
        parsed.scheme() == "https"
            && parsed
                .host_str()
                .is_some_and(|host| host == "hqporner.com" || host == "www.hqporner.com")
            && parsed.path().starts_with("/hdporn/")
    })
}
/// Makes a scraped URL absolute and https.
///
/// After trimming: empty stays empty, `//host/..` gets `https:`, `/path` gets
/// the `https://www.hqporner.com` origin, a leading `http://` is upgraded to
/// `https://`, and anything else is returned unchanged.
fn normalize_url(raw: &str) -> String {
    match raw.trim() {
        "" => String::new(),
        value if value.starts_with("//") => format!("https:{value}"),
        value if value.starts_with('/') => format!("https://www.hqporner.com{value}"),
        value if value.starts_with("http://") => value.replacen("http://", "https://", 1),
        value => value.to_string(),
    }
}
/// Compiles `value` as a regular expression; an invalid pattern yields `None`.
fn regex(value: &str) -> Option<Regex> {
    match Regex::new(value) {
        Ok(compiled) => Some(compiled),
        Err(_) => None,
    }
}
/// Finds the `url: '/blocks/altplayer.php?i=…'` (or `nativeplayer.php`) entry in
/// the detail page's script and returns it as an absolute URL.
fn extract_player_url(detail_html: &str) -> Option<String> {
    let pattern = r#"(?is)url\s*:\s*['"](/blocks/(?:altplayer|nativeplayer)\.php\?i=[^'"]+)['"]"#;
    Self::regex(pattern).and_then(|player_regex| {
        player_regex
            .captures(detail_html)
            .and_then(|captures| captures.get(1))
            .map(|path| Self::normalize_url(path.as_str()))
    })
}
/// Extracts the URL of the page/file that hosts the actual video from a player
/// fragment (or, as a fallback, from the detail page itself).
///
/// Lookup order:
/// 1. the `src` of each `<source ` tag, accepting both the JS-escaped form
///    (`src=\"…\"`) and the plain form (`src="…"`);
/// 2. an `<iframe … src="…">` / `<iframe … src='…'>`;
/// 3. any escaped `src=\"…\"` / `src=\'…\'` attribute.
///
/// The first non-empty hit is normalized with `normalize_url` and returned.
///
/// The previous implementation finished with one more attempt using the same
/// escaped-`src` pattern that step 3 had already tried; that branch could not
/// produce a new result and has been removed.
fn extract_source_url(player_html: &str) -> Option<String> {
    for source in player_html.split("<source ").skip(1) {
        let src = source
            .split("src=\\\"")
            .nth(1)
            .and_then(|s| s.split("\\\"").next())
            .or_else(|| {
                source
                    .split("src=\"")
                    .nth(1)
                    .and_then(|s| s.split('"').next())
            })
            .unwrap_or_default();
        let url = Self::normalize_url(src);
        if !url.is_empty() {
            return Some(url);
        }
    }

    // Ordered from most to least specific; the first pattern that yields a
    // non-empty URL wins.
    const SRC_PATTERNS: [&str; 4] = [
        r#"(?is)<iframe[^>]+src="([^"]+)""#,
        r#"(?is)<iframe[^>]+src='([^']+)'"#,
        r#"(?is)src=\\\"([^\\"]+)\\\""#,
        r#"(?is)src=\\'([^\\']+)\\'"#,
    ];
    SRC_PATTERNS.iter().find_map(|pattern| {
        let regex = Self::regex(pattern)?;
        regex
            .captures(player_html)
            .and_then(|caps| caps.get(1))
            .map(|m| Self::normalize_url(m.as_str()))
            .filter(|value| !value.is_empty())
    })
}
/// Collects the `…/pubs/<id>/<360|720|1080>.mp4` links found in the video host
/// page, keyed by their numeric quality. When a quality appears more than once
/// the last occurrence is kept. Returns an empty map when nothing matches.
fn extract_quality_urls(video_page_html: &str) -> HashMap<u16, String> {
    let Some(file_regex) =
        Self::regex(r#"(?i)(?:https?:)?//[^"'\\\s]+/pubs/[A-Za-z0-9._-]+/(360|720|1080)\.mp4"#)
    else {
        return HashMap::new();
    };

    file_regex
        .captures_iter(video_page_html)
        .filter_map(|captures| {
            let whole = captures.get(0)?.as_str();
            let quality = captures.get(1)?.as_str().parse::<u16>().ok()?;
            let normalized = Self::normalize_url(whole);
            (!normalized.is_empty()).then_some((quality, normalized))
        })
        .collect()
}
/// Chooses a URL for the requested quality (default 1080).
///
/// The request is first capped to one of the known tiers (>= 1080 -> 1080,
/// 720..1079 -> 720, anything lower -> 360) and the tiers at or below that cap
/// are tried from best to worst. If none of them exists, any available tier is
/// accepted, again best first. Returns `None` only when no known tier is
/// present in `quality_urls`.
fn select_quality_url(quality_urls: &HashMap<u16, String>, requested: Option<u16>) -> Option<String> {
    const TIERS: [u16; 3] = [1080, 720, 360];

    let wanted = requested.unwrap_or(1080);
    let cap: u16 = if wanted >= 1080 {
        1080
    } else if wanted >= 720 {
        720
    } else {
        360
    };

    TIERS
        .iter()
        .filter(|&&tier| tier <= cap)
        // Last resort: take whatever exists, even above the requested tier.
        .chain(TIERS.iter())
        .find_map(|tier| quality_urls.get(tier).cloned())
}
}
impl crate::proxies::Proxy for HqpornerProxy {
    /// Resolves an hqporner detail endpoint to a direct `.mp4` URL.
    ///
    /// Steps: fetch the detail page, follow its player block (if any) to find
    /// the hosting page URL, falling back to a source URL embedded in the
    /// detail page itself; fetch that hosting page; pick the file matching the
    /// requested quality. Every request carries an hqporner `Referer`.
    /// Any failure along the way yields an empty string.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some((detail_url, requested_quality)) = Self::normalize_detail_request(&url) else {
            return String::new();
        };

        let mut client = requester.get_ref().clone();
        let referer_headers =
            || vec![("Referer".to_string(), "https://hqporner.com/".to_string())];

        let detail_html = client
            .get_with_headers(&detail_url, referer_headers(), None)
            .await
            .unwrap_or_default();
        if detail_html.is_empty() {
            return String::new();
        }

        // Preferred route: the player fragment referenced by the detail page.
        let mut from_player: Option<String> = None;
        if let Some(player_url) = Self::extract_player_url(&detail_html) {
            let player_html = client
                .get_with_headers(&player_url, referer_headers(), None)
                .await
                .unwrap_or_default();
            if !player_html.is_empty() {
                from_player = Self::extract_source_url(&player_html);
            }
        }

        // Fallback: a source/iframe embedded directly in the detail page.
        let source_page_url = from_player
            .filter(|candidate| !candidate.is_empty())
            .or_else(|| Self::extract_source_url(&detail_html))
            .unwrap_or_default();
        if source_page_url.is_empty() {
            return String::new();
        }

        let source_page_html = client
            .get_with_headers(&source_page_url, referer_headers(), None)
            .await
            .unwrap_or_default();
        if source_page_html.is_empty() {
            return String::new();
        }

        let quality_urls = Self::extract_quality_urls(&source_page_html);
        if quality_urls.is_empty() {
            return String::new();
        }
        Self::select_quality_url(&quality_urls, requested_quality).unwrap_or_default()
    }
}
// Unit tests for the pure HTML/URL helpers of `HqpornerProxy`; no network access.
#[cfg(test)]
mod tests {
use super::HqpornerProxy;
use std::collections::HashMap;
// A plain (unescaped) iframe `src` is picked up and the protocol-relative
// URL is upgraded to https.
#[test]
fn extract_source_url_supports_iframe_src() {
let html = r#"<iframe width="560" height="350" src="//mydaddy.cc/video/f7cbb41e218d3b1dca/&alt" frameborder="0" allowfullscreen=""></iframe>"#;
let extracted = HqpornerProxy::extract_source_url(html);
assert_eq!(
extracted.as_deref(),
Some("https://mydaddy.cc/video/f7cbb41e218d3b1dca/&alt")
);
}
// The fixture is a raw string, so `\"` is a literal backslash + quote: this
// exercises the escaped `src=\"…\"` form of a `<source>` tag.
#[test]
fn extract_source_url_supports_source_tag_src() {
let html =
r#"<video><source src=\"https://cdn.example.com/video.mp4\" type=\"video/mp4\"></video>"#;
let extracted = HqpornerProxy::extract_source_url(html);
assert_eq!(
extracted.as_deref(),
Some("https://cdn.example.com/video.mp4")
);
}
// The site-relative altplayer path inside the ajax call is returned as an
// absolute https://www.hqporner.com URL.
#[test]
fn extract_player_url_supports_altplayer_path() {
let html = r#"
<script>
function altPlayer() {
$.ajax({
type: 'POST',
url: '/blocks/altplayer.php?i=//mydaddy.cc/video/f7cbb41e218d3b1dca/',
success: function(data) {}
});
}
</script>
"#;
let extracted = HqpornerProxy::extract_player_url(html);
assert_eq!(
extracted.as_deref(),
Some(
"https://www.hqporner.com/blocks/altplayer.php?i=//mydaddy.cc/video/f7cbb41e218d3b1dca/"
)
);
}
// All three quality files are found; the `tile.vtt` preview entry in the
// same directory must not be mistaken for a video file.
#[test]
fn extract_quality_urls_from_mydaddy_html() {
let html = r#"
timelinePreview:{file:"//s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/tile.vtt",spriteRelativePath:true,type:"VTT"}
<source src="//s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/360.mp4" title="360p" type="video/mp4" />
<source src="//s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/720.mp4" title="720p HD" type="video/mp4" />
<source src="//s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/1080.mp4" title="1080p Full HD" type="video/mp4" />
"#;
let urls = HqpornerProxy::extract_quality_urls(html);
assert_eq!(
urls.get(&360).map(String::as_str),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/360.mp4")
);
assert_eq!(
urls.get(&720).map(String::as_str),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/720.mp4")
);
assert_eq!(
urls.get(&1080).map(String::as_str),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/1080.mp4")
);
}
// With only 360 and 720 available, a 1080 request degrades to 720 while
// 720 and 360 requests are served exactly.
#[test]
fn select_quality_url_falls_back_to_next_lower_quality() {
let mut urls = HashMap::new();
urls.insert(
360,
"https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/360.mp4".to_string(),
);
urls.insert(
720,
"https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/720.mp4".to_string(),
);
let requested_1080 = HqpornerProxy::select_quality_url(&urls, Some(1080));
assert_eq!(
requested_1080.as_deref(),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/720.mp4")
);
let requested_720 = HqpornerProxy::select_quality_url(&urls, Some(720));
assert_eq!(
requested_720.as_deref(),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/720.mp4")
);
let requested_360 = HqpornerProxy::select_quality_url(&urls, Some(360));
assert_eq!(
requested_360.as_deref(),
Some("https://s43.bigcdn.cc/pubs/69ecfb39b17117.73515587/360.mp4")
);
}
}

28
src/proxies/jable.rs Normal file
View File

@@ -0,0 +1,28 @@
use ntex::web;
const BASE_URL: &str = "https://jable.tv";
/// Redirects `<slug>` to the original jable.tv video page.
///
/// - empty slug: `404 Not Found`
/// - `HEAD` request: `200 OK` with no redirect (used by the check.py health
///   check to confirm the endpoint exists)
/// - anything else: `302 Found` with `Location: {BASE_URL}/videos/{slug}/`
pub async fn redirect_to_page(
    req: web::HttpRequest,
) -> impl web::Responder {
    let slug = req
        .match_info()
        .query("slug")
        .trim_matches('/')
        .to_string();
    let is_head = req.method() == ntex::http::Method::HEAD;

    match (slug.is_empty(), is_head) {
        (true, _) => web::HttpResponse::NotFound().finish(),
        (false, true) => web::HttpResponse::Ok().finish(),
        (false, false) => web::HttpResponse::Found()
            .header("Location", format!("{BASE_URL}/videos/{slug}/"))
            .finish(),
    }
}

View File

@@ -1,4 +1,5 @@
use ntex::web;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
@@ -11,59 +12,107 @@ impl JavtifulProxy {
JavtifulProxy {}
}
/// Turns a proxy endpoint into `(detail_url, video_id)`.
///
/// Accepted inputs: a full `http(s)://` URL (http is upgraded to https), a
/// host-prefixed path (`javtiful.com/…`, `www.javtiful.com/…`) or a bare path,
/// which is resolved against `https://javtiful.com/`. The URL must pass
/// `is_allowed_detail_url` and its path must look like `/video/<digits>/…`;
/// `<digits>` is returned as the video id. Otherwise `None`.
fn normalize_detail_request(endpoint: &str) -> Option<(String, String)> {
    const HOST_PREFIXES: [&str; 2] = ["javtiful.com/", "www.javtiful.com/"];

    let path = endpoint.trim().trim_start_matches('/');
    if path.is_empty() {
        return None;
    }

    let detail_url = if let Some(rest) = path.strip_prefix("http://") {
        format!("https://{rest}")
    } else if path.starts_with("https://") {
        path.to_owned()
    } else if HOST_PREFIXES.iter().any(|host| path.starts_with(*host)) {
        format!("https://{path}")
    } else {
        format!("https://javtiful.com/{path}")
    };

    if !Self::is_allowed_detail_url(&detail_url) {
        return None;
    }

    let parsed = Url::parse(&detail_url).ok()?;
    let mut segments = parsed.path_segments()?;
    if segments.next()? != "video" {
        return None;
    }
    let video_id = segments.next()?;
    if video_id.is_empty() || !video_id.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    let video_id = video_id.to_owned();
    Some((detail_url, video_id))
}
/// Allowlist check for detail pages: the URL must parse, use `https`, point at
/// `javtiful.com` (with or without `www.`) and have a path under `/video/`.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let host_allowed = matches!(
        parsed.host_str(),
        Some("javtiful.com" | "www.javtiful.com")
    );
    parsed.scheme() == "https" && host_allowed && parsed.path().starts_with("/video/")
}
pub async fn get_video_url(
&self,
url: String,
requester: web::types::State<Requester>,
) -> String {
let mut requester = requester.get_ref().clone();
let endpoint = url
.trim_start_matches('/')
.strip_prefix("https://")
.or_else(|| url.trim_start_matches('/').strip_prefix("http://"))
.unwrap_or(url.trim_start_matches('/'))
.trim_start_matches("www.javtiful.com/")
.trim_start_matches("javtiful.com/")
.trim_start_matches('/')
.to_string();
let detail_url = format!("https://javtiful.com/{endpoint}");
let text = requester.get(&detail_url, None).await.unwrap_or_default();
if text.is_empty() {
return "".to_string();
let Some((detail_url, _)) = Self::normalize_detail_request(&url) else {
println!("JavtifulProxy: Invalid detail URL: {url}");
return String::new();
};
let html = requester.get(&detail_url, Some(Version::HTTP_11)).await;
let Ok(html) = html else {
return String::new();
};
if html.is_empty() {
return String::new();
}
let video_id = endpoint.split('/').nth(1).unwrap_or("").to_string();
let token = text
.split("data-csrf-token=\"")
let mut media_url: String = html.split("playerSources\":[{\"src\":\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let form = wreq::multipart::Form::new()
.text("video_id", video_id.clone())
.text("pid_c", "".to_string())
.text("token", token.clone());
let resp = match requester
.post_multipart(
"https://javtiful.com/ajax/get_cdn",
form,
vec![("Referer".to_string(), detail_url)],
Some(Version::HTTP_11),
)
.await
{
Ok(r) => r,
Err(_) => return "".to_string(),
.map(str::trim)
.map(ToOwned::to_owned).unwrap_or_default().replace("\\u0026", "&");
media_url = match media_url.starts_with("/"){
true => format!("https://javtiful.com{media_url}"),
false => media_url
};
let text = resp.text().await.unwrap_or_default();
let json: serde_json::Value =
serde_json::from_str(&text).unwrap_or(serde_json::Value::Null);
let video_url = json
.get("playlists")
.map(|v| v.to_string().replace("\"", ""))
.unwrap_or_default();
return video_url;
return media_url;
}
}
#[cfg(test)]
mod tests {
    use super::JavtifulProxy;

    /// A full https URL is kept verbatim and the numeric id comes from the path.
    #[test]
    fn normalizes_detail_request_with_full_url() {
        let input = "https://javtiful.com/video/106796/fns-176";
        let parsed = JavtifulProxy::normalize_detail_request(input);
        let (url, video_id) = parsed.expect("detail request should parse");
        assert_eq!(url, "https://javtiful.com/video/106796/fns-176");
        assert_eq!(video_id, "106796");
    }

    /// A bare path is resolved against the javtiful.com origin.
    #[test]
    fn normalizes_detail_request_with_path_only() {
        let parsed = JavtifulProxy::normalize_detail_request("video/1000/demo");
        let (url, video_id) = parsed.expect("detail request should parse");
        assert_eq!(url, "https://javtiful.com/video/1000/demo");
        assert_eq!(video_id, "1000");
    }
}

184
src/proxies/lulustream.rs Normal file
View File

@@ -0,0 +1,184 @@
use std::sync::Arc;
use ntex::web;
use url::Url;
use wreq::cookie::Jar;
use wreq::redirect::Policy;
use wreq_util::Emulation;
use crate::util::{dean_edwards, requester::Requester};
/// Stateless proxy that resolves lulustream.com / luluvid.com pages to the
/// media URL found in the luluvid embed player (see its `Proxy` implementation).
#[derive(Debug, Clone)]
pub struct LulustreamProxy {}
impl LulustreamProxy {
pub fn new() -> Self {
LulustreamProxy {}
}
/// Turns a proxy endpoint into `(detail_url, video_id)`.
///
/// Accepted inputs: a full `http(s)://` URL, a host-prefixed path
/// (`lulustream.com/…`, `www.lulustream.com/…`, `luluvid.com/…`) or a bare
/// path, which is resolved against `https://lulustream.com/`. The URL must
/// pass `is_allowed_detail_url`.
///
/// The video id is the last *non-empty* path segment, so a trailing slash
/// (`/d/abc123/`) still yields `abc123` instead of an empty id. Returns `None`
/// when the URL is rejected or the path has no non-empty segment.
fn normalize_detail_request(endpoint: &str) -> Option<(String, String)> {
    let endpoint = endpoint.trim().trim_start_matches('/');
    if endpoint.is_empty() {
        return None;
    }
    let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
        endpoint.to_string()
    } else if endpoint.starts_with("lulustream.com/")
        || endpoint.starts_with("www.lulustream.com/")
        || endpoint.starts_with("luluvid.com/")
    {
        format!("https://{endpoint}")
    } else {
        format!("https://lulustream.com/{endpoint}")
    };
    if !Self::is_allowed_detail_url(&detail_url) {
        return None;
    }
    let parsed = Url::parse(&detail_url).ok()?;
    // Skip empty segments produced by trailing or doubled slashes.
    let video_id = parsed
        .path_segments()?
        .filter(|segment| !segment.is_empty())
        .last()
        .map(ToOwned::to_owned)?;
    Some((detail_url, video_id))
}
/// Allowlist check: the URL must parse, use `https`, point at
/// `lulustream.com`, `www.lulustream.com` or `luluvid.com`, and have a path
/// other than empty or `/`.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let host_allowed = matches!(
        parsed.host_str(),
        Some("lulustream.com" | "www.lulustream.com" | "luluvid.com")
    );
    parsed.scheme() == "https" && host_allowed && !matches!(parsed.path(), "" | "/")
}
/// Builds a one-off HTTP client that emulates Chrome 120, with its own cookie
/// jar and the default redirect policy. Returns `None` if the client cannot be
/// built.
///
/// Chrome120 emulation bypasses Cloudflare on luluvid.com (Firefox136 gets blocked).
// NOTE(review): certificate verification is switched off for this client —
// confirm that this is intentional.
fn build_chrome_client() -> Option<wreq::Client> {
    let cookie_jar = Arc::new(Jar::default());
    let builder = wreq::Client::builder()
        .cert_verification(false)
        .emulation(Emulation::Chrome120)
        .cookie_provider(cookie_jar)
        .redirect(Policy::default());
    match builder.build() {
        Ok(client) => Some(client),
        Err(_) => None,
    }
}
fn extract_media_url(html: &str) -> Option<String> {
// Fast path: file URL present in plain text (no packing)
if html.contains("[{file:\"") {
let url = html
.split("[{file:\"")
.nth(1)
.and_then(|s| s.split('"').next())?
.to_string();
if !url.is_empty() {
return Some(url);
}
}
// Unpack the Dean Edwards p,a,c,k,e,d script that embeds the player config.
// The packed payload encodes the jwplayer setup call; after decoding it contains
// `sources:[{file:"https://cdn*.cdn-tnmr.org/hls2/.../master.m3u8?..."}]`.
let packed = html
.split("<script type='text/javascript'>")
.nth(1)
.and_then(|t| t.split("</script>").next())?;
let unpacked = dean_edwards::unpack(packed).ok()?;
unpacked
.split("[{file:\"")
.nth(1)
.and_then(|s| s.split('"').next())
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
}
/// Fetches `embed_url` with the Chrome-emulating client and extracts the media
/// URL from the response body. Returns `None` on any failure: client build
/// error, request error, non-success status, unreadable body or no media URL.
async fn try_chrome_extraction(embed_url: &str) -> Option<String> {
    let client = Self::build_chrome_client()?;
    let page = match client.get(embed_url).send().await {
        Ok(response) if response.status().is_success() => response,
        _ => return None,
    };
    let body = page.text().await.ok()?;
    Self::extract_media_url(&body)
}
}
impl crate::proxies::Proxy for LulustreamProxy {
    /// Resolves a lulustream/luluvid endpoint to the media URL of its
    /// `https://luluvid.com/e/<id>` embed page; empty string on failure.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let video_id = match Self::normalize_detail_request(&url) {
            Some((_, id)) => id,
            None => return String::new(),
        };
        let embed_url = format!("https://luluvid.com/e/{video_id}");

        match Self::try_chrome_extraction(&embed_url).await {
            // Chrome120 emulation bypasses Cloudflare; try it first.
            Some(media_url) => media_url,
            // Fallback: standard requester (Firefox136 + optional FlareSolverr).
            None => {
                let mut fallback = requester.get_ref().clone();
                let html = fallback.get(&embed_url, None).await.unwrap_or_default();
                Self::extract_media_url(&html).unwrap_or_default()
            }
        }
    }
}
// Unit tests for the pure URL/HTML helpers of `LulustreamProxy`; no network access.
#[cfg(test)]
mod tests {
use super::LulustreamProxy;
// A full lulustream URL is kept verbatim; the id is the last path segment.
#[test]
fn normalizes_detail_request_with_full_url() {
let (url, video_id) =
LulustreamProxy::normalize_detail_request("https://lulustream.com/d/s484n23k8opy")
.expect("detail request should parse");
assert_eq!(url, "https://lulustream.com/d/s484n23k8opy");
assert_eq!(video_id, "s484n23k8opy");
}
// A bare path is resolved against the lulustream.com origin.
#[test]
fn normalizes_detail_request_with_path_only() {
let (url, video_id) = LulustreamProxy::normalize_detail_request("d/s484n23k8opy")
.expect("detail request should parse");
assert_eq!(url, "https://lulustream.com/d/s484n23k8opy");
assert_eq!(video_id, "s484n23k8opy");
}
// luluvid.com embed URLs are accepted as well.
#[test]
fn normalizes_luluvid_url() {
let (url, video_id) =
LulustreamProxy::normalize_detail_request("https://luluvid.com/e/s484n23k8opy")
.expect("detail request should parse");
assert_eq!(url, "https://luluvid.com/e/s484n23k8opy");
assert_eq!(video_id, "s484n23k8opy");
}
// Fast path: the `[{file:"…"}]` marker is present in the raw HTML.
#[test]
fn extracts_media_url_from_plain_html() {
let html = r#"[{file:"https://cdn1007.cdn-tnmr.org/hls2/02/02723/abc_h/master.m3u8?t=TOKEN&s=12345&e=28800&f=999&i=0.3&sp=0"}]"#;
assert_eq!(
LulustreamProxy::extract_media_url(html).as_deref(),
Some("https://cdn1007.cdn-tnmr.org/hls2/02/02723/abc_h/master.m3u8?t=TOKEN&s=12345&e=28800&f=999&i=0.3&sp=0")
);
}
// Packed path: only asserts that *some* URL is found, not its exact value.
#[test]
fn extracts_media_url_from_packed_script() {
// Minimal valid packed script that decodes to a jwplayer sources array.
// Original: jwplayer("vplayer").setup({sources:[{file:"https://cdn.example.com/video.m3u8"}]})
// We fake it with a trivial packer (base 10, a few words).
let fake_packed = r#"eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\b'+c.toString(a)+'\b','g'),k[c]);return p}('0("[{1:\"https://cdn.example.com/video.m3u8\"}]")',10,2,'sources|file'.split('|'),0,{}))"#;
let html = format!("<script type='text/javascript'>{fake_packed}</script>");
let url = LulustreamProxy::extract_media_url(&html);
assert!(url.is_some(), "should extract a URL from packed script");
}
}

View File

@@ -1,35 +1,70 @@
use crate::proxies::archivebate::ArchivebateProxy;
use crate::proxies::clapdat::ClapdatProxy;
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::heavyfetish::HeavyfetishProxy;
use crate::proxies::hqporner::HqpornerProxy;
use crate::proxies::pornhd3x::Pornhd3xProxy;
use crate::proxies::supjav::SupjavProxy;
use crate::proxies::tube8::Tube8Proxy;
use ntex::web;
use crate::proxies::pimpbunny::PimpbunnyProxy;
use crate::proxies::porndish::PorndishProxy;
use crate::proxies::spankbang::SpankbangProxy;
use crate::proxies::vjav::VjavProxy;
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
use crate::proxies::vidara::VidaraProxy;
use crate::proxies::lulustream::LulustreamProxy;
use crate::proxies::thaiporntv::ThaipornTvProxy;
pub mod allpornstream;
pub mod archivebate;
pub mod clapdat;
pub mod doodstream;
pub mod fikfapthumb;
pub mod hanimecdn;
pub mod hanimethumb;
pub mod heavyfetish;
pub mod hqporner;
pub mod hqpornerthumb;
pub mod javtiful;
pub mod lulustream;
pub mod noodlemagazine;
pub mod pimpbunny;
pub mod pimpbunnythumb;
pub mod porndish;
pub mod porndishthumb;
pub mod pornhd3x;
pub mod pornhubthumb;
pub mod shooshtime;
pub mod spankbang;
pub mod supjav;
pub mod sxyprn;
pub mod thaiporntv;
pub mod jable;
pub mod tube8;
pub mod thepornbunny;
pub mod vidara;
pub mod vjav;
/// Closed set of the site-specific proxies; each variant wraps one concrete
/// proxy type so they can be handled through a single enum.
#[derive(Debug, Clone)]
pub enum AnyProxy {
Archivebate(ArchivebateProxy),
Doodstream(DoodstreamProxy),
Sxyprn(SxyprnProxy),
Javtiful(javtiful::JavtifulProxy),
Lulustream(LulustreamProxy),
Pornhd3x(Pornhd3xProxy),
Pimpbunny(PimpbunnyProxy),
Porndish(PorndishProxy),
Spankbang(SpankbangProxy),
Hqporner(HqpornerProxy),
Heavyfetish(HeavyfetishProxy),
Vjav(VjavProxy),
Vidara(VidaraProxy),
Clapdat(ClapdatProxy),
ThaipornTv(ThaipornTvProxy),
Tube8(Tube8Proxy),
Supjav(SupjavProxy),
}
pub trait Proxy {
@@ -39,13 +74,23 @@ pub trait Proxy {
// Static dispatch: each variant forwards to the wrapped proxy's own
// `get_video_url`. The match has no wildcard arm, so adding a variant to
// `AnyProxy` without adding an arm here is a compile error.
impl Proxy for AnyProxy {
/// Resolves `url` with whichever proxy this value wraps and returns that
/// proxy's result unchanged.
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
match self {
AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await,
AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,
AnyProxy::Lulustream(p) => p.get_video_url(url, requester).await,
AnyProxy::Pornhd3x(p) => p.get_video_url(url, requester).await,
AnyProxy::Pimpbunny(p) => p.get_video_url(url, requester).await,
AnyProxy::Porndish(p) => p.get_video_url(url, requester).await,
AnyProxy::Spankbang(p) => p.get_video_url(url, requester).await,
AnyProxy::Hqporner(p) => p.get_video_url(url, requester).await,
AnyProxy::Heavyfetish(p) => p.get_video_url(url, requester).await,
AnyProxy::Vjav(p) => p.get_video_url(url, requester).await,
AnyProxy::Vidara(p) => p.get_video_url(url, requester).await,
AnyProxy::Clapdat(p) => p.get_video_url(url, requester).await,
AnyProxy::ThaipornTv(p) => p.get_video_url(url, requester).await,
AnyProxy::Tube8(p) => p.get_video_url(url, requester).await,
AnyProxy::Supjav(p) => p.get_video_url(url, requester).await,
}
}
}

View File

@@ -1,20 +1,14 @@
use crate::util::browser;
use crate::util::requester::Requester;
use ntex::web;
use regex::Regex;
use serde_json::Value;
use std::time::{SystemTime, UNIX_EPOCH};
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
/// Stateless proxy for pimpbunny.com detail pages.
#[derive(Debug, Clone)]
pub struct PimpbunnyProxy {}
impl PimpbunnyProxy {
const FIREFOX_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &'static str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
pub fn new() -> Self {
PimpbunnyProxy {}
}
@@ -25,12 +19,11 @@ impl PimpbunnyProxy {
} else {
format!("https://{}", url.trim_start_matches('/'))
};
Self::is_allowed_detail_url(&normalized).then_some(normalized)
}
fn is_allowed_detail_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else {
let Ok(url) = Url::parse(url) else {
return false;
};
if url.scheme() != "https" {
@@ -39,214 +32,297 @@ impl PimpbunnyProxy {
let Some(host) = url.host_str() else {
return false;
};
matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
&& !url.path().starts_with("/contents/videos_screenshots/")
}
/// Site root URL, used as the `Referer` value for pimpbunny requests.
fn root_referer() -> &'static str {
    const ROOT: &str = "https://pimpbunny.com/";
    ROOT
}
/// Generate 32-char numeric key from a pimpbunny license_code (e.g. '$576262819011919').
///
/// Stage 1 of the KVS player URL decoder, extracted from kt_player.js:
/// - d_raw = license_code (keeps '$' at index 0 for the d[g+h] lookup)
/// - d_mod = strip '$', replace every '0' digit with '1'
/// - split d_mod at midpoint j = floor(len/2) to get k and l
/// - f_str = 4 * |k - l| as a string
/// - for g in 0..=j, h in 1..=4: n = d_raw[g+h] + f_str[g]; if n >= 10 then n -= 10
fn generate_key(license_code: &str) -> Option<String> {
let d_raw = license_code;
// Strip '$' and replace '0' with '1'
let d_mod: String = license_code
.get(1..)?
.chars()
.map(|c| if c == '0' { '1' } else { c })
.collect();
fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
vec![
("Referer".to_string(), referer.to_string()),
(
"User-Agent".to_string(),
Self::FIREFOX_USER_AGENT.to_string(),
),
("Accept".to_string(), Self::HTML_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
]
}
let j = d_mod.len() / 2;
let k: i64 = d_mod.get(..j + 1)?.parse().ok()?;
let l: i64 = d_mod.get(j..)?.parse().ok()?;
let f_str = (4 * (k - l).abs()).to_string();
fn headers_with_cookies(
requester: &Requester,
request_url: &str,
referer: &str,
) -> Vec<(String, String)> {
let mut headers = Self::html_headers_with_referer(referer);
if let Some(cookie) = requester.cookie_header_for_url(request_url) {
headers.push(("Cookie".to_string(), cookie));
let threshold = 10i64;
let d_bytes = d_raw.as_bytes();
let f_bytes = f_str.as_bytes();
let mut key = String::with_capacity(32);
for g in 0..=j {
for h in 1..=4usize {
let d_digit = d_bytes.get(g + h).copied().unwrap_or(b'0') as i64 - b'0' as i64;
let f_digit = f_bytes.get(g).copied().unwrap_or(b'0') as i64 - b'0' as i64;
let mut n = d_digit + f_digit;
if n >= threshold {
n -= threshold;
}
key.push((b'0' + n as u8) as char);
}
}
headers
Some(key)
}
async fn warm_root_session(requester: &mut Requester) {
let _ = requester
.get_with_headers(
Self::root_referer(),
Self::html_headers_with_referer(Self::root_referer()),
Some(Version::HTTP_11),
)
.await;
/// Permutes the characters of `hash` according to the decimal digits of `key`.
///
/// Stage 2 of the decoder: walking positions from the last one down to 0, each
/// position `pos` is swapped with `(pos + sum of key digits from pos onward)
/// % len`, where `len` is the number of characters in `hash`. Key digits at
/// positions beyond the hash length still contribute to the sums.
fn shuffle_hash(hash: &str, key: &str) -> String {
    let mut chars: Vec<char> = hash.chars().collect();
    let len = chars.len();
    let digits = key.as_bytes();

    for pos in (0..len).rev() {
        let key_tail: i64 = digits
            .iter()
            .skip(pos)
            .map(|byte| (*byte - b'0') as i64)
            .sum();
        let target = (pos as i64 + key_tail) % len as i64;
        chars.swap(pos, target as usize);
    }

    chars.iter().collect()
}
fn extract_json_ld_video(text: &str) -> Option<Value> {
let script_regex =
Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;
/// Decode a KVS-encoded 'function/N/https://…' video URL into a usable URL.
///
/// Stage 3: strip the function/N/ prefix, shuffle the 32-char hash in segment 6,
/// then append ?rnd=<epoch_ms>.
fn decode_encoded_url(encoded: &str, license_code: &str) -> Option<String> {
let after_prefix = encoded.strip_prefix("function/")?;
// Split '0/https://host/path/hash_segment/…' into parts by '/'
let parts: Vec<&str> = after_prefix.split('/').collect();
// Layout: [0]version [1]'https:' [2]'' [3]host [4]'get_file' [5]segment_no [6]hash_segment …
if parts.len() < 7 {
return None;
}
let hash_segment = parts[6];
if hash_segment.len() < 32 {
return None;
}
for captures in script_regex.captures_iter(text) {
let raw = captures.get(1).map(|value| value.as_str().trim())?;
let parsed: Value = serde_json::from_str(raw).ok()?;
let key = Self::generate_key(license_code)?;
let shuffled = Self::shuffle_hash(&hash_segment[..32], &key);
let new_hash_segment = format!("{}{}", shuffled, &hash_segment[32..]);
if let Some(video) = Self::find_video_object(&parsed) {
return Some(video);
let mut new_parts: Vec<String> = parts.iter().map(|s| s.to_string()).collect();
new_parts[6] = new_hash_segment;
new_parts.remove(0); // remove version number
let rnd = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis();
let url = new_parts.join("/");
Some(format!("{}?rnd={}", url, rnd))
}
/// Pulls the `license_code` out of the KVS player config in the page HTML.
/// The value is `$` followed by exactly 15 digits; `None` when it is absent.
fn extract_license_code(html: &str) -> Option<String> {
    Regex::new(r"license_code:\s*'(\$[0-9]{15})'")
        .ok()?
        .captures(html)
        .and_then(|captures| captures.get(1))
        .map(|code| code.as_str().to_owned())
}
/// Collects every encoded `function/…` value assigned to a `video_url`,
/// `video_alt_url`, `video_alt_url2`, … key of the KVS player config, in
/// document order (lowest to highest quality for pimpbunny). Returns an empty
/// vector when nothing matches.
fn extract_video_urls(html: &str) -> Vec<String> {
    match Regex::new(r"video(?:_alt)?_url\d*:\s*'(function/[^']+)'") {
        Ok(pattern) => pattern
            .captures_iter(html)
            .filter_map(|captures| captures.get(1))
            .map(|encoded| encoded.as_str().to_owned())
            .collect(),
        Err(_) => Vec::new(),
    }
}
/// Issues a GET for `url` with redirects disabled and returns the `Location`
/// header when (and only when) the response status is exactly 302. Any other
/// status (including 403 "access denied"), a request error or a missing or
/// non-text header yields `None`.
///
/// The PHPSESSID cookie — acquired when the detail page was fetched — must be
/// forwarded; the server won't issue the 302 without it. Cookies the requester
/// holds for `url` are therefore attached as a `Cookie` header.
async fn follow_302(url: &str, requester: &Requester) -> Option<String> {
    let client = wreq::Client::builder()
        .redirect(wreq::redirect::Policy::none())
        .build()
        .ok()?;

    let request = match requester.cookie_header_for_url(url) {
        Some(cookie) => client.get(url).header("Cookie", cookie),
        None => client.get(url),
    };

    let response = request.send().await.ok()?;
    if response.status().as_u16() != 302 {
        return None;
    }
    let location = response.headers().get("location")?.to_str().ok()?;
    Some(location.to_string())
}
/// Fetches the detail page, decodes its encoded KVS video URLs and returns the
/// redirect target (the `Location` of the 302) of the first one that resolves.
///
/// Candidates are tried from the last one in the config to the first (highest
/// quality first). Returns `None` when the page cannot be fetched, the license
/// code or encoded URLs are missing, or no candidate yields a 302.
async fn try_decode(detail_url: &str, requester: &mut Requester) -> Option<String> {
    let html = requester.get(detail_url, None).await.ok()?;
    let license_code = Self::extract_license_code(&html)?;
    let mut candidates = Self::extract_video_urls(&html);

    // Popping from the back walks the list in reverse document order.
    while let Some(encoded_url) = candidates.pop() {
        if let Some(decoded) = Self::decode_encoded_url(&encoded_url, &license_code) {
            if let Some(location) = Self::follow_302(&decoded, requester).await {
                return Some(location);
            }
        }
    }
    None
}
/// Locate the JSON-LD node that describes the video.
///
/// A node qualifies when its `@type` is `"VideoObject"` or when it carries a
/// non-blank `contentUrl` string. The search order is:
/// 1. `parsed` itself,
/// 2. the direct members of its `@graph` array (not searched recursively),
/// 3. if `parsed` is an array, each element, searched recursively.
///
/// Returns a clone of the first qualifying node, or `None`.
fn find_video_object(parsed: &Value) -> Option<Value> {
    let describes_video = |node: &Value| {
        let typed_as_video = node.get("@type").and_then(Value::as_str) == Some("VideoObject");
        let has_content_url = node
            .get("contentUrl")
            .and_then(Value::as_str)
            .is_some_and(|link| !link.trim().is_empty());
        typed_as_video || has_content_url
    };

    if describes_video(parsed) {
        return Some(parsed.clone());
    }

    let graph_hit = parsed
        .get("@graph")
        .and_then(Value::as_array)
        .and_then(|members| members.iter().find(|&member| describes_video(member)));
    if let Some(member) = graph_hit {
        return Some(member.clone());
    }

    parsed
        .as_array()?
        .iter()
        .find_map(|element| Self::find_video_object(element))
}
fn extract_stream_url(json_ld: &Value) -> Option<String> {
json_ld
.get("contentUrl")
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(str::to_string)
}
/// Pull the first `"contentUrl": "…"` value straight out of raw text with a regex,
/// without parsing the surrounding JSON.
///
/// Returns the trimmed value, or `None` when nothing matches or the value is blank.
fn extract_stream_url_from_html(text: &str) -> Option<String> {
    let pattern = Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#).ok()?;
    let captured = pattern.captures(text)?.get(1)?.as_str().trim();
    if captured.is_empty() {
        return None;
    }
    Some(captured.to_string())
}
}
impl crate::proxies::Proxy for PimpbunnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
async fn get_video_url(
&self,
url: String,
requester: web::types::State<crate::util::requester::Requester>,
) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
let mut requester = requester.get_ref().clone();
Self::warm_root_session(&mut requester).await;
let headers = Self::headers_with_cookies(&requester, &detail_url, Self::root_referer());
let text = match requester
.get_with_headers(&detail_url, headers, Some(Version::HTTP_2))
.await
{
Ok(text) => text,
Err(_) => return String::new(),
};
let mut req = requester.get_ref().clone();
Self::extract_json_ld_video(&text)
.and_then(|json_ld| Self::extract_stream_url(&json_ld))
.or_else(|| Self::extract_stream_url_from_html(&text))
if let Some(video_url) = Self::try_decode(&detail_url, &mut req).await {
return video_url;
}
// Fall back to browser-based extraction when the decoder fails or the decoded
// URL is rejected (access denied).
browser::wait_for_src(&detail_url, "//video[@src]")
.await
.unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::PimpbunnyProxy;
use super::*;
#[test]
fn allows_only_pimpbunny_detail_urls() {
assert!(PimpbunnyProxy::is_allowed_detail_url(
"https://pimpbunny.com/videos/example-video/"
));
assert!(PimpbunnyProxy::is_allowed_detail_url(
"https://www.pimpbunny.com/video/example/"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"http://pimpbunny.com/videos/example-video/"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"https://example.com/videos/example-video/"
));
fn test_normalize_adds_https_scheme() {
let url =
PimpbunnyProxy::normalize_detail_url("pimpbunny.com/videos/some-video/").unwrap();
assert!(url.starts_with("https://"));
}
#[test]
fn extracts_content_url_from_json_ld() {
fn test_normalize_passes_through_valid_https() {
let url = "https://pimpbunny.com/videos/some-video/";
assert_eq!(
PimpbunnyProxy::normalize_detail_url(url).unwrap(),
url
);
}
#[test]
fn test_normalize_rejects_screenshots_path() {
let url =
"https://pimpbunny.com/contents/videos_screenshots/473000/473894/preview.jpg";
assert!(PimpbunnyProxy::normalize_detail_url(url).is_none());
}
#[test]
fn test_normalize_rejects_other_host() {
assert!(PimpbunnyProxy::normalize_detail_url("https://evil.com/videos/x/").is_none());
}
#[test]
fn test_normalize_rejects_http() {
assert!(PimpbunnyProxy::normalize_detail_url(
"http://pimpbunny.com/videos/some-video/"
)
.is_none());
}
#[test]
fn test_www_subdomain_is_allowed() {
let url = "https://www.pimpbunny.com/videos/some-video/";
assert!(PimpbunnyProxy::normalize_detail_url(url).is_some());
}
#[test]
fn test_generate_key() {
let key = PimpbunnyProxy::generate_key("$576262819011919").unwrap();
assert_eq!(key, "68732171060626281736958625345345");
assert_eq!(key.len(), 32);
}
#[test]
fn test_shuffle_hash() {
let key = "68732171060626281736958625345345";
let input = "7e4df9f504c7ec5b02b0101ed28edfa4";
let output = PimpbunnyProxy::shuffle_hash(input, key);
assert_eq!(output, "fde01fe04b42c7e72d0d0a8c95b1e45f");
}
#[test]
fn test_decode_encoded_url() {
let encoded = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/";
let license = "$576262819011919";
let decoded = PimpbunnyProxy::decode_encoded_url(encoded, license).unwrap();
assert!(decoded.starts_with(
"https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/"
));
assert!(decoded.contains("?rnd="));
}
#[test]
fn test_extract_license_code() {
let html = r#"license_code: '$576262819011919', lrc: '77028724',"#;
assert_eq!(
PimpbunnyProxy::extract_license_code(html).unwrap(),
"$576262819011919"
);
}
#[test]
fn test_extract_video_urls() {
let html = r#"
<script type="application/ld+json">{"contentUrl":"https://cdn.example/video.mp4"}</script>
video_url: 'function/0/https://pimpbunny.com/get_file/34/abc/530000/530112/530112_pb_360p.mp4/',
video_alt_url: 'function/0/https://pimpbunny.com/get_file/34/def/530000/530112/530112_pb_480p.mp4/',
video_alt_url2: 'function/0/https://pimpbunny.com/get_file/34/ghi/530000/530112/530112_pb_720p.mp4/',
video_alt_url5: 'https://pimpbunny.com/?upgrade=true',
"#;
let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("json-ld should parse");
assert_eq!(
PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
Some("https://cdn.example/video.mp4")
);
let urls = PimpbunnyProxy::extract_video_urls(html);
assert_eq!(urls.len(), 3);
assert!(urls[0].starts_with("function/0/"));
assert!(urls[2].contains("720p"));
}
#[test]
fn extracts_video_object_from_graph_script() {
let html = r#"
<script type="application/ld+json">
{"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
</script>
"#;
let json_ld =
PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse");
assert_eq!(
PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
Some("https://cdn.example/graph.mp4")
);
}
#[test]
fn falls_back_to_raw_content_url_match() {
let html = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;
assert_eq!(
PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(),
Some("https://cdn.example/fallback.mp4")
);
#[tokio::test]
async fn test_live_extract_charlotte_sins() {
let url = "https://pimpbunny.com/videos/charlotte-sins-gets-drilled-by-jax-slayher/";
let src = browser::wait_for_src(url, "//video[@src]").await;
println!("video src: {:?}", src);
assert!(src.is_some(), "expected a video URL but got None");
let src = src.unwrap();
assert!(src.starts_with("http"), "expected http URL, got: {src}");
}
}

220
src/proxies/pornhubthumb.rs Normal file
View File

@@ -0,0 +1,220 @@
use ntex::web::{self, HttpRequest};
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use crate::util::requester::Requester;
const PORNHUB_ROOT: &str = "https://www.pornhub.com/";
/// Rebuild the upstream page URL from the `endpoint` route parameter.
///
/// Leading slashes are stripped; `https://` is prepended unless the endpoint already
/// starts with `http://` or `https://`. The request's query string is appended after
/// a `?` only when it is non-empty and the URL does not already contain a `?`.
fn endpoint_to_page_url(req: &HttpRequest) -> String {
    let raw_endpoint = req.match_info().query("endpoint");
    let endpoint = raw_endpoint.trim_start_matches('/');

    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| endpoint.starts_with(*scheme));
    let mut page_url = if has_scheme {
        endpoint.to_string()
    } else {
        format!("https://{endpoint}")
    };

    let query = req.query_string();
    if !(query.is_empty() || page_url.contains('?')) {
        page_url.push('?');
        page_url.push_str(query);
    }
    page_url
}
/// Allow-list check for pages this proxy is willing to fetch.
///
/// Accepts only parseable `https` URLs whose host is `pornhub.com`,
/// `www.pornhub.com` or any `*.pornhub.com` subdomain, and whose path starts with
/// `/view_video.php` or `/video/`.
fn is_allowed_video_page_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    if parsed.scheme() != "https" {
        return false;
    }

    let host_ok = match parsed.host_str() {
        Some(host) => {
            host == "pornhub.com" || host == "www.pornhub.com" || host.ends_with(".pornhub.com")
        }
        None => false,
    };
    if !host_ok {
        return false;
    }

    let path = parsed.path();
    path.starts_with("/view_video.php") || path.starts_with("/video/")
}
/// Turn a URL-ish string found in the page into an absolute URL.
///
/// * `//host/…` (protocol-relative) gets an `https:` prefix.
/// * `http://…` and `https://…` are returned unchanged.
/// * `/path` (root-relative) is resolved against the scheme and host of `page_url`;
///   this yields `None` if `page_url` has no host.
/// * Anything else, including the empty string, yields `None`.
fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option<String> {
    // The protocol-relative test must come before the root-relative one,
    // because "//…" also starts with '/'.
    if candidate.starts_with("//") {
        Some(format!("https:{candidate}"))
    } else if candidate.starts_with("https://") || candidate.starts_with("http://") {
        Some(candidate.to_string())
    } else if candidate.starts_with('/') {
        page_url
            .host_str()
            .map(|host| format!("{}://{}{}", page_url.scheme(), host, candidate))
    } else {
        None
    }
}
/// Allow-list check for thumbnail URLs the proxy may redirect to.
///
/// Accepts only parseable `https` URLs whose host is `pornhub.com`,
/// `www.pornhub.com`, a `*.pornhub.com` subdomain or a `*.phncdn.com` subdomain, and
/// whose path (compared case-insensitively) ends in `.jpg`, `.jpeg`, `.png`,
/// `.webp` or `.avif`.
fn is_allowed_thumb_url(url: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = [".jpg", ".jpeg", ".png", ".webp", ".avif"];

    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    if parsed.scheme() != "https" {
        return false;
    }

    let trusted_host = parsed.host_str().is_some_and(|host| {
        matches!(host, "pornhub.com" | "www.pornhub.com")
            || host.ends_with(".pornhub.com")
            || host.ends_with(".phncdn.com")
    });
    if !trusted_host {
        return false;
    }

    let lowered_path = parsed.path().to_ascii_lowercase();
    IMAGE_EXTENSIONS
        .iter()
        .any(|extension| lowered_path.ends_with(*extension))
}
/// Undo the escapes that commonly wrap a URL inside an inline JS/JSON string.
///
/// Handles the escaped slash (`\/`) and the `\uXXXX` forms of `/` (U+002F) and
/// `:` (U+003A). Hex digits in a `\u` escape may be written in either case, so both
/// `\u002F` and `\u002f` (and likewise `\u003A` / `\u003a`) are decoded; previously
/// only the upper-case spelling was recognised and lower-case escapes were left in
/// the URL. All other characters are passed through unchanged.
fn decode_js_string(value: &str) -> String {
    value
        .replace("\\/", "/")
        .replace("\\u002F", "/")
        .replace("\\u002f", "/")
        .replace("\\u003A", ":")
        .replace("\\u003a", ":")
}
/// Find a thumbnail URL for the video page.
///
/// Two sources are consulted, in order:
/// 1. the `content` attribute of `og:image`, `twitter:image` and `thumbnailUrl`
///    `<meta>` tags, in document order;
/// 2. the first `"image_url": "…"` occurrence in the raw HTML, after undoing JS
///    string escapes.
///
/// Each candidate is made absolute relative to `page_url` and must pass
/// `is_allowed_thumb_url`. Returns `None` when no candidate qualifies, or when the
/// selector or the regex fails to compile.
fn find_thumb_in_html(html: &str, page_url: &Url) -> Option<String> {
    let document = Html::parse_document(html);
    let selector = Selector::parse(
        "meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]",
    )
    .ok()?;

    let from_meta = document
        .select(&selector)
        .filter_map(|meta| {
            let content = meta.value().attr("content").unwrap_or_default().trim();
            normalize_candidate_url(content, page_url)
        })
        .find(|candidate| is_allowed_thumb_url(candidate));
    if from_meta.is_some() {
        return from_meta;
    }

    // Only the first match of the inline JSON key is considered.
    let image_url_re = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?;
    let captures = image_url_re.captures(html)?;
    let raw = captures
        .get(1)
        .map(|group| group.as_str())
        .unwrap_or_default();
    let decoded = decode_js_string(raw);

    normalize_candidate_url(&decoded, page_url).filter(|candidate| is_allowed_thumb_url(candidate))
}
/// HTTP handler: resolve a video page to its thumbnail and redirect to it.
///
/// Responses:
/// * `400 Bad Request` — the reconstructed page URL is not on the allow-list, or it
///   fails to parse;
/// * `404 Not Found` — the page could not be fetched or no acceptable thumbnail was
///   found in it;
/// * `302 Found` with a `Location` header pointing at the thumbnail otherwise.
pub async fn get_image(
    req: HttpRequest,
    requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
    let page_url = endpoint_to_page_url(&req);
    if !is_allowed_video_page_url(&page_url) {
        return Ok(web::HttpResponse::BadRequest().finish());
    }

    let mut client = requester.get_ref().clone();
    let referer = vec![("Referer".to_string(), PORNHUB_ROOT.to_string())];
    let Ok(html) = client
        .get_with_headers(page_url.as_str(), referer, None)
        .await
    else {
        return Ok(web::HttpResponse::NotFound().finish());
    };

    let Ok(parsed_page_url) = Url::parse(&page_url) else {
        return Ok(web::HttpResponse::BadRequest().finish());
    };

    let response = match find_thumb_in_html(&html, &parsed_page_url) {
        Some(image_url) => web::HttpResponse::Found()
            .header("Location", image_url)
            .finish(),
        None => web::HttpResponse::NotFound().finish(),
    };
    Ok(response)
}
#[cfg(test)]
mod tests {
    use super::{
        decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url,
    };
    use url::Url;

    /// Only HTTPS pornhub video pages pass the page allow-list.
    #[test]
    fn validates_allowed_video_pages() {
        let accepted = [
            "https://www.pornhub.com/view_video.php?viewkey=abc123",
            "https://www.pornhub.com/video/search?search=test",
        ];
        let rejected = [
            "https://example.com/view_video.php?viewkey=abc123",
            "http://www.pornhub.com/view_video.php?viewkey=abc123",
        ];
        for url in accepted {
            assert!(is_allowed_video_page_url(url));
        }
        for url in rejected {
            assert!(!is_allowed_video_page_url(url));
        }
    }

    /// Thumbnails must live on an allowed host and end in an image extension.
    #[test]
    fn validates_allowed_thumb_hosts_and_extensions() {
        let accepted = [
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg",
            "https://www.pornhub.com/webmasters/thumb.webp",
        ];
        let rejected = [
            "https://example.com/thumb.jpg",
            "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8",
        ];
        for url in accepted {
            assert!(is_allowed_thumb_url(url));
        }
        for url in rejected {
            assert!(!is_allowed_thumb_url(url));
        }
    }

    /// `//host/…` gains `https:`; `/path` is resolved against the page URL.
    #[test]
    fn normalizes_protocol_relative_and_root_relative_urls() {
        let base = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap();

        let from_protocol_relative =
            normalize_candidate_url("//pix-cdn77.phncdn.com/thumb.jpg", &base);
        assert_eq!(
            from_protocol_relative.as_deref(),
            Some("https://pix-cdn77.phncdn.com/thumb.jpg")
        );

        let from_root_relative = normalize_candidate_url("/assets/thumb.jpg", &base);
        assert_eq!(
            from_root_relative.as_deref(),
            Some("https://www.pornhub.com/assets/thumb.jpg")
        );
    }

    /// Escaped slashes in an inline JS string are unescaped.
    #[test]
    fn decodes_js_escaped_urls() {
        let decoded = decode_js_string(r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#);
        assert_eq!(decoded, "https://pix-cdn77.phncdn.com/thumb.jpg");
    }
}

View File

@@ -18,6 +18,10 @@ struct SourceCandidate {
pub struct ShooshtimeProxy {}
impl ShooshtimeProxy {
pub fn new() -> Self {
Self {}
}
fn normalize_detail_request(endpoint: &str) -> Option<(String, Option<String>)> {
let endpoint = endpoint.trim().trim_start_matches('/');
if endpoint.is_empty() {
@@ -191,6 +195,22 @@ impl ShooshtimeProxy {
}
}
impl crate::proxies::Proxy for ShooshtimeProxy {
    /// Resolve a proxy endpoint to a media source URL.
    ///
    /// The endpoint is first normalized into a detail-page URL plus an optional
    /// quality hint. The detail page is fetched and a source is selected from it.
    /// An empty string is returned when the endpoint is rejected, the fetch fails or
    /// returns an empty body, or no source is selected.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some((detail_url, quality)) = Self::normalize_detail_request(&url) else {
            return String::new();
        };

        let mut client = requester.get_ref().clone();
        match client.get(&detail_url, None).await {
            Ok(html) if !html.is_empty() => {
                Self::select_source_url(&html, quality.as_deref()).unwrap_or_default()
            }
            // Fetch errors and empty bodies are treated alike.
            _ => String::new(),
        }
    }
}
pub async fn serve_media(
req: HttpRequest,
requester: web::types::State<Requester>,

View File

@@ -13,7 +13,38 @@ impl SpankbangProxy {
}
fn request_headers() -> Vec<(String, String)> {
vec![("Referer".to_string(), "https://spankbang.com/".to_string())]
vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
}
fn extract_stream_data(text: &str) -> Option<&str> {
@@ -76,7 +107,38 @@ mod tests {
fn prefers_m3u8_when_present() {
assert_eq!(
SpankbangProxy::request_headers(),
vec![("Referer".to_string(), "https://spankbang.com/".to_string())]
vec![
(
"accept".to_string(),
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
.to_string(),
),
("accept-language".to_string(), "en-US,en;q=0.6".to_string()),
("cache-control".to_string(), "no-cache".to_string()),
("pragma".to_string(), "no-cache".to_string()),
("priority".to_string(), "u=0, i".to_string()),
(
"sec-ch-ua".to_string(),
r#""Chromium";v="146", "Not-A.Brand";v="24", "Brave";v="146""#.to_string(),
),
("sec-ch-ua-mobile".to_string(), "?0".to_string()),
("sec-ch-ua-platform".to_string(), "\"Linux\"".to_string()),
("sec-fetch-dest".to_string(), "document".to_string()),
("sec-fetch-mode".to_string(), "navigate".to_string()),
("sec-fetch-site".to_string(), "none".to_string()),
("sec-fetch-user".to_string(), "?1".to_string()),
("sec-gpc".to_string(), "1".to_string()),
(
"upgrade-insecure-requests".to_string(),
"1".to_string(),
),
(
"user-agent".to_string(),
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
.to_string(),
),
("Referer".to_string(), "https://spankbang.com/".to_string()),
]
);
let data = r#"

62
src/proxies/supjav.rs Normal file
View File

@@ -0,0 +1,62 @@
use ntex::web;
use url::Url;
use crate::util::requester::Requester;
/// Proxy type for Supjav media targets. It has no fields.
#[derive(Debug, Clone)]
pub struct SupjavProxy {}
impl SupjavProxy {
pub fn new() -> Self {
Self {}
}
fn normalize_target(endpoint: &str) -> Option<String> {
let endpoint = endpoint.trim().trim_start_matches('/');
if endpoint.is_empty() {
return None;
}
let target = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint.to_string()
} else {
format!("https://{endpoint}")
};
Self::is_allowed_media_url(&target).then_some(target)
}
fn is_allowed_media_url(url: &str) -> bool {
let Some(parsed) = Url::parse(url).ok() else {
return false;
};
if parsed.scheme() != "https" {
return false;
}
let Some(host) = parsed.host_str() else {
return false;
};
let host = host.to_ascii_lowercase();
if !(host == "turbovidhls.com"
|| host == "turboviplay.com"
|| host.ends_with(".turboviplay.com")
|| host.ends_with(".turbovidhls.com"))
{
return false;
}
parsed.path().to_ascii_lowercase().contains(".m3u8")
}
}
impl crate::proxies::Proxy for SupjavProxy {
    /// Validate the endpoint and return it as an absolute media URL.
    ///
    /// No request is made; the requester is unused. An empty string is returned
    /// when the endpoint is rejected by `normalize_target`.
    async fn get_video_url(
        &self,
        url: String,
        _requester: web::types::State<Requester>,
    ) -> String {
        match Self::normalize_target(&url) {
            Some(target) => target,
            None => String::new(),
        }
    }
}

View File

@@ -38,6 +38,7 @@ impl SxyprnProxy {
) -> String {
let mut requester = requester.get_ref().clone();
let url = "https://sxyprn.com/".to_string() + &url;
// println!("Fetching URL: {}", url);
let text = requester.get(&url, None).await.unwrap_or("".to_string());
if text.is_empty() {
return "".to_string();
@@ -48,43 +49,31 @@ impl SxyprnProxy {
.split("\"}")
.collect::<Vec<&str>>()[0]
.replace("\\", "");
//println!("src: {}",data_string);
// println!("src: {}", data_string);
let mut tmp = data_string
.split("/")
.map(|s| s.to_string())
.collect::<Vec<String>>();
//println!("tmp: {:?}",tmp);
// println!("tmp: {:?}", tmp);
tmp[1] = format!(
"{}8/{}",
tmp[1],
boo(ssut51(tmp[6].as_str()), ssut51(tmp[7].as_str()))
);
//println!("tmp[1]: {:?}",tmp[1]);
// println!("tmp[1]: {:?}", tmp[1]);
//preda
tmp[5] = format!(
"{}",
tmp[5].parse::<u32>().unwrap() - ssut51(tmp[6].as_str()) - ssut51(tmp[7].as_str())
);
//println!("tmp: {:?}",tmp);
// println!("tmp: {:?}", tmp);
let sxyprn_video_url = format!("https://sxyprn.com{}", tmp.join("/"));
let response = requester.get_raw(&sxyprn_video_url).await;
match response {
Ok(resp) => {
return format!(
"https:{}",
resp.headers()
.get("Location")
.unwrap()
.to_str()
.unwrap_or("")
.to_string()
);
}
Err(e) => {
println!("Error fetching video URL: {}", e);
}
// println!("sxyprn_video_url: {}", sxyprn_video_url);
match crate::util::get_redirect_location(&sxyprn_video_url) {
Ok(Some(loc)) => {return format!("https:{}", loc)},
Ok(None) => println!("No redirect found for {}", sxyprn_video_url),
Err(e) => eprintln!("Request failed: {}", e),
}
return "".to_string();
}

113
src/proxies/thaiporntv.rs Normal file
View File

@@ -0,0 +1,113 @@
use base64::{engine::general_purpose, Engine};
use ntex::web;
use crate::util::requester::Requester;
use crate::videos::VideoFormat;
use crate::providers::report_provider_error_background;
const CHANNEL_ID: &str = "thaiporntv";
const BASE_URL: &str = "https://www.thaiporntv.com";
/// Proxy type for thaiporntv video pages. It has no fields.
#[derive(Debug, Clone)]
pub struct ThaipornTvProxy {}
impl ThaipornTvProxy {
pub fn new() -> Self {
ThaipornTvProxy {}
}
/// Decode the `data-enc` attribute of the player element into a list of formats.
///
/// Steps, as implemented:
/// 1. map the URL-safe base64 alphabet (`-`, `_`) to the standard one and pad with
///    `=` to a multiple of four;
/// 2. base64-decode;
/// 3. take the first byte as an XOR key and XOR every remaining byte with it;
/// 4. parse the result as a UTF-8 JSON array;
/// 5. for every element that has string fields `u` (URL) and `q` (quality), build a
///    `VideoFormat` with a `Referer` header set to `BASE_URL`. Elements missing
///    either field are skipped.
///
/// Every failure is reported through `report_provider_error_background` and yields
/// `None`. A valid but empty array yields `Some(vec![])`.
fn decode_data_enc(encoded_data: &str) -> Option<Vec<VideoFormat>> {
    let mut standard_b64: String = encoded_data
        .chars()
        .map(|symbol| match symbol {
            '-' => '+',
            '_' => '/',
            other => other,
        })
        .collect();
    let missing_padding = (4 - standard_b64.len() % 4) % 4;
    standard_b64.push_str(&"=".repeat(missing_padding));

    let raw = general_purpose::STANDARD
        .decode(&standard_b64)
        .map_err(|e| {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.base64", &format!("error={e}"));
        })
        .ok()?;

    // The first byte is the key; everything after it is the XOR-ed payload.
    let Some((&key, payload)) = raw.split_first() else {
        report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.empty", "decoded bytes are empty");
        return None;
    };
    let plain: Vec<u8> = payload.iter().map(|byte| byte ^ key).collect();

    let json_text = String::from_utf8(plain)
        .map_err(|e| {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.utf8", &format!("error={e}"));
        })
        .ok()?;

    let parsed: serde_json::Value = serde_json::from_str(&json_text)
        .map_err(|e| {
            report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.json_parse", &format!("error={e}"));
        })
        .ok()?;

    let Some(entries) = parsed.as_array() else {
        report_provider_error_background(CHANNEL_ID, "proxy.decode_data_enc.not_array", "JSON is not an array");
        return None;
    };

    let formats = entries
        .iter()
        .filter_map(|entry| {
            let stream_url = entry.get("u")?.as_str()?;
            let quality = entry.get("q")?.as_str()?;
            let mut format = VideoFormat::new(
                stream_url.to_string(),
                quality.to_string(),
                "application/x-mpegURL".to_string(),
            );
            format.add_http_header("Referer".to_string(), BASE_URL.to_string());
            Some(format)
        })
        .collect();
    Some(formats)
}
pub async fn get_video_url(
&self,
url_path: String, // Expects something like videos/2019/thai-student-have-sex-at-taxi-1658/
requester: web::types::State<Requester>,
) -> String {
let mut requester = requester.get_ref().clone();
let video_detail_url = format!("{}/{}", BASE_URL, url_path.trim_start_matches('/'));
let html = match requester.get(&video_detail_url, None).await {
Ok(text) => text,
Err(e) => {
report_provider_error_background(CHANNEL_ID, "proxy.get_video_url.fetch_html", &format!("url={}; error={}", video_detail_url, e));
return String::new();
}
};
let document = scraper::Html::parse_document(&html);
let video_element_selector = scraper::Selector::parse("video[data-enc]").unwrap();
if let Some(video_element) = document.select(&video_element_selector).next() {
if let Some(data_enc) = video_element.value().attr("data-enc") {
if let Some(formats) = Self::decode_data_enc(data_enc) {
// Return the URL of the best quality format, or the first one if quality is not a factor.
// For m3u8, we typically just return the m3u8 playlist URL.
if let Some(format) = formats.first() {
return format.url.clone();
}
}
}
}
String::new()
}
}

250
src/proxies/thepornbunny.rs Normal file
View File

@@ -0,0 +1,250 @@
use aes::Aes256;
use base64::{Engine as _, engine::general_purpose::STANDARD as B64};
use cbc::cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7};
use hmac::Hmac;
use ntex::web;
use pbkdf2::pbkdf2;
use sha2::Sha512;
use crate::util::requester::Requester;
const BASE_URL: &str = "https://www.thepornbunny.com";
const OKRU_API: &str = "https://api.ok.ru/fb.do?application_key=CBAFJIICABABABABA\
&fields=video.url_tiny%2Cvideo.url_low%2Cvideo.url_high%2Cvideo.url_medium\
%2Cvideo.url_quadhd%2Cvideo.url_mobile%2Cvideo.url_ultrahd%2Cvideo.url_fullhd%2C\
&method=video.get&format=json&session_key=";
/// HTTP handler: resolve a video slug to a media URL and redirect to it.
///
/// Flow:
/// 1. An empty `slug` answers `404`; a `HEAD` request answers `200` without doing
///    any upstream work.
/// 2. The video page is fetched (`502` on failure).
/// 3. If the page contains a `generate_mp4(...)` call, its first argument is
///    decrypted with the second to obtain a session key, the OK.ru API is queried
///    with that key and the third argument as `vids`, and the best URL from the
///    answer is used. Any failure on this path answers `502`.
/// 4. Otherwise the legacy `get_stream` flashvars are tried; failure answers `404`.
///
/// Success is always a `302 Found` with a `Location` header.
pub async fn redirect_to_media(
    req: web::HttpRequest,
    requester: web::types::State<Requester>,
) -> impl web::Responder {
    let redirect_to =
        |target: String| web::HttpResponse::Found().header("Location", target).finish();

    let slug = req
        .match_info()
        .query("slug")
        .trim_matches('/')
        .to_string();
    if slug.is_empty() {
        return web::HttpResponse::NotFound().finish();
    }
    if req.method() == ntex::http::Method::HEAD {
        return web::HttpResponse::Ok().finish();
    }

    let page_url = format!("{BASE_URL}/video/{slug}/");
    let mut client = requester.get_ref().clone();
    let Ok(html) = client.get(&page_url, None).await else {
        return web::HttpResponse::BadGateway().finish();
    };

    let Some((enc_data, key, rnd)) = extract_generate_mp4_args(&html) else {
        // Fallback: try legacy get_stream approach
        return match extract_and_resolve_stream(&html).await {
            Some(url) => redirect_to(url),
            None => web::HttpResponse::NotFound().finish(),
        };
    };

    let Some(session_key) = decrypt_session_key(&enc_data, &key) else {
        return web::HttpResponse::BadGateway().finish();
    };

    let api_url = format!("{OKRU_API}{session_key}&vids={rnd}");
    let Ok(json) = client.get(&api_url, None).await else {
        return web::HttpResponse::BadGateway().finish();
    };

    match extract_best_okru_url(&json) {
        Some(url) => redirect_to(url),
        None => web::HttpResponse::BadGateway().finish(),
    }
}
/// Extract the first three quoted arguments of the first `generate_mp4(...)` call
/// found in `html`.
///
/// Arguments may be quoted with `'` or `"`. Inside and outside quotes a backslash
/// makes the following character literal (the backslash itself is dropped).
/// Anything outside quotes, such as commas and whitespace, is ignored. Arguments
/// beyond the third are parsed but not returned.
///
/// The call ends at the first `)` that is *outside* a quoted argument. Previously
/// the argument list was cut at the first `)` of any kind, so a `)` inside a quoted
/// argument truncated the list and made the function return `None`.
///
/// Returns `None` when there is no call, no closing parenthesis, or fewer than three
/// quoted arguments.
fn extract_generate_mp4_args(html: &str) -> Option<(String, String, String)> {
    const CALL: &str = "generate_mp4(";

    let call_start = html.find(CALL)?;
    let after = &html[call_start + CALL.len()..];

    let mut args: Vec<String> = Vec::new();
    let mut current = String::new();
    // `Some(q)` while inside an argument opened with quote character `q`.
    let mut open_quote: Option<char> = None;
    let mut escape = false;
    let mut closed = false;

    for ch in after.chars() {
        if escape {
            current.push(ch);
            escape = false;
            continue;
        }
        if ch == '\\' {
            escape = true;
            continue;
        }
        match open_quote {
            Some(quote) if ch == quote => {
                open_quote = None;
                args.push(std::mem::take(&mut current));
            }
            Some(_) => current.push(ch),
            None if ch == '\'' || ch == '"' => open_quote = Some(ch),
            None if ch == ')' => {
                closed = true;
                break;
            }
            None => {}
        }
    }

    if !closed {
        return None;
    }

    let mut args = args.into_iter();
    Some((args.next()?, args.next()?, args.next()?))
}
/// Decrypt the session key carried in the first `generate_mp4` argument.
///
/// `enc_data_b64` is base64 of a JSON object with the fields `ciphertext` (base64),
/// `iv` (hex), `salt` (hex) and optionally `iterations` (defaults to 999). The AES
/// key is derived from `key` with PBKDF2-HMAC-SHA512 over the salt (32 bytes of
/// output), and the ciphertext is decrypted with AES-256-CBC and PKCS#7 padding.
///
/// Returns the plaintext as a string, or `None` if any decoding step fails, the IV
/// is not 16 bytes, the ciphertext length is not a multiple of 16, the padding is
/// invalid or the plaintext is not UTF-8.
fn decrypt_session_key(enc_data_b64: &str, key: &str) -> Option<String> {
    type Aes256CbcDec = cbc::Decryptor<Aes256>;

    let envelope_bytes = B64.decode(enc_data_b64).ok()?;
    let envelope: serde_json::Value = serde_json::from_slice(&envelope_bytes).ok()?;

    let ciphertext_b64 = envelope["ciphertext"].as_str()?;
    let iv_hex = envelope["iv"].as_str()?;
    let salt_hex = envelope["salt"].as_str()?;
    let rounds = envelope["iterations"].as_u64().unwrap_or(999) as u32;

    // Decryption happens in place, so the decoded ciphertext doubles as the buffer.
    let mut buffer = B64.decode(ciphertext_b64).ok()?;
    // The conversion fails, and `None` is returned, unless the IV is exactly 16 bytes.
    let iv_block: [u8; 16] = hex::decode(iv_hex).ok()?.try_into().ok()?;
    let salt = hex::decode(salt_hex).ok()?;
    if buffer.len() % 16 != 0 {
        return None;
    }

    let mut derived_key = [0u8; 32];
    pbkdf2::<Hmac<Sha512>>(key.as_bytes(), &salt, rounds, &mut derived_key).ok()?;

    let plaintext = Aes256CbcDec::new(&derived_key.into(), &iv_block.into())
        .decrypt_padded_mut::<Pkcs7>(&mut buffer)
        .ok()?;
    std::str::from_utf8(plaintext).ok().map(str::to_owned)
}
/// Choose a playable URL from an OK.ru `video.get` JSON answer.
///
/// Among the objects in the `videos` array, the one with the most fields is used
/// (the last such object on a tie). Its URL fields are then tried in a fixed
/// preference order and the first non-empty string wins.
///
/// Returns `None` when the JSON is invalid, `videos` is missing or empty, or none of
/// the listed fields holds a non-empty string.
fn extract_best_okru_url(json: &str) -> Option<String> {
    const PREFERENCE: [&str; 7] = [
        "url_fullhd",
        "url_quadhd",
        "url_high",
        "url_medium",
        "url_low",
        "url_mobile",
        "url_tiny",
    ];

    let data: serde_json::Value = serde_json::from_str(json).ok()?;
    let richest = data["videos"]
        .as_array()?
        .iter()
        .max_by_key(|video| video.as_object().map_or(0, |fields| fields.len()))?;

    PREFERENCE.iter().find_map(|field| {
        richest[*field]
            .as_str()
            .filter(|url| !url.is_empty())
            .map(str::to_string)
    })
}
// Legacy path for pages that still expose `get_stream` flashvars: pick the
// preferred stream URL from the page, then resolve it to its redirect target.
// Returns `None` if either step yields nothing.
async fn extract_and_resolve_stream(html: &str) -> Option<String> {
    match extract_best_stream_url(html) {
        Some(stream_url) => resolve_cdn_url(&stream_url).await,
        None => None,
    }
}
/// Return the first usable flashvar among `video_alt_url2`, `video_alt_url` and
/// `video_url`, tried in that order.
fn extract_best_stream_url(html: &str) -> Option<String> {
    ["video_alt_url2", "video_alt_url", "video_url"]
        .iter()
        .find_map(|key| extract_flashvar(html, key))
}
/// Read a single-quoted flashvar value from the page.
///
/// Looks for the first occurrence of `<key>:` in `html` and takes the text between
/// the next two `'` characters. The value is returned only if it starts with
/// `https://`. A missing key, missing quotes or any other value yields `None`.
fn extract_flashvar(html: &str, key: &str) -> Option<String> {
    let marker = format!("{key}:");
    let (_, after_key) = html.split_once(marker.as_str())?;
    let (_, after_open_quote) = after_key.split_once('\'')?;
    let (value, _) = after_open_quote.split_once('\'')?;

    if !value.starts_with("https://") {
        return None;
    }
    Some(value.to_string())
}
/// Request `stream_url` with redirect following disabled and return the redirect
/// target.
///
/// The request carries `BASE_URL` as `Referer`. Only a response with status 302
/// and a `location` header that `to_str` accepts produces `Some`; everything else,
/// including request errors, yields `None`.
async fn resolve_cdn_url(stream_url: &str) -> Option<String> {
    let no_redirect_client = wreq::Client::builder()
        .redirect(wreq::redirect::Policy::none())
        .build()
        .ok()?;

    let response = no_redirect_client
        .get(stream_url)
        .header("Referer", BASE_URL)
        .send()
        .await
        .ok()?;

    if response.status().as_u16() == 302 {
        let location = response.headers().get("location")?.to_str().ok()?;
        Some(location.to_string())
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The first three quoted arguments are returned; the fourth is ignored.
    #[test]
    fn parses_generate_mp4_args() {
        let html = r#"generate_mp4('eyJABC', 'key123', 'vid1,vid2', '10010686');"#;
        let parsed = extract_generate_mp4_args(html).unwrap();
        assert_eq!(parsed.0, "eyJABC");
        assert_eq!(parsed.1, "key123");
        assert_eq!(parsed.2, "vid1,vid2");
    }

    /// `url_fullhd` wins over lower-ranked fields when present.
    #[test]
    fn extracts_best_okru_url_fullhd_first() {
        let json = r#"{"videos":[{"url_mobile":"http://mobile.example","url_fullhd":"http://fullhd.example","url_high":"http://high.example"}]}"#;
        assert_eq!(
            extract_best_okru_url(json).unwrap(),
            "http://fullhd.example"
        );
    }

    /// `video_alt_url2` is preferred over the other flashvars.
    #[test]
    fn extracts_1080p_first() {
        let html = r#"
var flashvars = {
video_url: 'https://www.thepornbunny.com/get_stream/1418-480.mp4?md5=abc&timestamp=123',
video_alt_url: 'https://www.thepornbunny.com/get_stream/1418-720.mp4?md5=abc&timestamp=123',
video_alt_url2: 'https://www.thepornbunny.com/get_stream/1418-1080.mp4?md5=abc&timestamp=123',
};
"#;
        let picked = extract_best_stream_url(html).expect("should extract");
        assert!(picked.contains("1080"), "expected 1080 url, got: {picked}");
    }
}

173
src/proxies/tube8.rs Normal file
View File

@@ -0,0 +1,173 @@
use ntex::web;
use crate::util::requester::Requester;
const BASE_URL: &str = "https://www.tube8.com";
/// Proxy type for tube8 video pages. It has no fields.
#[derive(Debug, Clone)]
pub struct Tube8Proxy {}
impl Tube8Proxy {
pub fn new() -> Self {
Tube8Proxy {}
}
/// Request headers used when fetching a video page as HTML: a Firefox
/// `User-Agent`, HTML-oriented `Accept` / `Accept-Language` values and the site
/// root (`BASE_URL` plus `/`) as `Referer`.
fn html_headers() -> Vec<(String, String)> {
    let referer = format!("{BASE_URL}/");
    [
        (
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Referer", referer.as_str()),
    ]
    .iter()
    .map(|&(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
/// Request headers used for the JSON endpoint: a Firefox `User-Agent`, a
/// JSON-oriented `Accept`, the given `referer` and `X-Requested-With:
/// XMLHttpRequest`.
fn api_headers(referer: &str) -> Vec<(String, String)> {
    [
        (
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
        ),
        ("Accept", "application/json, text/javascript, */*; q=0.01"),
        ("Referer", referer),
        ("X-Requested-With", "XMLHttpRequest"),
    ]
    .iter()
    .map(|&(name, value)| (name.to_string(), value.to_string()))
    .collect()
}
// Find the first HLS media definition in a video page and return its URL.
// The page embeds it as: "format":"hls","videoUrl":"https:\/\/www.tube8.com\/media\/hls\/?s=TOKEN"
// The value runs up to the next double quote; escaped slashes (`\/`) are unescaped.
// Returns None when the marker or the closing quote is missing.
fn extract_hls_endpoint(html: &str) -> Option<String> {
    let (_, after_marker) = html.split_once(r#""format":"hls","videoUrl":""#)?;
    let (escaped_url, _) = after_marker.split_once('"')?;
    Some(escaped_url.replace(r"\/", "/"))
}
// Parse the JSON quality array returned by /media/hls/?s=...
//
// Returns the `videoUrl` of the entry with the highest numeric `quality` (a string
// that parses as a non-negative integer; on a tie the earlier entry wins). If no
// entry has such a quality, the first entry flagged `defaultQuality: true` is
// returned instead.
//
// Entries whose `videoUrl` is missing, not a string, or empty are skipped.
// Previously a single such entry made the whole function return None, throwing
// away every valid entry in the array.
//
// Returns None when the input is not a JSON array or no entry qualifies.
fn best_hls_url(json: &str) -> Option<String> {
    let parsed: serde_json::Value = serde_json::from_str(json).ok()?;
    let entries = parsed.as_array()?;

    // Starting below zero means any quality >= 0 beats "nothing found yet".
    let mut best_quality: i64 = -1;
    let mut best_url: Option<String> = None;
    let mut default_url: Option<String> = None;

    for entry in entries {
        let Some(url) = entry
            .get("videoUrl")
            .and_then(|v| v.as_str())
            .map(|v| v.replace(r"\/", "/"))
            .filter(|v| !v.is_empty())
        else {
            // An unusable entry must not hide the usable ones.
            continue;
        };

        let is_default = entry
            .get("defaultQuality")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        if is_default && default_url.is_none() {
            default_url = Some(url.clone());
        }

        let quality = entry
            .get("quality")
            .and_then(|v| v.as_str())
            .and_then(|v| v.parse::<i64>().ok());
        if let Some(q) = quality {
            if q > best_quality {
                best_quality = q;
                best_url = Some(url);
            }
        }
    }

    best_url.or(default_url)
}
/// Resolve a tube8 video id to an HLS playlist URL.
///
/// Two requests are made: the video page (to find the signed `/media/hls/`
/// endpoint), then that endpoint (to get the list of qualities, from which the best
/// one is chosen). Surrounding slashes and whitespace are stripped from `video_id`.
///
/// Returns an empty string when the id is empty, either request fails, the endpoint
/// is not found in the page, or no playlist URL can be chosen.
pub async fn get_video_url(
    &self,
    video_id: String,
    requester: web::types::State<Requester>,
) -> String {
    let video_id = video_id.trim_matches('/').trim();
    if video_id.is_empty() {
        return String::new();
    }

    let page_url = format!("{BASE_URL}/porn-video/{video_id}/");
    let mut client = requester.get_ref().clone();

    // Step 1: the video page carries the signed /media/hls/ endpoint.
    let Ok(html) = client
        .get_with_headers(&page_url, Self::html_headers(), None)
        .await
    else {
        return String::new();
    };
    let Some(hls_endpoint) = Self::extract_hls_endpoint(&html) else {
        return String::new();
    };

    // Step 2: the signed endpoint lists the available qualities as JSON.
    match client
        .get_with_headers(&hls_endpoint, Self::api_headers(&page_url), None)
        .await
    {
        Ok(json) => Self::best_hls_url(&json).unwrap_or_default(),
        Err(_) => String::new(),
    }
}
}
#[cfg(test)]
mod tests {
    use super::Tube8Proxy;

    // The signed HLS endpoint is taken from the page's mediaDefinition blob
    // and its escaped slashes are restored.
    #[test]
    fn extracts_hls_endpoint_from_page() {
        let page = r#"
mediaDefinition: [{"format":"hls","videoUrl":"https:\/\/www.tube8.com\/media\/hls\/?s=eyJTOKEN","remote":true},
{"format":"mp4","videoUrl":"https:\/\/www.tube8.com\/media\/mp4\/?s=eyJTOKEN","remote":true}],
"#;
        assert_eq!(
            Tube8Proxy::extract_hls_endpoint(page).as_deref(),
            Some("https://www.tube8.com/media/hls/?s=eyJTOKEN"),
        );
    }

    // The numerically highest quality wins, regardless of order or default flag.
    #[test]
    fn picks_best_hls_quality() {
        let listing = r#"[
{"defaultQuality":true,"format":"hls","quality":"480","videoUrl":"https://cdn.example/480/master.m3u8"},
{"defaultQuality":false,"format":"hls","quality":"720","videoUrl":"https://cdn.example/720/master.m3u8"},
{"defaultQuality":false,"format":"hls","quality":"1080","videoUrl":"https://cdn.example/1080/master.m3u8"},
{"defaultQuality":false,"format":"hls","quality":"240","videoUrl":"https://cdn.example/240/master.m3u8"}
]"#;
        assert_eq!(
            Tube8Proxy::best_hls_url(listing).as_deref(),
            Some("https://cdn.example/1080/master.m3u8"),
        );
    }

    // Without any numeric quality the entry flagged as default is used.
    #[test]
    fn falls_back_to_default_quality_when_no_numeric() {
        let listing = r#"[
{"defaultQuality":true,"format":"hls","videoUrl":"https://cdn.example/default/master.m3u8"},
{"defaultQuality":false,"format":"hls","videoUrl":"https://cdn.example/other/master.m3u8"}
]"#;
        assert_eq!(
            Tube8Proxy::best_hls_url(listing).as_deref(),
            Some("https://cdn.example/default/master.m3u8"),
        );
    }
}

124
src/proxies/vidara.rs Normal file
View File

@@ -0,0 +1,124 @@
use ntex::web;
use url::Url;
use serde_json::json;
use crate::util::requester::Requester;
/// Resolves vidara.so detail (`/v/…`) and embed (`/e/…`) pages to the
/// streaming URL reported by the site's `/api/stream` endpoint.
#[derive(Debug, Clone)]
pub struct VidaraProxy {}

impl VidaraProxy {
    /// Creates the (stateless) proxy.
    pub fn new() -> Self {
        VidaraProxy {}
    }

    /// Normalizes a proxied endpoint into `(detail_url, video_id)`.
    ///
    /// Accepts a full URL, a host-prefixed path (`vidara.so/v/ID`) or a bare
    /// path (`v/ID`). Returns `None` when the endpoint is empty, is not an
    /// allowed vidara.so detail URL, or carries no id after the `/v/` or `/e/`
    /// prefix.
    fn normalize_detail_request(endpoint: &str) -> Option<(String, String)> {
        let endpoint = endpoint.trim().trim_start_matches('/');
        if endpoint.is_empty() {
            return None;
        }

        let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
            endpoint.to_string()
        } else if endpoint.starts_with("vidara.so/") || endpoint.starts_with("www.vidara.so/") {
            format!("https://{endpoint}")
        } else {
            format!("https://vidara.so/{endpoint}")
        };

        if !Self::is_allowed_detail_url(&detail_url) {
            return None;
        }

        let parsed = Url::parse(&detail_url).ok()?;
        let mut segments = parsed.path_segments()?;
        // The first segment is the `v` / `e` prefix checked by the allow-list.
        segments.next();
        // A trailing slash produces an empty final segment; take the last
        // non-empty one so an empty filecode is never sent to the API.
        let video_id = segments.rfind(|segment| !segment.is_empty())?.to_owned();

        Some((detail_url, video_id))
    }

    /// Returns `true` only for `https` URLs on `vidara.so` / `www.vidara.so`
    /// whose path starts with `/v/` or `/e/`.
    fn is_allowed_detail_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        if parsed.scheme() != "https" {
            return false;
        }
        let Some(host) = parsed.host_str() else {
            return false;
        };
        let path = parsed.path();
        (host == "vidara.so" || host == "www.vidara.so")
            && (path.starts_with("/v/") || path.starts_with("/e/"))
    }

    /// Posts the video's filecode to `https://vidara.so/api/stream` (with the
    /// detail page as `Referer`) and returns the `streaming_url` field of the
    /// JSON reply. Returns an empty string for an invalid `url`, a failed
    /// request, an unreadable or non-JSON body, or a missing field.
    pub async fn get_video_url(
        &self,
        url: String,
        requester: web::types::State<Requester>,
    ) -> String {
        let mut requester = requester.get_ref().clone();
        let Some((detail_url, video_id)) = Self::normalize_detail_request(&url) else {
            println!("VidaraProxy: Invalid detail URL: {url}");
            return String::new();
        };

        let body = json!({
            "filecode": video_id,
            "device": "web"
        });
        let response = requester
            .post_json(
                "https://vidara.so/api/stream",
                &body,
                vec![("Referer".to_string(), detail_url.clone())],
            )
            .await;

        let Ok(response) = response else {
            return String::new();
        };
        let Ok(response_text) = response.text().await else {
            return String::new();
        };
        let Ok(json): Result<serde_json::Value, _> = serde_json::from_str(&response_text) else {
            return String::new();
        };

        json["streaming_url"]
            .as_str()
            .map(ToOwned::to_owned)
            .unwrap_or_default()
    }
}
#[cfg(test)]
mod tests {
    use super::VidaraProxy;

    /// A full https URL is kept as-is and its last segment is the id.
    #[test]
    fn normalizes_detail_request_with_full_url() {
        let (url, video_id) =
            VidaraProxy::normalize_detail_request("https://vidara.so/v/eJ9O4QqG1Ln2")
                .expect("detail request should parse");
        assert_eq!(url, "https://vidara.so/v/eJ9O4QqG1Ln2");
        assert_eq!(video_id, "eJ9O4QqG1Ln2");
    }

    /// A bare path is resolved against https://vidara.so/.
    #[test]
    fn normalizes_detail_request_with_path_only() {
        let (url, video_id) = VidaraProxy::normalize_detail_request("/e/eJ9O4QqG1Ln2")
            .expect("detail request should parse");
        assert_eq!(url, "https://vidara.so/e/eJ9O4QqG1Ln2");
        assert_eq!(video_id, "eJ9O4QqG1Ln2");
    }

    /// Only /v/ and /e/ paths on vidara.so pass the allow-list.
    #[test]
    fn rejects_requests_outside_the_allow_list() {
        assert!(VidaraProxy::normalize_detail_request("video/1000/demo").is_none());
        assert!(VidaraProxy::normalize_detail_request("https://example.com/v/abc").is_none());
        assert!(VidaraProxy::normalize_detail_request("   ").is_none());
    }
}

187
src/proxies/vjav.rs Normal file
View File

@@ -0,0 +1,187 @@
use base64::{Engine as _, engine::general_purpose::STANDARD};
use ntex::web;
use serde::Deserialize;
use url::Url;
use crate::util::requester::Requester;
/// Site origin; prefixed to the API path and to decoded relative video paths.
const BASE_URL: &str = "https://vjav.com";
/// Stateless proxy that resolves vjav.com video pages to a playable URL.
#[derive(Debug, Clone)]
pub struct VjavProxy {}
/// One element of the JSON array returned by `/api/videofile.php`.
/// Both fields fall back to their default when absent from the JSON.
#[derive(Debug, Deserialize, Clone, Default)]
struct VideofileEntry {
/// Obfuscated video location; decoded with `VjavProxy::decode_video_url`.
#[serde(default)]
video_url: String,
/// `1` marks the entry that is returned in preference to the others.
#[serde(default)]
is_default: i32,
}
impl VjavProxy {
pub fn new() -> Self {
Self {}
}
fn normalize_detail_url(endpoint: &str) -> Option<String> {
let endpoint = endpoint.trim().trim_start_matches('/');
if endpoint.is_empty() {
return None;
}
let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint.to_string()
} else {
format!("https://{}", endpoint.trim_start_matches('/'))
};
Self::is_allowed_detail_url(&detail_url).then_some(detail_url)
}
fn is_allowed_detail_url(url: &str) -> bool {
let Some(parsed) = Url::parse(url).ok() else {
return false;
};
if parsed.scheme() != "https" {
return false;
}
let Some(host) = parsed.host_str() else {
return false;
};
if host != "vjav.com" && host != "www.vjav.com" {
return false;
}
let Some(video_id) = Self::extract_video_id(parsed.path()) else {
return false;
};
!video_id.is_empty()
}
fn extract_video_id(path: &str) -> Option<String> {
let mut segments = path.split('/').filter(|segment| !segment.is_empty());
let first = segments.next()?;
let second = segments.next()?;
if first != "videos" {
return None;
}
second
.chars()
.all(|value| value.is_ascii_digit())
.then_some(second.to_string())
}
fn decode_obfuscated_base64(value: &str) -> String {
value
.chars()
.map(|character| match character {
'А' => 'A',
'В' => 'B',
'Е' => 'E',
'К' => 'K',
'М' => 'M',
'Н' => 'H',
'О' => 'O',
'Р' => 'P',
'С' => 'C',
'Т' => 'T',
'Х' => 'X',
'а' => 'a',
'е' => 'e',
'о' => 'o',
'р' => 'p',
'с' => 'c',
'у' => 'y',
'х' => 'x',
'к' => 'k',
'м' => 'm',
'і' => 'i',
'І' => 'I',
_ => character,
})
.collect()
}
fn decode_base64ish(value: &str) -> Option<String> {
let mut normalized = value.trim().replace('~', "=");
while normalized.len() % 4 != 0 {
normalized.push('=');
}
let bytes = STANDARD.decode(normalized).ok()?;
String::from_utf8(bytes).ok()
}
fn decode_video_url(value: &str) -> Option<String> {
let normalized = Self::decode_obfuscated_base64(value);
if normalized.contains(',') {
let mut parts = normalized.split(',');
let path_part = parts.next()?;
let query_part = parts.next()?;
let path = Self::decode_base64ish(path_part)?;
let query = Self::decode_base64ish(query_part)?;
let separator = if path.contains('?') { "&" } else { "?" };
return Some(format!("{BASE_URL}{path}{separator}{query}&f=video.m3u8"));
}
let decoded = Self::decode_base64ish(&normalized)?;
if decoded.starts_with("http://") || decoded.starts_with("https://") {
return Some(decoded);
}
if decoded.starts_with('/') {
return Some(format!("{BASE_URL}{decoded}"));
}
None
}
}
impl crate::proxies::Proxy for VjavProxy {
    /// Resolves a vjav detail URL through `/api/videofile.php`.
    ///
    /// Returns the decoded URL of the entry flagged `is_default == 1`, else
    /// the first entry that decodes, else an empty string (also on any
    /// validation, request or parse failure).
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some(detail_url) = Self::normalize_detail_url(&url) else {
            return String::new();
        };
        let parsed_id = Url::parse(&detail_url)
            .ok()
            .and_then(|parsed| Self::extract_video_id(parsed.path()));
        let Some(video_id) = parsed_id else {
            return String::new();
        };

        let api_url = format!("{BASE_URL}/api/videofile.php?video_id={video_id}&lifetime=8640000");
        let mut client = requester.get_ref().clone();
        let body = client.get(&api_url, None).await.unwrap_or_default();
        if body.is_empty() {
            return String::new();
        }

        let Ok(entries) = serde_json::from_str::<Vec<VideofileEntry>>(&body) else {
            return String::new();
        };

        let mut first_decoded: Option<String> = None;
        for entry in entries {
            if entry.video_url.trim().is_empty() {
                continue;
            }
            let Some(decoded) = Self::decode_video_url(&entry.video_url) else {
                continue;
            };
            if entry.is_default == 1 {
                return decoded;
            }
            // Remember the first usable entry in case none is the default.
            first_decoded.get_or_insert(decoded);
        }
        first_decoded.unwrap_or_default()
    }
}

View File

@@ -1,21 +1,46 @@
use ntex::web::{self, HttpRequest};
use crate::proxies::archivebate::ArchivebateProxy;
use crate::proxies::clapdat::ClapdatProxy;
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::heavyfetish::HeavyfetishProxy;
use crate::proxies::hqporner::HqpornerProxy;
use crate::proxies::javtiful::JavtifulProxy;
use crate::proxies::pimpbunny::PimpbunnyProxy;
use crate::proxies::porndish::PorndishProxy;
use crate::proxies::pornhd3x::Pornhd3xProxy;
use crate::proxies::spankbang::SpankbangProxy;
use crate::proxies::supjav::SupjavProxy;
use crate::proxies::sxyprn::SxyprnProxy;
use crate::proxies::tube8::Tube8Proxy;
use crate::proxies::vjav::VjavProxy;
use crate::proxies::vidara::VidaraProxy;
use crate::proxies::lulustream::LulustreamProxy;
use crate::proxies::thaiporntv::ThaipornTvProxy;
use crate::proxies::*;
use crate::util::requester::Requester;
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("/archivebate/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/clapdat/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/doodstream/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/lulustream/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/sxyprn/{endpoint}*")
.route(web::post().to(proxy2redirect))
@@ -36,13 +61,33 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/hqporner/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/heavyfetish/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/vjav/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/pornhd3x/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/shooshtime/{endpoint}*")
web::resource("/vidara/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/shooshtime-media/{endpoint}*")
.route(web::post().to(crate::proxies::shooshtime::serve_media))
.route(web::get().to(crate::proxies::shooshtime::serve_media)),
)
@@ -61,11 +106,21 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::post().to(crate::proxies::noodlemagazine::get_image))
.route(web::get().to(crate::proxies::noodlemagazine::get_image)),
)
.service(
web::resource("/fikfap-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::fikfapthumb::get_image))
.route(web::get().to(crate::proxies::fikfapthumb::get_image)),
)
.service(
web::resource("/hanime-cdn/{endpoint}*")
.route(web::post().to(crate::proxies::hanimecdn::get_image))
.route(web::get().to(crate::proxies::hanimecdn::get_image)),
)
.service(
web::resource("/hanime-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::hanimethumb::get_image))
.route(web::get().to(crate::proxies::hanimethumb::get_image)),
)
.service(
web::resource("/hqporner-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::hqpornerthumb::get_image))
@@ -77,9 +132,40 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::get().to(crate::proxies::porndishthumb::get_image)),
);
cfg.service(
web::resource("/pimpbunny-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pimpbunnythumb::get_image))
.route(web::get().to(crate::proxies::pimpbunnythumb::get_image)),
web::resource("/proxy/pornhub-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pornhubthumb::get_image))
.route(web::get().to(crate::proxies::pornhubthumb::get_image)),
);
cfg.service(
web::resource("/thaiporntv/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
);
cfg.service(
web::resource("/tube8/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
);
cfg.service(
web::resource("/supjav/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect))
.route(web::head().to(proxy2redirect)),
);
cfg.service(
web::resource("/jable/{slug}*")
.route(web::get().to(crate::proxies::jable::redirect_to_page))
.route(web::head().to(crate::proxies::jable::redirect_to_page)),
);
cfg.service(
web::resource("/thepornbunny/{slug}*")
.route(web::get().to(crate::proxies::thepornbunny::redirect_to_media))
.route(web::head().to(crate::proxies::thepornbunny::redirect_to_media)),
);
cfg.service(
web::resource("/aps/{endpoint}*")
.route(web::post().to(crate::proxies::allpornstream::serve))
.route(web::get().to(crate::proxies::allpornstream::serve)),
);
}
@@ -100,13 +186,23 @@ async fn proxy2redirect(
/// Maps a proxy name to a freshly constructed proxy wrapped in [`AnyProxy`];
/// unknown names yield `None`.
fn get_proxy(proxy: &str) -> Option<AnyProxy> {
    let resolved = match proxy {
        "archivebate" => AnyProxy::Archivebate(ArchivebateProxy::new()),
        "clapdat" => AnyProxy::Clapdat(ClapdatProxy::new()),
        "doodstream" => AnyProxy::Doodstream(DoodstreamProxy::new()),
        "sxyprn" => AnyProxy::Sxyprn(SxyprnProxy::new()),
        "javtiful" => AnyProxy::Javtiful(JavtifulProxy::new()),
        "hqporner" => AnyProxy::Hqporner(HqpornerProxy::new()),
        "heavyfetish" => AnyProxy::Heavyfetish(HeavyfetishProxy::new()),
        "vjav" => AnyProxy::Vjav(VjavProxy::new()),
        "pornhd3x" => AnyProxy::Pornhd3x(Pornhd3xProxy::new()),
        "vidara" => AnyProxy::Vidara(VidaraProxy::new()),
        "pimpbunny" => AnyProxy::Pimpbunny(PimpbunnyProxy::new()),
        "porndish" => AnyProxy::Porndish(PorndishProxy::new()),
        "spankbang" => AnyProxy::Spankbang(SpankbangProxy::new()),
        "lulustream" => AnyProxy::Lulustream(LulustreamProxy::new()),
        "thaiporntv" => AnyProxy::ThaipornTv(ThaipornTvProxy::new()),
        "tube8" => AnyProxy::Tube8(Tube8Proxy::new()),
        "supjav" => AnyProxy::Supjav(SupjavProxy::new()),
        _ => return None,
    };
    Some(resolved)
}

View File

@@ -70,6 +70,13 @@ pub struct Subscription {
pub status: String, //"incomplete"
}
/// One entry of the `cdnReferrers` list carried by the status payload.
///
/// NOTE(review): presumably tells clients which `Referer` / `Origin` headers
/// to send to CDN hosts whose name contains `hostContains` — confirm against
/// the client implementation.
#[derive(serde::Serialize)]
pub struct CdnReferrer {
/// Substring matched against a host name (the built-in entry uses "phncdn").
pub hostContains: String,
/// Value for the `Referer` header (built-in entry: "https://www.pornhub.com/").
pub referer: String,
/// Value for the `Origin` header (built-in entry: "https://www.pornhub.com").
pub origin: String,
}
impl Subscription {
pub fn new() -> Self {
Subscription {
@@ -93,6 +100,7 @@ pub struct Status {
pub nsfw: bool,
pub categories: Vec<String>,
pub options: Vec<Options>,
pub cdnReferrers: Vec<CdnReferrer>,
pub filtersFooter: String, //"Help us improve our algorithms by selecting the categories that best describe you. These will not necessarily affect your search results, but will help us tailor the app to your interests."
}
impl Status {
@@ -113,6 +121,11 @@ impl Status {
nsfw: true,
categories: vec![],
options: vec![],
cdnReferrers: vec![CdnReferrer {
hostContains: "phncdn".to_string(),
referer: "https://www.pornhub.com/".to_string(),
origin: "https://www.pornhub.com".to_string(),
}],
filtersFooter:
"Help us improve our algorithms by giving us your feedback on the hottub discord."
.to_string(),
@@ -162,5 +175,6 @@ pub struct StatusResponse {
pub nsfw: bool,
pub categories: Vec<String>,
pub options: Vec<Options>,
pub cdnReferrers: Vec<CdnReferrer>,
pub filtersFooter: String,
}

View File

@@ -82,6 +82,7 @@ pub struct UploaderVideoRef {
}
impl UploaderVideoRef {
#[allow(dead_code)]
pub fn from_video_item(item: &VideoItem, uploader_name: &str, uploader_id: &str) -> Self {
Self {
id: item.id.clone(),
@@ -117,6 +118,7 @@ pub struct UploaderLayoutRow {
}
impl UploaderLayoutRow {
#[allow(dead_code)]
pub fn horizontal(title: Option<String>, video_ids: Vec<String>) -> Self {
Self {
rowType: UploaderLayoutRowType::Horizontal,
@@ -125,6 +127,7 @@ impl UploaderLayoutRow {
}
}
#[allow(dead_code)]
pub fn videos(title: Option<String>) -> Self {
Self {
rowType: UploaderLayoutRowType::Videos,
@@ -150,12 +153,14 @@ pub fn normalize_optional_string(value: Option<String>) -> Option<String> {
})
}
/// Formats a Unix timestamp (seconds) as an RFC 3339 UTC string with
/// millisecond precision and a `Z` suffix.
///
/// Returns `None` when `value` is `None`, does not fit in an `i64`, or is
/// outside the range chrono can represent.
#[allow(dead_code)]
pub fn iso_timestamp_from_unix(value: Option<u64>) -> Option<String> {
    // A plain `as i64` cast would wrap values above `i64::MAX` into negative
    // (pre-1970) timestamps instead of rejecting them.
    let seconds = i64::try_from(value?).ok()?;
    let dt = Utc.timestamp_opt(seconds, 0).single()?;
    Some(dt.to_rfc3339_opts(SecondsFormat::Millis, true))
}
/// Converts a rating to a whole number in `0..=100`: the value is clamped to
/// that range first, then rounded to the nearest integer.
#[allow(dead_code)]
fn normalize_rating(value: f32) -> u32 {
    let bounded = f32::clamp(value, 0.0, 100.0);
    let rounded = f32::round(bounded);
    rounded as u32
}

41
src/util/browser.rs Normal file
View File

@@ -0,0 +1,41 @@
use crate::util::{playwright, webdriver};
use std::time::Duration;
use tokio::sync::mpsc;
/// Timeout that `wait_for_src` passes on to `wait_for_attribute` (30 seconds).
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Returns the `src` attribute of the first element matched by `xpath` after
/// navigating to `url`.
///
/// Delegates to [`wait_for_attribute`] with the attribute fixed to `"src"` and
/// the timeout fixed to `DEFAULT_TIMEOUT`. That function races the
/// chromiumoxide (`webdriver`) and Playwright backends against each other;
/// whichever resolves the attribute first wins.
pub async fn wait_for_src(url: &str, xpath: &str) -> Option<String> {
wait_for_attribute(url, xpath, "src", DEFAULT_TIMEOUT).await
}
/// Like [`wait_for_src`], but the caller picks the attribute name and the
/// timeout handed to each backend.
///
/// Both backends run as independent spawned tasks. The first value received
/// is returned; `None` is returned once both tasks have finished without
/// sending anything.
pub async fn wait_for_attribute(
    url: &str,
    xpath: &str,
    attribute: &str,
    wait_timeout: Duration,
) -> Option<String> {
    // Capacity 1 is enough: only the first value is ever read, and a later
    // send that fails because the receiver is gone is deliberately ignored.
    let (sender, mut receiver) = mpsc::channel::<String>(1);

    let webdriver_tx = sender.clone();
    let (page, selector, attr) = (url.to_string(), xpath.to_string(), attribute.to_string());
    tokio::spawn(async move {
        let found = webdriver::wait_for_attribute(&page, &selector, &attr, wait_timeout).await;
        if let Some(value) = found {
            let _ = webdriver_tx.send(value).await;
        }
    });

    let playwright_tx = sender;
    let (page, selector, attr) = (url.to_string(), xpath.to_string(), attribute.to_string());
    tokio::spawn(async move {
        let found = playwright::wait_for_attribute(&page, &selector, &attr, wait_timeout).await;
        if let Some(value) = found {
            let _ = playwright_tx.send(value).await;
        }
    });

    receiver.recv().await
}

View File

@@ -46,6 +46,7 @@ impl VideoCache {
}
}
#[allow(dead_code)]
pub fn entries(&self) -> Option<Vec<(String, (SystemTime, Vec<VideoItem>))>> {
if let Ok(cache) = self.cache.lock() {
// Return a cloned vector of the cache entries
@@ -54,6 +55,7 @@ impl VideoCache {
None
}
#[allow(dead_code)]
pub async fn check(&self) -> Result<(), Box<dyn std::error::Error>> {
let iter = match self.entries() {
Some(iter) => iter,

334
src/util/dean_edwards.rs Normal file
View File

@@ -0,0 +1,334 @@
//! Dean Edwards p,a,c,k,e,d unpacker.
//!
//! Mirrors the original JS decoder:
//!
//! ```text
//! while(c--) if(k[c]) p = p.replace(/\b{c.toString(a)}\b/g, k[c]);
//! ```
//!
//! Usage:
//!
//! ```ignore
//! let source = r#"eval(function(p,a,c,k,e,d){...}('...',36,N,'w0|w1|...'.split('|'),0,{}))"#;
//! let plain = unpack(source)?;
//! ```
use std::fmt;
// ── Error type ────────────────────────────────────────────────────────────────
/// Errors produced while unpacking a p,a,c,k,e,d script.
#[derive(Debug)]
pub enum UnpackError {
    /// The input does not contain the `}('` / `}("` sequence that introduces
    /// the packed call arguments.
    NotPacked,
    /// The argument list was found but could not be parsed; the message names
    /// the part that failed.
    MalformedArgs(String),
    /// The radix is outside the supported range 2..=62.
    BadBase(u32),
}

impl fmt::Display for UnpackError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NotPacked => write!(f, "input does not look like a packed script"),
            Self::MalformedArgs(s) => write!(f, "could not parse packed arguments: {s}"),
            // The range previously read "262"; the supported radixes are 2 through 62.
            Self::BadBase(b) => write!(f, "unsupported base {b} (supported: 2-62)"),
        }
    }
}

impl std::error::Error for UnpackError {}
// ── Public entry point ────────────────────────────────────────────────────────
/// Detect, parse, and unpack a Dean Edwards packed script.
/// Returns the deobfuscated source on success.
pub fn unpack(input: &str) -> Result<String, UnpackError> {
let args = extract_args(input.trim())?;
decode(args)
}
// ── Argument extraction ───────────────────────────────────────────────────────
/// The arguments of the packed call that the decoder needs.
/// The count argument (`c`) is not stored; it only sizes `words`.
struct PackedArgs {
/// `p`: the packed payload text.
payload: String,
/// `a`: the radix used to spell word indices inside the payload.
base: u32,
/// `k`: the dictionary, already split and padded/truncated to the count.
words: Vec<String>,
}
/// Locates the argument list of the outer `function(p,a,c,k,e,d){...}(...)`
/// call and parses `p`, `a`, `c` and `k` out of it.
///
/// Fails with `NotPacked` when neither `}('` nor `}("` occurs in `input`, and
/// with `MalformedArgs` when any of the four arguments cannot be read.
fn extract_args(input: &str) -> Result<PackedArgs, UnpackError> {
    let malformed = |what: &str| UnpackError::MalformedArgs(what.to_owned());

    // The call arguments open right after the '}' that closes the function
    // body; the single-quoted payload form is tried first.
    let brace_at = match input.find("}('") {
        Some(position) => position,
        None => input.find("}(\"").ok_or(UnpackError::NotPacked)?,
    };
    // Step over '}' and the opening '(' to land on the payload's quote.
    let call_args = input[brace_at + 1..].trim_start_matches('(');

    // First argument: the payload string.
    let (payload, tail) = parse_js_string(call_args)
        .ok_or_else(|| malformed("could not read payload string"))?;

    // Next come `,base,count,`.
    let tail = tail.trim_start_matches(',');
    let (base_str, tail) = tail
        .split_once(',')
        .ok_or_else(|| malformed("missing base"))?;
    let base = base_str
        .trim()
        .parse::<u32>()
        .map_err(|_| UnpackError::MalformedArgs(format!("bad base: {base_str}")))?;

    let (count_str, tail) = tail
        .split_once(',')
        .ok_or_else(|| malformed("missing count"))?;
    let count = count_str
        .trim()
        .parse::<usize>()
        .map_err(|_| UnpackError::MalformedArgs(format!("bad count: {count_str}")))?;

    // Finally the dictionary, in either of its two spellings:
    //   'w0|w1|w2'.split('|')
    //   ["w0","w1","w2"]
    let words = parse_dictionary(tail.trim(), count)
        .ok_or_else(|| malformed("could not parse dictionary"))?;

    Ok(PackedArgs { payload, base, words })
}
// ── Dictionary parser ─────────────────────────────────────────────────────────
/// Parses the dictionary argument `k` and sizes it to exactly `count` entries.
///
/// Two spellings are understood: a joined string followed by
/// `.split('<sep>')`, and an array literal of strings. Anything else, or a
/// spelling that cannot be parsed, yields `None`.
fn parse_dictionary(s: &str, count: usize) -> Option<Vec<String>> {
    let words: Vec<String> = match s.chars().next() {
        Some('\'') | Some('"') => {
            // 'w0|w1|…'.split('|') — only the separator's first character is used.
            let (joined, tail) = parse_js_string(s)?;
            let split_at = tail.find(".split(")?;
            let split_arg = tail[split_at + ".split(".len()..].trim();
            let (separator, _) = parse_js_string(split_arg)?;
            // An empty separator falls back to '|'.
            let delimiter = separator.chars().next().unwrap_or('|');
            joined.split(delimiter).map(str::to_owned).collect()
        }
        Some('[') => {
            // ["w0","w1",…] — everything up to the first ']'.
            let close = s.find(']')?;
            parse_array_literal(&s[1..close])
        }
        _ => return None,
    };
    Some(pad_to(words, count))
}
/// Parse the inside of a JS array literal of strings (no nesting needed for
/// the `k` array).
///
/// Each quoted element becomes one word. Any other element — including an
/// elided slot such as the middle of `"a",,"c"` — becomes an empty string so
/// that later words keep their index. Parsing stops at the first unterminated
/// string or at a trailing unquoted element.
fn parse_array_literal(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut rest = s.trim();

    while !rest.is_empty() {
        if rest.starts_with('\'') || rest.starts_with('"') {
            let Some((word, after)) = parse_js_string(rest) else {
                break;
            };
            words.push(word);
            rest = after.trim_start();
            // Consume exactly ONE separator; stripping every consecutive comma
            // would swallow elided slots and shift the following indices.
            if let Some(next) = rest.strip_prefix(',') {
                rest = next.trim_start();
            }
        } else if let Some(comma) = rest.find(',') {
            // Elided slot or unquoted element → empty word.
            words.push(String::new());
            rest = rest[comma + 1..].trim_start();
        } else {
            break;
        }
    }

    words
}
/// Forces `v` to exactly `n` entries: surplus entries are dropped and missing
/// ones are filled with empty strings.
fn pad_to(mut v: Vec<String>, n: usize) -> Vec<String> {
    v.truncate(n);
    while v.len() < n {
        v.push(String::new());
    }
    v
}
// ── JS string parser ──────────────────────────────────────────────────────────
/// Reads one single- or double-quoted JS string literal from the start of `s`.
///
/// Returns the unescaped content together with the text following the closing
/// quote. `\n`, `\r`, `\t`, `\\`, `\'` and `\"` are unescaped; any other
/// escape is kept verbatim (backslash included). Returns `None` when `s` does
/// not start with a quote or the literal is never closed.
fn parse_js_string(s: &str) -> Option<(String, &str)> {
    let mut chars = s.char_indices();
    let quote = match chars.next()? {
        (_, q @ ('\'' | '"')) => q,
        _ => return None,
    };

    let mut content = String::new();
    while let Some((index, ch)) = chars.next() {
        if ch == quote {
            return Some((content, &s[index + ch.len_utf8()..]));
        }
        if ch != '\\' {
            content.push(ch);
            continue;
        }
        // Backslash: the following character decides what is emitted.
        match chars.next() {
            Some((_, 'n')) => content.push('\n'),
            Some((_, 'r')) => content.push('\r'),
            Some((_, 't')) => content.push('\t'),
            Some((_, '\\')) => content.push('\\'),
            Some((_, '\'')) => content.push('\''),
            Some((_, '"')) => content.push('"'),
            Some((_, other)) => {
                content.push('\\');
                content.push(other);
            }
            // Input ended right after the backslash: unclosed string.
            None => return None,
        }
    }
    None // unclosed string
}
// ── Number → string for arbitrary base ───────────────────────────────────────
/// Spells `n` in the given radix, extending JavaScript's
/// `Number.prototype.toString(radix)` digit set to bases 2 through 62.
/// Digits: 0-9, then a-z (10-35), then A-Z (36-61).
///
/// Returns `BadBase` for a radix outside 2..=62.
fn num_to_base_str(mut n: usize, base: u32) -> Result<String, UnpackError> {
    const DIGITS: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if !(2..=62).contains(&base) {
        return Err(UnpackError::BadBase(base));
    }
    let radix = base as usize;

    // Collect digits least-significant first; zero still yields one digit.
    let mut reversed = Vec::new();
    loop {
        reversed.push(DIGITS[n % radix]);
        n /= radix;
        if n == 0 {
            break;
        }
    }
    Ok(reversed.iter().rev().map(|&digit| digit as char).collect())
}
// ── Core decode ───────────────────────────────────────────────────────────────
/// Replaces every whole-word occurrence of `needle` in `haystack` with
/// `replacement`, emulating a JS `/\bneedle\b/g` replace for word-like
/// needles (word characters are ASCII alphanumerics and `_`).
///
/// Hand-rolled so no external regex crate is needed. An empty needle, or one
/// longer than the haystack, leaves the haystack unchanged.
fn replace_word_boundary(haystack: &str, needle: &str, replacement: &str) -> String {
    let h: Vec<char> = haystack.chars().collect();
    let n: Vec<char> = needle.chars().collect();

    // A needle longer than the haystack cannot match; without this guard the
    // window slice below would run past the end and panic.
    if n.is_empty() || n.len() > h.len() {
        return haystack.to_owned();
    }

    let is_word = |c: char| c.is_ascii_alphanumeric() || c == '_';
    let last_start = h.len() - n.len();

    let mut out = String::with_capacity(haystack.len());
    let mut i = 0;
    while i <= last_start {
        let end = i + n.len();
        let boundary_before = i == 0 || !is_word(h[i - 1]);
        let boundary_after = end == h.len() || !is_word(h[end]);
        if boundary_before && boundary_after && h[i..end] == n[..] {
            out.push_str(replacement);
            i = end;
        } else {
            out.push(h[i]);
            i += 1;
        }
    }
    // The tail is too short to hold another match; copy it through.
    out.extend(&h[i..]);
    out
}
/// Applies the dictionary to the payload the way the JS decoder does: from
/// the highest word index down to 0, each non-empty word replaces the
/// whole-word occurrences of its index spelled in `base`.
///
/// Returns `BadBase` for a radix outside 2..=62.
fn decode(args: PackedArgs) -> Result<String, UnpackError> {
    let PackedArgs { payload, base, words } = args;

    // Reject an unsupported radix before touching the payload.
    if !(2..=62).contains(&base) {
        return Err(UnpackError::BadBase(base));
    }

    words
        .iter()
        .enumerate()
        .rev()
        // Empty slots are left alone, like `if(k[c])` in the JS.
        .filter(|(_, word)| !word.is_empty())
        .try_fold(payload, |text, (index, word)| {
            let key = num_to_base_str(index, base)?;
            Ok(replace_word_boundary(&text, &key, word))
        })
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes hand-built arguments directly, without the eval wrapper.
    ///
    /// Original source: `var hello = world + foo;`
    ///   payload = "var 1 = 2 + 0;"
    ///   base    = 36
    ///   words   = ["foo", "hello", "world"]   (indices 0, 1, 2)
    #[test]
    fn test_basic_unpack() {
        let words = ["foo", "hello", "world"];
        let args = PackedArgs {
            payload: String::from("var 1 = 2 + 0;"),
            base: 36,
            words: words.iter().map(|w| w.to_string()).collect(),
        };
        let unpacked = decode(args).unwrap();
        assert_eq!(unpacked, "var hello = world + foo;");
    }

    /// "10" embedded in a longer word is left alone; a free-standing "10" is replaced.
    #[test]
    fn test_word_boundary() {
        let replaced = replace_word_boundary("foo10bar baz 10 qux10", "10", "X");
        assert_eq!(replaced, "foo10bar baz X qux10");
    }

    #[test]
    fn test_num_to_base_str() {
        let cases = [
            (0, 36, "0"),
            (10, 36, "a"),
            (35, 36, "z"),
            (36, 36, "10"),
            (255, 16, "ff"),
            (7, 2, "111"),
        ];
        for (number, base, expected) in cases.iter() {
            assert_eq!(num_to_base_str(*number, *base).unwrap(), *expected);
        }
    }

    #[test]
    fn test_parse_split_dictionary() {
        let source = r#"'foo|bar|baz'.split('|'),0,{})"#;
        let parsed = parse_dictionary(source, 3).unwrap();
        assert_eq!(parsed, vec!["foo", "bar", "baz"]);
    }

    #[test]
    fn test_parse_array_dictionary() {
        let source = r#"["alpha","","gamma"],0,{})"#;
        let parsed = parse_dictionary(source, 3).unwrap();
        assert_eq!(parsed[0], "alpha");
        assert_eq!(parsed[1], "");
        assert_eq!(parsed[2], "gamma");
    }

    /// Full round-trip through the eval wrapper (base 10, tiny example).
    /// payload: "0 1 2"   words: ["hello","world","rust"]   base: 10   count: 3
    #[test]
    fn test_full_eval_wrapper() {
        let packed = r#"eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('0 1 2',10,3,'hello|world|rust'.split('|'),0,{}))"#;
        let unpacked = unpack(packed).unwrap();
        assert_eq!(unpacked, "hello world rust");
    }
}

View File

@@ -10,6 +10,7 @@ use std::time::{SystemTime, UNIX_EPOCH};
static ERROR_CACHE: Lazy<DashMap<String, u64>> = Lazy::new(DashMap::new);
// const COOLDOWN_SECONDS: u64 = 3600; // 1 Hour cooldown
#[allow(dead_code)]
pub fn format_error_chain(err: &dyn Error) -> String {
let mut chain_str = String::new();
let mut current_err: Option<&dyn Error> = Some(err);
@@ -51,11 +52,6 @@ pub async fn send_discord_error_report(
ERROR_CACHE.insert(error_signature, now);
// ---------------------------
let webhook_url = match std::env::var("DISCORD_WEBHOOK") {
Ok(url) => url,
Err(_) => return,
};
const MAX_FIELD: usize = 1024;
let truncate = |s: &str| {
if s.len() > MAX_FIELD {
@@ -102,6 +98,13 @@ pub async fn send_discord_error_report(
}]
});
let webhook_url = match std::env::var("DISCORD_WEBHOOK") {
Ok(url) => url,
Err(_) => {
// println!("{:#?}", &payload);
return;
}
};
let mut requester = requester::Requester::new();
let _ = requester.post_json(&webhook_url, &payload, vec![]).await;
}

View File

@@ -1,10 +1,12 @@
use std::{collections::HashMap, env};
use std::{collections::HashMap, env, sync::Arc};
use serde_json::Value;
use serde_json::json;
use tokio::sync::Mutex;
use wreq::{Client, Proxy};
use wreq_util::Emulation;
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
pub struct FlareSolverrRequest {
pub cmd: String,
pub url: String,
@@ -50,11 +52,22 @@ pub struct FlareSolverrResponse {
pub version: String,
}
#[derive(Clone)]
pub struct Flaresolverr {
url: String,
proxy: bool,
}
/// Session bookkeeping kept behind the lock returned by `global_session_state`.
#[derive(Debug, Default)]
struct SessionState {
/// Id of a FlareSolverr session that has been created and is ready for
/// use; `None` until one has been created.
ready_session: Option<String>,
}
/// Returns the process-wide session state, creating it (with no ready
/// session) on first use.
fn global_session_state() -> &'static Arc<Mutex<SessionState>> {
    static STATE: std::sync::OnceLock<Arc<Mutex<SessionState>>> = std::sync::OnceLock::new();
    STATE.get_or_init(|| {
        let initial = SessionState::default();
        Arc::new(Mutex::new(initial))
    })
}
impl Flaresolverr {
pub fn new(url: String) -> Self {
Self { url, proxy: false }
@@ -64,20 +77,16 @@ impl Flaresolverr {
self.proxy = proxy;
}
pub async fn solve(
async fn post_payload(
&self,
request: FlareSolverrRequest,
) -> Result<FlareSolverrResponse, Box<dyn std::error::Error>> {
payload: Value,
) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
let client = Client::builder().emulation(Emulation::Firefox136).build()?;
let mut req = client
.post(&self.url)
.header("Content-Type", "application/json")
.json(&json!({
"cmd": request.cmd,
"url": request.url,
"maxTimeout": request.maxTimeout,
}));
.json(&payload);
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
@@ -93,8 +102,120 @@ impl Flaresolverr {
}
let response = req.send().await?;
let body = response.json::<FlareSolverrResponse>().await?;
let body = response.json::<Value>().await?;
if body
.get("status")
.and_then(Value::as_str)
.is_some_and(|status| status.eq_ignore_ascii_case("error"))
{
let message = body
.get("message")
.and_then(Value::as_str)
.unwrap_or("FlareSolverr returned status=error");
return Err(message.to_string().into());
}
Ok(body)
}
/// Sends `sessions.create` and returns the session id from the reply.
///
/// Fails when the request fails or the reply has no string `session` field.
async fn create_session(&self) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
    let request = json!({ "cmd": "sessions.create" });
    let reply = self.post_payload(request).await?;
    match reply.get("session").and_then(Value::as_str) {
        Some(id) => Ok(id.to_string()),
        None => Err("sessions.create response missing `session`".into()),
    }
}
/// Issues a `sessions.destroy` command for `session`.
///
/// The response body is ignored; only failures from `post_payload` are reported.
async fn destroy_session(
    &self,
    session: &str,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let payload = json!({
        "cmd": "sessions.destroy",
        "session": session,
    });
    self.post_payload(payload).await.map(|_ignored| ())
}
/// Sends `request` to FlareSolverr bound to `session` and decodes the reply.
///
/// # Errors
/// Propagates failures from `post_payload` and any error from deserializing
/// the response into [`FlareSolverrResponse`].
async fn solve_with_session(
    &self,
    request: FlareSolverrRequest,
    session: &str,
) -> Result<FlareSolverrResponse, Box<dyn std::error::Error + Send + Sync>> {
    let payload = json!({
        "cmd": request.cmd,
        "url": request.url,
        "maxTimeout": request.maxTimeout,
        "session": session,
    });
    let raw = self.post_payload(payload).await?;
    serde_json::from_value::<FlareSolverrResponse>(raw).map_err(Into::into)
}
/// Returns the ready session id stored in `state`, creating one if none is stored.
///
/// A newly created id is written back to `state.ready_session` before it is
/// returned. The caller passes in the state it already has mutable access to.
async fn ensure_ready_session_locked(
    &self,
    state: &mut SessionState,
) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
    match &state.ready_session {
        Some(id) => Ok(id.clone()),
        None => {
            let fresh = self.create_session().await?;
            state.ready_session = Some(fresh.clone());
            Ok(fresh)
        }
    }
}
/// Solves `request` through FlareSolverr using the globally shared, rotating session.
///
/// Steps performed while holding the global session lock (so concurrent
/// callers run one after another):
/// 1. make sure a ready session exists, creating one if needed;
/// 2. start creating a replacement session in a spawned task;
/// 3. run the solve against the current session;
/// 4. destroy the used session (a failure here is logged, not returned);
/// 5. store the replacement as the next ready session.
///
/// Session rotation is best-effort: if no replacement can be created, the
/// stored ready session is cleared so the next call creates one lazily, and
/// the outcome of the solve itself is still returned to the caller.
///
/// # Errors
/// Returns an error when the initial session cannot be obtained or when the
/// solve request itself fails.
pub async fn solve(
    &self,
    request: FlareSolverrRequest,
) -> Result<FlareSolverrResponse, Box<dyn std::error::Error + Send + Sync>> {
    // The accessor already returns a `'static` reference, so no Arc clone is needed.
    let mut state = global_session_state().lock().await;
    let active_session = self.ensure_ready_session_locked(&mut state).await?;

    // Create the replacement concurrently with the solve.
    let replacement_creator = {
        let solver = self.clone();
        tokio::spawn(async move { solver.create_session().await })
    };

    let solve_result = self.solve_with_session(request, &active_session).await;

    let background_session = match replacement_creator.await {
        Ok(Ok(session)) => Some(session),
        Ok(Err(error)) => {
            eprintln!(
                "FlareSolverr replacement session creation failed, retrying inline: {}",
                error
            );
            None
        }
        Err(join_error) => {
            eprintln!(
                "FlareSolverr replacement task join failed, retrying inline: {}",
                join_error
            );
            None
        }
    };

    // One inline retry. A second failure must not discard `solve_result`,
    // so it is logged and the ready session is left empty instead.
    let replacement_session = match background_session {
        Some(session) => Some(session),
        None => match self.create_session().await {
            Ok(session) => Some(session),
            Err(error) => {
                eprintln!(
                    "FlareSolverr inline replacement session creation failed, next solve will create a session: {}",
                    error
                );
                None
            }
        },
    };

    if let Err(error) = self.destroy_session(&active_session).await {
        eprintln!(
            "FlareSolverr session cleanup failed for session '{}': {}",
            active_session, error
        );
    }

    state.ready_session = replacement_session;
    solve_result
}
}

View File

@@ -4,25 +4,44 @@ use crate::providers::{build_proxy_url, strip_url_scheme};
use crate::videos::ServerOptions;
/// Hostnames that `proxy_name_for_url` maps to the `"doodstream"` proxy.
/// Entries are compared against the lowercased URL host by exact match, so
/// `www.` variants are listed explicitly.
const DOODSTREAM_HOSTS: &[&str] = &[
"doodstream.com",
"turboplayers.xyz",
"www.turboplayers.xyz",
"trailerhg.xyz",
"www.trailerhg.xyz",
"streamhg.com",
"www.streamhg.com",
];
pub fn proxy_name_for_url(url: &str) -> Option<&'static str> {
let parsed = Url::parse(url).ok()?;
let host = parsed.host_str()?.to_ascii_lowercase();
/// Hostnames that `proxy_name_for_url` maps to the `"lulustream"` proxy
/// (exact match against the lowercased URL host).
const LULUSTREAM_HOSTS: &[&str] = &[
"luluvdo.com",
"lulustream.com",
];
/// Hostnames that `proxy_name_for_url` maps to the `"vidara"` proxy
/// (exact match against the lowercased URL host).
const VIDARA_HOSTS: &[&str] = &[
"vidara.so",
];
/// Maps a hoster URL to the name of the proxy that handles it.
///
/// A `url` that does not start with `http://` or `https://` is parsed as if
/// it were prefixed with `https://`. Returns `None` when the URL cannot be
/// parsed, has no host, or its lowercased host is in none of the host tables.
#[allow(dead_code)]
pub fn proxy_name_for_url(url: &str) -> Option<&'static str> {
    let has_scheme = url.starts_with("http://") || url.starts_with("https://");
    let parse_result = if has_scheme {
        Url::parse(url)
    } else {
        Url::parse(&format!("https://{}", url))
    };
    let parsed = parse_result.ok()?;
    let host = parsed.host_str()?.to_ascii_lowercase();

    // Checked in order; the first table containing the host wins.
    let families: [(&'static str, &[&str]); 3] = [
        ("doodstream", DOODSTREAM_HOSTS),
        ("lulustream", LULUSTREAM_HOSTS),
        ("vidara", VIDARA_HOSTS),
    ];
    families
        .iter()
        .find(|(_, hosts)| hosts.contains(&host.as_str()))
        .map(|&(name, _)| name)
}
#[allow(dead_code)]
pub fn rewrite_hoster_url(options: &ServerOptions, url: &str) -> String {
match proxy_name_for_url(url) {
Some(proxy_name) => build_proxy_url(options, proxy_name, &strip_url_scheme(url)),

View File

@@ -1,3 +1,6 @@
use std::error::Error;
use std::process::Command;
pub mod cache;
pub mod discord;
pub mod flaresolverr;
@@ -6,6 +9,10 @@ pub mod hoster_proxy;
pub mod proxy;
pub mod requester;
pub mod time;
pub mod browser;
pub mod dean_edwards;
pub mod playwright;
pub mod webdriver;
pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
let s = s.trim();
@@ -50,3 +57,37 @@ pub fn interleave<T: Clone>(lists: &[Vec<T>]) -> Vec<T> {
result
}
/// Returns the value of the `Location` header that `url` answers with, if any.
///
/// Runs the external `curl` binary for a header-only request (no `-L`, so
/// curl is not asked to follow redirects) and scans the returned headers.
/// This call blocks until `curl` exits.
///
/// # Errors
/// Returns an error when `curl` cannot be started or exits with a non-zero
/// status; in the latter case the message carries curl's stderr output.
pub fn get_redirect_location(url: &str) -> Result<Option<String>, Box<dyn Error>> {
    // -s: no progress meter
    // -S: still print error messages (plain -s would leave stderr empty on failure)
    // -I: fetch headers only (HEAD request)
    // --: end of options, so a `url` starting with '-' cannot be read as a curl flag
    let output = Command::new("curl")
        .arg("-sSI")
        .arg("--")
        .arg(url)
        .output()?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!("curl command failed: {}", stderr).into());
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    Ok(parse_location_header(&stdout))
}

/// Extracts the trimmed value of the first `Location` header (name matched
/// case-insensitively) from raw response header text.
fn parse_location_header(headers: &str) -> Option<String> {
    // `lines()` handles both "\n" and "\r\n" separators.
    headers.lines().find_map(|line| {
        // Split on the first ':' only, because URLs contain colons themselves.
        let (name, value) = line.split_once(':')?;
        name.eq_ignore_ascii_case("location")
            .then(|| value.trim().to_string())
    })
}

Some files were not shown because too many files have changed in this diff Show More