Compare commits

..

60 Commits

Author SHA1 Message Date
Simon
c0717fdacf supjav tags-fixes 2026-04-03 18:17:14 +00:00
Simon
e680319541 pornhub 2026-04-03 18:01:03 +00:00
Simon
543e025dda supjav 2026-04-03 17:27:54 +00:00
Simon
067ff3d1da docs 2026-04-03 17:20:11 +00:00
Simon
4b017fafdf install dep 2026-04-01 21:38:28 +00:00
Simon
b07d269154 video from url 2026-04-01 12:21:14 +00:00
Simon
e2796bfd71 freejse searches 2026-03-31 23:01:51 +00:00
Simon
38acb2b5a5 freeuseporn 2026-03-31 16:39:27 +00:00
Simon
fb9098c689 omgxxx fix 2026-03-31 15:08:38 +00:00
Simon
c4be911d8b omgxx uploader 2026-03-31 14:40:31 +00:00
Simon
ef459fa6b5 noodlemagazine fix 2026-03-31 13:50:41 +00:00
Simon
bdc7d61121 uploaders 2026-03-31 13:39:11 +00:00
Simon
80207efa73 noodlemagazine upgrade 2026-03-31 13:09:51 +00:00
Simon
01831c70e7 sextb 2026-03-30 19:21:42 +00:00
Simon
429fb16fbd upgrades 2026-03-30 17:06:28 +00:00
Simon
4df2a672b7 yesporn fix 2026-03-30 06:59:38 +00:00
Simon
bc984a4791 spankbang fix 2026-03-30 02:53:31 +00:00
Simon
0df84a1fac fixed warnings 2026-03-30 02:34:26 +00:00
Simon
de8f88bf41 runtime test fixes 2026-03-30 02:21:53 +00:00
Simon
bbb1792dbe increase runtime test timout to 100s 2026-03-29 17:37:32 +00:00
Simon
e031396459 missav fix 2026-03-29 17:06:23 +00:00
Simon
4e95354880 javtiful fix 2026-03-29 17:05:32 +00:00
Simon
243d19cec0 runtime error handling 2026-03-29 16:24:49 +00:00
Simon
99fe4c947c shooshtime fix 2026-03-23 13:46:55 +00:00
Simon
90ce9c684b pornhd3x 2026-03-23 11:32:22 +00:00
Simon
9021521c00 fixes 2026-03-22 17:26:12 +00:00
Simon
fbe04fc752 upgrades 2026-03-22 15:56:25 +00:00
Simon
52f108da8e tiktok group 2026-03-22 12:46:30 +00:00
Simon
50ea0e73b7 pimpbunny fix 2026-03-22 12:27:46 +00:00
Simon
a2d31d90a1 more debug info 2026-03-21 22:29:45 +00:00
Simon
43594a6cfe hottub skill 2026-03-21 21:27:38 +00:00
Simon
7b66e5b28a debugging and single provider compime 2026-03-21 21:18:43 +00:00
Simon
05ea90405b globe for status 2026-03-21 20:21:43 +00:00
Simon
9bba981796 status changes 2026-03-21 19:29:30 +00:00
Simon
cecc1f994b status updated 2026-03-21 19:15:35 +00:00
Simon
75b7241803 hentaihaven fix 2026-03-21 17:53:07 +00:00
Simon
1b32df0c35 pimpbunny fix 2026-03-20 22:08:02 +00:00
Simon
259a07686d noodlemagazine fix 2026-03-20 21:05:18 +00:00
Simon
46cd348148 pimpbunny changes 2026-03-20 21:02:47 +00:00
Simon
dd7c4ec6a1 noodlemagazine thumb proxy 2026-03-20 13:52:06 +00:00
Simon
99e4a77507 no embed in video element 2026-03-19 19:04:49 +00:00
Simon
2b26019a66 vrporn 2026-03-18 22:54:51 +00:00
Simon
f88b789f25 yesporn 2026-03-18 21:48:05 +00:00
Simon
21ef0ebf17 hsex page >1 fix 2026-03-18 12:56:11 +00:00
Simon
ce1afd9873 status upgrade 2026-03-18 12:13:28 +00:00
Simon
ce781e2099 hsex 2026-03-18 11:22:48 +00:00
Simon
a66f44c747 heavyfetish and other changes 2026-03-17 21:04:11 +00:00
Simon
9ca9e820d9 remove embed 2026-03-17 09:58:48 +00:00
Simon
0563a7231a pimpbunny updates 2026-03-17 09:53:34 +00:00
Simon
3c3af70ed6 thumb updates 2026-03-17 09:44:38 +00:00
Simon
7680a93fab pimpbunny thumb 2026-03-17 09:17:28 +00:00
Simon
3a2e77436e swap to curl-cffi 2026-03-17 08:41:48 +00:00
Simon
9172941ac6 fixes 2026-03-17 01:12:52 +00:00
Simon
a977381b3b porndish fix 2026-03-17 00:57:50 +00:00
Simon
0d20fc7a7e docker update 2026-03-17 00:31:10 +00:00
Simon
0c11959d94 porndish 2026-03-17 00:24:29 +00:00
Simon
f8a09b0e97 normalize queries 2026-03-16 19:46:00 +00:00
Simon
9751c25b95 shooshtime 2026-03-16 19:37:05 +00:00
Simon
1f99eec5a3 fix 2 electric boogaloo 2026-03-16 00:16:07 +00:00
Simon
448efeff1e hanime thumbnail fix 2026-03-15 23:47:32 +00:00
81 changed files with 30453 additions and 747 deletions

3
.gitignore vendored
View File

@@ -3,7 +3,7 @@
# will have compiled files and executables
debug/
target/
.testing/
.*/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
@@ -17,4 +17,3 @@ Cargo.lock
*.db
migrations/.keep
.vscode

View File

@@ -2,6 +2,10 @@
name = "hottub"
version = "0.1.0"
edition = "2024"
build = "build.rs"
[features]
debug = []
[dependencies]
cute = "0.3.0"
@@ -32,6 +36,7 @@ dashmap = "6.1.0"
lru = "0.16.3"
rand = "0.10.0"
chrono = "0.4.44"
md5 = "0.8.0"
[lints.rust]
unexpected_cfgs = "allow"

View File

@@ -1,10 +1,10 @@
FROM debian
FROM ubuntu:24.04
# FROM consol/debian-xfce-vnc:latest
# Switch to root user to install additional software
USER 0
RUN apt update
RUN apt install -yq libssl-dev \
RUN apt install -yq yt-dlp libssl-dev \
wget curl unzip \
openssl \
ca-certificates \
@@ -28,5 +28,6 @@ RUN apt install -yq libssl-dev \
sudo \
&& apt-get clean
USER 1000
RUN python3 -m pip install --break-system-packages --no-cache-dir curl_cffi
USER 1000

323
SKILL.md Normal file
View File

@@ -0,0 +1,323 @@
---
name: hottub
description: Work on the Hottub Rust server. Use this skill when you need the real build/run commands, compile-time single-provider builds, runtime env vars, API and proxy endpoint trigger examples, or yt-dlp verification steps for returned media URLs.
---
# Hottub
Hottub is a Rust `ntex` server. The main entrypoints are:
- `src/main.rs`: server startup, env loading, root redirect, `/api`, `/proxy`, static files
- `src/api.rs`: `/api/status`, `/api/videos`, `/api/test`, `/api/proxies`
- `src/proxy.rs`: `/proxy/...` redirect and media/image proxy routes
- `src/providers/mod.rs`: provider registry, compile-time provider selection, channel metadata
- `src/util/requester.rs`: outbound HTTP, Burp proxy support, FlareSolverr fallback
## Build and run
Default local run:
```bash
cargo run
```
Run with compiled-in debug logs:
```bash
cargo run --features debug
```
Compile a single-provider binary:
```bash
HOT_TUB_PROVIDER=hsex cargo build
```
Single-provider binary with debug logs:
```bash
HOT_TUB_PROVIDER=hsex cargo run --features debug
```
Notes:
- `HOT_TUB_PROVIDER` is the preferred compile-time selector.
- `HOTTUB_PROVIDER` is also supported as a fallback alias.
- Single-provider builds register only that provider at compile time, so other providers are not constructed and their init paths do not run.
- In a single-provider build, `/api/videos` requests with `"channel": "all"` are remapped to the compiled provider.
- The server binds to `0.0.0.0:18080`.
Useful checks:
```bash
cargo check -q
HOT_TUB_PROVIDER=hsex cargo check -q
HOT_TUB_PROVIDER=hsex cargo check -q --features debug
```
## Environment
Runtime env vars:
- `DATABASE_URL` required. SQLite path, for example `hottub.db`.
- `RUST_LOG` optional. Defaults to `warn` if unset.
- `PROXY` optional. Any value other than `"0"` enables proxy mode in the shared requester.
- `BURP_URL` optional. Outbound HTTP proxy used when `PROXY` is enabled.
- `FLARE_URL` optional but strongly recommended for provider work. Used for FlareSolverr fallback and some providers that call it directly.
- `DOMAIN` optional. Used for the `/` redirect target.
- `DISCORD_WEBHOOK` optional. Enables `/api/test` and provider error reporting to Discord.
Build-time env vars:
- `HOT_TUB_PROVIDER` optional. Compile only one provider into the binary.
- `HOTTUB_PROVIDER` optional fallback alias for the same purpose.
Practical `.env` baseline:
```dotenv
DATABASE_URL=hottub.db
RUST_LOG=info
PROXY=0
BURP_URL=http://127.0.0.1:8081
FLARE_URL=http://127.0.0.1:8191/v1
DOMAIN=127.0.0.1:18080
DISCORD_WEBHOOK=
```
## Endpoint surface
Root:
- `GET /`
- Returns `302 Found`
- Redirects to `hottub://source?url=<DOMAIN-or-request-host>`
Status API:
- `GET /api/status`
- `POST /api/status`
- Returns the server status and channel list
- The `User-Agent` matters because channel visibility can depend on parsed client version
Videos API:
- `POST /api/videos`
- Main provider execution endpoint
- Body is JSON matching `VideosRequest` in `src/videos.rs`
Diagnostics:
- `GET /api/test`
- Sends a Discord test error if `DISCORD_WEBHOOK` is configured
- `GET /api/proxies`
- Returns the current outbound proxy snapshot
Proxy endpoints:
- Redirect proxies:
- `GET|POST /proxy/sxyprn/{endpoint}*`
- `GET|POST /proxy/javtiful/{endpoint}*`
- `GET|POST /proxy/spankbang/{endpoint}*`
- `GET|POST /proxy/porndish/{endpoint}*`
- `GET|POST /proxy/pimpbunny/{endpoint}*`
- Media/image proxies:
- `GET|POST /proxy/noodlemagazine/{endpoint}*`
- `GET|POST /proxy/noodlemagazine-thumb/{endpoint}*`
- `GET|POST /proxy/hanime-cdn/{endpoint}*`
- `GET|POST /proxy/hqporner-thumb/{endpoint}*`
- `GET|POST /proxy/porndish-thumb/{endpoint}*`
- `GET|POST /proxy/pimpbunny-thumb/{endpoint}*`
Everything else under `/` is served from `static/`.
## How to trigger endpoints
Verify the root redirect:
```bash
curl -i http://127.0.0.1:18080/
```
Fetch status with a Hot Tub-like user agent:
```bash
curl -s \
-H 'User-Agent: Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0' \
http://127.0.0.1:18080/api/status | jq
```
Equivalent `POST /api/status`:
```bash
curl -s -X POST http://127.0.0.1:18080/api/status | jq
```
Minimal videos request:
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-H 'User-Agent: Hot%20Tub/22c CFNetwork/1494.0.7 Darwin/23.4.0' \
-d '{"channel":"hsex","sort":"date","page":1,"perPage":10}' | jq
```
Use `"all"` against a normal multi-provider build:
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"all","sort":"date","page":1,"perPage":10}' | jq
```
Use `"all"` against a single-provider build:
```bash
HOT_TUB_PROVIDER=hsex cargo run --features debug
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"all","sort":"date","page":1,"perPage":10}' | jq
```
Literal query behavior:
- Quoted queries are treated as literal substring filters after provider fetch.
- Leading `#` is stripped before matching.
Example:
```bash
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"hsex","query":"\"teacher\"","page":1,"perPage":10}' | jq
```
Trigger the Discord test route:
```bash
curl -i http://127.0.0.1:18080/api/test
```
Inspect proxy state:
```bash
curl -s http://127.0.0.1:18080/api/proxies | jq
```
Trigger a redirect proxy and inspect the `Location` header:
```bash
curl -I 'http://127.0.0.1:18080/proxy/spankbang/some/provider/path'
```
Trigger a media proxy directly:
```bash
curl -I 'http://127.0.0.1:18080/proxy/noodlemagazine/some/media/path'
```
## Videos request fields
Commonly useful request keys:
- `channel`
- `sort`
- `query`
- `page`
- `perPage`
- `featured`
- `category`
- `sites`
- `all_provider_sites`
- `filter`
- `language`
- `networks`
- `stars`
- `categories`
- `duration`
- `sexuality`
Most provider debugging only needs:
```json
{
"channel": "hsex",
"sort": "date",
"query": null,
"page": 1,
"perPage": 10
}
```
## Recommended provider-debug workflow
1. Build only the provider you care about.
2. Run with `--features debug`.
3. Hit `/api/status` to confirm only the expected channel is present.
4. Hit `/api/videos` with either the provider id or `"all"`.
5. Inspect `.items[0].url`, `.items[0].formats`, `.items[0].thumb`, and any local `/proxy/...` URLs.
6. Verify the media URL with `yt-dlp`.
Example:
```bash
HOT_TUB_PROVIDER=hsex cargo run --features debug
curl -s http://127.0.0.1:18080/api/status | jq '.channels[].id'
curl -s http://127.0.0.1:18080/api/videos \
-H 'Content-Type: application/json' \
-d '{"channel":"all","page":1,"perPage":1}' | tee /tmp/hottub-video.json | jq
```
## yt-dlp verification
Use `yt-dlp` to prove that a returned video URL or format is actually consumable.
Check the primary item URL:
```bash
URL="$(jq -r '.items[0].url' /tmp/hottub-video.json)"
yt-dlp -v --simulate "$URL"
```
Prefer the first explicit format when present:
```bash
FORMAT_URL="$(jq -r '.items[0].formats[0].url' /tmp/hottub-video.json)"
yt-dlp -v -F "$FORMAT_URL"
yt-dlp -v --simulate "$FORMAT_URL"
```
If the format contains required HTTP headers, pass them through:
```bash
yt-dlp -v --simulate \
--add-header 'Referer: https://example.com/' \
--add-header 'User-Agent: Mozilla/5.0 ...' \
"$FORMAT_URL"
```
If you want to build the command from JSON:
```bash
FORMAT_URL="$(jq -r '.items[0].formats[0].url' /tmp/hottub-video.json)"
mapfile -t HDRS < <(
jq -r '.items[0].formats[0].http_headers // {} | to_entries[] | "--add-header=\(.key): \(.value)"' \
/tmp/hottub-video.json
)
yt-dlp -v --simulate "${HDRS[@]}" "$FORMAT_URL"
```
For local proxy URLs returned by Hottub, verify the server endpoint directly:
```bash
LOCAL_URL="$(jq -r '.items[0].formats[0].url // .items[0].url' /tmp/hottub-video.json)"
yt-dlp -v --simulate "$LOCAL_URL"
```
## Interaction rules
- Prefer compile-time single-provider builds for provider work.
- Prefer `/api/status` before `/api/videos` so you know what channels the current binary exposes.
- When reproducing client-specific issues, send a realistic `User-Agent`.
- When debugging fetch failures, enable `debug` and set `FLARE_URL`.
- When debugging outbound request behavior, set `PROXY=1` and `BURP_URL=...`.
- Use `/api/test` only when you intentionally want a Discord notification.

360
build.rs Normal file
View File

@@ -0,0 +1,360 @@
use std::env;
use std::fs;
use std::path::PathBuf;
/// One provider that the build script can compile into the binary.
struct ProviderDef {
    // Stable channel id; also used as the `hottub_provider` cfg value and as
    // the `HOT_TUB_PROVIDER`/`HOTTUB_PROVIDER` selector value.
    id: &'static str,
    // Source module name: the file lives at `src/providers/<module>.rs`.
    module: &'static str,
    // Provider struct type name; the generated registry calls `<ty>::new()`.
    ty: &'static str,
}
/// Every provider known to the build.
///
/// `id` doubles as the channel id and the compile-time selector value;
/// `module`/`ty` locate the implementation under `src/providers`. The special
/// "all" entry is the aggregate provider, which the generated metadata lookup
/// also matches under the alias "hottub".
const PROVIDERS: &[ProviderDef] = &[
    ProviderDef { id: "all", module: "all", ty: "AllProvider" },
    ProviderDef { id: "perverzija", module: "perverzija", ty: "PerverzijaProvider" },
    ProviderDef { id: "hanime", module: "hanime", ty: "HanimeProvider" },
    ProviderDef { id: "pornhub", module: "pornhub", ty: "PornhubProvider" },
    ProviderDef { id: "pornhd3x", module: "pornhd3x", ty: "Pornhd3xProvider" },
    ProviderDef { id: "spankbang", module: "spankbang", ty: "SpankbangProvider" },
    ProviderDef { id: "rule34video", module: "rule34video", ty: "Rule34videoProvider" },
    ProviderDef { id: "redtube", module: "redtube", ty: "RedtubeProvider" },
    ProviderDef { id: "okporn", module: "okporn", ty: "OkpornProvider" },
    ProviderDef { id: "pornhat", module: "pornhat", ty: "PornhatProvider" },
    ProviderDef { id: "perfectgirls", module: "perfectgirls", ty: "PerfectgirlsProvider" },
    ProviderDef { id: "okxxx", module: "okxxx", ty: "OkxxxProvider" },
    ProviderDef { id: "homoxxx", module: "homoxxx", ty: "HomoxxxProvider" },
    ProviderDef { id: "missav", module: "missav", ty: "MissavProvider" },
    ProviderDef { id: "xxthots", module: "xxthots", ty: "XxthotsProvider" },
    ProviderDef { id: "yesporn", module: "yesporn", ty: "YespornProvider" },
    ProviderDef { id: "sxyprn", module: "sxyprn", ty: "SxyprnProvider" },
    ProviderDef { id: "porn00", module: "porn00", ty: "Porn00Provider" },
    ProviderDef { id: "youjizz", module: "youjizz", ty: "YoujizzProvider" },
    ProviderDef { id: "paradisehill", module: "paradisehill", ty: "ParadisehillProvider" },
    ProviderDef { id: "porn4fans", module: "porn4fans", ty: "Porn4fansProvider" },
    ProviderDef { id: "pornmz", module: "pornmz", ty: "PornmzProvider" },
    ProviderDef { id: "porndish", module: "porndish", ty: "PorndishProvider" },
    ProviderDef { id: "shooshtime", module: "shooshtime", ty: "ShooshtimeProvider" },
    ProviderDef { id: "pornzog", module: "pornzog", ty: "PornzogProvider" },
    ProviderDef { id: "omgxxx", module: "omgxxx", ty: "OmgxxxProvider" },
    ProviderDef { id: "beeg", module: "beeg", ty: "BeegProvider" },
    ProviderDef { id: "tnaflix", module: "tnaflix", ty: "TnaflixProvider" },
    ProviderDef { id: "tokyomotion", module: "tokyomotion", ty: "TokyomotionProvider" },
    ProviderDef { id: "viralxxxporn", module: "viralxxxporn", ty: "ViralxxxpornProvider" },
    ProviderDef { id: "vrporn", module: "vrporn", ty: "VrpornProvider" },
    ProviderDef { id: "rule34gen", module: "rule34gen", ty: "Rule34genProvider" },
    ProviderDef { id: "xxdbx", module: "xxdbx", ty: "XxdbxProvider" },
    ProviderDef { id: "xfree", module: "xfree", ty: "XfreeProvider" },
    ProviderDef { id: "hqporner", module: "hqporner", ty: "HqpornerProvider" },
    ProviderDef { id: "pmvhaven", module: "pmvhaven", ty: "PmvhavenProvider" },
    ProviderDef { id: "noodlemagazine", module: "noodlemagazine", ty: "NoodlemagazineProvider" },
    ProviderDef { id: "pimpbunny", module: "pimpbunny", ty: "PimpbunnyProvider" },
    ProviderDef { id: "javtiful", module: "javtiful", ty: "JavtifulProvider" },
    ProviderDef { id: "supjav", module: "supjav", ty: "SupjavProvider" },
    ProviderDef { id: "hypnotube", module: "hypnotube", ty: "HypnotubeProvider" },
    ProviderDef { id: "freepornvideosxxx", module: "freepornvideosxxx", ty: "FreepornvideosxxxProvider" },
    ProviderDef { id: "freeuseporn", module: "freeuseporn", ty: "FreeusepornProvider" },
    ProviderDef { id: "heavyfetish", module: "heavyfetish", ty: "HeavyfetishProvider" },
    ProviderDef { id: "hsex", module: "hsex", ty: "HsexProvider" },
    ProviderDef { id: "sextb", module: "sextb", ty: "SextbProvider" },
    ProviderDef { id: "hentaihaven", module: "hentaihaven", ty: "HentaihavenProvider" },
    ProviderDef { id: "chaturbate", module: "chaturbate", ty: "ChaturbateProvider" },
];
fn main() {
println!("cargo:rerun-if-changed=build.rs");
println!("cargo:rerun-if-env-changed=HOT_TUB_PROVIDER");
println!("cargo:rerun-if-env-changed=HOTTUB_PROVIDER");
println!("cargo:rustc-check-cfg=cfg(hottub_single_provider)");
let provider_cfg_values = PROVIDERS
.iter()
.map(|provider| format!("\"{}\"", provider.id))
.collect::<Vec<_>>()
.join(", ");
println!("cargo:rustc-check-cfg=cfg(hottub_provider, values({provider_cfg_values}))");
let selected = env::var("HOT_TUB_PROVIDER")
.or_else(|_| env::var("HOTTUB_PROVIDER"))
.ok()
.map(|value| value.trim().to_string())
.filter(|value| !value.is_empty());
let providers = match selected.as_deref() {
Some(selected_id) => {
let provider = PROVIDERS
.iter()
.find(|provider| provider.id == selected_id)
.unwrap_or_else(|| {
panic!("Unknown provider `{selected_id}` from HOT_TUB_PROVIDER/HOTTUB_PROVIDER")
});
println!("cargo:rustc-cfg=hottub_single_provider");
println!("cargo:rustc-cfg=hottub_provider=\"{selected_id}\"");
vec![provider]
}
None => PROVIDERS.iter().collect(),
};
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR"));
let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR"));
let modules = providers
.iter()
.map(|provider| {
let module_path = manifest_dir
.join("src/providers")
.join(format!("{}.rs", provider.module));
format!(
"#[path = r#\"{}\"#]\npub mod {};",
module_path.display(),
provider.module
)
})
.collect::<Vec<_>>()
.join("\n");
fs::write(out_dir.join("provider_modules.rs"), format!("{modules}\n"))
.expect("write provider_modules.rs");
let registry = providers
.iter()
.map(|provider| {
format!(
"m.insert(\"{id}\", Arc::new({module}::{ty}::new()) as DynProvider);",
id = provider.id,
module = provider.module,
ty = provider.ty
)
})
.collect::<Vec<_>>()
.join("\n");
fs::write(
out_dir.join("provider_registry.rs"),
format!("{{\n{registry}\n}}\n"),
)
.expect("write provider_registry.rs");
let metadata_arms = providers
.iter()
.map(|provider| {
if provider.id == "all" {
format!(
"\"all\" | \"hottub\" => Some({module}::CHANNEL_METADATA),",
module = provider.module
)
} else {
format!(
"\"{id}\" => Some({module}::CHANNEL_METADATA),",
id = provider.id,
module = provider.module
)
}
})
.collect::<Vec<_>>()
.join("\n");
fs::write(
out_dir.join("provider_metadata_fn.rs"),
format!("match id {{\n{metadata_arms}\n_ => None,\n}}\n"),
)
.expect("write provider_metadata_fn.rs");
let selection = match selected.as_deref() {
Some(selected_id) => format!(
"pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = Some(\"{selected_id}\");"
),
None => "pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = None;".to_string(),
};
fs::write(
out_dir.join("provider_selection.rs"),
format!("{selection}\n"),
)
.expect("write provider_selection.rs");
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,60 @@
# Uploaders Endpoint Plan
## Summary
Implement `POST /api/uploaders` using the Hot Tub uploader profile contract and ship it framework-first. The server will expose shared uploader request/response types, a provider hook for uploader lookup, endpoint routing in `src/api.rs`, and a first real provider implementation in `hsex`.
## Implementation
- Add dedicated uploader API types in `src/uploaders.rs`:
- `UploadersRequest`
- `UploaderProfile`
- `UploaderChannelStat`
- `UploaderVideoRef`
- `UploaderLayoutRow`
- Keep camelCase as the canonical serialized shape.
- Accept documented decode aliases:
- `uploader_id`
- `uploader_name`
- `profile_content`
- `profile_picture_url`
- `video_ids`
- `horizontal_videos`
- Add `POST /api/uploaders` in `src/api.rs`.
- Validate that at least one of `uploaderId` or `uploaderName` is present.
- Return:
- `400` for invalid request
- `404` for no match
- `500` for provider execution failure
- Add `Provider::get_uploader(...)` with a default `Ok(None)` implementation.
- Add a guarded uploader execution helper in `src/providers/mod.rs`.
- Use canonical uploader IDs in the format `<channel>:<provider-local-id>`.
- Implement the first provider-backed uploader profile in `src/providers/hsex.rs`.
## Hsex Strategy
- Resolve uploader lookup by canonical uploader ID or exact uploader name.
- Reuse existing uploader archive discovery and archive page fetching.
- Build uploader profile metadata from uploader archive pages.
- Populate `videos` with `UploaderVideoRef` values derived from existing `VideoItem`s.
- Always return `layout`.
- When `profileContent == true`, return:
- `videos`
- `tapes: []`
- `playlists: []`
- a `"For You"` horizontal row plus the default videos row
- When `profileContent == false`, return metadata and layout only.
## Tests
- Request alias decoding for uploader request fields.
- Response alias decoding for avatar and layout row compatibility fields.
- Endpoint helper tests for request validation and provider routing.
- Hsex uploader ID generation and uploader page parsing coverage.
## Assumptions
- The first ship focuses on the endpoint framework and one real provider implementation.
- Providers without explicit uploader support remain unsupported by `/api/uploaders`.
- Name-based resolution uses exact display-name matching.
- `videoCount` and `totalViews` are best-effort when the upstream site does not expose authoritative profile totals.

1
prompts/new-channel.md Normal file
View File

@@ -0,0 +1 @@
write a provider for the site "vjav.com". use playwright-mcp to get the index site and get the layout of videos from this. also figure out how searches work and if it has a different layout then. also find out how more videos get loaded/more pages urls. also find if it has video sites like recommended,most views etc and include them. try to find as much information about a video on the site as possible and build the video items with all the information. put tags and uploader into lists so they can be utilized later. on query check if the search already exists in these FilterObject arrays and use them for the url instead (similar to omgxxx). after coding this, test that all provided urls work, yt-dlp can download the video.url, the thumbnails work, searches, tag-/uploader- searches work, more pages work. Keep a key-value storage for all provided Tag titles to tag IDs to do the correct crawl for queries of a tag. analyze the provider too and set the channel tags and put it in a group that matches this provider or create a new group where it fits in. do not include the embed field in the videoitems responses. when parsing uploaders, make sure that the uploaders are parsed correctly into the uploader list. if unsure about the endpoints, check with the docs dir.

View File

@@ -1,7 +1,9 @@
use crate::providers::{
ALL_PROVIDERS, DynProvider, panic_payload_to_string, report_provider_error,
run_provider_guarded,
ALL_PROVIDERS, DynProvider, build_status_response, panic_payload_to_string,
report_provider_error, resolve_provider_for_build, run_provider_guarded,
run_uploader_provider_guarded,
};
use crate::uploaders::{UploaderProfile, UploadersRequest};
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::proxy::{Proxy, all_proxies_snapshot};
@@ -10,9 +12,12 @@ use crate::{DbPool, db, status::*, videos::*};
use ntex::http::header;
use ntex::web;
use ntex::web::HttpRequest;
use serde_json::Value;
use std::cmp::Ordering;
use std::io;
use std::process::Command;
use tokio::task;
use url::Url;
#[derive(Debug, Clone)]
pub struct ClientVersion {
@@ -83,6 +88,201 @@ impl Ord for ClientVersion {
}
}
/// Normalize a raw search query.
///
/// Returns `(query, literal_query)`: `query` is the cleaned search text and
/// `literal_query` is the lowercased inner text when the query was wrapped in
/// matching single or double quotes (a literal substring search). Any number
/// of leading `#` markers is stripped first; an empty or all-whitespace query
/// normalizes to `(None, None)`.
fn normalize_query(raw_query: Option<&str>) -> (Option<String>, Option<String>) {
    let Some(raw) = raw_query else {
        return (None, None);
    };
    let mut cleaned = raw.trim();
    if cleaned.is_empty() {
        return (None, None);
    }
    // Drop hashtag-style prefixes, e.g. "##tag" -> "tag".
    while let Some(rest) = cleaned.strip_prefix('#') {
        cleaned = rest.trim_start();
    }
    if cleaned.is_empty() {
        return (None, None);
    }
    // A quoted query only counts as literal when the quotes match and the
    // inner text is non-empty; otherwise the quotes are kept verbatim.
    let quoted = cleaned.len() >= 2
        && ((cleaned.starts_with('"') && cleaned.ends_with('"'))
            || (cleaned.starts_with('\'') && cleaned.ends_with('\'')));
    let mut literal = None;
    if quoted {
        let inner = cleaned[1..cleaned.len() - 1].trim();
        if !inner.is_empty() {
            cleaned = inner;
            literal = Some(inner.to_ascii_lowercase());
        }
    }
    (Some(cleaned.to_string()), literal)
}
/// Case-insensitive substring match of `literal_query` against a video's
/// title, uploader name, or any of its tags. `literal_query` is expected to
/// already be lowercase (as produced by `normalize_query`).
fn video_matches_literal_query(video: &VideoItem, literal_query: &str) -> bool {
    let hit = |text: &str| text.to_ascii_lowercase().contains(literal_query);
    if hit(&video.title) {
        return true;
    }
    if video.uploader.as_deref().is_some_and(hit) {
        return true;
    }
    video
        .tags
        .as_ref()
        .is_some_and(|tags| tags.iter().any(|tag| hit(tag)))
}
/// Treat the query as a direct video URL when it parses as an absolute
/// http(s) URL; returns the normalized URL string, or `None` for anything
/// else (empty text, parse failure, or a non-http(s) scheme).
fn normalize_query_url(query: &str) -> Option<String> {
    let candidate = query.trim();
    if candidate.is_empty() {
        return None;
    }
    let url = Url::parse(candidate).ok()?;
    if matches!(url.scheme(), "http" | "https") {
        Some(url.to_string())
    } else {
        None
    }
}
/// Build a `VideoItem` for `channel` from a single yt-dlp `-J` JSON payload.
///
/// Returns `None` when the payload has no non-empty title, or when no id can
/// be derived. `fallback_url` is used as the page URL when `webpage_url` is
/// absent or not an absolute http(s) URL, and the page URL's last path
/// segment serves as a fallback id when the payload carries no `id` field.
fn video_item_from_ytdlp_payload(
    channel: &str,
    fallback_url: &str,
    payload: &Value,
) -> Option<VideoItem> {
    // A non-empty title is mandatory; bail out early otherwise.
    let title = payload
        .get("title")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())?
        .to_string();
    // Only accept absolute http(s) page URLs; otherwise fall back to the
    // URL the caller originally queried.
    let page_url = payload
        .get("webpage_url")
        .and_then(|value| value.as_str())
        .filter(|value| value.starts_with("http://") || value.starts_with("https://"))
        .unwrap_or(fallback_url)
        .to_string();
    // Prefer yt-dlp's own id; fall back to the last path segment of the
    // page URL. No id at all means no item.
    let id = payload
        .get("id")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned)
        .or_else(|| {
            Url::parse(&page_url)
                .ok()
                .and_then(|parsed| parsed.path_segments()?.next_back().map(ToOwned::to_owned))
        })?;
    // Thumbnail and duration are optional; default to "" and 0.
    let thumb = payload
        .get("thumbnail")
        .and_then(|value| value.as_str())
        .unwrap_or("")
        .to_string();
    let duration = payload
        .get("duration")
        .and_then(|value| value.as_u64())
        .and_then(|value| u32::try_from(value).ok())
        .unwrap_or(0);
    let mut item = VideoItem::new(id, title, page_url, channel.to_string(), thumb, duration);
    // Optional enrichment fields: left as None when missing, empty, or (for
    // views) outside the u32 range.
    item.views = payload
        .get("view_count")
        .and_then(|value| value.as_u64())
        .and_then(|value| u32::try_from(value).ok());
    item.uploader = payload
        .get("uploader")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    item.uploaderUrl = payload
        .get("uploader_url")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    // The preview reuses the thumbnail URL, but only when it is non-empty.
    item.preview = payload
        .get("thumbnail")
        .and_then(|value| value.as_str())
        .filter(|value| !value.trim().is_empty())
        .map(ToOwned::to_owned);
    // Map yt-dlp's formats array; entries without an absolute http(s) URL
    // are skipped.
    let formats = payload
        .get("formats")
        .and_then(|value| value.as_array())
        .map(|entries| {
            entries
                .iter()
                .filter_map(|format| {
                    let format_url = format
                        .get("url")
                        .and_then(|value| value.as_str())
                        .filter(|value| {
                            value.starts_with("http://") || value.starts_with("https://")
                        })?;
                    // Quality label: format_id, then format, then resolution,
                    // then the literal "auto".
                    let quality = format
                        .get("format_id")
                        .and_then(|value| value.as_str())
                        .or_else(|| format.get("format").and_then(|value| value.as_str()))
                        .or_else(|| format.get("resolution").and_then(|value| value.as_str()))
                        .unwrap_or("auto")
                        .to_string();
                    let ext = format
                        .get("ext")
                        .and_then(|value| value.as_str())
                        .unwrap_or("mp4")
                        .to_string();
                    let mut video_format =
                        VideoFormat::new(format_url.to_string(), quality.clone(), ext)
                            .format_id(quality.clone());
                    // Attach the note only when it has visible content.
                    if let Some(note) = format.get("format_note").and_then(|value| value.as_str()) {
                        if !note.trim().is_empty() {
                            video_format = video_format.format_note(note.to_string());
                        }
                    }
                    Some(video_format)
                })
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();
    // An empty formats list stays as `None` rather than `Some(vec![])`.
    if !formats.is_empty() {
        item.formats = Some(formats);
    }
    Some(item)
}
/// Run `yt-dlp -J` against `query_url` and convert its JSON dump into
/// `VideoItem`s for `channel`.
///
/// Returns `None` on spawn failure, non-zero exit, unparsable JSON, or when
/// nothing usable was extracted. Playlist dumps yield at most `limit` items;
/// a single-video dump yields exactly one item.
fn videos_from_ytdlp_query_url(
    channel: &str,
    query_url: &str,
    limit: usize,
) -> Option<Vec<VideoItem>> {
    let result = Command::new("yt-dlp")
        .arg("-J")
        .arg("--no-warnings")
        .arg("--extractor-args")
        .arg("generic:impersonate=chrome")
        .arg(query_url)
        .output()
        .ok()?;
    if !result.status.success() {
        return None;
    }
    let dump: Value = serde_json::from_slice(&result.stdout).ok()?;
    // A playlist-style dump carries an `entries` array; otherwise the payload
    // itself describes a single video.
    match dump.get("entries").and_then(|value| value.as_array()) {
        Some(entries) => {
            let items: Vec<VideoItem> = entries
                .iter()
                .filter_map(|entry| video_item_from_ytdlp_payload(channel, query_url, entry))
                .take(limit)
                .collect();
            if items.is_empty() { None } else { Some(items) }
        }
        None => video_item_from_ytdlp_payload(channel, query_url, &dump).map(|item| vec![item]),
    }
}
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("/status")
@@ -94,11 +294,64 @@ pub fn config(cfg: &mut web::ServiceConfig) {
// .route(web::get().to(videos_get))
.route(web::post().to(videos_post)),
)
.service(web::resource("/uploaders").route(web::post().to(uploaders_post)))
.service(web::resource("/test").route(web::get().to(test)))
.service(web::resource("/proxies").route(web::get().to(proxies)));
}
/// A lookup request must identify the uploader by id or by display name.
fn uploader_request_is_valid(request: &UploadersRequest) -> bool {
    !(request.uploaderId.is_none() && request.uploaderName.is_none())
}
/// Derive the provider id from a canonical `<channel>:<local-id>` uploader
/// id; `None` when the id has no `:` separator.
fn provider_hint_from_uploader_id(uploader_id: &str) -> Option<String> {
    uploader_id
        .split_once(':')
        .map(|(channel, _)| resolve_provider_for_build(channel).to_string())
}
/// Sorted list of every registered provider id, excluding the aggregate
/// "all" pseudo-provider.
fn uploader_provider_ids() -> Vec<String> {
    let mut ids: Vec<String> = ALL_PROVIDERS
        .iter()
        .filter(|(provider_id, _)| **provider_id != *"all")
        .map(|(provider_id, _)| (*provider_id).to_string())
        .collect();
    ids.sort();
    ids
}
/// Ranking key for competing uploader matches: video count, then channel
/// (empty string when unset), then the uploader id.
fn uploader_match_sort_key(profile: &UploaderProfile) -> (u64, String, String) {
    let channel = profile.channel.clone().unwrap_or_default();
    (profile.videoCount, channel, profile.id.clone())
}
/// Run one provider's `get_uploader` lookup inside the guarded execution
/// helper, forwarding the relevant fields of the uploader request.
async fn lookup_uploader_with_provider(
    provider_id: &str,
    provider: DynProvider,
    cache: VideoCache,
    pool: DbPool,
    request: &UploadersRequest,
    options: crate::videos::ServerOptions,
) -> Result<Option<UploaderProfile>, String> {
    // Build the lookup future first; it only runs once the guard awaits it.
    let lookup = provider.get_uploader(
        cache,
        pool,
        request.uploaderId.clone(),
        request.uploaderName.clone(),
        request.query.clone(),
        request.profileContent,
        options,
    );
    run_uploader_provider_guarded(provider_id, "uploaders_post.get_uploader", lookup).await
}
async fn status(req: HttpRequest) -> Result<impl web::Responder, web::Error> {
#[cfg(feature = "debug")]
let trace_id = crate::util::flow_debug::next_trace_id("status");
let clientversion: ClientVersion = match req.headers().get("User-Agent") {
Some(v) => match v.to_str() {
Ok(useragent) => ClientVersion::parse(useragent)
@@ -112,6 +365,12 @@ async fn status(req: HttpRequest) -> Result<impl web::Responder, web::Error> {
"Received status request with client version: {:?}",
clientversion
);
crate::flow_debug!(
"trace={} status request host={} client={:?}",
trace_id,
req.connection_info().host(),
&clientversion
);
let host = req
.headers()
@@ -121,8 +380,15 @@ async fn status(req: HttpRequest) -> Result<impl web::Responder, web::Error> {
.to_string();
let public_url_base = format!("{}://{}", req.connection_info().scheme(), host);
let mut status = Status::new();
#[cfg(feature = "debug")]
let mut channel_count = 0usize;
for (provider_name, provider) in ALL_PROVIDERS.iter() {
crate::flow_debug!(
"trace={} status inspecting provider={}",
trace_id,
provider_name
);
let channel_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
provider.get_channel(clientversion.clone())
}));
@@ -131,26 +397,50 @@ async fn status(req: HttpRequest) -> Result<impl web::Responder, web::Error> {
if channel.favicon.starts_with('/') {
channel.favicon = format!("{}{}", public_url_base, channel.favicon);
}
#[cfg(feature = "debug")]
{
channel_count += 1;
}
crate::flow_debug!(
"trace={} status added channel id={} provider={}",
trace_id,
channel.id.as_str(),
provider_name
);
status.add_channel(channel)
}
Ok(None) => {}
Err(payload) => {
let panic_msg = panic_payload_to_string(payload);
crate::flow_debug!(
"trace={} status provider panic provider={} panic={}",
trace_id,
provider_name,
&panic_msg
);
report_provider_error(provider_name, "status.get_channel", &panic_msg).await;
}
}
}
status.iconUrl = format!("{}/favicon.ico", public_url_base).to_string();
Ok(web::HttpResponse::Ok().json(&status))
let response = build_status_response(status);
crate::flow_debug!(
"trace={} status response channels={} groups={}",
trace_id,
channel_count,
response.channelGroups.len()
);
Ok(web::HttpResponse::Ok().json(&response))
}
async fn videos_post(
mut video_request: web::types::Json<VideosRequest>,
video_request: web::types::Json<VideosRequest>,
cache: web::types::State<VideoCache>,
pool: web::types::State<DbPool>,
requester: web::types::State<Requester>,
req: HttpRequest,
) -> Result<impl web::Responder, web::Error> {
let trace_id = crate::util::flow_debug::next_trace_id("videos");
let clientversion: ClientVersion = match req.headers().get("User-Agent") {
Some(v) => match v.to_str() {
Ok(useragent) => ClientVersion::parse(useragent)
@@ -159,12 +449,6 @@ async fn videos_post(
},
_ => ClientVersion::new(999, 0, "Hot%20Tub".to_string()),
};
match video_request.query.as_deref() {
Some(query) if query.starts_with("#") => {
video_request.query = Some(query.trim_start_matches("#").to_string());
}
_ => {}
}
let requester = requester.get_ref().clone();
// Ensure "videos" table exists with two string columns.
match pool.get() {
@@ -194,16 +478,14 @@ async fn videos_post(
},
items: vec![],
};
let channel: String = video_request
let requested_channel: String = video_request
.channel
.as_deref()
.unwrap_or("all")
.to_string();
let channel = resolve_provider_for_build(requested_channel.as_str()).to_string();
let sort: String = video_request.sort.as_deref().unwrap_or("date").to_string();
let mut query: Option<String> = video_request.query.clone();
if video_request.query.as_deref() == Some("") {
query = None;
}
let (query, literal_query) = normalize_query(video_request.query.as_deref());
let page: u8 = video_request
.page
.as_ref()
@@ -256,6 +538,22 @@ async fn videos_post(
req.connection_info().scheme(),
req.connection_info().host()
);
crate::flow_debug!(
"trace={} videos request requested_channel={} resolved_channel={} sort={} query={:?} page={} per_page={} filter={} category={} sites={} client={:?}",
trace_id,
&requested_channel,
&channel,
&sort,
&query,
page,
perPage,
&filter,
&category,
&sites,
&clientversion
);
let mut requester = requester;
requester.set_debug_trace_id(Some(trace_id.clone()));
let options = ServerOptions {
featured: Some(featured),
category: Some(category),
@@ -271,6 +569,71 @@ async fn videos_post(
sort: Some(sort.clone()),
sexuality: Some(sexuality),
};
if let Some(query_url) = query.as_deref().and_then(normalize_query_url) {
crate::flow_debug!(
"trace={} videos attempting ytdlp url fast path provider={} url={}",
trace_id,
&channel,
crate::util::flow_debug::preview(&query_url, 160)
);
if let Some(mut video_items) =
videos_from_ytdlp_query_url(&channel, &query_url, perPage as usize)
{
if clientversion == ClientVersion::new(38, 0, "Hot%20Tub".to_string()) {
video_items = video_items
.into_iter()
.filter_map(|video| {
let last_url = video
.formats
.as_ref()
.and_then(|formats| formats.last().map(|f| f.url.clone()));
if let Some(url) = last_url {
let mut v = video;
v.url = url;
return Some(v);
}
Some(video)
})
.collect();
}
for video in video_items.iter_mut() {
if video.duration <= 120 {
let mut preview_url = video.url.clone();
if let Some(formats) = &video.formats {
if let Some(first) = formats.first() {
preview_url = first.url.clone();
}
}
video.preview = Some(preview_url);
}
}
videos.pageInfo = PageInfo {
hasNextPage: false,
resultsPerPage: perPage as u32,
};
videos.items = video_items;
crate::flow_debug!(
"trace={} videos ytdlp url fast path returned count={}",
trace_id,
videos.items.len()
);
return Ok(web::HttpResponse::Ok().json(&videos));
}
crate::flow_debug!(
"trace={} videos ytdlp url fast path fell back to provider",
trace_id
);
}
crate::flow_debug!(
"trace={} videos provider dispatch provider={} literal_query={:?}",
trace_id,
&channel,
&literal_query
);
let mut video_items = run_provider_guarded(
&channel,
"videos_post.get_videos",
@@ -285,6 +648,11 @@ async fn videos_post(
),
)
.await;
crate::flow_debug!(
"trace={} videos provider returned count={}",
trace_id,
video_items.len()
);
// There is a bug in Hottub38 that makes the client error for a 403-url even though formats work fine
if clientversion == ClientVersion::new(38, 0, "Hot%20Tub".to_string()) {
@@ -306,6 +674,18 @@ async fn videos_post(
.collect();
}
if let Some(literal_query) = literal_query.as_deref() {
#[cfg(feature = "debug")]
let before = video_items.len();
video_items.retain(|video| video_matches_literal_query(video, literal_query));
crate::flow_debug!(
"trace={} videos literal filter kept={} removed={}",
trace_id,
video_items.len(),
before.saturating_sub(video_items.len())
);
}
videos.items = video_items.clone();
if video_items.len() == 0 {
videos.pageInfo = PageInfo {
@@ -323,7 +703,15 @@ async fn videos_post(
let per_page_clone = perPage.to_string();
let options_clone = options.clone();
let channel_clone = channel.clone();
#[cfg(feature = "debug")]
let prefetch_trace_id = trace_id.clone();
task::spawn_local(async move {
crate::flow_debug!(
"trace={} videos prefetch spawn next_page={} provider={}",
prefetch_trace_id,
next_page,
&channel_clone
);
// if let AnyProvider::Spankbang(_) = provider_clone {
// // Spankbang has a delay for the next page
// ntex::time::sleep(ntex::time::Seconds(80)).await;
@@ -357,11 +745,160 @@ async fn videos_post(
}
}
crate::flow_debug!(
"trace={} videos response items={} has_next={}",
trace_id,
videos.items.len(),
videos.pageInfo.hasNextPage
);
Ok(web::HttpResponse::Ok().json(&videos))
}
/// POST /uploaders handler.
///
/// Resolution strategy:
/// 1. Validate that the request names at least one of uploaderId/uploaderName
///    (400 otherwise).
/// 2. If uploaderId carries a "provider:rest" prefix, query only that provider
///    (404 if the provider is unknown or has no match, 500 on lookup error).
/// 3. Otherwise fan out to every provider, keep exact (case-insensitive) name
///    matches when a name was requested, and return the best match by
///    `uploader_match_sort_key` (video count first). 404 when nothing matched,
///    500 when nothing matched but at least one provider errored.
async fn uploaders_post(
    uploader_request: web::types::Json<UploadersRequest>,
    cache: web::types::State<VideoCache>,
    pool: web::types::State<DbPool>,
    requester: web::types::State<Requester>,
    req: HttpRequest,
) -> Result<impl web::Responder, web::Error> {
    let trace_id = crate::util::flow_debug::next_trace_id("uploaders");
    // Normalize before validation so empty strings are treated as absent.
    let request = uploader_request.into_inner().normalized();
    if !uploader_request_is_valid(&request) {
        return Ok(web::HttpResponse::BadRequest()
            .body("At least one of uploaderId or uploaderName must be provided"));
    }
    // Base URL used by providers to absolutize relative asset links.
    let public_url_base = format!(
        "{}://{}",
        req.connection_info().scheme(),
        req.connection_info().host()
    );
    // Clone the shared requester so the trace id only tags this request.
    let mut requester = requester.get_ref().clone();
    requester.set_debug_trace_id(Some(trace_id.clone()));
    let options = ServerOptions {
        featured: None,
        category: None,
        sites: None,
        filter: None,
        language: None,
        public_url_base: Some(public_url_base),
        requester: Some(requester),
        network: None,
        stars: None,
        categories: None,
        duration: None,
        sort: None,
        sexuality: None,
    };
    crate::flow_debug!(
        "trace={} uploaders request uploader_id={:?} uploader_name={:?} profile_content={} query={:?}",
        trace_id,
        &request.uploaderId,
        &request.uploaderName,
        request.profileContent,
        &request.query
    );
    // Fast path: a "provider:rest" uploader id pins the lookup to one provider.
    if let Some(uploader_id) = request.uploaderId.as_deref() {
        if let Some(provider_id) = provider_hint_from_uploader_id(uploader_id) {
            let Some(provider) = get_provider(&provider_id) else {
                return Ok(web::HttpResponse::NotFound().finish());
            };
            let result = lookup_uploader_with_provider(
                &provider_id,
                provider,
                cache.get_ref().clone(),
                pool.get_ref().clone(),
                &request,
                options,
            )
            .await;
            return match result {
                Ok(Some(profile)) => Ok(web::HttpResponse::Ok().json(&profile)),
                Ok(None) => Ok(web::HttpResponse::NotFound().finish()),
                // `_error` is underscore-named so non-debug builds (where
                // flow_debug! expands to nothing) do not warn about it.
                Err(_error) => {
                    crate::flow_debug!(
                        "trace={} uploaders targeted provider failed provider={} error={}",
                        trace_id,
                        &provider_id,
                        &_error
                    );
                    Ok(web::HttpResponse::InternalServerError().finish())
                }
            };
        }
    }
    // Slow path: fan out to every provider sequentially and collect matches.
    let mut matches = Vec::new();
    let mut saw_error = false;
    // Pre-lowercase the requested name for case-insensitive comparison below.
    let requested_name = request
        .uploaderName
        .as_ref()
        .map(|value| value.to_ascii_lowercase());
    for provider_id in uploader_provider_ids() {
        let Some(provider) = get_provider(&provider_id) else {
            continue;
        };
        let result = lookup_uploader_with_provider(
            &provider_id,
            provider,
            cache.get_ref().clone(),
            pool.get_ref().clone(),
            &request,
            options.clone(),
        )
        .await;
        match result {
            Ok(Some(profile)) => {
                // When searching by name, only accept exact (case-insensitive)
                // matches; fuzzy provider hits are discarded.
                if let Some(requested_name) = requested_name.as_deref() {
                    if profile.name.to_ascii_lowercase() != requested_name {
                        crate::flow_debug!(
                            "trace={} uploaders ignoring non_exact_match provider={} requested={} returned={}",
                            trace_id,
                            &provider_id,
                            requested_name,
                            &profile.name
                        );
                        continue;
                    }
                }
                matches.push(profile);
            }
            Ok(None) => {}
            Err(_error) => {
                // Remember the failure; it only surfaces as a 500 when no
                // other provider produced a match.
                saw_error = true;
                crate::flow_debug!(
                    "trace={} uploaders provider failed provider={} error={}",
                    trace_id,
                    &provider_id,
                    &_error
                );
            }
        }
    }
    if matches.is_empty() {
        if saw_error {
            return Ok(web::HttpResponse::InternalServerError().finish());
        }
        return Ok(web::HttpResponse::NotFound().finish());
    }
    // Descending sort (b vs a) so the highest-ranked profile is first.
    matches.sort_by(|a, b| uploader_match_sort_key(b).cmp(&uploader_match_sort_key(a)));
    Ok(web::HttpResponse::Ok().json(&matches[0]))
}
/// Look up the provider registered under `channel`, cloning its handle.
///
/// Returns `None` for unknown channel ids; emits a debug trace of the lookup
/// outcome in debug builds.
pub fn get_provider(channel: &str) -> Option<DynProvider> {
    // NOTE: the previous body had a stale duplicate of this lookup expression
    // left over from a refactor; a single lookup is performed here.
    let provider = ALL_PROVIDERS.get(channel).cloned();
    crate::flow_debug!(
        "provider lookup channel={} found={}",
        channel,
        provider.is_some()
    );
    provider
}
pub async fn test() -> Result<impl web::Responder, web::Error> {
@@ -382,6 +919,7 @@ pub async fn test() -> Result<impl web::Responder, web::Error> {
pub async fn proxies() -> Result<impl web::Responder, web::Error> {
let proxies = all_proxies_snapshot().await.unwrap_or_default();
crate::flow_debug!("proxies endpoint snapshot_count={}", proxies.len());
let mut by_protocol: std::collections::BTreeMap<String, Vec<Proxy>> =
std::collections::BTreeMap::new();
for proxy in proxies {
@@ -401,3 +939,106 @@ pub async fn proxies() -> Result<impl web::Responder, web::Error> {
}
Ok(web::HttpResponse::Ok().json(&by_protocol))
}
#[cfg(test)]
mod tests {
    use super::*;
    // Validation: a request must carry uploaderId or uploaderName.
    #[test]
    fn uploaders_request_requires_id_or_name() {
        let invalid = UploadersRequest::default();
        let valid = UploadersRequest {
            uploaderName: Some("Example".to_string()),
            ..UploadersRequest::default()
        };
        assert!(!uploader_request_is_valid(&invalid));
        assert!(uploader_request_is_valid(&valid));
    }
    // Routing hint: "provider:rest" ids resolve to the provider prefix;
    // ids without a colon yield no hint.
    #[test]
    fn uploader_provider_hint_uses_channel_prefix() {
        assert_eq!(
            provider_hint_from_uploader_id("hsex:xihongshiddd").as_deref(),
            Some("hsex")
        );
        assert_eq!(provider_hint_from_uploader_id("plain-id"), None);
    }
    // Ranking: higher videoCount wins under the descending sort in
    // uploaders_post.
    #[test]
    fn uploader_match_prefers_higher_video_count() {
        let a = UploaderProfile {
            id: "a".to_string(),
            name: "Example".to_string(),
            channel: Some("alpha".to_string()),
            videoCount: 3,
            ..UploaderProfile::default()
        };
        let b = UploaderProfile {
            id: "b".to_string(),
            name: "Example".to_string(),
            channel: Some("beta".to_string()),
            videoCount: 9,
            ..UploaderProfile::default()
        };
        assert!(uploader_match_sort_key(&b) > uploader_match_sort_key(&a));
    }
    // URL detection: only http(s) queries are treated as direct video URLs,
    // with surrounding whitespace trimmed.
    #[test]
    fn detects_http_and_https_query_urls() {
        assert_eq!(
            normalize_query_url(" https://www.freeuseporn.com/video/9579/example "),
            Some("https://www.freeuseporn.com/video/9579/example".to_string())
        );
        assert_eq!(
            normalize_query_url("http://example.com/video"),
            Some("http://example.com/video".to_string())
        );
        assert_eq!(normalize_query_url("Nicole Kitt"), None);
        assert_eq!(normalize_query_url("ftp://example.com/video"), None);
    }
    // yt-dlp JSON payload mapping: id/title/url/views/uploader/formats are
    // extracted into a VideoItem.
    #[test]
    fn builds_video_item_from_ytdlp_payload() {
        let payload = serde_json::json!({
            "id": "9579",
            "title": "Nicole Kitt - Example",
            "webpage_url": "https://www.freeuseporn.com/video/9579/nicole-kitt-example",
            "thumbnail": "https://www.freeuseporn.com/media/videos/tmb/9579/1.jpg",
            "duration": 3549,
            "view_count": 52180,
            "uploader": "FreeusePorn",
            "formats": [
                {
                    "url": "https://www.freeuseporn.com/media/videos/h264/9579_720p.mp4",
                    "format_id": "720p",
                    "format_note": "720p",
                    "ext": "mp4"
                },
                {
                    "url": "https://www.freeuseporn.com/media/videos/h264/9579_480p.mp4",
                    "format_id": "480p",
                    "ext": "mp4"
                }
            ]
        });
        let item = video_item_from_ytdlp_payload(
            "freeuseporn",
            "https://www.freeuseporn.com/video/9579/nicole-kitt-example",
            &payload,
        )
        .expect("item should parse");
        assert_eq!(item.id, "9579");
        assert_eq!(item.title, "Nicole Kitt - Example");
        assert_eq!(
            item.url,
            "https://www.freeuseporn.com/video/9579/nicole-kitt-example"
        );
        assert_eq!(item.views, Some(52180));
        assert_eq!(item.uploader.as_deref(), Some("FreeusePorn"));
        assert_eq!(item.formats.as_ref().map(|formats| formats.len()), Some(2));
    }
}

View File

@@ -1,10 +1,17 @@
use crate::models::DBVideo;
use diesel::prelude::*;
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "hentaihaven",
hottub_provider = "missav",
hottub_provider = "perverzija",
))]
pub fn get_video(
conn: &mut SqliteConnection,
video_id: String,
) -> Result<Option<String>, diesel::result::Error> {
use crate::models::DBVideo;
use crate::schema::videos::dsl::*;
let result = videos
.filter(id.eq(video_id))
@@ -16,11 +23,19 @@ pub fn get_video(
}
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "hentaihaven",
hottub_provider = "missav",
hottub_provider = "perverzija",
))]
pub fn insert_video(
conn: &mut SqliteConnection,
new_id: &str,
new_url: &str,
) -> Result<usize, diesel::result::Error> {
use crate::models::DBVideo;
use crate::schema::videos::dsl::*;
diesel::insert_into(videos)
.values(DBVideo {
@@ -30,6 +45,13 @@ pub fn insert_video(
.execute(conn)
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "hentaihaven",
hottub_provider = "missav",
hottub_provider = "perverzija",
))]
pub fn delete_video(
conn: &mut SqliteConnection,
video_id: String,

View File

@@ -19,6 +19,7 @@ mod proxies;
mod proxy;
mod schema;
mod status;
mod uploaders;
mod util;
mod videos;
@@ -39,6 +40,11 @@ async fn main() -> std::io::Result<()> {
}
}
env_logger::init(); // You need this to actually see logs
crate::flow_debug!(
"startup begin rust_log={} debug_compiled={}",
std::env::var("RUST_LOG").unwrap_or_else(|_| "unset".to_string()),
cfg!(feature = "debug")
);
// set up database connection pool
let connspec = std::env::var("DATABASE_URL").expect("DATABASE_URL");
@@ -46,15 +52,24 @@ async fn main() -> std::io::Result<()> {
let pool = r2d2::Pool::builder()
.build(manager)
.expect("Failed to create pool.");
crate::flow_debug!(
"database pool ready database_url={}",
crate::util::flow_debug::preview(&connspec, 96)
);
let mut requester = util::requester::Requester::new();
requester.set_proxy(env::var("PROXY").unwrap_or("0".to_string()) != "0".to_string());
let proxy_enabled = env::var("PROXY").unwrap_or("0".to_string()) != "0".to_string();
requester.set_proxy(proxy_enabled);
crate::flow_debug!("requester initialized proxy_enabled={}", proxy_enabled);
let cache: util::cache::VideoCache = crate::util::cache::VideoCache::new()
.max_size(100_000)
.to_owned();
crate::flow_debug!("video cache initialized max_size=100000");
let _ = providers::configure_runtime_validation(pool.clone(), cache.clone(), requester.clone());
thread::spawn(move || {
crate::flow_debug!("provider init thread spawned");
// Create a tiny runtime just for these async tasks
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -62,10 +77,13 @@ async fn main() -> std::io::Result<()> {
.expect("build tokio runtime");
rt.block_on(async move {
crate::flow_debug!("provider init begin");
providers::init_providers_now();
crate::flow_debug!("provider init complete");
});
});
crate::flow_debug!("http server binding addr=0.0.0.0:18080 workers=8");
web::HttpServer::new(move || {
web::App::new()
.state(pool.clone())

View File

@@ -14,6 +14,12 @@ use futures::stream::FuturesUnordered;
use std::fs;
use std::time::Duration;
/// Channel group id and descriptive tags for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "meta-search",
        tags: &["aggregator", "multi-site", "search"],
    };
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -22,7 +28,6 @@ error_chain! {
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct AllProvider {}
impl AllProvider {

View File

@@ -14,6 +14,12 @@ use std::thread;
use std::time::Duration;
use std::vec;
/// Channel group id and descriptive tags for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "mainstream-tube",
        tags: &["mainstream", "clips", "mixed"],
    };
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -8,6 +8,12 @@ use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
/// Channel group id and descriptive tags for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "live-cams",
        tags: &["live", "cams", "amateur"],
    };
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -13,6 +13,12 @@ use std::sync::{Arc, RwLock};
use std::thread;
use std::vec;
/// Channel group id and descriptive tags for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "studio-network",
        tags: &["tube", "networked", "mixed"],
    };
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -0,0 +1,611 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{Html, Selector};
use std::collections::HashSet;
use std::vec;
use url::form_urlencoded::Serializer;
/// Channel group id and descriptive tags for this provider.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "fetish-kink",
        tags: &["freeuse", "hypno", "mind-control"],
    };
// error_chain-generated Error/Result aliases for this module; wraps I/O and
// wreq HTTP-client errors as foreign links.
error_chain! {
    foreign_links {
        Io(std::io::Error);
        HttpRequest(wreq::Error);
    }
}
/// Scraper-backed provider for freeuseporn.com.
#[derive(Debug, Clone)]
pub struct FreeusepornProvider {
    // Site base URL without trailing slash (set in `new`).
    url: String,
}
impl FreeusepornProvider {
    /// Create a provider rooted at the production site URL.
    pub fn new() -> Self {
        Self {
            url: "https://www.freeuseporn.com".to_string(),
        }
    }
    /// Build the static channel descriptor, including the sort and category
    /// filter options shown to the client.
    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
        Channel {
            id: "freeuseporn".to_string(),
            name: "FreeusePorn".to_string(),
            description: "FreeusePorn streams freeuse, hypno, mind control, ignored sex, and related fetish videos.".to_string(),
            premium: false,
            favicon: "https://www.google.com/s2/favicons?sz=64&domain=freeuseporn.com".to_string(),
            status: "active".to_string(),
            categories: vec![],
            options: vec![
                ChannelOption {
                    id: "sort".to_string(),
                    title: "Sort".to_string(),
                    description: "Sort the videos".to_string(),
                    systemImage: "list.number".to_string(),
                    colorName: "blue".to_string(),
                    options: vec![
                        FilterOption {
                            id: "recent".to_string(),
                            title: "Most Recent".to_string(),
                        },
                        FilterOption {
                            id: "viewed".to_string(),
                            title: "Most Viewed".to_string(),
                        },
                        FilterOption {
                            id: "rated".to_string(),
                            title: "Top Rated".to_string(),
                        },
                        FilterOption {
                            id: "favorites".to_string(),
                            title: "Top Favorites".to_string(),
                        },
                        FilterOption {
                            id: "watched".to_string(),
                            title: "Being Watched".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
                ChannelOption {
                    id: "category".to_string(),
                    title: "Category".to_string(),
                    description: "Filter by category".to_string(),
                    systemImage: "square.grid.2x2".to_string(),
                    colorName: "orange".to_string(),
                    options: vec![
                        FilterOption {
                            id: "all".to_string(),
                            title: "All".to_string(),
                        },
                        FilterOption {
                            id: "mind-control".to_string(),
                            title: "Mind Control".to_string(),
                        },
                        FilterOption {
                            id: "general-freeuse".to_string(),
                            title: "General Freeuse".to_string(),
                        },
                        FilterOption {
                            id: "free-service".to_string(),
                            title: "Free Service".to_string(),
                        },
                        FilterOption {
                            id: "forced".to_string(),
                            title: "Forced".to_string(),
                        },
                        FilterOption {
                            id: "japanese".to_string(),
                            title: "Japanese".to_string(),
                        },
                        FilterOption {
                            id: "time-stop".to_string(),
                            title: "Time Stop".to_string(),
                        },
                        FilterOption {
                            id: "ignored-sex".to_string(),
                            title: "Ignored Sex".to_string(),
                        },
                        FilterOption {
                            id: "glory-hole".to_string(),
                            title: "Glory Hole".to_string(),
                        },
                    ],
                    multiSelect: false,
                },
            ],
            nsfw: true,
            cacheDuration: Some(1800),
        }
    }
    /// Turn a site-relative href/src into an absolute URL; already-absolute
    /// URLs pass through unchanged.
    fn absolute_url(&self, url: &str) -> String {
        if url.starts_with("http://") || url.starts_with("https://") {
            url.to_string()
        } else if url.starts_with('/') {
            format!("{}{}", self.url, url)
        } else {
            format!("{}/{}", self.url, url.trim_start_matches('/'))
        }
    }
    /// Map the channel sort id to the site's `o=` query value
    /// (mr = most recent is the default).
    fn sort_param(sort: &str) -> &'static str {
        match sort {
            "viewed" => "mv",
            "rated" => "tr",
            "favorites" => "tf",
            "watched" => "bw",
            _ => "mr",
        }
    }
    /// Append the sort (always) and page (for page > 1) query parameters to
    /// `base_url`, reusing `&` when the URL already carries a query string.
    fn append_sort_and_page(&self, base_url: &str, sort: &str, page: u8) -> String {
        // `params` always contains the sort parameter, so no empty-params
        // branch is needed (the previous one was unreachable).
        let mut params = vec![format!("o={}", Self::sort_param(sort))];
        if page > 1 {
            params.push(format!("page={page}"));
        }
        let separator = if base_url.contains('?') { "&" } else { "?" };
        format!("{base_url}{separator}{}", params.join("&"))
    }
    /// Build the listing URL for a sort/page, optionally scoped to a category
    /// path segment ("all", empty, or whitespace categories are ignored).
    fn build_list_url(&self, sort: &str, page: u8, category: Option<&str>) -> String {
        let path = if let Some(category) = category
            .map(str::trim)
            .filter(|value| !value.is_empty() && *value != "all")
        {
            format!("/videos/{}", category)
        } else {
            "/videos".to_string()
        };
        let base_url = format!("{}{}", self.url, path);
        self.append_sort_and_page(&base_url, sort, page)
    }
    /// Form-encode the search query into the POST body the site expects.
    fn build_search_request_body(query: &str) -> String {
        let mut serializer = Serializer::new(String::new());
        serializer.append_pair("search_query", query);
        serializer.finish()
    }
    /// Submit the search form and return the redirected search-results URL
    /// (trailing slash stripped) so pagination/sort params can be appended.
    async fn resolve_search_url(&self, query: &str, options: &ServerOptions) -> Result<String> {
        let search_url = format!("{}/search/videos", self.url);
        let search_body = Self::build_search_request_body(query);
        let referer = format!("{}/videos", self.url);
        let mut requester = requester_or_default(options, module_path!(), "missing_requester");
        let response = requester
            .post(
                &search_url,
                &search_body,
                vec![
                    ("Content-Type", "application/x-www-form-urlencoded"),
                    ("Referer", referer.as_str()),
                ],
            )
            .await
            .map_err(|error| format!("search submit failed url={search_url}; error={error}"))?;
        Ok(response.uri().to_string().trim_end_matches('/').to_string())
    }
    /// Build the two direct MP4 format entries (720p/480p) from the site's
    /// predictable media path scheme for a video id.
    fn build_formats(&self, id: &str) -> Vec<VideoFormat> {
        let hd = VideoFormat::new(
            format!("{}/media/videos/h264/{}_720p.mp4", self.url, id),
            "720p".to_string(),
            "video/mp4".to_string(),
        )
        .format_id("720p".to_string())
        .format_note("720p".to_string());
        let sd = VideoFormat::new(
            format!("{}/media/videos/h264/{}_480p.mp4", self.url, id),
            "480p".to_string(),
            "video/mp4".to_string(),
        )
        .format_id("480p".to_string())
        .format_note("480p".to_string());
        vec![hd, sd]
    }
    /// Collapse all runs of whitespace to single spaces and trim ends.
    fn normalized_text(text: &str) -> String {
        text.split_whitespace().collect::<Vec<_>>().join(" ")
    }
    /// Decode HTML entities, falling back to the raw input on decode failure.
    fn decode_text(value: &str) -> String {
        decode(value.as_bytes())
            .to_string()
            .unwrap_or_else(|_| value.to_string())
    }
    /// Parse a view-count string, keeping digits, '.', and K/M/B suffixes so
    /// abbreviated counts like "1.2K" survive; returns None when no usable
    /// characters remain.
    fn parse_views(value: &str) -> Option<u32> {
        let digits = value
            .chars()
            .filter(|character| {
                character.is_ascii_digit()
                    || matches!(*character, '.' | 'K' | 'M' | 'B' | 'k' | 'm' | 'b')
            })
            .collect::<String>();
        if digits.is_empty() {
            return None;
        }
        parse_abbreviated_number(&digits).map(|views| views as u32)
    }
    /// Parse a percentage rating string such as "88%" into a float.
    fn parse_rating(value: &str) -> Option<f32> {
        value
            .trim()
            .trim_end_matches('%')
            .parse::<f32>()
            .ok()
    }
    /// Extract one VideoItem from a listing anchor element.
    ///
    /// Only anchors whose href contains "/video/" qualify; the id is taken
    /// from the URL path (segment index 4 of "https://host/video/<id>/slug").
    /// Title falls back from the .v-name element to the anchor title attr to
    /// the thumbnail alt text. Returns None when any required field is absent.
    fn parse_video_item_from_anchor(
        &self,
        anchor: scraper::ElementRef<'_>,
        selectors: &FreeusepornSelectors,
    ) -> Option<VideoItem> {
        let href = anchor.value().attr("href")?;
        if !href.contains("/video/") {
            return None;
        }
        let absolute_url = self.absolute_url(href);
        let id = absolute_url.split('/').nth(4)?.to_string();
        if id.is_empty() {
            return None;
        }
        let title_raw = anchor
            .select(&selectors.title)
            .next()
            .map(|element| Self::normalized_text(&element.text().collect::<Vec<_>>().join(" ")))
            .filter(|value| !value.is_empty())
            .or_else(|| anchor.value().attr("title").map(Self::normalized_text))
            .or_else(|| {
                anchor
                    .select(&selectors.image)
                    .next()
                    .and_then(|element| element.value().attr("alt"))
                    .map(Self::normalized_text)
            })?;
        let title = Self::decode_text(&title_raw);
        let thumb = anchor
            .select(&selectors.image)
            .next()
            .and_then(|element| element.value().attr("src"))
            .map(|src| self.absolute_url(src))
            .unwrap_or_default();
        let duration = anchor
            .select(&selectors.duration)
            .next()
            .map(|element| Self::normalized_text(&element.text().collect::<Vec<_>>().join(" ")))
            .and_then(|value| parse_time_to_seconds(&value))
            .unwrap_or(0) as u32;
        // Stats list order on the site: views first, rating second.
        let mut stats = anchor
            .select(&selectors.video_stat)
            .map(|element| Self::normalized_text(&element.text().collect::<Vec<_>>().join(" ")))
            .collect::<Vec<_>>();
        stats.retain(|value| !value.is_empty());
        let views = stats.first().and_then(|value| Self::parse_views(value));
        let rating = stats.get(1).and_then(|value| Self::parse_rating(value));
        let mut item = VideoItem::new(
            id.clone(),
            title,
            absolute_url,
            "freeuseporn".to_string(),
            thumb,
            duration,
        )
        .views(views.unwrap_or(0));
        // Distinguish "no view count on the page" (None) from zero views.
        if views.is_none() {
            item.views = None;
        }
        item.rating = rating;
        item.formats = Some(self.build_formats(&id));
        Some(item)
    }
    /// Parse a listing/search HTML page into deduplicated VideoItems.
    ///
    /// Tries the scoped `#videos-list` anchors first, falling back to all
    /// anchors when the container is absent (e.g. search result markup).
    fn get_video_items_from_html(&self, html: &str) -> Vec<VideoItem> {
        if html.trim().is_empty() {
            return vec![];
        }
        let document = Html::parse_document(html);
        let selectors = FreeusepornSelectors::new();
        let primary_anchors = document
            .select(&selectors.list_anchor)
            .collect::<Vec<_>>();
        let anchors = if primary_anchors.is_empty() {
            document
                .select(&selectors.fallback_anchor)
                .collect::<Vec<_>>()
        } else {
            primary_anchors
        };
        // Dedupe by video id: the same video may appear behind several anchors.
        let mut seen = HashSet::new();
        let mut items = Vec::new();
        for anchor in anchors {
            let Some(item) = self.parse_video_item_from_anchor(anchor, &selectors) else {
                continue;
            };
            if seen.insert(item.id.clone()) {
                items.push(item);
            }
        }
        items
    }
    /// Fetch a listing URL with a 5-minute cache.
    ///
    /// On HTTP failure or an empty parse, reports the error / falls back to
    /// the previously cached items instead of surfacing an error upstream.
    async fn fetch_listing(
        &self,
        cache: VideoCache,
        url: String,
        options: ServerOptions,
        error_context: &str,
    ) -> Result<Vec<VideoItem>> {
        let old_items = match cache.get(&url) {
            Some((time, items)) => {
                // Serve from cache while the entry is younger than 5 minutes.
                if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
                    return Ok(items.clone());
                }
                items.clone()
            }
            None => vec![],
        };
        let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
        let text = match requester.get(&url, None).await {
            Ok(text) => text,
            Err(error) => {
                report_provider_error(
                    "freeuseporn",
                    error_context,
                    &format!("url={url}; error={error}"),
                )
                .await;
                return Ok(old_items);
            }
        };
        let items = self.get_video_items_from_html(&text);
        if items.is_empty() {
            return Ok(old_items);
        }
        cache.remove(&url);
        cache.insert(url, items.clone());
        Ok(items)
    }
    /// Fetch a browse/category listing page.
    async fn get(
        &self,
        cache: VideoCache,
        page: u8,
        sort: &str,
        options: ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let url = self.build_list_url(sort, page, options.category.as_deref());
        self.fetch_listing(cache, url, options, "get.request").await
    }
    /// Fetch a search-results page: submit the search form to discover the
    /// results URL, then page through it like a listing.
    async fn query(
        &self,
        cache: VideoCache,
        page: u8,
        query: &str,
        sort: &str,
        options: ServerOptions,
    ) -> Result<Vec<VideoItem>> {
        let search_base = match self.resolve_search_url(query, &options).await {
            Ok(url) => url,
            Err(error) => {
                report_provider_error(
                    "freeuseporn",
                    "query.search_submit",
                    &error.to_string(),
                )
                .await;
                return Ok(vec![]);
            }
        };
        let url = self.append_sort_and_page(&search_base, sort, page);
        self.fetch_listing(cache, url, options, "query.request").await
    }
}
/// Pre-parsed CSS selectors used when scraping freeuseporn listing pages,
/// built once per parse in `get_video_items_from_html`.
struct FreeusepornSelectors {
    // Anchors inside the main #videos-list container.
    list_anchor: Selector,
    // Any anchor; used when #videos-list is absent from the page.
    fallback_anchor: Selector,
    // Video title element within an anchor.
    title: Selector,
    // Thumbnail image (src for thumb, alt as title fallback).
    image: Selector,
    // Duration badge text.
    duration: Selector,
    // Per-video stats list items (views, rating).
    video_stat: Selector,
}
impl FreeusepornSelectors {
    /// Parse all selectors; the literals are static and valid, so the
    /// `expect`s cannot fire at runtime.
    fn new() -> Self {
        Self {
            list_anchor: Selector::parse("#videos-list a[href]").expect("valid freeuseporn list selector"),
            fallback_anchor: Selector::parse("a[href]").expect("valid freeuseporn fallback selector"),
            title: Selector::parse(".v-name").expect("valid freeuseporn title selector"),
            image: Selector::parse("img").expect("valid freeuseporn image selector"),
            duration: Selector::parse(".duration").expect("valid freeuseporn duration selector"),
            video_stat: Selector::parse(".video-stats li").expect("valid freeuseporn stats selector"),
        }
    }
}
#[async_trait]
impl Provider for FreeusepornProvider {
    /// Provider entry point: dispatch to search (`query`) or browse (`get`),
    /// swallowing errors into an empty result (logged to stderr).
    async fn get_videos(
        &self,
        cache: VideoCache,
        _pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        _per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        // Non-numeric/overflowing page strings fall back to page 1.
        let page = page.parse::<u8>().unwrap_or(1);
        let videos = match query {
            Some(query) => self.query(cache, page, &query, &sort, options).await,
            None => self.get(cache, page, &sort, options).await,
        };
        match videos {
            Ok(items) => items,
            Err(error) => {
                eprintln!("freeuseporn provider error: {error}");
                vec![]
            }
        }
    }
    /// The static channel descriptor; always available for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        Some(self.build_channel(clientversion))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Shared fixture: a provider pointed at the production base URL.
    fn provider() -> FreeusepornProvider {
        FreeusepornProvider::new()
    }
    // URL building: default listing, category + page, and appending sort/page
    // to an already-resolved search URL.
    #[test]
    fn builds_listing_urls_for_sort_category_and_search() {
        let provider = provider();
        assert_eq!(
            provider.build_list_url("recent", 1, None),
            "https://www.freeuseporn.com/videos?o=mr"
        );
        assert_eq!(
            provider.build_list_url("viewed", 2, Some("mind-control")),
            "https://www.freeuseporn.com/videos/mind-control?o=mv&page=2"
        );
        assert_eq!(
            provider.append_sort_and_page(
                "https://www.freeuseporn.com/search/videos/Nicole-Kitt",
                "favorites",
                3
            ),
            "https://www.freeuseporn.com/search/videos/Nicole-Kitt?o=tf&page=3"
        );
    }
    // Search body: spaces become '+' and '&' is percent-encoded.
    #[test]
    fn builds_search_request_body_with_form_encoding() {
        assert_eq!(
            FreeusepornProvider::build_search_request_body("Nicole Kitt & Cory Chase"),
            "search_query=Nicole+Kitt+%26+Cory+Chase"
        );
    }
    // HTML parsing: ad iframes are skipped, relative and absolute hrefs both
    // parse, entities are decoded, and 720p/480p formats are synthesized.
    #[test]
    fn parses_listing_items_and_builds_formats() {
        let provider = provider();
        let html = r#"
<ul class="grid" id="videos-list">
<li>
<div class="item">
<div class="thumbnail">
<div class="embed">
<iframe src="https://ads.example"></iframe>
</div>
</div>
</div>
</li>
<li>
<a href="/video/9579/nicole-kitt-shady-slut-keeps-confessing" class="thumb-wrap-link">
<div class="item">
<div class="thumbnail overlay" id="playvthumb_9579">
<div class="sub-data">
<span class="duration">59:09</span>
</div>
<img src="https://www.freeuseporn.com/media/videos/tmb/9579/1.jpg" alt="Nicole Kitt &amp; The Truth"/>
</div>
<div class="info">
<span class="v-name">Nicole Kitt &amp; The Truth</span>
<ul class="video-stats">
<li><i class="far fa-eye"></i>52180</li>
<li><i class="far fa-heart"></i>100%</li>
</ul>
</div>
</div>
</a>
</li>
<li>
<a href="https://www.freeuseporn.com/video/9578/lollipop-time-stop-2">
<div class="item">
<div class="thumbnail overlay">
<div class="sub-data">
<span class="duration">16:27</span>
</div>
<img src="https://www.freeuseporn.com/media/videos/tmb/9578/1.jpg" alt="Lollipop time stop 2"/>
</div>
<div class="info">
<span class="v-name">Lollipop time stop 2</span>
<ul class="video-stats">
<li><i class="far fa-eye"></i>35058</li>
<li><i class="far fa-heart"></i>88%</li>
</ul>
</div>
</div>
</a>
</li>
</ul>
"#;
        let items = provider.get_video_items_from_html(html);
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].id, "9579");
        assert_eq!(items[0].title, "Nicole Kitt & The Truth");
        assert_eq!(
            items[0].url,
            "https://www.freeuseporn.com/video/9579/nicole-kitt-shady-slut-keeps-confessing"
        );
        assert_eq!(
            items[0].thumb,
            "https://www.freeuseporn.com/media/videos/tmb/9579/1.jpg"
        );
        assert_eq!(items[0].duration, 3549);
        assert_eq!(items[0].views, Some(52180));
        assert_eq!(items[0].rating, Some(100.0));
        assert_eq!(items[0].formats.as_ref().map(|formats| formats.len()), Some(2));
        assert_eq!(
            items[0]
                .formats
                .as_ref()
                .and_then(|formats| formats.first())
                .map(|format| format.url.as_str()),
            Some("https://www.freeuseporn.com/media/videos/h264/9579_720p.mp4")
        );
        assert_eq!(items[1].id, "9578");
        assert_eq!(items[1].rating, Some(88.0));
    }
}

View File

@@ -12,6 +12,12 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::videos::{self, ServerOptions, VideoItem};
/// Static channel metadata for this provider: the channel group it is listed
/// under and the descriptive tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "hentai-animation",
        tags: &["hentai", "anime", "premium"],
    };
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -31,7 +37,6 @@ struct HanimeSearchRequest {
page: u8,
}
#[allow(dead_code)]
impl HanimeSearchRequest {
pub fn new() -> Self {
HanimeSearchRequest {
@@ -45,26 +50,10 @@ impl HanimeSearchRequest {
page: 0,
}
}
pub fn tags(mut self, tags: Vec<String>) -> Self {
self.tags = tags;
self
}
pub fn search_text(mut self, search_text: String) -> Self {
self.search_text = search_text;
self
}
pub fn tags_mode(mut self, tags_mode: String) -> Self {
self.tags_mode = tags_mode;
self
}
pub fn brands(mut self, brands: Vec<String>) -> Self {
self.brands = brands;
self
}
pub fn blacklist(mut self, blacklist: Vec<String>) -> Self {
self.blacklist = blacklist;
self
}
pub fn order_by(mut self, order_by: String) -> Self {
self.order_by = order_by;
self
@@ -114,16 +103,11 @@ struct HanimeSearchResult {
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct HanimeProvider {
url: String,
}
pub struct HanimeProvider;
impl HanimeProvider {
pub fn new() -> Self {
HanimeProvider {
url: "https://hanime.tv/".to_string(),
}
HanimeProvider
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {

1261
src/providers/heavyfetish.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,12 @@ use std::vec;
use titlecase::Titlecase;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "hentai-animation",
tags: &["hentai", "anime", "curated"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -195,7 +201,12 @@ impl HentaihavenProvider {
let block = match html
.split("previouspostslink")
.next()
.and_then(|s| s.split("vraven_manga_list").nth(1))
.and_then(|s| {
s.split("vraven_manga_list").nth(1).or_else(|| {
s.find(r#"<div class="page-content-listing item-big_thumbnail">"#)
.map(|idx| &s[idx..])
})
})
{
Some(b) => b,
None => {

View File

@@ -14,6 +14,12 @@ use std::vec;
use wreq::Client;
use wreq_util::Emulation;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "gay-male",
tags: &["gay", "male", "tube"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -15,6 +15,12 @@ use std::sync::{Arc, RwLock};
use std::{thread, vec};
use titlecase::Titlecase;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "studio-network",
tags: &["studio", "hd", "scenes"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

1368
src/providers/hsex.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,12 @@ use std::{thread, vec};
use titlecase::Titlecase;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "fetish-kink",
tags: &["hypnosis", "fetish", "sissy"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -17,6 +17,12 @@ use std::vec;
use titlecase::Titlecase;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "jav",
tags: &["jav", "asian", "streaming"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -356,13 +362,18 @@ impl JavtifulProvider {
.unwrap_or("")
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let (tags, formats, views) = self
let (tags, mut formats, views) = self
.extract_media(&video_url, &mut requester, options)
.await?;
if preview.len() == 0 {
preview = format!("https://trailers.jav.si/preview/{id}.mp4");
}
if formats.is_empty() && !preview.is_empty() {
let mut format = VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), video_url.clone());
formats.push(format);
}
let video_item = VideoItem::new(id, title, video_url, "javtiful".into(), thumb, duration)
.formats(formats)
.tags(tags)
@@ -422,16 +433,55 @@ impl JavtifulProvider {
.unwrap_or(0);
let quality = "1080p".to_string();
let video_url = crate::providers::build_proxy_url(
options,
"javtiful",
&crate::providers::strip_url_scheme(url),
);
Ok((
tags,
vec![VideoFormat::new(video_url, quality, "video/mp4".into())],
views,
))
let mut formats = Vec::new();
let video_id = url
.split("/video/")
.nth(1)
.and_then(|value| value.split('/').next())
.unwrap_or("")
.trim();
let token = text
.split("data-csrf-token=\"")
.nth(1)
.and_then(|value| value.split('"').next())
.unwrap_or("")
.trim();
if !video_id.is_empty() && !token.is_empty() {
let form = wreq::multipart::Form::new()
.text("video_id", video_id.to_string())
.text("pid_c", "".to_string())
.text("token", token.to_string());
if let Ok(response) = requester
.post_multipart(
"https://javtiful.com/ajax/get_cdn",
form,
vec![("Referer".to_string(), url.to_string())],
Some(Version::HTTP_11),
)
.await
{
let payload = response.text().await.unwrap_or_default();
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&payload) {
if let Some(cdn_url) = json.get("playlists").and_then(|value| value.as_str()) {
if !cdn_url.trim().is_empty() {
let mut format = VideoFormat::new(
cdn_url.to_string(),
quality.clone(),
"m3u8".into(),
);
format.add_http_header("Referer".to_string(), url.to_string());
formats.push(format);
}
}
}
}
}
let _ = options;
Ok((tags, formats, views))
}
}

View File

@@ -6,6 +6,7 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
use crate::videos::VideoFormat;
use crate::videos::ServerOptions;
use crate::videos::VideoItem;
use async_trait::async_trait;
@@ -16,6 +17,12 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "jav",
tags: &["jav", "asian", "uncensored"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -416,13 +423,15 @@ impl MissavProvider {
})()
.ok_or_else(|| ErrorKind::ParsingError(format!("video_url\n{:?}", vid).to_string()))?;
let video_item =
VideoItem::new(id, title, video_url, "missav".to_string(), thumb, duration)
.tags(tags)
.preview(format!(
"https://fourhoi.com/{}/preview.mp4",
url_str.split('/').last().unwrap_or_default()
));
let mut format = VideoFormat::new(video_url.clone(), "auto".to_string(), "m3u8".to_string());
format.add_http_header("Referer".to_string(), "https://missav.ws/".to_string());
let video_item = VideoItem::new(id, title, video_url, "missav".to_string(), thumb, duration)
.formats(vec![format])
.tags(tags)
.preview(format!(
"https://fourhoi.com/{}/preview.mp4",
url_str.split('/').last().unwrap_or_default()
));
// 5. Cache to DB
if let Ok(mut conn) = pool.get() {

File diff suppressed because it is too large Load Diff

View File

@@ -10,10 +10,19 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use std::net::IpAddr;
use std::vec;
use titlecase::Titlecase;
use url::Url;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["search", "mixed", "user-upload"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -45,21 +54,230 @@ impl NoodlemagazineProvider {
favicon: "https://www.google.com/s2/favicons?sz=64&domain=noodlemagazine.com".into(),
status: "active".into(),
categories: vec![],
options: vec![],
options: vec![
ChannelOption {
id: "category".into(),
title: "Popular Period".into(),
description: "Pick which popular feed to browse.".into(),
systemImage: "clock".into(),
colorName: "blue".into(),
options: vec![
FilterOption {
id: "recent".into(),
title: "Recent".into(),
},
FilterOption {
id: "week".into(),
title: "This Week".into(),
},
FilterOption {
id: "month".into(),
title: "This Month".into(),
},
],
multiSelect: false,
},
ChannelOption {
id: "sort".into(),
title: "Sort By".into(),
description: "Sort popular feed results.".into(),
systemImage: "arrow.up.arrow.down".into(),
colorName: "orange".into(),
options: vec![
FilterOption {
id: "views".into(),
title: "Views".into(),
},
FilterOption {
id: "date".into(),
title: "Newest".into(),
},
FilterOption {
id: "duration".into(),
title: "Duration".into(),
},
],
multiSelect: false,
},
ChannelOption {
id: "filter".into(),
title: "Order".into(),
description: "Ascending or descending order.".into(),
systemImage: "list.number".into(),
colorName: "green".into(),
options: vec![
FilterOption {
id: "desc".into(),
title: "Descending".into(),
},
FilterOption {
id: "asc".into(),
title: "Ascending".into(),
},
],
multiSelect: false,
},
],
nsfw: true,
cacheDuration: Some(1800),
}
}
/// Map the client-selected category onto a popular-feed period segment.
/// Unknown or missing values fall back to the "recent" feed.
fn resolve_popular_period(options: &ServerOptions) -> &'static str {
    let selected = options.category.as_deref();
    if selected == Some("week") {
        return "week";
    }
    if selected == Some("month") {
        return "month";
    }
    // "all" is a legacy alias kept for stale clients; the upstream site does
    // not expose a valid /popular/all route, so it maps to "recent" too.
    "recent"
}
/// Resolve the sort key for the popular feed. The per-request `options.sort`
/// wins over the caller-supplied default `sort`; unrecognized values map to
/// "views".
fn resolve_sort_by(sort: &str, options: &ServerOptions) -> &'static str {
    let requested = options.sort.as_deref().unwrap_or(sort);
    if matches!(requested, "date" | "new" | "latest") {
        "date"
    } else if matches!(requested, "duration" | "length") {
        "duration"
    } else {
        "views"
    }
}
/// Resolve the sort direction from the request filter; anything other than an
/// explicit "asc" yields descending order.
fn resolve_sort_order(options: &ServerOptions) -> &'static str {
    if options.filter.as_deref() == Some("asc") {
        "asc"
    } else {
        "desc"
    }
}
/// Build the r.jina.ai text-mirror URL for `url`, used as a fallback when the
/// direct fetch is blocked. The original scheme (if any) is dropped and the
/// target is always requested via the mirror's `http://` form.
fn mirror_url(url: &str) -> String {
    let without_scheme = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .unwrap_or(url);
    ["https://r.jina.ai/http://", without_scheme].concat()
}
/// Heuristic: does this HTML look like a bot-challenge interstitial or an
/// access block (e.g. Cloudflare) rather than a real listing page? The check
/// is a case-insensitive substring scan for known challenge markers.
fn looks_like_bot_challenge_or_block(html: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "just a moment",
        "cf-browser-verification",
        "cf-chl",
        "access restricted",
        "cloudflare",
    ];
    let haystack = html.to_ascii_lowercase();
    MARKERS.iter().any(|&marker| haystack.contains(marker))
}
/// Parse video items out of the r.jina.ai markdown mirror of a listing page.
///
/// The mirror renders each video tile roughly as
/// `[![Image N: title](thumb-url) …meta…](watch-url)`, so a single regex pulls
/// title/thumb/meta/url per tile, and duration plus view count are then fished
/// out of the free-form `meta` text with secondary regexes.
fn parse_markdown_listing_items(
    &self,
    markdown: &str,
    options: &ServerOptions,
) -> Vec<VideoItem> {
    // If any regex fails to compile, degrade to "no items" rather than error.
    let Some(regex) = Regex::new(
        r#"(?is)\[\!\[Image\s+\d+:\s*(?P<title>.*?)\]\((?P<thumb>https?://[^)\s]+)\)(?P<meta>.*?)\]\((?P<url>https?://noodlemagazine\.com/watch/[^)\s]+)\)"#,
    )
    .ok() else {
        return vec![];
    };
    // Matches durations like "12:34" or "1:23:45".
    let Some(duration_regex) = Regex::new(r"(?P<duration>\d{1,2}:\d{2}(?::\d{2})?)").ok() else {
        return vec![];
    };
    // Matches an abbreviated view count (e.g. "1.2K") immediately preceding a
    // duration, which is how the mirror lays the stats out.
    let Some(views_regex) = Regex::new(r"(?P<views>[0-9]+(?:\.[0-9]+)?[KMB]?)\s+\d{1,2}:\d{2}(?::\d{2})?").ok() else {
        return vec![];
    };
    regex
        .captures_iter(markdown)
        .filter_map(|caps| {
            let title_raw = caps.name("title")?.as_str().trim();
            let thumb = caps.name("thumb")?.as_str().trim();
            let video_url = caps.name("url")?.as_str().trim();
            let meta = caps.name("meta").map(|m| m.as_str()).unwrap_or("");
            // The item id is the last path segment of the watch URL; tiles
            // without a usable id are dropped.
            let parsed_url = Url::parse(video_url).ok()?;
            let id = parsed_url
                .path_segments()
                .and_then(|mut segs| segs.next_back())
                .filter(|value| !value.is_empty())
                .map(|value| value.to_string())?;
            // Missing/unparseable duration or views default to 0.
            let duration = duration_regex
                .captures(meta)
                .and_then(|m| m.name("duration").map(|v| v.as_str()))
                .and_then(|v| parse_time_to_seconds(v))
                .unwrap_or(0) as u32;
            let views = views_regex
                .captures(meta)
                .and_then(|m| m.name("views").map(|v| v.as_str()))
                .and_then(|v| parse_abbreviated_number(v.trim()))
                .unwrap_or(0);
            // Decode HTML entities in the title, then title-case it.
            let title = decode(title_raw.as_bytes())
                .to_string()
                .unwrap_or_else(|_| title_raw.to_string())
                .titlecase();
            // Both the playback URL and the thumb are routed through this
            // server's proxy helpers.
            let proxy_url = self.proxy_url(options, video_url);
            let proxied_thumb = self.proxied_thumb(options, thumb);
            Some(
                VideoItem::new(
                    id,
                    title,
                    proxy_url.clone(),
                    "noodlemagazine".into(),
                    proxied_thumb,
                    duration,
                )
                .views(views)
                .formats(vec![
                    VideoFormat::new(proxy_url, "auto".into(), "video/mp4".into())
                        .format_id("auto".into())
                        .format_note("proxied".into())
                        .http_header("Referer".into(), video_url.to_string()),
                ]),
            )
        })
        .collect()
}
/// Fetch a listing page and parse it into video items, falling back to the
/// r.jina.ai markdown mirror when the direct response looks like a bot
/// challenge or access block.
async fn fetch_listing_items(
    &self,
    requester: &mut crate::util::requester::Requester,
    page_url: &str,
    options: &ServerOptions,
) -> Vec<VideoItem> {
    // Direct fetch first; a request error degrades to an empty body.
    let html = requester
        .get(page_url, Some(Version::HTTP_2))
        .await
        .unwrap_or_default();
    let mut items = self.get_video_items_from_html(html.clone(), options);
    if !items.is_empty() {
        return items;
    }
    // No items but also no challenge markers: the page is genuinely empty (or
    // unparseable), so the mirror would not help — return as-is.
    if !Self::looks_like_bot_challenge_or_block(&html) {
        return items;
    }
    // Mirror fallback returns markdown rather than HTML, hence its own parser.
    let mirror = requester
        .get(&Self::mirror_url(page_url), Some(Version::HTTP_11))
        .await
        .unwrap_or_default();
    items = self.parse_markdown_listing_items(&mirror, options);
    items
}
async fn get(
&self,
cache: VideoCache,
page: u8,
_sort: &str,
sort: &str,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let period = Self::resolve_popular_period(&options);
let sort_by = Self::resolve_sort_by(sort, &options);
let sort_order = Self::resolve_sort_order(&options);
let video_url = format!(
"{}/popular/recent?sort_by=views&sort_order=desc&p={}",
"{}/popular/{period}?sort_by={sort_by}&sort_order={sort_order}&p={}",
self.url,
page.saturating_sub(1)
);
@@ -75,13 +293,9 @@ impl NoodlemagazineProvider {
None => return Ok(old_items),
};
let text = requester
.get(&video_url, Some(Version::HTTP_2))
.await
.unwrap_or_default();
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let items = self.get_video_items_from_html(text, proxy_base_url);
let items = self
.fetch_listing_items(&mut requester, &video_url, &options)
.await;
if items.is_empty() {
Ok(old_items)
@@ -113,13 +327,9 @@ impl NoodlemagazineProvider {
None => return Ok(old_items),
};
let text = requester
.get(&video_url, Some(Version::HTTP_2))
.await
.unwrap_or_default();
let proxy_base_url = options.public_url_base.as_deref().unwrap_or_default();
let items = self.get_video_items_from_html(text, proxy_base_url);
let items = self
.fetch_listing_items(&mut requester, &video_url, &options)
.await;
if items.is_empty() {
Ok(old_items)
@@ -130,7 +340,7 @@ impl NoodlemagazineProvider {
}
}
fn get_video_items_from_html(&self, html: String, proxy_base_url: &str) -> Vec<VideoItem> {
fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
@@ -148,34 +358,137 @@ impl NoodlemagazineProvider {
None => return vec![],
};
list.split("<div class=\"item\">")
list.split("<div class=\"item")
.skip(1)
.filter_map(|segment| {
self.get_video_item(segment.to_string(), proxy_base_url)
.ok()
self.get_video_item(segment.to_string(), options).ok()
})
.collect()
}
fn proxy_url(&self, proxy_base_url: &str, video_url: &str) -> String {
let target = video_url
.strip_prefix("https://")
.or_else(|| video_url.strip_prefix("http://"))
.unwrap_or(video_url)
.trim_start_matches('/');
if proxy_base_url.is_empty() {
return format!("/proxy/noodlemagazine/{target}");
}
format!(
"{}/proxy/noodlemagazine/{}",
proxy_base_url.trim_end_matches('/'),
target
fn proxy_url(&self, options: &ServerOptions, video_url: &str) -> String {
crate::providers::build_proxy_url(
options,
"noodlemagazine",
&crate::providers::strip_url_scheme(video_url),
)
}
fn get_video_item(&self, video_segment: String, proxy_base_url: &str) -> Result<VideoItem> {
/// Turn a scraped thumbnail reference — absolute, protocol-relative (`//…`),
/// root-relative (`/…`), or a bare relative path — into an absolute URL.
/// Returns an empty string for blank input.
fn normalize_thumb_url(&self, thumb: &str) -> String {
    let thumb = thumb.trim();
    if thumb.is_empty() {
        return String::new();
    }
    if thumb.starts_with("http://") || thumb.starts_with("https://") {
        return thumb.to_string();
    }
    if thumb.starts_with("//") {
        // Protocol-relative reference: pin it to HTTPS.
        return format!("https:{thumb}");
    }
    // Join with the provider base URL. Trimming both sides keeps the result
    // free of doubled slashes regardless of whether `self.url` carries a
    // trailing '/' — the previous root-relative branch concatenated without
    // trimming and could emit "…com//path".
    format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        thumb.trim_start_matches('/')
    )
}
/// True when the URL path ends in a recognized still-image file extension
/// (case-insensitive).
fn has_allowed_image_extension(path: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 6] = [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"];
    let lowered = path.to_ascii_lowercase();
    IMAGE_EXTENSIONS.iter().any(|&ext| lowered.ends_with(ext))
}
/// Whitelist check for thumbnail/preview CDN hosts. A host is accepted only
/// when it IS one of the known domains or is a true subdomain of one (the
/// suffix match is anchored at a `.` label boundary) — a bare `ends_with`
/// would also accept look-alike hosts such as `evil-okcdn.ru`.
fn is_known_preview_host(host: &str) -> bool {
    const KNOWN_DOMAINS: [&str; 4] = [
        "pvvstream.pro",
        "okcdn.ru",
        "vkuserphoto.ru",
        "noodlemagazine.com",
    ];
    let host = host.to_ascii_lowercase();
    KNOWN_DOMAINS.iter().any(|&domain| {
        host == domain
            || host
                .strip_suffix(domain)
                .is_some_and(|prefix| prefix.ends_with('.'))
    })
}
/// Heuristic: does this URL look like a video-preview/poster endpoint rather
/// than a plain image file? Used to accept extension-less thumbnails, but
/// only from hosts that already passed `is_known_preview_host`.
fn has_preview_signature(url: &Url) -> bool {
    let path = url.path().to_ascii_lowercase();
    let query = url.query().unwrap_or("").to_ascii_lowercase();
    // Path markers cover CDN preview/poster routes; query markers cover the
    // thumbnail-service style of URL (e.g. getVideoPreview endpoints).
    path.contains("/preview/")
        || path.contains("/poster/")
        || path.contains("getvideopreview")
        || query.contains("type=video_thumb")
        || query.contains("keep_aspect_ratio=")
}
/// SSRF-style guard for thumbnail hosts: rejects `localhost` and any literal
/// IP address that is non-public (private, loopback, link-local, etc.).
/// Hostnames that are not IP literals are allowed through.
fn is_disallowed_thumb_host(host: &str) -> bool {
    if host.eq_ignore_ascii_case("localhost") {
        return true;
    }
    // Only literal addresses are classified; ordinary DNS names pass.
    let Ok(addr) = host.parse::<IpAddr>() else {
        return false;
    };
    match addr {
        IpAddr::V4(v4) => {
            v4.is_private()
                || v4.is_loopback()
                || v4.is_link_local()
                || v4.is_broadcast()
                || v4.is_documentation()
                || v4.is_unspecified()
        }
        IpAddr::V6(v6) => {
            v6.is_loopback()
                || v6.is_unspecified()
                || v6.is_multicast()
                || v6.is_unique_local()
                || v6.is_unicast_link_local()
        }
    }
}
/// Validate a normalized thumbnail URL before exposing it to clients:
/// it must parse, be HTTPS, not point at a local/private host, and either
/// carry a recognized image file extension or come from a known preview CDN
/// with a preview-style path/query.
fn is_allowed_thumb_url(&self, url: &str) -> bool {
    let Some(url) = Url::parse(url).ok() else {
        return false;
    };
    if url.scheme() != "https" {
        return false;
    }
    let Some(host) = url.host_str() else {
        return false;
    };
    if Self::is_disallowed_thumb_host(host) {
        return false;
    }
    // A plain image extension is sufficient on any (non-disallowed) host.
    if Self::has_allowed_image_extension(url.path()) {
        return true;
    }
    // Otherwise require both a trusted preview host AND preview-like URL shape.
    Self::is_known_preview_host(host) && Self::has_preview_signature(&url)
}
/// Normalize and vet a thumbnail URL. Thumbs hosted on `img.pvvstream.pro`
/// are rewritten to go through this server's proxy; every other accepted URL
/// is returned unchanged. Returns an empty string when the thumb is missing
/// or fails validation.
///
/// Note: the parameter was previously named `_options` despite being used in
/// the proxy branch; renamed to reflect actual use.
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
    let normalized = self.normalize_thumb_url(thumb);
    if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
        return String::new();
    }
    let Some(url) = Url::parse(&normalized).ok() else {
        return String::new();
    };
    if url
        .host_str()
        .is_some_and(|host| host.eq_ignore_ascii_case("img.pvvstream.pro"))
    {
        return crate::providers::build_proxy_url(
            options,
            "noodlemagazine-thumb",
            &crate::providers::strip_url_scheme(&normalized),
        );
    }
    normalized
}
fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> {
let href = video_segment
.split("<a href=\"")
.nth(1)
@@ -204,12 +517,17 @@ impl NoodlemagazineProvider {
.ok_or_else(|| Error::from("missing id"))?
.to_string();
let thumb = video_segment
.split("data-src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let thumb = Regex::new(
r#"(?i)(?:data-src|data-original|data-webp|src|poster)\s*=\s*"(?P<url>[^"]+)""#,
)
.ok()
.and_then(|regex| {
regex
.captures_iter(&video_segment)
.filter_map(|captures| captures.name("url").map(|value| value.as_str().to_string()))
.find(|candidate| !candidate.starts_with("data:image/"))
})
.unwrap_or_default();
let raw_duration = video_segment
.split("#clock-o\"></use></svg>")
@@ -225,14 +543,15 @@ impl NoodlemagazineProvider {
.and_then(|s| s.split('<').next())
.and_then(|v| parse_abbreviated_number(v.trim()))
.unwrap_or(0);
let proxy_url = self.proxy_url(proxy_base_url, &video_url);
let proxy_url = self.proxy_url(options, &video_url);
let proxied_thumb = self.proxied_thumb(options, &thumb);
Ok(VideoItem::new(
id,
title,
proxy_url.clone(),
"noodlemagazine".into(),
thumb,
proxied_thumb,
duration,
)
.views(views)
@@ -281,16 +600,33 @@ impl Provider for NoodlemagazineProvider {
#[cfg(test)]
mod tests {
use super::NoodlemagazineProvider;
use crate::videos::ServerOptions;
fn options() -> ServerOptions {
ServerOptions {
featured: None,
category: None,
sites: None,
filter: None,
language: None,
public_url_base: Some("https://example.com".to_string()),
requester: None,
network: None,
stars: None,
categories: None,
duration: None,
sort: None,
sexuality: None,
}
}
#[test]
fn rewrites_video_pages_to_hottub_proxy() {
let provider = NoodlemagazineProvider::new();
let options = options();
assert_eq!(
provider.proxy_url(
"https://example.com",
"https://noodlemagazine.com/watch/-123_456"
),
provider.proxy_url(&options, "https://noodlemagazine.com/watch/-123_456"),
"https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
);
}
@@ -298,11 +634,12 @@ mod tests {
#[test]
fn parses_listing_without_detail_page_requests() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item">
<a href="/watch/-123_456">
<img data-src="https://thumb.example/test.jpg" />
<img data-src="https://noodlemagazine.com/thumbs/test.jpg" />
</a>
<div class="title">sample &amp; title</div>
<svg><use></use></svg>#clock-o"></use></svg>12:34<
@@ -311,13 +648,124 @@ mod tests {
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), "https://example.com");
let items = provider.get_video_items_from_html(html.to_string(), &options);
assert_eq!(items.len(), 1);
assert_eq!(
items[0].url,
"https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
);
assert_eq!(
items[0].thumb,
"https://noodlemagazine.com/thumbs/test.jpg"
);
assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
}
#[test]
fn keeps_https_cdn_thumbs_but_drops_non_images() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item">
<a href="/watch/-123_456">
<img data-src="https://cdn.example/thumb.jpg" />
</a>
<div class="title">sample</div>
<svg><use></use></svg>#clock-o"></use></svg>12:34<
<svg><use></use></svg>#eye"></use></svg>1.2K<
</div>
<div class="item">
<a href="/watch/-555_666">
<img data-src="https://noodlemagazine.com/watch/not-an-image" />
</a>
<div class="title">sample 2</div>
<svg><use></use></svg>#clock-o"></use></svg>00:42<
<svg><use></use></svg>#eye"></use></svg>123<
</div>
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), &options);
assert_eq!(items.len(), 2);
assert_eq!(
items[0].thumb,
"https://cdn.example/thumb.jpg"
);
assert!(items[1].thumb.is_empty());
}
#[test]
fn keeps_preview_urls_without_file_extension() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item">
<a href="/watch/-111_222">
<img data-src="https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l" />
</a>
<div class="title">sample</div>
<svg><use></use></svg>#clock-o"></use></svg>12:34<
<svg><use></use></svg>#eye"></use></svg>1.2K<
</div>
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), &options);
assert_eq!(items.len(), 1);
assert_eq!(
items[0].thumb,
"https://example.com/proxy/noodlemagazine-thumb/img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
);
}
#[test]
fn parses_item_variants_and_alternate_thumb_attributes() {
let provider = NoodlemagazineProvider::new();
let options = options();
let html = r#"
<div class="list_videos" id="list_videos">
<div class="item has-video" data-id="123">
<a href="/watch/-333_444">
<img data-original="https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg" />
</a>
<div class="title">sample alt</div>
<svg><use></use></svg>#clock-o"></use></svg>00:42<
<svg><use></use></svg>#eye"></use></svg>123<
</div>
>Show more</div>
"#;
let items = provider.get_video_items_from_html(html.to_string(), &options);
assert_eq!(items.len(), 1);
assert_eq!(
items[0].thumb,
"https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg"
);
}
#[test]
fn resolves_popular_filters_for_usability_options() {
let mut options = options();
options.category = Some("month".to_string());
options.sort = Some("date".to_string());
options.filter = Some("asc".to_string());
assert_eq!(NoodlemagazineProvider::resolve_popular_period(&options), "month");
assert_eq!(NoodlemagazineProvider::resolve_sort_by("views", &options), "date");
assert_eq!(NoodlemagazineProvider::resolve_sort_order(&options), "asc");
}
#[test]
fn maps_legacy_all_time_period_to_recent_feed() {
let mut options = options();
options.category = Some("all".to_string());
options.sort = Some("views".to_string());
options.filter = Some("desc".to_string());
assert_eq!(NoodlemagazineProvider::resolve_popular_period(&options), "recent");
}
}

View File

@@ -14,6 +14,12 @@ use std::vec;
use wreq::Client;
use wreq_util::Emulation;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -15,6 +15,12 @@ use std::vec;
use wreq::Client;
use wreq_util::Emulation;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "mixed", "search"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -1,6 +1,12 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, report_provider_error_background};
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
};
use crate::uploaders::{
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
iso_timestamp_from_unix,
};
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
@@ -9,10 +15,19 @@ use crate::{status::*, util};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
use regex::Regex;
use scraper::{Html, Selector};
use std::sync::{Arc, RwLock};
use std::thread;
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "studio-network",
tags: &["studio", "networks", "models"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -27,6 +42,20 @@ pub struct OmgxxxProvider {
networks: Arc<RwLock<Vec<FilterOption>>>,
stars: Arc<RwLock<Vec<FilterOption>>>,
}
/// Which kind of OMG.XXX listing an uploader identifier resolves to.
#[derive(Debug, Clone, PartialEq, Eq)]
enum OmgUploaderTargetKind {
    /// A `/sites/<id>/` listing.
    Site,
    /// A `/networks/<id>/` listing.
    Network,
}
/// A resolved uploader destination: the listing kind, its URL slug (`id`),
/// and the human-readable title used for display.
#[derive(Debug, Clone)]
struct OmgUploaderTarget {
    kind: OmgUploaderTargetKind,
    // URL slug as it appears in /sites/<id>/ or /networks/<id>/.
    id: String,
    // Display title; may equal `id` when no nicer title is known.
    title: String,
}
impl OmgxxxProvider {
pub fn new() -> Self {
let provider = OmgxxxProvider {
@@ -411,6 +440,371 @@ impl OmgxxxProvider {
}
}
/// Build the canonical uploader id string `omgxxx:<kind>:<encoded-id>`, with
/// the slug percent-encoded so ':' and '/' in ids cannot break the format.
fn canonical_uploader_id(kind: &OmgUploaderTargetKind, id: &str) -> String {
    let kind_label = match kind {
        OmgUploaderTargetKind::Site => "site",
        OmgUploaderTargetKind::Network => "network",
    };
    let encoded = utf8_percent_encode(id, NON_ALPHANUMERIC);
    format!("omgxxx:{kind_label}:{encoded}")
}
/// Resolve a canonical uploader id (`omgxxx:<kind>:<id>`, or the legacy
/// two-part form `omgxxx:<id>` which implies a site) back into a target.
/// The decoded slug is matched against the cached site/network filter lists
/// to recover a display title; unknown slugs still resolve, with the slug
/// doubling as the title. Returns `None` for malformed ids or unknown kinds.
fn uploader_target_from_id(&self, uploader_id: &str) -> Option<OmgUploaderTarget> {
    let parts = uploader_id.split(':').collect::<Vec<_>>();
    if parts.is_empty() {
        return None;
    }
    let (kind, raw_id) = match parts.as_slice() {
        ["omgxxx", kind, raw_id] => (*kind, *raw_id),
        // Legacy form without an explicit kind defaults to "site".
        ["omgxxx", raw_id] => ("site", *raw_id),
        _ => return None,
    };
    // Ids are percent-encoded by `canonical_uploader_id`; undo that here.
    let decoded_id = percent_decode_str(raw_id).decode_utf8().ok()?.to_string();
    match kind {
        // NOTE: `.read().ok()?` means a poisoned lock makes resolution fail
        // with `None` rather than panicking.
        "site" => self
            .sites
            .read()
            .ok()?
            .iter()
            .find(|option| option.id == decoded_id)
            .map(|option| OmgUploaderTarget {
                kind: OmgUploaderTargetKind::Site,
                id: option.id.clone(),
                title: option.title.clone(),
            })
            .or_else(|| {
                // Slug not in the cached list: resolve it anyway, using the
                // slug itself as the title.
                Some(OmgUploaderTarget {
                    kind: OmgUploaderTargetKind::Site,
                    id: decoded_id.clone(),
                    title: decoded_id.clone(),
                })
            }),
        "network" => self
            .networks
            .read()
            .ok()?
            .iter()
            .find(|option| option.id == decoded_id)
            .map(|option| OmgUploaderTarget {
                kind: OmgUploaderTargetKind::Network,
                id: option.id.clone(),
                title: option.title.clone(),
            })
            .or_else(|| {
                Some(OmgUploaderTarget {
                    kind: OmgUploaderTargetKind::Network,
                    id: decoded_id.clone(),
                    title: decoded_id.clone(),
                })
            }),
        _ => None,
    }
}
fn uploader_target_from_name(&self, uploader_name: &str) -> Option<OmgUploaderTarget> {
let normalized = uploader_name.trim();
if normalized.is_empty() {
return None;
}
let lowered = normalized.to_ascii_lowercase();
if let Ok(sites) = self.sites.read() {
if let Some(option) = sites.iter().find(|option| {
option.title.eq_ignore_ascii_case(normalized)
|| option.id.eq_ignore_ascii_case(normalized)
|| option.title.to_ascii_lowercase() == lowered
}) {
return Some(OmgUploaderTarget {
kind: OmgUploaderTargetKind::Site,
id: option.id.clone(),
title: option.title.clone(),
});
}
}
if let Ok(networks) = self.networks.read() {
if let Some(option) = networks.iter().find(|option| {
option.title.eq_ignore_ascii_case(normalized)
|| option.id.eq_ignore_ascii_case(normalized)
|| option.title.to_ascii_lowercase() == lowered
}) {
return Some(OmgUploaderTarget {
kind: OmgUploaderTargetKind::Network,
id: option.id.clone(),
title: option.title.clone(),
});
}
}
None
}
/// Resolve an uploader target, preferring the canonical id when present and
/// falling back to a name-based lookup.
fn resolve_uploader_target(
    &self,
    uploader_id: Option<&str>,
    uploader_name: Option<&str>,
) -> Option<OmgUploaderTarget> {
    if let Some(target) = uploader_id.and_then(|value| self.uploader_target_from_id(value)) {
        return Some(target);
    }
    uploader_name.and_then(|value| self.uploader_target_from_name(value))
}
/// Build the paginated listing URL for an uploader target. Listing pages are
/// 1-based, so a page value of 0 is clamped up to 1.
fn uploader_target_url(&self, target: &OmgUploaderTarget, page: u8) -> String {
    let section = if target.kind == OmgUploaderTargetKind::Site {
        "sites"
    } else {
        "networks"
    };
    format!("{}/{}/{}/{}/", self.url, section, target.id, page.max(1))
}
/// Extract the highest pagination number linked from an uploader listing page
/// by scanning for `/sites/<id>/<n>/` (or `/networks/…`) hrefs. Returns
/// `None` when the pattern fails to compile or no page links are found.
fn uploader_target_last_page(&self, html: &str, target: &OmgUploaderTarget) -> Option<u8> {
    let base = match target.kind {
        OmgUploaderTargetKind::Site => "sites",
        OmgUploaderTargetKind::Network => "networks",
    };
    // Escape both segments: ids are scraped strings and may contain regex
    // metacharacters.
    let pattern = format!(r#"/{}/{}/(?P<page>\d+)/"#, regex::escape(base), regex::escape(&target.id));
    let regex = Regex::new(&pattern).ok()?;
    // Page numbers that do not fit in u8 are silently skipped.
    regex
        .captures_iter(html)
        .filter_map(|captures| captures.name("page")?.as_str().parse::<u8>().ok())
        .max()
}
/// Choose the best display name for an uploader target, in priority order:
/// 1. the target's own title (when non-blank and distinct from the slug),
/// 2. the page `<title>` with known site-branding suffixes stripped,
/// 3. a `[Site Name]` prefix scraped from any first-page video title,
/// 4. the slug itself, hyphen-split and capitalized word by word.
fn display_name_for_uploader_target(
    &self,
    target: &OmgUploaderTarget,
    html: &str,
    first_page_items: &[VideoItem],
) -> String {
    if !target.title.trim().is_empty() && target.title != target.id {
        return target.title.clone();
    }
    if let Some(title) = html
        .split("<title>")
        .nth(1)
        .and_then(|segment| segment.split("</title>").next())
        // Decode HTML entities; on failure keep the raw title text.
        .map(|title| decode(title.as_bytes()).to_string().unwrap_or_else(|_| title.to_string()))
        .map(|title| title.replace(" Porn! 😮 - OMG.XXX", ""))
        .map(|title| title.replace(" - OMG.XXX", ""))
        .map(|title| title.trim().to_string())
        .filter(|title| !title.is_empty())
    {
        return title;
    }
    // Video titles on this site often start with "[Site Name] …".
    if let Some(site_name) = first_page_items.iter().find_map(|item| {
        item.title
            .strip_prefix('[')
            .and_then(|title| title.split(']').next())
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .map(ToOwned::to_owned)
    }) {
        return site_name;
    }
    // Last resort: prettify the slug ("some-network" -> "Some Network").
    target
        .id
        .split('-')
        .map(|part| {
            let mut chars = part.chars();
            match chars.next() {
                Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
                None => String::new(),
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
/// Order uploader videos by relevance to an optional search query. With no
/// (or blank) query the input order is preserved. Otherwise videos are sorted
/// by a small relevance score (title match worth 2, uploader match worth 1),
/// then by views descending, then by id ascending as a stable tiebreaker.
fn rank_uploader_videos(
    videos: &[UploaderVideoRef],
    query: Option<&str>,
) -> Vec<UploaderVideoRef> {
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return videos.to_vec();
    };
    let query = query.to_ascii_lowercase();
    let mut ranked = videos.to_vec();
    ranked.sort_by(|a, b| {
        // Case-insensitive substring scoring; recomputed per comparison.
        let score = |video: &UploaderVideoRef| {
            let mut score = 0u8;
            if video.title.to_ascii_lowercase().contains(&query) {
                score += 2;
            }
            if video.uploader.to_ascii_lowercase().contains(&query) {
                score += 1;
            }
            score
        };
        // b-vs-a for score and views yields descending order on both.
        score(b)
            .cmp(&score(a))
            .then(b.views.cmp(&a.views))
            .then_with(|| a.id.cmp(&b.id))
    });
    ranked
}
/// Derive an uploader target from a scraped anchor: `/sites/<id>/…` maps to a
/// site target and `/networks/<id>/…` to a network target, with the anchor
/// text used as the display title. Returns `None` when neither marker is
/// present, or when the marker is present but the id segment is empty (this
/// mirrors the original behavior: a matching marker with an empty id does
/// NOT fall through to the other marker).
fn uploader_target_from_href_and_title(&self, href: &str, title: &str) -> Option<OmgUploaderTarget> {
    // The id is the first path segment following the marker.
    fn segment_after<'a>(href: &'a str, marker: &str) -> Option<&'a str> {
        href.split(marker)
            .nth(1)
            .and_then(|rest| rest.split('/').next())
            .filter(|id| !id.is_empty())
    }
    if href.contains("/sites/") {
        return segment_after(href, "/sites/").map(|id| OmgUploaderTarget {
            kind: OmgUploaderTargetKind::Site,
            id: id.to_string(),
            title: title.to_string(),
        });
    }
    if href.contains("/networks/") {
        return segment_after(href, "/networks/").map(|id| OmgUploaderTarget {
            kind: OmgUploaderTargetKind::Network,
            id: id.to_string(),
            title: title.to_string(),
        });
    }
    None
}
/// Builds an `UploaderProfile` for a site/network target by scraping its
/// first (and, when paginated, last) listing page.
///
/// Returns `Ok(None)` when the first page yields no videos, and an error
/// when the first page cannot be fetched at all. The last-page fetch is
/// best-effort: a failure simply leaves its item list empty.
async fn build_uploader_profile(
    &self,
    _cache: VideoCache,
    target: &OmgUploaderTarget,
    query: Option<&str>,
    profile_content: bool,
    options: &ServerOptions,
) -> Result<Option<UploaderProfile>> {
    let first_page_url = self.uploader_target_url(target, 1);
    let mut requester = requester_or_default(options, module_path!(), "missing_requester");
    let first_page_html = match requester.get(&first_page_url, None).await {
        Ok(html) => html,
        Err(error) => {
            return Err(Error::from(format!(
                "uploader page request failed url={first_page_url}; error={error}"
            )));
        }
    };
    let first_page_items = self.get_video_items_from_html(first_page_html.clone());
    if first_page_items.is_empty() {
        return Ok(None);
    }
    let display_name =
        self.display_name_for_uploader_target(target, &first_page_html, &first_page_items);
    let last_page = self
        .uploader_target_last_page(&first_page_html, target)
        .unwrap_or(1);
    // Best-effort fetch of the final page: it supplies the tail item count
    // for the total estimate and the oldest upload timestamp.
    let last_page_items = if last_page > 1 {
        let mut requester = requester_or_default(options, module_path!(), "missing_requester");
        let html = requester
            .get(&self.uploader_target_url(target, last_page), None)
            .await
            .unwrap_or_default();
        self.get_video_items_from_html(html)
    } else {
        Vec::new()
    };
    let uploader_id = Self::canonical_uploader_id(&target.kind, &target.id);
    // Attribute every video to this uploader here. (A second loop that
    // re-assigned uploader/uploaderId after ranking was redundant — the
    // values are already set in this map — and has been removed.)
    let video_refs = first_page_items
        .iter()
        .map(|item| {
            let mut video =
                UploaderVideoRef::from_video_item(item, &display_name, &uploader_id);
            video.uploader = display_name.clone();
            video.uploaderId = uploader_id.clone();
            video
        })
        .collect::<Vec<_>>();
    let ranked = Self::rank_uploader_videos(&video_refs, query);
    let featured_ids = ranked
        .iter()
        .take(12)
        .map(|video| video.id.clone())
        .collect::<Vec<_>>();
    // Catalogue-size estimate: assume full pages for all but the last page,
    // plus whatever the last page actually contained.
    let video_count = if last_page > 1 {
        ((last_page as u64 - 1) * first_page_items.len() as u64) + last_page_items.len() as u64
    } else {
        first_page_items.len() as u64
    };
    // Views are only summed over the pages we actually fetched, so this is a
    // lower bound, not a site-wide total.
    let total_views = first_page_items
        .iter()
        .chain(last_page_items.iter())
        .filter_map(|item| item.views)
        .map(u64::from)
        .sum();
    // Newest upload comes from page 1; oldest from the last page, falling
    // back to page 1 when there is only one page.
    let newest = first_page_items.iter().filter_map(|item| item.uploadedAt).max();
    let oldest = last_page_items
        .iter()
        .filter_map(|item| item.uploadedAt)
        .min()
        .or_else(|| first_page_items.iter().filter_map(|item| item.uploadedAt).min());
    let layout = if featured_ids.is_empty() {
        vec![UploaderLayoutRow::videos(None)]
    } else {
        vec![
            UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids),
            UploaderLayoutRow::videos(None),
        ]
    };
    Ok(Some(UploaderProfile {
        id: uploader_id,
        name: display_name,
        url: Some(first_page_url),
        channel: Some("omgxxx".to_string()),
        verified: false,
        videoCount: video_count,
        totalViews: total_views,
        channels: Some(vec![UploaderChannelStat {
            channel: "omgxxx".to_string(),
            videoCount: video_count,
            firstSeenAt: iso_timestamp_from_unix(oldest),
            lastSeenAt: iso_timestamp_from_unix(newest),
        }]),
        avatar: None,
        description: None,
        bio: None,
        videos: profile_content.then_some(video_refs),
        tapes: profile_content.then_some(Vec::new()),
        playlists: profile_content.then_some(Vec::new()),
        layout: Some(layout),
    }))
}
async fn get(
&self,
cache: VideoCache,
@@ -457,8 +851,7 @@ impl OmgxxxProvider {
}
};
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, None).await {
Ok(text) => text,
Err(e) => {
@@ -535,8 +928,7 @@ impl OmgxxxProvider {
}
};
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, None).await {
Ok(text) => text,
Err(e) => {
@@ -586,6 +978,28 @@ impl OmgxxxProvider {
return None;
}
/// Extracts `(href, display-text)` pairs from the "models" chip list of a
/// video segment. Anchors whose visible text is empty are skipped.
fn extract_tag_entries(&self, video_segment: &str) -> Vec<(String, String)> {
    let fragment = Html::parse_fragment(video_segment);
    let selector = Selector::parse("div.models a").expect("valid omgxxx models selector");
    fragment
        .select(&selector)
        .filter_map(|anchor| {
            let href = anchor.value().attr("href")?.to_string();
            // Collapse all interior whitespace to single spaces in one pass
            // instead of join-then-resplit (same token stream, fewer
            // intermediate allocations).
            let title = anchor
                .text()
                .flat_map(str::split_whitespace)
                .collect::<Vec<_>>()
                .join(" ");
            if title.is_empty() {
                return None;
            }
            Some((href, title))
        })
        .collect()
}
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
@@ -751,76 +1165,87 @@ impl OmgxxxProvider {
let site_id = self
.get_site_id_from_name(site_name)
.unwrap_or("".to_string());
let mut tags = match video_segment.contains("class=\"models\">") {
true => video_segment
.split("class=\"models\">")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("href=\"")
.collect::<Vec<&str>>()[1..]
.into_iter()
.map(|s| {
let mut tags = Vec::new();
let mut site_uploader: Option<OmgUploaderTarget> = None;
let mut network_uploader: Option<OmgUploaderTarget> = None;
for (href, tag_title) in self.extract_tag_entries(video_segment) {
if href.contains("/models/") {
let model_id = href
.split("/models/")
.nth(1)
.unwrap_or_default()
.split('/')
.next()
.unwrap_or_default()
.to_string();
if !model_id.is_empty() {
Self::push_unique(
&self.stars,
FilterOption {
id: s
.split("/")
.collect::<Vec<&str>>()
.get(4)
.copied()
.unwrap_or_default()
.to_string(),
title: s
.split(">")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("<")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string(),
id: model_id,
title: tag_title.clone(),
},
);
s.split(">")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("<")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string()
})
.collect::<Vec<String>>()
.to_vec(),
false => vec![],
};
}
}
if href.contains("/sites/") {
let site_id = href
.split("/sites/")
.nth(1)
.unwrap_or_default()
.split('/')
.next()
.unwrap_or_default()
.to_string();
if !site_id.is_empty() {
Self::push_unique(
&self.sites,
FilterOption {
id: site_id,
title: tag_title.clone(),
},
);
}
}
if let Some(target) = self.uploader_target_from_href_and_title(&href, &tag_title) {
match target.kind {
OmgUploaderTargetKind::Site => {
if site_uploader.is_none() {
site_uploader = Some(target.clone());
}
}
OmgUploaderTargetKind::Network => {
if network_uploader.is_none() {
network_uploader = Some(target.clone());
}
}
}
}
if !tags.iter().any(|existing| existing == &tag_title) {
tags.push(tag_title);
}
}
if !site_id.is_empty() {
Self::push_unique(
&self.sites,
FilterOption {
id: site_id,
id: site_id.clone(),
title: site_name.to_string(),
},
);
tags.push(site_name.to_string());
if !tags.iter().any(|existing| existing == site_name) {
tags.push(site_name.to_string());
}
if site_uploader.is_none() {
site_uploader = Some(OmgUploaderTarget {
kind: OmgUploaderTargetKind::Site,
id: site_id.clone(),
title: site_name.to_string(),
});
}
}
let video_item = VideoItem::new(
let mut video_item = VideoItem::new(
id,
title,
video_url.to_string(),
@@ -829,14 +1254,225 @@ impl OmgxxxProvider {
duration,
)
.views(views)
.preview(preview)
.tags(tags);
.preview(preview);
let uploader_target = site_uploader.or(network_uploader);
if let Some(uploader_target) = uploader_target {
video_item.uploader = Some(uploader_target.title.clone());
video_item.uploaderUrl = Some(format!(
"{}/{}/{}/",
self.url,
match uploader_target.kind {
OmgUploaderTargetKind::Site => "sites",
OmgUploaderTargetKind::Network => "networks",
},
uploader_target.id
));
video_item.uploaderId = Some(Self::canonical_uploader_id(
&uploader_target.kind,
&uploader_target.id,
));
}
if !tags.is_empty() {
video_item.tags = Some(tags);
}
items.push(video_item);
}
return items;
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Provider fixture with one known site and one known network.
    /// NOTE: the network entry deliberately shares the site's title
    /// ("Club Sweethearts") so the name-resolution test below can prove
    /// that site targets win ties over network targets.
    fn test_provider() -> OmgxxxProvider {
        OmgxxxProvider {
            url: "https://www.omg.xxx".to_string(),
            sites: Arc::new(RwLock::new(vec![FilterOption {
                id: "clubsweethearts".to_string(),
                title: "Club Sweethearts".to_string(),
            }])),
            networks: Arc::new(RwLock::new(vec![FilterOption {
                id: "mofos".to_string(),
                title: "Club Sweethearts".to_string(),
            }])),
            stars: Arc::new(RwLock::new(vec![])),
        }
    }

    // A name matching both a site and a network resolves to the site.
    #[test]
    fn uploader_name_prefers_site_before_network() {
        let provider = test_provider();
        let target = provider
            .uploader_target_from_name("Club Sweethearts")
            .expect("target should resolve");
        assert!(matches!(target.kind, OmgUploaderTargetKind::Site));
        assert_eq!(target.id, "clubsweethearts");
    }

    // "omgxxx:network:<id>" parses into a network target, and
    // re-canonicalizing the target reproduces the same id string.
    #[test]
    fn uploader_id_round_trips_for_networks() {
        let provider = test_provider();
        let target = provider
            .uploader_target_from_id("omgxxx:network:mofos")
            .expect("target should resolve");
        assert!(matches!(target.kind, OmgUploaderTargetKind::Network));
        assert_eq!(target.id, "mofos");
        assert_eq!(
            OmgxxxProvider::canonical_uploader_id(&target.kind, &target.id),
            "omgxxx:network:mofos"
        );
    }

    // Site + model anchors should yield non-empty tags, register the models
    // as stars, and attribute the site as the item's uploader.
    #[test]
    fn parses_model_and_site_tags_without_empty_strings() {
        let provider = test_provider();
        let html = r##"
<div class="list-videos">
<div class="item">
<a href="https://www.omg.xxx/videos/4290034/example-video/" title="[Club Sweethearts] Example Video"></a>
<img loading="lazy" data-src="https://cdn.example/thumb.jpg" />
<span class="duration">Duration 12:34</span>
<div class="views">1.2M</div>
<div class="thumb" data-preview="https://cdn.example/preview.mp4"></div>
<div class="models">
<a class="models__item thumb_cs" href="https://www.omg.xxx/sites/clubsweethearts/" style="order: 0;">
<svg class="icon icon-tv"><use xlink:href="#icon-tv"></use></svg>
<span>Club Sweethearts</span>
</a>
<a class="models__item thumb_model" href="https://www.omg.xxx/models/oliver-trunk/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Oliver Trunk</span>
</a>
<a class="models__item thumb_model" href="https://www.omg.xxx/models/sara-bork/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Sara Bork</span>
</a>
</div>
</div>
</div>
"##
        .to_string();
        let items = provider.get_video_items_from_html(html);
        assert_eq!(items.len(), 1);
        assert_eq!(
            items[0].tags,
            Some(vec![
                "Club Sweethearts".to_string(),
                "Oliver Trunk".to_string(),
                "Sara Bork".to_string()
            ])
        );
        assert!(
            items[0]
                .tags
                .as_ref()
                .unwrap()
                .iter()
                .all(|tag| !tag.is_empty())
        );
        // Model anchors must have been registered in the shared stars list.
        let stars = provider.stars.read().unwrap().clone();
        assert!(
            stars
                .iter()
                .any(|tag| tag.id == "oliver-trunk" && tag.title == "Oliver Trunk")
        );
        assert!(
            stars
                .iter()
                .any(|tag| tag.id == "sara-bork" && tag.title == "Sara Bork")
        );
        // The /sites/ anchor doubles as the uploader attribution.
        assert_eq!(items[0].uploader.as_deref(), Some("Club Sweethearts"));
        assert_eq!(
            items[0].uploaderUrl.as_deref(),
            Some("https://www.omg.xxx/sites/clubsweethearts/")
        );
        assert_eq!(
            items[0].uploaderId.as_deref(),
            Some("omgxxx:site:clubsweethearts")
        );
    }

    // Live-site markup variant (item-info wrapper, truncated models list,
    // rating/views wrap): tags are parsed in order, and both the site and
    // the models are registered in the shared filter lists.
    #[test]
    fn parses_live_item_shape_with_channel_and_pornstar_info() {
        let provider = test_provider();
        let html = r##"
<div class="list-videos">
<div class="item">
<a href="https://www.omg.xxx/videos/93763302/step-daughter-vol-2-scene-3/" target="_blank" title="Step Daughter Vol.2 Scene 3">
<div class="img thumb__img" data-preview="https://cast.omg.xxx/preview/93763302.mp4">
<img loading="lazy" class="thumb lazyloaded" src="https://img.omg.xxx/93763000/93763302/medium@2x/1.jpg" data-src="https://img.omg.xxx/93763000/93763302/medium@2x/1.jpg" alt="Step Daughter Vol.2 Scene 3" width="0" height="0">
<span class="duration"> Full Video 26:44 </span>
<span class="js-favourites thumb-favourites" data-action="add" data-type="video" data-object_id="93763302">
<svg class="icon icon-heart-plus"><use xlink:href="#icon-heart-plus"></use></svg>
<svg class="icon icon-trashcan"><use xlink:href="#icon-trashcan"></use></svg>
</span>
</div>
</a>
<div class="item-info">
<a href="https://www.omg.xxx/videos/93763302/step-daughter-vol-2-scene-3/" title="Step Daughter Vol.2 Scene 3">
<strong class="title"> Step Daughter Vol.2 Scene 3 </strong>
</a>
<div class="models is-truncated">
<a class="models__item thumb_cs" href="https://www.omg.xxx/sites/family-sinners/" style="order: 0;">
<svg class="icon icon-tv"><use xlink:href="#icon-tv"></use></svg>
<span>Family Sinners</span>
</a>
<a class="models__item" href="https://www.omg.xxx/models/vienna-rose/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Vienna Rose</span>
</a>
<a class="models__item" href="https://www.omg.xxx/models/mark-wood/" style="order: 1;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Mark Wood</span>
</a>
</div>
<div class="wrap">
<div class="rating positive "> 100% </div>
<div class="views">4.8K</div>
</div>
</div>
</div>
</div>
"##
        .to_string();
        let items = provider.get_video_items_from_html(html);
        assert_eq!(items.len(), 1);
        assert_eq!(
            items[0].tags,
            Some(vec![
                "Family Sinners".to_string(),
                "Vienna Rose".to_string(),
                "Mark Wood".to_string()
            ])
        );
        let sites = provider.sites.read().unwrap().clone();
        assert!(
            sites
                .iter()
                .any(|tag| tag.id == "family-sinners" && tag.title == "Family Sinners")
        );
        let stars = provider.stars.read().unwrap().clone();
        assert!(
            stars
                .iter()
                .any(|tag| tag.id == "vienna-rose" && tag.title == "Vienna Rose")
        );
        assert!(
            stars
                .iter()
                .any(|tag| tag.id == "mark-wood" && tag.title == "Mark Wood")
        );
    }
}
#[async_trait]
impl Provider for OmgxxxProvider {
async fn get_videos(
@@ -872,4 +1508,26 @@ impl Provider for OmgxxxProvider {
/// Channel descriptor for this provider, built for the given client version.
fn get_channel(&self, clientversion: ClientVersion) -> Option<crate::status::Channel> {
    Some(self.build_channel(clientversion))
}
/// Resolves an uploader (site or network) from an id or display name and
/// builds its profile. Returns `Ok(None)` when neither identifier matches a
/// known target; errors are stringified for the trait's error type.
async fn get_uploader(
    &self,
    cache: VideoCache,
    pool: DbPool,
    uploader_id: Option<String>,
    uploader_name: Option<String>,
    query: Option<String>,
    profile_content: bool,
    options: ServerOptions,
) -> std::result::Result<Option<UploaderProfile>, String> {
    // This provider scrapes HTML directly and never touches the database.
    let _ = pool;
    let Some(target) =
        self.resolve_uploader_target(uploader_id.as_deref(), uploader_name.as_deref())
    else {
        return Ok(None);
    };
    self.build_uploader_profile(cache, &target, query.as_deref(), profile_content, &options)
        .await
        .map_err(|error| error.to_string())
}
}

View File

@@ -10,6 +10,12 @@ use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "mixed", "movies"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -15,6 +15,12 @@ use std::vec;
use wreq::Client;
use wreq_util::Emulation;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "studio-network",
tags: &["glamour", "softcore", "solo"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -17,6 +17,12 @@ use wreq::Client;
use wreq::Version;
use wreq_util::Emulation;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "studio-network",
tags: &["regional", "amateur", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -2,6 +2,7 @@ use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::parse_abbreviated_number;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::requester::Requester;
@@ -10,13 +11,19 @@ use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use titlecase::Titlecase;
use url::Url;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "onlyfans",
tags: &["creator", "onlyfans", "amateur"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -39,6 +46,10 @@ pub struct PimpbunnyProvider {
}
impl PimpbunnyProvider {
const FIREFOX_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &'static str =
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
pub fn new() -> Self {
let provider = Self {
url: "https://pimpbunny.com".to_string(),
@@ -167,11 +178,304 @@ impl PimpbunnyProvider {
}
}
/// Returns whether `url` is a thumbnail we are willing to proxy: an https
/// URL on pimpbunny's own host under the screenshots path.
fn is_allowed_thumb_url(url: &str) -> bool {
    // Idiomatic `let Ok … else` replaces the old `Url::parse(url).ok()` +
    // `let Some … else` dance.
    let Ok(url) = Url::parse(url) else {
        return false;
    };
    if url.scheme() != "https" {
        return false;
    }
    let Some(host) = url.host_str() else {
        return false;
    };
    matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
        && url.path().starts_with("/contents/videos_screenshots/")
}
/// Routes an allowed thumbnail URL through the "pimpbunny-thumb" proxy;
/// empty or disallowed URLs are returned unchanged.
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
    if !thumb.is_empty() && Self::is_allowed_thumb_url(thumb) {
        crate::providers::build_proxy_url(
            options,
            "pimpbunny-thumb",
            &crate::providers::strip_url_scheme(thumb),
        )
    } else {
        thumb.to_string()
    }
}
/// Returns whether `url` is a detail page we are willing to proxy: an https
/// URL on pimpbunny's own host that is NOT a screenshot asset (screenshots
/// go through the thumb proxy instead).
fn is_allowed_detail_url(url: &str) -> bool {
    // Idiomatic `let Ok … else` replaces the old `.ok()` + `let Some … else`.
    let Ok(url) = Url::parse(url) else {
        return false;
    };
    if url.scheme() != "https" {
        return false;
    }
    let Some(host) = url.host_str() else {
        return false;
    };
    matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
        && !url.path().starts_with("/contents/videos_screenshots/")
}
/// Routes an allowed detail-page URL through the "pimpbunny" proxy; empty
/// or disallowed URLs are returned unchanged.
fn proxied_video(&self, options: &ServerOptions, page_url: &str) -> String {
    if !page_url.is_empty() && Self::is_allowed_detail_url(page_url) {
        crate::providers::build_proxy_url(
            options,
            "pimpbunny",
            &crate::providers::strip_url_scheme(page_url),
        )
    } else {
        page_url.to_string()
    }
}
/// Site root with exactly one trailing slash, for use as a Referer header.
fn root_referer(&self) -> String {
    let base = self.url.trim_end_matches('/');
    format!("{base}/")
}
/// Maps a UI sort label to the site's `sort_by` query value; anything
/// unrecognized falls back to newest-first ("post_date").
fn sort_by(sort: &str) -> &'static str {
    if sort == "best rated" {
        "rating"
    } else if sort == "most viewed" {
        "video_viewed"
    } else {
        "post_date"
    }
}
/// Collapses runs of whitespace in `query` and joins the words with
/// `separator`, producing the path form of a search term.
fn build_search_path_query(query: &str, separator: &str) -> String {
    let words: Vec<&str> = query.split_whitespace().collect();
    words.join(separator)
}
/// Appends the `sort_by` parameter to an archive URL, choosing '&' when the
/// URL already carries a query string and '?' otherwise.
fn append_archive_query(url: String, sort: &str) -> String {
    let mut out = url;
    out.push(if out.contains('?') { '&' } else { '?' });
    out.push_str("sort_by=");
    out.push_str(Self::sort_by(sort));
    out
}
/// Computes a plausible Referer for `request_url`: the first page of the
/// same listing family (videos / search / categories / onlyfans-models),
/// falling back to the site root for unparseable or unknown URLs.
fn page_family_referer(&self, request_url: &str) -> String {
    // Referer path for "/<section>/<slug>/…" archives: keep section + slug.
    // Replaces three copy-pasted branches that differed only in the section.
    fn section_referer(path: &str, section: &str) -> String {
        let parts: Vec<_> = path.trim_matches('/').split('/').collect();
        if parts.len() >= 2 {
            format!("/{section}/{}/", parts[1])
        } else {
            format!("/{section}/")
        }
    }
    let Ok(url) = Url::parse(request_url) else {
        return self.root_referer();
    };
    let path = url.path();
    let referer_path = if path.starts_with("/videos/") {
        "/videos/".to_string()
    } else if path.starts_with("/search/") {
        section_referer(path, "search")
    } else if path.starts_with("/categories/") {
        section_referer(path, "categories")
    } else if path.starts_with("/onlyfans-models/") {
        section_referer(path, "onlyfans-models")
    } else {
        "/".to_string()
    };
    format!("{}{}", self.url.trim_end_matches('/'), referer_path)
}
/// Browse-listing URL: page 1 is the bare /videos/ path, later pages append
/// the page number; the sort parameter is always appended.
fn build_browse_url(&self, page: u8, sort: &str) -> String {
    let base = match page {
        0 | 1 => format!("{}/videos/", self.url),
        _ => format!("{}/videos/{page}/", self.url),
    };
    Self::append_archive_query(base, sort)
}
/// Search URL: the query becomes a dash-joined path segment; page 1 omits
/// the page number, later pages append it; sort is always appended.
fn build_search_url(&self, query: &str, page: u8, sort: &str) -> String {
    let path_query = Self::build_search_path_query(query, "-");
    let base = match page {
        0 | 1 => format!("{}/search/{path_query}/", self.url),
        _ => format!("{}/search/{path_query}/{page}/", self.url),
    };
    Self::append_archive_query(base, sort)
}
/// Generic archive URL (categories, onlyfans-models, …): site root joined
/// with `archive_path` without doubled slashes, the page number for pages
/// past the first, a guaranteed trailing slash, and the sort parameter.
fn build_common_archive_url(&self, archive_path: &str, page: u8, sort: &str) -> String {
    let canonical = format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        archive_path.trim_start_matches('/')
    );
    let mut base = if page > 1 {
        format!("{}/{}", canonical.trim_end_matches('/'), page)
    } else {
        canonical
    };
    if !base.ends_with('/') {
        base.push('/');
    }
    Self::append_archive_query(base, sort)
}
/// Firefox-like navigation headers for a top-level document request; the
/// Referer is appended only when known.
fn navigation_headers(
    referer: Option<&str>,
    sec_fetch_site: &'static str,
) -> Vec<(String, String)> {
    let pair = |name: &str, value: &str| (name.to_string(), value.to_string());
    let mut headers = vec![
        pair("User-Agent", Self::FIREFOX_USER_AGENT),
        pair("Accept", Self::HTML_ACCEPT),
        pair("Accept-Language", "en-US,en;q=0.9"),
        pair("Cache-Control", "no-cache"),
        pair("Pragma", "no-cache"),
        pair("Priority", "u=0, i"),
        pair("Connection", "keep-alive"),
        pair("TE", "trailers"),
        pair("Sec-Fetch-Dest", "document"),
        pair("Sec-Fetch-Mode", "navigate"),
        pair("Sec-Fetch-Site", sec_fetch_site),
        pair("Sec-Fetch-User", "?1"),
        pair("Upgrade-Insecure-Requests", "1"),
    ];
    if let Some(referer) = referer {
        headers.push(pair("Referer", referer));
    }
    headers
}
/// Navigation headers plus the requester's session cookie, when one exists
/// for `request_url`.
fn headers_with_cookies(
    &self,
    requester: &Requester,
    request_url: &str,
    referer: Option<&str>,
    sec_fetch_site: &'static str,
) -> Vec<(String, String)> {
    let mut headers = Self::navigation_headers(referer, sec_fetch_site);
    headers.extend(
        requester
            .cookie_header_for_url(request_url)
            .map(|cookie| ("Cookie".to_string(), cookie)),
    );
    headers
}
/// Heuristic: does `html` look like a Cloudflare interstitial / Turnstile
/// challenge page rather than real site content?
fn is_cloudflare_challenge(html: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "cf-turnstile-response",
        "Performing security verification",
        "__cf_chl_rt_tk",
        "cUPMDTk:\"",
        "Just a moment...",
    ];
    MARKERS.iter().any(|marker| html.contains(marker))
}
/// Pulls the Cloudflare challenge path out of a challenge page: prefers the
/// explicit `cUPMDTk:"<path>"` value, falling back to rebuilding a URL from
/// the `__cf_chl_rt_tk` token. Returns `None` when neither marker exists.
fn extract_challenge_path(html: &str) -> Option<String> {
    if let Some((_, rest)) = html.split_once("cUPMDTk:\"") {
        if let Some(path) = rest.split('"').next() {
            return Some(path.to_string());
        }
    }
    let (_, rest) = html.split_once("__cf_chl_rt_tk=")?;
    let token = rest.split('"').next()?;
    Some(format!("/?__cf_chl_rt_tk={token}"))
}
/// Returns `path_or_url` unchanged when it is already absolute; otherwise
/// joins it onto the site root without doubled slashes.
fn absolute_site_url(&self, path_or_url: &str) -> String {
    let is_absolute =
        path_or_url.starts_with("http://") || path_or_url.starts_with("https://");
    if is_absolute {
        return path_or_url.to_string();
    }
    format!(
        "{}/{}",
        self.url.trim_end_matches('/'),
        path_or_url.trim_start_matches('/')
    )
}
/// Fetches `request_url` as HTML, transparently attempting one Cloudflare
/// challenge round-trip plus a retry when the first response is a 403
/// challenge page. 404 bodies are returned as-is so callers can detect
/// "no results" pages.
async fn fetch_html(
    &self,
    requester: &mut Requester,
    request_url: &str,
    referer: Option<&str>,
    sec_fetch_site: &'static str,
) -> Result<String> {
    let headers = self.headers_with_cookies(requester, request_url, referer, sec_fetch_site);
    let response = requester
        .get_raw_with_headers(request_url, headers.clone())
        .await
        .map_err(Error::from)?;
    let status = response.status();
    let body = response.text().await.map_err(Error::from)?;
    if status.is_success() || status.as_u16() == 404 {
        return Ok(body);
    }
    // 403 with challenge markers: hit the challenge endpoint once —
    // presumably so the requester's cookie jar picks up the clearance
    // cookie (TODO confirm against Requester's cookie handling) — then
    // fall through to the retry below. The challenge response itself is
    // deliberately discarded.
    if status.as_u16() == 403 && Self::is_cloudflare_challenge(&body) {
        if let Some(challenge_path) = Self::extract_challenge_path(&body) {
            let challenge_url = self.absolute_site_url(&challenge_path);
            let challenge_headers = self.headers_with_cookies(
                requester,
                &challenge_url,
                Some(request_url),
                "same-origin",
            );
            let _ = requester
                .get_raw_with_headers(&challenge_url, challenge_headers)
                .await;
        }
    }
    // Retry with freshly-built headers: the cookie set may have changed
    // during the challenge round-trip above.
    let retry_headers =
        self.headers_with_cookies(requester, request_url, referer, sec_fetch_site);
    requester
        .get_with_headers(request_url, retry_headers, Some(Version::HTTP_11))
        .await
        .map_err(|e| Error::from(format!("{e}")))
}
/// Best-effort GET of the site root to prime the session before real page
/// requests; the result (and any error) is deliberately discarded.
async fn warm_root_session(&self, requester: &mut Requester) {
    let _ = self
        .fetch_html(requester, &self.root_referer(), None, "none")
        .await;
}
/// Same root warm-up as `warm_root_session`, but usable from associated
/// functions that only have a base URL (no `self`). Best-effort: the
/// response and any error are ignored.
async fn warm_root_session_for_base(base: &str, requester: &mut Requester) {
    let root_url = format!("{}/", base.trim_end_matches('/'));
    let headers = Self::navigation_headers(None, "none");
    let _ = requester
        .get_with_headers(&root_url, headers, Some(Version::HTTP_11))
        .await;
}
async fn load_stars(base: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
Self::warm_root_session_for_base(base, &mut requester).await;
let request_url = format!("{base}/onlyfans-models/?models_per_page=20");
let headers = {
let root_url = format!("{}/", base.trim_end_matches('/'));
let mut headers = Self::navigation_headers(Some(&root_url), "same-origin");
if let Some(cookie) = requester.cookie_header_for_url(&request_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
};
let text = requester
.get(
&format!("{base}/onlyfans-models/?models_per_page=20"),
.get_with_headers(
&request_url,
headers,
Some(Version::HTTP_2),
)
.await
@@ -213,9 +517,20 @@ impl PimpbunnyProvider {
async fn load_categories(base: &str, cats: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
Self::warm_root_session_for_base(base, &mut requester).await;
let request_url = format!("{base}/categories/?items_per_page=120");
let headers = {
let root_url = format!("{}/", base.trim_end_matches('/'));
let mut headers = Self::navigation_headers(Some(&root_url), "same-origin");
if let Some(cookie) = requester.cookie_header_for_url(&request_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
};
let text = requester
.get(
&format!("{base}/categories/?items_per_page=120"),
.get_with_headers(
&request_url,
headers,
Some(Version::HTTP_2),
)
.await
@@ -256,15 +571,7 @@ impl PimpbunnyProvider {
sort: &str,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let sort_string = match sort {
"best rated" => "&sort_by=rating",
"most viewed" => "&sort_by=video_viewed",
_ => "&sort_by=post_date",
};
let video_url = format!(
"{}/videos/{}/?videos_per_page=32{}",
self.url, page, sort_string
);
let video_url = self.build_browse_url(page, sort);
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
@@ -279,7 +586,17 @@ impl PimpbunnyProvider {
};
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
let text = match requester.get(&video_url, Some(Version::HTTP_11)).await {
self.warm_root_session(&mut requester).await;
let referer = self.page_family_referer(&video_url);
let text = match self
.fetch_html(
&mut requester,
&video_url,
Some(&referer),
"same-origin",
)
.await
{
Ok(text) => text,
Err(e) => {
crate::providers::report_provider_error(
@@ -291,9 +608,7 @@ impl PimpbunnyProvider {
return Ok(old_items);
}
};
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
let video_items = self.get_video_items_from_html(text.clone(), &options);
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -311,27 +626,17 @@ impl PimpbunnyProvider {
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let search_string = query.trim().to_string();
let mut video_url = format!(
"{}/search/{}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&videos_per_page=32&from_videos={}",
self.url,
search_string.replace(" ", "-"),
page
);
let sort_string = match options.sort.as_deref().unwrap_or("") {
"best rated" => "&sort_by=rating",
"most viewed" => "&sort_by=video_viewed",
_ => "&sort_by=post_date",
};
let sort = options.sort.as_deref().unwrap_or("");
let mut video_url = self.build_search_url(&search_string, page, sort);
if let Ok(stars) = self.stars.read() {
if let Some(star) = stars
.iter()
.find(|s| s.title.to_ascii_lowercase() == search_string.to_ascii_lowercase())
{
video_url = format!(
"{}/onlyfans-models/{}/{}/?videos_per_page=20{}",
self.url, star.id, page, sort_string
video_url = self.build_common_archive_url(
&format!("/onlyfans-models/{}/", star.id),
page,
sort,
);
}
} else {
@@ -346,10 +651,8 @@ impl PimpbunnyProvider {
.iter()
.find(|c| c.title.to_ascii_lowercase() == search_string.to_ascii_lowercase())
{
video_url = format!(
"{}/categories/{}/{}/?videos_per_page=20{}",
self.url, cat.id, page, sort_string
);
video_url =
self.build_common_archive_url(&format!("/categories/{}/", cat.id), page, sort);
}
} else {
crate::providers::report_provider_error_background(
@@ -375,8 +678,17 @@ impl PimpbunnyProvider {
let mut requester =
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
println!("Fetching URL: {}", video_url);
let text = match requester.get(&video_url, Some(Version::HTTP_2)).await {
self.warm_root_session(&mut requester).await;
let referer = self.page_family_referer(&video_url);
let text = match self
.fetch_html(
&mut requester,
&video_url,
Some(&referer),
"same-origin",
)
.await
{
Ok(text) => text,
Err(e) => {
crate::providers::report_provider_error(
@@ -388,9 +700,7 @@ impl PimpbunnyProvider {
return Ok(old_items);
}
};
let video_items: Vec<VideoItem> = self
.get_video_items_from_html(text.clone(), &mut requester)
.await;
let video_items = self.get_video_items_from_html(text.clone(), &options);
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -400,11 +710,7 @@ impl PimpbunnyProvider {
Ok(video_items)
}
async fn get_video_items_from_html(
&self,
html: String,
requester: &mut Requester,
) -> Vec<VideoItem> {
fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
@@ -418,19 +724,42 @@ impl PimpbunnyProvider {
None => return vec![],
};
let futures = block
block
.split("<div class=\"col\">")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), requester.clone()));
join_all(futures)
.await
.filter_map(|el| self.get_video_item(el.to_string(), options).ok())
.into_iter()
.filter_map(Result::ok)
.collect()
}
async fn get_video_item(&self, seg: String, mut requester: Requester) -> Result<VideoItem> {
/// Scans a listing segment for the first token that parses as a clock time
/// (e.g. "26:44") and returns it in seconds; 0 when nothing matches.
fn extract_duration_from_segment(&self, seg: &str) -> u32 {
    seg.split(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace())
        .map(str::trim)
        .filter(|candidate| !candidate.is_empty() && candidate.contains(':'))
        .find_map(|candidate| parse_time_to_seconds(candidate))
        .map(|parsed| parsed as u32)
        .unwrap_or(0)
}
/// Extracts the abbreviated view count (e.g. "4.8K") that precedes the
/// literal "Views" marker in a listing segment; 0 when the marker is
/// missing or the value cannot be parsed.
fn extract_views_from_segment(&self, seg: &str) -> u32 {
    // Bug fix: `seg.split("Views").next()` always returns `Some` — the
    // whole segment when the marker is absent — so the old guard never
    // fired and a random trailing token could be parsed as a view count.
    // `split_once` requires the marker to actually be present.
    let Some((before_views, _)) = seg.split_once("Views") else {
        return 0;
    };
    let candidate = before_views
        .split(|ch: char| ch == '<' || ch == '>' || ch.is_whitespace())
        .filter(|value| !value.trim().is_empty())
        .next_back()
        .unwrap_or("")
        .trim_matches(|ch: char| ch == '(' || ch == ')' || ch == ',');
    parse_abbreviated_number(candidate).unwrap_or(0)
}
fn get_video_item(&self, seg: String, options: &ServerOptions) -> Result<VideoItem> {
let video_url = seg
.split(" href=\"")
.nth(1)
@@ -485,64 +814,22 @@ impl PimpbunnyProvider {
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tags, formats, views, duration) =
self.extract_media(&video_url, &mut requester).await?;
let proxy_url = self.proxied_video(options, &video_url);
let views = self.extract_views_from_segment(&seg);
let duration = self.extract_duration_from_segment(&seg);
let formats = vec![
VideoFormat::new(proxy_url.clone(), "auto".into(), "video/mp4".into())
.format_id("auto".into())
.format_note("proxied".into()),
];
Ok(
VideoItem::new(id, title, video_url, "pimpbunny".into(), thumb, duration)
VideoItem::new(id, title, proxy_url, "pimpbunny".into(), thumb, duration)
.formats(formats)
.tags(tags)
.preview(preview)
.views(views),
)
}
/// Fetch a video detail page and pull media info from its `ld+json`
/// structured-data block.
///
/// Returns `(tags, formats, views, duration_seconds)`; tags are always
/// empty here (listing-level tags are collected elsewhere).
async fn extract_media(
    &self,
    url: &str,
    requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>, u32, u32)> {
    let text = requester
        .get(url, Some(Version::HTTP_2))
        .await
        .map_err(|e| Error::from(format!("{}", e)))?;
    // The JSON-LD payload sits between the script-type attribute and the
    // closing </script>; missing markers are a hard parse error.
    let json_str = text
        .split("application/ld+json\">")
        .nth(1)
        .and_then(|s| s.split("</script>").next())
        .ok_or_else(|| ErrorKind::Parse("ld+json".into()))?;
    let json: serde_json::Value = serde_json::from_str(json_str)?;
    let video_url = json["contentUrl"].as_str().unwrap_or("").to_string();
    // Quality label is taken from the file name, e.g. ..._720p.mp4 -> "720p".
    let quality = video_url
        .split('_')
        .last()
        .and_then(|s| s.split('.').next())
        .unwrap_or("")
        .to_string();
    // First interactionStatistic entry carries the view counter as a string.
    let views = json["interactionStatistic"]
        .as_array()
        .and_then(|a| a.first())
        .and_then(|v| v["userInteractionCount"].as_str())
        .and_then(|v| v.parse().ok())
        .unwrap_or(0);
    // NOTE(review): stripping the ISO-8601 letters without inserting
    // separators turns e.g. "PT12M45S" into "1245" before it reaches
    // parse_time_to_seconds — verify that helper handles this as intended.
    let duration = json["duration"]
        .as_str()
        .map(|d| parse_time_to_seconds(&d.replace(['P', 'T', 'H', 'M', 'S'], "")).unwrap_or(0))
        .unwrap_or(0) as u32;
    Ok((
        vec![],
        vec![VideoFormat::new(video_url, quality, "video/mp4".into())],
        views,
        duration,
    ))
}
}
#[async_trait]
@@ -558,6 +845,7 @@ impl Provider for PimpbunnyProvider {
options: ServerOptions,
) -> Vec<VideoItem> {
let page = page.parse::<u8>().unwrap_or(1);
let thumb_options = options.clone();
let res = match query {
Some(q) => self.to_owned().query(cache, page, &q, options).await,
@@ -568,9 +856,226 @@ impl Provider for PimpbunnyProvider {
eprintln!("pimpbunny error: {e}");
vec![]
})
.into_iter()
.map(|mut item| {
if !item.thumb.is_empty() {
item.thumb = self.proxied_thumb(&thumb_options, &item.thumb);
}
item
})
.collect()
}
fn get_channel(&self, v: ClientVersion) -> Option<Channel> {
Some(self.build_channel(v))
}
}
#[cfg(test)]
mod tests {
    use super::PimpbunnyProvider;
    use crate::videos::ServerOptions;
    use std::sync::{Arc, RwLock};

    // Minimal provider instance with empty shared star/category caches.
    fn test_provider() -> PimpbunnyProvider {
        PimpbunnyProvider {
            url: "https://pimpbunny.com".to_string(),
            stars: Arc::new(RwLock::new(vec![])),
            categories: Arc::new(RwLock::new(vec![])),
        }
    }

    // Thumb URLs on the provider host are rewritten onto the public proxy base.
    #[test]
    fn rewrites_allowed_thumbs_to_proxy_urls() {
        let provider = test_provider();
        // Only `public_url_base` matters for these tests; all else unset.
        let options = ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("https://example.com".to_string()),
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        };
        let proxied = provider.proxied_thumb(
            &options,
            "https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg",
        );
        assert_eq!(
            proxied,
            "https://example.com/proxy/pimpbunny-thumb/pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg"
        );
    }

    // Video page URLs are rewritten through the redirect-proxy endpoint.
    #[test]
    fn rewrites_video_pages_to_redirect_proxy() {
        let provider = test_provider();
        let options = ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("https://example.com".to_string()),
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        };
        let proxied = provider.proxied_video(
            &options,
            "https://pimpbunny.com/videos/example-video/",
        );
        assert_eq!(
            proxied,
            "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/"
        );
    }

    // Listing HTML alone yields full items (URL, duration, views, one format)
    // with no per-video detail request.
    #[test]
    fn parses_listing_without_detail_requests() {
        let provider = test_provider();
        let options = ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("https://example.com".to_string()),
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        };
        let html = r#"
<div class="videos_videos_list"></div>
<div class="videos_videos_list">
<div class="col">
<a href="https://pimpbunny.com/videos/example-video/">
<div class="card-thumbnail">
<img src="https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg" data-preview="https://pimpbunny.com/preview.mp4" />
</div>
<div class="card-title">sample &amp; title</div>
<div>12:34</div>
<div>1.2K Views</div>
</a>
</div>
-pagination-wrapper
"#;
        let items = provider.get_video_items_from_html(html.to_string(), &options);
        assert_eq!(items.len(), 1);
        assert_eq!(
            items[0].url,
            "https://example.com/proxy/pimpbunny/pimpbunny.com/videos/example-video/"
        );
        // 12:34 -> 754 seconds; "1.2K" -> 1200 views.
        assert_eq!(items[0].duration, 754);
        assert_eq!(items[0].views, Some(1200));
        assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
    }

    // Challenge detection plus extraction of the `cUPMDTk` path from the
    // Cloudflare interstitial script.
    #[test]
    fn extracts_cloudflare_challenge_path() {
        let html = r#"
<script type="text/javascript">
(function(){
window._cf_chl_opt = {
cUPMDTk:"/?mode=async&function=get_block&block_id=videos_videos_list&videos_per_page=8&sort_by=post_date&from=1&__cf_chl_tk=test-token"
};
}());
</script>
"#;
        assert!(PimpbunnyProvider::is_cloudflare_challenge(html));
        assert_eq!(
            PimpbunnyProvider::extract_challenge_path(html).as_deref(),
            Some(
                "/?mode=async&function=get_block&block_id=videos_videos_list&videos_per_page=8&sort_by=post_date&from=1&__cf_chl_tk=test-token"
            )
        );
    }

    // Page 1 has no page segment; later pages insert `/N/` before the query.
    #[test]
    fn builds_async_browse_url_instead_of_numbered_videos_path() {
        let provider = test_provider();
        assert_eq!(
            provider.build_browse_url(1, "most recent"),
            "https://pimpbunny.com/videos/?sort_by=post_date"
        );
        assert_eq!(
            provider.build_browse_url(2, "most recent"),
            "https://pimpbunny.com/videos/2/?sort_by=post_date"
        );
    }

    // Search terms are slugified (spaces -> dashes) and paginated the same way.
    #[test]
    fn builds_search_url_with_query_and_pagination() {
        let provider = test_provider();
        assert_eq!(
            provider.build_search_url("adriana chechik", 1, "most viewed"),
            "https://pimpbunny.com/search/adriana-chechik/?sort_by=video_viewed"
        );
        assert_eq!(
            provider.build_search_url("adriana chechik", 3, "most viewed"),
            "https://pimpbunny.com/search/adriana-chechik/3/?sort_by=video_viewed"
        );
    }

    // Category/archive paths share the same pagination + sort-query scheme.
    #[test]
    fn builds_common_archive_url_with_async_block() {
        let provider = test_provider();
        assert_eq!(
            provider.build_common_archive_url("/categories/amateur/", 1, "best rated"),
            "https://pimpbunny.com/categories/amateur/?sort_by=rating"
        );
        assert_eq!(
            provider.build_common_archive_url("/categories/amateur/", 4, "best rated"),
            "https://pimpbunny.com/categories/amateur/4/?sort_by=rating"
        );
    }

    // The referer for a paginated URL is its page-family root (page number
    // and query stripped) across videos, categories, search, and model pages.
    #[test]
    fn derives_page_family_referer() {
        let provider = test_provider();
        assert_eq!(
            provider.page_family_referer("https://pimpbunny.com/videos/2/?sort_by=post_date"),
            "https://pimpbunny.com/videos/"
        );
        assert_eq!(
            provider.page_family_referer(
                "https://pimpbunny.com/categories/blowjob/2/?sort_by=post_date"
            ),
            "https://pimpbunny.com/categories/blowjob/"
        );
        assert_eq!(
            provider.page_family_referer(
                "https://pimpbunny.com/search/adriana-chechik/3/?sort_by=video_viewed"
            ),
            "https://pimpbunny.com/search/adriana-chechik/"
        );
        assert_eq!(
            provider.page_family_referer(
                "https://pimpbunny.com/onlyfans-models/momoitenshi/3/?sort_by=post_date"
            ),
            "https://pimpbunny.com/onlyfans-models/momoitenshi/"
        );
    }
}

View File

@@ -14,6 +14,12 @@ use std::sync::{Arc, RwLock};
use std::vec;
use url::form_urlencoded::Serializer;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "pmv-compilation",
tags: &["pmv", "music", "compilation"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -11,6 +11,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -11,8 +11,15 @@ use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{Html, Selector};
use std::collections::HashSet;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "onlyfans",
tags: &["creator", "premium", "clips"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -266,6 +273,36 @@ impl Porn4fansProvider {
text.replace("\\/", "/").replace("&amp;", "&")
}
/// Decode HTML entities in `text` and collapse every whitespace run into
/// a single space, falling back to the raw input when decoding fails.
fn decode_html_text(text: &str) -> String {
    let decoded = match decode(text.as_bytes()).to_string() {
        Ok(value) => value,
        Err(_) => text.to_string(),
    };
    let words: Vec<_> = decoded.split_whitespace().collect();
    words.join(" ").trim().to_string()
}
fn strip_tags(text: &str) -> String {
Regex::new(r"(?is)<[^>]+>")
.ok()
.map(|regex| regex.replace_all(text, "").to_string())
.unwrap_or_else(|| text.to_string())
}
/// Append `value` (trimmed) to `values` unless it is empty or already
/// present under an ASCII-case-insensitive comparison.
fn push_unique_tag(values: &mut Vec<String>, value: String) {
    let trimmed = value.trim().to_string();
    if trimmed.is_empty() {
        return;
    }
    let already_present = values
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(&trimmed));
    if !already_present {
        values.push(trimmed);
    }
}
fn extract_views(text: &str) -> Option<u32> {
Regex::new(r"(?i)<svg[^>]+icon-eye[^>]*>.*?</svg>\s*<span>([^<]+)</span>")
.ok()
@@ -303,6 +340,34 @@ impl Porn4fansProvider {
None
}
/// Collect the visible, entity-decoded text of every element matching
/// `selector`, skipping duplicates (case-insensitive) and empty strings.
/// An invalid selector yields an empty list rather than an error.
fn collect_texts(document: &Html, selector: &str) -> Vec<String> {
    let Ok(selector) = Selector::parse(selector) else {
        return vec![];
    };
    let mut values = Vec::new();
    for element in document.select(&selector) {
        // Join all text nodes, then strip any nested markup and normalize
        // entities/whitespace before de-duplicating.
        let raw_text = element.text().collect::<Vec<_>>().join(" ");
        let cleaned = Self::decode_html_text(&Self::strip_tags(&raw_text));
        Self::push_unique_tag(&mut values, cleaned);
    }
    values
}
/// Parse a video detail page and return `(model names, category names)`.
/// Tag links are folded into the category list, skipping duplicates.
fn extract_page_models_and_categories(text: &str) -> (Vec<String>, Vec<String>) {
    let document = Html::parse_document(text);
    let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]");
    let mut categories =
        Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]");
    // Tags and categories are presented together downstream.
    for value in Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]") {
        Self::push_unique_tag(&mut categories, value);
    }
    (models, categories)
}
fn parse_video_cards_from_html(&self, html: &str) -> Vec<Porn4fansCard> {
if html.trim().is_empty() {
return vec![];
@@ -375,9 +440,17 @@ impl Porn4fansProvider {
None,
)
.await
.ok()
.and_then(|text| Self::extract_direct_video_url_from_page(&text))
.unwrap_or_else(|| card.page_url.clone());
.ok();
let (direct_url, models, categories) = match direct_url {
Some(text) => {
let url = Self::extract_direct_video_url_from_page(&text)
.unwrap_or_else(|| card.page_url.clone());
let (models, categories) = Self::extract_page_models_and_categories(&text);
(url, models, categories)
}
None => (card.page_url.clone(), vec![], vec![]),
};
let mut item = VideoItem::new(
card.id,
@@ -393,6 +466,10 @@ impl Porn4fansProvider {
if let Some(rating) = card.rating {
item = item.rating(rating);
}
if let Some(model) = models.first() {
item = item.uploader(model.clone());
}
item = item.tags(categories);
item
}
@@ -541,4 +618,33 @@ mod tests {
)
);
}
// Models come from the player list; categories and tags merge into one
// de-duplicated category list, in document order.
#[test]
fn extracts_models_and_categories_from_video_page() {
    let html = r#"
<div class="player-models-list">
<div class="player-model-item">
<a href="/models/piper-rockelle/"><span class="player-model-name">Piper Rockelle</span></a>
</div>
</div>
<ul class="categories-row">
<li class="visible"><a href="/categories/striptease/">Striptease</a></li>
<li class="visible"><a href="/categories/teen/">Teen</a></li>
</ul>
<ul class="tags-row">
<li class="visible"><a href="/tags/bathroom/">Bathroom</a></li>
</ul>
"#;
    let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(html);
    assert_eq!(models, vec!["Piper Rockelle".to_string()]);
    assert_eq!(
        categories,
        vec![
            "Striptease".to_string(),
            "Teen".to_string(),
            "Bathroom".to_string()
        ]
    );
}
}

1482
src/providers/porndish.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "hd", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

1368
src/providers/pornhd3x.rs Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1248
src/providers/pornmz.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "clips", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -12,6 +12,12 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use serde_json::Value;
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "legacy", "general"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -11,6 +11,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "ai",
tags: &["rule34", "ai-generated", "animation"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -13,6 +13,12 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use std::time::{SystemTime, UNIX_EPOCH};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "hentai-animation",
tags: &["rule34", "animation", "fandom"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

1430
src/providers/sextb.rs Normal file

File diff suppressed because it is too large Load Diff

1436
src/providers/shooshtime.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,13 +5,21 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{ElementRef, Html, Selector};
use std::process::Command;
use std::time::Duration;
use url::form_urlencoded::byte_serialize;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "mixed", "search"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -138,6 +146,177 @@ impl SpankbangProvider {
vec![("Referer".to_string(), format!("{}/", self.url))]
}
/// Heuristically detect a Cloudflare block/challenge page by searching
/// for its well-known phrases, case-insensitively.
fn is_cloudflare_block(text: &str) -> bool {
    const MARKERS: [&str; 3] = [
        "attention required",
        "you have been blocked",
        "cloudflare ray id",
    ];
    let haystack = text.to_ascii_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(marker))
}
/// Last-resort listing: shell out to `yt-dlp` for a flat playlist of
/// `page_url` and convert up to `limit` entries into `VideoItem`s.
/// Any failure (missing binary, bad exit, unparsable JSON, no entries)
/// degrades to an empty list rather than an error.
fn fallback_items_from_ytdlp(&self, page_url: &str, limit: usize) -> Vec<VideoItem> {
    let output = match Command::new("yt-dlp")
        .arg("-J")
        .arg("--flat-playlist")
        .arg("--extractor-args")
        .arg("generic:impersonate=chrome")
        .arg(page_url)
        .output()
    {
        Ok(output) if output.status.success() => output,
        _ => return vec![],
    };
    let payload: serde_json::Value = match serde_json::from_slice(&output.stdout) {
        Ok(payload) => payload,
        Err(_) => return vec![],
    };
    let entries = match payload.get("entries").and_then(|value| value.as_array()) {
        Some(entries) => entries,
        None => return vec![],
    };
    let mut items = Vec::new();
    for (index, entry) in entries.iter().take(limit).enumerate() {
        // Only keep absolute http(s) media URLs.
        let Some(url) = entry.get("url").and_then(|value| value.as_str()) else {
            continue;
        };
        if !(url.starts_with("https://") || url.starts_with("http://")) {
            continue;
        }
        // Synthesize id/title when yt-dlp omits them, keyed by position.
        let id = entry
            .get("id")
            .and_then(|value| value.as_str())
            .filter(|value| !value.is_empty())
            .map(ToOwned::to_owned)
            .unwrap_or_else(|| format!("spankbang-fallback-{}", index + 1));
        let title = entry
            .get("title")
            .and_then(|value| value.as_str())
            .filter(|value| !value.is_empty())
            .map(Self::decode_html)
            .unwrap_or_else(|| format!("SpankBang Video {}", index + 1));
        let thumb = entry
            .get("thumbnail")
            .and_then(|value| value.as_str())
            .unwrap_or("")
            .to_string();
        let duration = entry
            .get("duration")
            .and_then(|value| value.as_u64())
            .and_then(|value| u32::try_from(value).ok())
            .unwrap_or(0);
        let format_kind = if url.contains(".m3u8") {
            "m3u8"
        } else {
            "video/mp4"
        };
        let mut format = VideoFormat::new(url.to_string(), "auto".to_string(), format_kind.to_string());
        // Forward yt-dlp's request headers when present; otherwise fall back
        // to a site Referer so the CDN accepts the request.
        if let Some(headers) = entry.get("http_headers").and_then(|value| value.as_object()) {
            for (key, value) in headers {
                if let Some(value) = value.as_str() {
                    format.add_http_header(key.to_string(), value.to_string());
                }
            }
        }
        if entry
            .get("http_headers")
            .and_then(|value| value.as_object())
            .is_none()
        {
            format.add_http_header("Referer".to_string(), format!("{}/", self.url));
        }
        let mut item = VideoItem::new(
            id,
            title,
            url.to_string(),
            "spankbang".to_string(),
            thumb,
            duration,
        )
        .formats(vec![format]);
        if let Some(views) = entry
            .get("view_count")
            .and_then(|value| value.as_u64())
            .and_then(|value| u32::try_from(value).ok())
        {
            item = item.views(views);
        }
        if let Some(uploader) = entry
            .get("uploader")
            .and_then(|value| value.as_str())
            .filter(|value| !value.is_empty())
        {
            item = item.uploader(uploader.to_string());
        }
        items.push(item);
    }
    items
}
/// Run the yt-dlp fallback for `page_url` and keep only items whose media
/// URL answers a small ranged probe request successfully.
/// Returns an empty list when the fallback itself yields nothing.
async fn fallback_items_with_working_media(
    &self,
    page_url: &str,
    options: &ServerOptions,
) -> Vec<VideoItem> {
    let fallback_items = self.fallback_items_from_ytdlp(page_url, 72);
    if fallback_items.is_empty() {
        return vec![];
    }
    let mut requester = requester_or_default(
        options,
        "spankbang",
        "spankbang.fallback_items_with_working_media.missing_requester",
    );
    let mut working_items = Vec::new();
    for item in fallback_items {
        // Reuse the headers attached to the item's first format (if any)
        // so the probe matches what playback would send.
        let format_headers = item
            .formats
            .as_ref()
            .and_then(|formats| formats.first())
            .map(|format| format.http_headers_pairs())
            .unwrap_or_default();
        let media_url = item
            .formats
            .as_ref()
            .and_then(|formats| formats.first())
            .map(|format| format.url.clone())
            .unwrap_or_else(|| item.url.clone());
        if media_url.is_empty() {
            continue;
        }
        // Probe only the first 2 KiB unless a Range header is already set.
        let mut headers = format_headers;
        if !headers
            .iter()
            .any(|(key, _)| key.eq_ignore_ascii_case("range"))
        {
            headers.push(("Range".to_string(), "bytes=0-2047".to_string()));
        }
        let is_working = match requester
            .get_raw_with_headers_timeout(&media_url, headers, Some(Duration::from_secs(20)))
            .await
        {
            Ok(response) => response.status().is_success(),
            Err(_) => false,
        };
        if is_working {
            working_items.push(item);
        }
    }
    working_items
}
fn build_query_url(&self, query: &str, page: u32, sort: &str) -> String {
let encoded_query = Self::encode_search_query(query);
let mut url = if page > 1 {
@@ -277,6 +456,7 @@ impl SpankbangProvider {
.select(video_link_selector)
.find_map(|link| link.value().attr("href"))
.map(ToString::to_string)?;
let detail_url = self.normalize_url(&href);
let thumb = card
.select(thumb_selector)
.find_map(|img| img.value().attr("src"))
@@ -332,7 +512,10 @@ impl SpankbangProvider {
item = item.rating(rating);
}
if let Some(preview) = preview {
item = item.preview(preview);
let mut format =
VideoFormat::new(preview.clone(), "preview".to_string(), "video/mp4".to_string());
format.add_http_header("Referer".to_string(), detail_url.clone());
item = item.preview(preview).formats(vec![format]);
}
if let Some(meta_link) = card.select(meta_link_selector).next() {
@@ -426,6 +609,14 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}
};
@@ -437,6 +628,32 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}
if Self::is_cloudflare_block(&text) {
report_provider_error(
"spankbang",
"get.cloudflare_block",
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}
@@ -484,6 +701,14 @@ impl SpankbangProvider {
&format!("url={video_url}; error={e}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}
};
@@ -495,6 +720,32 @@ impl SpankbangProvider {
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}
if Self::is_cloudflare_block(&text) {
report_provider_error(
"spankbang",
"query.cloudflare_block",
&format!("url={video_url}"),
)
.await;
let fallback_items = self
.fallback_items_with_working_media(&video_url, &options)
.await;
if !fallback_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), fallback_items.clone());
return Ok(fallback_items);
}
return Ok(old_items);
}

2003
src/providers/supjav.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -15,6 +15,12 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{Html, Selector};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "community", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -12,6 +12,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "legacy", "studio"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -12,6 +12,12 @@ use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use url::form_urlencoded::Serializer;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "jav",
tags: &["japanese", "amateur", "jav"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -5,13 +5,19 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use std::collections::HashSet;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "viral", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -378,6 +384,9 @@ impl ViralxxxpornProvider {
}
let thumb = self.extract_thumb_url(segment);
let preview = Self::first_non_empty_attr(segment, &["data-preview=\""])
.map(|value| self.normalize_url(&value))
.unwrap_or_default();
let text_segment = Self::normalize_ws(&Self::decode_html(&Self::strip_tags(segment)));
let duration = Self::extract_duration_seconds(segment)
@@ -392,6 +401,15 @@ impl ViralxxxpornProvider {
if views > 0 {
item = item.views(views);
}
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
}
items.push(item);
}
@@ -454,6 +472,9 @@ impl ViralxxxpornProvider {
}
let thumb = self.extract_thumb_url(segment);
let preview = Self::first_non_empty_attr(segment, &["data-preview=\""])
.map(|value| self.normalize_url(&value))
.unwrap_or_default();
let raw_duration = Self::extract_between(segment, "<div class=\"duration\">", "<")
.or_else(|| Self::extract_between(segment, "<div class=\"time\">", "<"))
@@ -484,6 +505,15 @@ impl ViralxxxpornProvider {
if views > 0 {
item = item.views(views);
}
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), item.url.clone());
item = item.preview(preview).formats(vec![format]);
}
items.push(item);
}

1249
src/providers/vrporn.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,12 @@ use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Version;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "tiktok",
tags: &["tube", "mixed", "search"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -12,6 +12,12 @@ use regex::Regex;
use std::sync::{Arc, RwLock};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "onlyfans",
tags: &["database", "clips", "mixed"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

View File

@@ -5,12 +5,18 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "onlyfans",
tags: &["onlyfans", "leaks", "creator"],
};
error_chain! {
foreign_links {
Io(std::io::Error);
@@ -293,8 +299,20 @@ impl XxthotsProvider {
.unwrap_or_default()
.to_string();
let views = parse_abbreviated_number(&views_part).unwrap_or(0) as u32;
let preview = video_segment
.split("data-preview=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split('"')
.collect::<Vec<&str>>()
.first()
.copied()
.unwrap_or_default()
.to_string();
let video_item = VideoItem::new(
let mut video_item = VideoItem::new(
id,
title,
video_url.to_string(),
@@ -303,6 +321,15 @@ impl XxthotsProvider {
duration,
)
.views(views);
if !preview.is_empty() {
let mut format = VideoFormat::new(
preview.clone(),
"preview".to_string(),
"video/mp4".to_string(),
);
format.add_http_header("Referer".to_string(), video_url.clone());
video_item = video_item.preview(preview).formats(vec![format]);
}
items.push(video_item);
}
return items;

1516
src/providers/yesporn.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,6 +11,12 @@ use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["mainstream", "mixed", "search"],
};
error_chain! {
foreign_links {
Io(std::io::Error);

406
src/proxies/doodstream.rs Normal file
View File

@@ -0,0 +1,406 @@
use ntex::web;
use regex::{Captures, Regex};
use url::Url;
use crate::util::requester::Requester;
/// Stateless resolver for Doodstream-style player pages hosted on the
/// allow-listed turboplayers/trailerhg/streamhg domains.
#[derive(Debug, Clone)]
pub struct DoodstreamProxy {}
impl DoodstreamProxy {
/// Create a new (stateless) proxy handler.
pub fn new() -> Self {
    Self {}
}
/// Normalize an endpoint to an absolute https URL and return it only
/// when it passes the detail-URL allow-list check.
fn normalize_detail_url(endpoint: &str) -> Option<String> {
    let has_scheme = endpoint.starts_with("http://") || endpoint.starts_with("https://");
    let normalized = if has_scheme {
        endpoint.trim().to_string()
    } else {
        format!("https://{}", endpoint.trim_start_matches('/'))
    };
    if Self::is_allowed_detail_url(&normalized) {
        Some(normalized)
    } else {
        None
    }
}
/// Exact allow-list of player hosts this proxy will fetch from.
fn is_allowed_host(host: &str) -> bool {
    [
        "turboplayers.xyz",
        "www.turboplayers.xyz",
        "trailerhg.xyz",
        "www.trailerhg.xyz",
        "streamhg.com",
        "www.streamhg.com",
    ]
    .contains(&host)
}
/// Accept only https URLs on an allow-listed host whose path uses one of
/// the Doodstream thumbnail/embed/download prefixes (`/t/`, `/e/`, `/d/`).
fn is_allowed_detail_url(url: &str) -> bool {
    // `let Ok(..) = … else` is the direct form; routing through `.ok()`
    // just to pattern-match `Some` obscured the parse-failure check.
    let Ok(url) = Url::parse(url) else {
        return false;
    };
    if url.scheme() != "https" {
        return false;
    }
    let Some(host) = url.host_str() else {
        return false;
    };
    if !Self::is_allowed_host(host) {
        return false;
    }
    ["/t/", "/e/", "/d/"]
        .iter()
        .any(|prefix| url.path().starts_with(prefix))
}
/// Derive the `scheme://host` origin of `detail_url`, if it parses and
/// has a host component.
fn request_origin(detail_url: &str) -> Option<String> {
    Url::parse(detail_url).ok().and_then(|parsed| {
        parsed
            .host_str()
            .map(|host| format!("{}://{}", parsed.scheme(), host))
    })
}
/// Build browser-like request headers for fetching a player page,
/// anchored on the page's own origin (default: turboplayers.xyz).
fn request_headers(detail_url: &str) -> Vec<(String, String)> {
    let origin = Self::request_origin(detail_url)
        .unwrap_or_else(|| "https://turboplayers.xyz".to_string());
    let referer = format!("{}/", origin.trim_end_matches('/'));
    let mut headers = Vec::with_capacity(5);
    headers.push(("Referer".to_string(), referer));
    headers.push(("Origin".to_string(), origin));
    headers.push((
        "Accept".to_string(),
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(),
    ));
    headers.push(("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()));
    headers.push(("Sec-Fetch-Site".to_string(), "same-origin".to_string()));
    headers
}
/// Compile `pattern`, returning `None` instead of panicking on an invalid
/// expression — callers treat a bad pattern as "no match".
fn regex(pattern: &str) -> Option<Regex> {
    Regex::new(pattern).ok()
}
/// Parse a p.a.c.k.e.r base-36 token (`0-9a-z`) into a symbol index.
fn decode_base36(token: &str) -> Option<usize> {
    match usize::from_str_radix(token, 36) {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Trim surrounding whitespace and strip trailing JS-string debris from a
/// URL — all trailing backslashes, then double quotes, then single quotes,
/// in that fixed order.
fn sanitize_media_url(url: &str) -> String {
    let mut cleaned = url.trim();
    cleaned = cleaned.trim_end_matches('\\');
    cleaned = cleaned.trim_end_matches('"');
    cleaned = cleaned.trim_end_matches('\'');
    cleaned.to_string()
}
/// Try each direct-URL pattern against the page text and return the first
/// sanitized match, if any. Patterns that fail to compile are skipped.
fn extract_literal_url(text: &str) -> Option<String> {
    let direct_patterns = [
        r#"urlPlay\s*=\s*'(?P<url>https?://[^']+)'"#,
        r#"data-hash\s*=\s*"(?P<url>https?://[^"]+)""#,
        r#""(?P<url>https?://[^"]+\.(?:m3u8|mp4)(?:\?[^"]*)?)""#,
        r#"'(?P<url>https?://[^']+\.(?:m3u8|mp4)(?:\?[^']*)?)'"#,
    ];
    direct_patterns
        .iter()
        .filter_map(|pattern| Self::regex(pattern))
        .find_map(|regex| {
            regex
                .captures(text)
                .and_then(|captures| captures.name("url"))
                .map(|value| Self::sanitize_media_url(value.as_str()))
        })
}
/// Locate a Dean-Edwards p.a.c.k.e.r `eval(function(p,a,c,k,e,d)…)` call
/// and pull out its arguments: `(payload, radix, symbol_count, symbols)`.
/// The payload and symbol table are un-escaped from their JS single-quoted
/// form; returns `None` when the pattern is absent or malformed.
fn extract_packed_eval_args(text: &str) -> Option<(String, usize, usize, Vec<String>)> {
    let regex = Self::regex(
        r#"eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>(?:\\'|\\\\|[^'])*)',(?P<radix>\d+),(?P<count>\d+),'(?P<symbols>(?:\\'|\\\\|[^'])*)'\.split\('\|'\)"#,
    )?;
    let captures = regex.captures(text)?;
    let payload = Self::decode_js_single_quoted(captures.name("payload")?.as_str());
    let radix = captures.name("radix")?.as_str().parse::<usize>().ok()?;
    let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
    let symbols = Self::decode_js_single_quoted(captures.name("symbols")?.as_str());
    // The symbol table is pipe-delimited, matching packer's `.split('|')`.
    let parts = symbols.split('|').map(|value| value.to_string()).collect();
    Some((payload, radix, count, parts))
}
/// Undo JavaScript single-quoted-string escaping (`\\`, `\'`, `\"`, `\n`,
/// `\r`, `\t`). Unknown escapes are kept verbatim and a lone trailing
/// backslash is preserved.
fn decode_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut iter = value.chars();
    loop {
        let Some(current) = iter.next() else { break };
        if current != '\\' {
            decoded.push(current);
            continue;
        }
        match iter.next() {
            Some('\\') => decoded.push('\\'),
            Some('\'') => decoded.push('\''),
            Some('"') => decoded.push('"'),
            Some('n') => decoded.push('\n'),
            Some('r') => decoded.push('\r'),
            Some('t') => decoded.push('\t'),
            Some(other) => {
                // Not a recognized escape: keep the backslash and the char.
                decoded.push('\\');
                decoded.push(other);
            }
            None => decoded.push('\\'),
        }
    }
    decoded
}
/// Unpack a p.a.c.k.e.r payload by substituting each base-36 token with
/// its entry from the symbol table. Only the common radix-36 variant is
/// supported; returns `None` for other radices or when no packed call is
/// present.
fn unpack_packer(text: &str) -> Option<String> {
    let (mut payload, radix, count, symbols) = Self::extract_packed_eval_args(text)?;
    if radix != 36 {
        return None;
    }
    let token_regex = Self::regex(r"\b[0-9a-z]+\b")?;
    payload = token_regex
        .replace_all(&payload, |captures: &Captures| {
            let token = captures
                .get(0)
                .map(|value| value.as_str())
                .unwrap_or_default();
            // Tokens that are not valid base-36 or exceed the symbol count
            // pass through untouched.
            let Some(index) = Self::decode_base36(token) else {
                return token.to_string();
            };
            if index >= count {
                return token.to_string();
            }
            // Packer leaves an empty slot when the token maps to itself.
            let replacement = symbols.get(index).map(|value| value.as_str()).unwrap_or("");
            if replacement.is_empty() {
                token.to_string()
            } else {
                replacement.to_string()
            }
        })
        .to_string();
    Some(payload)
}
/// Collect candidate media URLs (`.m3u8` / `.mp4` / `.txt`) from `text`,
/// de-duplicated and ordered so HLS playlists come first, then MP4s, then
/// anything else.
fn collect_media_candidates(text: &str) -> Vec<String> {
    let Some(regex) = Self::regex(r#"https?://[^\s"'<>]+?\.(?:m3u8|mp4|txt)(?:\?[^\s"'<>]*)?"#)
    else {
        return vec![];
    };
    // `Vec::dedup` only removes *consecutive* duplicates, and the bucket
    // sort below does not make equal URLs adjacent — so de-duplicate with
    // a set while preserving first-seen order instead.
    let mut seen = std::collections::HashSet::new();
    let mut urls = regex
        .find_iter(text)
        .map(|value| Self::sanitize_media_url(value.as_str()))
        .filter(|url| url.starts_with("https://"))
        .filter(|url| seen.insert(url.clone()))
        .collect::<Vec<_>>();
    // Stable sort keeps first-seen order within each priority bucket.
    urls.sort_by_key(|url| {
        if url.contains(".m3u8") {
            0
        } else if url.contains(".mp4") {
            1
        } else {
            2
        }
    });
    urls
}
/// Resolves a playable media URL from raw page HTML: first a literal URL
/// already present in the markup, otherwise by unpacking the
/// packer-obfuscated script and searching its output.
fn extract_stream_url(text: &str) -> Option<String> {
    match Self::extract_literal_url(text) {
        Some(url) => Some(url),
        None => {
            let unpacked = Self::unpack_packer(text)?;
            // Prefer a ranked media candidate; fall back to any literal
            // URL hidden inside the unpacked script.
            Self::collect_media_candidates(&unpacked)
                .into_iter()
                .next()
                .or_else(|| Self::extract_literal_url(&unpacked))
        }
    }
}
/// Finds the doodstream `pass_md5` endpoint referenced by `text`.
/// JSON-escaped slashes are undone first; an absolute URL wins, and a
/// site-relative `/pass_md5/...` path is resolved against the origin of
/// `detail_url`.
fn extract_pass_md5_url(text: &str, detail_url: &str) -> Option<String> {
    let decoded = text.replace("\\/", "/");
    let absolute = Self::regex(r#"https?://[^\s"'<>]+/pass_md5/[^\s"'<>]+"#)?;
    if let Some(found) = absolute.find(&decoded) {
        return Some(found.as_str().to_string());
    }
    let relative = Self::regex(r#"/pass_md5/[^\s"'<>]+"#)?
        .find(&decoded)?
        .as_str()
        .to_string();
    let origin = Self::request_origin(detail_url)?;
    Some(format!("{origin}{relative}"))
}
fn compose_pass_md5_media_url(pass_md5_url: &str, response_body: &str) -> Option<String> {
let raw = response_body
.trim()
.trim_matches('"')
.trim_matches('\'')
.replace("\\/", "/");
if raw.is_empty() {
return None;
}
let mut media_url = if raw.starts_with("https://") || raw.starts_with("http://") {
raw
} else if let Some(rest) = raw.strip_prefix("//") {
format!("https://{rest}")
} else {
let parsed = Url::parse(pass_md5_url).ok()?;
let host = parsed.host_str()?;
format!("{}://{}{}", parsed.scheme(), host, raw)
};
let query = Url::parse(pass_md5_url)
.ok()
.and_then(|url| url.query().map(str::to_string));
if let Some(query) = query {
if !query.is_empty() && !media_url.contains("token=") {
let separator = if media_url.contains('?') { '&' } else { '?' };
media_url.push(separator);
media_url.push_str(&query);
}
}
Some(Self::sanitize_media_url(&media_url))
}
/// Fetches the `pass_md5` endpoint referenced by the page (directly, or
/// after unpacking the obfuscated script) and composes the resulting
/// media URL. Returns `None` when no endpoint is found or the request
/// fails.
async fn resolve_stream_from_pass_md5(
    detail_url: &str,
    html: &str,
    requester: &mut Requester,
) -> Option<String> {
    let pass_md5_url = match Self::extract_pass_md5_url(html, detail_url) {
        Some(url) => url,
        // The endpoint may only appear inside the packed script.
        None => Self::extract_pass_md5_url(&Self::unpack_packer(html)?, detail_url)?,
    };
    let headers: Vec<(String, String)> = [
        ("Referer", detail_url),
        ("X-Requested-With", "XMLHttpRequest"),
        ("Accept", "*/*"),
    ]
    .into_iter()
    .map(|(name, value)| (name.to_string(), value.to_string()))
    .collect();
    let response = requester
        .get_with_headers(&pass_md5_url, headers, None)
        .await
        .ok()?;
    Self::compose_pass_md5_media_url(&pass_md5_url, &response)
}
}
impl crate::proxies::Proxy for DoodstreamProxy {
    /// Resolves a doodstream embed/detail URL to a direct media URL.
    /// Returns an empty string when the URL is not on the allow-list,
    /// the page fetch fails, or no stream can be extracted.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some(detail_url) = Self::normalize_detail_url(&url) else {
            return String::new();
        };
        let mut requester = requester.get_ref().clone();
        let Ok(html) = requester
            .get_with_headers(&detail_url, Self::request_headers(&detail_url), None)
            .await
        else {
            return String::new();
        };
        // Fast path: literal / unpacked stream URL present in the page;
        // otherwise fall back to the pass_md5 exchange.
        match Self::extract_stream_url(&html) {
            Some(stream_url) => stream_url,
            None => Self::resolve_stream_from_pass_md5(&detail_url, &html, &mut requester)
                .await
                .unwrap_or_default(),
        }
    }
}
// Unit tests for DoodstreamProxy: host allow-listing, stream-URL
// extraction (literal and packer-obfuscated pages), and pass_md5
// URL handling.
#[cfg(test)]
mod tests {
use super::DoodstreamProxy;
// Only https URLs on the known doodstream player hosts are accepted.
#[test]
fn allows_only_known_doodstream_hosts() {
assert!(DoodstreamProxy::is_allowed_detail_url(
"https://turboplayers.xyz/t/69bdfb21cc640"
));
assert!(DoodstreamProxy::is_allowed_detail_url(
"https://trailerhg.xyz/e/ttdc7a6qpskt"
));
// Plain http is rejected even for an allowed host.
assert!(!DoodstreamProxy::is_allowed_detail_url(
"http://turboplayers.xyz/t/69bdfb21cc640"
));
// Unknown hosts are rejected.
assert!(!DoodstreamProxy::is_allowed_detail_url(
"https://example.com/t/69bdfb21cc640"
));
}
// A literal .m3u8 URL present in the page markup is returned directly.
#[test]
fn extracts_clear_hls_url_from_turboplayers_layout() {
let html = r#"
<div id="video_player" data-hash="https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8"></div>
<script>
var urlPlay = 'https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8';
</script>
"#;
assert_eq!(
DoodstreamProxy::extract_stream_url(html).as_deref(),
Some("https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8")
);
}
// A Dean Edwards packer payload is unpacked and the .m3u8 candidate is
// preferred over the .txt one.
#[test]
fn unpacks_streamhg_style_player_config() {
let html = r#"
<script type='text/javascript'>
eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('0 1={\"2\":\"https://cdn.example/master.m3u8?t=1\",\"3\":\"https://cdn.example/master.txt\"};4(\"5\").6({7:[{8:1.2,9:\"a\"}]});',36,11,'var|links|hls2|hls3|jwplayer|vplayer|setup|sources|file|type|hls'.split('|')))
</script>
"#;
assert_eq!(
DoodstreamProxy::extract_stream_url(html).as_deref(),
Some("https://cdn.example/master.m3u8?t=1")
);
}
// The pass_md5 endpoint's query string (token/expiry) is appended to the
// media URL returned in the response body.
#[test]
fn composes_media_url_from_pass_md5_response() {
let pass_md5_url =
"https://trailerhg.xyz/pass_md5/abc123/def456?token=t0k3n&expiry=1775000000";
let body = "https://g4vsrqvtrj.pinebrookproductionlab.shop/1ghkpx2e8jnal/hls3/01/08534/syyzvotfnhaa_l/master.txt";
assert_eq!(
DoodstreamProxy::compose_pass_md5_media_url(pass_md5_url, body).as_deref(),
Some(
"https://g4vsrqvtrj.pinebrookproductionlab.shop/1ghkpx2e8jnal/hls3/01/08534/syyzvotfnhaa_l/master.txt?token=t0k3n&expiry=1775000000"
)
);
}
// A site-relative /pass_md5/... path is resolved against the detail
// URL's origin.
#[test]
fn extracts_relative_pass_md5_url() {
let html = r#"
<script>
var file = "/pass_md5/abc123/def456?token=t0k3n&expiry=1775000000";
</script>
"#;
assert_eq!(
DoodstreamProxy::extract_pass_md5_url(html, "https://trailerhg.xyz/e/ttdc7a6qpskt")
.as_deref(),
Some("https://trailerhg.xyz/pass_md5/abc123/def456?token=t0k3n&expiry=1775000000")
);
}
}

View File

@@ -6,12 +6,23 @@ use ntex::{
use crate::util::requester::Requester;
/// Normalizes a proxied image endpoint into an absolute hanime CDN URL.
///
/// - Full `http(s)://` URLs pass through untouched.
/// - Paths already starting with the `hanime-cdn.com` host just gain the
///   `https://` scheme (avoids duplicating the host).
/// - Anything else is treated as a CDN-relative path and prefixed with
///   `https://hanime-cdn.com/`.
fn normalize_image_url(endpoint: &str) -> String {
    let endpoint = endpoint.trim_start_matches('/');
    if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
        endpoint.to_string()
    } else if endpoint.starts_with("hanime-cdn.com/") || endpoint == "hanime-cdn.com" {
        format!("https://{endpoint}")
    } else {
        // Fix: a relative path must be resolved against the CDN host.
        // The previous branch was identical to the one above and produced
        // "https://<relative-path>", which the module's own test rejects.
        format!("https://hanime-cdn.com/{endpoint}")
    }
}
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = format!("https://hanime-cdn.com/{}", endpoint);
let image_url = normalize_image_url(&endpoint);
let upstream = match requester
.get_ref()
@@ -52,3 +63,24 @@ pub async fn get_image(
// ...or simple & compatible:
Ok(resp.body(bytes.to_vec()))
}
// Unit tests for the hanime CDN image-URL normalizer.
#[cfg(test)]
mod tests {
use super::normalize_image_url;
// An endpoint that already carries the CDN host must not get the host
// prepended a second time.
#[test]
fn keeps_full_hanime_cdn_host_path_without_duplication() {
assert_eq!(
normalize_image_url("hanime-cdn.com/images/covers/natsu-zuma-2-cv1.png"),
"https://hanime-cdn.com/images/covers/natsu-zuma-2-cv1.png"
);
}
// A bare relative path is resolved against the CDN host.
#[test]
fn prefixes_relative_paths_with_hanime_cdn_host() {
assert_eq!(
normalize_image_url("/images/covers/natsu-zuma-2-cv1.png"),
"https://hanime-cdn.com/images/covers/natsu-zuma-2-cv1.png"
);
}
}

View File

@@ -17,12 +17,21 @@ impl JavtifulProxy {
requester: web::types::State<Requester>,
) -> String {
let mut requester = requester.get_ref().clone();
let url = "https://javtiful.com/".to_string() + &url;
let text = requester.get(&url, None).await.unwrap_or("".to_string());
let endpoint = url
.trim_start_matches('/')
.strip_prefix("https://")
.or_else(|| url.trim_start_matches('/').strip_prefix("http://"))
.unwrap_or(url.trim_start_matches('/'))
.trim_start_matches("www.javtiful.com/")
.trim_start_matches("javtiful.com/")
.trim_start_matches('/')
.to_string();
let detail_url = format!("https://javtiful.com/{endpoint}");
let text = requester.get(&detail_url, None).await.unwrap_or_default();
if text.is_empty() {
return "".to_string();
}
let video_id = url.split('/').nth(4).unwrap_or("").to_string();
let video_id = endpoint.split('/').nth(1).unwrap_or("").to_string();
let token = text
.split("data-csrf-token=\"")
@@ -39,7 +48,7 @@ impl JavtifulProxy {
.post_multipart(
"https://javtiful.com/ajax/get_cdn",
form,
vec![("Referer".to_string(), url.to_string())],
vec![("Referer".to_string(), detail_url)],
Some(Version::HTTP_11),
)
.await

View File

@@ -1,19 +1,34 @@
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::pornhd3x::Pornhd3xProxy;
use ntex::web;
use crate::proxies::pimpbunny::PimpbunnyProxy;
use crate::proxies::porndish::PorndishProxy;
use crate::proxies::spankbang::SpankbangProxy;
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
pub mod doodstream;
pub mod hanimecdn;
pub mod hqpornerthumb;
pub mod javtiful;
pub mod noodlemagazine;
pub mod pimpbunny;
pub mod pimpbunnythumb;
pub mod porndish;
pub mod porndishthumb;
pub mod pornhd3x;
pub mod shooshtime;
pub mod spankbang;
pub mod sxyprn;
// Closed set of supported upstream-site proxies. Each variant wraps one
// concrete proxy type; `Proxy::get_video_url` for `AnyProxy` dispatches
// to the wrapped implementation.
#[derive(Debug, Clone)]
pub enum AnyProxy {
Doodstream(DoodstreamProxy),
Sxyprn(SxyprnProxy),
Javtiful(javtiful::JavtifulProxy),
Pornhd3x(Pornhd3xProxy),
Pimpbunny(PimpbunnyProxy),
Porndish(PorndishProxy),
Spankbang(SpankbangProxy),
}
@@ -24,8 +39,12 @@ pub trait Proxy {
impl Proxy for AnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
match self {
AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,
AnyProxy::Pornhd3x(p) => p.get_video_url(url, requester).await,
AnyProxy::Pimpbunny(p) => p.get_video_url(url, requester).await,
AnyProxy::Porndish(p) => p.get_video_url(url, requester).await,
AnyProxy::Spankbang(p) => p.get_video_url(url, requester).await,
}
}

View File

@@ -1,11 +1,21 @@
use ntex::http::header::CONTENT_TYPE;
use ntex::web::{self, HttpRequest, error};
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use serde_json::Value;
use std::net::IpAddr;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
const FIREFOX_USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
const IMAGE_ACCEPT: &str = "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5";
#[derive(Debug, Clone)]
pub struct NoodlemagazineProxy {}
@@ -57,6 +67,99 @@ impl NoodlemagazineProxy {
}
}
fn normalize_image_url(url: &str) -> String {
if url.starts_with("http://") || url.starts_with("https://") {
url.to_string()
} else {
format!("https://{}", url.trim_start_matches('/'))
}
}
fn root_referer() -> &'static str {
"https://noodlemagazine.com/"
}
fn root_html_headers() -> Vec<(String, String)> {
vec![
("Referer".to_string(), Self::root_referer().to_string()),
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
("Accept".to_string(), HTML_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
]
}
fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> {
let mut headers = vec![
("Referer".to_string(), Self::root_referer().to_string()),
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
("Accept".to_string(), IMAGE_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
];
if let Some(cookie) = requester.cookie_header_for_url(image_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
}
/// True when the path (compared case-insensitively) ends in one of the
/// supported image file extensions.
fn has_allowed_image_extension(path: &str) -> bool {
    const ALLOWED: [&str; 6] = [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"];
    let lowered = path.to_ascii_lowercase();
    ALLOWED.iter().any(|extension| lowered.ends_with(extension))
}
/// SSRF guard for thumbnail hosts: rejects `localhost` and any IP-literal
/// host that is private, loopback, link-local, or otherwise non-routable.
/// Hostnames that do not parse as IP addresses are allowed.
fn is_disallowed_thumb_host(host: &str) -> bool {
    if host.eq_ignore_ascii_case("localhost") {
        return true;
    }
    // Non-IP hostnames are left to the normal request path.
    let Ok(address) = host.parse::<IpAddr>() else {
        return false;
    };
    match address {
        IpAddr::V4(ip) => {
            ip.is_private()
                || ip.is_loopback()
                || ip.is_link_local()
                || ip.is_broadcast()
                || ip.is_documentation()
                || ip.is_unspecified()
        }
        IpAddr::V6(ip) => {
            ip.is_loopback()
                || ip.is_unspecified()
                || ip.is_multicast()
                || ip.is_unique_local()
                || ip.is_unicast_link_local()
        }
    }
}
fn is_allowed_thumb_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
!Self::is_disallowed_thumb_host(host) && Self::has_allowed_image_extension(url.path())
}
/// True when the Content-Type's media type (parameters after `;`
/// stripped, whitespace trimmed, lowercased) is an `image/*` type.
fn is_binary_image_content_type(content_type: &str) -> bool {
    content_type
        .split(';')
        .next()
        .unwrap_or_default()
        .trim()
        .to_ascii_lowercase()
        .starts_with("image/")
}
fn is_hls_url(url: &str) -> bool {
Url::parse(url)
.ok()
@@ -184,6 +287,78 @@ pub async fn serve_media(
.body(rewritten_manifest))
}
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = NoodlemagazineProxy::normalize_image_url(&endpoint);
if !NoodlemagazineProxy::is_allowed_thumb_url(&image_url) {
return Ok(web::HttpResponse::BadRequest().finish());
}
let mut requester = requester.get_ref().clone();
let _ = requester
.get_with_headers(
NoodlemagazineProxy::root_referer(),
NoodlemagazineProxy::root_html_headers(),
Some(Version::HTTP_11),
)
.await;
let mut headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
let mut upstream = requester
.get_raw_with_headers(image_url.as_str(), headers.clone())
.await
.ok();
let needs_warmup = upstream
.as_ref()
.map(|response| !response.status().is_success())
.unwrap_or(true);
if needs_warmup {
let _ = requester
.get_with_headers(image_url.as_str(), headers.clone(), Some(Version::HTTP_11))
.await;
headers = NoodlemagazineProxy::image_headers(&requester, image_url.as_str());
upstream = requester
.get_raw_with_headers(image_url.as_str(), headers)
.await
.ok();
}
let Some(upstream) = upstream.filter(|response| response.status().is_success()) else {
return Ok(web::HttpResponse::NotFound().finish());
};
let status = upstream.status();
let headers = upstream.headers().clone();
let content_type = headers
.get(CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.map(str::to_string)
.unwrap_or_default();
if !NoodlemagazineProxy::is_binary_image_content_type(&content_type) {
return Ok(web::HttpResponse::BadGateway().finish());
}
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if !content_type.is_empty() {
resp.set_header(CONTENT_TYPE, content_type);
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}
#[cfg(test)]
mod tests {
use super::NoodlemagazineProxy;
@@ -231,4 +406,36 @@ mod tests {
"#EXTM3U\n#EXT-X-STREAM-INF:BANDWIDTH=1\nhttps://cdn.example/hls/low/index.m3u8\n#EXT-X-KEY:METHOD=AES-128,URI=\"https://cdn.example/hls/keys/key.bin\"\nhttps://cdn.example/hls/segment0.ts"
);
}
#[test]
fn allows_https_image_thumbs_but_rejects_local_or_non_images() {
assert!(NoodlemagazineProxy::is_allowed_thumb_url(
"https://noodlemagazine.com/thumbs/example.webp"
));
assert!(NoodlemagazineProxy::is_allowed_thumb_url(
"https://cdn.example/previews/example.jpg"
));
assert!(!NoodlemagazineProxy::is_allowed_thumb_url(
"https://noodlemagazine.com/watch/-123_456"
));
assert!(!NoodlemagazineProxy::is_allowed_thumb_url(
"https://localhost/thumb.jpg"
));
}
#[test]
fn recognizes_binary_image_content_types() {
assert!(NoodlemagazineProxy::is_binary_image_content_type(
"image/webp"
));
assert!(NoodlemagazineProxy::is_binary_image_content_type(
"image/jpeg; charset=binary"
));
assert!(!NoodlemagazineProxy::is_binary_image_content_type(
"text/html; charset=utf-8"
));
assert!(!NoodlemagazineProxy::is_binary_image_content_type(
"application/json"
));
}
}

252
src/proxies/pimpbunny.rs Normal file
View File

@@ -0,0 +1,252 @@
use ntex::web;
use regex::Regex;
use serde_json::Value;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
#[derive(Debug, Clone)]
pub struct PimpbunnyProxy {}
impl PimpbunnyProxy {
const FIREFOX_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &'static str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
pub fn new() -> Self {
PimpbunnyProxy {}
}
fn normalize_detail_url(url: &str) -> Option<String> {
let normalized = if url.starts_with("http://") || url.starts_with("https://") {
url.to_string()
} else {
format!("https://{}", url.trim_start_matches('/'))
};
Self::is_allowed_detail_url(&normalized).then_some(normalized)
}
fn is_allowed_detail_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
&& !url.path().starts_with("/contents/videos_screenshots/")
}
fn root_referer() -> &'static str {
"https://pimpbunny.com/"
}
fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> {
vec![
("Referer".to_string(), referer.to_string()),
(
"User-Agent".to_string(),
Self::FIREFOX_USER_AGENT.to_string(),
),
("Accept".to_string(), Self::HTML_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
]
}
fn headers_with_cookies(
requester: &Requester,
request_url: &str,
referer: &str,
) -> Vec<(String, String)> {
let mut headers = Self::html_headers_with_referer(referer);
if let Some(cookie) = requester.cookie_header_for_url(request_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
}
async fn warm_root_session(requester: &mut Requester) {
let _ = requester
.get_with_headers(
Self::root_referer(),
Self::html_headers_with_referer(Self::root_referer()),
Some(Version::HTTP_11),
)
.await;
}
/// Scans every `application/ld+json` script block in `text` and returns
/// the first JSON-LD value that contains a video object (per
/// `find_video_object`), or `None` when no script yields one.
fn extract_json_ld_video(text: &str) -> Option<Value> {
    let script_regex =
        Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;
    for captures in script_regex.captures_iter(text) {
        // Fix: skip blocks that are missing or malformed instead of
        // aborting the whole scan — pages commonly carry several ld+json
        // scripts and only one of them is the VideoObject. The previous
        // `?` on `get(1)` / `from_str(...).ok()?` returned `None` for the
        // entire function on the first bad block.
        let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else {
            continue;
        };
        let Ok(parsed) = serde_json::from_str::<Value>(raw) else {
            continue;
        };
        if let Some(video) = Self::find_video_object(&parsed) {
            return Some(video);
        }
    }
    None
}
fn find_video_object(parsed: &Value) -> Option<Value> {
if parsed
.get("@type")
.and_then(Value::as_str)
.is_some_and(|value| value == "VideoObject")
{
return Some(parsed.clone());
}
if parsed
.get("contentUrl")
.and_then(Value::as_str)
.is_some_and(|value| !value.trim().is_empty())
{
return Some(parsed.clone());
}
if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
for item in graph {
if item
.get("@type")
.and_then(Value::as_str)
.is_some_and(|value| value == "VideoObject")
{
return Some(item.clone());
}
if item
.get("contentUrl")
.and_then(Value::as_str)
.is_some_and(|value| !value.trim().is_empty())
{
return Some(item.clone());
}
}
}
if let Some(array) = parsed.as_array() {
for item in array {
if let Some(video) = Self::find_video_object(item) {
return Some(video);
}
}
}
None
}
fn extract_stream_url(json_ld: &Value) -> Option<String> {
json_ld
.get("contentUrl")
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(str::to_string)
}
fn extract_stream_url_from_html(text: &str) -> Option<String> {
Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
.ok()?
.captures(text)
.and_then(|captures| captures.get(1))
.map(|value| value.as_str().trim().to_string())
.filter(|value| !value.is_empty())
}
}
impl crate::proxies::Proxy for PimpbunnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
let Some(detail_url) = Self::normalize_detail_url(&url) else {
return String::new();
};
let mut requester = requester.get_ref().clone();
Self::warm_root_session(&mut requester).await;
let headers = Self::headers_with_cookies(&requester, &detail_url, Self::root_referer());
let text = match requester
.get_with_headers(&detail_url, headers, Some(Version::HTTP_2))
.await
{
Ok(text) => text,
Err(_) => return String::new(),
};
Self::extract_json_ld_video(&text)
.and_then(|json_ld| Self::extract_stream_url(&json_ld))
.or_else(|| Self::extract_stream_url_from_html(&text))
.unwrap_or_default()
}
}
#[cfg(test)]
mod tests {
use super::PimpbunnyProxy;
#[test]
fn allows_only_pimpbunny_detail_urls() {
assert!(PimpbunnyProxy::is_allowed_detail_url(
"https://pimpbunny.com/videos/example-video/"
));
assert!(PimpbunnyProxy::is_allowed_detail_url(
"https://www.pimpbunny.com/video/example/"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"http://pimpbunny.com/videos/example-video/"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg"
));
assert!(!PimpbunnyProxy::is_allowed_detail_url(
"https://example.com/videos/example-video/"
));
}
#[test]
fn extracts_content_url_from_json_ld() {
let html = r#"
<script type="application/ld+json">{"contentUrl":"https://cdn.example/video.mp4"}</script>
"#;
let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("json-ld should parse");
assert_eq!(
PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
Some("https://cdn.example/video.mp4")
);
}
#[test]
fn extracts_video_object_from_graph_script() {
let html = r#"
<script type="application/ld+json">
{"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
</script>
"#;
let json_ld =
PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse");
assert_eq!(
PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
Some("https://cdn.example/graph.mp4")
);
}
#[test]
fn falls_back_to_raw_content_url_match() {
let html = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;
assert_eq!(
PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(),
Some("https://cdn.example/fallback.mp4")
);
}
}

View File

@@ -0,0 +1,169 @@
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
const FIREFOX_USER_AGENT: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";
const HTML_ACCEPT: &str =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
const IMAGE_ACCEPT: &str = "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5";
fn root_referer() -> &'static str {
"https://pimpbunny.com/"
}
fn root_html_headers() -> Vec<(String, String)> {
vec![
("Referer".to_string(), root_referer().to_string()),
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
("Accept".to_string(), HTML_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
]
}
fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> {
let mut headers = vec![
("Referer".to_string(), root_referer().to_string()),
("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()),
("Accept".to_string(), IMAGE_ACCEPT.to_string()),
("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()),
];
if let Some(cookie) = requester.cookie_header_for_url(image_url) {
headers.push(("Cookie".to_string(), cookie));
}
headers
}
fn is_allowed_thumb_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
matches!(host, "pimpbunny.com" | "www.pimpbunny.com")
&& url.path().starts_with("/contents/videos_screenshots/")
}
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint
} else {
format!("https://{}", endpoint.trim_start_matches('/'))
};
if !is_allowed_thumb_url(&image_url) {
return Ok(web::HttpResponse::BadRequest().finish());
}
let mut requester = requester.get_ref().clone();
let _ = requester
.get_with_headers(root_referer(), root_html_headers(), Some(Version::HTTP_11))
.await;
let mut headers = image_headers(&requester, image_url.as_str());
let mut upstream = requester
.get_raw_with_headers(image_url.as_str(), headers.clone())
.await
.ok();
let needs_warmup = upstream
.as_ref()
.map(|response| !response.status().is_success())
.unwrap_or(true);
if needs_warmup {
let _ = requester
.get_with_headers(root_referer(), root_html_headers(), Some(Version::HTTP_11))
.await;
headers = image_headers(&requester, image_url.as_str());
upstream = requester
.get_raw_with_headers(image_url.as_str(), headers.clone())
.await
.ok();
}
let needs_image_specific_warmup = upstream
.as_ref()
.map(|response| !response.status().is_success())
.unwrap_or(true);
if needs_image_specific_warmup {
let _ = requester
.get_with_headers(image_url.as_str(), headers.clone(), Some(Version::HTTP_11))
.await;
headers = image_headers(&requester, image_url.as_str());
upstream = requester
.get_raw_with_headers(image_url.as_str(), headers)
.await
.ok();
}
let Some(upstream) = upstream.filter(|response| response.status().is_success()) else {
return Ok(web::HttpResponse::NotFound().finish());
};
let status = upstream.status();
let headers = upstream.headers().clone();
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let mut resp = Response::build(status);
if let Some(ct) = headers.get(CONTENT_TYPE) {
if let Ok(ct_str) = ct.to_str() {
resp.set_header(CONTENT_TYPE, ct_str);
}
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
}
#[cfg(test)]
mod tests {
use super::is_allowed_thumb_url;
#[test]
fn allows_expected_pimpbunny_thumb_paths() {
assert!(is_allowed_thumb_url(
"https://pimpbunny.com/contents/videos_screenshots/517000/517329/800x450/1.jpg"
));
assert!(is_allowed_thumb_url(
"https://www.pimpbunny.com/contents/videos_screenshots/1/2/800x450/3.webp"
));
}
#[test]
fn rejects_non_thumb_or_non_pimpbunny_urls() {
assert!(!is_allowed_thumb_url(
"http://pimpbunny.com/contents/videos_screenshots/x.jpg"
));
assert!(!is_allowed_thumb_url(
"https://pimpbunny.com/videos/example-video/"
));
assert!(!is_allowed_thumb_url(
"https://example.com/contents/videos_screenshots/x.jpg"
));
}
}

369
src/proxies/porndish.rs Normal file
View File

@@ -0,0 +1,369 @@
use ntex::web;
use regex::Regex;
use std::process::Command;
use url::Url;
use crate::util::requester::Requester;
#[derive(Debug, Clone)]
pub struct PorndishProxy {}
impl PorndishProxy {
pub fn new() -> Self {
Self {}
}
fn normalize_detail_url(endpoint: &str) -> Option<String> {
let endpoint = endpoint.trim();
if endpoint.is_empty() {
return None;
}
if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
Some(endpoint.to_string())
} else {
Some(format!("https://{}", endpoint.trim_start_matches('/')))
}
}
fn parse_url(url: &str) -> Option<Url> {
Url::parse(url).ok()
}
fn is_porndish_host(host: &str) -> bool {
matches!(host, "www.porndish.com" | "porndish.com")
}
fn is_myvidplay_host(host: &str) -> bool {
matches!(host, "myvidplay.com" | "www.myvidplay.com")
}
fn is_vidara_host(host: &str) -> bool {
matches!(host, "vidara.so" | "www.vidara.so")
}
fn is_allowed_detail_url(url: &str) -> bool {
let Some(url) = Self::parse_url(url) else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
Self::is_porndish_host(host) && url.path().starts_with("/porn/")
}
fn is_allowed_myvidplay_iframe_url(url: &str) -> bool {
let Some(url) = Self::parse_url(url) else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
Self::is_myvidplay_host(host) && url.path().starts_with("/e/")
}
fn is_allowed_myvidplay_pass_url(url: &str) -> bool {
let Some(url) = Self::parse_url(url) else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
Self::is_myvidplay_host(host) && url.path().starts_with("/pass_md5/")
}
fn is_allowed_vidara_iframe_url(url: &str) -> bool {
let Some(url) = Self::parse_url(url) else {
return false;
};
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else {
return false;
};
Self::is_vidara_host(host) && url.path().starts_with("/e/")
}
fn vidara_api_url(iframe_url: &str) -> Option<String> {
let url = Self::parse_url(iframe_url)?;
if !Self::is_allowed_vidara_iframe_url(iframe_url) {
return None;
}
let filecode = url
.path_segments()?
.filter(|segment| !segment.is_empty())
.next_back()?
.to_string();
if filecode.is_empty() {
return None;
}
Some(format!("https://vidara.so/api/stream?filecode={filecode}"))
}
fn regex(value: &str) -> Option<Regex> {
Regex::new(value).ok()
}
async fn fetch_with_curl_cffi(url: &str, referer: Option<&str>) -> Option<String> {
let url = url.to_string();
let referer = referer.unwrap_or("").to_string();
let output = tokio::task::spawn_blocking(move || {
Command::new("python3")
.arg("-c")
.arg(
r#"
import sys
from curl_cffi import requests
url = sys.argv[1]
referer = sys.argv[2] if len(sys.argv) > 2 else ""
headers = {}
if referer:
headers["Referer"] = referer
response = requests.get(
url,
impersonate="chrome",
timeout=30,
allow_redirects=True,
headers=headers,
)
if response.status_code >= 400:
sys.exit(1)
sys.stdout.buffer.write(response.content)
"#,
)
.arg(url)
.arg(referer)
.output()
})
.await
.ok()?
.ok()?;
if !output.status.success() {
return None;
}
Some(String::from_utf8_lossy(&output.stdout).to_string())
}
async fn resolve_first_redirect(url: &str) -> Option<String> {
let url = url.to_string();
let output = tokio::task::spawn_blocking(move || {
Command::new("python3")
.arg("-c")
.arg(
r#"
import sys
from curl_cffi import requests
url = sys.argv[1]
response = requests.get(
url,
impersonate="chrome",
timeout=30,
allow_redirects=False,
)
location = response.headers.get("location", "")
if location:
sys.stdout.write(location)
"#,
)
.arg(url)
.output()
})
.await
.ok()?
.ok()?;
let location = String::from_utf8_lossy(&output.stdout).trim().to_string();
if location.is_empty() {
None
} else {
Some(location)
}
}
fn extract_iframe_fragments(html: &str) -> Vec<String> {
let Some(regex) = Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#)
else {
return vec![];
};
let mut fragments = Vec::new();
for captures in regex.captures_iter(html) {
let Some(value) = captures.get(1).map(|value| value.as_str()) else {
continue;
};
let encoded = format!("\"{value}\"");
let decoded = serde_json::from_str::<String>(&encoded).unwrap_or_default();
if decoded.contains("<iframe") {
fragments.push(decoded);
}
}
fragments
}
fn parse_embed_source(fragment: &str) -> Option<String> {
let regex = Self::regex(r#"(?is)<iframe[^>]+src="([^"]+)"[^>]*>"#)?;
regex
.captures(fragment)
.and_then(|captures| captures.get(1))
.map(|value| value.as_str().to_string())
}
/// Resolves a playable stream URL from an allowed myvidplay-style iframe.
///
/// Mirrors the site's player JS: fetch the iframe page, extract the
/// `/pass_md5/...` AJAX path, request it to obtain a base URL, then append
/// a 10-char time-derived suffix plus `token`/`expiry` query parameters and
/// follow the first redirect, if any.
async fn resolve_myvidplay_stream(iframe_url: &str) -> Option<String> {
    // Only talk to hosts explicitly allow-listed for this embed family.
    if !Self::is_allowed_myvidplay_iframe_url(iframe_url) {
        return None;
    }
    let html =
        Self::fetch_with_curl_cffi(iframe_url, Some("https://www.porndish.com/")).await?;
    // The player issues `$.get('/pass_md5/<hash>/<token>')`; capture that path.
    let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?;
    let path = pass_regex
        .captures(&html)
        .and_then(|captures| captures.get(1))
        .map(|value| value.as_str().to_string())?;
    // The last path segment doubles as the playback token.
    let token = path.trim_end_matches('/').rsplit('/').next()?.to_string();
    if token.is_empty() {
        return None;
    }
    // Absolutize relative paths against the iframe origin.
    let pass_url = if path.starts_with("http://") || path.starts_with("https://") {
        path
    } else {
        let base = Url::parse(iframe_url).ok()?;
        base.join(&path).ok()?.to_string()
    };
    if !Self::is_allowed_myvidplay_pass_url(&pass_url) {
        return None;
    }
    let base = Self::fetch_with_curl_cffi(&pass_url, Some(iframe_url))
        .await?
        .trim()
        .to_string();
    // "RELOAD" is the endpoint's soft-failure sentinel.
    if base.is_empty() || base == "RELOAD" || !base.starts_with("http") {
        return None;
    }
    // Random-looking 10-char suffix derived from the clock, like the JS does.
    let chars = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_millis();
    let suffix = (0..10)
        .map(|index| {
            let pos = ((now + (index as u128 * 17)) % chars.len() as u128) as usize;
            chars[pos] as char
        })
        .collect::<String>();
    let stream_url = format!("{base}{suffix}?token={token}&expiry={now}");
    // Follow one redirect if present; otherwise the built URL is final.
    Some(
        Self::resolve_first_redirect(&stream_url)
            .await
            .unwrap_or(stream_url),
    )
}
/// Asks the vidara API for the `streaming_url` belonging to `iframe_url`.
async fn resolve_vidara_stream(iframe_url: &str) -> Option<String> {
    let api_url = Self::vidara_api_url(iframe_url)?;
    let body = Self::fetch_with_curl_cffi(&api_url, Some(iframe_url)).await?;
    let payload: serde_json::Value = serde_json::from_str(&body).ok()?;
    let streaming = payload.get("streaming_url")?.as_str()?.trim();
    if streaming.is_empty() {
        None
    } else {
        Some(streaming.to_string())
    }
}
/// Resolves a porndish detail page into a direct stream URL.
///
/// Scans the embedded `const ...Content` script fragments for iframes,
/// preferring a vidara embed; the first allowed myvidplay embed is kept as
/// a fallback and only tried when no vidara stream resolves. Returns an
/// empty string on any failure.
pub async fn get_video_url(
    &self,
    url: String,
    _requester: web::types::State<Requester>,
) -> String {
    let Some(detail_url) = Self::normalize_detail_url(&url) else {
        return String::new();
    };
    if !Self::is_allowed_detail_url(&detail_url) {
        return String::new();
    }
    let Some(html) =
        Self::fetch_with_curl_cffi(&detail_url, Some("https://www.porndish.com/")).await
    else {
        return String::new();
    };
    let mut fallback_iframe: Option<String> = None;
    for fragment in Self::extract_iframe_fragments(&html) {
        let Some(iframe_url) = Self::parse_embed_source(&fragment) else {
            continue;
        };
        // Absolutize protocol-relative URLs; skip anything non-http(s).
        let iframe_url =
            if iframe_url.starts_with("http://") || iframe_url.starts_with("https://") {
                iframe_url
            } else if iframe_url.starts_with("//") {
                format!("https:{iframe_url}")
            } else {
                continue;
            };
        if Self::is_allowed_vidara_iframe_url(&iframe_url) {
            if let Some(stream_url) = Self::resolve_vidara_stream(&iframe_url).await {
                return stream_url;
            }
        }
        // Remember the first myvidplay candidate for the fallback pass.
        if fallback_iframe.is_none() && Self::is_allowed_myvidplay_iframe_url(&iframe_url) {
            fallback_iframe = Some(iframe_url);
        }
    }
    if let Some(iframe_url) = fallback_iframe {
        if let Some(stream_url) = Self::resolve_myvidplay_stream(&iframe_url).await {
            return stream_url;
        }
    }
    String::new()
}
}
#[cfg(test)]
mod tests {
    use super::PorndishProxy;
    // Only https `/porn/...` paths on porndish.com may be fetched.
    #[test]
    fn allows_only_porndish_detail_urls() {
        assert!(PorndishProxy::is_allowed_detail_url(
            "https://www.porndish.com/porn/example/"
        ));
        assert!(!PorndishProxy::is_allowed_detail_url(
            "https://www.porndish.com/search/example/"
        ));
        assert!(!PorndishProxy::is_allowed_detail_url(
            "https://example.com/porn/example/"
        ));
    }
}

View File

@@ -0,0 +1,80 @@
use ntex::http::header::CONTENT_TYPE;
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use std::process::Command;
use url::Url;
use crate::util::requester::Requester;
/// Accepts only https thumbnails hosted on porndish.com under
/// `/wp-content/uploads/`.
fn is_allowed_thumb_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let host_ok = matches!(parsed.host_str(), Some("www.porndish.com" | "porndish.com"));
    parsed.scheme() == "https" && host_ok && parsed.path().starts_with("/wp-content/uploads/")
}
/// Proxies an allowed porndish thumbnail through `curl_cffi` (Chrome TLS
/// impersonation), since plain requests are bot-blocked upstream.
///
/// Protocol of the helper script: image bytes go to stdout, the upstream
/// content-type (or a `status=NNN` error line) goes to stderr.
pub async fn get_image(
    req: HttpRequest,
    _requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
    let endpoint = req.match_info().query("endpoint").to_string();
    // The route captures a host+path tail; re-add the scheme if missing.
    let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
        endpoint
    } else {
        format!("https://{}", endpoint.trim_start_matches('/'))
    };
    if !is_allowed_thumb_url(&image_url) {
        return Ok(web::HttpResponse::BadRequest().finish());
    }
    // Blocking subprocess; run it off the async executor.
    let output = tokio::task::spawn_blocking(move || {
        Command::new("python3")
            .arg("-c")
            .arg(
                r#"
import sys
from curl_cffi import requests
url = sys.argv[1]
response = requests.get(
    url,
    impersonate="chrome",
    timeout=30,
    allow_redirects=True,
    headers={"Referer": "https://www.porndish.com/"},
)
if response.status_code >= 400:
    sys.stderr.write(f"status={response.status_code}\n")
    sys.exit(1)
sys.stderr.write(response.headers.get("content-type", "application/octet-stream"))
sys.stdout.buffer.write(response.content)
"#,
            )
            .arg(image_url)
            .output()
    })
    .await
    .map_err(error::ErrorBadGateway)?
    .map_err(error::ErrorBadGateway)?;
    if !output.status.success() {
        return Ok(web::HttpResponse::NotFound().finish());
    }
    // On success stderr carries the content-type (see script above).
    let content_type = String::from_utf8_lossy(&output.stderr).trim().to_string();
    let mut resp = Response::build(ntex::http::StatusCode::OK);
    if !content_type.is_empty() {
        resp.set_header(CONTENT_TYPE, content_type);
    }
    Ok(resp.body(output.stdout))
}

243
src/proxies/pornhd3x.rs Normal file
View File

@@ -0,0 +1,243 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use ntex::web;
use regex::Regex;
use serde_json::Value;
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
const BASE_URL: &str = "https://www.pornhd3x.tv";
// NOTE(review): these values appear to come from the site's player script
// (they sign and cookie the get_sources AJAX call) — confirm if they rotate.
const SOURCE_SECRET: &str = "98126avrbi6m49vd7shxkn985";
const SOURCE_COOKIE_PREFIX: &str = "826avrbi6m49vd7shxkn985m";
const SOURCE_COOKIE_SUFFIX: &str = "k06twz87wwxtp3dqiicks2df";
/// Resolver for pornhd3x.tv video pages.
#[derive(Debug, Clone)]
pub struct Pornhd3xProxy {
    // Monotonic request counter shared across clones; feeds the per-request
    // nonce used by `next_source_request`.
    source_counter: Arc<AtomicU32>,
}
impl Pornhd3xProxy {
/// Creates a proxy whose request counter starts at zero.
pub fn new() -> Self {
    let source_counter = Arc::new(AtomicU32::new(0));
    Self { source_counter }
}
/// Splits the proxied endpoint into a canonical detail URL plus an optional
/// requested quality taken from a `/__quality__/<label>` suffix.
fn normalize_detail_request(endpoint: &str) -> Option<(String, Option<String>)> {
    let trimmed = endpoint.trim().trim_start_matches('/');
    if trimmed.is_empty() {
        return None;
    }
    let (detail_part, quality) =
        if let Some((detail, quality)) = trimmed.split_once("/__quality__/") {
            (detail, Some(quality.replace("%20", " ").trim().to_string()))
        } else {
            (trimmed, None)
        };
    let detail_url = if detail_part.starts_with("http://") || detail_part.starts_with("https://") {
        detail_part.to_string()
    } else {
        format!("https://{}", detail_part.trim_start_matches('/'))
    };
    if !Self::is_allowed_detail_url(&detail_url) {
        return None;
    }
    Some((detail_url, quality.filter(|value| !value.is_empty())))
}
/// True only for https movie pages on pornhd3x.tv.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    if parsed.scheme() != "https" {
        return false;
    }
    match parsed.host_str() {
        Some("www.pornhd3x.tv") | Some("pornhd3x.tv") => parsed.path().starts_with("/movies/"),
        _ => false,
    }
}
/// Canonicalizes a scraped URL: protocol-relative and site-relative
/// references become absolute https URLs, plain http is upgraded.
fn normalize_url(raw: &str) -> String {
    let value = raw.trim();
    match value {
        "" => String::new(),
        v if v.starts_with("//") => format!("https:{v}"),
        v if v.starts_with('/') => format!("{BASE_URL}{v}"),
        v if v.starts_with("http://") => v.replacen("http://", "https://", 1),
        v => v.to_string(),
    }
}
/// Finds the episode id from either the hidden `uuid` input or an
/// `episode-id` attribute; the first match in document order wins.
fn extract_episode_id(html: &str) -> Option<String> {
    let pattern = r#"(?is)(?:id=["']uuid["'][^>]*value=["']|episode-id=["'])([A-Za-z0-9]+)"#;
    let regex = Regex::new(pattern).ok()?;
    let captures = regex.captures(html)?;
    Some(captures.get(1)?.as_str().to_string())
}
/// Cookie name the site expects: fixed prefix + episode id + fixed suffix.
fn build_source_cookie_name(episode_id: &str) -> String {
    let mut name = String::from(SOURCE_COOKIE_PREFIX);
    name.push_str(episode_id);
    name.push_str(SOURCE_COOKIE_SUFFIX);
    name
}
/// Hex-encoded MD5 over episode id + nonce + shared secret; this is the
/// request signature the `/ajax/get_sources` endpoint checks.
fn build_source_hash(episode_id: &str, nonce: &str) -> String {
    format!(
        "{:x}",
        md5::compute(format!("{episode_id}{nonce}{SOURCE_SECRET}"))
    )
}
/// Bumps the shared counter and derives a six-hex-digit nonce from it.
fn next_source_request(&self) -> (u32, String) {
    let count = self.source_counter.fetch_add(1, Ordering::Relaxed) + 1;
    let nonce = format!("{:06x}", count % 0xFF_FFFF);
    (count, nonce)
}
/// Calls `/ajax/get_sources/<episode>/<hash>` with the nonce cookie the
/// site's player would set, returning the parsed JSON payload.
async fn fetch_sources(
    &self,
    requester: &mut Requester,
    referer: &str,
    episode_id: &str,
) -> Option<Value> {
    let (count, nonce) = self.next_source_request();
    let source_url = format!(
        "{BASE_URL}/ajax/get_sources/{episode_id}/{hash}?count={count}&mobile=true",
        hash = Self::build_source_hash(episode_id, &nonce),
    );
    // Merge the nonce cookie with whatever the jar already holds for this
    // origin so existing session cookies are not dropped.
    let existing_cookie = requester.cookie_header_for_url(&source_url);
    let cookie_value = format!("{}={nonce}", Self::build_source_cookie_name(episode_id));
    let combined_cookie = match existing_cookie {
        Some(existing) if !existing.trim().is_empty() => format!("{existing}; {cookie_value}"),
        _ => cookie_value,
    };
    let response = requester
        .get_with_headers(
            &source_url,
            vec![
                ("Cookie".to_string(), combined_cookie),
                ("Referer".to_string(), referer.to_string()),
                // Marks the call as AJAX, matching what the player sends.
                ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()),
                (
                    "Accept".to_string(),
                    "application/json, text/javascript, */*; q=0.01".to_string(),
                ),
            ],
            Some(Version::HTTP_11),
        )
        .await
        .ok()?;
    serde_json::from_str::<Value>(&response).ok()
}
fn select_source_url(payload: &Value, quality: Option<&str>) -> Option<String> {
let sources = payload
.get("playlist")
.and_then(Value::as_array)
.into_iter()
.flatten()
.flat_map(|playlist| {
playlist
.get("sources")
.and_then(Value::as_array)
.into_iter()
.flatten()
})
.collect::<Vec<_>>();
if let Some(quality) = quality {
let quality = quality.trim().to_ascii_lowercase();
for source in &sources {
let label = source
.get("label")
.and_then(Value::as_str)
.unwrap_or_default()
.trim()
.to_ascii_lowercase();
if label == quality {
let file = source.get("file").and_then(Value::as_str)?;
return Some(Self::normalize_url(file));
}
}
}
for source in sources {
let Some(file) = source.get("file").and_then(Value::as_str) else {
continue;
};
let url = Self::normalize_url(file);
if !url.is_empty() {
return Some(url);
}
}
None
}
}
impl crate::proxies::Proxy for Pornhd3xProxy {
    /// Full resolve pipeline: normalize the endpoint, fetch the detail page,
    /// extract the episode id, query get_sources, pick a file URL.
    /// Returns an empty string on any failure.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some((detail_url, quality)) = Self::normalize_detail_request(&url) else {
            return String::new();
        };
        let mut requester = requester.get_ref().clone();
        let detail_html = match requester.get(&detail_url, Some(Version::HTTP_11)).await {
            Ok(text) => text,
            Err(_) => return String::new(),
        };
        let Some(episode_id) = Self::extract_episode_id(&detail_html) else {
            return String::new();
        };
        let Some(payload) = self
            .fetch_sources(&mut requester, &detail_url, &episode_id)
            .await
        else {
            return String::new();
        };
        Self::select_source_url(&payload, quality.as_deref()).unwrap_or_default()
    }
}
#[cfg(test)]
mod tests {
    use super::Pornhd3xProxy;
    // The `/__quality__/` suffix is stripped into a separate quality value.
    #[test]
    fn normalizes_detail_endpoint_and_quality() {
        let (url, quality) = Pornhd3xProxy::normalize_detail_request(
            "www.pornhd3x.tv/movies/example-video/__quality__/720p",
        )
        .expect("proxy target should parse");
        assert_eq!(url, "https://www.pornhd3x.tv/movies/example-video");
        assert_eq!(quality.as_deref(), Some("720p"));
    }
    // The hidden `uuid` input wins over later `episode-id` attributes.
    #[test]
    fn extracts_episode_id_from_detail_markup() {
        let html = r#"
<input id="uuid" value="49Q27JL3HCPVNJQN">
<a class="btn-eps" episode-id="OTHER"></a>
"#;
        assert_eq!(
            Pornhd3xProxy::extract_episode_id(html).as_deref(),
            Some("49Q27JL3HCPVNJQN")
        );
    }
}

297
src/proxies/shooshtime.rs Normal file
View File

@@ -0,0 +1,297 @@
use ntex::http::Response;
use ntex::http::header::{CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE};
use ntex::web::{self, HttpRequest, error};
use regex::Regex;
use url::Url;
use crate::util::requester::Requester;
// Origin used to absolutize site-relative media URLs.
const BASE_URL: &str = "https://shooshtime.com";
/// One downloadable rendition scraped from the player's flashvars.
#[derive(Debug, Clone)]
struct SourceCandidate {
    // Absolute https media URL (a `/get_file/...` path on the site).
    url: String,
    // Human-readable label such as "480p" or "720p".
    quality: String,
}
/// Stateless scraper/relay for shooshtime.com video pages.
#[derive(Debug, Clone)]
pub struct ShooshtimeProxy {}
impl ShooshtimeProxy {
/// Splits the proxied endpoint into a canonical detail URL plus an optional
/// requested quality taken from a `/__quality__/<label>` suffix.
fn normalize_detail_request(endpoint: &str) -> Option<(String, Option<String>)> {
    let trimmed = endpoint.trim().trim_start_matches('/');
    if trimmed.is_empty() {
        return None;
    }
    let (detail_part, quality) =
        if let Some((detail, quality)) = trimmed.split_once("/__quality__/") {
            (detail, Some(quality.replace("%20", " ").trim().to_string()))
        } else {
            (trimmed, None)
        };
    let mut detail_url = if detail_part.starts_with("http://") || detail_part.starts_with("https://")
    {
        detail_part.to_string()
    } else {
        format!("https://{}", detail_part.trim_start_matches('/'))
    };
    // Video pages are addressed with a trailing slash.
    if detail_url.contains("/videos/") && !detail_url.ends_with('/') {
        detail_url.push('/');
    }
    if !Self::is_allowed_detail_url(&detail_url) {
        return None;
    }
    Some((detail_url, quality.filter(|value| !value.is_empty())))
}
/// True only for https `/videos/...` pages on shooshtime.com.
fn is_allowed_detail_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    parsed.scheme() == "https"
        && matches!(parsed.host_str(), Some("shooshtime.com" | "www.shooshtime.com"))
        && parsed.path().starts_with("/videos/")
}
/// True only for https `/get_file/...` media URLs on shooshtime.com.
fn is_allowed_media_url(url: &str) -> bool {
    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    parsed.scheme() == "https"
        && matches!(parsed.host_str(), Some("shooshtime.com" | "www.shooshtime.com"))
        && parsed.path().starts_with("/get_file/")
}
/// Canonicalizes a flashvars URL: undoes JSON-escaped slashes, absolutizes
/// protocol-relative and site-relative references, upgrades http to https.
fn normalize_url(raw: &str) -> String {
    let cleaned = raw.trim().replace("\\/", "/");
    if cleaned.is_empty() {
        String::new()
    } else if cleaned.starts_with("//") {
        format!("https:{cleaned}")
    } else if cleaned.starts_with('/') {
        format!("{BASE_URL}{cleaned}")
    } else if cleaned.starts_with("http://") {
        cleaned.replacen("http://", "https://", 1)
    } else {
        cleaned
    }
}
/// Compiles `value`, returning `None` instead of panicking on a bad pattern.
fn regex(value: &str) -> Option<Regex> {
    Regex::new(value).ok()
}
/// Captures group 1 of `regex` within `block` and undoes the JS string
/// escapes (`\/` and `\'`) the player markup uses.
fn extract_js_value(block: &str, regex: &Regex) -> Option<String> {
    let capture = regex.captures(block)?.get(1)?;
    Some(capture.as_str().replace("\\/", "/").replace("\\'", "'"))
}
/// Parses the page's `flashvars` object for the primary and alternate
/// renditions, keeping only media URLs on the allowed host.
fn extract_sources(html: &str) -> Vec<SourceCandidate> {
    let Some(flashvars_regex) = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#) else {
        return vec![];
    };
    let Some(flashvars) = flashvars_regex
        .captures(html)
        .and_then(|value| value.get(1))
        .map(|value| value.as_str().to_string())
    else {
        return vec![];
    };
    let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'([^']*)'"#));
    let (
        Some(primary_url_regex),
        Some(primary_quality_regex),
        Some(alt_url_regex),
        Some(alt_quality_regex),
    ) = (
        value_regex("video_url"),
        value_regex("video_url_text"),
        value_regex("video_alt_url"),
        value_regex("video_alt_url_text"),
    )
    else {
        return vec![];
    };
    // (url regex, quality-label regex, default label) for each slot.
    let slots = [
        (&primary_url_regex, &primary_quality_regex, "480p"),
        (&alt_url_regex, &alt_quality_regex, "720p"),
    ];
    let mut sources = Vec::new();
    for (url_regex, quality_regex, default_label) in slots {
        let Some(url) = Self::extract_js_value(&flashvars, url_regex) else {
            continue;
        };
        let normalized = Self::normalize_url(&url);
        if normalized.is_empty() || !Self::is_allowed_media_url(&normalized) {
            continue;
        }
        sources.push(SourceCandidate {
            url: normalized,
            quality: Self::extract_js_value(&flashvars, quality_regex)
                .unwrap_or_else(|| default_label.to_string()),
        });
    }
    sources
}
/// Numeric rank for a quality label ("720p" -> 720, no digits -> 0).
///
/// Uses only the FIRST contiguous run of digits. The previous version
/// concatenated every digit in the label, so a label like "720p60" scored
/// 72060 and incorrectly outranked "1080p" in `select_source_url`.
fn quality_score(label: &str) -> u32 {
    let digits: String = label
        .chars()
        .skip_while(|value| !value.is_ascii_digit())
        .take_while(|value| value.is_ascii_digit())
        .collect();
    digits.parse::<u32>().unwrap_or(0)
}
/// Picks the rendition matching `quality` (case-insensitive label match)
/// or, failing that, the highest-scoring one.
fn select_source_url(html: &str, quality: Option<&str>) -> Option<String> {
    let sources = Self::extract_sources(html);
    if sources.is_empty() {
        return None;
    }
    if let Some(wanted) = quality {
        let wanted = wanted.trim().to_ascii_lowercase();
        let exact = sources
            .iter()
            .find(|source| source.quality.trim().to_ascii_lowercase() == wanted);
        if let Some(source) = exact {
            return Some(source.url.clone());
        }
    }
    let best = sources
        .iter()
        .max_by_key(|source| Self::quality_score(&source.quality))?;
    Some(best.url.clone())
}
}
/// Streams the selected shooshtime rendition to the client.
///
/// Forwards the client's Range header upstream and relays Content-Type,
/// Content-Length, Content-Range and Accept-Ranges back so seeking works.
pub async fn serve_media(
    req: HttpRequest,
    requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
    let endpoint = req.match_info().query("endpoint").to_string();
    let Some((detail_url, quality)) = ShooshtimeProxy::normalize_detail_request(&endpoint) else {
        return Ok(web::HttpResponse::BadRequest().finish());
    };
    let mut requester = requester.get_ref().clone();
    let html = match requester.get(&detail_url, None).await {
        Ok(html) => html,
        Err(_) => return Ok(web::HttpResponse::BadGateway().finish()),
    };
    let Some(source_url) = ShooshtimeProxy::select_source_url(&html, quality.as_deref()) else {
        return Ok(web::HttpResponse::BadGateway().finish());
    };
    // Send the detail page as Referer for the media request.
    let mut headers = vec![("Referer".to_string(), detail_url)];
    if let Some(range) = req
        .headers()
        .get("Range")
        .and_then(|value| value.to_str().ok())
    {
        headers.push(("Range".to_string(), range.to_string()));
    }
    let upstream = match requester.get_raw_with_headers(&source_url, headers).await {
        Ok(response) => response,
        Err(_) => return Ok(web::HttpResponse::BadGateway().finish()),
    };
    let status = upstream.status();
    let upstream_headers = upstream.headers().clone();
    // NOTE(review): buffers the whole upstream body in memory before
    // responding — confirm this is acceptable for large files.
    let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
    let mut response = Response::build(status);
    if let Some(value) = upstream_headers
        .get(CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
    {
        response.set_header(CONTENT_TYPE, value);
    }
    if let Some(value) = upstream_headers
        .get(CONTENT_LENGTH)
        .and_then(|value| value.to_str().ok())
    {
        response.set_header(CONTENT_LENGTH, value);
    }
    if let Some(value) = upstream_headers
        .get(CONTENT_RANGE)
        .and_then(|value| value.to_str().ok())
    {
        response.set_header(CONTENT_RANGE, value);
    }
    if let Some(value) = upstream_headers
        .get("Accept-Ranges")
        .and_then(|value| value.to_str().ok())
    {
        response.set_header("Accept-Ranges", value);
    }
    Ok(response.body(bytes.to_vec()))
}
#[cfg(test)]
mod tests {
    use super::ShooshtimeProxy;
    // Video URLs gain a scheme and a trailing slash; quality splits off.
    #[test]
    fn normalizes_detail_endpoint_and_quality() {
        let (url, quality) = ShooshtimeProxy::normalize_detail_request(
            "shooshtime.com/videos/example/123/__quality__/720p",
        )
        .expect("proxy target should parse");
        assert_eq!(url, "https://shooshtime.com/videos/example/123/");
        assert_eq!(quality.as_deref(), Some("720p"));
    }
    // Exact label match wins; without a request, the best score wins.
    #[test]
    fn selects_requested_or_best_quality() {
        let html = r#"
<script>
var flashvars = {
    video_url: 'https://shooshtime.com/get_file/1/token/1/2/3.mp4/?x=1',
    video_url_text: '480p',
    video_alt_url: 'https://shooshtime.com/get_file/1/token/1/2/3_720p.mp4/?x=2',
    video_alt_url_text: '720p'
};
</script>
"#;
        assert_eq!(
            ShooshtimeProxy::select_source_url(html, Some("480p")).as_deref(),
            Some("https://shooshtime.com/get_file/1/token/1/2/3.mp4/?x=1")
        );
        assert_eq!(
            ShooshtimeProxy::select_source_url(html, None).as_deref(),
            Some("https://shooshtime.com/get_file/1/token/1/2/3_720p.mp4/?x=2")
        );
    }
}

View File

@@ -1,6 +1,10 @@
use ntex::web::{self, HttpRequest};
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::javtiful::JavtifulProxy;
use crate::proxies::pimpbunny::PimpbunnyProxy;
use crate::proxies::porndish::PorndishProxy;
use crate::proxies::pornhd3x::Pornhd3xProxy;
use crate::proxies::spankbang::SpankbangProxy;
use crate::proxies::sxyprn::SxyprnProxy;
use crate::proxies::*;
@@ -8,6 +12,11 @@ use crate::util::requester::Requester;
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("/doodstream/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/sxyprn/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
@@ -22,11 +31,36 @@ pub fn config(cfg: &mut web::ServiceConfig) {
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/porndish/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/pornhd3x/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/shooshtime/{endpoint}*")
.route(web::post().to(crate::proxies::shooshtime::serve_media))
.route(web::get().to(crate::proxies::shooshtime::serve_media)),
)
.service(
web::resource("/pimpbunny/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/noodlemagazine/{endpoint}*")
.route(web::post().to(crate::proxies::noodlemagazine::serve_media))
.route(web::get().to(crate::proxies::noodlemagazine::serve_media)),
)
.service(
web::resource("/noodlemagazine-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::noodlemagazine::get_image))
.route(web::get().to(crate::proxies::noodlemagazine::get_image)),
)
.service(
web::resource("/hanime-cdn/{endpoint}*")
.route(web::post().to(crate::proxies::hanimecdn::get_image))
@@ -36,6 +70,16 @@ pub fn config(cfg: &mut web::ServiceConfig) {
web::resource("/hqporner-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::hqpornerthumb::get_image))
.route(web::get().to(crate::proxies::hqpornerthumb::get_image)),
)
.service(
web::resource("/porndish-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::porndishthumb::get_image))
.route(web::get().to(crate::proxies::porndishthumb::get_image)),
);
cfg.service(
web::resource("/pimpbunny-thumb/{endpoint}*")
.route(web::post().to(crate::proxies::pimpbunnythumb::get_image))
.route(web::get().to(crate::proxies::pimpbunnythumb::get_image)),
);
}
@@ -56,8 +100,12 @@ async fn proxy2redirect(
fn get_proxy(proxy: &str) -> Option<AnyProxy> {
match proxy {
"doodstream" => Some(AnyProxy::Doodstream(DoodstreamProxy::new())),
"sxyprn" => Some(AnyProxy::Sxyprn(SxyprnProxy::new())),
"javtiful" => Some(AnyProxy::Javtiful(JavtifulProxy::new())),
"pornhd3x" => Some(AnyProxy::Pornhd3x(Pornhd3xProxy::new())),
"pimpbunny" => Some(AnyProxy::Pimpbunny(PimpbunnyProxy::new())),
"porndish" => Some(AnyProxy::Porndish(PorndishProxy::new())),
"spankbang" => Some(AnyProxy::Spankbang(SpankbangProxy::new())),
_ => None,
}

View File

@@ -22,6 +22,15 @@ pub struct Channel {
pub cacheDuration: Option<u32>, //Some(86400)
}
#[derive(serde::Serialize, Debug, Clone)]
pub struct ChannelGroup {
pub id: String,
pub title: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub systemImage: Option<String>,
pub channelIds: Vec<String>,
}
#[derive(serde::Serialize)]
pub struct ChannelOption {
pub id: String, //"channels",
@@ -109,20 +118,49 @@ impl Status {
.to_string(),
}
}
// Convenience mutators for building a Status incrementally; kept under
// #[allow(dead_code)] because not every build path uses them.
#[allow(dead_code)]
pub fn add_notice(&mut self, notice: Notice) {
    self.notices.push(notice);
}
#[allow(dead_code)]
pub fn add_channel(&mut self, channel: Channel) {
    self.channels.push(channel);
}
#[allow(dead_code)]
pub fn add_option(&mut self, option: Options) {
    self.options.push(option);
}
#[allow(dead_code)]
pub fn add_category(&mut self, category: String) {
    self.categories.push(category);
}
}
/// Serialized per-channel view sent to clients; optional fields are
/// omitted from the JSON entirely when `None`.
#[derive(serde::Serialize)]
pub struct ChannelView {
    pub id: String,
    pub name: String,
    pub description: String,
    pub premium: bool,
    pub favicon: String,
    pub status: String,
    pub categories: Vec<String>,
    pub options: Vec<ChannelOption>,
    pub nsfw: bool,
    // Key of the ChannelGroup this channel belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub groupKey: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sortOrder: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tags: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ytdlpCommand: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cacheDuration: Option<u32>,
}
/// Top-level payload of the status endpoint: service metadata plus the
/// full channel, group, and option listings.
#[derive(serde::Serialize)]
pub struct StatusResponse {
    pub id: String,
    pub name: String,
    pub subtitle: String,
    pub description: String,
    pub iconUrl: String,
    pub color: String,
    pub status: String,
    pub notices: Vec<Notice>,
    pub channels: Vec<ChannelView>,
    pub channelGroups: Vec<ChannelGroup>,
    pub subscription: Subscription,
    pub nsfw: bool,
    pub categories: Vec<String>,
    pub options: Vec<Options>,
    pub filtersFooter: String,
}

216
src/uploaders.rs Normal file
View File

@@ -0,0 +1,216 @@
use chrono::{SecondsFormat, TimeZone, Utc};
use serde::{Deserialize, Serialize};
use crate::videos::VideoItem;
/// Query parameters for the uploaders endpoint; snake_case aliases keep
/// older clients compatible with the camelCase field names.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct UploadersRequest {
    #[serde(default, alias = "uploader_id")]
    pub uploaderId: Option<String>,
    #[serde(default, alias = "uploader_name")]
    pub uploaderName: Option<String>,
    // NOTE(review): presumably toggles returning the full profile payload
    // rather than a bare lookup — confirm against the handler.
    #[serde(default, alias = "profile_content")]
    pub profileContent: bool,
    #[serde(default)]
    pub query: Option<String>,
}
impl UploadersRequest {
    /// Returns a copy with every string field trimmed and blank strings
    /// collapsed to `None` (via `normalize_optional_string`).
    pub fn normalized(self) -> Self {
        Self {
            uploaderId: normalize_optional_string(self.uploaderId),
            uploaderName: normalize_optional_string(self.uploaderName),
            profileContent: self.profileContent,
            query: normalize_optional_string(self.query),
        }
    }
}
/// Aggregated uploader profile: identity, counters, and optional content
/// sections (videos, tapes, playlists, layout rows).
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct UploaderProfile {
    pub id: String,
    pub name: String,
    pub url: Option<String>,
    pub channel: Option<String>,
    pub verified: bool,
    pub videoCount: u64,
    pub totalViews: u64,
    // Per-channel breakdown of this uploader's videos.
    #[serde(default)]
    pub channels: Option<Vec<UploaderChannelStat>>,
    // Accepts legacy `profile_picture_url` key on input.
    #[serde(default, alias = "profile_picture_url")]
    pub avatar: Option<String>,
    pub description: Option<String>,
    pub bio: Option<String>,
    #[serde(default)]
    pub videos: Option<Vec<UploaderVideoRef>>,
    #[serde(default)]
    pub tapes: Option<Vec<serde_json::Value>>,
    #[serde(default)]
    pub playlists: Option<Vec<serde_json::Value>>,
    // Optional custom ordering/sectioning of the profile page.
    #[serde(default)]
    pub layout: Option<Vec<UploaderLayoutRow>>,
}
/// Per-channel statistics for one uploader.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct UploaderChannelStat {
    pub channel: String,
    pub videoCount: u64,
    // Timestamps accept legacy snake_case keys on input.
    #[serde(default, alias = "first_seen_at")]
    pub firstSeenAt: Option<String>,
    #[serde(default, alias = "last_seen_at")]
    pub lastSeenAt: Option<String>,
}
/// Lightweight video reference embedded in an uploader profile; snake_case
/// aliases keep older payloads decodable.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct UploaderVideoRef {
    pub id: String,
    pub url: String,
    pub title: String,
    pub duration: u32,
    pub channel: String,
    #[serde(default, alias = "uploaded_at")]
    pub uploadedAt: Option<String>,
    pub uploader: String,
    #[serde(alias = "uploader_id")]
    pub uploaderId: String,
    pub thumb: String,
    pub preview: Option<String>,
    pub views: u32,
    pub rating: u32,
    #[serde(default, alias = "aspect_ratio")]
    pub aspectRatio: Option<f32>,
}
impl UploaderVideoRef {
    /// Converts a feed `VideoItem` into the uploader-profile video shape,
    /// falling back to the profile's name/id when the item lacks its own,
    /// converting the unix upload time to an ISO timestamp, and clamping
    /// the rating to an integer 0-100.
    pub fn from_video_item(item: &VideoItem, uploader_name: &str, uploader_id: &str) -> Self {
        Self {
            id: item.id.clone(),
            url: item.url.clone(),
            title: item.title.clone(),
            duration: item.duration,
            channel: item.channel.clone(),
            uploadedAt: iso_timestamp_from_unix(item.uploadedAt),
            uploader: item
                .uploader
                .clone()
                .unwrap_or_else(|| uploader_name.to_string()),
            uploaderId: item
                .uploaderId
                .clone()
                .unwrap_or_else(|| uploader_id.to_string()),
            thumb: item.thumb.clone(),
            preview: item.preview.clone(),
            views: item.views.unwrap_or_default(),
            rating: item.rating.map(normalize_rating).unwrap_or_default(),
            aspectRatio: item.aspectRatio,
        }
    }
}
/// One row of an uploader profile layout; the `type` field picks the
/// rendering style, `videoIds` pins specific videos where applicable.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct UploaderLayoutRow {
    #[serde(rename = "type")]
    pub rowType: UploaderLayoutRowType,
    pub title: Option<String>,
    #[serde(default, alias = "video_ids")]
    pub videoIds: Option<Vec<String>>,
}
impl UploaderLayoutRow {
    /// Builds a horizontal row listing exactly the given video ids.
    pub fn horizontal(title: Option<String>, video_ids: Vec<String>) -> Self {
        Self {
            rowType: UploaderLayoutRowType::Horizontal,
            title,
            videoIds: Some(video_ids),
        }
    }
    /// Builds a plain videos row with no pinned video ids.
    pub fn videos(title: Option<String>) -> Self {
        Self {
            rowType: UploaderLayoutRowType::Videos,
            title,
            videoIds: None,
        }
    }
}
/// Rendering style of a layout row; `horizontal_videos` is accepted as a
/// legacy alias for `horizontal` on input.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq)]
pub enum UploaderLayoutRowType {
    #[default]
    #[serde(rename = "videos")]
    Videos,
    #[serde(rename = "horizontal", alias = "horizontal_videos")]
    Horizontal,
}
/// Trims the contained string and maps blank or missing values to `None`.
pub fn normalize_optional_string(value: Option<String>) -> Option<String> {
    match value {
        Some(raw) => {
            let trimmed = raw.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_string())
            }
        }
        None => None,
    }
}
/// Converts a unix timestamp (seconds) into an RFC 3339 UTC string with
/// millisecond precision and a trailing `Z`; `None` for missing or
/// out-of-range inputs.
pub fn iso_timestamp_from_unix(value: Option<u64>) -> Option<String> {
    let timestamp = value?;
    let dt = Utc.timestamp_opt(timestamp as i64, 0).single()?;
    Some(dt.to_rfc3339_opts(SecondsFormat::Millis, true))
}
/// Clamps a float rating into 0-100 and rounds to the nearest integer.
fn normalize_rating(value: f32) -> u32 {
    let bounded = value.clamp(0.0, 100.0);
    bounded.round() as u32
}
#[cfg(test)]
mod tests {
    use super::*;
    // snake_case request keys decode via the serde aliases.
    #[test]
    fn request_accepts_snake_case_aliases() {
        let request: UploadersRequest = serde_json::from_str(
            r#"{
                "uploader_id": "hsex:xihongshiddd",
                "uploader_name": "xihongshiddd",
                "profile_content": true,
                "query": "teacher"
            }"#,
        )
        .expect("request should decode");
        assert_eq!(request.uploaderId.as_deref(), Some("hsex:xihongshiddd"));
        assert_eq!(request.uploaderName.as_deref(), Some("xihongshiddd"));
        assert!(request.profileContent);
        assert_eq!(request.query.as_deref(), Some("teacher"));
    }
    // Legacy "horizontal_videos" type and "video_ids" key still decode.
    #[test]
    fn layout_aliases_decode() {
        let row: UploaderLayoutRow = serde_json::from_str(
            r#"{
                "type": "horizontal_videos",
                "title": "For You",
                "video_ids": ["one", "two"]
            }"#,
        )
        .expect("row should decode");
        assert_eq!(row.rowType, UploaderLayoutRowType::Horizontal);
        assert_eq!(row.videoIds.as_ref().map(Vec::len), Some(2));
    }
    // Legacy "profile_picture_url" maps onto the `avatar` field.
    #[test]
    fn avatar_alias_decodes() {
        let profile: UploaderProfile = serde_json::from_str(
            r#"{
                "id": "abc",
                "name": "Example",
                "verified": false,
                "videoCount": 1,
                "totalViews": 2,
                "profile_picture_url": "https://example.com/a.jpg"
            }"#,
        )
        .expect("profile should decode");
        assert_eq!(profile.avatar.as_deref(), Some("https://example.com/a.jpg"));
    }
}

41
src/util/flow_debug.rs Normal file
View File

@@ -0,0 +1,41 @@
use std::sync::atomic::{AtomicU64, Ordering};
#[cfg(feature = "debug")]
use std::time::{SystemTime, UNIX_EPOCH};
// Process-wide monotonically increasing trace counter (first id is 1).
static NEXT_TRACE_ID: AtomicU64 = AtomicU64::new(1);
/// Returns a unique trace id of the form `"<prefix>-000042"`.
pub fn next_trace_id(prefix: &str) -> String {
    let sequence = NEXT_TRACE_ID.fetch_add(1, Ordering::Relaxed);
    format!("{prefix}-{sequence:06}")
}
/// Writes a timestamped debug line to stderr; compiled in only when the
/// `debug` feature is enabled (see the `flow_debug!` macro).
#[cfg(feature = "debug")]
pub fn emit(module: &str, line: u32, message: String) {
    // Milliseconds since the unix epoch; 0 if the clock is before 1970.
    let millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|duration| duration.as_millis())
        .unwrap_or_default();
    eprintln!("[debug][{millis}][{module}:{line}] {message}");
}
/// Truncates `value` to at most `limit` bytes — backing off to a UTF-8
/// character boundary so the slice cannot panic — and appends "..." when
/// anything was cut.
pub fn preview(value: &str, limit: usize) -> String {
    if value.len() <= limit {
        return value.to_string();
    }
    let mut cut = limit;
    // Walk left until the cut lands on a character boundary.
    while !value.is_char_boundary(cut) {
        cut -= 1;
    }
    let mut shortened = value[..cut].to_string();
    shortened.push_str("...");
    shortened
}
/// Format-style debug logging macro; expands to nothing unless the crate
/// is built with the `debug` feature, so release builds pay zero cost.
#[macro_export]
macro_rules! flow_debug {
    ($($arg:tt)*) => {{
        #[cfg(feature = "debug")]
        {
            $crate::util::flow_debug::emit(module_path!(), line!(), format!($($arg)*));
        }
    }};
}

84
src/util/hoster_proxy.rs Normal file
View File

@@ -0,0 +1,84 @@
use url::Url;
use crate::providers::{build_proxy_url, strip_url_scheme};
use crate::videos::ServerOptions;
// Hostnames (lowercase) whose embeds are routed through the doodstream
// proxy handler by `proxy_name_for_url`.
const DOODSTREAM_HOSTS: &[&str] = &[
    "turboplayers.xyz",
    "www.turboplayers.xyz",
    "trailerhg.xyz",
    "www.trailerhg.xyz",
    "streamhg.com",
    "www.streamhg.com",
];
/// Maps a hoster URL to its internal proxy route name, if the host is one
/// we recognize; case-insensitive on the host.
pub fn proxy_name_for_url(url: &str) -> Option<&'static str> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?.to_ascii_lowercase();
    DOODSTREAM_HOSTS
        .contains(&host.as_str())
        .then_some("doodstream")
}
/// Rewrites known hoster URLs through the local proxy route; URLs with an
/// unrecognized host pass through unchanged.
pub fn rewrite_hoster_url(options: &ServerOptions, url: &str) -> String {
    if let Some(proxy_name) = proxy_name_for_url(url) {
        build_proxy_url(options, proxy_name, &strip_url_scheme(url))
    } else {
        url.to_string()
    }
}
#[cfg(test)]
mod tests {
    use super::{proxy_name_for_url, rewrite_hoster_url};
    use crate::videos::ServerOptions;
    // Minimal options with only the public base URL set.
    fn options() -> ServerOptions {
        ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("https://example.com".to_string()),
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        }
    }
    #[test]
    fn matches_doodstream_family_hosts() {
        assert_eq!(
            proxy_name_for_url("https://turboplayers.xyz/t/69bdfb21cc640"),
            Some("doodstream")
        );
        assert_eq!(
            proxy_name_for_url("https://trailerhg.xyz/e/ttdc7a6qpskt"),
            Some("doodstream")
        );
        assert_eq!(
            proxy_name_for_url("https://streamhg.com/about"),
            Some("doodstream")
        );
        assert_eq!(proxy_name_for_url("https://example.com/video"), None);
    }
    // Known hosts are rewritten to /proxy/<name>/<host+path>; others pass.
    #[test]
    fn rewrites_known_hoster_urls_to_proxy_urls() {
        assert_eq!(
            rewrite_hoster_url(&options(), "https://turboplayers.xyz/t/69bdfb21cc640"),
            "https://example.com/proxy/doodstream/turboplayers.xyz/t/69bdfb21cc640"
        );
        assert_eq!(
            rewrite_hoster_url(&options(), "https://example.com/video"),
            "https://example.com/video"
        );
    }
}

View File

@@ -1,6 +1,8 @@
pub mod cache;
pub mod discord;
pub mod flaresolverr;
pub mod flow_debug;
pub mod hoster_proxy;
pub mod proxy;
pub mod requester;
pub mod time;
@@ -26,6 +28,7 @@ pub fn parse_abbreviated_number(s: &str) -> Option<u32> {
.map(|n| (n * multiplier) as u32)
}
#[cfg(not(hottub_single_provider))]
pub fn interleave<T: Clone>(lists: &[Vec<T>]) -> Vec<T> {
let mut result = Vec::new();

View File

@@ -1,13 +1,15 @@
use serde::Serialize;
use std::env;
use std::fmt;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use std::time::Duration;
use wreq::Client;
use wreq::Proxy;
use wreq::Response;
use wreq::Uri;
use wreq::Version;
use wreq::cookie::Jar;
use wreq::header::{HeaderMap, HeaderValue, USER_AGENT};
use wreq::cookie::{CookieStore, Cookies, Jar};
use wreq::header::{HeaderMap, HeaderValue, SET_COOKIE, USER_AGENT};
use wreq::multipart::Form;
use wreq::redirect::Policy;
use wreq_util::Emulation;
@@ -25,6 +27,8 @@ pub struct Requester {
client: Client,
#[serde(skip)]
cookie_jar: Arc<Jar>,
#[serde(skip)]
debug_trace_id: Option<String>,
proxy: bool,
flaresolverr_session: Option<String>,
user_agent: Option<String>,
@@ -34,6 +38,7 @@ impl fmt::Debug for Requester {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Requester")
.field("proxy", &self.proxy)
.field("debug_trace_id", &self.debug_trace_id)
.field("flaresolverr_session", &self.flaresolverr_session)
.field("user_agent", &self.user_agent)
.finish()
@@ -41,6 +46,150 @@ impl fmt::Debug for Requester {
}
impl Requester {
fn shared_cookie_jar() -> Arc<Jar> {
static SHARED_COOKIE_JAR: OnceLock<Arc<Jar>> = OnceLock::new();
SHARED_COOKIE_JAR
.get_or_init(|| Arc::new(Jar::default()))
.clone()
}
/// Reduces `url` to its scheme+host origin (e.g. `https://example.com/`),
/// the scope under which cookies are stored. Returns `None` when the URL
/// cannot be parsed or has no host component.
fn origin_url_for_cookie_scope(url: &str) -> Option<url::Url> {
    let parsed = url::Url::parse(url).ok()?;
    let origin = format!("{}://{}/", parsed.scheme(), parsed.host_str()?);
    url::Url::parse(&origin).ok()
}
/// Copies every parseable `Set-Cookie` header from `response` into the
/// jar, scoped to the origin of `url`. Silently does nothing when the
/// origin cannot be derived.
fn store_response_cookies(&self, url: &str, response: &Response) {
    let Some(origin) = Self::origin_url_for_cookie_scope(url) else {
        return;
    };
    let origin = origin.to_string();
    response
        .headers()
        .get_all(SET_COOKIE)
        .iter()
        .filter_map(|value| value.to_str().ok())
        .for_each(|cookie| self.cookie_jar.add_cookie_str(cookie, &origin));
}
/// Persists cookies returned by FlareSolverr into the cookie jar.
///
/// Each cookie is scoped to its declared `Domain` when present (falling
/// back to the origin of `request_url` if that domain does not parse),
/// otherwise to the request origin. Cookies whose scope cannot be
/// resolved at all are skipped.
fn store_flaresolverr_cookies(
    &mut self,
    request_url: &str,
    cookies: &[crate::util::flaresolverr::FlaresolverrCookie],
) {
    let fallback_origin = Self::origin_url_for_cookie_scope(request_url);
    for cookie in cookies {
        let origin = if cookie.domain.is_empty() {
            fallback_origin.clone()
        } else {
            // Reuse the request's scheme when we know it; default to https.
            let scheme = fallback_origin
                .as_ref()
                .map(|url| url.scheme())
                .unwrap_or("https");
            let host = cookie.domain.trim_start_matches('.');
            url::Url::parse(&format!("{scheme}://{host}/"))
                .ok()
                .or_else(|| fallback_origin.clone())
        };
        let Some(origin) = origin else {
            continue;
        };
        // Reassemble a Set-Cookie style string the jar can parse.
        let mut attrs = format!("{}={}; Path={}", cookie.name, cookie.value, cookie.path);
        if !cookie.domain.is_empty() {
            attrs.push_str(&format!("; Domain={}", cookie.domain));
        }
        if cookie.secure {
            attrs.push_str("; Secure");
        }
        if cookie.httpOnly {
            attrs.push_str("; HttpOnly");
        }
        if let Some(same_site) = cookie.sameSite.as_deref().filter(|s| !s.is_empty()) {
            attrs.push_str(&format!("; SameSite={same_site}"));
        }
        self.cookie_jar.add_cookie_str(&attrs, &origin.to_string());
    }
}
/// Produces a truncated cookie preview for flow-debug logging: prefers an
/// explicit `Cookie` header from `headers`, then the jar's cookies for
/// `url`, and finally the literal `"none"`.
fn debug_cookie_preview_from_owned_headers(
    &self,
    url: &str,
    headers: &[(String, String)],
) -> String {
    let explicit = headers
        .iter()
        .find(|(key, _)| key.eq_ignore_ascii_case("cookie"))
        .map(|(_, value)| crate::util::flow_debug::preview(value, 160));
    match explicit {
        Some(preview) => preview,
        None => self
            .cookie_header_for_url(url)
            .map(|cookie| crate::util::flow_debug::preview(&cookie, 160))
            .unwrap_or_else(|| "none".to_string()),
    }
}
/// Maps `url` onto the r.jina.ai text mirror. URLs already pointing at
/// the mirror are returned untouched; blank input yields `None`.
fn jina_mirror_url(url: &str) -> Option<String> {
    if url.trim().is_empty() {
        return None;
    }
    let already_mirrored =
        url.starts_with("https://r.jina.ai/") || url.starts_with("http://r.jina.ai/");
    if already_mirrored {
        return Some(url.to_string());
    }
    // Drop the scheme; the mirror is always addressed via its http form.
    let stripped = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .unwrap_or(url);
    Some(format!("https://r.jina.ai/http://{stripped}"))
}
/// Fetches `url` through the r.jina.ai mirror and returns the response
/// body, failing on an unmappable URL or a non-success mirror status.
async fn fetch_jina_mirror_body(
    cookie_jar: Arc<Jar>,
    user_agent: Option<String>,
    proxy_enabled: bool,
    url: &str,
) -> Result<String, AnyErr> {
    let mirror_url = Self::jina_mirror_url(url).ok_or("invalid mirror url")?;
    let client = Self::build_client(cookie_jar, user_agent.as_deref());
    let mut request = client.get(&mirror_url).version(Version::HTTP_11);
    // Route through the debugging proxy when enabled and configured.
    if proxy_enabled {
        if let Ok(proxy_url) = env::var("BURP_URL") {
            request = request.proxy(Proxy::all(&proxy_url)?);
        }
    }
    let response = request.send().await?;
    if response.status().is_success() {
        Ok(response.text().await?)
    } else {
        Err(format!("jina mirror returned status {}", response.status()).into())
    }
}
/// Borrowed-header twin of `debug_cookie_preview_from_owned_headers`:
/// prefers an explicit `Cookie` header, then the jar, then `"none"`.
#[cfg(any(not(hottub_single_provider), hottub_provider = "hypnotube"))]
fn debug_cookie_preview_from_borrowed_headers(
    &self,
    url: &str,
    headers: &[(&str, &str)],
) -> String {
    headers
        .iter()
        .find(|(key, _)| key.eq_ignore_ascii_case("cookie"))
        .map(|(_, value)| crate::util::flow_debug::preview(value, 160))
        .unwrap_or_else(|| {
            self.cookie_header_for_url(url)
                .map(|cookie| crate::util::flow_debug::preview(&cookie, 160))
                .unwrap_or_else(|| "none".to_string())
        })
}
fn build_client(cookie_jar: Arc<Jar>, user_agent: Option<&str>) -> Client {
let mut builder = Client::builder()
.cert_verification(false)
@@ -60,12 +209,13 @@ impl Requester {
}
pub fn new() -> Self {
let cookie_jar = Arc::new(Jar::default());
let cookie_jar = Self::shared_cookie_jar();
let client = Self::build_client(cookie_jar.clone(), None);
let requester = Requester {
client,
cookie_jar,
debug_trace_id: None,
proxy: false,
flaresolverr_session: None,
user_agent: None,
@@ -83,7 +233,46 @@ impl Requester {
self.proxy = proxy;
}
/// Sets (or clears, with `None`) the trace id used to correlate this
/// requester's flow-debug log lines.
pub fn set_debug_trace_id(&mut self, debug_trace_id: Option<String>) {
    self.debug_trace_id = debug_trace_id;
}
/// Returns the current debug trace id, if one is set. Only compiled with
/// the `debug` feature, where it feeds the `flow_debug!` log lines.
#[cfg(feature = "debug")]
pub fn debug_trace_id(&self) -> Option<&str> {
    self.debug_trace_id.as_deref()
}
/// Renders the jar's cookies for `url` as a single `Cookie` header value.
/// Returns `None` when the URL is unparsable or no cookies apply.
pub fn cookie_header_for_url(&self, url: &str) -> Option<String> {
    let parsed = url.parse::<Uri>().ok()?;
    match self.cookie_jar.cookies(&parsed) {
        Cookies::Compressed(value) => value.to_str().ok().map(ToOwned::to_owned),
        Cookies::Uncompressed(values) => {
            let joined = values
                .into_iter()
                .filter_map(|value| value.to_str().ok().map(ToOwned::to_owned))
                .collect::<Vec<_>>()
                .join("; ");
            if joined.is_empty() { None } else { Some(joined) }
        }
        Cookies::Empty => None,
        // `Cookies` may grow variants; treat anything else as "no cookies".
        _ => None,
    }
}
pub async fn get_raw(&mut self, url: &str) -> Result<Response, wreq::Error> {
let cookie_preview = self
.cookie_header_for_url(url)
.map(|cookie| crate::util::flow_debug::preview(&cookie, 160))
.unwrap_or_else(|| "none".to_string());
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester get_raw url={} cookies={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
cookie_preview,
self.proxy
);
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
let mut request = client.get(url).version(Version::HTTP_11);
@@ -95,7 +284,9 @@ impl Requester {
}
}
request.send().await
let response = request.send().await?;
self.store_response_cookies(url, &response);
Ok(response)
}
pub async fn get_raw_with_headers(
@@ -103,9 +294,32 @@ impl Requester {
url: &str,
headers: Vec<(String, String)>,
) -> Result<Response, wreq::Error> {
self.get_raw_with_headers_timeout(url, headers, None).await
}
pub async fn get_raw_with_headers_timeout(
&mut self,
url: &str,
headers: Vec<(String, String)>,
timeout: Option<Duration>,
) -> Result<Response, wreq::Error> {
let cookie_preview = self.debug_cookie_preview_from_owned_headers(url, &headers);
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester get_raw_with_headers url={} headers={} cookies={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
headers.len(),
cookie_preview,
self.proxy
);
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
let mut request = client.get(url).version(Version::HTTP_11);
if let Some(timeout) = timeout {
request = request.timeout(timeout);
}
if self.proxy {
if let Ok(proxy_url) = env::var("BURP_URL") {
@@ -117,7 +331,9 @@ impl Requester {
for (key, value) in headers.iter() {
request = request.header(key, value);
}
request.send().await
let response = request.send().await?;
self.store_response_cookies(url, &response);
Ok(response)
}
pub async fn post_json<S>(
@@ -129,6 +345,17 @@ impl Requester {
where
S: Serialize + ?Sized,
{
let cookie_preview = self.debug_cookie_preview_from_owned_headers(url, &headers);
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester post_json url={} headers={} cookies={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
headers.len(),
cookie_preview,
self.proxy
);
let mut request = self.client.post(url).version(Version::HTTP_11).json(data);
// Set custom headers
@@ -143,15 +370,34 @@ impl Requester {
}
}
request.send().await
let response = request.send().await?;
self.store_response_cookies(url, &response);
Ok(response)
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hypnotube",
hottub_provider = "freeuseporn",
))]
pub async fn post(
&mut self,
url: &str,
data: &str,
headers: Vec<(&str, &str)>,
) -> Result<Response, wreq::Error> {
let cookie_preview = self.debug_cookie_preview_from_borrowed_headers(url, &headers);
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester post url={} headers={} cookies={} body_len={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
headers.len(),
cookie_preview,
data.len(),
self.proxy
);
let mut request = self
.client
.post(url)
@@ -170,7 +416,9 @@ impl Requester {
}
}
request.send().await
let response = request.send().await?;
self.store_response_cookies(url, &response);
Ok(response)
}
pub async fn post_multipart(
@@ -180,6 +428,17 @@ impl Requester {
headers: Vec<(String, String)>,
_http_version: Option<Version>,
) -> Result<Response, wreq::Error> {
let cookie_preview = self.debug_cookie_preview_from_owned_headers(url, &headers);
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester post_multipart url={} headers={} cookies={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
headers.len(),
cookie_preview,
self.proxy
);
let http_version = match _http_version {
Some(v) => v,
None => Version::HTTP_11,
@@ -199,7 +458,9 @@ impl Requester {
}
}
request.send().await
let response = request.send().await?;
self.store_response_cookies(url, &response);
Ok(response)
}
pub async fn get(
@@ -216,10 +477,30 @@ impl Requester {
headers: Vec<(String, String)>,
_http_version: Option<Version>,
) -> Result<String, AnyErr> {
let cookie_preview = self.debug_cookie_preview_from_owned_headers(url, &headers);
#[cfg(not(feature = "debug"))]
let _ = &cookie_preview;
crate::flow_debug!(
"trace={} requester get_with_headers start url={} headers={} cookies={} http_version={:?} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
headers.len(),
cookie_preview,
_http_version,
self.proxy
);
let http_version = match _http_version {
Some(v) => v,
None => Version::HTTP_11,
};
let cookie_jar = self.cookie_jar.clone();
let user_agent = self.user_agent.clone();
let proxy_enabled = self.proxy;
let _trace_id = self.debug_trace_id.as_deref().unwrap_or("none").to_string();
let url_owned = url.to_string();
let jina_handle = tokio::spawn(async move {
Self::fetch_jina_mirror_body(cookie_jar, user_agent, proxy_enabled, &url_owned).await
});
loop {
let mut request = self.client.get(url).version(http_version);
for (key, value) in headers.iter() {
@@ -231,19 +512,83 @@ impl Requester {
request = request.proxy(proxy);
}
}
let response = request.send().await?;
let response = match request.send().await {
Ok(response) => response,
Err(error) => {
crate::flow_debug!(
"trace={} requester direct transport failed url={} error={} using_jina_fallback=true",
_trace_id,
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(&error.to_string(), 160)
);
match jina_handle.await {
Ok(Ok(body)) => return Ok(body),
Ok(Err(_jina_error)) => {
crate::flow_debug!(
"trace={} requester jina fallback failed after transport error url={} error={}",
_trace_id,
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(&_jina_error.to_string(), 160)
);
return Err(error.into());
}
Err(_join_error) => {
crate::flow_debug!(
"trace={} requester jina task join failed after transport error url={} error={}",
_trace_id,
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(&_join_error.to_string(), 160)
);
return Err(error.into());
}
}
}
};
self.store_response_cookies(url, &response);
crate::flow_debug!(
"trace={} requester direct response url={} status={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
response.status()
);
if response.status().is_success() || response.status().as_u16() == 404 {
jina_handle.abort();
return Ok(response.text().await?);
}
if response.status().as_u16() == 429 {
crate::flow_debug!(
"trace={} requester direct retry url={} status=429",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120)
);
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
continue;
} else {
println!(
"Direct request to {} failed with status: {}",
url,
crate::flow_debug!(
"trace={} requester direct failed url={} status={} using_jina_fallback=true",
_trace_id,
crate::util::flow_debug::preview(url, 120),
response.status()
);
match jina_handle.await {
Ok(Ok(body)) => return Ok(body),
Ok(Err(_error)) => {
crate::flow_debug!(
"trace={} requester jina fallback failed url={} error={}",
_trace_id,
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(&_error.to_string(), 160)
);
}
Err(_error) => {
crate::flow_debug!(
"trace={} requester jina task join failed url={} error={}",
_trace_id,
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(&_error.to_string(), 160)
);
}
}
break;
}
}
@@ -258,6 +603,12 @@ impl Requester {
if self.proxy && env::var("BURP_URL").is_ok() {
flare.set_proxy(true);
}
crate::flow_debug!(
"trace={} requester flaresolverr url={} proxy={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
self.proxy
);
let res = flare
.solve(FlareSolverrRequest {
@@ -269,19 +620,17 @@ impl Requester {
.map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?;
// Rebuild client and apply UA/cookies from FlareSolverr
let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/");
let useragent = res.solution.userAgent;
self.user_agent = Some(useragent);
if url::Url::parse(&cookie_origin).is_ok() {
for cookie in res.solution.cookies {
self.cookie_jar
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
}
}
self.store_flaresolverr_cookies(url, &res.solution.cookies);
self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
crate::flow_debug!(
"trace={} requester flaresolverr solved url={} user_agent={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
crate::util::flow_debug::preview(self.user_agent.as_deref().unwrap_or("unknown"), 96)
);
// Retry the original URL with the updated client & (optional) proxy
let mut request = self.client.get(url).version(Version::HTTP_11);
@@ -296,11 +645,44 @@ impl Requester {
}
let response = request.send().await?;
self.store_response_cookies(url, &response);
crate::flow_debug!(
"trace={} requester retry response url={} status={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120),
response.status()
);
if response.status().is_success() {
return Ok(response.text().await?);
}
// Fall back to FlareSolverr-provided body
crate::flow_debug!(
"trace={} requester fallback body url={}",
self.debug_trace_id().unwrap_or("none"),
crate::util::flow_debug::preview(url, 120)
);
Ok(res.solution.response)
}
}
#[cfg(test)]
mod tests {
    use super::Requester;

    /// Two independently-constructed requesters must observe each other's
    /// cookies, proving `Requester::new` hands out the shared jar.
    #[test]
    fn new_requesters_share_cookie_jar() {
        let writer = Requester::new();
        let reader = Requester::new();
        let origin = "https://shared-cookie-requester-test.invalid/";
        writer
            .cookie_jar
            .add_cookie_str("shared_cookie=1; Path=/; SameSite=Lax", origin);
        let header = reader
            .cookie_header_for_url("https://shared-cookie-requester-test.invalid/path")
            .unwrap_or_default();
        assert!(header.contains("shared_cookie=1"), "header: {header}");
    }
}

View File

@@ -100,6 +100,8 @@ pub struct VideoItem {
#[serde(skip_serializing_if = "Option::is_none")]
pub uploaderUrl: Option<String>, // "https://www.youtube.com/@petcollective",
#[serde(skip_serializing_if = "Option::is_none")]
pub uploaderId: Option<String>, // "petcollective",
#[serde(skip_serializing_if = "Option::is_none")]
pub verified: Option<bool>, // false,
#[serde(skip_serializing_if = "Option::is_none")]
pub tags: Option<Vec<String>>, // [],
@@ -114,7 +116,6 @@ pub struct VideoItem {
#[serde(skip_serializing_if = "Option::is_none")]
pub aspectRatio: Option<f32>,
}
#[allow(dead_code)]
impl VideoItem {
pub fn new(
id: String,
@@ -136,6 +137,7 @@ impl VideoItem {
thumb,
uploader: None,
uploaderUrl: None,
uploaderId: None,
verified: None,
tags: None, // Placeholder, adjust as needed
uploadedAt: None,
@@ -145,9 +147,11 @@ impl VideoItem {
aspectRatio: None,
}
}
/// Deserializes a `VideoItem` from its JSON string representation.
/// NOTE(review): despite the name this is not a `From` impl; it is a
/// fallible JSON parse.
#[cfg(any(not(hottub_single_provider), hottub_provider = "hentaihaven"))]
pub fn from(s: String) -> Result<Self, serde_json::Error> {
    serde_json::from_str(&s)
}
#[cfg(any(not(hottub_single_provider), hottub_provider = "hanime"))]
pub fn tags(mut self, tags: Vec<String>) -> Self {
if tags.is_empty() {
return self;
@@ -155,30 +159,113 @@ impl VideoItem {
self.tags = Some(tags);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "heavyfetish",
hottub_provider = "porndish",
hottub_provider = "shooshtime",
hottub_provider = "spankbang",
hottub_provider = "chaturbate",
hottub_provider = "porn4fans",
hottub_provider = "xfree",
hottub_provider = "pornhub",
))]
pub fn uploader(mut self, uploader: String) -> Self {
self.uploader = Some(uploader);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "heavyfetish",
hottub_provider = "porndish",
hottub_provider = "shooshtime",
hottub_provider = "spankbang",
hottub_provider = "chaturbate",
))]
pub fn uploader_url(mut self, uploader_url: String) -> Self {
self.uploaderUrl = Some(uploader_url);
self
}
pub fn verified(mut self, verified: bool) -> Self {
self.verified = Some(verified);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "beeg",
hottub_provider = "chaturbate",
hottub_provider = "freepornvideosxxx",
hottub_provider = "hanime",
hottub_provider = "heavyfetish",
hottub_provider = "hentaihaven",
hottub_provider = "hypnotube",
hottub_provider = "javtiful",
hottub_provider = "noodlemagazine",
hottub_provider = "okxxx",
hottub_provider = "omgxxx",
hottub_provider = "perfectgirls",
hottub_provider = "pimpbunny",
hottub_provider = "pmvhaven",
hottub_provider = "porn00",
hottub_provider = "porn4fans",
hottub_provider = "porndish",
hottub_provider = "pornhat",
hottub_provider = "pornhub",
hottub_provider = "redtube",
hottub_provider = "rule34gen",
hottub_provider = "rule34video",
hottub_provider = "shooshtime",
hottub_provider = "spankbang",
hottub_provider = "sxyprn",
hottub_provider = "tnaflix",
hottub_provider = "tokyomotion",
hottub_provider = "viralxxxporn",
hottub_provider = "xfree",
hottub_provider = "xxthots",
hottub_provider = "yesporn",
hottub_provider = "youjizz",
))]
pub fn views(mut self, views: u32) -> Self {
self.views = Some(views);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "beeg",
hottub_provider = "hanime",
hottub_provider = "heavyfetish",
hottub_provider = "hsex",
hottub_provider = "porn4fans",
hottub_provider = "shooshtime",
hottub_provider = "spankbang",
hottub_provider = "tokyomotion",
hottub_provider = "vrporn",
hottub_provider = "yesporn",
))]
pub fn rating(mut self, rating: f32) -> Self {
self.rating = Some(rating);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "porndish",
hottub_provider = "shooshtime",
hottub_provider = "heavyfetish",
hottub_provider = "xfree",
))]
pub fn uploaded_at(mut self, uploaded_at: u64) -> Self {
self.uploadedAt = Some(uploaded_at);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hanime",
hottub_provider = "heavyfetish",
hottub_provider = "hentaihaven",
hottub_provider = "hqporner",
hottub_provider = "javtiful",
hottub_provider = "noodlemagazine",
hottub_provider = "pimpbunny",
hottub_provider = "pmvhaven",
hottub_provider = "shooshtime",
))]
pub fn formats(mut self, formats: Vec<VideoFormat>) -> Self {
if formats.is_empty() {
return self;
@@ -186,27 +273,48 @@ impl VideoItem {
self.formats = Some(formats);
self
}
pub fn add_format(mut self, format: VideoFormat) {
if let Some(formats) = self.formats.as_mut() {
formats.push(format);
} else {
self.formats = Some(vec![format]);
}
}
pub fn embed(mut self, embed: VideoEmbed) -> Self {
self.embed = Some(embed);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "freepornvideosxxx",
hottub_provider = "heavyfetish",
hottub_provider = "homoxxx",
hottub_provider = "javtiful",
hottub_provider = "missav",
hottub_provider = "okxxx",
hottub_provider = "omgxxx",
hottub_provider = "perfectgirls",
hottub_provider = "pimpbunny",
hottub_provider = "pmvhaven",
hottub_provider = "pornhat",
hottub_provider = "redtube",
hottub_provider = "rule34gen",
hottub_provider = "shooshtime",
hottub_provider = "spankbang",
hottub_provider = "sxyprn",
hottub_provider = "tnaflix",
hottub_provider = "xfree",
hottub_provider = "xxdbx",
hottub_provider = "yesporn",
))]
pub fn preview(mut self, preview: String) -> Self {
self.preview = Some(preview);
self
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hentaihaven",
hottub_provider = "hanime",
hottub_provider = "heavyfetish",
hottub_provider = "paradisehill",
hottub_provider = "xfree",
))]
pub fn aspect_ratio(mut self, aspect_ratio: f32) -> Self {
self.aspectRatio = Some(aspect_ratio);
self
}
#[cfg(any(not(hottub_single_provider), hottub_provider = "chaturbate"))]
pub fn is_live(mut self, is_live: bool) -> Self {
self.isLive = is_live;
self
@@ -294,6 +402,13 @@ impl VideoFormat {
http_headers: None,
}
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "vrporn",
hottub_provider = "perverzija",
hottub_provider = "porndish",
hottub_provider = "spankbang",
))]
pub fn add_http_header(&mut self, key: String, value: String) {
if self.http_headers.is_none() {
self.http_headers = Some(HashMap::new());
@@ -302,6 +417,14 @@ impl VideoFormat {
headers.insert(key, value);
}
}
#[cfg(any(
not(hottub_single_provider),
hottub_provider = "hentaihaven",
hottub_provider = "noodlemagazine",
hottub_provider = "shooshtime",
hottub_provider = "heavyfetish",
hottub_provider = "hsex",
))]
pub fn http_header(&mut self, key: String, value: String) -> Self {
if self.http_headers.is_none() {
self.http_headers = Some(HashMap::new());
@@ -311,6 +434,13 @@ impl VideoFormat {
}
self.to_owned()
}
/// Returns the extra HTTP headers as owned `(name, value)` pairs,
/// yielding an empty list when none are set.
pub fn http_headers_pairs(&self) -> Vec<(String, String)> {
    match &self.http_headers {
        Some(map) => map.iter().map(|(k, v)| (k.clone(), v.clone())).collect(),
        None => Vec::new(),
    }
}
pub fn format_id(mut self, format_id: String) -> Self {
self.format_id = Some(format_id);
self