camsoda and pornhub shorties

This commit is contained in:
Simon
2026-05-22 10:26:05 +00:00
committed by ForgeCode
parent e4941b1209
commit 95d2defa13
5 changed files with 1014 additions and 6 deletions

View File

@@ -64,6 +64,23 @@ class Results:
# User-Agent string of desktop Firefox 146 on Linux x86_64.
# NOTE(review): presumably sent with outgoing HTTP checks so servers treat them
# as browser traffic; its use is not visible here, so confirm at the call sites.
_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
# Hosts known to sit behind Cloudflare, which answer direct (non-browser) HTTP
# checks with 403 or a refused connection. URL reachability failures for these
# hosts are downgraded from errors to warnings.
_CF_PROTECTED_HOSTS = {
    "www.camsoda.com",
    "camsoda.com",
}


def _is_cf_protected(url: str) -> bool:
    """Return True if the URL's host is known to be CF-protected.

    The host is matched exactly against ``_CF_PROTECTED_HOSTS`` after dropping
    one trailing root dot, so ``www.camsoda.com.`` is treated the same as
    ``www.camsoda.com``. ``urlparse`` already lower-cases the hostname and
    strips the port, so neither letter case nor ``:443`` affects the result.
    Subdomains that are not listed are NOT matched.

    Args:
        url: Absolute URL to classify.

    Returns:
        True when the host is listed. False otherwise, including when the URL
        has no host, is not a ``str``, or cannot be parsed.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname
    except (ValueError, TypeError, AttributeError):
        # Best-effort classification: a malformed URL (e.g. a broken IPv6
        # literal) or a non-string argument is simply "not protected".
        return False

    if not isinstance(host, str):
        # None (URL without a netloc) or bytes (bytes URL) can never equal
        # one of the str entries in the set.
        return False

    # A fully-qualified name with a trailing root dot names the same host.
    if host.endswith("."):
        host = host[:-1]
    return host in _CF_PROTECTED_HOSTS
def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]:
"""Return (ok, http_status). Tries HEAD then ranged GET on 405."""
@@ -234,7 +251,10 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
else:
ok, code = http_ok(vurl)
if not ok:
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
if _is_cf_protected(vurl):
results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}")
else:
results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}")
else:
results.info(channel_id, f"{label}: url OK (HTTP {code})")
@@ -275,11 +295,18 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool)
results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}")
yt, stderr = ytdlp_extract(ytdlp_url)
if yt is None:
results.err(
channel_id,
f"{label}: yt-dlp failed for {ytdlp_url}"
+ (f": {stderr[:300]}" if stderr else ""),
)
if _is_cf_protected(ytdlp_url):
results.warn(
channel_id,
f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)"
+ (f": {stderr[:200]}" if stderr else ""),
)
else:
results.err(
channel_id,
f"{label}: yt-dlp failed for {ytdlp_url}"
+ (f": {stderr[:300]}" if stderr else ""),
)
else:
yt_title = (yt.get("title") or "").strip()
api_title = (video.get("title") or "").strip()