From 3da09dc561564dd48756f674267d209931925b14 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 5 May 2026 14:20:46 +0000 Subject: [PATCH] erome launch --- build.rs | 5 + docs/provider-catalog.md | 1 + src/providers/erome.rs | 384 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+) create mode 100644 src/providers/erome.rs diff --git a/build.rs b/build.rs index 4687c12..fa67ae9 100644 --- a/build.rs +++ b/build.rs @@ -256,6 +256,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "hsex", ty: "HsexProvider", }, + ProviderDef { + id: "erome", + module: "erome", + ty: "EromeProvider", + }, ProviderDef { id: "sextb", module: "sextb", diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 642328c..3385683 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -10,6 +10,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. | | `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. | | `chaturbate` | `live-cams` | no | no | Live cam channel. | +| `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:`). | | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. | | `freeuseporn` | `fetish-kink` | no | no | Fetish archive pattern. | | `hanime` | `hentai-animation` | no | yes | Uses proxied CDN/thumb handling. | diff --git a/src/providers/erome.rs b/src/providers/erome.rs new file mode 100644 index 0000000..8c8f80e --- /dev/null +++ b/src/providers/erome.rs @@ -0,0 +1,384 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::status::*; +use crate::util::parse_abbreviated_number; +use crate::videos::{ServerOptions, VideoItem}; + +use async_trait::async_trait; +use error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use url::form_urlencoded; +use wreq::Version; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "amateur-homemade", + tags: &["amateur", "albums", "homemade"], + }; + +const BASE_URL: &str = "https://www.erome.com"; +const CHANNEL_ID: &str = "erome"; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + } +} + +#[derive(Debug, Clone)] +pub struct EromeProvider { + url: String, +} + +#[derive(Debug, Clone)] +enum Target { + ExploreHot, + ExploreNew, + Search { query: String, order_new: bool }, + UploaderPosts { slug: String }, +} + +impl EromeProvider { + pub fn new() -> Self { + Self { + url: BASE_URL.to_string(), + } + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "EroMe".to_string(), + description: + "EroMe album feed with hot/new routing, search, and uploader profile shortcuts." + .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=erome.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse EroMe hot or new feeds.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "New".to_string(), + }, + FilterOption { + id: "hot".to_string(), + title: "Hot".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Uploader".to_string(), + description: "Jump directly to an uploader profile slug.".to_string(), + systemImage: "person.crop.square".to_string(), + colorName: "purple".to_string(), + options: vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }], + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Option { + Selector::parse(value).ok() + } + + fn regex(value: &str) -> Option { + Regex::new(value).ok() + } + + fn normalize_text(value: &str) -> String { + decode(value.as_bytes()) + .to_string() + .unwrap_or_else(|_| value.to_string()) + .replace('\u{a0}', " ") + .split_whitespace() + .collect::>() + .join(" ") + .trim() + .to_string() + } + + fn normalize_url(&self, value: &str) -> String { + let trimmed = value.trim(); + if trimmed.is_empty() { + return String::new(); + } + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + return trimmed.to_string(); + } + if trimmed.starts_with("//") { + return format!("https:{trimmed}"); + } + format!( + "{}/{}", + self.url.trim_end_matches('/'), + trimmed.trim_start_matches('/') + ) + } + + fn extract_album_guid_from_url(value: &str) -> Option { + let re = Self::regex(r#"/a/([A-Za-z0-9]+)"#)?; + re.captures(value) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) + } + + fn uploader_slug_from_query(query: &str) -> Option { + let q = query.trim(); + if q.is_empty() { + return None; + } + let lower = q.to_ascii_lowercase(); + let stripped = if let Some(rest) = lower.strip_prefix("uploader:") { + rest.trim().to_string() + } else if let Some(rest) = lower.strip_prefix("user:") { + rest.trim().to_string() + } else { + return None; + }; + + let slug = stripped + .trim_start_matches('@') + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-') + .collect::(); + (!slug.is_empty()).then_some(slug) + } + + fn resolve_target(&self, query: &str, options: &ServerOptions, sort: &str) -> Target { + if let Some(site) = options.sites.as_deref() { + let normalized = site.trim(); + if !normalized.is_empty() && !normalized.eq_ignore_ascii_case("all") { + let slug = normalized + .trim_start_matches('@') + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-') + .collect::(); + if !slug.is_empty() { + return Target::UploaderPosts { slug }; + } + } + } + + if !query.trim().is_empty() { + if let Some(slug) = Self::uploader_slug_from_query(query) { + return Target::UploaderPosts { slug }; + } + return Target::Search { + query: query.trim().to_string(), + order_new: matches!(sort, "new" | "latest"), + }; + } + + if matches!(sort, "new" | "latest") { + Target::ExploreNew + } else { + Target::ExploreHot + } + } + + fn build_url_for_target(&self, target: &Target, page: u16) -> String { + let page = page.max(1); + match target { + Target::ExploreHot => { + if page == 1 { + format!("{}/explore", self.url) + } else { + format!("{}/explore?page={page}", self.url) + } + } + Target::ExploreNew => { + if page == 1 { + format!("{}/explore/new", self.url) + } else { + format!("{}/explore/new?page={page}", self.url) + } + } + Target::Search { query, order_new } => { + let mut serializer = form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("q", query); + if *order_new { + serializer.append_pair("o", "new"); + } + serializer.append_pair("page", &page.to_string()); + format!("{}/search?{}", self.url, serializer.finish()) + } + Target::UploaderPosts { slug } => { + let mut serializer = form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("t", "posts"); + serializer.append_pair("page", &page.to_string()); + format!("{}/{}?{}", self.url, slug, serializer.finish()) + } + } + } + + fn text_from_selector(parent: &ElementRef<'_>, selector: &Selector) -> String { + parent + .select(selector) + .next() + .map(|node| Self::normalize_text(&node.text().collect::>().join(" "))) + .unwrap_or_default() + } + + fn parse_video_item(&self, card: &ElementRef<'_>) -> Option { + let link_selector = Self::selector("a.album-link[href]")?; + let title_selector = Self::selector("a.album-title")?; + let thumb_selector = Self::selector("img.album-thumbnail")?; + let user_selector = Self::selector("span.album-user")?; + let views_selector = Self::selector("span.album-bottom-views")?; + + let link = card.select(&link_selector).next()?; + let href = link.value().attr("href")?; + let album_url = self.normalize_url(href); + let album_id = Self::extract_album_guid_from_url(&album_url)?; + + let title = Self::text_from_selector(card, &title_selector); + let fallback_title = link + .value() + .attr("title") + .map(Self::normalize_text) + .unwrap_or_default(); + let final_title = if !title.is_empty() { title } else { fallback_title }; + if final_title.is_empty() { + return None; + } + + let thumb = card + .select(&thumb_selector) + .find_map(|node| { + node.value() + .attr("src") + .or_else(|| node.value().attr("data-rotate-src")) + }) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let uploader = Self::text_from_selector(card, &user_selector); + let uploader_slug = uploader + .trim() + .trim_start_matches('@') + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-') + .collect::(); + + let views_text = Self::text_from_selector(card, &views_selector); + let views = parse_abbreviated_number(views_text.replace(',', ".").replace(' ', "").as_str()); + + let uploader_url = if uploader_slug.is_empty() { + String::new() + } else { + format!("{}/{}?t=posts", self.url, uploader_slug) + }; + + let mut item = VideoItem::new( + album_id, + final_title, + album_url, + CHANNEL_ID.to_string(), + thumb, + 0, + ); + if let Some(value) = views { + item = item.views(value); + } + if !uploader.is_empty() { + item = item.uploader(uploader); + } + if !uploader_url.is_empty() { + item = item.uploader_url(uploader_url); + } + if !uploader_slug.is_empty() { + item.uploaderId = Some(format!("{CHANNEL_ID}:{uploader_slug}")); + } + Some(item) + } + + fn parse_listing(&self, html: &str) -> Vec { + let document = Html::parse_document(html); + let Some(card_selector) = Self::selector("div.album") else { + return vec![]; + }; + + document + .select(&card_selector) + .filter_map(|card| self.parse_video_item(&card)) + .collect() + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ( + "accept".to_string(), + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8".to_string(), + ), + ("accept-language".to_string(), "en-US,en;q=0.8".to_string()), + ( + "user-agent".to_string(), + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(), + ), + ("referer".to_string(), referer.to_string()), + ] + } + + async fn fetch_page(&self, options: &ServerOptions, url: &str) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "get_videos"); + requester + .get_with_headers(url, Self::html_headers(&format!("{}/explore", self.url)), Some(Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } +} + +#[async_trait] +impl Provider for EromeProvider { + async fn get_videos( + &self, + _cache: crate::util::cache::VideoCache, + _pool: DbPool, + sort: String, + query: Option, + page: String, + _per_page: String, + options: ServerOptions, + ) -> Vec { + let sort_value = if sort.is_empty() { + options.sort.as_deref().unwrap_or("new").to_string() + } else { + sort + }; + let query_value = query.unwrap_or_default(); + let page_value = page.parse::().unwrap_or(1); + let target = self.resolve_target(&query_value, &options, &sort_value); + let url = self.build_url_for_target(&target, page_value); + + match self.fetch_page(&options, &url).await { + Ok(html) => self.parse_listing(&html), + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos.fetch", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +}