redtube fix
This commit is contained in:
@@ -144,11 +144,99 @@ impl RedtubeProvider {
|
||||
Ok(video_items)
|
||||
}
|
||||
|
||||
fn extract_between<'a>(&self, text: &'a str, start: &str, end: &str) -> Option<&'a str> {
|
||||
let start_idx = text.find(start)?;
|
||||
let from = start_idx + start.len();
|
||||
let rest = &text[from..];
|
||||
let end_idx = rest.find(end)?;
|
||||
Some(&rest[..end_idx])
|
||||
}
|
||||
|
||||
fn parse_video_grid_items(&self, html: &str) -> Vec<VideoItem> {
|
||||
if !html.contains("videos_grid") {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let listing = html
|
||||
.split("videos_grid")
|
||||
.nth(1)
|
||||
.unwrap_or_default()
|
||||
.split("</ul>")
|
||||
.next()
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut items: Vec<VideoItem> = Vec::new();
|
||||
for li in listing.split("<li id=\"").skip(1) {
|
||||
let id = self
|
||||
.extract_between(li, "data-video-id=\"", "\"")
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
if id.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let title = li
|
||||
.split("video-title-wrapper")
|
||||
.nth(1)
|
||||
.and_then(|part| self.extract_between(part, "title=\"", "\""))
|
||||
.or_else(|| {
|
||||
li.split("class=\"video-title-text")
|
||||
.nth(1)
|
||||
.and_then(|part| self.extract_between(part, "title=\"", "\""))
|
||||
})
|
||||
.or_else(|| self.extract_between(li, "<a title=\"", "\""))
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let title = decode(title.as_bytes()).to_string().unwrap_or(title);
|
||||
|
||||
let thumb = self
|
||||
.extract_between(li, "data-src=\"", "\"")
|
||||
.or_else(|| self.extract_between(li, "data-o_thumb=\"", "\""))
|
||||
.unwrap_or_default()
|
||||
.replace("&", "&");
|
||||
|
||||
let raw_duration = self
|
||||
.extract_between(li, "<span class=\"video-properties tm_video_duration\">", "</span>")
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
|
||||
|
||||
let views_str = self
|
||||
.extract_between(li, "<span class='info-views'>", "</span>")
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let views = parse_abbreviated_number(&views_str).unwrap_or(0) as u32;
|
||||
|
||||
let preview = self
|
||||
.extract_between(li, "data-mediabook=\"", "\"")
|
||||
.unwrap_or_default()
|
||||
.replace("&", "&");
|
||||
|
||||
let video_url = format!("{}/{}", self.url, id);
|
||||
let video_item =
|
||||
VideoItem::new(id, title, video_url, "redtube".to_string(), thumb, duration)
|
||||
.views(views)
|
||||
.preview(preview);
|
||||
items.push(video_item);
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
|
||||
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
|
||||
if html.is_empty() {
|
||||
println!("HTML is empty");
|
||||
return vec![];
|
||||
}
|
||||
let card_items = self.parse_video_grid_items(&html);
|
||||
if !card_items.is_empty() {
|
||||
return card_items;
|
||||
}
|
||||
|
||||
let mut items: Vec<VideoItem> = Vec::new();
|
||||
let video_listing_content = html
|
||||
.split("<script type=\"application/ld+json\">")
|
||||
@@ -220,117 +308,7 @@ impl RedtubeProvider {
|
||||
println!("HTML is empty");
|
||||
return vec![];
|
||||
}
|
||||
let mut items: Vec<VideoItem> = Vec::new();
|
||||
let video_listing_content = html
|
||||
.split("videos_grid")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default();
|
||||
let videos = video_listing_content
|
||||
.split("<li id=\"tags_videos_")
|
||||
.collect::<Vec<&str>>()[1..]
|
||||
.to_vec();
|
||||
for vid in videos {
|
||||
// for (i, c) in vid.split("\n").enumerate() {
|
||||
// println!("{}: {}", i, c);
|
||||
// }
|
||||
let id = vid
|
||||
.split("data-video-id=\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let video_url = format!("{}/{}", self.url, id);
|
||||
let title = vid
|
||||
.split(" <a title=\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let thumb = vid
|
||||
.split("<img")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split(" data-src=\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let raw_duration = vid
|
||||
.split("<span class=\"video-properties tm_video_duration\">")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</span>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
|
||||
let views_str = vid
|
||||
.split("<span class='info-views'>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</span>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string();
|
||||
let views = parse_abbreviated_number(&views_str).unwrap_or(0) as u32;
|
||||
let preview = vid
|
||||
.split("<img")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split(" data-mediabook=\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
|
||||
let video_item =
|
||||
VideoItem::new(id, title, video_url, "redtube".to_string(), thumb, duration)
|
||||
.views(views)
|
||||
.preview(preview);
|
||||
items.push(video_item);
|
||||
}
|
||||
return items;
|
||||
self.parse_video_grid_items(&html)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -376,3 +354,63 @@ impl Provider for RedtubeProvider {
|
||||
Some(self.build_channel(clientversion))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::RedtubeProvider;

    /// A browse-page card (`<li id="browse_...">`) that exposes its thumbnail
    /// through `data-src` is parsed into a single, fully populated item.
    #[test]
    fn parse_video_grid_items_handles_browse_cards() {
        let browse_page = r#"
            <ul id="block_browse" class="videos_grid">
            <li id="browse_195840661" data-video-id="195840661">
            <a data-testid="plw_video_thumbnail_link" href="/195840661" data-video-id="195840661">
            <img data-src="https://cdn.example/thumb.jpg" data-mediabook="https://cdn.example/preview.mp4?x=1&y=2">
            </a>
            <a class="video-title-text js-pop tm_video_title " title="Stepmoms & More"></a>
            <span class="video-properties tm_video_duration">2:17:57</span>
            <span class='info-views'>981K</span>
            </li>
            </ul>
        "#;

        let parsed = RedtubeProvider::new().parse_video_grid_items(browse_page);
        assert_eq!(parsed.len(), 1);

        let card = &parsed[0];
        assert_eq!(card.id, "195840661");
        assert_eq!(card.title, "Stepmoms & More");
        assert_eq!(card.url, "https://www.redtube.com/195840661");
        assert_eq!(card.thumb, "https://cdn.example/thumb.jpg");
        assert_eq!(
            card.preview.as_deref(),
            Some("https://cdn.example/preview.mp4?x=1&y=2")
        );
        assert_eq!(card.duration, 8277);
        assert_eq!(card.views, Some(981000));
    }

    /// A tags-page card (`<li id="tags_videos_...">`) that only offers
    /// `data-o_thumb` still yields its thumbnail, duration and view count.
    #[test]
    fn parse_video_grid_items_handles_tags_cards() {
        let tags_page = r#"
            <div><ul class="videos_grid">
            <li id="tags_videos_42785231" data-video-id="42785231">
            <a data-testid="plw_video_thumbnail_link" href="/42785231" data-video-id="42785231">
            <img data-o_thumb="https://cdn.example/thumb2.jpg" data-mediabook="https://cdn.example/p2.mp4">
            </a>
            <a class="video-title-text js-pop tm_video_title " title="Title 2"></a>
            <span class="video-properties tm_video_duration">13:06</span>
            <span class='info-views'>51.2K</span>
            </li>
            </ul></div>
        "#;

        let parsed = RedtubeProvider::new().parse_video_grid_items(tags_page);
        assert_eq!(parsed.len(), 1);

        let card = &parsed[0];
        assert_eq!(card.id, "42785231");
        assert_eq!(card.url, "https://www.redtube.com/42785231");
        assert_eq!(card.thumb, "https://cdn.example/thumb2.jpg");
        assert_eq!(card.duration, 786);
        assert_eq!(card.views, Some(51200));
    }
}
|
||||
|
||||
Reference in New Issue
Block a user