Add support for playlists and channels in search

This commit is contained in:
Omar Roth
2018-09-20 09:36:09 -05:00
parent 1627cfc2fa
commit 62380933b2
11 changed files with 414 additions and 210 deletions

View File

@@ -196,8 +196,14 @@ def html_to_content(description_html)
end
def extract_videos(nodeset, ucid = nil)
videos = extract_items(nodeset, ucid)
videos.select! { |item| !item.is_a?(SearchChannel | SearchPlaylist) }
videos.map { |video| video.as(SearchVideo) }
end
def extract_items(nodeset, ucid = nil)
# TODO: Make this a 'common', so it makes more sense to be used here
videos = [] of SearchVideo
items = [] of SearchItem
nodeset.each do |node|
anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
@@ -209,78 +215,147 @@ def extract_videos(nodeset, ucid = nil)
next
end
case node.xpath_node(%q(.//div)).not_nil!["class"]
when .includes? "yt-lockup-playlist"
next
when .includes? "yt-lockup-channel"
next
end
title = anchor.content.strip
id = anchor["href"].lchop("/watch?v=")
if ucid
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
if !anchor
author = ""
author_id = ""
else
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
if !anchor
next
end
author = anchor.content
author_id = anchor["href"].split("/")[-1]
end
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.empty?
anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
if !anchor
next
end
begin
published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts "))
rescue ex
end
begin
published ||= Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
rescue ex
end
published ||= Time.now
begin
view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64?
rescue ex
end
begin
view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64?
rescue ex
end
view_count ||= 0_i64
title = anchor.content.strip
id = anchor["href"]
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description_html, description = html_to_content(description_html)
length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
if length_seconds
length_seconds = decode_length_seconds(length_seconds.content)
else
length_seconds = -1
end
case node.xpath_node(%q(.//div)).not_nil!["class"]
when .includes? "yt-lockup-playlist"
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
videos << SearchVideo.new(
title,
id,
author,
author_id,
published,
view_count,
description,
description_html,
length_seconds,
)
anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
if anchor
video_count = anchor.content.match(/View full playlist \((?<count>\d+)/).try &.["count"].to_i?
end
video_count ||= 0
videos = [] of SearchPlaylistVideo
node.xpath_nodes(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
anchor = video.xpath_node(%q(.//a))
if anchor
video_title = anchor.content
id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
end
video_title ||= ""
id ||= ""
anchor = video.xpath_node(%q(.//span/span))
if anchor
length_seconds = decode_length_seconds(anchor.content)
end
length_seconds ||= 0
videos << SearchPlaylistVideo.new(
video_title,
id,
length_seconds
)
end
items << SearchPlaylist.new(
title,
plid,
author,
author_id,
video_count,
videos
)
when .includes? "yt-lockup-channel"
author = title
ucid = id.split("/")[-1]
author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
author_thumbnail ||= ""
subscriber_count = node.xpath_node(%q(.//span[contains(@class, "yt-subscriber-count")])).try &.["title"].delete(",").to_i?
subscriber_count ||= 0
video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].delete(",").to_i?
video_count ||= 0
items << SearchChannel.new(
author,
ucid,
author_thumbnail,
subscriber_count,
video_count,
description,
description_html
)
else
id = id.lchop("/watch?v=")
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.empty?
next
end
begin
published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts "))
rescue ex
end
begin
published ||= Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
rescue ex
end
published ||= Time.now
begin
view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64?
rescue ex
end
begin
view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64?
rescue ex
end
view_count ||= 0_i64
length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
if length_seconds
length_seconds = decode_length_seconds(length_seconds.content)
else
length_seconds = -1
end
live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")]))
if live_now
live_now = true
else
live_now = false
end
items << SearchVideo.new(
title,
id,
author,
author_id,
published,
view_count,
description,
description_html,
length_seconds,
live_now
)
end
end
return videos
return items
end