Debloat channels.cr into multiple files (#2225)

Cherry-picked from the UI overhaul branch with a few modifications:
  - the channel folder is renamed to channels
  - parsing for channel home and featured channels is removed, due to the
    lack of supporting infrastructure from other commits

(cherry picked from commit 44d18b8e147b47ad06a54cc6fd08423d9f39074d)
syeopite 2021-07-14 08:46:12 -07:00 committed by GitHub
parent b633f8d207
commit ae61662f61
8 changed files with 961 additions and 963 deletions

View File

@@ -5,7 +5,7 @@ require "protodec/utils"
require "spec"
require "yaml"
require "../src/invidious/helpers/*"
require "../src/invidious/channels"
require "../src/invidious/channels/*"
require "../src/invidious/comments"
require "../src/invidious/playlists"
require "../src/invidious/search"

View File

@@ -27,6 +27,7 @@ require "compress/zip"
require "protodec/utils"
require "./invidious/helpers/*"
require "./invidious/*"
require "./invidious/channels/*"
require "./invidious/routes/**"
require "./invidious/jobs/**"

View File

@@ -1,962 +0,0 @@
struct InvidiousChannel
include DB::Serializable
property id : String
property author : String
property updated : Time
property deleted : Bool
property subscribed : Time?
end
struct ChannelVideo
include DB::Serializable
property id : String
property title : String
property published : Time
property updated : Time
property ucid : String
property author : String
property length_seconds : Int32 = 0
property live_now : Bool = false
property premiere_timestamp : Time? = nil
property views : Int64? = nil
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "shortVideo"
json.field "title", self.title
json.field "videoId", self.id
json.field "videoThumbnails" do
generate_thumbnails(json, self.id)
end
json.field "lengthSeconds", self.length_seconds
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "viewCount", self.views
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
def to_xml(locale, query_params, xml : XML::Builder)
query_params["v"] = self.id
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{self.id}" }
xml.element("yt:videoId") { xml.text self.id }
xml.element("yt:channelId") { xml.text self.ucid }
xml.element("title") { xml.text self.title }
xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
xml.element("author") do
xml.element("name") { xml.text self.author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
end
xml.element("content", type: "xhtml") do
xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
end
end
end
xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("updated") { xml.text self.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("media:group") do
xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
end
end
end
def to_xml(locale, xml : XML::Builder | Nil = nil)
if xml
to_xml(locale, xml)
else
XML.build do |xml|
to_xml(locale, xml)
end
end
end
def to_tuple
{% begin %}
{
{{*@type.instance_vars.map { |var| var.name }}}
}
{% end %}
end
end
struct AboutRelatedChannel
include DB::Serializable
property ucid : String
property author : String
property author_url : String
property author_thumbnail : String
end
# TODO: Refactor into either SearchChannel or InvidiousChannel
struct AboutChannel
include DB::Serializable
property ucid : String
property author : String
property auto_generated : Bool
property author_url : String
property author_thumbnail : String
property banner : String?
property description_html : String
property paid : Bool
property total_views : Int64
property sub_count : Int32
property joined : Time
property is_family_friendly : Bool
property allowed_regions : Array(String)
property related_channels : Array(AboutRelatedChannel)
property tabs : Array(String)
end
class ChannelRedirect < Exception
property channel_id : String
def initialize(@channel_id)
end
end
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
finished_channel = Channel(String | Nil).new
spawn do
active_threads = 0
active_channel = Channel(Nil).new
channels.each do |ucid|
if active_threads >= max_threads
active_channel.receive
active_threads -= 1
end
active_threads += 1
spawn do
begin
get_channel(ucid, db, refresh, pull_all_videos)
finished_channel.send(ucid)
rescue ex
finished_channel.send(nil)
ensure
active_channel.send(nil)
end
end
end
end
final = [] of String
channels.size.times do
if ucid = finished_channel.receive
final << ucid
end
end
return final
end
def get_channel(id, db, refresh = true, pull_all_videos = true)
if channel = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)
if refresh && Time.utc - channel.updated > 10.minutes
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
db.exec("INSERT INTO channels VALUES (#{args}) \
ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: channel_array)
end
else
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
db.exec("INSERT INTO channels VALUES (#{args})", args: channel_array)
end
return channel
end
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
LOGGER.debug("fetch_channel: #{ucid}")
LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}, locale = #{locale}")
LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
rss = XML.parse_html(rss)
author = rss.xpath_node(%q(//feed/title))
if !author
raise InfoException.new("Deleted or invalid channel")
end
author = author.content
# Auto-generated channels
# https://support.google.com/youtube/answer/2579942
if author.ends_with?(" - Topic") ||
{"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
auto_generated = true
end
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
video_id = entry.xpath_node("videoid").not_nil!.content
title = entry.xpath_node("title").not_nil!.content
published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
author = entry.xpath_node("author/name").not_nil!.content
ucid = entry.xpath_node("channelid").not_nil!.content
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]?
length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0
live_now = channel_video.try &.live_now
live_now ||= false
premiere_timestamp = channel_video.try &.premiere_timestamp
video = ChannelVideo.new({
id: video_id,
title: title,
published: published,
updated: Time.utc,
ucid: ucid,
author: author,
length_seconds: length_seconds,
live_now: live_now,
premiere_timestamp: premiere_timestamp,
views: views,
})
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updating or inserting video")
# We don't include 'premiere_timestamp' here because channel pages don't provide it,
# meaning the timestamp above is always null
was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
updated = $4, ucid = $5, author = $6, length_seconds = $7, \
live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)
if was_insert
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Inserted, updating subscriptions")
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid)
else
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updated")
end
end
if pull_all_videos
page += 1
ids = [] of String
loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
count = videos.size
videos = videos.map { |video| ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
}) }
videos.each do |video|
ids << video.id
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date;
# since no published date is provided here, we can safely ignore them.
if Time.utc - video.published > 1.minute
was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
updated = $4, ucid = $5, author = $6, length_seconds = $7, \
live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) if was_insert
end
end
break if count < 25
page += 1
end
end
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
return channel
end
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = request_youtube_api_browse(continuation)
continuationItems = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
return [] of SearchItem, nil if !continuationItems
items = [] of SearchItem
continuationItems.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
extract_item(item, author, ucid).try { |t| items << t }
}
continuation = continuationItems.as_a.last["continuationItemRenderer"]?
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
else
url = "/channel/#{ucid}/playlists?flow=list&view=1"
case sort_by
when "last", "last_added"
#
when "oldest", "oldest_created"
url += "&sort=da"
when "newest", "newest_created"
url += "&sort=dd"
else nil # Ignore
end
response = YT_POOL.client &.get(url)
initial_data = extract_initial_data(response.body)
return [] of SearchItem, nil if !initial_data
items = extract_items(initial_data, author, ucid)
continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
end
return items, continuation
end
def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "videos",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if !v2
if auto_generated
seed = Time.unix(1525757349)
until seed >= Time.utc
seed += 1.month
end
timestamp = seed - (page - 1).months
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
})))
end
case sort_by
when "newest"
when "popular"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
when "oldest"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
else nil # Ignore
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
### NOTE: DEPRECATED
# Reason -> Unstable
# The Protobuf object must be provided with the id of the last playlist on the current "page"
# in order to fetch the next one accurately.
# (If the id isn't included, entries shift around erratically between pages,
# leading to repetitions and skipped entries.)
#
# Since it's impossible to produce the appropriate Protobuf without an id provided by the user,
# it's better to stick to the continuation tokens provided by the first request onward.
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "playlists",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if cursor
cursor = Base64.urlsafe_encode(cursor, false) if !auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = cursor
end
if auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x32_i64
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 1_i64
case sort
when "oldest", "oldest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 2_i64
when "newest", "newest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 3_i64
when "last", "last_added"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 4_i64
else nil # Ignore
end
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
# TODO: Add "sort_by"
def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en")
if response.status_code != 200
response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en")
end
if response.status_code != 200
raise InfoException.new("This channel does not exist.")
end
ucid = response.body.match(/https:\/\/www.youtube.com\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/).not_nil!["ucid"]
if !continuation || continuation.empty?
initial_data = extract_initial_data(response.body)
body = initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select { |tab| tab["tabRenderer"]?.try &.["selected"].as_bool.== true }[0]?
if !body
raise InfoException.new("Could not extract community tab.")
end
body = body["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]
else
continuation = produce_channel_community_continuation(ucid, continuation)
headers = HTTP::Headers.new
headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]
session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
post_req = {
session_token: session_token,
}
response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
body = JSON.parse(response.body)
body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
body["response"]["continuationContents"]["backstageCommentsContinuation"]?
if !body
raise InfoException.new("Could not extract continuation.")
end
end
continuation = body["continuations"]?.try &.[0]["nextContinuationData"]["continuation"].as_s
posts = body["contents"].as_a
if message = posts[0]["messageRenderer"]?
error_message = (message["text"]["simpleText"]? ||
message["text"]["runs"]?.try &.[0]?.try &.["text"]?)
.try &.as_s || ""
raise InfoException.new(error_message)
end
response = JSON.build do |json|
json.object do
json.field "authorId", ucid
json.field "comments" do
json.array do
posts.each do |post|
comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? ||
post["backstageCommentsContinuation"]?
post = post["backstagePostThreadRenderer"]?.try &.["post"]["backstagePostRenderer"]? ||
post["commentThreadRenderer"]?.try &.["comment"]["commentRenderer"]?
next if !post
content_html = post["contentText"]?.try { |t| parse_content(t) } || ""
author = post["authorText"]?.try &.["simpleText"]? || ""
json.object do
json.field "author", author
json.field "authorThumbnails" do
json.array do
qualities = {32, 48, 76, 100, 176, 512}
author_thumbnail = post["authorThumbnail"]["thumbnails"].as_a[0]["url"].as_s
qualities.each do |quality|
json.object do
json.field "url", author_thumbnail.gsub(/s\d+-/, "s#{quality}-")
json.field "width", quality
json.field "height", quality
end
end
end
end
if post["authorEndpoint"]?
json.field "authorId", post["authorEndpoint"]["browseEndpoint"]["browseId"]
json.field "authorUrl", post["authorEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
else
json.field "authorId", ""
json.field "authorUrl", ""
end
published_text = post["publishedTimeText"]["runs"][0]["text"].as_s
published = decode_date(published_text.rchop(" (edited)"))
if published_text.includes?(" (edited)")
json.field "isEdited", true
else
json.field "isEdited", false
end
like_count = post["actionButtons"]["commentActionButtonsRenderer"]["likeButton"]["toggleButtonRenderer"]["accessibilityData"]["accessibilityData"]["label"]
.try &.as_s.gsub(/\D/, "").to_i? || 0
json.field "content", html_to_content(content_html)
json.field "contentHtml", content_html
json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
json.field "likeCount", like_count
json.field "commentId", post["postId"]? || post["commentId"]? || ""
json.field "authorIsChannelOwner", post["authorEndpoint"]["browseEndpoint"]["browseId"] == ucid
if attachment = post["backstageAttachment"]?
json.field "attachment" do
json.object do
case attachment.as_h
when .has_key?("videoRenderer")
attachment = attachment["videoRenderer"]
json.field "type", "video"
if !attachment["videoId"]?
error_message = (attachment["title"]["simpleText"]? ||
attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?)
json.field "error", error_message
else
video_id = attachment["videoId"].as_s
video_title = attachment["title"]["simpleText"]? || attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?
json.field "title", video_title
json.field "videoId", video_id
json.field "videoThumbnails" do
generate_thumbnails(json, video_id)
end
json.field "lengthSeconds", decode_length_seconds(attachment["lengthText"]["simpleText"].as_s)
author_info = attachment["ownerText"]["runs"][0].as_h
json.field "author", author_info["text"].as_s
json.field "authorId", author_info["navigationEndpoint"]["browseEndpoint"]["browseId"]
json.field "authorUrl", author_info["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
# TODO: json.field "authorThumbnails", "channelThumbnailSupportedRenderers"
# TODO: json.field "authorVerified", "ownerBadges"
published = decode_date(attachment["publishedTimeText"]["simpleText"].as_s)
json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
view_count = attachment["viewCountText"]?.try &.["simpleText"].as_s.gsub(/\D/, "").to_i64? || 0_i64
json.field "viewCount", view_count
json.field "viewCountText", translate(locale, "`x` views", number_to_short_text(view_count))
end
when .has_key?("backstageImageRenderer")
attachment = attachment["backstageImageRenderer"]
json.field "type", "image"
json.field "imageThumbnails" do
json.array do
thumbnail = attachment["image"]["thumbnails"][0].as_h
width = thumbnail["width"].as_i
height = thumbnail["height"].as_i
aspect_ratio = (width.to_f / height.to_f)
url = thumbnail["url"].as_s.gsub(/=w\d+-h\d+(-p)?(-nd)?(-df)?(-rwa)?/, "=s640")
qualities = {320, 560, 640, 1280, 2000}
qualities.each do |quality|
json.object do
json.field "url", url.gsub(/=s\d+/, "=s#{quality}")
json.field "width", quality
json.field "height", (quality / aspect_ratio).ceil.to_i
end
end
end
end
# TODO
# when .has_key?("pollRenderer")
# attachment = attachment["pollRenderer"]
# json.field "type", "poll"
else
json.field "type", "unknown"
json.field "error", "Unrecognized attachment type."
end
end
end
end
if comments && (reply_count = (comments["backstageCommentsRenderer"]["moreText"]["simpleText"]? ||
comments["backstageCommentsRenderer"]["moreText"]["runs"]?.try &.[0]?.try &.["text"]?)
.try &.as_s.gsub(/\D/, "").to_i?)
continuation = comments["backstageCommentsRenderer"]["continuations"]?.try &.as_a[0]["nextContinuationData"]["continuation"].as_s
continuation ||= ""
json.field "replies" do
json.object do
json.field "replyCount", reply_count
json.field "continuation", extract_channel_community_cursor(continuation)
end
end
end
end
end
end
end
if body["continuations"]?
continuation = body["continuations"][0]["nextContinuationData"]["continuation"].as_s
json.field "continuation", extract_channel_community_cursor(continuation)
end
end
end
if format == "html"
response = JSON.parse(response)
content_html = template_youtube_comments(response, locale, thin_mode)
response = JSON.build do |json|
json.object do
json.field "contentHtml", content_html
end
end
end
return response
end
def produce_channel_community_continuation(ucid, cursor)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:string" => cursor || "",
},
}
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
def extract_channel_community_cursor(continuation)
object = URI.decode_www_form(continuation)
.try { |i| Base64.decode(i) }
.try { |i| IO::Memory.new(i) }
.try { |i| Protodec::Any.parse(i) }
.try { |i| i["80226972:0:embedded"]["3:1:base64"].as_h }
if object["53:2:embedded"]?.try &.["3:0:embedded"]?
object["53:2:embedded"]["3:0:embedded"]["2:0:string"] = object["53:2:embedded"]["3:0:embedded"]
.try { |i| i["2:0:base64"].as_h }
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i, padding: false) }
object["53:2:embedded"]["3:0:embedded"].as_h.delete("2:0:base64")
end
cursor = Protodec::Any.cast_json(object)
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
cursor
end
def get_about_info(ucid, locale)
result = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
if result.status_code != 200
result = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
end
if md = result.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
raise ChannelRedirect.new(channel_id: md["ucid"])
end
if result.status_code != 200
raise InfoException.new("This channel does not exist.")
end
about = XML.parse_html(result.body)
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
raise InfoException.new("This channel does not exist.")
end
initdata = extract_initial_data(result.body)
if initdata.empty?
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
error_message ||= translate(locale, "Could not get channel info.")
raise InfoException.new(error_message)
end
if browse_endpoint = initdata["onResponseReceivedActions"]?.try &.[0]?.try &.["navigateAction"]?.try &.["endpoint"]?.try &.["browseEndpoint"]?
raise ChannelRedirect.new(channel_id: browse_endpoint["browseId"].to_s)
end
auto_generated = false
# Check for special auto-generated gaming channels
if !initdata.has_key?("metadata")
auto_generated = true
end
if auto_generated
author = initdata["header"]["interactiveTabbedHeaderRenderer"]["title"]["simpleText"].as_s
author_url = initdata["microformat"]["microformatDataRenderer"]["urlCanonical"].as_s
author_thumbnail = initdata["header"]["interactiveTabbedHeaderRenderer"]["boxArt"]["thumbnails"][0]["url"].as_s
# Raises a KeyError on failure.
banners = initdata["header"]["interactiveTabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
banner = banners.try &.[-1]?.try &.["url"].as_s?
description = initdata["header"]["interactiveTabbedHeaderRenderer"]["description"]["simpleText"].as_s
description_html = HTML.escape(description).gsub("\n", "<br>")
paid = false
is_family_friendly = initdata["microformat"]["microformatDataRenderer"]["familySafe"].as_bool
allowed_regions = initdata["microformat"]["microformatDataRenderer"]["availableCountries"].as_a.map { |a| a.as_s }
related_channels = [] of AboutRelatedChannel
else
author = initdata["metadata"]["channelMetadataRenderer"]["title"].as_s
author_url = initdata["metadata"]["channelMetadataRenderer"]["channelUrl"].as_s
author_thumbnail = initdata["metadata"]["channelMetadataRenderer"]["avatar"]["thumbnails"][0]["url"].as_s
ucid = initdata["metadata"]["channelMetadataRenderer"]["externalId"].as_s
# Raises a KeyError on failure.
banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
banner = banners.try &.[-1]?.try &.["url"].as_s?
# if banner.includes? "channels/c4/default_banner"
# banner = nil
# end
description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || ""
description_html = HTML.escape(description).gsub("\n", "<br>")
paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"]
.["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]?
.try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node|
renderer = node["miniChannelRenderer"]?
related_id = renderer.try &.["channelId"]?.try &.as_s?
related_id ||= ""
related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s?
related_title ||= ""
related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]?
.try &.["url"]?.try &.as_s?
related_author_url ||= ""
related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a?
related_author_thumbnails ||= [] of JSON::Any
related_author_thumbnail = ""
if related_author_thumbnails.size > 0
related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s?
related_author_thumbnail ||= ""
end
AboutRelatedChannel.new({
ucid: related_id,
author: related_title,
author_url: related_author_url,
author_thumbnail: related_author_thumbnail,
})
end
related_channels ||= [] of AboutRelatedChannel
end
total_views = 0_i64
joined = Time.unix(0)
tabs = [] of String
tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
if !tabs_json.nil?
# Retrieve information from the tabs array. The index we are looking for varies between channels.
tabs_json.each do |node|
# Try to find the about section which is located in only one of the tabs.
channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
.try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
.try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
if !channel_about_meta.nil?
total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins them before parsing the date.
joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s }
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
(channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
auto_generated = true
end
end
end
tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map { |node| node["tabRenderer"]["title"].as_s.downcase }
end
sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
.try { |text| short_text_to_number(text.split(" ")[0]) } || 0
AboutChannel.new({
ucid: ucid,
author: author,
auto_generated: auto_generated,
author_url: author_url,
author_thumbnail: author_thumbnail,
banner: banner,
description_html: description_html,
paid: paid,
total_views: total_views,
sub_count: sub_count,
joined: joined,
is_family_friendly: is_family_friendly,
allowed_regions: allowed_regions,
related_channels: related_channels,
tabs: tabs,
})
end
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return request_youtube_api_browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
2.times do |i|
initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
videos.concat extract_videos(initial_data, author, ucid)
end
return videos.size, videos
end
def get_latest_videos(ucid)
initial_data = get_channel_videos_response(ucid)
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
return extract_videos(initial_data, author, ucid)
end

View File

@@ -0,0 +1,192 @@
# TODO: Refactor into either SearchChannel or InvidiousChannel
struct AboutChannel
include DB::Serializable
property ucid : String
property author : String
property auto_generated : Bool
property author_url : String
property author_thumbnail : String
property banner : String?
property description_html : String
property paid : Bool
property total_views : Int64
property sub_count : Int32
property joined : Time
property is_family_friendly : Bool
property allowed_regions : Array(String)
property related_channels : Array(AboutRelatedChannel)
property tabs : Array(String)
end
struct AboutRelatedChannel
include DB::Serializable
property ucid : String
property author : String
property author_url : String
property author_thumbnail : String
end
def get_about_info(ucid, locale)
result = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
if result.status_code != 200
result = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
end
if md = result.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
raise ChannelRedirect.new(channel_id: md["ucid"])
end
if result.status_code != 200
raise InfoException.new("This channel does not exist.")
end
about = XML.parse_html(result.body)
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
raise InfoException.new("This channel does not exist.")
end
initdata = extract_initial_data(result.body)
if initdata.empty?
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
error_message ||= translate(locale, "Could not get channel info.")
raise InfoException.new(error_message)
end
if browse_endpoint = initdata["onResponseReceivedActions"]?.try &.[0]?.try &.["navigateAction"]?.try &.["endpoint"]?.try &.["browseEndpoint"]?
raise ChannelRedirect.new(channel_id: browse_endpoint["browseId"].to_s)
end
auto_generated = false
# Check for special auto-generated gaming channels
if !initdata.has_key?("metadata")
auto_generated = true
end
if auto_generated
author = initdata["header"]["interactiveTabbedHeaderRenderer"]["title"]["simpleText"].as_s
author_url = initdata["microformat"]["microformatDataRenderer"]["urlCanonical"].as_s
author_thumbnail = initdata["header"]["interactiveTabbedHeaderRenderer"]["boxArt"]["thumbnails"][0]["url"].as_s
# Raises a KeyError on failure.
banners = initdata["header"]["interactiveTabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
banner = banners.try &.[-1]?.try &.["url"].as_s?
description = initdata["header"]["interactiveTabbedHeaderRenderer"]["description"]["simpleText"].as_s
description_html = HTML.escape(description).gsub("\n", "<br>")
paid = false
is_family_friendly = initdata["microformat"]["microformatDataRenderer"]["familySafe"].as_bool
allowed_regions = initdata["microformat"]["microformatDataRenderer"]["availableCountries"].as_a.map { |a| a.as_s }
related_channels = [] of AboutRelatedChannel
else
author = initdata["metadata"]["channelMetadataRenderer"]["title"].as_s
author_url = initdata["metadata"]["channelMetadataRenderer"]["channelUrl"].as_s
author_thumbnail = initdata["metadata"]["channelMetadataRenderer"]["avatar"]["thumbnails"][0]["url"].as_s
ucid = initdata["metadata"]["channelMetadataRenderer"]["externalId"].as_s
# Raises a KeyError on failure.
banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
banner = banners.try &.[-1]?.try &.["url"].as_s?
# if banner.includes? "channels/c4/default_banner"
# banner = nil
# end
description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || ""
description_html = HTML.escape(description).gsub("\n", "<br>")
paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"]
.["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]?
.try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node|
renderer = node["miniChannelRenderer"]?
related_id = renderer.try &.["channelId"]?.try &.as_s?
related_id ||= ""
related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s?
related_title ||= ""
related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]?
.try &.["url"]?.try &.as_s?
related_author_url ||= ""
related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a?
related_author_thumbnails ||= [] of JSON::Any
related_author_thumbnail = ""
if related_author_thumbnails.size > 0
related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s?
related_author_thumbnail ||= ""
end
AboutRelatedChannel.new({
ucid: related_id,
author: related_title,
author_url: related_author_url,
author_thumbnail: related_author_thumbnail,
})
end
related_channels ||= [] of AboutRelatedChannel
end
total_views = 0_i64
joined = Time.unix(0)
tabs = [] of String
tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
if !tabs_json.nil?
# Retrieve information from the tabs array. The index we are looking for varies between channels.
tabs_json.each do |node|
# Try to find the about section which is located in only one of the tabs.
channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
.try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
.try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
if !channel_about_meta.nil?
total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
# The joined text is split into several substrings. The reduce joins them before parsing the date.
joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s }
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
# Normal Auto-generated channels
# https://support.google.com/youtube/answer/2579942
# For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
(channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
auto_generated = true
end
end
end
tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map { |node| node["tabRenderer"]["title"].as_s.downcase }
end
sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
.try { |text| short_text_to_number(text.split(" ")[0]) } || 0
AboutChannel.new({
ucid: ucid,
author: author,
auto_generated: auto_generated,
author_url: author_url,
author_thumbnail: author_thumbnail,
banner: banner,
description_html: description_html,
paid: paid,
total_views: total_views,
sub_count: sub_count,
joined: joined,
is_family_friendly: is_family_friendly,
allowed_regions: allowed_regions,
related_channels: related_channels,
tabs: tabs,
})
end
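A hedged usage sketch for get_about_info above; the UCID is a placeholder, and passing nil for locale assumes translate tolerates it, as elsewhere in the codebase:

begin
  about = get_about_info("UCxxxxxxxxxxxxxxxxxxxxxx", nil) # hypothetical placeholder UCID
  puts "#{about.author}: #{about.sub_count} subscribers"
  puts "tabs: #{about.tabs.join(", ")}"
rescue ex : ChannelRedirect
  # the channel moved; retry with the canonical id
  puts "redirected to #{ex.channel_id}"
end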

View File

@@ -0,0 +1,310 @@
struct InvidiousChannel
include DB::Serializable
property id : String
property author : String
property updated : Time
property deleted : Bool
property subscribed : Time?
end
struct ChannelVideo
include DB::Serializable
property id : String
property title : String
property published : Time
property updated : Time
property ucid : String
property author : String
property length_seconds : Int32 = 0
property live_now : Bool = false
property premiere_timestamp : Time? = nil
property views : Int64? = nil
def to_json(locale, json : JSON::Builder)
json.object do
json.field "type", "shortVideo"
json.field "title", self.title
json.field "videoId", self.id
json.field "videoThumbnails" do
generate_thumbnails(json, self.id)
end
json.field "lengthSeconds", self.length_seconds
json.field "author", self.author
json.field "authorId", self.ucid
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "viewCount", self.views
end
end
def to_json(locale, json : JSON::Builder | Nil = nil)
if json
to_json(locale, json)
else
JSON.build do |json|
to_json(locale, json)
end
end
end
def to_xml(locale, query_params, xml : XML::Builder)
query_params["v"] = self.id
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{self.id}" }
xml.element("yt:videoId") { xml.text self.id }
xml.element("yt:channelId") { xml.text self.ucid }
xml.element("title") { xml.text self.title }
xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
xml.element("author") do
xml.element("name") { xml.text self.author }
xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
end
xml.element("content", type: "xhtml") do
xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
end
end
end
xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("updated") { xml.text self.updated.to_s("%Y-%m-%dT%H:%M:%S%:z") }
xml.element("media:group") do
xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
end
end
end
def to_xml(locale, xml : XML::Builder | Nil = nil)
if xml
to_xml(locale, xml)
else
XML.build do |xml|
to_xml(locale, xml)
end
end
end
def to_tuple
{% begin %}
{
{{*@type.instance_vars.map { |var| var.name }}}
}
{% end %}
end
end
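The to_tuple macro above expands, at compile time, to a tuple of every instance variable in declaration order, which is what lets fetch_channel below splat all ten ChannelVideo fields into the ten-placeholder INSERT. A minimal standalone sketch of the same expansion:

struct Point
  property x : Int32 = 1
  property y : Int32 = 2

  def to_tuple
    {% begin %}
      { {{*@type.instance_vars.map { |var| var.name }}} }
    {% end %}
  end
end

Point.new.to_tuple # => {1, 2}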
class ChannelRedirect < Exception
property channel_id : String
def initialize(@channel_id)
end
end
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
finished_channel = Channel(String | Nil).new
spawn do
active_threads = 0
active_channel = Channel(Nil).new
channels.each do |ucid|
if active_threads >= max_threads
active_channel.receive
active_threads -= 1
end
active_threads += 1
spawn do
begin
get_channel(ucid, db, refresh, pull_all_videos)
finished_channel.send(ucid)
rescue ex
finished_channel.send(nil)
ensure
active_channel.send(nil)
end
end
end
end
final = [] of String
channels.size.times do
if ucid = finished_channel.receive
final << ucid
end
end
return final
end
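get_batch_channels caps concurrency with two Channels: active_channel acts as a gate that keeps at most max_threads fibers in flight (despite the name, these are fibers, not OS threads), and finished_channel is drained exactly once per input. A minimal sketch of the same idiom with dummy work:

jobs = (1..100).to_a
done = Channel(Int32 | Nil).new
gate = Channel(Nil).new
max_fibers = 10

spawn do
  active = 0
  jobs.each do |n|
    if active >= max_fibers
      gate.receive
      active -= 1
    end
    active += 1
    spawn do
      begin
        done.send(n * 2) # stand-in for real work
      ensure
        gate.send(nil)
      end
    end
  end
end

results = [] of Int32
jobs.size.times do
  if r = done.receive
    results << r
  end
end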
def get_channel(id, db, refresh = true, pull_all_videos = true)
if channel = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)
if refresh && Time.utc - channel.updated > 10.minutes
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
db.exec("INSERT INTO channels VALUES (#{args}) \
ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: channel_array)
end
else
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
db.exec("INSERT INTO channels VALUES (#{args})", args: channel_array)
end
return channel
end
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
LOGGER.debug("fetch_channel: #{ucid}")
LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}, locale = #{locale}")
LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
rss = XML.parse_html(rss)
author = rss.xpath_node(%q(//feed/title))
if !author
raise InfoException.new("Deleted or invalid channel")
end
author = author.content
# Auto-generated channels
# https://support.google.com/youtube/answer/2579942
if author.ends_with?(" - Topic") ||
{"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
auto_generated = true
end
LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
video_id = entry.xpath_node("videoid").not_nil!.content
title = entry.xpath_node("title").not_nil!.content
published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
author = entry.xpath_node("author/name").not_nil!.content
ucid = entry.xpath_node("channelid").not_nil!.content
views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
views ||= 0_i64
channel_video = videos.select { |video| video.id == video_id }[0]?
length_seconds = channel_video.try &.length_seconds
length_seconds ||= 0
live_now = channel_video.try &.live_now
live_now ||= false
premiere_timestamp = channel_video.try &.premiere_timestamp
video = ChannelVideo.new({
id: video_id,
title: title,
published: published,
updated: Time.utc,
ucid: ucid,
author: author,
length_seconds: length_seconds,
live_now: live_now,
premiere_timestamp: premiere_timestamp,
views: views,
})
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updating or inserting video")
# We don't include 'premiere_timestamp' here because channel pages don't provide it,
# meaning the timestamp above is always null
was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
updated = $4, ucid = $5, author = $6, length_seconds = $7, \
live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)
if was_insert
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Inserted, updating subscriptions")
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid)
else
LOGGER.trace("fetch_channel: #{ucid} : video #{video_id} : Updated")
end
end
if pull_all_videos
page += 1
ids = [] of String
loop do
initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = extract_videos(initial_data, author, ucid)
count = videos.size
videos = videos.map { |video| ChannelVideo.new({
id: video.id,
title: video.title,
published: video.published,
updated: Time.utc,
ucid: video.ucid,
author: video.author,
length_seconds: video.length_seconds,
live_now: video.live_now,
premiere_timestamp: video.premiere_timestamp,
views: video.views,
}) }
videos.each do |video|
ids << video.id
# We are notified of Red videos elsewhere (PubSub), which includes a correct published date;
# since no published date is provided here, we can safely ignore them.
if Time.utc - video.published > 1.minute
was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
updated = $4, ucid = $5, author = $6, length_seconds = $7, \
live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) if was_insert
end
end
break if count < 25
page += 1
end
end
channel = InvidiousChannel.new({
id: ucid,
author: author,
updated: Time.utc,
deleted: false,
subscribed: nil,
})
return channel
end
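The RETURNING (xmax = 0) AS was_insert clause in fetch_channel is a Postgres trick: xmax is zero for a freshly inserted row version and non-zero when ON CONFLICT ... DO UPDATE rewrote an existing row, so a single round trip both upserts and reports which case occurred. A hedged sketch against a toy table (assumes db is a crystal-db Postgres connection and a table t(id int primary key, v text)):

was_insert = db.query_one(
  "INSERT INTO t VALUES ($1, $2)
   ON CONFLICT (id) DO UPDATE SET v = $2
   RETURNING (xmax = 0) AS was_insert",
  1, "hello", as: Bool
)
puts was_insert # true the first time, false once id 1 already exists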

View File

@@ -0,0 +1,275 @@
# TODO: Add "sort_by"
def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en")
if response.status_code != 200
response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en")
end
if response.status_code != 200
raise InfoException.new("This channel does not exist.")
end
ucid = response.body.match(/https:\/\/www.youtube.com\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/).not_nil!["ucid"]
if !continuation || continuation.empty?
initial_data = extract_initial_data(response.body)
body = initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select { |tab| tab["tabRenderer"]?.try &.["selected"].as_bool.== true }[0]?
if !body
raise InfoException.new("Could not extract community tab.")
end
body = body["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]
else
continuation = produce_channel_community_continuation(ucid, continuation)
headers = HTTP::Headers.new
headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]
session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
post_req = {
session_token: session_token,
}
response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
body = JSON.parse(response.body)
body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
body["response"]["continuationContents"]["backstageCommentsContinuation"]?
if !body
raise InfoException.new("Could not extract continuation.")
end
end
continuation = body["continuations"]?.try &.[0]["nextContinuationData"]["continuation"].as_s
posts = body["contents"].as_a
if message = posts[0]["messageRenderer"]?
error_message = (message["text"]["simpleText"]? ||
message["text"]["runs"]?.try &.[0]?.try &.["text"]?)
.try &.as_s || ""
raise InfoException.new(error_message)
end
response = JSON.build do |json|
json.object do
json.field "authorId", ucid
json.field "comments" do
json.array do
posts.each do |post|
comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? ||
post["backstageCommentsContinuation"]?
post = post["backstagePostThreadRenderer"]?.try &.["post"]["backstagePostRenderer"]? ||
post["commentThreadRenderer"]?.try &.["comment"]["commentRenderer"]?
next if !post
content_html = post["contentText"]?.try { |t| parse_content(t) } || ""
author = post["authorText"]?.try &.["simpleText"]? || ""
json.object do
json.field "author", author
json.field "authorThumbnails" do
json.array do
qualities = {32, 48, 76, 100, 176, 512}
author_thumbnail = post["authorThumbnail"]["thumbnails"].as_a[0]["url"].as_s
qualities.each do |quality|
json.object do
json.field "url", author_thumbnail.gsub(/s\d+-/, "s#{quality}-")
json.field "width", quality
json.field "height", quality
end
end
end
end
if post["authorEndpoint"]?
json.field "authorId", post["authorEndpoint"]["browseEndpoint"]["browseId"]
json.field "authorUrl", post["authorEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
else
json.field "authorId", ""
json.field "authorUrl", ""
end
published_text = post["publishedTimeText"]["runs"][0]["text"].as_s
published = decode_date(published_text.rchop(" (edited)"))
if published_text.includes?(" (edited)")
json.field "isEdited", true
else
json.field "isEdited", false
end
like_count = post["actionButtons"]["commentActionButtonsRenderer"]["likeButton"]["toggleButtonRenderer"]["accessibilityData"]["accessibilityData"]["label"]
.try &.as_s.gsub(/\D/, "").to_i? || 0
json.field "content", html_to_content(content_html)
json.field "contentHtml", content_html
json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
json.field "likeCount", like_count
json.field "commentId", post["postId"]? || post["commentId"]? || ""
json.field "authorIsChannelOwner", post["authorEndpoint"]["browseEndpoint"]["browseId"] == ucid
if attachment = post["backstageAttachment"]?
json.field "attachment" do
json.object do
case attachment.as_h
when .has_key?("videoRenderer")
attachment = attachment["videoRenderer"]
json.field "type", "video"
if !attachment["videoId"]?
error_message = (attachment["title"]["simpleText"]? ||
attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?)
json.field "error", error_message
else
video_id = attachment["videoId"].as_s
video_title = attachment["title"]["simpleText"]? || attachment["title"]["runs"]?.try &.[0]?.try &.["text"]?
json.field "title", video_title
json.field "videoId", video_id
json.field "videoThumbnails" do
generate_thumbnails(json, video_id)
end
json.field "lengthSeconds", decode_length_seconds(attachment["lengthText"]["simpleText"].as_s)
author_info = attachment["ownerText"]["runs"][0].as_h
json.field "author", author_info["text"].as_s
json.field "authorId", author_info["navigationEndpoint"]["browseEndpoint"]["browseId"]
json.field "authorUrl", author_info["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
# TODO: json.field "authorThumbnails", "channelThumbnailSupportedRenderers"
# TODO: json.field "authorVerified", "ownerBadges"
published = decode_date(attachment["publishedTimeText"]["simpleText"].as_s)
json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
view_count = attachment["viewCountText"]?.try &.["simpleText"].as_s.gsub(/\D/, "").to_i64? || 0_i64
json.field "viewCount", view_count
json.field "viewCountText", translate(locale, "`x` views", number_to_short_text(view_count))
end
when .has_key?("backstageImageRenderer")
attachment = attachment["backstageImageRenderer"]
json.field "type", "image"
json.field "imageThumbnails" do
json.array do
thumbnail = attachment["image"]["thumbnails"][0].as_h
width = thumbnail["width"].as_i
height = thumbnail["height"].as_i
aspect_ratio = (width.to_f / height.to_f)
url = thumbnail["url"].as_s.gsub(/=w\d+-h\d+(-p)?(-nd)?(-df)?(-rwa)?/, "=s640")
qualities = {320, 560, 640, 1280, 2000}
qualities.each do |quality|
json.object do
json.field "url", url.gsub(/=s\d+/, "=s#{quality}")
json.field "width", quality
json.field "height", (quality / aspect_ratio).ceil.to_i
end
end
end
end
# TODO
# when .has_key?("pollRenderer")
# attachment = attachment["pollRenderer"]
# json.field "type", "poll"
else
json.field "type", "unknown"
json.field "error", "Unrecognized attachment type."
end
end
end
end
if comments && (reply_count = (comments["backstageCommentsRenderer"]["moreText"]["simpleText"]? ||
comments["backstageCommentsRenderer"]["moreText"]["runs"]?.try &.[0]?.try &.["text"]?)
.try &.as_s.gsub(/\D/, "").to_i?)
continuation = comments["backstageCommentsRenderer"]["continuations"]?.try &.as_a[0]["nextContinuationData"]["continuation"].as_s
continuation ||= ""
json.field "replies" do
json.object do
json.field "replyCount", reply_count
json.field "continuation", extract_channel_community_cursor(continuation)
end
end
end
end
end
end
end
if body["continuations"]?
continuation = body["continuations"][0]["nextContinuationData"]["continuation"].as_s
json.field "continuation", extract_channel_community_cursor(continuation)
end
end
end
if format == "html"
response = JSON.parse(response)
content_html = template_youtube_comments(response, locale, thin_mode)
response = JSON.build do |json|
json.object do
json.field "contentHtml", content_html
end
end
end
return response
end
def produce_channel_community_continuation(ucid, cursor)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:string" => cursor || "",
},
}
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
def extract_channel_community_cursor(continuation)
object = URI.decode_www_form(continuation)
.try { |i| Base64.decode(i) }
.try { |i| IO::Memory.new(i) }
.try { |i| Protodec::Any.parse(i) }
.try { |i| i["80226972:0:embedded"]["3:1:base64"].as_h }
if object["53:2:embedded"]?.try &.["3:0:embedded"]?
object["53:2:embedded"]["3:0:embedded"]["2:0:string"] = object["53:2:embedded"]["3:0:embedded"]
.try { |i| i["2:0:base64"].as_h }
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i, padding: false) }
object["53:2:embedded"]["3:0:embedded"].as_h.delete("2:0:base64")
end
cursor = Protodec::Any.cast_json(object)
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
cursor
end
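produce_channel_community_continuation and extract_channel_community_cursor run the same protodec pipeline in opposite directions: hash to protobuf bytes, to urlsafe base64, to form encoding, and back. A minimal round trip of the generic pipeline (the field:index:type key layout on the decode side is inferred from the cursor keys used above):

object = {"1:string" => "hello", "2:varint" => 42_i64}
bytes = Protodec::Any.from_json(Protodec::Any.cast_json(object))
token = URI.encode_www_form(Base64.urlsafe_encode(bytes))

decoded = Protodec::Any.parse(IO::Memory.new(Base64.decode(URI.decode_www_form(token))))
puts decoded["1:0:string"] # => "hello"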

View File

@@ -0,0 +1,93 @@
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = request_youtube_api_browse(continuation)
continuationItems = response_json["onResponseReceivedActions"]?
.try &.[0]["appendContinuationItemsAction"]["continuationItems"]
return [] of SearchItem, nil if !continuationItems
items = [] of SearchItem
continuationItems.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
extract_item(item, author, ucid).try { |t| items << t }
}
continuation = continuationItems.as_a.last["continuationItemRenderer"]?
.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
else
url = "/channel/#{ucid}/playlists?flow=list&view=1"
case sort_by
when "last", "last_added"
#
when "oldest", "oldest_created"
url += "&sort=da"
when "newest", "newest_created"
url += "&sort=dd"
else nil # Ignore
end
response = YT_POOL.client &.get(url)
initial_data = extract_initial_data(response.body)
return [] of SearchItem, nil if !initial_data
items = extract_items(initial_data, author, ucid)
continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
end
return items, continuation
end
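A hedged usage sketch for fetch_channel_playlists: the first call scrapes the HTML playlists page and returns a continuation token, which later calls feed back through the API browse endpoint (ucid and author are placeholders):

items, continuation = fetch_channel_playlists("UCxxxxxxxxxxxxxxxxxxxxxx", "Some Channel", nil, "newest_created")
while continuation
  more, continuation = fetch_channel_playlists("UCxxxxxxxxxxxxxxxxxxxxxx", "Some Channel", continuation, "newest_created")
  items += more
end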
### NOTE: DEPRECATED
# Reason -> Unstable
# The Protobuf object must be provided with the id of the last playlist on the current "page"
# in order to fetch the next one accurately.
# (If the id isn't included, entries shift around erratically between pages,
# leading to repetitions and skipped entries.)
#
# Since it's impossible to produce the appropriate Protobuf without an id provided by the user,
# it's better to stick to the continuation tokens provided by the first request onward.
def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "playlists",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if cursor
cursor = Base64.urlsafe_encode(cursor, false) if !auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = cursor
end
if auto_generated
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x32_i64
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 1_i64
case sort
when "oldest", "oldest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 2_i64
when "newest", "newest_created"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 3_i64
when "last", "last_added"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 4_i64
else nil # Ignore
end
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end

View File

@@ -0,0 +1,89 @@
def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "videos",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
},
}
if !v2
if auto_generated
seed = Time.unix(1525757349)
until seed >= Time.utc
seed += 1.month
end
timestamp = seed - (page - 1).months
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
})))
end
case sort_by
when "newest"
when "popular"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
when "oldest"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
else nil # Ignore
end
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
continuation = object.try { |i| Protodec::Any.cast_json(object) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }
return continuation
end
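In the v2 branch above, paging is an offset wrapped in two layers of protobuf and base64: the inner message carries a varint of 30 * (page - 1), so page 3 encodes offset 60. A minimal sketch of just the inner layer:

page = 3
inner = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
  "1:varint" => 30_i64 * (page - 1),
})))
# "inner" is then wrapped once more under "1:string" and stored at "61:string"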
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)
return request_youtube_api_browse(continuation)
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
2.times do |i|
initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
videos.concat extract_videos(initial_data, author, ucid)
end
return videos.size, videos
end
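Note the page arithmetic in get_60_videos: for i = 0 and 1 it requests API pages page * 2 - 1 and page * 2, i.e. two 30-item pages per call. A hedged usage sketch (placeholder ucid and author):

count, videos = get_60_videos("UCxxxxxxxxxxxxxxxxxxxxxx", "Some Channel", 1, false)
puts "got #{count} videos" # pages 1 and 2 were requested internally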
def get_latest_videos(ucid)
initial_data = get_channel_videos_response(ucid)
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
return extract_videos(initial_data, author, ucid)
end
# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end