Compare commits

..

No commits in common. "d956b1826e15da6cfcd9a1531b0f1e6ef577dd10" and "69e2eaccc017cbf0caed2f79fa789cdb0b2a2cdf" have entirely different histories.

2 changed files with 52 additions and 56 deletions

View File

@ -1,31 +1,49 @@
private IMAGE_QUALITIES = {320, 560, 640, 1280, 2000} private IMAGE_QUALITIES = {320, 560, 640, 1280, 2000}
# TODO: Add "sort_by" # TODO: Add "sort_by"
def fetch_channel_community(ucid, cursor, locale, format, thin_mode) def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
if cursor.nil? response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en")
# Egljb21tdW5pdHk%3D is the protobuf object to load "community" if response.status_code != 200
initial_data = YoutubeAPI.browse(ucid, params: "Egljb21tdW5pdHk%3D") response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en")
end
items = [] of JSON::Any if response.status_code != 200
extract_items(initial_data) do |item| raise NotFoundException.new("This channel does not exist.")
items << item end
ucid = response.body.match(/https:\/\/www.youtube.com\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/).not_nil!["ucid"]
if !continuation || continuation.empty?
initial_data = extract_initial_data(response.body)
body = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]
if !body
raise InfoException.new("Could not extract community tab.")
end end
else else
continuation = produce_channel_community_continuation(ucid, cursor) continuation = produce_channel_community_continuation(ucid, continuation)
initial_data = YoutubeAPI.browse(continuation: continuation)
container = initial_data.dig?("continuationContents", "itemSectionContinuation", "contents") headers = HTTP::Headers.new
headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]
raise InfoException.new("Can't extract community data") if container.nil? session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
post_req = {
session_token: session_token,
}
items = container.as_a body = YoutubeAPI.browse(continuation)
body = body.dig?("continuationContents", "itemSectionContinuation") ||
body.dig?("continuationContents", "backstageCommentsContinuation")
if !body
raise InfoException.new("Could not extract continuation.")
end
end end
return extract_channel_community(items, ucid: ucid, locale: locale, format: format, thin_mode: thin_mode) posts = body["contents"].as_a
end
def extract_channel_community(items, *, ucid, locale, format, thin_mode) if message = posts[0]["messageRenderer"]?
if message = items[0]["messageRenderer"]?
error_message = (message["text"]["simpleText"]? || error_message = (message["text"]["simpleText"]? ||
message["text"]["runs"]?.try &.[0]?.try &.["text"]?) message["text"]["runs"]?.try &.[0]?.try &.["text"]?)
.try &.as_s || "" .try &.as_s || ""
@ -41,7 +59,7 @@ def extract_channel_community(items, *, ucid, locale, format, thin_mode)
json.field "authorId", ucid json.field "authorId", ucid
json.field "comments" do json.field "comments" do
json.array do json.array do
items.each do |post| posts.each do |post|
comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? || comments = post["backstagePostThreadRenderer"]?.try &.["comments"]? ||
post["backstageCommentsContinuation"]? post["backstageCommentsContinuation"]?
@ -198,22 +216,6 @@ def extract_channel_community(items, *, ucid, locale, format, thin_mode)
parse_item(attachment) parse_item(attachment)
.as(SearchPlaylist) .as(SearchPlaylist)
.to_json(locale, json) .to_json(locale, json)
when .has_key?("quizRenderer")
json.object do
attachment = attachment["quizRenderer"]
json.field "type", "quiz"
json.field "totalVotes", short_text_to_number(attachment["totalVotes"]["simpleText"].as_s.split(" ")[0])
json.field "choices" do
json.array do
attachment["choices"].as_a.each do |choice|
json.object do
json.field "text", choice.dig("text", "runs", 0, "text").as_s
json.field "isCorrect", choice["isCorrect"].as_bool
end
end
end
end
end
else else
json.object do json.object do
json.field "type", "unknown" json.field "type", "unknown"
@ -240,7 +242,7 @@ def extract_channel_community(items, *, ucid, locale, format, thin_mode)
end end
end end
end end
if cont = items.dig?(-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token") if cont = posts.dig?(-1, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token")
json.field "continuation", extract_channel_community_cursor(cont.as_s) json.field "continuation", extract_channel_community_cursor(cont.as_s)
end end
end end

View File

@ -608,25 +608,19 @@ private module Extractors
private def self.unpack_section_list(contents) private def self.unpack_section_list(contents)
raw_items = [] of JSON::Any raw_items = [] of JSON::Any
contents.as_a.each do |item| contents.as_a.each do |renderer_container|
if item_section_content = item.dig?("itemSectionRenderer", "contents") renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
raw_items += self.unpack_item_section(item_section_content)
else
raw_items << item
end
end
return raw_items
end
private def self.unpack_item_section(contents)
raw_items = [] of JSON::Any
contents.as_a.each do |item|
# Category extraction # Category extraction
if container = item.dig?("gridRenderer", "items") || item.dig?("items") if items_container = renderer_container_contents["shelfRenderer"]?
raw_items += container.as_a raw_items << renderer_container_contents
next
elsif items_container = renderer_container_contents["gridRenderer"]?
else else
items_container = renderer_container_contents
end
items_container["items"]?.try &.as_a.each do |item|
raw_items << item raw_items << item
end end
end end