From d1409d0f08c51f1a6e9fcad690f199f247ff005a Mon Sep 17 00:00:00 2001 From: Chunky programmer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Tue, 6 Jun 2023 19:31:55 -0400 Subject: [PATCH 1/5] Parse hashtag header when getting the first hashtag page --- src/invidious/hashtag.cr | 142 +++++++++++++++++++++----- src/invidious/routes/api/v1/search.cr | 12 +-- src/invidious/routes/search.cr | 3 +- 3 files changed, 123 insertions(+), 34 deletions(-) diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr index d9d584c9..15faba7c 100644 --- a/src/invidious/hashtag.cr +++ b/src/invidious/hashtag.cr @@ -1,42 +1,138 @@ module Invidious::Hashtag extend self - def fetch(hashtag : String, page : Int, region : String? = nil) : Array(SearchItem) + struct HashtagPage + include DB::Serializable + + property videos : Array(SearchItem) | Array(Video) + property header : HashtagHeader? + property has_next_continuation : Bool + + def to_json(locale : String?, json : JSON::Builder) + json.object do + json.field "type", "hashtag" + if self.header != nil + json.field "header" do + self.header.to_json(json) + end + end + json.field "results" do + json.array do + self.videos.each do |item| + item.to_json(locale, json) + end + end + end + json.field "hasNextPage", self.has_next_continuation + end + end + + # TODO: remove the locale and follow the crystal convention + def to_json(locale : String?, _json : Nil) + JSON.build do |json| + to_json(locale, json) + end + end + + def to_json(json : JSON::Builder) + to_json(nil, json) + end + end + + struct HashtagHeader + include DB::Serializable + + property tag : String + property channel_count : Int64 + property video_count : Int64 + + def to_json(json : JSON::Builder) + json.object do + json.field "hashtag", self.tag + json.field "channelCount", self.channel_count + json.field "videoCount", self.video_count + end + end + + def to_json(_json : Nil) + JSON.build do |json| + to_json(json) + end + end + end + + def fetch(hashtag : String, page : Int, region : String? = nil) : HashtagPage cursor = (page - 1) * 60 - ctoken = generate_continuation(hashtag, cursor) - + header = nil client_config = YoutubeAPI::ClientConfig.new(region: region) - response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config) + item = generate_continuation(hashtag, cursor) + # item is a ctoken + if cursor > 0 + response = YoutubeAPI.browse(continuation: item, client_config: client_config) + else + # item browses the first page (including metadata) + response = YoutubeAPI.browse("FEhashtag", params: item, client_config: client_config) + if item_contents = response.dig?("header", "hashtagHeaderRenderer") + header = parse_hashtag_renderer(item_contents) + end + end - items, _ = extract_items(response) - return items + items, next_continuation = extract_items(response) + return HashtagPage.new({ + videos: items, + header: header, + has_next_continuation: next_continuation != nil, + }) end def generate_continuation(hashtag : String, cursor : Int) object = { - "80226972:embedded" => { - "2:string" => "FEhashtag", - "3:base64" => { - "1:varint" => 60_i64, # result count - "15:base64" => { - "1:varint" => cursor.to_i64, - "2:varint" => 0_i64, - }, - "93:2:embedded" => { - "1:string" => hashtag, - "2:varint" => 0_i64, - "3:varint" => 1_i64, - }, - }, - "35:string" => "browse-feedFEhashtag", + "93:2:embedded" => { + "1:string" => hashtag, + "2:varint" => 0_i64, + "3:varint" => 1_i64, }, } + if cursor > 0 + object = { + "80226972:embedded" => { + "2:string" => "FEhashtag", + "3:base64" => { + "1:varint" => 60_i64, # result count + "15:base64" => { + "1:varint" => cursor.to_i64, + "2:varint" => 0_i64, + }, + "93:2:embedded" => { + "1:string" => hashtag, + "2:varint" => 0_i64, + "3:varint" => 1_i64, + }, + }, + "35:string" => "browse-feedFEhashtag", + }, + } + end - continuation = object.try { |i| Protodec::Any.cast_json(i) } + return object.try { |i| Protodec::Any.cast_json(i) } .try { |i| Protodec::Any.from_json(i) } .try { |i| Base64.urlsafe_encode(i) } .try { |i| URI.encode_www_form(i) } + end - return continuation + def parse_hashtag_renderer(item_contents) + info = extract_text(item_contents.dig?("hashtagInfoText")) || "" + + regex_match = /(?\d+\S)\D+(?\d+\S)/.match(info) + + hashtag = extract_text(item_contents.dig?("hashtag")) || "" + videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0") + channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0") + + return HashtagHeader.new({ + tag: hashtag, + channel_count: channels, + video_count: videos, + }) end end diff --git a/src/invidious/routes/api/v1/search.cr b/src/invidious/routes/api/v1/search.cr index 2922b060..1777060a 100644 --- a/src/invidious/routes/api/v1/search.cr +++ b/src/invidious/routes/api/v1/search.cr @@ -69,21 +69,13 @@ module Invidious::Routes::API::V1::Search env.response.content_type = "application/json" begin - results = Invidious::Hashtag.fetch(hashtag, page, region) + hashtagPage = Invidious::Hashtag.fetch(hashtag, page, region) rescue ex return error_json(400, ex) end JSON.build do |json| - json.object do - json.field "results" do - json.array do - results.each do |item| - item.to_json(locale, json) - end - end - end - end + hashtagPage.to_json(locale, json) end end end diff --git a/src/invidious/routes/search.cr b/src/invidious/routes/search.cr index 44970922..0a6665b4 100644 --- a/src/invidious/routes/search.cr +++ b/src/invidious/routes/search.cr @@ -101,7 +101,8 @@ module Invidious::Routes::Search end begin - items = Invidious::Hashtag.fetch(hashtag, page) + hashtagPage = Invidious::Hashtag.fetch(hashtag, page) + items = hashtagPage.videos rescue ex return error_template(500, ex) end From dc94218dbdeaef0aff02fae6c3afd4272f1987bb Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Sun, 16 Jul 2023 13:31:31 -0700 Subject: [PATCH 2/5] Hashtag: Show next page if a continuation exists --- src/invidious/routes/search.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/invidious/routes/search.cr b/src/invidious/routes/search.cr index 0a6665b4..aa06c191 100644 --- a/src/invidious/routes/search.cr +++ b/src/invidious/routes/search.cr @@ -112,7 +112,7 @@ module Invidious::Routes::Search page_nav_html = Frontend::Pagination.nav_numeric(locale, base_url: "/hashtag/#{hashtag_encoded}", current_page: page, - show_next: (items.size >= 60) + show_next: hashtagPage.has_next_continuation ) templated "hashtag" From e6e485c6918bd0e4cddeb212ff2095734b9379d6 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Thu, 7 Sep 2023 00:54:21 -0400 Subject: [PATCH 3/5] Use `SearchHashtag` for parsing the header of hashtag pages --- src/invidious/hashtag.cr | 59 +++----------------------- src/invidious/yt_backend/extractors.cr | 14 ++++-- 2 files changed, 16 insertions(+), 57 deletions(-) diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr index 15faba7c..a0f7f718 100644 --- a/src/invidious/hashtag.cr +++ b/src/invidious/hashtag.cr @@ -5,15 +5,15 @@ module Invidious::Hashtag include DB::Serializable property videos : Array(SearchItem) | Array(Video) - property header : HashtagHeader? + property header : SearchHashtag? property has_next_continuation : Bool def to_json(locale : String?, json : JSON::Builder) json.object do - json.field "type", "hashtag" + json.field "type", "hashtagPage" if self.header != nil json.field "header" do - self.header.to_json(json) + self.header.try &.as(SearchHashtag).to_json(locale, json) end end json.field "results" do @@ -26,39 +26,6 @@ module Invidious::Hashtag json.field "hasNextPage", self.has_next_continuation end end - - # TODO: remove the locale and follow the crystal convention - def to_json(locale : String?, _json : Nil) - JSON.build do |json| - to_json(locale, json) - end - end - - def to_json(json : JSON::Builder) - to_json(nil, json) - end - end - - struct HashtagHeader - include DB::Serializable - - property tag : String - property channel_count : Int64 - property video_count : Int64 - - def to_json(json : JSON::Builder) - json.object do - json.field "hashtag", self.tag - json.field "channelCount", self.channel_count - json.field "videoCount", self.video_count - end - end - - def to_json(_json : Nil) - JSON.build do |json| - to_json(json) - end - end end def fetch(hashtag : String, page : Int, region : String? = nil) : HashtagPage @@ -72,8 +39,8 @@ module Invidious::Hashtag else # item browses the first page (including metadata) response = YoutubeAPI.browse("FEhashtag", params: item, client_config: client_config) - if item_contents = response.dig?("header", "hashtagHeaderRenderer") - header = parse_hashtag_renderer(item_contents) + if item_contents = response.dig?("header") + header = parse_item(item_contents).try &.as(SearchHashtag) end end @@ -119,20 +86,4 @@ module Invidious::Hashtag .try { |i| Base64.urlsafe_encode(i) } .try { |i| URI.encode_www_form(i) } end - - def parse_hashtag_renderer(item_contents) - info = extract_text(item_contents.dig?("hashtagInfoText")) || "" - - regex_match = /(?\d+\S)\D+(?\d+\S)/.match(info) - - hashtag = extract_text(item_contents.dig?("hashtag")) || "" - videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0") - channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0") - - return HashtagHeader.new({ - tag: hashtag, - channel_count: channels, - video_count: videos, - }) - end end diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 4074de86..331ff2b5 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -225,9 +225,11 @@ private module Parsers # # A `hashtagTileRenderer` is a kind of search result. # It can be found when searching for any hashtag (e.g "#hi" or "#shorts") + # + # A `hashtagHeaderRenderer` is displayed on the first page of the hashtag page. module HashtagRendererParser def self.process(item : JSON::Any, author_fallback : AuthorFallback) - if item_contents = item["hashtagTileRenderer"]? + if item_contents = (item["hashtagTileRenderer"]? || item["hashtagHeaderRenderer"]?) return self.parse(item_contents) end end @@ -239,8 +241,14 @@ private module Parsers url = item_contents.dig?("onTapCommand", "commandMetadata", "webCommandMetadata", "url").try &.as_s url ||= URI.encode_path("/hashtag/#{title.lchop('#')}") - video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" - channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" + if info = extract_text(item_contents.dig?("hashtagInfoText")) + regex_match = /(?\d+\S)\D+(?\d+\S)/.match(info) + videos = regex_match.try &.["videos"]?.try &.to_s + channels = regex_match.try &.["channels"]?.try &.to_s + else + video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" + channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" + end # Fallback for video/channel counts if channel_count_txt.nil? || video_count_txt.nil? From a4ab4f1d0b005b0d324a77a445736f773a0e1096 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Thu, 7 Mar 2024 21:41:22 -0500 Subject: [PATCH 4/5] add support for new hashtag header format --- src/invidious/yt_backend/extractors.cr | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 331ff2b5..259910c0 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -229,31 +229,28 @@ private module Parsers # A `hashtagHeaderRenderer` is displayed on the first page of the hashtag page. module HashtagRendererParser def self.process(item : JSON::Any, author_fallback : AuthorFallback) - if item_contents = (item["hashtagTileRenderer"]? || item["hashtagHeaderRenderer"]?) + if item_contents = (item["hashtagTileRenderer"]? || item["hashtagHeaderRenderer"]? || item["pageHeaderRenderer"]?) return self.parse(item_contents) end end private def self.parse(item_contents) - title = extract_text(item_contents["hashtag"]).not_nil! # E.g "#hi" + title = item_contents.dig?("pageTitle").try &.as_s + title ||= extract_text(item_contents["hashtag"]).not_nil! # E.g "#hi" # E.g "/hashtag/hi" url = item_contents.dig?("onTapCommand", "commandMetadata", "webCommandMetadata", "url").try &.as_s url ||= URI.encode_path("/hashtag/#{title.lchop('#')}") - if info = extract_text(item_contents.dig?("hashtagInfoText")) - regex_match = /(?\d+\S)\D+(?\d+\S)/.match(info) - videos = regex_match.try &.["videos"]?.try &.to_s - channels = regex_match.try &.["channels"]?.try &.to_s - else - video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" - channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" - end + video_count_txt = extract_text(item_contents["hashtagVideoCount"]?) # E.g "203K videos" + channel_count_txt = extract_text(item_contents["hashtagChannelCount"]?) # E.g "81K channels" # Fallback for video/channel counts if channel_count_txt.nil? || video_count_txt.nil? + info_text = (item_contents.dig?("content", "pageHeaderViewModel", "metadata", "contentMetadataViewModel", "metadataRows", 0, "metadataParts", 0, "text", "content").try &.as_s || + extract_text(item_contents.dig?("hashtagInfoText"))).try &.split(" • ") + # E.g: "203K videos • 81K channels" - info_text = extract_text(item_contents["hashtagInfoText"]?).try &.split(" • ") if info_text && info_text.size == 2 video_count_txt ||= info_text[0] From 29c0036b282c1afd82c6f32aab6ffe31fb48be6e Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Sat, 24 Aug 2024 18:05:35 -0400 Subject: [PATCH 5/5] Fix Ameba error --- src/invidious/routes/api/v1/search.cr | 4 ++-- src/invidious/routes/search.cr | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/invidious/routes/api/v1/search.cr b/src/invidious/routes/api/v1/search.cr index 1777060a..8c787a67 100644 --- a/src/invidious/routes/api/v1/search.cr +++ b/src/invidious/routes/api/v1/search.cr @@ -69,13 +69,13 @@ module Invidious::Routes::API::V1::Search env.response.content_type = "application/json" begin - hashtagPage = Invidious::Hashtag.fetch(hashtag, page, region) + hashtag_page = Invidious::Hashtag.fetch(hashtag, page, region) rescue ex return error_json(400, ex) end JSON.build do |json| - hashtagPage.to_json(locale, json) + hashtag_page.to_json(locale, json) end end end diff --git a/src/invidious/routes/search.cr b/src/invidious/routes/search.cr index aa06c191..da6ce18d 100644 --- a/src/invidious/routes/search.cr +++ b/src/invidious/routes/search.cr @@ -101,8 +101,8 @@ module Invidious::Routes::Search end begin - hashtagPage = Invidious::Hashtag.fetch(hashtag, page) - items = hashtagPage.videos + hashtag_page = Invidious::Hashtag.fetch(hashtag, page) + items = hashtag_page.videos rescue ex return error_template(500, ex) end @@ -112,7 +112,7 @@ module Invidious::Routes::Search page_nav_html = Frontend::Pagination.nav_numeric(locale, base_url: "/hashtag/#{hashtag_encoded}", current_page: page, - show_next: hashtagPage.has_next_continuation + show_next: hashtag_page.has_next_continuation ) templated "hashtag"