From 4b3ac1a757a5ee14919e83a84de31a3d0bd14a4c Mon Sep 17 00:00:00 2001
From: syeopite
Date: Sun, 23 Jul 2023 03:22:19 -0700
Subject: [PATCH] Add method to parse transcript JSON into structs

---
 src/invidious/videos/transcript.cr | 47 ++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr
index c50f7569..0d8b0b25 100644
--- a/src/invidious/videos/transcript.cr
+++ b/src/invidious/videos/transcript.cr
@@ -1,6 +1,9 @@
 module Invidious::Videos
   # Namespace for methods primarily relating to Transcripts
   module Transcript
+    # One transcript segment: its start and end offsets and the text of the segment.
+    record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String
+
     def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
       if !auto_generated
         is_auto_generated = ""
@@ -30,5 +33,49 @@ module Invidious::Videos
 
       return params
     end
+
+    # Builds the WebVTT header block for a transcript in *target_language*.
+    #
+    # TODO: *initial_data* is not consumed yet, so no cues are emitted; only the header is returned.
+    def self.convert_transcripts_to_vtt(initial_data : JSON::Any, target_language : String) : String
+      # Convert into TranscriptLine
+
+      String.build do |vtt|
+        vtt << <<-END_VTT
+        WEBVTT
+        Kind: captions
+        Language: #{target_language}
+
+
+        END_VTT
+
+        vtt << "\n\n"
+      end
+    end
+
+    # Extracts every transcript segment from *initial_data* as a `TranscriptLine`.
+    #
+    # Reads the `initialSegments` array nested under the first entry of `actions`. The lookup
+    # uses `dig` rather than `dig?`, so a missing key along the path raises instead of returning nil.
+    def self.parse(initial_data : Hash(String, JSON::Any))
+      body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
+        "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer",
+        "initialSegments").as_a
+
+      lines = [] of TranscriptLine
+      body.each do |line|
+        line = line["transcriptSegmentRenderer"]
+        # The timestamps are read as strings holding whole milliseconds.
+        start_ms = line["startMs"].as_s.to_i.millisecond
+        end_ms = line["endMs"].as_s.to_i.millisecond
+
+        # Fall back to an empty string when `extract_text` yields nil.
+        text = extract_text(line["snippet"]) || ""
+
+        lines << TranscriptLine.new(start_ms, end_ms, text)
+      end
+
+      return lines
+    end
   end
 end