forked from midou/invidious
Merge branch 'iv-org:master' into dark-mode-contrast
This commit is contained in:
commit
8542c974c8
7
.github/workflows/ci.yml
vendored
7
.github/workflows/ci.yml
vendored
@ -38,11 +38,10 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
stable: [true]
|
stable: [true]
|
||||||
crystal:
|
crystal:
|
||||||
- 1.4.1
|
|
||||||
- 1.5.1
|
|
||||||
- 1.6.2
|
- 1.6.2
|
||||||
- 1.7.3
|
- 1.7.3
|
||||||
- 1.8.1
|
- 1.8.2
|
||||||
|
- 1.9.2
|
||||||
include:
|
include:
|
||||||
- crystal: nightly
|
- crystal: nightly
|
||||||
stable: false
|
stable: false
|
||||||
@ -53,7 +52,7 @@ jobs:
|
|||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
- name: Install Crystal
|
- name: Install Crystal
|
||||||
uses: crystal-lang/install-crystal@v1.7.0
|
uses: crystal-lang/install-crystal@v1.8.0
|
||||||
with:
|
with:
|
||||||
crystal: ${{ matrix.crystal }}
|
crystal: ${{ matrix.crystal }}
|
||||||
|
|
||||||
|
5
.github/workflows/container-release.yml
vendored
5
.github/workflows/container-release.yml
vendored
@ -25,9 +25,9 @@ jobs:
|
|||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
- name: Install Crystal
|
- name: Install Crystal
|
||||||
uses: crystal-lang/install-crystal@v1.6.0
|
uses: crystal-lang/install-crystal@v1.8.0
|
||||||
with:
|
with:
|
||||||
crystal: 1.5.0
|
crystal: 1.9.2
|
||||||
|
|
||||||
- name: Run lint
|
- name: Run lint
|
||||||
run: |
|
run: |
|
||||||
@ -77,4 +77,3 @@ jobs:
|
|||||||
tags: quay.io/invidious/invidious:${{ github.sha }}-arm64,quay.io/invidious/invidious:latest-arm64
|
tags: quay.io/invidious/invidious:${{ github.sha }}-arm64,quay.io/invidious/invidious:latest-arm64
|
||||||
build-args: |
|
build-args: |
|
||||||
"release=1"
|
"release=1"
|
||||||
|
|
||||||
|
2
.github/workflows/stale.yml
vendored
2
.github/workflows/stale.yml
vendored
@ -14,7 +14,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
days-before-stale: 365
|
days-before-stale: 365
|
||||||
days-before-pr-stale: 45 # PRs should be active. Anything that hasn't had activity in more than 45 days should be considered abandoned.
|
days-before-pr-stale: 90
|
||||||
days-before-close: 30
|
days-before-close: 30
|
||||||
exempt-pr-labels: blocked
|
exempt-pr-labels: blocked
|
||||||
stale-issue-message: 'This issue has been automatically marked as stale and will be closed in 30 days because it has not had recent activity and is much likely outdated. If you think this issue is still relevant and applicable, you just have to post a comment and it will be unmarked.'
|
stale-issue-message: 'This issue has been automatically marked as stale and will be closed in 30 days because it has not had recent activity and is much likely outdated. If you think this issue is still relevant and applicable, you just have to post a comment and it will be unmarked.'
|
||||||
|
@ -161,6 +161,19 @@ https_only: false
|
|||||||
#force_resolve:
|
#force_resolve:
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
## Use Innertube's transcripts API instead of timedtext for closed captions
|
||||||
|
##
|
||||||
|
## Useful for larger instances as InnerTube is **not ratelimited**. See https://github.com/iv-org/invidious/issues/2567
|
||||||
|
##
|
||||||
|
## Subtitle experience may differ slightly on Invidious.
|
||||||
|
##
|
||||||
|
## Accepted values: true, false
|
||||||
|
## Default: false
|
||||||
|
##
|
||||||
|
# use_innertube_for_captions: false
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Logging
|
# Logging
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
|
@ -127,6 +127,9 @@ class Config
|
|||||||
# Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
|
# Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
|
||||||
property pool_size : Int32 = 100
|
property pool_size : Int32 = 100
|
||||||
|
|
||||||
|
# Use Innertube's transcripts API instead of timedtext for closed captions
|
||||||
|
property use_innertube_for_captions : Bool = false
|
||||||
|
|
||||||
# Saved cookies in "name1=value1; name2=value2..." format
|
# Saved cookies in "name1=value1; name2=value2..." format
|
||||||
@[YAML::Field(converter: Preferences::StringToCookies)]
|
@[YAML::Field(converter: Preferences::StringToCookies)]
|
||||||
property cookies : HTTP::Cookies = HTTP::Cookies.new
|
property cookies : HTTP::Cookies = HTTP::Cookies.new
|
||||||
|
@ -7,7 +7,7 @@ module Invidious::Frontend::WatchPage
|
|||||||
getter full_videos : Array(Hash(String, JSON::Any))
|
getter full_videos : Array(Hash(String, JSON::Any))
|
||||||
getter video_streams : Array(Hash(String, JSON::Any))
|
getter video_streams : Array(Hash(String, JSON::Any))
|
||||||
getter audio_streams : Array(Hash(String, JSON::Any))
|
getter audio_streams : Array(Hash(String, JSON::Any))
|
||||||
getter captions : Array(Invidious::Videos::Caption)
|
getter captions : Array(Invidious::Videos::Captions::Metadata)
|
||||||
|
|
||||||
def initialize(
|
def initialize(
|
||||||
@full_videos,
|
@full_videos,
|
||||||
|
@ -89,6 +89,7 @@ struct Playlist
|
|||||||
property views : Int64
|
property views : Int64
|
||||||
property updated : Time
|
property updated : Time
|
||||||
property thumbnail : String?
|
property thumbnail : String?
|
||||||
|
property subtitle : String?
|
||||||
|
|
||||||
def to_json(offset, json : JSON::Builder, video_id : String? = nil)
|
def to_json(offset, json : JSON::Builder, video_id : String? = nil)
|
||||||
json.object do
|
json.object do
|
||||||
@ -100,6 +101,7 @@ struct Playlist
|
|||||||
json.field "author", self.author
|
json.field "author", self.author
|
||||||
json.field "authorId", self.ucid
|
json.field "authorId", self.ucid
|
||||||
json.field "authorUrl", "/channel/#{self.ucid}"
|
json.field "authorUrl", "/channel/#{self.ucid}"
|
||||||
|
json.field "subtitle", self.subtitle
|
||||||
|
|
||||||
json.field "authorThumbnails" do
|
json.field "authorThumbnails" do
|
||||||
json.array do
|
json.array do
|
||||||
@ -356,6 +358,8 @@ def fetch_playlist(plid : String)
|
|||||||
updated = Time.utc
|
updated = Time.utc
|
||||||
video_count = 0
|
video_count = 0
|
||||||
|
|
||||||
|
subtitle = extract_text(initial_data.dig?("header", "playlistHeaderRenderer", "subtitle"))
|
||||||
|
|
||||||
playlist_info["stats"]?.try &.as_a.each do |stat|
|
playlist_info["stats"]?.try &.as_a.each do |stat|
|
||||||
text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s
|
text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s
|
||||||
next if !text
|
next if !text
|
||||||
@ -397,6 +401,7 @@ def fetch_playlist(plid : String)
|
|||||||
views: views,
|
views: views,
|
||||||
updated: updated,
|
updated: updated,
|
||||||
thumbnail: thumbnail,
|
thumbnail: thumbnail,
|
||||||
|
subtitle: subtitle,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -87,70 +87,78 @@ module Invidious::Routes::API::V1::Videos
|
|||||||
caption = caption[0]
|
caption = caption[0]
|
||||||
end
|
end
|
||||||
|
|
||||||
url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target
|
if CONFIG.use_innertube_for_captions
|
||||||
|
params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated)
|
||||||
|
initial_data = YoutubeAPI.get_transcript(params)
|
||||||
|
|
||||||
# Auto-generated captions often have cues that aren't aligned properly with the video,
|
webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code)
|
||||||
# as well as some other markup that makes it cumbersome, so we try to fix that here
|
else
|
||||||
if caption.name.includes? "auto-generated"
|
# Timedtext API handling
|
||||||
caption_xml = YT_POOL.client &.get(url).body
|
url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target
|
||||||
|
|
||||||
if caption_xml.starts_with?("<?xml")
|
# Auto-generated captions often have cues that aren't aligned properly with the video,
|
||||||
webvtt = caption.timedtext_to_vtt(caption_xml, tlang)
|
# as well as some other markup that makes it cumbersome, so we try to fix that here
|
||||||
else
|
if caption.name.includes? "auto-generated"
|
||||||
caption_xml = XML.parse(caption_xml)
|
caption_xml = YT_POOL.client &.get(url).body
|
||||||
|
|
||||||
webvtt = String.build do |str|
|
if caption_xml.starts_with?("<?xml")
|
||||||
str << <<-END_VTT
|
webvtt = caption.timedtext_to_vtt(caption_xml, tlang)
|
||||||
WEBVTT
|
else
|
||||||
Kind: captions
|
caption_xml = XML.parse(caption_xml)
|
||||||
Language: #{tlang || caption.language_code}
|
|
||||||
|
webvtt = String.build do |str|
|
||||||
|
str << <<-END_VTT
|
||||||
|
WEBVTT
|
||||||
|
Kind: captions
|
||||||
|
Language: #{tlang || caption.language_code}
|
||||||
|
|
||||||
|
|
||||||
END_VTT
|
END_VTT
|
||||||
|
|
||||||
caption_nodes = caption_xml.xpath_nodes("//transcript/text")
|
caption_nodes = caption_xml.xpath_nodes("//transcript/text")
|
||||||
caption_nodes.each_with_index do |node, i|
|
caption_nodes.each_with_index do |node, i|
|
||||||
start_time = node["start"].to_f.seconds
|
start_time = node["start"].to_f.seconds
|
||||||
duration = node["dur"]?.try &.to_f.seconds
|
duration = node["dur"]?.try &.to_f.seconds
|
||||||
duration ||= start_time
|
duration ||= start_time
|
||||||
|
|
||||||
if caption_nodes.size > i + 1
|
if caption_nodes.size > i + 1
|
||||||
end_time = caption_nodes[i + 1]["start"].to_f.seconds
|
end_time = caption_nodes[i + 1]["start"].to_f.seconds
|
||||||
else
|
else
|
||||||
end_time = start_time + duration
|
end_time = start_time + duration
|
||||||
|
end
|
||||||
|
|
||||||
|
start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
|
||||||
|
end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
|
||||||
|
|
||||||
|
text = HTML.unescape(node.content)
|
||||||
|
text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
|
||||||
|
text = text.gsub(/<\/font>/, "")
|
||||||
|
if md = text.match(/(?<name>.*) : (?<text>.*)/)
|
||||||
|
text = "<v #{md["name"]}>#{md["text"]}</v>"
|
||||||
|
end
|
||||||
|
|
||||||
|
str << <<-END_CUE
|
||||||
|
#{start_time} --> #{end_time}
|
||||||
|
#{text}
|
||||||
|
|
||||||
|
|
||||||
|
END_CUE
|
||||||
end
|
end
|
||||||
|
|
||||||
start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
|
|
||||||
end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
|
|
||||||
|
|
||||||
text = HTML.unescape(node.content)
|
|
||||||
text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
|
|
||||||
text = text.gsub(/<\/font>/, "")
|
|
||||||
if md = text.match(/(?<name>.*) : (?<text>.*)/)
|
|
||||||
text = "<v #{md["name"]}>#{md["text"]}</v>"
|
|
||||||
end
|
|
||||||
|
|
||||||
str << <<-END_CUE
|
|
||||||
#{start_time} --> #{end_time}
|
|
||||||
#{text}
|
|
||||||
|
|
||||||
|
|
||||||
END_CUE
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
else
|
|
||||||
# Some captions have "align:[start/end]" and "position:[num]%"
|
|
||||||
# attributes. Those are causing issues with VideoJS, which is unable
|
|
||||||
# to properly align the captions on the video, so we remove them.
|
|
||||||
#
|
|
||||||
# See: https://github.com/iv-org/invidious/issues/2391
|
|
||||||
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
|
|
||||||
if webvtt.starts_with?("<?xml")
|
|
||||||
webvtt = caption.timedtext_to_vtt(webvtt)
|
|
||||||
else
|
else
|
||||||
|
# Some captions have "align:[start/end]" and "position:[num]%"
|
||||||
|
# attributes. Those are causing issues with VideoJS, which is unable
|
||||||
|
# to properly align the captions on the video, so we remove them.
|
||||||
|
#
|
||||||
|
# See: https://github.com/iv-org/invidious/issues/2391
|
||||||
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
|
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
|
||||||
.gsub(/([0-9:.]{12} --> [0-9:.]{12}).+/, "\\1")
|
if webvtt.starts_with?("<?xml")
|
||||||
|
webvtt = caption.timedtext_to_vtt(webvtt)
|
||||||
|
else
|
||||||
|
webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
|
||||||
|
.gsub(/([0-9:.]{12} --> [0-9:.]{12}).+/, "\\1")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ struct Video
|
|||||||
property updated : Time
|
property updated : Time
|
||||||
|
|
||||||
@[DB::Field(ignore: true)]
|
@[DB::Field(ignore: true)]
|
||||||
@captions = [] of Invidious::Videos::Caption
|
@captions = [] of Invidious::Videos::Captions::Metadata
|
||||||
|
|
||||||
@[DB::Field(ignore: true)]
|
@[DB::Field(ignore: true)]
|
||||||
property adaptive_fmts : Array(Hash(String, JSON::Any))?
|
property adaptive_fmts : Array(Hash(String, JSON::Any))?
|
||||||
@ -215,9 +215,9 @@ struct Video
|
|||||||
keywords.includes? "YouTube Red"
|
keywords.includes? "YouTube Red"
|
||||||
end
|
end
|
||||||
|
|
||||||
def captions : Array(Invidious::Videos::Caption)
|
def captions : Array(Invidious::Videos::Captions::Metadata)
|
||||||
if @captions.empty? && @info.has_key?("captions")
|
if @captions.empty? && @info.has_key?("captions")
|
||||||
@captions = Invidious::Videos::Caption.from_yt_json(info["captions"])
|
@captions = Invidious::Videos::Captions::Metadata.from_yt_json(info["captions"])
|
||||||
end
|
end
|
||||||
|
|
||||||
return @captions
|
return @captions
|
||||||
|
@ -1,100 +1,106 @@
|
|||||||
require "json"
|
require "json"
|
||||||
|
|
||||||
module Invidious::Videos
|
module Invidious::Videos
|
||||||
struct Caption
|
module Captions
|
||||||
property name : String
|
struct Metadata
|
||||||
property language_code : String
|
property name : String
|
||||||
property base_url : String
|
property language_code : String
|
||||||
|
property base_url : String
|
||||||
|
|
||||||
def initialize(@name, @language_code, @base_url)
|
property auto_generated : Bool
|
||||||
end
|
|
||||||
|
|
||||||
# Parse the JSON structure from Youtube
|
def initialize(@name, @language_code, @base_url, @auto_generated)
|
||||||
def self.from_yt_json(container : JSON::Any) : Array(Caption)
|
|
||||||
caption_tracks = container
|
|
||||||
.dig?("playerCaptionsTracklistRenderer", "captionTracks")
|
|
||||||
.try &.as_a
|
|
||||||
|
|
||||||
captions_list = [] of Caption
|
|
||||||
return captions_list if caption_tracks.nil?
|
|
||||||
|
|
||||||
caption_tracks.each do |caption|
|
|
||||||
name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"]
|
|
||||||
name = name.to_s.split(" - ")[0]
|
|
||||||
|
|
||||||
language_code = caption["languageCode"].to_s
|
|
||||||
base_url = caption["baseUrl"].to_s
|
|
||||||
|
|
||||||
captions_list << Caption.new(name, language_code, base_url)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return captions_list
|
# Parse the JSON structure from Youtube
|
||||||
end
|
def self.from_yt_json(container : JSON::Any) : Array(Captions::Metadata)
|
||||||
|
caption_tracks = container
|
||||||
|
.dig?("playerCaptionsTracklistRenderer", "captionTracks")
|
||||||
|
.try &.as_a
|
||||||
|
|
||||||
def timedtext_to_vtt(timedtext : String, tlang = nil) : String
|
captions_list = [] of Captions::Metadata
|
||||||
# In the future, we could just directly work with the url. This is more of a POC
|
return captions_list if caption_tracks.nil?
|
||||||
cues = [] of XML::Node
|
|
||||||
tree = XML.parse(timedtext)
|
|
||||||
tree = tree.children.first
|
|
||||||
|
|
||||||
tree.children.each do |item|
|
caption_tracks.each do |caption|
|
||||||
if item.name == "body"
|
name = caption["name"]["simpleText"]? || caption["name"]["runs"][0]["text"]
|
||||||
item.children.each do |cue|
|
name = name.to_s.split(" - ")[0]
|
||||||
if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n")
|
|
||||||
cues << cue
|
language_code = caption["languageCode"].to_s
|
||||||
|
base_url = caption["baseUrl"].to_s
|
||||||
|
|
||||||
|
auto_generated = (caption["kind"]? == "asr")
|
||||||
|
|
||||||
|
captions_list << Captions::Metadata.new(name, language_code, base_url, auto_generated)
|
||||||
|
end
|
||||||
|
|
||||||
|
return captions_list
|
||||||
|
end
|
||||||
|
|
||||||
|
def timedtext_to_vtt(timedtext : String, tlang = nil) : String
|
||||||
|
# In the future, we could just directly work with the url. This is more of a POC
|
||||||
|
cues = [] of XML::Node
|
||||||
|
tree = XML.parse(timedtext)
|
||||||
|
tree = tree.children.first
|
||||||
|
|
||||||
|
tree.children.each do |item|
|
||||||
|
if item.name == "body"
|
||||||
|
item.children.each do |cue|
|
||||||
|
if cue.name == "p" && !(cue.children.size == 1 && cue.children[0].content == "\n")
|
||||||
|
cues << cue
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
break
|
||||||
end
|
end
|
||||||
break
|
|
||||||
end
|
end
|
||||||
end
|
result = String.build do |result|
|
||||||
result = String.build do |result|
|
result << <<-END_VTT
|
||||||
result << <<-END_VTT
|
WEBVTT
|
||||||
WEBVTT
|
Kind: captions
|
||||||
Kind: captions
|
Language: #{tlang || @language_code}
|
||||||
Language: #{tlang || @language_code}
|
|
||||||
|
|
||||||
|
|
||||||
END_VTT
|
END_VTT
|
||||||
|
|
||||||
result << "\n\n"
|
result << "\n\n"
|
||||||
|
|
||||||
cues.each_with_index do |node, i|
|
cues.each_with_index do |node, i|
|
||||||
start_time = node["t"].to_f.milliseconds
|
start_time = node["t"].to_f.milliseconds
|
||||||
|
|
||||||
duration = node["d"]?.try &.to_f.milliseconds
|
duration = node["d"]?.try &.to_f.milliseconds
|
||||||
|
|
||||||
duration ||= start_time
|
duration ||= start_time
|
||||||
|
|
||||||
if cues.size > i + 1
|
if cues.size > i + 1
|
||||||
end_time = cues[i + 1]["t"].to_f.milliseconds
|
end_time = cues[i + 1]["t"].to_f.milliseconds
|
||||||
else
|
else
|
||||||
end_time = start_time + duration
|
end_time = start_time + duration
|
||||||
|
end
|
||||||
|
|
||||||
|
# start_time
|
||||||
|
result << start_time.hours.to_s.rjust(2, '0')
|
||||||
|
result << ':' << start_time.minutes.to_s.rjust(2, '0')
|
||||||
|
result << ':' << start_time.seconds.to_s.rjust(2, '0')
|
||||||
|
result << '.' << start_time.milliseconds.to_s.rjust(3, '0')
|
||||||
|
|
||||||
|
result << " --> "
|
||||||
|
|
||||||
|
# end_time
|
||||||
|
result << end_time.hours.to_s.rjust(2, '0')
|
||||||
|
result << ':' << end_time.minutes.to_s.rjust(2, '0')
|
||||||
|
result << ':' << end_time.seconds.to_s.rjust(2, '0')
|
||||||
|
result << '.' << end_time.milliseconds.to_s.rjust(3, '0')
|
||||||
|
|
||||||
|
result << "\n"
|
||||||
|
|
||||||
|
node.children.each do |s|
|
||||||
|
result << s.content
|
||||||
|
end
|
||||||
|
result << "\n"
|
||||||
|
result << "\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
# start_time
|
|
||||||
result << start_time.hours.to_s.rjust(2, '0')
|
|
||||||
result << ':' << start_time.minutes.to_s.rjust(2, '0')
|
|
||||||
result << ':' << start_time.seconds.to_s.rjust(2, '0')
|
|
||||||
result << '.' << start_time.milliseconds.to_s.rjust(3, '0')
|
|
||||||
|
|
||||||
result << " --> "
|
|
||||||
|
|
||||||
# end_time
|
|
||||||
result << end_time.hours.to_s.rjust(2, '0')
|
|
||||||
result << ':' << end_time.minutes.to_s.rjust(2, '0')
|
|
||||||
result << ':' << end_time.seconds.to_s.rjust(2, '0')
|
|
||||||
result << '.' << end_time.milliseconds.to_s.rjust(3, '0')
|
|
||||||
|
|
||||||
result << "\n"
|
|
||||||
|
|
||||||
node.children.each do |s|
|
|
||||||
result << s.content
|
|
||||||
end
|
|
||||||
result << "\n"
|
|
||||||
result << "\n"
|
|
||||||
end
|
end
|
||||||
|
return result
|
||||||
end
|
end
|
||||||
return result
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# List of all caption languages available on Youtube.
|
# List of all caption languages available on Youtube.
|
||||||
|
103
src/invidious/videos/transcript.cr
Normal file
103
src/invidious/videos/transcript.cr
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
module Invidious::Videos
|
||||||
|
# Namespace for methods primarily relating to Transcripts
|
||||||
|
module Transcript
|
||||||
|
record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String
|
||||||
|
|
||||||
|
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
|
||||||
|
kind = auto_generated ? "asr" : ""
|
||||||
|
|
||||||
|
object = {
|
||||||
|
"1:0:string" => video_id,
|
||||||
|
|
||||||
|
"2:base64" => {
|
||||||
|
"1:string" => kind,
|
||||||
|
"2:string" => language_code,
|
||||||
|
"3:string" => "",
|
||||||
|
},
|
||||||
|
|
||||||
|
"3:varint" => 1_i64,
|
||||||
|
"5:string" => "engagement-panel-searchable-transcript-search-panel",
|
||||||
|
"6:varint" => 1_i64,
|
||||||
|
"7:varint" => 1_i64,
|
||||||
|
"8:varint" => 1_i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
params = object.try { |i| Protodec::Any.cast_json(i) }
|
||||||
|
.try { |i| Protodec::Any.from_json(i) }
|
||||||
|
.try { |i| Base64.urlsafe_encode(i) }
|
||||||
|
.try { |i| URI.encode_www_form(i) }
|
||||||
|
|
||||||
|
return params
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String
|
||||||
|
# Convert into array of TranscriptLine
|
||||||
|
lines = self.parse(initial_data)
|
||||||
|
|
||||||
|
# Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt()
|
||||||
|
vtt = String.build do |vtt|
|
||||||
|
vtt << <<-END_VTT
|
||||||
|
WEBVTT
|
||||||
|
Kind: captions
|
||||||
|
Language: #{target_language}
|
||||||
|
|
||||||
|
|
||||||
|
END_VTT
|
||||||
|
|
||||||
|
vtt << "\n\n"
|
||||||
|
|
||||||
|
lines.each do |line|
|
||||||
|
start_time = line.start_ms
|
||||||
|
end_time = line.end_ms
|
||||||
|
|
||||||
|
# start_time
|
||||||
|
vtt << start_time.hours.to_s.rjust(2, '0')
|
||||||
|
vtt << ':' << start_time.minutes.to_s.rjust(2, '0')
|
||||||
|
vtt << ':' << start_time.seconds.to_s.rjust(2, '0')
|
||||||
|
vtt << '.' << start_time.milliseconds.to_s.rjust(3, '0')
|
||||||
|
|
||||||
|
vtt << " --> "
|
||||||
|
|
||||||
|
# end_time
|
||||||
|
vtt << end_time.hours.to_s.rjust(2, '0')
|
||||||
|
vtt << ':' << end_time.minutes.to_s.rjust(2, '0')
|
||||||
|
vtt << ':' << end_time.seconds.to_s.rjust(2, '0')
|
||||||
|
vtt << '.' << end_time.milliseconds.to_s.rjust(3, '0')
|
||||||
|
|
||||||
|
vtt << "\n"
|
||||||
|
vtt << line.line
|
||||||
|
|
||||||
|
vtt << "\n"
|
||||||
|
vtt << "\n"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return vtt
|
||||||
|
end
|
||||||
|
|
||||||
|
private def self.parse(initial_data : Hash(String, JSON::Any))
|
||||||
|
body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
|
||||||
|
"content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer",
|
||||||
|
"initialSegments").as_a
|
||||||
|
|
||||||
|
lines = [] of TranscriptLine
|
||||||
|
body.each do |line|
|
||||||
|
# Transcript section headers. They are not apart of the captions and as such we can safely skip them.
|
||||||
|
if line.as_h.has_key?("transcriptSectionHeaderRenderer")
|
||||||
|
next
|
||||||
|
end
|
||||||
|
|
||||||
|
line = line["transcriptSegmentRenderer"]
|
||||||
|
|
||||||
|
start_ms = line["startMs"].as_s.to_i.millisecond
|
||||||
|
end_ms = line["endMs"].as_s.to_i.millisecond
|
||||||
|
|
||||||
|
text = extract_text(line["snippet"]) || ""
|
||||||
|
|
||||||
|
lines << TranscriptLine.new(start_ms, end_ms, text)
|
||||||
|
end
|
||||||
|
|
||||||
|
return lines
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
@ -70,7 +70,12 @@
|
|||||||
</b>
|
</b>
|
||||||
<% else %>
|
<% else %>
|
||||||
<b>
|
<b>
|
||||||
<a href="/channel/<%= playlist.ucid %>"><%= author %></a> |
|
<% if !author.empty? %>
|
||||||
|
<a href="/channel/<%= playlist.ucid %>"><%= author %></a> |
|
||||||
|
<% elsif !playlist.subtitle.nil? %>
|
||||||
|
<% subtitle = playlist.subtitle || "" %>
|
||||||
|
<span><%= HTML.escape(subtitle[0..subtitle.rindex(" • ") || subtitle.size]) %></span> |
|
||||||
|
<% end %>
|
||||||
<%= translate_count(locale, "generic_videos_count", playlist.video_count) %> |
|
<%= translate_count(locale, "generic_videos_count", playlist.video_count) %> |
|
||||||
<%= translate(locale, "Updated `x` ago", recode_date(playlist.updated, locale)) %>
|
<%= translate(locale, "Updated `x` ago", recode_date(playlist.updated, locale)) %>
|
||||||
</b>
|
</b>
|
||||||
|
@ -89,7 +89,7 @@
|
|||||||
<label for="captions[0]"><%= translate(locale, "preferences_captions_label") %></label>
|
<label for="captions[0]"><%= translate(locale, "preferences_captions_label") %></label>
|
||||||
<% preferences.captions.each_with_index do |caption, index| %>
|
<% preferences.captions.each_with_index do |caption, index| %>
|
||||||
<select class="pure-u-1-6" name="captions[<%= index %>]" id="captions[<%= index %>]">
|
<select class="pure-u-1-6" name="captions[<%= index %>]" id="captions[<%= index %>]">
|
||||||
<% Invidious::Videos::Caption::LANGUAGES.each do |option| %>
|
<% Invidious::Videos::Captions::LANGUAGES.each do |option| %>
|
||||||
<option value="<%= option %>" <% if preferences.captions[index] == option %> selected <% end %>><%= translate(locale, option.blank? ? "none" : option) %></option>
|
<option value="<%= option %>" <% if preferences.captions[index] == option %> selected <% end %>><%= translate(locale, option.blank? ? "none" : option) %></option>
|
||||||
<% end %>
|
<% end %>
|
||||||
</select>
|
</select>
|
||||||
|
@ -557,6 +557,30 @@ module YoutubeAPI
|
|||||||
return self._post_json("/youtubei/v1/search", data, client_config)
|
return self._post_json("/youtubei/v1/search", data, client_config)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
####################################################################
|
||||||
|
# get_transcript(params, client_config?)
|
||||||
|
#
|
||||||
|
# Requests the youtubei/v1/get_transcript endpoint with the required headers
|
||||||
|
# and POST data in order to get a JSON reply.
|
||||||
|
#
|
||||||
|
# The requested data is a specially encoded protobuf string that denotes the specific language requested.
|
||||||
|
#
|
||||||
|
# An optional ClientConfig parameter can be passed, too (see
|
||||||
|
# `struct ClientConfig` above for more details).
|
||||||
|
#
|
||||||
|
|
||||||
|
def get_transcript(
|
||||||
|
params : String,
|
||||||
|
client_config : ClientConfig | Nil = nil
|
||||||
|
) : Hash(String, JSON::Any)
|
||||||
|
data = {
|
||||||
|
"context" => self.make_context(client_config),
|
||||||
|
"params" => params,
|
||||||
|
}
|
||||||
|
|
||||||
|
return self._post_json("/youtubei/v1/get_transcript", data, client_config)
|
||||||
|
end
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# _post_json(endpoint, data, client_config?)
|
# _post_json(endpoint, data, client_config?)
|
||||||
#
|
#
|
||||||
|
Loading…
Reference in New Issue
Block a user