mirror of
https://github.com/iv-org/invidious.git
synced 2024-12-27 18:40:21 +05:30
Performance: Improve speed of automatic instance redirection (#4193)
The automatic instance redirection implemented in #1940 fetches a new list of instances each time someone queries the /redirect endpoint. This is extremely inefficient... This PR optimizes all that into a background job that only fetches a single list every 30 minutes. This should improve performance quite a bit. No related issue was opened.
This commit is contained in:
commit
7c79ee7cc2
@ -189,6 +189,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL
|
|||||||
|
|
||||||
Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new
|
Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new
|
||||||
|
|
||||||
|
Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new
|
||||||
|
|
||||||
Invidious::Jobs.start_all
|
Invidious::Jobs.start_all
|
||||||
|
|
||||||
def popular_videos
|
def popular_videos
|
||||||
|
@ -323,68 +323,6 @@ def parse_range(range)
|
|||||||
return 0_i64, nil
|
return 0_i64, nil
|
||||||
end
|
end
|
||||||
|
|
||||||
# Picks a random public instance to redirect to.
#
# Downloads `/instances.json` from api.invidious.io, keeps the clear-net
# ("https") instances that expose statistics, whose version date is within
# 30 days of `CURRENT_VERSION` and whose health ratio (when available) is
# above 90, and returns the domain of one of them chosen at random.
#
# Returns "redirect.invidious.io" when the list could not be fetched or when
# no instance passes the filters.
def fetch_random_instance
  begin
    instance_api_client = make_client(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a
  rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
    instance_list = [] of JSON::Any
  ensure
    # Close the client on the failure paths too, not only after a successful fetch.
    instance_api_client.try &.close
  end

  filtered_instance_list = [] of String

  instance_list.each do |data|
    # Each entry is a [domain, info] pair.
    info = data[1]

    # TODO Check if current URL is onion instance and use .onion types if so.
    next unless info["type"] == "https"

    # Instances can have statistics disabled, which is a requirement of version validation.
    stats = info["stats"]
    next if stats.raw.nil?

    # The stats endpoint could also lack the software dict.
    next if stats["software"]?.nil?

    # Makes sure the instance isn't too outdated.
    date_match = stats["software"]["version"].as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    next if !date_match

    remote_commit_date = Time.parse(date_match[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    next if (remote_commit_date - local_commit_date).abs.days > 30

    # Read the health ratio. The previous implementation called `as_nil` on the
    # monitor first, which raises for every instance that *has* monitoring data,
    # so the ratio comparison below was never reached.
    ratio = begin
      info["monitor"].as_h["dailyRatios"][0].as_h["ratio"].to_s.to_f?
    rescue TypeCastError | KeyError | IndexError
      nil
    end

    # We can't check the health if the monitoring is broken. Thus we'll just add it to the list
    # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that
    # it's an error that often occurs with all the instances at the same time, we have to just skip the check.
    filtered_instance_list << data[0].as_s if ratio.nil? || ratio > 90
  end

  # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io
  return "redirect.invidious.io" if filtered_instance_list.empty?

  filtered_instance_list.sample
end
|
|
||||||
|
|
||||||
def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String
|
def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String
|
||||||
str = uri.to_s.sub(/^https?:\/\//, "")
|
str = uri.to_s.sub(/^https?:\/\//, "")
|
||||||
if str.size > max_length
|
if str.size > max_length
|
||||||
|
97
src/invidious/jobs/instance_refresh_job.cr
Normal file
97
src/invidious/jobs/instance_refresh_job.cr
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
# Background job that rebuilds, every 30 minutes, the list of instances used
# for cross-instance redirects, so that the list does not have to be fetched
# on every request.
class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
  # We update the internals of a constant so that the list can be accessed
  # from anywhere within the codebase.
  #
  # "INSTANCES" => Array(Tuple(String, String)) # region, instance
  INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}

  def initialize
  end

  # Job entry point: refreshes the list, then sleeps for 30 minutes, forever.
  def begin
    loop do
      refresh_instances
      LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
      sleep 30.minutes
      Fiber.yield
    end
  end

  # Refreshes the list of instances used for redirects.
  #
  # Does the following three checks for each instance
  #  - Is it a clear-net instance?
  #  - Is it an instance with a good uptime?
  #  - Is it an updated instance?
  #
  # An instance whose data cannot be read (any exception) is logged and
  # skipped. The published list is only replaced when at least one instance
  # passed, so a failed fetch keeps the previous list in place.
  private def refresh_instances
    raw_instance_list = self.fetch_instances
    filtered_instance_list = [] of Tuple(String, String)

    raw_instance_list.each do |instance_data|
      # TODO allow Tor hidden service instances when the current instance
      # is also a hidden service. Same for i2p and any other non-clearnet instances.
      begin
        # Each entry is a [domain, info] pair.
        domain = instance_data[0]
        info = instance_data[1]
        stats = info["stats"]

        next unless info["type"] == "https"
        next if bad_uptime?(info["monitor"])
        next if outdated?(stats["software"]["version"])

        filtered_instance_list << {info["region"].as_s, domain.as_s}
      rescue ex
        # `domain` is still nil when reading the entry itself is what failed.
        if domain
          LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
        else
          LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
        end
      end
    end

    if !filtered_instance_list.empty?
      INSTANCES["INSTANCES"] = filtered_instance_list
    end
  end

  # Fetches information regarding instances from api.invidious.io.
  #
  # Returns an empty array (and logs the reason) when the request fails or
  # the response is not a JSON array.
  private def fetch_instances : Array(JSON::Any)
    # We directly call the stdlib HTTP::Client here as it allows us to negate the effects
    # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6
    # and as such the following request raises if we were to use force_resolve with the ipv6 value.
    instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io"))

    begin
      # Timeouts
      instance_api_client.connect_timeout = 10.seconds
      instance_api_client.dns_timeout = 10.seconds

      JSON.parse(instance_api_client.get("/instances.json").body).as_a
    rescue ex : Socket::ConnectError | IO::TimeoutError | JSON::ParseException | TypeCastError
      # TypeCastError covers a well-formed JSON document that is not an array;
      # left unrescued it would propagate out of the job's loop.
      LOGGER.info("InstanceListRefreshJob: failed to fetch the instance list because \"#{ex}\"")
      [] of JSON::Any
    ensure
      # Release the connection on both the success and the failure path.
      instance_api_client.close
    end
  end

  # Checks if the given target instance is outdated, i.e. whether the date in
  # its version string is more than 30 days away from the date in
  # CURRENT_VERSION. A version string without a date is not considered outdated.
  private def outdated?(target_instance_version) : Bool
    remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    return false if !remote_commit_date

    remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    return (remote_commit_date - local_commit_date).abs.days > 30
  end

  # Returns true when the target instance should be rejected on health
  # grounds: its monitor reports it as currently down, or reports an uptime
  # below 90.
  private def bad_uptime?(target_instance_health_monitor) : Bool
    return true if target_instance_health_monitor["down"].as_bool
    return true if target_instance_health_monitor["uptime"].as_f < 90

    return false
  end
end
|
@ -40,7 +40,16 @@ module Invidious::Routes::Misc
|
|||||||
|
|
||||||
# Redirects the request to the same path (taken from the referer) on another
# instance, picked at random from the list maintained by
# InstanceListRefreshJob. Falls back to redirect.invidious.io while that list
# is still empty.
def self.cross_instance_redirect(env)
  referer = get_referer(env)

  # Read the cached list instead of fetching a fresh one on every request.
  instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"]

  if instance_list.empty?
    instance_url = "redirect.invidious.io"
  else
    # Instances are packaged as {region, domain} in the instance list
    instance_url = instance_list.sample[1]
  end

  env.redirect "https://#{instance_url}#{referer}"
end
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user