Proxy: Use connection pools for images (#4326)

Theoretically this should improve memory usage and performance by quite a bit
as we aren't creating a new HTTP::Client and in a turn a new connection for
every image we request from YouTube.

Closes issue 4009
This commit is contained in:
Samantaz Fox 2024-10-30 13:55:28 +01:00
commit 9957da28dc
No known key found for this signature in database
GPG Key ID: F42821059186176E
3 changed files with 64 additions and 97 deletions

View File

@ -93,6 +93,10 @@ SOFTWARE = {
YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size)
# Image request pool
GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size)
# CLI
Kemal.config.extra_options do |parser|
parser.banner = "Usage: invidious [arguments]"

View File

@ -11,29 +11,9 @@ module Invidious::Routes::Images
end
end
# We're encapsulating this into a proc in order to easily reuse this
# portion of the code for each request block below.
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
env.response.headers.delete("Transfer-Encoding")
return
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://yt3.ggpht.com#{url}") do |resp|
return request_proc.call(resp)
GGPHT_POOL.client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
@ -61,27 +41,10 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Connection"] = "close"
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://#{authority}.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool(authority).client &.get(url, headers) do |resp|
env.response.headers["Connection"] = "close"
return self.proxy_image(env, resp)
end
rescue ex
end
@ -101,26 +64,9 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
HTTP::Client.get("https://i9.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool("i9").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
@ -165,8 +111,7 @@ module Invidious::Routes::Images
if name == "maxres.jpg"
build_thumbnails(id).each do |thumb|
thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg"
# This can likely be optimized into a (small) pool sometime in the future.
if HTTP::Client.head("https://i.ytimg.com#{thumbnail_resource_path}").status_code == 200
if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200
name = thumb[:url] + ".jpg"
break
end
@ -181,29 +126,28 @@ module Invidious::Routes::Images
end
end
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
end
proxy_file(response, env)
}
begin
# This can likely be optimized into a (small) pool sometime in the future.
HTTP::Client.get("https://i.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool("i").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
end
private def self.proxy_image(env, response)
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end
env.response.headers["Access-Control-Allow-Origin"] = "*"
if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end
return proxy_file(response, env)
end
end

View File

@ -1,17 +1,6 @@
def add_yt_headers(request)
request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal"
request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
request.headers["Accept-Language"] ||= "en-us,en;q=0.5"
# Preserve original cookies and add new YT consent cookie for EU servers
request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}"
if !CONFIG.cookies.empty?
request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
end
end
# Mapping of subdomain => YoutubeConnectionPool
# This is needed as we may need to access arbitrary subdomains of ytimg
private YTIMG_POOLS = {} of String => YoutubeConnectionPool
struct YoutubeConnectionPool
property! url : URI
@ -58,6 +47,21 @@ struct YoutubeConnectionPool
end
end
def add_yt_headers(request)
request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal"
request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
request.headers["Accept-Language"] ||= "en-us,en;q=0.5"
# Preserve original cookies and add new YT consent cookie for EU servers
request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}"
if !CONFIG.cookies.empty?
request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
end
end
def make_client(url : URI, region = nil, force_resolve : Bool = false)
client = HTTP::Client.new(url)
@ -94,3 +98,18 @@ def make_configured_http_proxy_client
password: config_proxy.password,
)
end
# Fetches a HTTP pool for the specified subdomain of ytimg.com
#
# Creates a new one when the specified pool for the subdomain does not exist
def get_ytimg_pool(subdomain)
if pool = YTIMG_POOLS[subdomain]?
return pool
else
LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"")
pool = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size)
YTIMG_POOLS[subdomain] = pool
return pool
end
end