From 3e3de1890abf98dc5146eb0d3af17776e273f2e8 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Tue, 25 Sep 2018 17:56:59 -0500 Subject: [PATCH] Overhaul geo-bypass --- src/invidious.cr | 19 ++++++++++++----- src/invidious/helpers/proxy.cr | 8 +++++++ src/invidious/jobs.cr | 38 ++++++++++++++++++++++++++++++++++ src/invidious/videos.cr | 24 +++++++++------------ 4 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 7871ea3e..abc61137 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -105,6 +105,15 @@ spawn do end end +proxies = {} of String => Array({ip: String, port: Int32}) +spawn do + find_working_proxies(BYPASS_REGIONS) do |region, list| + if !list.empty? + proxies[region] = list + end + end +end + before_all do |env| env.response.headers["X-XSS-Protection"] = "1; mode=block;" env.response.headers["X-Content-Type-Options"] = "nosniff" @@ -225,7 +234,7 @@ get "/watch" do |env| end begin - video = get_video(id, PG_DB) + video = get_video(id, PG_DB, proxies) rescue ex error_message = ex.message STDOUT << id << " : " << ex.message << "\n" @@ -325,7 +334,7 @@ get "/embed/:id" do |env| params = process_video_params(env.params.query, nil) begin - video = get_video(id, PG_DB) + video = get_video(id, PG_DB, proxies) rescue ex error_message = ex.message next templated "error" @@ -1722,7 +1731,7 @@ get "/api/v1/captions/:id" do |env| client = make_client(YT_URL) begin - video = get_video(id, PG_DB) + video = get_video(id, PG_DB, proxies) rescue ex halt env, status_code: 403 end @@ -2116,7 +2125,7 @@ get "/api/v1/videos/:id" do |env| id = env.params.url["id"] begin - video = get_video(id, PG_DB) + video = get_video(id, PG_DB, proxies) rescue ex env.response.content_type = "application/json" error_message = {"error" => ex.message}.to_json @@ -2906,7 +2915,7 @@ get "/api/manifest/dash/id/:id" do |env| client = make_client(YT_URL) begin - video = get_video(id, PG_DB) + video = get_video(id, PG_DB, proxies) rescue ex halt env, status_code: 403 end diff --git a/src/invidious/helpers/proxy.cr b/src/invidious/helpers/proxy.cr index e978f6f7..94118248 100644 --- a/src/invidious/helpers/proxy.cr +++ b/src/invidious/helpers/proxy.cr @@ -108,7 +108,15 @@ def get_proxies(country_code = "US") "xf4" => "0", "xf5" => "1", } + response = client.post("/free-proxy-list/#{country_code}/", headers, form: body) + 20.times do + if response.status_code == 200 + break + end + response = client.post("/free-proxy-list/#{country_code}/", headers, form: body) + end + response = XML.parse_html(response.body) mapping = response.xpath_node(%q(.//body/script)).not_nil!.content diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr index 7424ef62..b9feb1b8 100644 --- a/src/invidious/jobs.cr +++ b/src/invidious/jobs.cr @@ -154,3 +154,41 @@ def update_decrypt_function Fiber.yield end end + +def find_working_proxies(regions) + proxy_channel = Channel({String, Array({ip: String, port: Int32})}).new + + regions.each do |region| + spawn do + loop do + begin + proxies = get_proxies(region).first(20) + rescue ex + next proxy_channel.send({region, Array({ip: String, port: Int32}).new}) + end + + proxies.select! do |proxy| + begin + client = HTTPClient.new(YT_URL) + client.read_timeout = 10.seconds + client.connect_timeout = 10.seconds + + proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) + client.set_proxy(proxy) + + client.get("/").status_code == 200 + rescue ex + false + end + end + proxies = proxies.map { |proxy| {ip: proxy[:ip], port: proxy[:port]} } + + proxy_channel.send({region, proxies}) + end + end + end + + loop do + yield proxy_channel.receive + end +end diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index e879ce2c..aebe4164 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -129,7 +129,6 @@ BYPASS_REGIONS = { "ID", "BD", "MX", - "ET", "PH", "EG", "VN", @@ -466,14 +465,14 @@ class CaptionName ) end -def get_video(id, db, refresh = true) +def get_video(id, db, refresh = true, proxies = {} of String => Array({ip: String, port: Int32, score: Float64})) if db.query_one?("SELECT EXISTS (SELECT true FROM videos WHERE id = $1)", id, as: Bool) video = db.query_one("SELECT * FROM videos WHERE id = $1", id, as: Video) # If record was last updated over 10 minutes ago, refresh (expire param in response lasts for 6 hours) if refresh && Time.now - video.updated > 10.minutes begin - video = fetch_video(id) + video = fetch_video(id, proxies) video_array = video.to_a args = arg_array(video_array[1..-1], 2) @@ -488,7 +487,7 @@ def get_video(id, db, refresh = true) end end else - video = fetch_video(id) + video = fetch_video(id, proxies) video_array = video.to_a args = arg_array(video_array) @@ -499,7 +498,7 @@ def get_video(id, db, refresh = true) return video end -def fetch_video(id) +def fetch_video(id, proxies = {} of String => Array({ip: String, port: Int32, score: Float64})) html_channel = Channel(XML::Node).new info_channel = Channel(HTTP::Params).new @@ -530,21 +529,18 @@ def fetch_video(id) if info["reason"]? && info["reason"].includes? "your country" bypass_channel = Channel({HTTP::Params | Nil, XML::Node | Nil}).new - BYPASS_REGIONS.each do |country_code| + proxies.each do |region, list| spawn do begin - proxies = get_proxies(country_code) - - # Try not to overload single proxy - proxy = proxies[0, 5].sample(1)[0] - proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) - - client = HTTPClient.new(URI.parse("https://www.youtube.com")) + client = HTTPClient.new(YT_URL) client.read_timeout = 10.seconds client.connect_timeout = 10.seconds + + proxy = list.sample(1)[0] + proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port]) client.set_proxy(proxy) - proxy_info = client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en&disable_polymer=1") + proxy_info = client.get("/get_video_info?video_id=#{id}&ps=default&eurl=&gl=US&hl=en&disable_polymer=1") proxy_info = HTTP::Params.parse(proxy_info.body) if !proxy_info["reason"]?