add projectsegfault instances; instances2json script

This commit is contained in:
Arya 2024-03-16 22:29:52 +05:30
parent 05d39a5013
commit 10f76a7387
Signed by: arya
GPG Key ID: 842D12BDA50DF120
3 changed files with 87 additions and 0 deletions


@@ -70,6 +70,7 @@ These envvars turn off/on engines. By default all of them are enabled.
| [mozhi.aryak.me](https://mozhi.aryak.me) | No | India | Airtel |
| [translate.bus-hit.me](https://translate.bus-hit.me) | No | Canada | Oracle |
| [nyc1.mz.ggtyler.dev](https://nyc1.mz.ggtyler.dev) | No | USA | Royale Hosting |
| [translate.projectsegfau.lt](https://translate.projectsegfau.lt) | No | Germany / USA / India | Avoro / Racknerd / Airtel |
## Features
- An all mode where the responses of all supported engines will be shown.

instances.json Normal file

@@ -0,0 +1,20 @@
[
    {
        "country": "India",
        "link": "https://mozhi.aryak.me",
        "cloudflare": false,
        "host": "Airtel"
    },
    {
        "country": "Canada",
        "link": "https://translate.bus-hit.me",
        "cloudflare": false,
        "host": "Oracle"
    },
    {
        "country": "USA",
        "link": "https://nyc1.mz.ggtyler.dev",
        "cloudflare": false,
        "host": "Royale Hosting"
    }
]

instances2json.py Normal file

@@ -0,0 +1,66 @@
#!/usr/bin/python3
import requests
import json
from bs4 import BeautifulSoup
print("Getting HTML")
headers = {
    'User-Agent': 'Mozilla/5.0 MozhiInstanceFetcher/1.0 (+codeberg.org/aryak/mozhi)'
}
# Get the HTML from the page
r = requests.get('https://codeberg.org/aryak/mozhi', headers=headers)
# Parse the HTML
soup = BeautifulSoup(r.text, 'html.parser')
print("Scraping started")
# Get tables
tables = soup.find_all('table')
# Get table with header 'Master Branch'
table = tables[1]
# Get all rows and columns. Skip the first row because it's the header
rows = table.find_all('tr')[1:]
theJson = []
for row in rows:
    # Cache the cells instead of calling find_all() once per field
    cells = row.find_all('td')
    link = cells[0].find('a')['href']
    cloudflare = cells[1].text
    country = cells[2].text
    host = cells[3].text
    print("Scraping " + link + ' instance...')
    isCloudflare = cloudflare == 'Yes'
    try:
        r = requests.get(link + '/', headers=headers)
        if r.status_code != 200:
            print("Error while fetching " + link + '/. We got a ' + str(r.status_code) + ' status code. Skipping...')
            continue
    except requests.RequestException:
        print("Error while fetching " + link + '/. Skipping...')
        continue
    theJson.append({
        'country': country,
        'link': link,
        'cloudflare': isCloudflare,
        'host': host,
    })
print("Scraping finished. Saving JSON...")
# save JSON
with open('instances.json', 'w') as outfile:
    json.dump(theJson, outfile, indent=4)
print("File saved as instances.json")
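A consumer of the generated instances.json just loads the array and reads the four keys the script emits (`country`, `link`, `cloudflare`, `host`). A minimal sketch, using sample entries taken from the instance table above; the Cloudflare filter is only an illustration, not part of this commit:

```python
import json

# Sample data in the schema produced by instances2json.py
sample = '''[
    {"country": "India", "link": "https://mozhi.aryak.me",
     "cloudflare": false, "host": "Airtel"},
    {"country": "USA", "link": "https://nyc1.mz.ggtyler.dev",
     "cloudflare": false, "host": "Royale Hosting"}
]'''

instances = json.loads(sample)

# Example: keep only instances not behind Cloudflare
direct = [i['link'] for i in instances if not i['cloudflare']]
print(direct)
```

In practice the data would come from `json.load(open('instances.json'))` rather than an inline string; the string form just keeps the example self-contained.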