mirror of
https://codeberg.org/aryak/mozhi
synced 2024-11-18 14:02:58 +05:30
add projectsegfault instances; instances2json script
This commit is contained in:
parent
05d39a5013
commit
10f76a7387
@ -70,6 +70,7 @@ These envvars turn off/on engines. By default all of them are enabled.
|
||||
| [mozhi.aryak.me](https://mozhi.aryak.me) | No | India | Airtel |
|
||||
| [translate.bus-hit.me](https://translate.bus-hit.me) | No | Canada | Oracle |
|
||||
| [nyc1.mz.ggtyler.dev](https://nyc1.mz.ggtyler.dev) | No | USA | Royale Hosting |
|
||||
| [translate.projectsegfau.lt](https://translate.projectsegfau.lt) | No | Germany / USA / India | Avoro / Racknerd / Airtel |
|
||||
|
||||
## Features
|
||||
- An all mode where the responses of all supported engines will be shown.
|
||||
|
20
instances.json
Normal file
20
instances.json
Normal file
@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"country": "India",
|
||||
"link": "https://mozhi.aryak.me",
|
||||
"cloudflare": false,
|
||||
"host": "Airtel"
|
||||
},
|
||||
{
|
||||
"country": "Canada",
|
||||
"link": "https://translate.bus-hit.me",
|
||||
"cloudflare": false,
|
||||
"host": "Oracle"
|
||||
},
|
||||
{
|
||||
"country": "USA",
|
||||
"link": "https://nyc1.mz.ggtyler.dev",
|
||||
"cloudflare": false,
|
||||
"host": "Royale Hosting"
|
||||
}
|
||||
]
|
66
instances2json.py
Normal file
66
instances2json.py
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/python3
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Scrape the instance table from the project page, probe every listed
# instance over HTTP, and write the instances that answered with 200 to
# instances.json as a list of {country, link, cloudflare, host} objects.

print("Getting HTML")

# Seconds to wait on any single HTTP request. Without a timeout, requests
# blocks indefinitely on an unresponsive host and the script never finishes.
REQUEST_TIMEOUT = 30

headers = {
    'User-Agent': 'Mozilla/5.0 MozhiInstanceFetcher/1.0 (+codeberg.org/aryak/mozhi)'
}

# Get the HTML from the project page; fail loudly on an HTTP error instead
# of trying to parse an error page.
r = requests.get('https://codeberg.org/aryak/mozhi', headers=headers, timeout=REQUEST_TIMEOUT)
r.raise_for_status()

# Parse the HTML
soup = BeautifulSoup(r.text, 'html.parser')

print("Scraping started")

# Get tables
tables = soup.find_all('table')

# The instance list is taken to be the second table on the page
# (NOTE(review): positional lookup - confirm if the README layout changes).
if len(tables) < 2:
    raise SystemExit("Could not find the instance table on the project page.")
table = tables[1]

# Get all rows. Skip the first row because it's the header
rows = table.find_all('tr')[1:]

theJson = []

for row in rows:
    # Look the cells up once per row instead of once per column.
    cells = row.find_all('td')
    anchor = cells[0].find('a') if len(cells) >= 4 else None
    if anchor is None or not anchor.get('href'):
        # A malformed row should not abort the whole run.
        print("Skipping a table row without a link and four columns...")
        continue

    link = anchor['href']
    # Strip surrounding whitespace so the comparison below and the saved
    # values do not depend on how the table HTML was rendered.
    cloudflare = cells[1].text.strip()
    country = cells[2].text.strip()
    host = cells[3].text.strip()

    print(f"Scraping {link} instance...")
    isCloudflare = cloudflare == 'Yes'

    # Only keep instances that are reachable right now.
    try:
        response = requests.get(link + '/', headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Connection errors, timeouts, invalid URLs, etc. Ctrl-C and
        # SystemExit are intentionally not caught here.
        print(f"Error while fetching {link}/. Skipping...")
        continue

    if response.status_code != 200:
        print(f"Error while fetching {link}/. We got a {response.status_code} status code. Skipping...")
        continue

    theJson.append({
        'country': country,
        'link': link,
        'cloudflare': isCloudflare,
        'host': host,
    })

print("Scraping finished. Saving JSON...")

# save JSON
with open('instances.json', 'w', encoding='utf-8') as outfile:
    json.dump(theJson, outfile, indent=4)

print("File saved as instances.json")
|
Loading…
Reference in New Issue
Block a user