Added some pieces
parent e046e1fbd5, commit 7ad18b34e2
Action.py (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/python3

"""Action(s) to perform when valid numbers are found."""

try:
    from pprint import pprint
except ImportError:  # fall back to plain print if pprint is unavailable
    pprint = None


def Run(result):
    """Handle a finished parse result."""
    if pprint:
        pprint(result)
    else:
        print(f"*doing something with {result}*")
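
# Minimal usage sketch (hypothetical data, shaped like WebSiteParser.Output):
if __name__ == "__main__":
    Run({"heavily_used": [], "almost_not_used": [], "not_used": []})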
README.md (27 lines)
@@ -1,2 +1,27 @@
# Virtual Numbers Parser

A script that checks several web sites offering virtual numbers and looks for unused ones. It is recommended to run this program through a proxy, for example via proxychains.

ATTENTION! THIS IS MOSTLY INCOMPLETE!


## Usage

```bash
[torsocks/proxychains4] python3 vnp.py
```


## Currently supported web-sites

- freesmscenter.com
- receivesms.co


## To-implement list

- anonymsms.com (Cloudflare block if no JS and Tor)
- getfreesmsnumber.com (sometimes guardian block if Tor)
- receive-smss.com (Cloudflare block if via Tor)
- smsreceivefree.com (Cloudflare block if via Tor)
- temporary-phone-number.com (Cloudflare block if via Tor)
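
## Adding a new site

Each supported site is a `WebSiteParser.WebSiteParser` subclass under `sites/`. A minimal sketch of the pattern (the class and domain below are illustrative, not a real parser):

```python
import WebSiteParser


class ExampleComParser(WebSiteParser.WebSiteParser):
    def __init__(self):
        super().__init__("example.com")

    def Parse(self):
        # fill self.Output using self.RequestPage()/self.ParseAllFromPage(),
        # then mark the parse as finished
        self.ParseDone = True
```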
WebSiteParser.py (new file, 132 lines)
@@ -0,0 +1,132 @@
import re
import requests


# Dirty hack to mute warnings about unverified certificates:
# replace the module-level warnings.warn that requests uses with a no-op.
def __nothing(*args, **kwargs): pass
requests.warnings.warn = __nothing
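# A more conventional way to silence only the TLS warning, assuming urllib3
# is importable on its own (requests depends on it), would be:
#   import urllib3
#   urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)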


class ParseIsNotDoneException(Exception):
    def __init__(self):
        super().__init__("you should perform parsing first")


class FailedToParseException(Exception):
    def __init__(self, url, errtext):
        super().__init__(f"can't parse web site \"{url}\"; error message: {errtext}")


class CantRequestPageException(Exception):
    def __init__(self, url, errtext=None):
        if errtext:
            super().__init__(f"can't request page \"{url}\"; reason: {errtext}")
        else:
            super().__init__(f"can't request page \"{url}\"")


class WebSiteParser:
    def __init__(self, url):
        self.EMPTY = str(None)

        self.CFBlockText = "Enable JavaScript and cookies to continue".lower()

        self.DefaultHeaders = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            #"Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1"
        }
        # Single result number format:
        # {
        #     "number": "88005553535",
        #     "country": "USSR",
        #     "last_use": "11 milliseconds ago",
        #     "born": "9 eras ago",
        #     "url": "https://example.com/number/..."
        # }
        # Single number internal format:
        # {
        #     "number": "...",
        #     "country": "...",
        #     "times_used": 100500,
        #     "last_use_raw": "log(12451 ^ 76346 mod 54) * 420000 / 146 seconds ago",
        #     "born_raw": "2 << 2 | 1 eRaS aGo ayo dude",
        #     "uri_raw": "/numbers/..."
        # }
        self.Output = {
            "heavily_used": [],
            "almost_not_used": [],
            "not_used": []
        }
        self.ParseDone = False
        self.AlmostNotUsedThreshold = 5  # A number used fewer than this many times counts as almost not used
        self.ErrorsWhileParsing = 0

        self.WebSiteURL = url
        self.WebSiteFullURL = f"https://{url}/"


    def Cut(self, text, l=64):
        """Truncate text for log output, appending "..." when it is cut."""
        if not isinstance(text, str):
            text = str(text)
        if len(text) > l:
            text = text[:l-1] + "..."
        return text

    def Log(self, text, src=None):
        """Write text to stdout, tagged with the parser's site (and source location, if given)."""
        if not src:
            print(f"{self.WebSiteURL} parser: {text}")
        else:
            print(f"{self.WebSiteURL} parser at {src}: {text}")


    def RequestPage(self, location=""):
        """Request the page at the given location and return the response."""
        url = f"{self.WebSiteFullURL}{location}"
        try:
            resp = requests.get(
                url,
                headers=self.DefaultHeaders,
                verify=False
            )
        except requests.exceptions.ConnectionError:
            raise CantRequestPageException(url, "can't connect, retry limit exceeded")
        if resp.status_code not in (200, 302):
            if (resp.status_code == 403) and (self.CFBlockText in resp.text.lower()):
                raise CantRequestPageException(url, "blocked by cloudflare")
            elif (resp.status_code == 403) and (resp.headers.get("Server") == "cloudflare"):
                raise CantRequestPageException(url, "seems like blocked by cloudflare")
            raise CantRequestPageException(url, f"status code is {resp.status_code}")
        return resp

    def ParseAllFromPage(self, exp, to_remove="", location=""):
        """Return every match of the regex `exp` on the page, with anything
        matching the regex `to_remove` stripped from each match.
        """
        markup = self.RequestPage(location).text
        pieces_of_markup = re.findall(exp, markup)
        result = []
        for piece in pieces_of_markup:
            if not isinstance(piece, str):
                e = self.Cut(str(piece))
                self.Log(f"warning: unexpected result while parsing: {e}", f"ParseAllFromPage(self, \"{self.Cut(exp,32)}\", \"{self.Cut(to_remove,32)}\", \"{self.Cut(location,32)}\")")
                continue
            result.append(
                re.sub(to_remove, "", piece) if to_remove else piece
            )
        return result
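    # Usage sketch (hypothetical patterns, for illustration only):
    #   parser = WebSiteParser("example.com")
    #   links = parser.ParseAllFromPage(r'<a href="/number/[0-9]+">',
    #                                   to_remove=r'(<a href=")|(">)')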

    def GetResult(self):
        """Return the collected output; raises ParseIsNotDoneException if Parse() has not run."""
        if not self.ParseDone:
            raise ParseIsNotDoneException
        return self.Output
sites/AnonymsmsComParser.py (new file, 15 lines)
@@ -0,0 +1,15 @@
import requests
import WebSiteParser


class AnonymsmsComParser(WebSiteParser.WebSiteParser):
    def __init__(self):
        super().__init__("anonymsms.com")

    def Parse(self):
        raise NotImplementedError("support for this service is not yet implemented")

        # Unreachable stub, kept as a starting point for future work:
        resp = self.RequestPage()

        self.ParseDone = True
sites/FreesmscenterCom.py (new file, 216 lines)
@@ -0,0 +1,216 @@
"""Module for parsing freesmscenter.com

NOTICE: this can be rewritten more cleanly in the future.
"""

import re
import WebSiteParser


class FreesmscenterCom(WebSiteParser.WebSiteParser):
    def __init__(self):
        super().__init__("freesmscenter.com")


    def Parse(self):
        """Perform parsing of the entire web site."""

        # Parse the main page with the list of countries
        country_uris = self.ParseURIsFromPage(
            "<a href=\"/FreeSms/.+-Phone-Number\" class=\"btn btn-primary\">Show Phone Numbers</a>",
            "<a href=\"/",
            "\" class=\"btn btn-primary\">Show Phone Numbers</a>"
        )

        # Collect every number from each country's number-list page
        numbers_raw = []
        for country_uri in country_uris:
            tmp = country_uri.replace("FreeSms", "SmsReceive")  # NOTE: currently unused
            number_uris = self.ParseURIsFromPage(
                "<a href=\"/SmsReceive/.+/[0-9]+\" class=\"btn btn-primary\">Receive SMS</a>",
                "<a href=\"/",
                "\" class=\"btn btn-primary\">Receive SMS</a>",
                country_uri
            )
            if not number_uris:
                # Retry once with the same arguments
                number_uris = self.ParseURIsFromPage(
                    "<a href=\"/SmsReceive/.+/[0-9]+\" class=\"btn btn-primary\">Receive SMS</a>",
                    "<a href=\"/",
                    "\" class=\"btn btn-primary\">Receive SMS</a>",
                    country_uri
                )
            for number_uri in number_uris:
                numbers_raw.append({
                    "uri_raw": number_uri,
                    "country": country_uri.replace("FreeSms/", "").replace("-Phone-Number", "").replace("%20", " ")
                })

        # Parse each number's page
        for i in range(len(numbers_raw)):
            numbers_raw[i].update(
                self.ParseNumberPage(numbers_raw[i]["uri_raw"])
            )

        self.ProcessRawNumbers(numbers_raw)
        self.ParseDone = True
        self.Log("parsing done")

    def ParseURIsFromPage(self, exp, r1, r2, uri=""):
        """Parse all URIs from the selected page, stripping prefix `r1` and suffix `r2` from each match."""
        markup = self.RequestPage(uri).text
        pieces_of_markup = re.findall(exp, markup)
        result_uris = []
        for piece in pieces_of_markup:
            result_uris.append(
                piece.replace(r1, "").replace(r2, "")
            )
        return result_uris

    def ParseNumberPage(self, uri):
        """Parse the page with the message history of a single number.

        NOTICE: this could be cleaner, but that would require a lot of
        hardcoded special-casing, because the original site has many bugs.
        Most of the major ones are handled here.
        """

        def die(text):
            self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")")

        def p(l):
            # Replace falsy cells with the EMPTY marker
            for i in range(len(l)):
                if not l[i]:
                    l[i] = self.EMPTY
            return l

        result = {
            "number": self.EMPTY,
            "last_use_raw": self.EMPTY,
            "born_raw": self.EMPTY,
            "times_used": self.EMPTY
        }
        numExp = r'<h1 id="numberget" class="cw">\+[0-9]+</h1>'
        tableCellExp = r'<td.*">.*</td>'
        cellFromExp = r'<td.*datesize3.*>.*</td>'
        cellContExp = r'<td.*datesize5.*>.*</td>'
        cellTimeExp = r'<td.*datesize4.*>.*</td>'

        markup = self.RequestPage(uri).text
        number = re.findall(numExp, markup)
        if not number:
            die("error: can't parse page, number is empty")
            self.ErrorsWhileParsing += 1
            return result
        result["number"] = re.findall(r"\+[0-9]+", number[0])[0]
        tableCellsRaw = re.findall(tableCellExp, markup)
        if not tableCellsRaw:
            die("error: can't parse page, tableCellsRaw is empty")
            self.ErrorsWhileParsing += 1
            return result
        elif len(tableCellsRaw) % 3:
            die("warning: tableCellsRaw length should be divisible by 3; incorrect results are likely")

        tableLines = []
        tmpLine = [None, None, None]

        # Walk the cells, grouping them into lines of three (from, content, time).
        # A while loop is used so that consumed cells can actually be skipped.
        i = 0
        while i < len(tableCellsRaw):
            if re.search(cellFromExp, tableCellsRaw[i]) and (i < len(tableCellsRaw) - 3):
                if re.search(cellContExp, tableCellsRaw[i+1]):
                    if re.search(cellTimeExp, tableCellsRaw[i+2]):
                        tableLines.append([
                            tableCellsRaw[i],
                            tableCellsRaw[i+1],
                            tableCellsRaw[i+2]
                        ])
                        tmpLine = [None, None, None]
                        i += 3  # skip the two extra cells just consumed
                        continue

            if re.search(cellFromExp, tableCellsRaw[i]) and not tmpLine[0]:
                if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
                    tmpLine[0] = self.EMPTY
                else:
                    tmpLine[0] = tableCellsRaw[i]
            elif tmpLine[0]:
                tmpLine = p(tmpLine)
            if re.search(cellContExp, tableCellsRaw[i]) and not tmpLine[1]:
                # if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
                #     tmpLine[1] = self.EMPTY
                # else:
                tmpLine[1] = tableCellsRaw[i]
            elif tmpLine[1]:
                tmpLine = p(tmpLine)
            if re.search(cellTimeExp, tableCellsRaw[i]) and not tmpLine[2]:
                if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
                    tmpLine[2] = self.EMPTY
                else:
                    tmpLine[2] = tableCellsRaw[i]
            elif tmpLine[2]:
                tmpLine = p(tmpLine)

            if tmpLine[0] and tmpLine[1] and tmpLine[2]:
                tableLines.append(tmpLine)
                tmpLine = [None, None, None]

            i += 1

        if not tableLines:
            die("error: can't parse page, tableLines is empty")
            self.ErrorsWhileParsing += 1
            return result

        # The first line with a non-empty time cell gives the most recent use...
        for i in range(len(tableLines)):
            if tableLines[i][2] != self.EMPTY:
                result["last_use_raw"] = re.sub(
                    r'(<td.*">)|(</td>)',
                    "",
                    tableLines[i][2]
                )
                break

        # ...and the last one approximates when the number first appeared.
        for i in range(len(tableLines)-1, -1, -1):
            if tableLines[i][2] != self.EMPTY:
                result["born_raw"] = re.sub(
                    r'(<td.*">)|(</td>)',
                    "",
                    tableLines[i][2]
                )
                break
        result["times_used"] = len(tableLines)

        return result

    def ProcessRawNumbers(self, raws):
        """Process the list of raw parsed numbers into the output format."""
        for i in range(0, len(raws)):
            if not re.search(r"\+[0-9]+", raws[i]["number"]):
                self.Log(f"error: incorrect number: \"{raws[i]['number']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                continue

            if not re.search("[a-zA-Z0-9]+", raws[i]["last_use_raw"]):
                self.Log(f"warning: malformed last_use_raw field: \"{raws[i]['last_use_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                raws[i]["last_use_raw"] = self.EMPTY

            raws[i]["last_use"] = raws[i]["last_use_raw"]

            if not re.search("[a-zA-Z0-9]+", raws[i]["born_raw"]):
                self.Log(f"warning: malformed born_raw field: \"{raws[i]['born_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                raws[i]["born_raw"] = self.EMPTY

            raws[i]["born"] = raws[i]["born_raw"]

            raws[i]["url"] = self.WebSiteFullURL + raws[i]["uri_raw"]

            # Classify by usage count
            if raws[i]["times_used"] == 0:
                self.Output["not_used"].append(raws[i])
            elif raws[i]["times_used"] < self.AlmostNotUsedThreshold:
                self.Output["almost_not_used"].append(raws[i])
            else:
                self.Output["heavily_used"].append(raws[i])
sites/ReceivesmsCo.py (new file, 160 lines)
@@ -0,0 +1,160 @@
import re
import WebSiteParser


class ReceivesmsCo(WebSiteParser.WebSiteParser):
    def __init__(self):
        super().__init__("receivesms.co")


    def Parse(self):
        """Perform parsing of the entire web site."""

        # Parse the main page with the list of countries
        country_uris = self.ParseAllFromPage(
            r"<td.*table_link.*>[\s]*<a.*href=.*phone-numbers/.*>.*</a>",
            to_remove=r"(<td.*table_link.*>[\s]*<a.*href=(\"|'))|((\"|').*>.*</a>)"
        )

        numbers = []
        for country_uri in country_uris:
            country_number_uris = self.ParseAllFromPage(
                r"<td>[\s]*<a.*href=.*target=['\"]_self['\"]>.*</a>[\s]*</td>",
                to_remove=r"(<td>[\s]*<a.*href=(\"|'))|((\"|').*target=(\"|')_self(\"|')>.*</a>[\s]*</td>)",
                location=(country_uri if country_uri[0] != "/" else country_uri[1:])
            )
            for num_uri in country_number_uris:
                numbers.append({
                    "uri_raw": (num_uri if num_uri[0] != "/" else num_uri[1:])
                })
            break  # TODO: remove; limits parsing to the first country while testing

        for i in range(len(numbers)):
            numbers[i].update(
                self.ParseNumberPage(numbers[i]["uri_raw"])
            )

        self.ProcessRawNumbers(numbers)
        self.ParseDone = True
        self.Log("parsing done")

    def ParseNumberPage(self, uri):
        """Parse the page with the message history of a single number."""

        def die(text):
            self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")")

        result = {
            "number": self.EMPTY,
            "country": self.EMPTY,
            "last_use_raw": self.EMPTY,
            "born_raw": self.EMPTY,
            "times_used": self.EMPTY
        }

        markup = self.RequestPage(uri).text

        country = re.findall(
            r"<h3>[\s]*<a.*href=.*>[\s]*<i.*class=.*></i></a>.*[\s]-[\s].*Phone Number[\s]*</h3>",
            markup
        )
        country = re.sub(
            r"(<h3>[\s]*<a.*href=.*>[\s]*<i.*class=.*></i></a>[\s]*)|([\s]-[\s].*Phone Number[\s]*</h3>)",
            "",
            str(country[0]) if country else ""
        )
        if not country:
            die("error: page parsing failed, country is empty")
            return result
        result["country"] = country

        number = re.findall(
            r"<div.*class=[\"'].*h3.*[\"'].*>+.*</div",
            markup
        )
        number = re.sub(
            r"(<div.*class=[\"'].*h3.*[\"'].*>)|(</div)",
            "",
            str(number[0]) if number else ""
        )
        if not number:
            die("error: page parsing failed, number is empty")
            return result
        result["number"] = number

        expMsgHead = r"<div .*class=[\"'].*message_head.*[\"'].*>[\s]*From[\s]*<a.*>.*</a>[\s]*.*[(].+ago[)][\s]*</div"
        expMsgHead2 = r"(<div .*class=[\"'].*message_head.*[\"'].*>[\s]*From[\s]*<a.*>.*</a>[\s]*.*[(])|([)][\s]*</div)"

        msg_heads = re.findall(
            expMsgHead,
            markup
        )
        if msg_heads:
            result["last_use_raw"] = re.sub(
                expMsgHead2,
                "",
                msg_heads[0]
            )

        # Pagination: the nav links tell how many pages of messages exist
        nav_links = re.findall(
            r"<li class=[\"']page-item[a-z- ]*[\"']>[\s]*<a class=[\"'][a-zA-Z0-9 -]*[\"'] href=[\"'][a-zA-Z0-9/#-]*[\"']",
            markup
        )
        pages_amount = len(nav_links)
        msgs_at_last_page = 0
        if nav_links:
            if len(nav_links) > 1:
                pages_amount -= 2  # presumably the "previous"/"next" links

            last_page_uri = nav_links[-1]
            last_page_uri = re.sub(
                r"(^<li class=[\"']page-item[a-z- ]*[\"']>[\s]*<a class=[\"'][a-zA-Z0-9 -]*[\"'] href=[\"'])|([\"']$)",
                "",
                last_page_uri
            )
            msg_heads_last = self.ParseAllFromPage(
                expMsgHead,
                expMsgHead2,
                last_page_uri
            )
            if msg_heads_last:
                result["born_raw"] = msg_heads_last[-1]
                msgs_at_last_page = len(msg_heads_last)

        result["times_used"] = (pages_amount - 1) * len(msg_heads) + msgs_at_last_page

        return result

    def ProcessRawNumbers(self, raws):
        """Process the list of raw parsed numbers into the output format."""
        for i in range(0, len(raws)):
            if not re.search(r"\+[0-9]+", raws[i]["number"]):
                self.Log(f"error: incorrect number: \"{raws[i]['number']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                continue

            if not re.search("[a-zA-Z0-9]+", raws[i]["last_use_raw"]):
                self.Log(f"warning: malformed last_use_raw field: \"{raws[i]['last_use_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                raws[i]["last_use_raw"] = self.EMPTY

            raws[i]["last_use"] = raws[i]["last_use_raw"]

            if not re.search("[a-zA-Z0-9]+", raws[i]["born_raw"]):
                self.Log(f"warning: malformed born_raw field: \"{raws[i]['born_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
                raws[i]["born_raw"] = self.EMPTY

            raws[i]["born"] = raws[i]["born_raw"]

            raws[i]["url"] = self.WebSiteFullURL + raws[i]["uri_raw"]

            # Classify by usage count
            if raws[i]["times_used"] == 0:
                self.Output["not_used"].append(raws[i])
            elif raws[i]["times_used"] < self.AlmostNotUsedThreshold:
                self.Output["almost_not_used"].append(raws[i])
            else:
                self.Output["heavily_used"].append(raws[i])
sites/SmsreceivefreeCom.py (new file, 15 lines)
@@ -0,0 +1,15 @@
import re
import WebSiteParser


class SmsreceivefreeCom(WebSiteParser.WebSiteParser):
    def __init__(self):
        super().__init__("smsreceivefree.com")


    def Parse(self):
        """Perform parsing of the entire web site."""

        raise NotImplementedError("support for this service is not yet implemented")
sites/__init__.py (new file, empty)
vnp.py (new file, 39 lines)
@@ -0,0 +1,39 @@
import os
import threading
import requests

import Action
#from sites.AnonymsmsComParser import AnonymsmsComParser
from sites.FreesmscenterCom import FreesmscenterCom
from sites.ReceivesmsCo import ReceivesmsCo


def ProcessSite(siteClass):
    """Instantiate the given parser class, run it and hand the result to Action."""
    site = siteClass()
    print(f"Parsing site {site.WebSiteURL}")
    site.Parse()
    Action.Run(site.GetResult())


if __name__ == "__main__":
    #requests.adapters.DEFAULT_RETRIES = 128  # Is it working?

    siteClasses = [
        FreesmscenterCom,
        ReceivesmsCo
    ]

    # Run all parsers but the last in background threads;
    # the last one runs on the main thread.
    running_threads = []
    for siteClass in siteClasses[:-1]:
        running_threads.append(
            threading.Thread(
                target=ProcessSite,
                args=(siteClass,)
            )
        )
        running_threads[-1].start()
    ProcessSite(siteClasses[-1])
    for th in running_threads:
        th.join()