From 7ad18b34e29ab6c4638c263d18455f6fb7f7ce14 Mon Sep 17 00:00:00 2001
From: xxx_stroboscope_420_xxx
Date: Mon, 12 Jun 2023 02:43:51 +0300
Subject: [PATCH] Added some pieces

---
 Action.py                   |  15 +++
 README.md                   |  27 ++++-
 WebSiteParser.py            | 132 ++++++++++++++++++++++
 sites/AnonymsmsComParser.py |  15 +++
 sites/FreesmscenterCom.py   | 216 ++++++++++++++++++++++++++++++++++++
 sites/ReceivesmsCo.py       | 160 ++++++++++++++++++++++
 sites/SmsreceivefreeCom.py  |  15 +++
 sites/__init__.py           |   0
 vnp.py                      |  39 +++++++
 9 files changed, 618 insertions(+), 1 deletion(-)
 create mode 100644 Action.py
 create mode 100644 WebSiteParser.py
 create mode 100644 sites/AnonymsmsComParser.py
 create mode 100644 sites/FreesmscenterCom.py
 create mode 100644 sites/ReceivesmsCo.py
 create mode 100644 sites/SmsreceivefreeCom.py
 create mode 100644 sites/__init__.py
 create mode 100644 vnp.py

diff --git a/Action.py b/Action.py
new file mode 100644
index 0000000..bfed55e
--- /dev/null
+++ b/Action.py
@@ -0,0 +1,15 @@
+#!/usr/bin/python3
+
+"""Action(s) that should be performed when valid numbers are found"""
+
+try:
+    from pprint import pprint
+except ImportError:
+    pprint = None
+
+
+def Run(result):
+    if pprint:
+        pprint(result)
+    else:
+        print(f"*doing something with {result}*")
\ No newline at end of file
diff --git a/README.md b/README.md
index 7fb6c5a..978c517 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,27 @@
-# vnp
+# Virtual Numbers Parser
+Script that checks several websites offering virtual numbers and looks for unused ones. It is recommended to run this program through a proxy, for example via proxychains.
+
+ATTENTION! THIS IS MOSTLY INCOMPLETE!
+
+
+## Usage
+
+```bash
+[torsocks/proxychains4] python3 vnp.py
+```
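+
+
+## Output format
+
+Each site parser sorts numbers into three buckets and exposes them via `GetResult()` (see `WebSiteParser.Output`). A sketch of the structure — field values here are illustrative, not real output:
+
+```python
+{
+    "not_used": [],
+    "almost_not_used": [
+        {
+            "number": "+12025550123",
+            "country": "USA",
+            "last_use": "2 days ago",
+            "born": "3 months ago",
+            "url": "https://freesmscenter.com/SmsReceive/..."
+        }
+    ],
+    "heavily_used": []
+}
+```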
+
+
+## Currently supported websites
+
+- freesmscenter.com
+- receivesms.co
+
+
+## To-implement list
+
+- anonymsms.com (CF block if no JS and Tor)
+- getfreesmsnumber.com (sometimes guardian block if Tor)
+- receive-smss.com (CF block if via Tor)
+- smsreceivefree.com (CF block if via Tor)
+- temporary-phone-number.com (CF block if via Tor)
\ No newline at end of file
diff --git a/WebSiteParser.py b/WebSiteParser.py
new file mode 100644
index 0000000..a9533f9
--- /dev/null
+++ b/WebSiteParser.py
@@ -0,0 +1,132 @@
+import re
+import requests
+
+# Dirty hack to mute warnings about unverified certificates
+def __nothing(*args, **kwargs): pass
+requests.warnings.warn = __nothing
+
+
+class ParseIsNotDoneException(Exception):
+    def __init__(self):
+        super().__init__("you should perform parsing first")
+
+
+class FailedToParseException(Exception):
+    def __init__(self, url, errtext):
+        super().__init__(f"can't parse website \"{url}\"; error message: {errtext}")
+
+
+class CantRequestPageException(Exception):
+    def __init__(self, url, errtext=None):
+        if errtext:
+            super().__init__(f"can't request page \"{url}\"; reason: {errtext}")
+        else:
+            super().__init__(f"can't request page \"{url}\"")
+
+
+class WebSiteParser:
+    def __init__(self, url):
+        self.EMPTY = str(None)
+
+        self.CFBlockText = "Enable JavaScript and cookies to continue".lower()
+
+        self.DefaultHeaders = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.5",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            #"Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1"
+        }
+        # Single result number format:
+        # {
+        #     "number": "88005553535",
+        #     "country": "USSR",
+        #     "last_use": "11 milliseconds ago",
+        #     "born": "9 eras ago",
+        #     "url": "https://example.com/number/..."
+        # }
+        # Single number internal format:
+        # {
+        #     "number": "...",
+        #     "country": "...",
+        #     "times_used": 100500,
+        #     "last_use_raw": "log(12451 ^ 76346 mod 54) * 420000 / 146 seconds ago",
+        #     "born_raw": "2 << 2 | 1 eRaS aGo ayo dude",
+        #     "uri_raw": "/numbers/..."
+        # }
+        self.Output = {
+            "heavily_used": [],
+            "almost_not_used": [],
+            "not_used": []
+        }
+        self.ParseDone = False
+        self.AlmostNotUsedThreshold = 5  # If a number is used this many times or fewer, it is counted as almost not used
+        self.ErrorsWhileParsing = 0
+
+        self.WebSiteURL = url
+        self.WebSiteFullURL = f"https://{url}/"
+
+
+    def Cut(self, text, l=64):
+        """Truncate text to at most l characters, appending "..." if it was cut"""
+        if not isinstance(text, str):
+            text = str(text)
+        if len(text) > l:
+            text = text[:l-1] + "..."
+        return text
+
+    def Log(self, text, src=None):
+        """Write text to stdout"""
+        if not src:
+            print(f"{self.WebSiteURL} parser: {text}")
+        else:
+            print(f"{self.WebSiteURL} parser at {src}: {text}")
+
+
+    def RequestPage(self, location=""):
+        """Request the page at the given location"""
+        url = f"{self.WebSiteFullURL}{location}"
+        try:
+            resp = requests.get(
+                url,
+                headers=self.DefaultHeaders,
+                verify=False
+            )
+        except requests.exceptions.ConnectionError:
+            raise CantRequestPageException(url, "can't connect, retries limit exceeded")
+        if resp.status_code not in (200, 302):
+            if (resp.status_code == 403) and (self.CFBlockText in resp.text.lower()):
+                raise CantRequestPageException(url, "blocked by cloudflare")
+            elif (resp.status_code == 403) and (resp.headers.get("Server") == "cloudflare"):
+                raise CantRequestPageException(url, "seems to be blocked by cloudflare")
+            raise CantRequestPageException(url, f"status code is {resp.status_code}")
+        else:
+            return resp
+
+
+    def ParseAllFromPage(self, exp, to_remove="", location=""):
+        """Find everything matching the regular expression "exp" on the page and strip from each match everything matching the RE "to_remove"
+        """
+        markup = self.RequestPage(location).text
+        pieces_of_markup = re.findall(exp, markup)
+        result = []
+        for piece in pieces_of_markup:
+            if not isinstance(piece, str):
+                e = self.Cut(str(piece))
+                self.Log(f"warning: unexpected result while parsing: {e}", f"ParseAllFromPage(self, \"{self.Cut(exp,32)}\", \"{self.Cut(to_remove,32)}\", \"{self.Cut(location,32)}\")")
+                continue
+            result.append(
+                re.sub(to_remove, "", piece) if to_remove else piece
+            )
+        return result
+
+
+    def GetResult(self):
+        if not self.ParseDone:
+            raise ParseIsNotDoneException
+        return self.Output
\ No newline at end of file
diff --git a/sites/AnonymsmsComParser.py b/sites/AnonymsmsComParser.py
new file mode 100644
index 0000000..ae96657
--- /dev/null
+++ b/sites/AnonymsmsComParser.py
@@ -0,0 +1,15 @@
+import requests
+import WebSiteParser
+
+
+
+class AnonymsmsComParser(WebSiteParser.WebSiteParser):
+    def __init__(self):
+        super().__init__("anonymsms.com")
+
+    def Parse(self):
+        raise NotImplementedError("support for this service is not yet implemented")
+
+        # Unreachable until support is implemented
+        resp = self.RequestPage()
+
+        self.ParseDone = True
\ No newline at end of file
diff --git a/sites/FreesmscenterCom.py b/sites/FreesmscenterCom.py
new file mode 100644
index 0000000..92f153c
--- /dev/null
+++ b/sites/FreesmscenterCom.py
@@ -0,0 +1,216 @@
+"""Module for parsing freesmscenter.com
+NOTICE: this may be rewritten more cleanly in the future
+"""
+
+import re
+import WebSiteParser
+
+
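+# Expected site layout (approximate, inferred from the patterns below):
+#   /                                   -> country list ("Show Phone Numbers" links)
+#   FreeSms/<Country>-Phone-Number      -> a country's number list ("Receive SMS" links)
+#   SmsReceive/<Country>-Phone-Number/* -> message history table for one number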
+class FreesmscenterCom(WebSiteParser.WebSiteParser):
+    def __init__(self):
+        super().__init__("freesmscenter.com")
+
+
+    def Parse(self):
+        """Perform parsing of the entire website.
+        """
+
+        # Parsing main page with the list of countries
+        # (the markup patterns here approximate the site's HTML and may need adjustment)
+        country_uris = self.ParseURIsFromPage(
+            '<a href="FreeSms/[^"]+">Show Phone Numbers</a>',
+            '<a href="',
+            '">Show Phone Numbers</a>'
+        )
+
+        # Parsing every number from each country's number list page
+        numbers_raw = []
+        for country_uri in country_uris:
+            tmp = country_uri.replace("FreeSms", "SmsReceive")
+            number_uris = self.ParseURIsFromPage(
+                f'<a href="{tmp}/[^"]+">Receive SMS</a>',
+                '<a href="',
+                '">Receive SMS</a>',
+                country_uri
+            )
+            if not number_uris:
+                # Fallback: some country pages keep the FreeSms path for number
+                # links too (assumed variant of the markup)
+                number_uris = self.ParseURIsFromPage(
+                    f'<a href="{country_uri}/[^"]+">Receive SMS</a>',
+                    '<a href="',
+                    '">Receive SMS</a>',
+                    country_uri
+                )
+            for number_uri in number_uris:
+                numbers_raw.append({
+                    "uri_raw": number_uri,
+                    "country": country_uri.replace("FreeSms/", "").replace("-Phone-Number", "").replace("%20", " ")
+                })
+
+        # Parse each number's page
+        for i in range(len(numbers_raw)):
+            numbers_raw[i].update(
+                self.ParseNumberPage(numbers_raw[i]["uri_raw"])
+            )
+
+        self.ProcessRawNumbers(numbers_raw)
+        self.ParseDone = True
+        self.Log("parsing done")
+
+
+    def ParseURIsFromPage(self, exp, r1, r2, uri=""):
+        """Parse all URIs from the selected page: find everything matching the RE "exp", then strip the literal prefix "r1" and suffix "r2" from each match.
+        """
+        markup = self.RequestPage(uri).text
+        pieces_of_markup = re.findall(exp, markup)
+        result_uris = []
+        for piece in pieces_of_markup:
+            result_uris.append(
+                piece.replace(r1, "").replace(r2, "")
+            )
+        return result_uris
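+
+
+    # Country URIs look like "FreeSms/United%20States-Phone-Number"; the country
+    # name is recovered by stripping the path prefix/suffix and the %20 escapes.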
+    def ParseNumberPage(self, uri):
+        """Parse the page with the history of messages related to a single number.
+
+        NOTICE: this could be cleaner, but that would require a lot of
+        hardcoded workarounds, because the original site itself is quite
+        buggy. However, most of the major issues are handled here.
+        """
+
+        def die(text):
+            self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")")
+
+        def p(l):
+            # Fill empty cells with the EMPTY placeholder
+            for i in range(len(l)):
+                if not l[i]:
+                    l[i] = self.EMPTY
+            return l
+
+        result = {
+            "number": self.EMPTY,
+            "last_use_raw": self.EMPTY,
+            "born_raw": self.EMPTY,
+            "times_used": self.EMPTY
+        }
+        # The patterns below approximate the site's markup; the cell classes
+        # are assumptions and may need adjustment against the live pages
+        numExp = r'<h1[^>]*>\+[0-9]+</h1>'
+        tableCellExp = r'<td[^>]*>.*</td>'
+        cellFromExp = r'<td class="from"[^>]*>.*</td>'
+        cellContExp = r'<td class="content"[^>]*>.*</td>'
+        cellTimeExp = r'<td class="time"[^>]*>.*</td>'
+
+        markup = self.RequestPage(uri).text
+        number = re.findall(numExp, markup)
+        if not number:
+            die("error: can't parse page, number is empty")
+            self.ErrorsWhileParsing += 1
+            return result
+        result["number"] = re.findall(r'\+[0-9]+', number[0])[0]
+        tableCellsRaw = re.findall(tableCellExp, markup)
+        if not len(tableCellsRaw):
+            die("error: can't parse page, tableCellsRaw is empty")
+            self.ErrorsWhileParsing += 1
+            return result
+        elif len(tableCellsRaw) % 3:
+            die("warning: length of tableCellsRaw should be divisible by 3, incorrect results are expected")
+
+        tableLines = []
+        tmpLine = [None, None, None]
+
+        # A while loop is used here because the index is advanced manually
+        # past complete From/Content/Time triples
+        i = 0
+        while i < len(tableCellsRaw):
+            if re.search(cellFromExp, tableCellsRaw[i]) and (i <= len(tableCellsRaw) - 3):
+                if re.search(cellContExp, tableCellsRaw[i+1]):
+                    if re.search(cellTimeExp, tableCellsRaw[i+2]):
+                        tableLines.append([
+                            tableCellsRaw[i],
+                            tableCellsRaw[i+1],
+                            tableCellsRaw[i+2]
+                        ])
+                        tmpLine = [None, None, None]
+                        i += 3
+                        continue
+
+            # Fallback: collect cells one by one, tolerating missing ones
+            if re.search(cellFromExp, tableCellsRaw[i]) and not tmpLine[0]:
+                if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
+                    tmpLine[0] = self.EMPTY
+                else:
+                    tmpLine[0] = tableCellsRaw[i]
+            elif tmpLine[0]:
+                tmpLine = p(tmpLine)
+            if re.search(cellContExp, tableCellsRaw[i]) and not tmpLine[1]:
+                # if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
+                #     tmpLine[1] = self.EMPTY
+                # else:
+                tmpLine[1] = tableCellsRaw[i]
+            elif tmpLine[1]:
+                tmpLine = p(tmpLine)
+            if re.search(cellTimeExp, tableCellsRaw[i]) and not tmpLine[2]:
+                if not re.search("[a-zA-Z0-9]", tableCellsRaw[i]):
+                    tmpLine[2] = self.EMPTY
+                else:
+                    tmpLine[2] = tableCellsRaw[i]
+            elif tmpLine[2]:
+                tmpLine = p(tmpLine)
+
+            if tmpLine[0] and tmpLine[1] and tmpLine[2]:
+                tableLines.append(tmpLine)
+                tmpLine = [None, None, None]
+
+            i += 1
+
+        if not tableLines:
+            die("error: can't parse page, tableLines is empty")
+            self.ErrorsWhileParsing += 1
+            return result
+
+        # The newest message is at the top of the table...
+        for i in range(len(tableLines)):
+            if tableLines[i][2] != self.EMPTY:
+                result["last_use_raw"] = re.sub(
+                    r'(<td[^>]*>)|(</td>)',
+                    "",
+                    tableLines[i][2]
+                )
+                break
+
+        # ...and the oldest one is at the bottom
+        for i in range(len(tableLines)-1, -1, -1):
+            if tableLines[i][2] != self.EMPTY:
+                result["born_raw"] = re.sub(
+                    r'(<td[^>]*>)|(</td>)',
+                    "",
+                    tableLines[i][2]
+                )
+                break
+        result["times_used"] = len(tableLines)
+
+        return result
+    def ProcessRawNumbers(self, raws):
+        """Process the list of raw parsed numbers into the output format.
+        """
+
+        for i in range(0, len(raws)):
+            if not re.search(r'\+[0-9]+', raws[i]["number"]):
+                self.Log(f"error: incorrect number: \"{raws[i]['number']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
+                continue
+
+            if not re.search("[a-zA-Z0-9]+", raws[i]["last_use_raw"]):
+                self.Log(f"warning: malformed last_use_raw field: \"{raws[i]['last_use_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
+                raws[i]["last_use_raw"] = self.EMPTY
+
+            raws[i]["last_use"] = raws[i]["last_use_raw"]
+
+            if not re.search("[a-zA-Z0-9]+", raws[i]["born_raw"]):
+                self.Log(f"warning: malformed born_raw field: \"{raws[i]['born_raw']}\"", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
+                raws[i]["born_raw"] = self.EMPTY
+
+            raws[i]["born"] = raws[i]["born_raw"]
+
+            raws[i]["url"] = self.WebSiteFullURL + raws[i]["uri_raw"]
+
+            if not isinstance(raws[i]["times_used"], int):
+                # times_used stayed EMPTY because the number's page failed to parse
+                self.Log(f"warning: unknown times_used for \"{raws[i]['number']}\", skipping", f"ProcessRawNumbers(self at {id(self)}, raws at {id(raws)})")
+                continue
+
+            if raws[i]["times_used"] == 0:
+                self.Output["not_used"].append(raws[i])
+            elif raws[i]["times_used"] <= self.AlmostNotUsedThreshold:
+                self.Output["almost_not_used"].append(raws[i])
+            else:
+                self.Output["heavily_used"].append(raws[i])
diff --git a/sites/ReceivesmsCo.py b/sites/ReceivesmsCo.py
new file mode 100644
index 0000000..a07a027
--- /dev/null
+++ b/sites/ReceivesmsCo.py
@@ -0,0 +1,160 @@
+import re
+import WebSiteParser
+
+
+
+class ReceivesmsCo(WebSiteParser.WebSiteParser):
+    def __init__(self):
+        super().__init__("receivesms.co")
+
+
+    def Parse(self):
+        """Perform parsing of the entire website.
+        """
+
+        # Parsing main page with the list of countries; the anchor patterns
+        # here approximate the site's markup and may need a tighter filter
+        country_uris = self.ParseAllFromPage(
+            r'<a[^>]*href="/[^"]+"[^>]*>[\s]*.*</a>',
+            to_remove=r'(<a[^>]*href=")|("[^>]*>[\s]*.*</a>)'
+        )
+
+        numbers = []
+        for country_uri in country_uris:
+            country_number_uris = self.ParseAllFromPage(
+                r'<a[^>]*href="/[^"]+"[^>]*>[\s]*.*[\s]*</a>',
+                to_remove=r'(<a[^>]*href=")|("[^>]*>[\s]*.*[\s]*</a>)',
+                location=(country_uri if country_uri[0] != "/" else country_uri[1:])
+            )
+            for num_uri in country_number_uris:
+                numbers.append({
+                    "uri_raw": (num_uri if num_uri[0] != "/" else num_uri[1:])
+                })
+            break  # TODO: remove; limits the crawl to the first country while testing
+
+        for i in range(len(numbers)):
+            numbers[i].update(
+                self.ParseNumberPage(numbers[i]["uri_raw"])
+            )
+
+        self.ProcessRawNumbers(numbers)
+        self.ParseDone = True
+        self.Log("parsing done")
+    def ParseNumberPage(self, uri):
+        """Parse the page with the history of messages related to a single number.
+        """
+
+        def die(text):
+            self.Log(text, f"ParseNumberPage(self at {id(self)}, \"{uri}\")")
+
+        result = {
+            "number": self.EMPTY,
+            "country": self.EMPTY,
+            "last_use_raw": self.EMPTY,
+            "born_raw": self.EMPTY,
+            "times_used": self.EMPTY
+        }
+
+        markup = self.RequestPage(uri).text
+
+        # The country name sits in the page heading next to a flag image,
+        # e.g. "<img ...> USA - Phone Number" (tag structure approximated)
+        country = re.findall(
+            r'<h1[^>]*>[\s]*<img[^>]*>[\s]*.*[\s]-[\s].*Phone Number[\s]*</h1>',
+            markup
+        )
+        country = re.sub(
+            r'(<h1[^>]*>[\s]*<img[^>]*>[\s]*)|([\s]-[\s].*Phone Number[\s]*</h1>)',
+            "",
+            str(country[0]) if country else ""
+        )
+        if not country:
+            die("error: page parsing failed, country is empty")
+            return result
+        result["country"] = country
+
+        # The number itself is in a heading of its own (tag assumed)
+        number = re.findall(r'<h2[^>]*>(\+.*)</h2>', markup)
+        if not number:
+            die("error: page parsing failed, number is empty")
+            self.ErrorsWhileParsing += 1
+            return result
+        result["number"] = re.findall(r'\+[0-9]+', number[0])[0]
+
+        # Message rows render as "From <sender> ... (<time> ago)"
+        messages = re.findall(
+            r'([\s]*From[\s]*.*[\s]*.*[(].+ago[)][\s]*)',
+            markup
+        )
+        if messages:
+            # The newest message comes first; keep only the "<time> ago" part
+            result["last_use_raw"] = re.sub(
+                r'([\s]*From[\s]*.*[\s]*.*[(])|([)][\s]*)',
+                "",
+                messages[0]
+            )
+
+        # Count the pagination links to learn how many pages of messages exist
+        # (nav markup approximated)
+        nav_links = re.findall(r'<li[^>]*>[\s]*<a[^>]*href="[^"]*"[^>]*>', markup)
+        pages_amount = len(nav_links)
+        if pages_amount > 1:
+            # Drop the "previous" and "next" arrows from the count
+            pages_amount -= 2
+
+            # The oldest message (and so the number's age) lives on the last page
+            last_page_uri = nav_links[-1]
+            last_page_uri = re.sub(
+                r'(^<li[^>]*>[\s]*