# dbimport: parse a game server text database and load it into sqlite.
import re, argparse
|
|
import sqlite3 as sql
|
|
import logging
|
|
import logging.handlers
|
|
|
|
from os import listdir, mkdir
|
|
from os.path import isfile, exists
|
|
from urllib.parse import unquote
|
|
|
|
#------------------------------------------------+
# get_list_from_server_txt
#------------------------------------------------+
# Rows in the game server database are
# occasionally concatenated into one line.
# To simplify, they are unmerged.
#
# The final result is every row in the game server
# database with its own index in a list.
#------------------------------------------------+
def get_list_from_server_txt(filename):
    """Read the server text database and return one row per list entry.

    Rows are occasionally concatenated onto one physical line; such lines
    are split apart so that every database row gets its own index.
    """
    SEP = '\\'
    PER_ROW = 3  # a single row contains exactly three backslashes

    def split_merged(line):
        """Iteratively peel complete rows off a merged line."""
        pieces = []
        remainder = line
        while remainder.count(SEP) > (PER_ROW - 1):
            parts = remainder.split(SEP)
            pieces.append(SEP.join(parts[:PER_ROW]))
            # the tail lost its leading separator in the split; restore it
            remainder = SEP + SEP.join(parts[PER_ROW:])
        pieces.append(remainder)
        return pieces

    with open(filename, 'r') as handle:
        # the server database contains many blank lines; skip them
        kept = [ln for ln in handle if ln != "\n"]

    result = []
    for entry in kept:
        # The first and last column carry a leading backslash, so a line
        # holding more than one row must be cut at every 3rd backslash.
        if entry.count(SEP) > (PER_ROW - 1):
            result.extend(split_merged(entry))
        else:
            result.append(entry)
    return result
def init_logging():
    """Configure root logging to a fresh file under _logs/ and return its path.

    Picks the first unused _logs/dbimport-<i>.log name so logs from
    earlier runs are never overwritten.
    """
    # fix: on a first run the _logs directory does not exist and the
    # open() below raised FileNotFoundError; mkdir was imported for this.
    if not exists("_logs"):
        mkdir("_logs")
    template = "_logs/dbimport-%s.log"
    i = 0
    while exists(template % i):
        i += 1
    filename = template % i
    # NOTE: logging owns this stream for the life of the process, so the
    # file object is deliberately not closed here.
    f = open(filename, mode='a', encoding='utf-8')
    logging.basicConfig(stream=f, level=logging.DEBUG)
    return filename
#------------------------------------------------+
# Functions: Clean up.
#------------------------------------------------+
# Unlike other rows, the separator character '/'
# is part of the value of the second column, so an
# ordinary match for '/' or '\' cannot be done the
# way it is for the other row types.
# example from game server db:
# \/uid2name/Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=\Tuxxy
# it should become:
# ["uid2name", "Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=", "Tuxxy"]
def uid2namefix(row):
    r"""Split a uid2name row into [key, crypto_idfp, player_name].

    The second column's value may itself contain '/', so this row type
    cannot be tokenized the way the other row types are.

    Example input from the game server db:
        \/uid2name/Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=\Tuxxy
    becomes:
        ["uid2name", "Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=", "Tuxxy"]
    """
    # Swap the first and last backslash for commas:
    # ",/uid2name/crypto_idfp, name"
    no_backslashes = re.sub(r'^([^\\]*)\\|\\(?=[^\\]*$)', ',', row)
    # Swap only the first two forward slashes:
    # ",,uid2name,crypto_idfp, name"
    commas = no_backslashes.replace('/', ',', 2)
    # Drop the two leftover leading commas, then split the three columns.
    columns = commas[2:].split(',')
    # The player name is percent-encoded and may end with a newline.
    columns[2] = unquote(columns[2]).strip('\n')
    return columns
# Single O(n) pass over the raw db rows, organizing cts-related data
# into lists of parsed rows.
def filters(db):
    r"""Sort raw db rows into (times, ranks, ids) lists of column lists."""
    times, ranks, ids = [], [], []
    RANK = 2  # column index holding the record number
    for row in db:
        if "uid2name" in row:
            ids.append(uid2namefix(row))
            continue
        # tokens: substrings free of backslash, forward slash and newline
        cols = re.findall(r'[^\\/\n]+', row)
        if "cts100record/time" in row:
            cols[RANK] = int(cols[RANK].replace("time", ""))
            times.append(cols)
        if "cts100record/crypto_idfp" in row:
            cols[3] = unquote(cols[3])
            cols[RANK] = int(cols[RANK].replace("crypto_idfp", ""))
            ranks.append(cols)
        if "cts100record/speed" in row:
            # speed records - not implemented
            pass
    return times, ranks, ids
#------------------------------------------------+
# Functions: Database Creation
#------------------------------------------------+
def inserttodb(c, q, d):
    """Execute query q on cursor c once for every row in d.

    Deliberately row-by-row rather than executemany: a problematic row is
    reported and skipped while the remaining OK rows still get inserted.
    """
    for row in d:
        try:
            c.execute(q, row)
        except sql.ProgrammingError as err:
            # report the offending row and keep going
            print(err)
            print(row)
#------------------------------------------------+
# insert new data directly into new database file
def i(d, s):
    """Parse server text file s and insert its rows into sqlite db d.

    Uses INSERT OR REPLACE so reruns do not duplicate rows.  On any
    sqlite error the transaction is rolled back and the error logged.
    """
    con = sql.connect(d)
    try:
        # connection context manager commits on clean exit
        with con:
            csr = con.cursor()
            try:
                times, ranks, ids = filters(get_list_from_server_txt(s))
                if times:
                    inserttodb(csr, "INSERT OR REPLACE INTO Cts_times VALUES(?, ?, ?, ?)", times)
                    logging.info('\n'.join(str(x) for x in times))
                if ranks:
                    inserttodb(csr, "INSERT OR REPLACE INTO Cts_ranks VALUES(?, ?, ?, ?)", ranks)
                    logging.info('\n'.join(str(x) for x in ranks))
                if ids:
                    inserttodb(csr, "INSERT OR REPLACE INTO Id2alias VALUES(?, ?, ?)", ids)
                    logging.info('\n'.join(str(x) for x in ids))
            except sql.Error:
                logging.exception("sql error encountered in function 'i'")
                con.rollback()
    finally:
        # fix: the connection was previously leaked; always close it
        con.close()
# 'insert' new data into a file i.e. an sql query file
def f(d, s):
    """Write INSERT statements for server text file s into query file d."""
    with open(d, 'w', encoding='utf-8') as h:
        times, ranks, ids = filters(get_list_from_server_txt(s))
        for t in times:
            h.write("INSERT OR REPLACE INTO Cts_times VALUES(%s, %s, %s, %s)\n" % tuple(t))
        for r in ranks:
            h.write("INSERT OR REPLACE INTO Cts_ranks VALUES(%s, %s, %s, %s)\n" % tuple(r))
        # fix: table name was misspelled 'Id2aslias', producing sql that
        # targets a table that does not exist anywhere else in this tool.
        # Also renamed the loop variable: 'i' shadowed the function i().
        for alias_row in ids:
            h.write("INSERT OR REPLACE INTO Id2alias VALUES(%s, %s, %s)\n" % tuple(alias_row))
# Test whether repeat rows are added.
def duplicatestest(d, s):
    """Import s into db d twice and verify the row counts do not grow.

    Logs an error per table whose count changed, or a success message
    when all three tables are stable.
    """
    con = sql.connect(d)
    ok = True
    try:
        with con:
            cs = con.cursor()
            try:
                logging.info("Inserting into database (1/2)")
                i(d, s)
                logging.info("Querying (1/2)")
                cs.execute("SELECT * FROM Cts_times")
                times_first = cs.fetchall()
                cs.execute("SELECT * FROM Cts_ranks")
                ranks_first = cs.fetchall()
                cs.execute("SELECT * FROM Id2alias")
                # fix: this result was previously bound to 'c', shadowing
                # the connection object
                ids_first = cs.fetchall()
                logging.info("Inserting into database (2/2)")
                i(d, s)
                logging.info("Querying (2/2)")
                cs.execute("SELECT * FROM Cts_times")
                times_second = cs.fetchall()
                cs.execute("SELECT * FROM Cts_ranks")
                ranks_second = cs.fetchall()
                cs.execute("SELECT * FROM Id2alias")
                ids_second = cs.fetchall()
                if len(times_first) != len(times_second):
                    logging.error("Issue with Cts_times")
                    ok = False
                if len(ranks_first) != len(ranks_second):
                    logging.error("Issue with Cts_ranks")
                    ok = False
                if len(ids_first) != len(ids_second):
                    logging.error("Issue with Id2alias")
                    ok = False
                if ok:
                    logging.info("Database ok - no repeat rows added.")
            except sql.Error:
                logging.exception("encountered sql error in function 'duplicate test'.")
    finally:
        # fix: close the previously-leaked connection
        con.close()
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('db')
    parser.add_argument('src')
    parser.add_argument('-t', '--test', action='store_true')
    parser.add_argument('-q', '--sql', action='store_true')
    args = parser.parse_args()

    log_file = init_logging()
    print("Writing log to ", log_file)

    if args.test:
        duplicatestest(args.db, args.src)
    # NOTE(review): with --test and without --sql, the else branch below
    # runs a third import on top of duplicatestest's two — confirm intended.
    if args.sql:
        f(args.db, args.src)
    else:
        i(args.db, args.src)