From eec418c192cc5e90bf96beb205b8ed690ce60c57 Mon Sep 17 00:00:00 2001 From: Doug Blank Date: Wed, 27 Jan 2016 07:18:12 -0500 Subject: [PATCH] DB-API: allow handles as secondary fields * mother_handle is now secondary field * fixed issues in join in primaryobj._follow_chain * added logging to dbapi loading * optimize rebuild, only what is needed --- gramps/gen/lib/primaryobj.py | 40 +++++++++---------- gramps/plugins/database/dbapi.py | 68 +++++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/gramps/gen/lib/primaryobj.py b/gramps/gen/lib/primaryobj.py index c661cf9ff..f62011783 100644 --- a/gramps/gen/lib/primaryobj.py +++ b/gramps/gen/lib/primaryobj.py @@ -171,8 +171,8 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase): """ from .handle import HandleClass return ([(key, value) for (key, value) in cls.get_schema().items() - if value in [str, int, float, bool] and - not isinstance(value, HandleClass)] + + if value in [str, int, float, bool] or + isinstance(value, HandleClass)] + cls.get_extra_secondary_fields()) @classmethod @@ -241,19 +241,18 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase): # expand when you reach multiple answers [obj, chain_left, []] # if you get to an endpoint, put results # go until nothing left to expand - current = self - todo = [(self, current, chain, [])] + todo = [(self, self, [], chain)] results = [] while todo: - parent, current, chain, path_to = todo.pop() + parent, current, path_to, chain = todo.pop() #print("expand:", parent.__class__.__name__, # current.__class__.__name__, - # chain, - # path_to) + # path_to, + # chain) keep_going = True p = 0 while p < len(chain) and keep_going: - #print("while:", p, chain, chain[p]) + #print("while:", path_to, chain[p:]) part = chain[p] if hasattr(current, part): # attribute current = getattr(current, part) @@ -269,14 +268,14 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase): current = None keeping_going = False 
else: - raise + raise Exception("invalid index position") else: # else branch! in middle, split paths for i in range(len(current)): - #print("split :", self.__class__.__name__, + #print("split list:", self.__class__.__name__, # current.__class__.__name__, - # [str(i)] + chain[p:], - # path_to[:]) - todo.append([self, current, [str(i)] + chain[p:], path_to[:]]) + # path_to[:], + # [str(i)] + chain[p:]) + todo.append([self, current, path_to[:], [str(i)] + chain[p:]]) current = None keep_going = False else: # part not found on this self @@ -291,19 +290,18 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase): if part == "self": current = obj path_to = [] - #print("split :", obj.__class__.__name__, + #print("split self:", obj.__class__.__name__, # current.__class__.__name__, - # chain[p + 1:], - # path_to[p + 1:]) - todo.append([obj, current, chain[p + 1:], chain[p + 1:]]) + # path_to, + # chain[p + 1:]) + todo.append([obj, current, path_to, chain[p + 1:]]) elif obj: current = getattr(obj, part) - path_to = [] #print("split :", obj.__class__.__name__, # current.__class__.__name__, - # chain[p + 1:], - # path_to[p:]) - todo.append([obj, current, chain[p + 1:], chain[p:]]) + # [part], + # chain[p + 1:]) + todo.append([obj, current, [part], chain[p + 1:]]) current = None keep_going = False else: diff --git a/gramps/plugins/database/dbapi.py b/gramps/plugins/database/dbapi.py index 99efb4236..093b31241 100644 --- a/gramps/plugins/database/dbapi.py +++ b/gramps/plugins/database/dbapi.py @@ -32,6 +32,9 @@ import dbapi_support import time import pickle +import logging +LOG = logging.getLogger(".dbapi") + class DBAPI(DbGeneric): """ Database backends class for DB-API 2.0 databases @@ -1479,7 +1482,7 @@ class DBAPI(DbGeneric): """ from gramps.gen.lib.handle import HandleClass if isinstance(python_type, HandleClass): - raise Exception("should not make new handle columns") + return "VARCHAR(50)" elif python_type == str: return "TEXT" elif python_type in [bool, int]: @@ 
-1493,11 +1496,23 @@ class DBAPI(DbGeneric): """ Add secondary fields, update, and create indexes. """ - any_altered = False + LOG.info("Rebuilding secondary fields...") for table in self._tables.keys(): - altered = False if not hasattr(self._tables[table]["class_func"], "get_secondary_fields"): continue + # do a select on all; if it works, then it is ok; else, check them all + try: + fields = [self._hash_name(table, field) for (field, ptype) in + self._tables[table]["class_func"].get_secondary_fields()] + if fields: + self.dbapi.execute("select %s from %s limit 1;" % (", ".join(fields), table)) + # if no error, continue + LOG.info("Table %s is up to date" % table) + continue + except: + pass # got to add missing ones, so continue + LOG.info("Table %s needs rebuilding..." % table) + altered = False for field_pair in self._tables[table]["class_func"].get_secondary_fields(): field, python_type = field_pair field = self._hash_name(table, field) @@ -1505,18 +1520,17 @@ class DBAPI(DbGeneric): try: # test to see if it exists: self.dbapi.execute("SELECT %s FROM %s LIMIT 1;" % (field, table)) + LOG.info(" Table %s, field %s is up to date" % (table, field)) except: # if not, let's add it + LOG.info(" Table %s, field %s was added" % (table, field)) self.dbapi.execute("ALTER TABLE %s ADD COLUMN %s %s;" % (table, field, sql_type)) altered = True - any_altered = True if altered: + LOG.info("Table %s is being committed, rebuilt, and indexed..." 
% table) self.dbapi.commit() - # Update values: - if any_altered: - self.update_secondary_values_all() - # Build indexes: - self.create_secondary_indexes() + self.update_secondary_values_table(table) + self.create_secondary_indexes_table(table) def create_secondary_indexes(self): """ @@ -1525,10 +1539,16 @@ class DBAPI(DbGeneric): for table in self._tables.keys(): if not hasattr(self._tables[table]["class_func"], "get_index_fields"): continue - for fields in self._tables[table]["class_func"].get_index_fields(): - for field in fields: - field = self._hash_name(table, field) - self.dbapi.try_execute("CREATE INDEX %s ON %s(%s);" % (field, table, field)) + self.create_secondary_indexes_table(table) + + def create_secondary_indexes_table(self, table): + """ + Create secondary indexes for just this table. + """ + for fields in self._tables[table]["class_func"].get_index_fields(): + for field in fields: + field = self._hash_name(table, field) + self.dbapi.try_execute("CREATE INDEX %s_%s ON %s(%s);" % (table, field, table, field)) def update_secondary_values_all(self): """ @@ -1639,8 +1659,25 @@ class DBAPI(DbGeneric): """ Check to make sure all where fields are defined. If not, then we need to do the Python-based select. + + secondary_fields are hashed. 
""" - return True + if where is None: + return True + elif len(where) == 2: # ["AND" [...]] | ["OR" [...]] | ["NOT" expr] + connector, exprs = where + if connector in ["AND", "OR"]: + for expr in exprs: + value = self.check_where_fields(table, expr, secondary_fields) + if value == False: + return False + return True + else: # "NOT" + return self.check_where_fields(table, exprs, secondary_fields) + elif len(where) == 3: # (name, op, value) + (name, op, value) = where + # just the ones we need for where + return (self._hash_name(table, name) in secondary_fields) def select(self, table, fields=None, start=0, limit=-1, where=None, order_by=None): @@ -1659,12 +1696,13 @@ class DBAPI(DbGeneric): ["NOT", where] order_by - [[fieldname, "ASC" | "DESC"], ...] """ - fields = [self._hash_name(table, field) for field in fields] + hashed_fields = [self._hash_name(table, field) for field in fields] secondary_fields = ([self._hash_name(table, field) for (field, ptype) in self._tables[table]["class_func"].get_secondary_fields()] + ["handle"]) # handle is a sql field, but not listed in secondaries if not self.check_where_fields(table, where, secondary_fields): return super().select(table, fields, start, limit, where, order_by) + fields = hashed_fields start_time = time.time() where_clause = self.build_where_clause(table, where) order_clause = self.build_order_clause(table, order_by)