DB-API: allow handles as secondary fields

* mother_handle is now a secondary field
* fixed issues with the join in primaryobj._follow_chain
* added logging to dbapi loading
* optimized rebuild: only the tables that need it are rebuilt
This commit is contained in:
Doug Blank 2016-01-27 07:18:12 -05:00
parent 78be94481d
commit eec418c192
2 changed files with 72 additions and 36 deletions

View File

@ -171,8 +171,8 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase):
""" """
from .handle import HandleClass from .handle import HandleClass
return ([(key, value) for (key, value) in cls.get_schema().items() return ([(key, value) for (key, value) in cls.get_schema().items()
if value in [str, int, float, bool] and if value in [str, int, float, bool] or
not isinstance(value, HandleClass)] + isinstance(value, HandleClass)] +
cls.get_extra_secondary_fields()) cls.get_extra_secondary_fields())
@classmethod @classmethod
@ -241,19 +241,18 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase):
# expand when you reach multiple answers [obj, chain_left, []] # expand when you reach multiple answers [obj, chain_left, []]
# if you get to an endpoint, put results # if you get to an endpoint, put results
# go until nothing left to expand # go until nothing left to expand
current = self todo = [(self, self, [], chain)]
todo = [(self, current, chain, [])]
results = [] results = []
while todo: while todo:
parent, current, chain, path_to = todo.pop() parent, current, path_to, chain = todo.pop()
#print("expand:", parent.__class__.__name__, #print("expand:", parent.__class__.__name__,
# current.__class__.__name__, # current.__class__.__name__,
# chain, # path_to,
# path_to) # chain)
keep_going = True keep_going = True
p = 0 p = 0
while p < len(chain) and keep_going: while p < len(chain) and keep_going:
#print("while:", p, chain, chain[p]) #print("while:", path_to, chain[p:])
part = chain[p] part = chain[p]
if hasattr(current, part): # attribute if hasattr(current, part): # attribute
current = getattr(current, part) current = getattr(current, part)
@ -269,14 +268,14 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase):
current = None current = None
keeping_going = False keeping_going = False
else: else:
raise raise Exception("invalid index position")
else: # else branch! in middle, split paths else: # else branch! in middle, split paths
for i in range(len(current)): for i in range(len(current)):
#print("split :", self.__class__.__name__, #print("split list:", self.__class__.__name__,
# current.__class__.__name__, # current.__class__.__name__,
# [str(i)] + chain[p:], # path_to[:],
# path_to[:]) # [str(i)] + chain[p:])
todo.append([self, current, [str(i)] + chain[p:], path_to[:]]) todo.append([self, current, path_to[:], [str(i)] + chain[p:]])
current = None current = None
keep_going = False keep_going = False
else: # part not found on this self else: # part not found on this self
@ -291,19 +290,18 @@ class BasicPrimaryObject(TableObject, PrivacyBase, TagBase):
if part == "self": if part == "self":
current = obj current = obj
path_to = [] path_to = []
#print("split :", obj.__class__.__name__, #print("split self:", obj.__class__.__name__,
# current.__class__.__name__, # current.__class__.__name__,
# chain[p + 1:], # path_to,
# path_to[p + 1:]) # chain[p + 1:])
todo.append([obj, current, chain[p + 1:], chain[p + 1:]]) todo.append([obj, current, path_to, chain[p + 1:]])
elif obj: elif obj:
current = getattr(obj, part) current = getattr(obj, part)
path_to = []
#print("split :", obj.__class__.__name__, #print("split :", obj.__class__.__name__,
# current.__class__.__name__, # current.__class__.__name__,
# chain[p + 1:], # [part],
# path_to[p:]) # chain[p + 1:])
todo.append([obj, current, chain[p + 1:], chain[p:]]) todo.append([obj, current, [part], chain[p + 1:]])
current = None current = None
keep_going = False keep_going = False
else: else:

View File

@ -32,6 +32,9 @@ import dbapi_support
import time import time
import pickle import pickle
import logging
LOG = logging.getLogger(".dbapi")
class DBAPI(DbGeneric): class DBAPI(DbGeneric):
""" """
Database backends class for DB-API 2.0 databases Database backends class for DB-API 2.0 databases
@ -1479,7 +1482,7 @@ class DBAPI(DbGeneric):
""" """
from gramps.gen.lib.handle import HandleClass from gramps.gen.lib.handle import HandleClass
if isinstance(python_type, HandleClass): if isinstance(python_type, HandleClass):
raise Exception("should not make new handle columns") return "VARCHAR(50)"
elif python_type == str: elif python_type == str:
return "TEXT" return "TEXT"
elif python_type in [bool, int]: elif python_type in [bool, int]:
@ -1493,11 +1496,23 @@ class DBAPI(DbGeneric):
""" """
Add secondary fields, update, and create indexes. Add secondary fields, update, and create indexes.
""" """
any_altered = False LOG.info("Rebuilding secondary fields...")
for table in self._tables.keys(): for table in self._tables.keys():
altered = False
if not hasattr(self._tables[table]["class_func"], "get_secondary_fields"): if not hasattr(self._tables[table]["class_func"], "get_secondary_fields"):
continue continue
# do a select on all; if it works, then it is ok; else, check them all
try:
fields = [self._hash_name(table, field) for (field, ptype) in
self._tables[table]["class_func"].get_secondary_fields()]
if fields:
self.dbapi.execute("select %s from %s limit 1;" % (", ".join(fields), table))
# if no error, continue
LOG.info("Table %s is up to date" % table)
continue
except:
pass # got to add missing ones, so continue
LOG.info("Table %s needs rebuilding..." % table)
altered = False
for field_pair in self._tables[table]["class_func"].get_secondary_fields(): for field_pair in self._tables[table]["class_func"].get_secondary_fields():
field, python_type = field_pair field, python_type = field_pair
field = self._hash_name(table, field) field = self._hash_name(table, field)
@ -1505,18 +1520,17 @@ class DBAPI(DbGeneric):
try: try:
# test to see if it exists: # test to see if it exists:
self.dbapi.execute("SELECT %s FROM %s LIMIT 1;" % (field, table)) self.dbapi.execute("SELECT %s FROM %s LIMIT 1;" % (field, table))
LOG.info(" Table %s, field %s is up to date" % (table, field))
except: except:
# if not, let's add it # if not, let's add it
LOG.info(" Table %s, field %s was added" % (table, field))
self.dbapi.execute("ALTER TABLE %s ADD COLUMN %s %s;" % (table, field, sql_type)) self.dbapi.execute("ALTER TABLE %s ADD COLUMN %s %s;" % (table, field, sql_type))
altered = True altered = True
any_altered = True
if altered: if altered:
LOG.info("Table %s is being committed, rebuilt, and indexed..." % (table, field))
self.dbapi.commit() self.dbapi.commit()
# Update values: self.update_secondary_values_table(table)
if any_altered: self.create_secondary_indexes_table(table)
self.update_secondary_values_all()
# Build indexes:
self.create_secondary_indexes()
def create_secondary_indexes(self): def create_secondary_indexes(self):
""" """
@ -1525,10 +1539,16 @@ class DBAPI(DbGeneric):
for table in self._tables.keys(): for table in self._tables.keys():
if not hasattr(self._tables[table]["class_func"], "get_index_fields"): if not hasattr(self._tables[table]["class_func"], "get_index_fields"):
continue continue
self.create_secondary_indexes_table(table)
def create_secondary_indexes_table(self, table):
"""
Create secondary indexes for just this table.

For every group of field names returned by the table's
class_func.get_index_fields(), each field name is converted with
self._hash_name(table, field) and one single-column CREATE INDEX
statement is sent through self.dbapi.try_execute.

table - key into self._tables; also interpolated as the SQL table
        name in the CREATE INDEX statement.
"""
# NOTE(review): the lines below come from a side-by-side diff; each one
# shows the pre-commit statement followed by the post-commit statement.
for fields in self._tables[table]["class_func"].get_index_fields(): for fields in self._tables[table]["class_func"].get_index_fields():
for field in fields: for field in fields:
# The loop variable is rebound to its hashed form before being used in SQL.
field = self._hash_name(table, field) field = self._hash_name(table, field)
# The second statement prefixes the index name with the table name
# ("<table>_<field>"); the first used the bare hashed field name.
# NOTE(review): try_execute presumably tolerates a failing statement
# (e.g. the index already exists) -- confirm against the dbapi wrapper.
self.dbapi.try_execute("CREATE INDEX %s ON %s(%s);" % (field, table, field)) self.dbapi.try_execute("CREATE INDEX %s_%s ON %s(%s);" % (table, field, table, field))
def update_secondary_values_all(self): def update_secondary_values_all(self):
""" """
@ -1639,8 +1659,25 @@ class DBAPI(DbGeneric):
""" """
Check to make sure all where fields are defined. If not, then Check to make sure all where fields are defined. If not, then
we need to do the Python-based select. we need to do the Python-based select.
secondary_fields are hashed.
""" """
if where is None:
return True return True
elif len(where) == 2: # ["AND" [...]] | ["OR" [...]] | ["NOT" expr]
connector, exprs = where
if connector in ["AND", "OR"]:
for expr in exprs:
value = self.check_where_fields(table, expr, secondary_fields)
if value == False:
return False
return True
else: # "NOT"
return self.check_where_fields(table, exprs, secondary_fields)
elif len(where) == 3: # (name, op, value)
(name, op, value) = where
# just the ones we need for where
return (self._hash_name(table, name) in secondary_fields)
def select(self, table, fields=None, start=0, limit=-1, def select(self, table, fields=None, start=0, limit=-1,
where=None, order_by=None): where=None, order_by=None):
@ -1659,12 +1696,13 @@ class DBAPI(DbGeneric):
["NOT", where] ["NOT", where]
order_by - [[fieldname, "ASC" | "DESC"], ...] order_by - [[fieldname, "ASC" | "DESC"], ...]
""" """
fields = [self._hash_name(table, field) for field in fields] hashed_fields = [self._hash_name(table, field) for field in fields]
secondary_fields = ([self._hash_name(table, field) for (field, ptype) in secondary_fields = ([self._hash_name(table, field) for (field, ptype) in
self._tables[table]["class_func"].get_secondary_fields()] + self._tables[table]["class_func"].get_secondary_fields()] +
["handle"]) # handle is a sql field, but not listed in secondaries ["handle"]) # handle is a sql field, but not listed in secondaries
if not self.check_where_fields(table, where, secondary_fields): if not self.check_where_fields(table, where, secondary_fields):
return super().select(table, fields, start, limit, where, order_by) return super().select(table, fields, start, limit, where, order_by)
fields = hashed_fields
start_time = time.time() start_time = time.time()
where_clause = self.build_where_clause(table, where) where_clause = self.build_where_clause(table, where)
order_clause = self.build_order_clause(table, order_by) order_clause = self.build_order_clause(table, order_by)