Speed up Check and Repair, backlinks check stage. (#656)

Fixes #10618

* Speed up Check and Repair, backlinks check stage.

* Fix the Check and Repair backlinks scan so that it copes with bad references
that should have been fixed by earlier checks.
This commit is contained in:
Paul Culley 2018-09-04 20:40:57 -05:00 committed by Sam Manzi
parent f3b5f75e37
commit b6c57ab3c3

View File

@ -1237,52 +1237,87 @@ class CheckIntegrity:
total = self.db.get_total() total = self.db.get_total()
self.progress.set_pass(_('Looking for backlink reference problems'), self.progress.set_pass(_('Looking for backlink reference problems') +
total) ' (1)', total)
logging.info('Looking for backlink reference problems') logging.info('Looking for backlink reference problems')
# dict of object handles indexed by forward link created here
my_blinks = defaultdict(list)
my_items = 0 # count of my backlinks for progress meter
# dict of object handles indexed by forward link from db
db_blinks = {}
db_items = 0 # count of db backlinks for progress meter
# first we assemble our own backlinks table, and while we have the
# handle, gather up a second table with the db's backlinks
for obj_class in CLASS_TO_KEY_MAP.keys(): for obj_class in CLASS_TO_KEY_MAP.keys():
obj_type = obj_class.lower() for handle in self.db.method("iter_%s_handles", obj_class)():
for handle in getattr(self.db, "iter_%s_handles" % obj_type)():
self.progress.step() self.progress.step()
pri_obj = getattr(self.db, "get_%s_from_handle" blinks = list(self.db.find_backlink_handles(handle))
% obj_type)(handle) db_blinks[(obj_class, handle)] = blinks
db_items += len(blinks)
pri_obj = self.db.method('get_%s_from_handle',
obj_class)(handle)
handle_list = pri_obj.get_referenced_handles_recursively() handle_list = pri_obj.get_referenced_handles_recursively()
# check that each reference has a backlink my_items += len(handle_list)
for item in handle_list: for item in handle_list:
bl_list = list(self.db.find_backlink_handles(item[1])) my_blinks[item].append((obj_class, handle))
if (obj_class, handle) not in bl_list:
# Object has reference with no corresponding backlink # Now we go through our backlinks and the db's table comparing them
self.bad_backlinks += 1 # check that each real reference has a backlink in the db table
logging.warning(' FAIL: the "%(cls)s" [%(gid)s] ' self.progress.set_pass(_('Looking for backlink reference problems') +
'has a "%(cls2)s" reference' ' (2)', my_items)
' with no corresponding backlink.', for key, blinks in my_blinks.items():
{'gid': pri_obj.gramps_id, for item in blinks:
'cls': obj_class, 'cls2': item[0]}) self.progress.step()
# Check for backlinks that don't have a reference if key not in db_blinks:
bl_list = self.db.find_backlink_handles(handle) # object has reference to something not in db;
for item in bl_list: # should have been found in previous checks
if not getattr(self.db, "has_%s_handle" logging.warning(' Fail: reference to an object %(obj)s'
% item[0].lower())(item[1]): ' not in the db by %(ref)s!',
# backlink to object entirely missing {'obj': key, 'ref': item})
self.bad_backlinks += 1 continue
logging.warning(' FAIL: the "%(cls)s" [%(gid)s] ' if item not in db_blinks[key]:
'has a backlink to a missing' # Object has reference with no corresponding backlink
' "%(cls2)s".', self.bad_backlinks += 1
{'gid': pri_obj.gramps_id, pri_obj = self.db.method('get_%s_from_handle',
'cls': obj_class, 'cls2': item[0]}) key[0])(key[1])
continue logging.warning(' FAIL: the "%(cls)s" [%(gid)s] '
obj = getattr(self.db, "get_%s_from_handle" 'has a "%(cls2)s" reference'
% item[0].lower())(item[1]) ' with no corresponding backlink.',
handle_list = obj.get_referenced_handles_recursively() {'gid': pri_obj.gramps_id,
if (obj_class, handle) not in handle_list: 'cls': key[0], 'cls2': item[0]})
# backlink to object which doesn't have reference
self.bad_backlinks += 1 # Now we go through the db table and make checks against ours
logging.warning(' FAIL: the "%(cls)s" [%(gid)s] ' # Check for db backlinks that don't have a reference object at all
'has a backlink to a "%(cls2)s"' self.progress.set_pass(_('Looking for backlink reference problems') +
' with no corresponding reference.', ' (3)', db_items)
{'gid': pri_obj.gramps_id, for key, blinks in db_blinks.items():
'cls': obj_class, 'cls2': item[0]}) for item in blinks:
self.progress.step()
if item not in db_blinks:
# backlink to object entirely missing
self.bad_backlinks += 1
pri_obj = self.db.method('get_%s_from_handle',
key[0])(key[1])
logging.warning(' FAIL: the "%(cls)s" [%(gid)s] '
'has a backlink to a missing'
' "%(cls2)s" object.',
{'gid': pri_obj.gramps_id,
'cls': key[0], 'cls2': item[0]})
continue
# Check if the object has a reference to the backlinked one
if key not in my_blinks or item not in my_blinks[key]:
# backlink to object which doesn't have reference
self.bad_backlinks += 1
pri_obj = self.db.method('get_%s_from_handle',
key[0])(key[1])
logging.warning(' FAIL: the "%(cls)s" [%(gid)s] '
'has a backlink to a "%(cls2)s"'
' with no corresponding reference.',
{'gid': pri_obj.gramps_id,
'cls': key[0], 'cls2': item[0]})
def callback(self, *args): def callback(self, *args):
self.progress.step() self.progress.step()