From 427ee2d0fd9ee7d7bc4e13ee0354c339e8893859 Mon Sep 17 00:00:00 2001 From: kulath Date: Tue, 4 Mar 2014 17:39:16 +0000 Subject: [PATCH] 6194: Database corrupted - TypeError: unhashable type: 'list' Fixed upgrade to deal with sourceref in media references in Sources. Also fixed Check and Repair to repair previously broken databases. --- gramps/gen/db/upgrade.py | 112 ++++++++++++++++++++++++----------- gramps/plugins/tool/check.py | 70 +++++++++++++++++++++- 2 files changed, 145 insertions(+), 37 deletions(-) diff --git a/gramps/gen/db/upgrade.py b/gramps/gen/db/upgrade.py index eb2635fe2..7c4498d93 100644 --- a/gramps/gen/db/upgrade.py +++ b/gramps/gen/db/upgrade.py @@ -28,7 +28,7 @@ from ..lib.markertype import MarkerType from ..lib.tag import Tag import time import logging -LOG = logging.getLogger(".citation") +LOG = logging.getLogger(".upgrade") from ..const import GRAMPS_LOCALE as glocale _ = glocale.translation.gettext @@ -46,7 +46,7 @@ from . import BSDDBTxn from ..lib.nameorigintype import NameOriginType from .write import _mkname, SURNAMES from .dbconst import (PERSON_KEY, FAMILY_KEY, EVENT_KEY, - MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY) + MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY) from gramps.gui.dialog import (InfoDialog) def gramps_upgrade_16(self): @@ -61,26 +61,30 @@ def gramps_upgrade_16(self): (4) add backlinks for references from object to Citations (5) add backlinks for references from Citation to Source - the backlinks are all updated at the end by calling - reindex_reference_map + the backlinks are all updated on return to write.py gramps_upgrade by + calling reindex_reference_map """ - length = (len(self.note_map) + len(self.person_map) + + # Only People, Families, Events, Media Objects, Places, Sources and + # Repositories need to be updated, because these are the only primary + # objects that can have source citations. 
+ length = (len(self.person_map) + len(self.event_map) + len(self.family_map) + len(self.repository_map) + len(self.media_map) + - len(self.place_map) + len(self.source_map)) + 10 + len(self.place_map) + len(self.source_map)) self.set_total(length) # Setup data for upgrade statistics information dialogue keyorder = [PERSON_KEY, FAMILY_KEY, EVENT_KEY, MEDIA_KEY, - PLACE_KEY, REPOSITORY_KEY] + PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY] key2data = { PERSON_KEY : 0, FAMILY_KEY : 1, EVENT_KEY: 2, MEDIA_KEY: 3, PLACE_KEY: 4, - REPOSITORY_KEY: 5, + REPOSITORY_KEY: 5, + SOURCE_KEY : 6, } key2string = { PERSON_KEY : _('%6d People upgraded with %6d citations in %6d secs\n'), @@ -89,8 +93,9 @@ def gramps_upgrade_16(self): MEDIA_KEY : _('%6d Media Objects upgraded with %6d citations in %6d secs\n'), PLACE_KEY : _('%6d Places upgraded with %6d citations in %6d secs\n'), REPOSITORY_KEY : _('%6d Repositories upgraded with %6d citations in %6d secs\n'), + SOURCE_KEY : _('%6d Sources upgraded with %6d citations in %6d secs\n'), } - data_upgradeobject = [0] * 6 + data_upgradeobject = [0] * 7 # Initialise the citation gramps ID number self.cmap_index = 0 @@ -102,6 +107,15 @@ def gramps_upgrade_16(self): start_time = time.time() for person_handle in self.person_map.keys(): person = self.person_map[person_handle] + try: + # The parameters are evaluated before deciding whether logging is on + # or not. Since the retrieval of names is so complex, I think it is + # safer to protect this with a try except block, even though it + # seems to work for names being present and not. 
+ LOG.debug("upgrade person %s %s" % (person[3][4], + " ".join([name[0] for name in person[3][5]]))) + except Exception: + pass (handle, gramps_id, gender, primary_name, alternate_names, death_ref_index, birth_ref_index, event_ref_list, family_list, parent_family_list, media_list, address_list, attribute_list, @@ -144,6 +158,7 @@ def gramps_upgrade_16(self): attribute_list, urls, lds_seal_list, new_citation_list, note_list, change, tag_list, private, person_ref_list) + LOG.debug(" upgrade new_person %s" % [new_person]) with BSDDBTxn(self.env, self.person_map) as txn: if isinstance(handle, UNITYPE): handle = handle.encode('utf-8') @@ -165,7 +180,7 @@ def gramps_upgrade_16(self): start_time = time.time() for media_handle in self.media_map.keys(): media = self.media_map[media_handle] - LOG.debug("upgrade media %s" % media[4]) + LOG.debug("upgrade media object %s" % media[4]) (handle, gramps_id, path, mime, desc, attribute_list, source_list, note_list, change, date, tag_list, private) = media @@ -182,12 +197,11 @@ def gramps_upgrade_16(self): if isinstance(handle, UNITYPE): handle = handle.encode('utf-8') txn.put(handle, new_media) - LOG.debug(" update ref map media %s" % [handle, - self.get_object_from_handle(handle) ]) self.update() - LOG.debug("Media upgrade %d citations upgraded in %d seconds" % - (self.cmap_index - start_num_citations, + LOG.debug("%d media objects upgraded with %d citations in %d seconds" % + (len(self.media_map.keys()), + self.cmap_index - start_num_citations, int(time.time() - start_time))) data_upgradeobject[key2data[MEDIA_KEY]] = (len(list(self.media_map.keys())), self.cmap_index - start_num_citations, @@ -200,6 +214,7 @@ def gramps_upgrade_16(self): start_time = time.time() for place_handle in self.place_map.keys(): place = self.place_map[place_handle] + LOG.debug("upgrade place %s" % place[2]) (handle, gramps_id, title, longi, lat, main_loc, alt_loc, urls, media_list, source_list, note_list, change, private) = place @@ -216,6 +231,7 @@ def 
gramps_upgrade_16(self): longi, lat, main_loc, alt_loc, urls, media_list, new_citation_list, note_list, change, private) + LOG.debug(" upgrade new_place %s" % [new_place]) with BSDDBTxn(self.env, self.place_map) as txn: if isinstance(handle, UNITYPE): handle = handle.encode('utf-8') @@ -237,6 +253,7 @@ def gramps_upgrade_16(self): start_time = time.time() for family_handle in self.family_map.keys(): family = self.family_map[family_handle] + LOG.debug("upgrade family (gramps_id) %s" % family[1]) (handle, gramps_id, father_handle, mother_handle, child_ref_list, the_type, event_ref_list, media_list, attribute_list, lds_seal_list, source_list, note_list, @@ -266,13 +283,12 @@ def gramps_upgrade_16(self): child_ref_list, the_type, event_ref_list, media_list, attribute_list, lds_seal_list, new_citation_list, note_list, change, tag_list, private) + LOG.debug(" upgrade new_family %s" % [new_family]) with BSDDBTxn(self.env, self.family_map) as txn: - if isinstance(handle, UNITYPE): - handle = handle.encode('utf-8') - txn.put(handle, new_family) + txn.put(handle.encode('utf-8') if isinstance(handle, UNITYPE) else handle, new_family) self.update() - LOG.debug("%d familys upgraded with %d citations in %d seconds. " % + LOG.debug("%d families upgraded with %d citations in %d seconds. 
" % (len(list(self.family_map.keys())), self.cmap_index - start_num_citations, time.time() - start_time)) @@ -282,13 +298,11 @@ def gramps_upgrade_16(self): # --------------------------------- # Modify Events # --------------------------------- - upgrade_time = 0 - backlink_time = 0 start_num_citations = self.cmap_index start_time = time.time() for event_handle in self.event_map.keys(): - t1 = time.time() event = self.event_map[event_handle] + LOG.debug("upgrade event %s" % event[4]) (handle, gramps_id, the_type, date, description, place, source_list, note_list, media_list, attribute_list, change, private) = event @@ -308,21 +322,17 @@ def gramps_upgrade_16(self): new_citation_list, note_list, media_list, attribute_list, change, private) + LOG.debug(" upgrade new_event %s" % [new_event]) with BSDDBTxn(self.env, self.event_map) as txn: if isinstance(handle, UNITYPE): handle = handle.encode('utf-8') txn.put(handle, new_event) - t2 = time.time() - upgrade_time += t2 - t1 - t3 = time.time() - backlink_time += t3 - t2 self.update() - LOG.debug("%d events upgraded with %d citations in %d seconds. " - "Backlinks took %d seconds" % - (len(list(self.event_map.keys())), + LOG.debug("%d events upgraded with %d citations in %d seconds. 
" % + (len(self.event_map.keys()), self.cmap_index - start_num_citations, - int(upgrade_time), int(backlink_time))) + time.time() - start_time)) data_upgradeobject[key2data[EVENT_KEY]] = (len(list(self.event_map.keys())), self.cmap_index - start_num_citations, time.time() - start_time) @@ -334,6 +344,7 @@ def gramps_upgrade_16(self): start_time = time.time() for repository_handle in self.repository_map.keys(): repository = self.repository_map[repository_handle] + LOG.debug("upgrade repository %s" % repository[3]) (handle, gramps_id, the_type, name, note_list, address_list, urls, change, private) = repository if address_list: @@ -342,19 +353,54 @@ def gramps_upgrade_16(self): if address_list: new_repository = (handle, gramps_id, the_type, name, note_list, address_list, urls, change, private) + LOG.debug(" upgrade new_repository %s" % [new_repository]) with BSDDBTxn(self.env, self.repository_map) as txn: if isinstance(handle, UNITYPE): handle = handle.encode('utf-8') txn.put(handle, new_repository) self.update() - LOG.debug("%d repositorys upgraded with %d citations in %d seconds. " % + LOG.debug("%d repositories upgraded with %d citations in %d seconds. 
" % (len(list(self.repository_map.keys())), self.cmap_index - start_num_citations, time.time() - start_time)) data_upgradeobject[key2data[REPOSITORY_KEY]] = (len(list(self.repository_map.keys())), self.cmap_index - start_num_citations, time.time() - start_time) + + # --------------------------------- + # Modify Source + # --------------------------------- + start_num_citations = self.cmap_index + start_time = time.time() + for source_handle in self.source_map.keys(): + source = self.source_map[source_handle] + LOG.debug("upgrade source %s" % source[2]) + (handle, gramps_id, title, author, + pubinfo, note_list, media_list, + abbrev, change, datamap, reporef_list, + private) = source + if media_list: + media_list = upgrade_media_list_16( + self, media_list) + + new_source = (handle, gramps_id, title, author, + pubinfo, note_list, media_list, + abbrev, change, datamap, reporef_list, + private) + LOG.debug(" upgrade new_source %s" % [new_source]) + with BSDDBTxn(self.env, self.source_map) as txn: + txn.put(handle.encode('utf-8') if isinstance(handle, UNITYPE) else handle, new_source) + self.update() + + LOG.debug("%d sources upgraded with %d citations in %d seconds" % + (len(self.source_map.keys()), + self.cmap_index - start_num_citations, + int(time.time() - start_time))) + data_upgradeobject[key2data[SOURCE_KEY]] = (len(self.source_map.keys()), + self.cmap_index - start_num_citations, + time.time() - start_time) + # --------------------------------- @@ -552,12 +598,6 @@ def convert_source_list_to_citation_list_16(self, source_list): new_handle = new_handle.encode('utf-8') txn.put(new_handle, new_citation) self.cmap_index += 1 -# # add backlinks for references from Citation to Source -# with BSDDBTxn(self.env) as txn: -# self.update_reference_map( -# self.get_citation_from_handle(new_handle), -# transaction, -# txn.txn) citation_list.append((new_handle)) return citation_list diff --git a/gramps/plugins/tool/check.py b/gramps/plugins/tool/check.py index 49e1e7d90..b869fb080 100644 --- a/gramps/plugins/tool/check.py +++ 
b/gramps/plugins/tool/check.py @@ -201,6 +201,7 @@ class Check(tool.BatchTool): checker.check_repo_references() checker.check_note_references() checker.check_tag_references() + checker.check_media_sourceref() self.db.enable_signals() self.db.request_rebuild() @@ -241,6 +242,7 @@ class CheckIntegrity(object): self.invalid_dates = [] self.removed_name_format = [] self.empty_objects = defaultdict(list) + self.replaced_sourceref = [] self.last_img_dir = config.get('behavior.addmedia-image-dir') self.progress = ProgressMeter(_('Checking Database'),'') self.explanation = Note(_('Objects referenced by this note ' @@ -1876,6 +1878,64 @@ class CheckIntegrity(object): if len(self.invalid_tag_references) == 0: logging.info(' OK: no tag reference problems found') + def check_media_sourceref(self): + """ + This repairs a problem with database upgrade from database schema + version 15 to 16. Mediarefs on source primary objects can contain + sourcerefs, and these were not converted to citations. + """ + total = ( + self.db.get_number_of_sources() + ) + + self.progress.set_pass(_('Looking for media source reference problems'), + total) + logging.info('Looking for media source reference problems') + + for handle in self.db.source_map.keys(): + self.progress.step() + info = self.db.source_map[handle] + source = gen.lib.Source() + source.unserialize(info) + new_media_ref_list = [] + for media_ref in source.get_media_list(): + citation_list = media_ref.get_citation_list() + new_citation_list = [] + for citation_handle in citation_list: + # Either citation_handle is a handle, in which case it has + # been converted, or it is a 6-tuple, in which case it now + # needs to be converted. 
+ if len(citation_handle) == 6: + if len(citation_handle) == 6: + sourceref = citation_handle + else: + sourceref = eval(citation_handle) + new_citation = gen.lib.Citation() + new_citation.set_date_object(sourceref[0]) + new_citation.set_privacy(sourceref[1]) + new_citation.set_note_list(sourceref[2]) + new_citation.set_confidence_level(sourceref[3]) + new_citation.set_reference_handle(sourceref[4]) + new_citation.set_page(sourceref[5]) + citation_handle = Utils.create_id() + new_citation.set_handle(citation_handle) + self.replaced_sourceref.append(handle) + logging.warning(' FAIL: the source "%s" has a media ' + 'reference with a source citation ' + 'which is invalid' % (source.gramps_id)) + self.db.add_citation(new_citation, self.trans) + + new_citation_list.append(citation_handle) + + media_ref.set_citation_list(new_citation_list) + new_media_ref_list.append(media_ref) + + source.set_media_list(new_media_ref_list) + self.db.commit_source(source, self.trans) + + if len(self.replaced_sourceref) == 0: + logging.info(' OK: no broken source citations on mediarefs found') + def class_person(self, handle): person = Person() person.set_handle(handle) @@ -1982,7 +2042,8 @@ class CheckIntegrity(object): note_references = len(self.invalid_note_references) tag_references = len(self.invalid_tag_references) name_format = len(self.removed_name_format) - empty_objs = sum(len(obj) for obj in self.empty_objects.values()) + replaced_sourcerefs = len(self.replaced_sourceref) + empty_objs = sum(len(obj) for obj in self.empty_objects.values()) errors = (photos + efam + blink + plink + slink + rel + event_invalid + person + @@ -2216,6 +2277,13 @@ class CheckIntegrity(object): name_format) % {'quantity' : name_format} ) + if replaced_sourcerefs: + self.text.write( + ngettext("%(quantity)d invalid source citation was fixed\n", + "%(quantity)d invalid source citations were fixed\n", + replaced_sourcerefs) % {'quantity' : replaced_sourcerefs} + ) + if empty_objs > 0 : 
self.text.write(_("%(empty_obj)d empty objects removed:\n" " %(person)d person objects\n"