From 9dd546521620ee4977239996be5ac3773a6b9e25 Mon Sep 17 00:00:00 2001 From: Michiel Nauta Date: Tue, 16 Aug 2011 20:06:43 +0000 Subject: [PATCH] 5140: Import of Gramps XML file with cross table duplicate handles crashes svn: r18036 --- src/plugins/import/ImportXml.py | 207 +++++++++++++------------------- src/plugins/tool/Check.py | 31 ++++- 2 files changed, 112 insertions(+), 126 deletions(-) diff --git a/src/plugins/import/ImportXml.py b/src/plugins/import/ImportXml.py index 6c903fb27..500856528 100644 --- a/src/plugins/import/ImportXml.py +++ b/src/plugins/import/ImportXml.py @@ -625,9 +625,10 @@ class GrampsParser(UpdateCallback): "reporef": (self.start_reporef, self.stop_reporef), "rname": (None, self.stop_rname), } + self.grampsuri = re.compile(r"^gramps://(?P<object_class>[A-Z][a-z]+)/" + "handle/(?P<handle>\w+)$") - def inaugurate(self, handle, prim_obj, has_handle_func, add_func, - get_raw_obj_data): + def inaugurate(self, handle, target, prim_obj): """ Assign a handle (identity) to a primary object (and create it if it doesn't exist yet) and add it to the database. @@ -641,28 +642,40 @@ class GrampsParser(UpdateCallback): :param handle: The handle of the primary object, typically as read directly from the XML attributes. :type handle: str + :param target: Indicates the primary object type this handle relates to. + :type target: str, identical to target attr of bookmarks. :param prim_obj: template of the primary object that is to be created. :type prim_obj: Either an empty instance of a primary object or the class object of a primary object. - :param has_handle_func: function to determine if the database contains - a given handle for a specific object type. - :type has_handle_func: func - :param add_func: function to add a primary object of a specific type - to the database. - :type add_func: func - :param get_raw_obj_data: function to read the content of a primary - object from the db in serialized form.
- :type get_raw_obj_data: func :returns: The handle of the primary object. :rtype: str """ handle = str(handle.replace('_', '')) - if handle in self.import_handles: - handle = self.import_handles[handle] + if (handle in self.import_handles and + target in self.import_handles[handle]): + handle = self.import_handles[handle][target] if not callable(prim_obj): # This method is called by a start_ method. + get_raw_obj_data = {"person": self.db.get_raw_person_data, + "family": self.db.get_raw_family_data, + "event": self.db.get_raw_event_data, + "place": self.db.get_raw_place_data, + "source": self.db.get_raw_source_data, + "repository": self.db.get_raw_repository_data, + "media": self.db.get_raw_object_data, + "note": self.db.get_raw_note_data, + "tag": self.db.get_raw_tag_data}[target] raw = get_raw_obj_data(handle) prim_obj.unserialize(raw) + return handle + elif handle in self.import_handles: + LOG.warn("The file you import contains duplicate handles " + "which is illegal and being fixed now.") + orig_handle = handle + handle = Utils.create_id() + while handle in self.import_handles: + handle = Utils.create_id() + self.import_handles[orig_handle][target] = handle else: orig_handle = handle if self.replace_import_handle: @@ -670,16 +683,33 @@ class GrampsParser(UpdateCallback): while handle in self.import_handles: handle = Utils.create_id() else: + has_handle_func = {"person": self.db.has_person_handle, + "family": self.db.has_family_handle, + "event": self.db.has_event_handle, + "place": self.db.has_place_handle, + "source": self.db.has_source_handle, + "repository": self.db.has_repository_handle, + "media": self.db.has_object_handle, + "note": self.db.has_note_handle, + "tag": self.db.has_tag_handle}[target] while has_handle_func(handle): handle = Utils.create_id() - self.import_handles[orig_handle] = handle - if callable(prim_obj): # method is called by a reference - prim_obj = prim_obj() - prim_obj.set_handle(handle) - if add_func == self.db.add_tag: - 
add_func(prim_obj, self.trans) - else: - add_func(prim_obj, self.trans, set_gid=False) + self.import_handles[orig_handle] = {target: handle} + if callable(prim_obj): # method is called by a reference + prim_obj = prim_obj() + prim_obj.set_handle(handle) + if target == "tag": + self.db.add_tag(prim_obj, self.trans) + else: + add_func = {"person": self.db.add_person, + "family": self.db.add_family, + "event": self.db.add_event, + "place": self.db.add_place, + "source": self.db.add_source, + "repository": self.db.add_repository, + "media": self.db.add_object, + "note": self.db.add_note}[target] + add_func(prim_obj, self.trans, set_gid=False) return handle def inaugurate_id(self, id_, key, prim_obj): @@ -954,10 +984,7 @@ class GrampsParser(UpdateCallback): Add a family reference to the LDS ordinance currently processed. """ if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Family, - self.db.has_family_handle, - self.db.add_family, - self.db.get_raw_family_data) + handle = self.inaugurate(attrs['hlink'], "family", gen.lib.Family) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), FAMILY_KEY, gen.lib.Family) @@ -967,10 +994,7 @@ class GrampsParser(UpdateCallback): """A reference to a place in an object: event or lds_ord """ if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Place, - self.db.has_place_handle, - self.db.add_place, - self.db.get_raw_place_data) + handle = self.inaugurate(attrs['hlink'], "place", gen.lib.Place) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), PLACE_KEY, gen.lib.Place) @@ -989,10 +1013,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_place_handle(orig_handle)) - self.inaugurate(orig_handle, self.placeobj, - self.db.has_place_handle, - self.db.add_place, - self.db.get_raw_place_data) + self.inaugurate(orig_handle, "place", self.placeobj) gramps_id = 
self.legalize_id(attrs.get('id'), PLACE_KEY, self.pidswap, self.db.pid2user_format, self.db.find_next_place_gramps_id) @@ -1056,8 +1077,7 @@ class GrampsParser(UpdateCallback): person = gen.lib.Person() if 'hlink' in attrs: - self.inaugurate(attrs['hlink'], person, self.db.has_person_handle, - self.db.add_person, self.db.get_raw_person_data) + self.inaugurate(attrs['hlink'], "person", person) elif 'ref' in attrs: self.inaugurate_id(attrs['ref'], PERSON_KEY, person) else: @@ -1100,10 +1120,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_event_handle(orig_handle)) - self.inaugurate(orig_handle, self.event, - self.db.has_event_handle, - self.db.add_event, - self.db.get_raw_event_data) + self.inaugurate(orig_handle, "event", self.event) gramps_id = self.legalize_id(attrs.get('id'), EVENT_KEY, self.eidswap, self.db.eid2user_format, self.db.find_next_event_gramps_id) @@ -1124,10 +1141,7 @@ class GrampsParser(UpdateCallback): """ self.eventref = gen.lib.EventRef() if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Event, - self.db.has_event_handle, - self.db.add_event, - self.db.get_raw_event_data) + handle = self.inaugurate(attrs['hlink'], "event", gen.lib.Event) else: # there is no old style XML raise GrampsImportError(_("The Gramps Xml you are trying to " "import is malformed."), _("Any event reference must have a " @@ -1194,10 +1208,8 @@ class GrampsParser(UpdateCallback): # Old XML. 
Can be either handle or id reference # and this is guaranteed to be a person bookmark if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", + gen.lib.Person) else: handle = self.inaugurate_id(attrs.get('ref'), PERSON_KEY, gen.lib.Person) @@ -1206,7 +1218,7 @@ class GrampsParser(UpdateCallback): # This is new XML, so we are guaranteed to have a handle ref handle = attrs['hlink'].replace('_', '') - handle = self.import_handles[handle] + handle = self.import_handles[handle][target] # Due to pre 2.2.9 bug, bookmarks might be handle of other object # Make sure those are filtered out. # Bookmarks are at end, so all handle must exist before we do bookmrks @@ -1279,10 +1291,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_person_handle(orig_handle)) - self.inaugurate(orig_handle, self.person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + self.inaugurate(orig_handle, "person", self.person) gramps_id = self.legalize_id(attrs.get('id'), PERSON_KEY, self.idswap, self.db.id2user_format, self.db.find_next_person_gramps_id) @@ -1303,10 +1312,7 @@ class GrampsParser(UpdateCallback): Store the home person of the database. """ if 'home' in attrs: - handle = self.inaugurate(attrs['home'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['home'], "person", gen.lib.Person) self.home = handle def start_father(self, attrs): @@ -1314,10 +1320,7 @@ class GrampsParser(UpdateCallback): Add a father reference to the family currently processed. 
""" if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", gen.lib.Person) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), PERSON_KEY, gen.lib.Person) @@ -1328,10 +1331,7 @@ class GrampsParser(UpdateCallback): Add a mother reference to the family currently processed. """ if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", gen.lib.Person) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), PERSON_KEY, gen.lib.Person) @@ -1345,10 +1345,7 @@ class GrampsParser(UpdateCallback): frel and mrel belonged to the "childof" tag """ if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", gen.lib.Person) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), PERSON_KEY, gen.lib.Person) @@ -1366,10 +1363,7 @@ class GrampsParser(UpdateCallback): belong to the "childref" tag under family. 
""" self.childref = gen.lib.ChildRef() - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", gen.lib.Person) self.childref.ref = handle self.childref.private = bool(attrs.get('priv')) @@ -1392,10 +1386,7 @@ class GrampsParser(UpdateCallback): """ self.personref = gen.lib.PersonRef() if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Person, - self.db.has_person_handle, - self.db.add_person, - self.db.get_raw_person_data) + handle = self.inaugurate(attrs['hlink'], "person", gen.lib.Person) else: # there is no old style XML raise GrampsImportError(_("The Gramps Xml you are trying to " "import is malformed."), _("Any person reference must have a " @@ -1431,10 +1422,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_family_handle(orig_handle)) - self.inaugurate(orig_handle, self.family, - self.db.has_family_handle, - self.db.add_family, - self.db.get_raw_family_data) + self.inaugurate(orig_handle, "family", self.family) gramps_id = self.legalize_id(attrs.get('id'), FAMILY_KEY, self.fidswap, self.db.fid2user_format, self.db.find_next_family_gramps_id) @@ -1478,10 +1466,7 @@ class GrampsParser(UpdateCallback): person is a child. """ if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Family, - self.db.has_family_handle, - self.db.add_family, - self.db.get_raw_family_data) + handle = self.inaugurate(attrs['hlink'], "family", gen.lib.Family) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), FAMILY_KEY, gen.lib.Family) @@ -1510,10 +1495,7 @@ class GrampsParser(UpdateCallback): person is a parent. 
""" if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Family, - self.db.has_family_handle, - self.db.add_family, - self.db.get_raw_family_data) + handle = self.inaugurate(attrs['hlink'], "family", gen.lib.Family) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), FAMILY_KEY, gen.lib.Family) @@ -1613,8 +1595,7 @@ class GrampsParser(UpdateCallback): # Tag defintion self.tag = gen.lib.Tag() - self.inaugurate(attrs['handle'], self.tag, self.db.has_tag_handle, - self.db.add_tag, self.db.get_raw_tag_data) + self.inaugurate(attrs['handle'], "tag", self.tag) self.tag.change = int(attrs.get('change', self.change)) self.info.add('new-object', TAG_KEY, self.tag) self.tag.set_name(attrs['name']) @@ -1626,9 +1607,7 @@ class GrampsParser(UpdateCallback): """ Tag reference in a primary object. """ - handle = self.inaugurate(attrs['hlink'], gen.lib.Tag, - self.db.has_tag_handle, - self.db.add_tag, self.db.get_raw_tag_data) + handle = self.inaugurate(attrs['hlink'], "tag", gen.lib.Tag) if self.person: self.person.add_tag(handle) @@ -1660,10 +1639,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_note_handle(orig_handle)) - self.inaugurate(orig_handle, self.note, - self.db.has_note_handle, - self.db.add_note, - self.db.get_raw_note_data) + self.inaugurate(orig_handle, "note", self.note) gramps_id = self.legalize_id(attrs.get('id'), NOTE_KEY, self.nidswap, self.db.nid2user_format, self.db.find_next_note_gramps_id) @@ -1763,10 +1739,7 @@ class GrampsParser(UpdateCallback): Add a note reference to the object currently processed. 
""" if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Note, - self.db.has_note_handle, - self.db.add_note, - self.db.get_raw_note_data) + handle = self.inaugurate(attrs['hlink'], "note", gen.lib.Note) else: raise GrampsImportError(_("The Gramps Xml you are trying to " "import is malformed."), _("Any note reference must have a " @@ -1817,10 +1790,7 @@ class GrampsParser(UpdateCallback): """ self.source_ref = gen.lib.SourceRef() if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Source, - self.db.has_source_handle, - self.db.add_source, - self.db.get_raw_source_data) + handle = self.inaugurate(attrs['hlink'], "source", gen.lib.Source) else: handle = self.inaugurate_id(attrs.get('ref'), SOURCE_KEY, gen.lib.Source) @@ -1866,10 +1836,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_source_handle(orig_handle)) - self.inaugurate(orig_handle, self.source, - self.db.has_source_handle, - self.db.add_source, - self.db.get_raw_source_data) + self.inaugurate(orig_handle, "source", self.source) gramps_id = self.legalize_id(attrs.get('id'), SOURCE_KEY, self.sidswap, self.db.sid2user_format, self.db.find_next_source_gramps_id) @@ -1890,10 +1857,8 @@ class GrampsParser(UpdateCallback): """ self.reporef = gen.lib.RepoRef() if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.Repository, - self.db.has_repository_handle, - self.db.add_repository, - self.db.get_raw_repository_data) + handle = self.inaugurate(attrs['hlink'], "repository", + gen.lib.Repository) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), REPOSITORY_KEY, gen.lib.Repository) @@ -1912,10 +1877,8 @@ class GrampsParser(UpdateCallback): """ self.objref = gen.lib.MediaRef() if 'hlink' in attrs: - handle = self.inaugurate(attrs['hlink'], gen.lib.MediaObject, - self.db.has_object_handle, - self.db.add_object, - self.db.get_raw_object_data) + 
handle = self.inaugurate(attrs['hlink'], "media", + gen.lib.MediaObject) else: # old style XML handle = self.inaugurate_id(attrs.get('ref'), MEDIA_KEY, gen.lib.MediaObject) @@ -1949,10 +1912,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_object_handle(orig_handle)) - self.inaugurate(orig_handle, self.object, - self.db.has_object_handle, - self.db.add_object, - self.db.get_raw_object_data) + self.inaugurate(orig_handle, "media", self.object) gramps_id = self.legalize_id(attrs.get('id'), MEDIA_KEY, self.oidswap, self.db.oid2user_format, self.db.find_next_object_gramps_id) @@ -1986,10 +1946,7 @@ class GrampsParser(UpdateCallback): orig_handle = attrs['handle'].replace('_', '') is_merge_candidate = (self.replace_import_handle and self.db.has_repository_handle(orig_handle)) - self.inaugurate(orig_handle, self.repo, - self.db.has_repository_handle, - self.db.add_repository, - self.db.get_raw_repository_data) + self.inaugurate(orig_handle, "repository", self.repo) gramps_id = self.legalize_id(attrs.get('id'), REPOSITORY_KEY, self.ridswap, self.db.rid2user_format, self.db.find_next_repository_gramps_id) diff --git a/src/plugins/tool/Check.py b/src/plugins/tool/Check.py index 4047b4406..0a8d1ef87 100644 --- a/src/plugins/tool/Check.py +++ b/src/plugins/tool/Check.py @@ -150,6 +150,27 @@ def _table_low_level(db,table): table.sync() return True +def cross_table_duplicates(db): + """ + Function to find the presence of identical handles that occur in different + database tables. + + Assumes there are no intable duplicates, see low_level function. 
+ + :param db: the database to check + :type db: :class:`gen.db.read.DbBsddbRead` + :returns: the presence of cross table duplicate handles + :rtype: bool + """ + total_nr_handles = 0 + all_handles = set([]) + for the_map in [db.person_map, db.family_map, db.event_map, db.place_map, + db.source_map, db.media_map, db.repository_map, db.note_map]: + handle_list = the_map.keys() + total_nr_handles += len(handle_list) + all_handles.update(handle_list) + return total_nr_handles > len(all_handles) + #------------------------------------------------------------------------- # # runTool @@ -174,7 +195,15 @@ class Check(tool.BatchTool): # We only do this for the dbdir backend. if self.db.__class__.__name__ == 'DbBsddb': low_level(self.db) - + if cross_table_duplicates(self.db): + Report(uistate, _( + "Your family tree contains cross table duplicate handles.\n " + "This is bad and can be fixed by making a backup of your\n" + "family tree and importing that backup in an empty family\n" + "tree. The rest of the checking is skipped, the Check and\n" + "Repair tool should be run anew on this new family tree."), cli) + return + with DbTxn(_("Check Integrity"), self.db, batch=True) as trans: self.db.disable_signals() checker = CheckIntegrity(dbstate, uistate, trans)