From 75d54125dbf05202aff1e55d28ec32b8fdcfb03d Mon Sep 17 00:00:00 2001 From: Michiel Nauta Date: Sat, 14 Apr 2012 12:19:00 +0000 Subject: [PATCH] 5466: On import and Check and Repair need to check references to absent objects svn: r19327 --- src/Utils.py | 153 +++++- src/gen/lib/tagbase.py | 24 + src/plugins/import/ImportXml.py | 113 +++- src/plugins/lib/libgedcom.py | 134 +++++ src/plugins/tool/Check.py | 890 ++++++++++++++++++++------------ 5 files changed, 963 insertions(+), 351 deletions(-) diff --git a/src/Utils.py b/src/Utils.py index 916358d97..1078fcdcb 100644 --- a/src/Utils.py +++ b/src/Utils.py @@ -53,7 +53,7 @@ from GrampsLocale import codeset from Date import Date import DateHandler -from const import TEMP_DIR, USER_HOME, GRAMPS_UUID +from const import TEMP_DIR, USER_HOME, GRAMPS_UUID, IMAGE_DIR import constfunc from gen.ggettext import sgettext as _ @@ -1581,3 +1581,154 @@ def format_time(secs): t = time.localtime(secs) d = Date(t.tm_year, t.tm_mon, t.tm_mday) return DateHandler.displayer.display(d) + time.strftime(' %X', t) + +#------------------------------------------------------------------------- +# +# make_unknown +# +#------------------------------------------------------------------------- +def make_unknown(class_arg, explanation, class_func, commit_func, transaction, + **argv): + """ + Make a primary object and set some property so that it qualifies as + "Unknown". + + Some object types need extra parameters: + Family: db, Event: type (optional), + Citation: methods to create/store source. + + Some theoretical underpinning + This function exploits the fact that all import methods basically do the + same thing: Create an object of the right type, fill it with some + attributes, store it in the database. This function does the same, so + the observation is why not use the creation and storage methods that the + import routines use themselves, that makes nice reuse of code. 
To do this + formally correct we would need to specify an interface (in the OOP sense) + which the import methods would need to implement. For now, that is deemed + too restrictive and here we just slip through because of the similarity in + code of both GEDCOM and XML import methods. + + :param class_arg: The argument the class_func needs, typically a kind of id. + :type class_arg: unspecified + :param explanation: Handle of a note that explains the origin of primary obj + :type explanation: str + :param class_func: Method to create primary object. + :type class_func: method + :param commit_func: Method to store primary object in db. + :type commit_func: method + :param transaction: Database transaction handle + :type transaction: str + :param argv: Possible additional parameters + :type argv: unspecified + :returns: List of newly created objects. + :rtype: list + """ + retval = [] + obj = class_func(class_arg) + if isinstance(obj, gen.lib.Person): + surname = gen.lib.Surname() + surname.set_surname('Unknown') + name = gen.lib.Name() + name.add_surname(surname) + name.set_type(gen.lib.NameType.UNKNOWN) + obj.set_primary_name(name) + elif isinstance(obj, gen.lib.Family): + obj.set_relationship(gen.lib.FamilyRelType.UNKNOWN) + handle = obj.handle + if getattr(argv['db'].transaction, 'no_magic', False): + backlinks = argv['db'].find_backlink_handles( + handle, [gen.lib.Person.__name__]) + for dummy, person_handle in backlinks: + person = argv['db'].get_person_from_handle(person_handle) + add_personref_to_family(obj, person) + else: + for person in argv['db'].iter_people(): + if person._has_handle_reference('Family', handle): + add_personref_to_family(obj, person) + elif isinstance(obj, gen.lib.Event): + if 'type' in argv: + obj.set_type(argv['type']) + else: + obj.set_type(gen.lib.EventType.UNKNOWN) + elif isinstance(obj, gen.lib.Place): + obj.set_title(_('Unknown')) + elif isinstance(obj, gen.lib.Source): + obj.set_title(_('Unknown')) + elif isinstance(obj, 
gen.lib.Citation): + #TODO create a new source for every citation? + obj2 = argv['source_class_func'](argv['source_class_arg']) + obj2.set_title(_('Unknown')) + obj2.add_note(explanation) + argv['source_commit_func'](obj2, transaction, time.time()) + retval.append(obj2) + obj.set_reference_handle(obj2.handle) + elif isinstance(obj, gen.lib.Repository): + obj.set_name(_('Unknown')) + obj.set_type(gen.lib.RepositoryType.UNKNOWN) + elif isinstance(obj, gen.lib.MediaObject): + obj.set_path(os.path.join(IMAGE_DIR, "image-missing.png")) + obj.set_mime_type('image/png') + obj.set_description(_('Unknown')) + elif isinstance(obj, gen.lib.Note): + obj.set_type(gen.lib.NoteType.UNKNOWN); + text = _('Unknown, created to replace a missing note object.') + link_start = text.index(',') + 2 + link_end = len(text) - 1 + tag = gen.lib.StyledTextTag(gen.lib.StyledTextTagType.LINK, + 'gramps://Note/handle/%s' % explanation, + [(link_start, link_end)]) + obj.set_styledtext(gen.lib.StyledText(text, [tag])) + elif isinstance(obj, gen.lib.Tag): + if not hasattr(make_unknown, 'count'): + make_unknown.count = 1 #primitive static variable + obj.set_name(_("Unknown, was missing %s (%d)") % + (time.strftime('%x %X', time.localtime()), + make_unknown.count)) + make_unknown.count += 1 + else: + raise TypeError("Object if of unsupported type") + + if hasattr(obj, 'add_note'): + obj.add_note(explanation) + commit_func(obj, transaction, time.time()) + retval.append(obj) + return retval + +def create_explanation_note(dbase): + """ + When creating objects to fill missing primary objects in imported files, + those objects of type "Unknown" need an explanatory note. This function + provides such a note for import methods. 
+ """ + note = gen.lib.Note( _('Objects referenced by this note ' + 'were missing in a file imported on %s.') % + time.strftime('%x %X', time.localtime())) + note.set_handle(create_id()) + note.set_gramps_id(dbase.find_next_note_gramps_id()) + # Use defaults for privacy, format and type. + return note + +def add_personref_to_family(family, person): + """ + Given a family and person, set the parent/child references in the family, + that match the person. + """ + handle = family.handle + person_handle = person.handle + if handle in person.get_family_handle_list(): + if ((person.get_gender() == gen.lib.Person.FEMALE) and + (family.get_mother_handle() is None)): + family.set_mother_handle(person_handle) + else: + # This includes cases of gen.lib.Person.UNKNOWN + if family.get_father_handle() is None: + family.set_father_handle(person_handle) + else: + family.set_mother_handle(person_handle) + if handle in person.get_parent_family_handle_list(): + childref = gen.lib.ChildRef() + childref.set_reference_handle(person_handle) + childref.set_mother_relation(gen.lib.ChildRefType.UNKNOWN) + childref.set_father_relation(gen.lib.ChildRefType.UNKNOWN) + family.add_child_ref(childref) + diff --git a/src/gen/lib/tagbase.py b/src/gen/lib/tagbase.py index 8e1ce4864..f04992fe7 100644 --- a/src/gen/lib/tagbase.py +++ b/src/gen/lib/tagbase.py @@ -128,3 +128,27 @@ class TagBase(object): """ for addendum in acquisition.get_tag_list(): self.add_tag(addendum) + + def replace_tag_references(self, old_handle, new_handle): + """ + Replace references to tag handles in the list of this object and + merge equivalent entries. + + :param old_handle: The tag handle to be replaced. + :type old_handle: str + :param new_handle: The tag handle to replace the old one with. 
+ :type new_handle: str + """ + refs_list = self.tag_list[:] + new_ref = None + if new_handle in self.tag_list: + new_ref = new_handle + n_replace = refs_list.count(old_handle) + for ix_replace in xrange(n_replace): + idx = refs_list.index(old_handle) + if new_ref: + self.tag_list.pop(idx) + refs_list.pop(idx) + else: + self.tag_list[idx] = new_handle + diff --git a/src/plugins/import/ImportXml.py b/src/plugins/import/ImportXml.py index 032fb3bbb..e5943b741 100644 --- a/src/plugins/import/ImportXml.py +++ b/src/plugins/import/ImportXml.py @@ -47,6 +47,7 @@ from QuestionDialog import ErrorDialog, WarningDialog import gen.mime import gen.lib from gen.db import DbTxn +from gen.db.write import CLASS_TO_KEY_MAP from Errors import GrampsImportError import Utils import DateHandler @@ -85,6 +86,9 @@ EVENT_FAMILY_STR = _("%(event_name)s of %(family)s") # feature requests 2356, 1658: avoid genitive form EVENT_PERSON_STR = _("%(event_name)s of %(person)s") +HANDLE = 0 +INSTANTIATED = 1 + #------------------------------------------------------------------------- # # Importing data into the currently open database. 
@@ -215,6 +219,8 @@ class ImportInfo(object): """ self.data_mergecandidate = [{}, {}, {}, {}, {}, {}, {}, {}, {}, {}] self.data_newobject = [0] * 10 + self.data_unknownobject = [0] * 10 + self.expl_note = '' self.data_relpath = False def add(self, category, key, obj, sec_obj=None): @@ -227,6 +233,8 @@ class ImportInfo(object): self._extract_mergeinfo(key, obj, sec_obj) elif category == 'new-object': self.data_newobject[self.key2data[key]] += 1 + elif category == 'unknown-object': + self.data_unknownobject[self.key2data[key]] += 1 elif category == 'relative-path': self.data_relpath = True @@ -286,7 +294,20 @@ class ImportInfo(object): } txt = _("Number of new objects imported:\n") for key in self.keyorder: - txt += key2string[key] % self.data_newobject[self.key2data[key]] + if any(self.data_unknownobject): + strng = key2string[key][0:-1] + ' (%d)\n' + txt += strng % (self.data_newobject[self.key2data[key]], + self.data_unknownobject[self.key2data[key]]) + else: + txt += key2string[key] % self.data_newobject[self.key2data[key]] + if any(self.data_unknownobject): + txt += _("\n The imported file was not self-contained.\n" + "To correct for that, %d objects were created and\n" + "their typifying attribute was set to 'Unknown'.\n" + "The breakdown per category is depicted by the\n" + "number in parentheses. Where possible these\n" + "'Unkown' objects are referenced by note %s.\n" + ) % (sum(self.data_unknownobject), self.expl_note) if self.data_relpath: txt += _("\nMedia objects with relative paths have been\n" "imported. 
These paths are considered relative to\n" @@ -631,7 +652,7 @@ class GrampsParser(UpdateCallback): "stitle": (None, self.stop_stitle), "street": (None, self.stop_street), "style": (self.start_style, None), - "tag": (self.start_tag, None), + "tag": (self.start_tag, self.stop_tag), "tagref": (self.start_tagref, None), "tags": (None, None), "text": (None, self.stop_text), @@ -666,9 +687,10 @@ class GrampsParser(UpdateCallback): :rtype: str """ handle = str(handle.replace('_', '')) - if (handle in self.import_handles and - target in self.import_handles[handle]): - handle = self.import_handles[handle][target] + orig_handle = handle + if (orig_handle in self.import_handles and + target in self.import_handles[orig_handle]): + handle = self.import_handles[handle][target][HANDLE] if not callable(prim_obj): # This method is called by a start_ method. get_raw_obj_data = {"person": self.db.get_raw_person_data, @@ -683,15 +705,15 @@ class GrampsParser(UpdateCallback): "tag": self.db.get_raw_tag_data}[target] raw = get_raw_obj_data(handle) prim_obj.unserialize(raw) + self.import_handles[orig_handle][target][INSTANTIATED] = True return handle elif handle in self.import_handles: LOG.warn("The file you import contains duplicate handles " "which is illegal and being fixed now.") - orig_handle = handle handle = Utils.create_id() while handle in self.import_handles: handle = Utils.create_id() - self.import_handles[orig_handle][target] = handle + self.import_handles[orig_handle][target] = [handle, False] else: orig_handle = handle if self.replace_import_handle: @@ -711,9 +733,11 @@ class GrampsParser(UpdateCallback): "tag": self.db.has_tag_handle}[target] while has_handle_func(handle): handle = Utils.create_id() - self.import_handles[orig_handle] = {target: handle} + self.import_handles[orig_handle] = {target: [handle, False]} if callable(prim_obj): # method is called by a reference prim_obj = prim_obj() + else: + self.import_handles[orig_handle][target][INSTANTIATED] = True 
prim_obj.set_handle(handle) if target == "tag": self.db.add_tag(prim_obj, self.trans) @@ -878,6 +902,7 @@ class GrampsParser(UpdateCallback): "path in the Preferences." ) % self.mediapath ) + self.fix_not_instantiated() for key in self.func_map.keys(): del self.func_map[key] del self.func_map @@ -1050,6 +1075,7 @@ class GrampsParser(UpdateCallback): self.placeobj.title = attrs.get('title', '') self.locations = 0 self.update(self.p.CurrentLineNumber) + return self.placeobj def start_location(self, attrs): """Bypass the function calls for this one, since it appears to @@ -1152,6 +1178,7 @@ class GrampsParser(UpdateCallback): self.event.private = bool(attrs.get("priv")) self.event.change = int(attrs.get('change', self.change)) self.info.add('new-object', EVENT_KEY, self.event) + return self.event def start_eventref(self, attrs): """ @@ -1236,7 +1263,7 @@ class GrampsParser(UpdateCallback): # This is new XML, so we are guaranteed to have a handle ref handle = attrs['hlink'].replace('_', '') - handle = self.import_handles[handle][target] + handle = self.import_handles[handle][target][HANDLE] # Due to pre 2.2.9 bug, bookmarks might be handle of other object # Make sure those are filtered out. 
# Bookmarks are at end, so all handle must exist before we do bookmrks @@ -1324,6 +1351,7 @@ class GrampsParser(UpdateCallback): self.person.change = int(attrs.get('change', self.change)) self.info.add('new-object', PERSON_KEY, self.person) self.convert_marker(attrs, self.person) + return self.person def start_people(self, attrs): """ @@ -1459,6 +1487,7 @@ class GrampsParser(UpdateCallback): if 'type' in attrs: self.family.type.set_from_xml_str(attrs["type"]) self.convert_marker(attrs, self.family) + return self.family def start_rel(self, attrs): if 'type' in attrs: @@ -1606,7 +1635,7 @@ class GrampsParser(UpdateCallback): val = "gramps://%s/handle/%s" % ( match.group('object_class'), self.import_handles[match.group('handle')] - [target]) + [target][HANDLE]) tagvalue = gen.lib.StyledTextTagType.STYLE_TYPE[int(tagtype)](val) except KeyError: tagvalue = None @@ -1629,10 +1658,17 @@ class GrampsParser(UpdateCallback): self.inaugurate(attrs['handle'], "tag", self.tag) self.tag.change = int(attrs.get('change', self.change)) self.info.add('new-object', TAG_KEY, self.tag) - self.tag.set_name(attrs['name']) - self.tag.set_color(attrs['color']) - self.tag.set_priority(int(attrs['priority'])) + self.tag.set_name(attrs.get('name', _('Unknown when imported'))) + self.tag.set_color(attrs.get('color', '#000000000000')) + self.tag.set_priority(int(attrs.get('priority', 0))) + return self.tag + + def stop_tag(self, *tag): + if self.note is not None: + # Styled text tag in notes (prior to v1.4.0) + return self.db.commit_tag(self.tag, self.trans, self.tag.get_change_time()) + self.tag = None def start_tagref(self, attrs): """ @@ -1685,8 +1721,8 @@ class GrampsParser(UpdateCallback): self.note.change = int(attrs.get('change', self.change)) self.info.add('new-object', NOTE_KEY, self.note) self.note.format = int(attrs.get('format', gen.lib.Note.FLOWED)) - self.note.type.set_from_xml_str(attrs['type']) - + self.note.type.set_from_xml_str(attrs.get('type', + gen.lib.NoteType.UNKNOWN)) 
self.convert_marker(attrs, self.note) # Since StyledText was introduced (XML v1.3.0) the clear text @@ -1764,6 +1800,7 @@ class GrampsParser(UpdateCallback): #set correct change time self.db.commit_note(self.note, self.trans, self.change) self.info.add('new-object', NOTE_KEY, self.note) + return self.note def start_noteref(self, attrs): """ @@ -1877,6 +1914,7 @@ class GrampsParser(UpdateCallback): self.citation.change = int(attrs.get('change', self.change)) self.citation.confidence = self.conf # default self.info.add('new-object', CITATION_KEY, self.citation) + return self.citation def start_sourceref(self, attrs): """ @@ -1929,6 +1967,7 @@ class GrampsParser(UpdateCallback): self.source.private = bool(attrs.get("priv")) self.source.change = int(attrs.get('change', self.change)) self.info.add('new-object', SOURCE_KEY, self.source) + return self.source def start_reporef(self, attrs): """ @@ -2016,6 +2055,7 @@ class GrampsParser(UpdateCallback): src = attrs.get("src", '') if src: self.object.path = src + return self.object def start_repo(self, attrs): """ @@ -2041,6 +2081,7 @@ class GrampsParser(UpdateCallback): self.repo.private = bool(attrs.get("priv")) self.repo.change = int(attrs.get('change', self.change)) self.info.add('new-object', REPOSITORY_KEY, self.repo) + return self.repo def stop_people(self, *tag): pass @@ -2776,6 +2817,10 @@ class GrampsParser(UpdateCallback): self.db.commit_note(self.note, self.trans, self.note.get_change_time()) self.note = None + def stop_note_asothers(self, *tag): + self.db.commit_note(self.note, self.trans, self.note.get_change_time()) + self.note = None + def stop_research(self, tag): self.owner.set_name(self.resname) self.owner.set_address(self.resaddr) @@ -2872,6 +2917,44 @@ class GrampsParser(UpdateCallback): tag_handle = tag.get_handle() obj.add_tag(tag_handle) + def fix_not_instantiated(self): + uninstantiated = [(orig_handle, target) for orig_handle in + self.import_handles.keys() if + [target for target in 
self.import_handles[orig_handle].keys() if + not self.import_handles[orig_handle][target][INSTANTIATED]]] + if uninstantiated: + expl_note = Utils.create_explanation_note(self.db) + self.db.commit_note(expl_note, self.trans, time.time()) + self.info.expl_note = expl_note.get_gramps_id() + for orig_handle, target in uninstantiated: + class_arg = {'handle': orig_handle, 'id': None, 'priv': False} + if target == 'family': + objs = Utils.make_unknown(class_arg, expl_note.handle, + self.func_map[target][0], self.func_map[target][1], + self.trans, db=self.db) + elif target == 'citation': + objs = Utils.make_unknown(class_arg, expl_note.handle, + self.func_map[target][0], self.func_map[target][1], + self.trans, + source_class_func=self.func_map['source'][0], + source_commit_func=self.func_map['source'][1], + source_class_arg={'handle':Utils.create_id(), 'id':None, 'priv':False}) + elif target == 'note': + objs = Utils.make_unknown(class_arg, expl_note.handle, + self.func_map[target][0], self.stop_note_asothers, + self.trans) + else: + if target == 'place': + target = 'placeobj' + elif target == 'media': + target = 'object' + objs = Utils.make_unknown(class_arg, expl_note.handle, + self.func_map[target][0], self.func_map[target][1], + self.trans) + for obj in objs: + key = CLASS_TO_KEY_MAP[obj.__class__.__name__] + self.info.add('unknown-object', key, obj) + def append_value(orig, val): if orig: return "%s, %s" % (orig, val) diff --git a/src/plugins/lib/libgedcom.py b/src/plugins/lib/libgedcom.py index 835dc2a28..98f841d81 100644 --- a/src/plugins/lib/libgedcom.py +++ b/src/plugins/lib/libgedcom.py @@ -2565,6 +2565,7 @@ class GedcomParser(UpdateCallback): src.set_title(title) self.dbase.add_source(src, self.trans) + self.__check_xref() self.dbase.enable_signals() self.dbase.request_rebuild() if self.number_of_errors == 0: @@ -2913,6 +2914,131 @@ class GedcomParser(UpdateCallback): """ self.backoff = True + def __check_xref(self): + + def __check(map, trans, class_func, 
commit_func, gramps_id2handle, msg): + for input_id, gramps_id in map.map().iteritems(): + # Check whether an object exists for the mapped gramps_id + if not trans.get(str(gramps_id)): + handle = self.__find_from_handle(gramps_id, + gramps_id2handle) + if msg == "FAM": + Utils.make_unknown(gramps_id, self.explanation.handle, + class_func, commit_func, self.trans, + db=self.dbase) + self.__add_msg(_("Error: %(msg)s '%(gramps_id)s'" + " (input as @%(xref)s@) not in input" + " GEDCOM. Record synthesised") % + {'msg' : msg, 'gramps_id' : gramps_id, + 'xref' : input_id}) + else: + Utils.make_unknown(gramps_id, self.explanation.handle, + class_func, commit_func, self.trans) + self.missing_references +=1 + self.__add_msg(_("Error: %(msg)s '%(gramps_id)s'" + " (input as @%(xref)s@) not in input" + " GEDCOM. Record with typifying" + " attribute 'Unknown' created") % + {'msg' : msg, 'gramps_id' : gramps_id, + 'xref' : input_id}) + + self.explanation = Utils.create_explanation_note(self.dbase) + + self.missing_references = 0 + previous_errors = self.number_of_errors + __check(self.pid_map, self.dbase.id_trans, self.__find_or_create_person, + self.dbase.commit_person, self.gid2id, "INDI") + __check(self.fid_map, self.dbase.fid_trans, self.__find_or_create_family, + self.dbase.commit_family, self.fid2id, "FAM") + __check(self.sid_map, self.dbase.sid_trans, self.__find_or_create_source, + self.dbase.commit_source, self.sid2id, "SOUR") + __check(self.oid_map, self.dbase.oid_trans, self.__find_or_create_object, + self.dbase.commit_media_object, self.oid2id, "OBJE") + __check(self.rid_map, self.dbase.rid_trans, self.__find_or_create_repository, + self.dbase.commit_repository, self.rid2id, "REPO") + __check(self.nid_map, self.dbase.nid_trans, self.__find_or_create_note, + self.dbase.commit_note, self.nid2id, "NOTE") + + # Check persons membership in referenced families + def __input_fid(gramps_id): + for (k,v) in self.fid_map.map().iteritems(): + if v == gramps_id: + return k + + 
for input_id, gramps_id in self.pid_map.map().iteritems(): + person_handle = self.__find_from_handle(gramps_id, self.gid2id) + person = self.dbase.get_person_from_handle(person_handle) + for family_handle in person.get_family_handle_list(): + family = self.dbase.get_family_from_handle(family_handle) + if family and family.get_father_handle() != person_handle and \ + family.get_mother_handle() != person_handle: + person.remove_family_handle(family_handle) + self.dbase.commit_person(person, self.trans) + self.__add_msg(_("Error: family '%(family)s' (input as" + " @%(orig_family)s@) person %(person)s" + " (input as %(orig_person)s) is not a" + " member of the referenced family." + " Family reference removed from person") % + {'family' : family.gramps_id, + 'orig_family' : + __input_fid(family.gramps_id), + 'person' : person.gramps_id, + 'orig_person' : input_id}) + + def __input_pid(gramps_id): + for (k,v) in self.pid_map.map().iteritems(): + if v == gramps_id: + return k + + for input_id, gramps_id in self.fid_map.map().iteritems(): + family_handle = self.__find_from_handle(gramps_id, self.fid2id) + family = self.dbase.get_family_from_handle(family_handle) + father_handle = family.get_father_handle() + mother_handle = family.get_mother_handle() + + if father_handle: + father = self.dbase.get_person_from_handle(father_handle) + if father and \ + family_handle not in father.get_family_handle_list(): + father.add_family_handle(family_handle) + self.dbase.commit_person(father, self.trans) + self.__add_msg("Error: family '%(family)s' (input as" + " @%(orig_family)s@) father '%(father)s'" + " (input as '%(orig_father)s') does not refer" + " back to the family. Reference added." 
% + {'family' : family.gramps_id, + 'orig_family' : input_id, + 'father' : father.gramps_id, + 'orig_father' : + __input_pid(father.gramps_id)}) + + if mother_handle: + mother = self.dbase.get_person_from_handle(mother_handle) + if mother and \ + family_handle not in mother.get_family_handle_list(): + mother.add_family_handle(family_handle) + self.dbase.commit_person(mother, self.trans) + self.__add_msg("Error: family '%(family)s' (input as" + " @%(orig_family)s@) mother '%(mother)s'" + " (input as '%(orig_mother)s') does not refer" + " back to the family. Reference added." % + {'family' : family.gramps_id, + 'orig_family' : input_id, + 'mother' : mother.gramps_id, + 'orig_mother' : + __input_pid(mother.gramps_id)}) + + if self.missing_references: + self.dbase.commit_note(self.explanation, self.trans, time.time()) + txt = _("\nThe imported file was not self-contained.\n" + "To correct for that, %d objects were created and\n" + "their typifying attribute was set to 'Unknown'.\n" + "Where possible these 'Unkown' objects are \n" + "referenced by note %s.\n" + ) % (self.missing_references, self.explanation.gramps_id) + self.__add_msg(txt) + self.number_of_errors -= 1 + def __parse_trailer(self): """ Looks for the expected TRLR token @@ -3317,6 +3443,7 @@ class GedcomParser(UpdateCallback): if line.data and line.data[0] == '@': # Reference to a named multimedia object defined elsewhere gramps_id = self.oid_map[line.data] + handle = self.__find_object_handle(gramps_id) ref = gen.lib.MediaRef() ref.set_reference_handle(handle) @@ -6835,6 +6962,13 @@ class GedcomParser(UpdateCallback): self.inline_srcs[title] = handle else: src = self.__find_or_create_source(self.sid_map[line.data]) + # We need to set the title to the cross reference identifier of the + # SOURce record, just in case we never find the source record. If we + # didn't find the source record, then the source object would have + # got deleted by Check and Repair because the record is empty. 
If we + # find the source record, the title is overwritten in + # __source_title. + src.set_title(line.data) self.dbase.commit_source(src, self.trans) self.__parse_source_reference(citation, level, src.handle, state) citation.set_reference_handle(src.handle) diff --git a/src/plugins/tool/Check.py b/src/plugins/tool/Check.py index a06cad359..84ae79686 100644 --- a/src/plugins/tool/Check.py +++ b/src/plugins/tool/Check.py @@ -5,6 +5,7 @@ # Copyright (C) 2008 Brian G. Matherly # Copyright (C) 2010 Jakim Friant # Copyright (C) 2011 Tim G L Lyons +# Copyright (C) 2012 Michiel D. Nauta # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,6 +36,7 @@ from __future__ import with_statement import os import sys import cStringIO +import time from gen.ggettext import gettext as _ from gen.ggettext import ngettext @@ -177,7 +179,8 @@ class Check(tool.BatchTool): checker.check_person_references() checker.check_family_references() checker.check_place_references() - checker.check_source_and_citation_references() + checker.check_source_references() + checker.check_citation_references() checker.check_media_references() checker.check_repo_references() checker.check_note_references() @@ -207,23 +210,28 @@ class CheckIntegrity(object): self.duplicate_links = [] self.broken_parent_links = [] self.fam_rel = [] - self.invalid_events = [] - self.invalid_birth_events = [] - self.invalid_death_events = [] - self.invalid_person_references = [] - self.invalid_family_references = [] - self.invalid_place_references = [] - self.invalid_source_references = [] - self.invalid_citation_references = [] - self.invalid_repo_references = [] - self.invalid_media_references = [] - self.invalid_note_references = [] - self.invalid_tag_references = [] + self.invalid_events = set() + self.invalid_birth_events = set() + self.invalid_death_events = set() + self.invalid_person_references = set() + 
self.invalid_family_references = set() + self.invalid_place_references = set() + self.invalid_source_references = set() + self.invalid_citation_references = set() + self.invalid_repo_references = set() + self.invalid_media_references = set() + self.invalid_note_references = set() + self.invalid_tag_references = set() self.invalid_dates = [] self.removed_name_format = [] self.empty_objects = defaultdict(list) self.last_img_dir = config.get('behavior.addmedia-image-dir') self.progress = ProgressMeter(_('Checking Database'),'') + self.explanation = gen.lib.Note(_('Objects referenced by this note ' + 'were referenced but missing so that is why they have been created ' + 'when you ran Check and Repair on %s.') % + time.strftime('%x %X', time.localtime())) + self.explanation.set_handle(Utils.create_id()) def family_errors(self): return (len(self.broken_parent_links) + @@ -922,19 +930,25 @@ class CheckIntegrity(object): person = self.db.get_person_from_handle(key) birth_ref = person.get_birth_ref() + none_handle = False if birth_ref: + newref = birth_ref + if birth_ref.ref is None: + none_handle = True + birth_ref.ref = Utils.create_id() birth_handle = birth_ref.ref birth = self.db.get_event_from_handle(birth_handle) if not birth: # The birth event referenced by the birth handle # does not exist in the database # This is tested by TestcaseGenerator person "Broken11" - person.set_birth_ref(None) + Utils.make_unknown(birth_handle, self.explanation.handle, + self.class_event, self.commit_event, self.trans, + type=gen.lib.EventType.BIRTH) LOG(' FAIL: the person "%s" refers to a birth event' ' "%s" which does not exist in the database' % (person.gramps_id, birth_handle)) - self.db.commit_person(person, self.trans) - self.invalid_events.append(key) + self.invalid_events.add(key) else: if int(birth.get_type()) != gen.lib.EventType.BIRTH: # Birth event was not of the type "Birth" @@ -944,9 +958,18 @@ class CheckIntegrity(object): (person.gramps_id, int(birth.get_type()))) 
birth.set_type(gen.lib.EventType(gen.lib.EventType.BIRTH)) self.db.commit_event(birth, self.trans) - self.invalid_birth_events.append(key) + self.invalid_birth_events.add(key) + if none_handle: + person.set_birth_ref(newref) + self.db.commit_person(person, self.trans) + + none_handle = False death_ref = person.get_death_ref() if death_ref: + newref = death_ref + if death_ref.ref is None: + none_handle = True + death_ref.ref = Utils.create_id() death_handle = death_ref.ref death = self.db.get_event_from_handle(death_handle) if not death: @@ -956,9 +979,10 @@ class CheckIntegrity(object): LOG(' FAIL: the person "%s" refers to a death event' ' "%s" which does not exist in the database' % (person.gramps_id, death_handle)) - person.set_death_ref(None) - self.db.commit_person(person, self.trans) - self.invalid_events.append(key) + Utils.make_unknown(death_handle, self.explanation.handle, + self.class_event, self.commit_event, self.trans, + type=gen.lib.EventType.DEATH) + self.invalid_events.add(key) else: if int(death.get_type()) != gen.lib.EventType.DEATH: # Death event was not of the type "Death" @@ -968,10 +992,19 @@ class CheckIntegrity(object): (person.gramps_id, int(death.get_type()))) death.set_type(gen.lib.EventType(gen.lib.EventType.DEATH)) self.db.commit_event(death, self.trans) - self.invalid_death_events.append(key) + self.invalid_death_events.add(key) + if none_handle: + person.set_death_ref(newref) + self.db.commit_person(person, self.trans) + none_handle = False + newlist = [] if person.get_event_ref_list(): for event_ref in person.get_event_ref_list(): + newlist.append(event_ref) + if event_ref.ref is None: + none_handle = True + event_ref.ref = Utils.create_id() event_handle = event_ref.ref event = self.db.get_event_from_handle(event_handle) if not event: @@ -984,22 +1017,32 @@ class CheckIntegrity(object): LOG(' FAIL: the person "%s" refers to an event' ' "%s" which does not exist in the database' % (person.gramps_id, event_handle)) - 
person.get_event_ref_list().remove(event_ref) - self.db.commit_person(person,self.trans) - self.invalid_events.append(key) + Utils.make_unknown(event_handle, + self.explanation.handle, self.class_event, + self.commit_event, self.trans) + self.invalid_events.add(key) + if none_handle: + person.set_event_ref_list(newlist) + self.db.commit_person(person, self.trans) elif not isinstance(person.get_event_ref_list(), list): # event_list is None or other garbage LOG(' FAIL: the person "%s" has an event ref list' ' which is invalid' % (person.gramps_id)) person.set_event_ref_list([]) self.db.commit_person(person, self.trans) - self.invalid_events.append(key) + self.invalid_events.add(key) for key in self.db.get_family_handles(): self.progress.step() family = self.db.get_family_from_handle(key) if family.get_event_ref_list(): + none_handle = False + newlist = [] for event_ref in family.get_event_ref_list(): + newlist.append(event_ref) + if event_ref.ref is None: + none_handle = True + event_ref.ref = Utils.create_id() event_handle = event_ref.ref event = self.db.get_event_from_handle(event_handle) if not event: @@ -1008,18 +1051,19 @@ class CheckIntegrity(object): LOG(' FAIL: the family "%s" refers to an event' ' "%s" which does not exist in the database' % (family.gramps_id, event_handle)) - nlist = [x for x in family.get_event_ref_list() - if x.ref != event_handle] - family.set_event_ref_list(nlist) - self.db.commit_family(family, self.trans) - self.invalid_events.append(key) + Utils.make_unknown(event_handle, self.explanation, + self.class_event, self.commit_event, self.trans) + self.invalid_events.add(key) + if none_handle: + family.set_event_ref_list(newlist) + self.db.commit_family(family, self.trans) elif not isinstance(family.get_event_ref_list(), list): # event_list is None or other garbage LOG(' FAIL: the family "%s" has an event ref list' ' which is invalid' % (family.gramps_id)) family.set_event_ref_list([]) self.db.commit_family(family, self.trans) - 
self.invalid_events.append(key) + self.invalid_events.add(key) if len (self.invalid_birth_events) + len(self.invalid_death_events) +\ len(self.invalid_events) == 0: @@ -1034,14 +1078,23 @@ class CheckIntegrity(object): for key in plist: self.progress.step() + none_handle = False + newlist = [] person = self.db.get_person_from_handle(key) for pref in person.get_person_ref_list(): + newlist.append(pref) + if pref.ref is None: + none_handle = True + pref.ref = Utils.create_id() p = self.db.get_person_from_handle( pref.ref) if not p: # The referenced person does not exist in the database - person.get_person_ref_list().remove(pref) - self.db.commit_person(person, self.trans) - self.invalid_person_references.append(key) + Utils.make_unknown(pref.ref, self.explanation.handle, + self.class_person, self.commit_person, self.trans) + self.invalid_person_references.add(key) + if none_handle: + person.set_person_ref_list(newlist) + self.db.commit_person(person, self.trans) if len (self.invalid_person_references) == 0: LOG(' OK: no event problems found') @@ -1062,9 +1115,10 @@ class CheckIntegrity(object): family = self.db.get_family_from_handle(family_handle) if not family: # The referenced family does not exist in the database - ordinance.set_family_handle(None) - self.db.commit_person(person, self.trans) - self.invalid_family_references.append(key) + Utils.make_unknown(family_handle, + self.explanation.handle, self.class_family, + self.commit_family, self.trans, db=self.db) + self.invalid_family_references.add(key) if len (self.invalid_family_references) == 0: LOG(' OK: no event problems found') @@ -1078,14 +1132,23 @@ class CheckIntegrity(object): for key in slist: self.progress.step() + none_handle = False + newlist = [] source = self.db.get_source_from_handle(key) for reporef in source.get_reporef_list(): + newlist.append(reporef) + if reporef.ref is None: + none_handle = True + reporef.ref = Utils.create_id() r = self.db.get_repository_from_handle(reporef.ref) if not r: # 
The referenced repository does not exist in the database - source.get_reporef_list().remove(reporef) - self.db.commit_source(source, self.trans) - self.invalid_repo_references.append(key) + Utils.make_unknown(reporef.ref, self.explanation.handle, + self.class_repo, self.commit_repo, self.trans) + self.invalid_repo_references.add(key) + if none_handle: + source.set_reporef_list(newlist) + self.db.commit_source(source, self.trans) if len (self.invalid_repo_references) == 0: LOG(' OK: no repository reference problems found') @@ -1110,12 +1173,13 @@ class CheckIntegrity(object): # The referenced place does not exist in the database # This is tested by TestcaseGenerator person "Broken17" # This is tested by TestcaseGenerator person "Broken18" - ordinance.set_place_handle("") + Utils.make_unknown(place_handle, + self.explanation.handle, self.class_place, + self.commit_place, self.trans) LOG(' FAIL: the person "%s" refers to an LdsOrd' ' place "%s" which does not exist in the database' % (person.gramps_id, place_handle)) - self.db.commit_person(person, self.trans) - self.invalid_place_references.append(key) + self.invalid_place_references.add(key) # check families -> the LdsOrd references a place for key in flist: self.progress.step() @@ -1126,12 +1190,13 @@ class CheckIntegrity(object): place = self.db.get_place_from_handle(place_handle) if not place: # The referenced place does not exist in the database - ordinance.set_place_handle("") + Utils.make_unknown(place_handle, + self.explanation.handle, self.class_place, + self.commit_place, self.trans) LOG(' FAIL: the family "%s" refers to an LdsOrd' ' place "%s" which does not exist in the database' % (family.gramps_id, place_handle)) - self.db.commit_family(family, self.trans) - self.invalid_place_references.append(key) + self.invalid_place_references.add(key) # check events for key in elist: self.progress.step() @@ -1141,148 +1206,184 @@ class CheckIntegrity(object): place = self.db.get_place_from_handle(place_handle) if
not place: # The referenced place does not exist in the database - event.set_place_handle("") + Utils.make_unknown(place_handle, + self.explanation.handle, self.class_place, + self.commit_place, self.trans) LOG(' FAIL: the event "%s" refers to an LdsOrd place' ' "%s" which does not exist in the database' % (event.gramps_id, place_handle)) - self.db.commit_event(event, self.trans) - self.invalid_place_references.append(key) + self.invalid_place_references.add(key) if len (self.invalid_place_references) == 0: LOG(' OK: no place reference problems found') - def check_source_and_citation_references(self): - # We check both source and citations in one pass. If there is a problem - # with a citation reference from an object to a citation, then we need - # to remove the reference from the object. This is the same as any other - # reference check. However, if there is a problem with a source - # reference from a citation to a source, we can't just remove the source - # reference from the citation object (as we would in other cases), - # because all citations must have exactly one source. Therefore we must - # remove the citation as a whole, and also remove the reference to the - # citation from the object. Hence the reason why we do this while we are - # processing the object. - - # bad_citation_handles and invalid_citation_references are citation - # handles which occur in objects and we need to remove these citation - # references from the object. The citation reference needs to be removed - # either because there is no such citation, or because the citation will - # be removed as it doesn't point validly to a source object. - - # invalid_source_references are also citation handles, but these refer - # to real citation object which need to be deleted because the citations - # don't refer to valid sources. 
- - known_source_handles = self.db.get_source_handles() - good_citation_handles = set() + def check_citation_references(self): + known_handles = self.db.get_citation_handles() total = ( self.db.get_number_of_people() + self.db.get_number_of_families() + self.db.get_number_of_events() + self.db.get_number_of_places() + - self.db.get_number_of_media_objects() + + self.db.get_number_of_citations() + self.db.get_number_of_sources() + + self.db.get_number_of_media_objects() + self.db.get_number_of_repositories() ) - self.progress.set_pass(_('Looking for source and citation reference' - ' problems'), total) - LOG('Looking for source and citation reference problems') - - def check(name, map_func, class_func, commit_func): - for handle in map_func.keys(): - self.progress.step() - info = map_func[handle] - obj = class_func() - obj.unserialize(info) - handle_list = obj.get_referenced_handles_recursively() - - bad_citation_handles = set() - bad_citation_text = set() - for classn, handle in handle_list: - if classn == 'Citation': - if not handle: - bad_citation_handles.add(handle) - bad_citation_text.add("None") - else: - citation = self.db.get_citation_from_handle(handle) - if not citation: - bad_citation_handles.add(handle) - bad_citation_text.add(handle) - else: - # The citation is good, check whether the - # source_handle is OK - source_handle = citation.source_handle - if not source_handle or \ - source_handle not in known_source_handles: - bad_citation_handles.add(handle) - bad_citation_text.add(citation.gramps_id + - ": " + citation.page) - if handle not in \ - self.invalid_source_references: - self.invalid_source_references.append( - handle) - else: - good_citation_handles.add(handle) - if bad_citation_handles: - LOG(' FAIL: the %s "%s" refers to citation(s) "%s"' - ' which do not exist in the database' - ' or where the referenced source does not exist' % - (name, obj.gramps_id, - " ".join(h for h in bad_citation_text))) - 
obj.remove_citation_references(list(bad_citation_handles)) - commit_func(obj,self.trans) - new_bad_handles = [handle for handle in bad_citation_handles - if handle - not in self.invalid_citation_references] - self.invalid_citation_references += new_bad_handles - - check("person", self.db.person_map, gen.lib.Person, - self.db.commit_person) - check("family", self.db.family_map, gen.lib.Family, - self.db.commit_family) - check("event", self.db.event_map, gen.lib.Event, - self.db.commit_event) - check("media object", self.db.media_map, gen.lib.MediaObject, - self.db.commit_media_object) - check("place", self.db.place_map, gen.lib.Place, self.db.commit_place) - check("repository", self.db.repository_map, gen.lib.Repository, - self.db.commit_repository) - # There is no point in checking sources, because they don't have - # citations. - # check("source", self.db.source_map, gen.lib.Source, - # self.db.commit_source) - - # Now we need to check any citations that are not referenced from other - # objects, in case they too have invalid source references. - for handle in self.db.citation_map: - if handle not in good_citation_handles: - citation = self.db.get_citation_from_handle(handle) - source_handle = citation.source_handle - if not source_handle or \ - source_handle not in known_source_handles: - LOG(' FAIL: the citation "%s" refers to a source' - ' "%s" which does not exist in the database' % - (citation.gramps_id, citation.source_handle)) - if handle not in \ - self.invalid_source_references: - self.invalid_source_references.append( - handle) - - # bad citation references in objects have already been removed. Now - # remove any bad citations that were detected. 
- for citation_handle in self.invalid_source_references: - LOG(' FAIL: the citation "%s" which refers to source handle "%s"' - ' has been removed' % - (self.db.get_citation_from_handle(citation_handle).gramps_id, - self.db.get_citation_from_handle(citation_handle).source_handle)) - self.db.remove_citation(citation_handle, self.trans) - - if len(self.invalid_source_references) + \ - len(self.invalid_citation_references) == 0: - LOG(' OK: no invalid source or citation references found') - return + self.progress.set_pass(_('Looking for citation reference problems'), + total) + LOG('Looking for citation reference problems') + + for handle in self.db.person_map.keys(): + self.progress.step() + info = self.db.person_map[handle] + person = gen.lib.Person() + person.unserialize(info) + handle_list = person.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + person.replace_citation_references(None, new_handle) + self.db.commit_person(person, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.family_map.keys(): + self.progress.step() + info = self.db.family_map[handle] + family = gen.lib.Family() + family.unserialize(info) + handle_list = family.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + family.replace_citation_references(None, new_handle) + self.db.commit_family(family, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.place_map.keys(): + self.progress.step() + info = self.db.place_map[handle] + place = gen.lib.Place() + place.unserialize(info) + handle_list = place.get_referenced_handles_recursively() + for item in handle_list: + 
if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + place.replace_citation_references(None, new_handle) + self.db.commit_place(place, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.citation_map.keys(): + self.progress.step() + info = self.db.citation_map[handle] + citation = gen.lib.Citation() + citation.unserialize(info) + handle_list = citation.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + citation.replace_citation_references(None, new_handle) + self.db.commit_citation(citation, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.repository_map.keys(): + self.progress.step() + info = self.db.repository_map[handle] + repository = gen.lib.Repository() + repository.unserialize(info) + handle_list = repository.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + repository.replace_citation_references(None, new_handle) + self.db.commit_repository(repository, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.media_map.keys(): + self.progress.step() + info = self.db.media_map[handle] + obj = gen.lib.MediaObject() + obj.unserialize(info) + handle_list = obj.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + obj.replace_citation_references(None, new_handle) + self.db.commit_media_object(obj, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in 
known_handles: + self.invalid_citation_references.add(item[1]) + + for handle in self.db.event_map.keys(): + self.progress.step() + info = self.db.event_map[handle] + event = gen.lib.Event() + event.unserialize(info) + handle_list = event.get_referenced_handles_recursively() + for item in handle_list: + if item[0] == 'Citation': + if item[1] is None: + new_handle = Utils.create_id() + event.replace_citation_references(None, new_handle) + self.db.commit_event(event, self.trans) + self.invalid_citation_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_citation_references.add(item[1]) + + for bad_handle in self.invalid_citation_references: + created = Utils.make_unknown(bad_handle, self.explanation.handle, + self.class_citation, self.commit_citation, self.trans, + source_class_func=self.class_source, + source_commit_func=self.commit_source, + source_class_arg=Utils.create_id()) + self.invalid_source_references.add(created[0].handle) + + if len(self.invalid_citation_references) == 0: + LOG(' OK: no citation reference problems found') + + def check_source_references(self): + clist = self.db.get_citation_handles() + self.progress.set_pass(_('Looking for source reference problems'), + len(clist)) + LOG('Looking for source reference problems') + + for key in clist: + self.progress.step() + citation = self.db.get_citation_from_handle(key) + source_handle = citation.get_reference_handle() + if source_handle is None: + source_handle = Utils.create_id() + citation.set_reference_handle(source_handle) + self.db.commit_citation(citation, self.trans) + if source_handle: + source = self.db.get_source_from_handle(source_handle) + if not source: + # The referenced source does not exist in the database + Utils.make_unknown(source_handle, self.explanation.handle, + self.class_source, self.commit_source, self.trans) + LOG(' FAIL: the citation "%s" refers to source ' + ' "%s" which does not exist in the database' % + (citation.gramps_id, source_handle)) + 
self.invalid_source_references.add(key) + if len(self.invalid_source_references) == 0: + LOG(' OK: no source reference problems found') def check_media_references(self): known_handles = self.db.get_media_object_handles(False) @@ -1306,15 +1407,15 @@ class CheckIntegrity(object): person = gen.lib.Person() person.unserialize(info) handle_list = person.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - person.remove_media_references(bad_handles) - self.db.commit_person(person, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + person.replace_media_references(None, new_handle) + self.db.commit_person(person, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_media_references.add(item[1]) for handle in self.db.family_map.keys(): self.progress.step() @@ -1322,15 +1423,15 @@ class CheckIntegrity(object): family = gen.lib.Family() family.unserialize(info) handle_list = family.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - family.remove_media_references(bad_handles) - self.db.commit_family(family, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + family.replace_media_references(None, new_handle) + self.db.commit_family(family, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in 
known_handles: + self.invalid_media_references.add(item[1]) for handle in self.db.place_map.keys(): self.progress.step() @@ -1338,15 +1439,15 @@ class CheckIntegrity(object): place = gen.lib.Place() place.unserialize(info) handle_list = place.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - place.remove_media_references(bad_handles) - self.db.commit_place(place, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + place.replace_media_references(None, new_handle) + self.db.commit_place(place, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_media_references.add(item[1]) for handle in self.db.event_map.keys(): self.progress.step() @@ -1354,15 +1455,15 @@ class CheckIntegrity(object): event = gen.lib.Event() event.unserialize(info) handle_list = event.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - event.remove_media_references(bad_handles) - self.db.commit_event(event, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + event.replace_media_references(None, new_handle) + self.db.commit_event(event, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_media_references.add(item[1]) for handle in self.db.citation_map.keys(): self.progress.step() @@ 
-1370,15 +1471,15 @@ class CheckIntegrity(object): citation = gen.lib.Citation() citation.unserialize(info) handle_list = citation.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - citation.remove_media_references(bad_handles) - self.db.commit_citation(citation, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + citation.replace_media_references(None, new_handle) + self.db.commit_citation(citation, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_media_references.add(item[1]) for handle in self.db.source_map.keys(): self.progress.step() @@ -1386,21 +1487,40 @@ class CheckIntegrity(object): source = gen.lib.Source() source.unserialize(info) handle_list = source.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'MediaObject' and - item[1] not in known_handles ] - if bad_handles: - source.remove_media_references(bad_handles) - self.db.commit_source(source, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_media_references] - self.invalid_media_references += new_bad_handles + for item in handle_list: + if item[0] == 'MediaObject': + if item[1] is None: + new_handle = Utils.create_id() + source.replace_media_references(None, new_handle) + self.db.commit_source(source, self.trans) + self.invalid_media_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_media_references.add(item[1]) + + for bad_handle in self.invalid_media_references: + Utils.make_unknown(bad_handle, self.explanation.handle, + self.class_object, self.commit_object, 
self.trans) if len (self.invalid_media_references) == 0: LOG(' OK: no media reference problems found') def check_note_references(self): + # Here I assume check note_references runs after all the next checks. + missing_references = (len(self.invalid_person_references) + + len(self.invalid_family_references) + + len(self.invalid_birth_events) + + len(self.invalid_death_events) + + len(self.invalid_events) + + len(self.invalid_place_references) + + len(self.invalid_citation_references) + + len(self.invalid_source_references) + + len(self.invalid_repo_references) + + len(self.invalid_media_references)) + if missing_references: + self.db.add_note(self.explanation, self.trans, set_gid=True) + known_handles = self.db.get_note_handles() + bad_handles = [] total = ( self.db.get_number_of_people() + @@ -1423,15 +1543,15 @@ class CheckIntegrity(object): person = gen.lib.Person() person.unserialize(info) handle_list = person.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(person.remove_note, bad_handles) - self.db.commit_person(person, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + person.replace_note_references(None, new_handle) + self.db.commit_person(person, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.family_map.keys(): self.progress.step() @@ -1439,15 +1559,15 @@ class CheckIntegrity(object): family = gen.lib.Family() family.unserialize(info) handle_list = family.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if 
bad_handles: - map(family.remove_note, bad_handles) - self.db.commit_family(family, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + family.replace_note_references(None, new_handle) + self.db.commit_family(family, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.place_map.keys(): self.progress.step() @@ -1455,15 +1575,15 @@ class CheckIntegrity(object): place = gen.lib.Place() place.unserialize(info) handle_list = place.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(place.remove_note, bad_handles) - self.db.commit_place(place, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + place.replace_note_references(None, new_handle) + self.db.commit_place(place, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.citation_map.keys(): self.progress.step() @@ -1471,15 +1591,15 @@ class CheckIntegrity(object): citation = gen.lib.Citation() citation.unserialize(info) handle_list = citation.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(citation.remove_note, bad_handles) - self.db.commit_citation(citation, self.trans) - new_bad_handles = [handle for handle in bad_handles if 
handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + citation.replace_note_references(None, new_handle) + self.db.commit_citation(citation, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.source_map.keys(): self.progress.step() @@ -1487,15 +1607,15 @@ class CheckIntegrity(object): source = gen.lib.Source() source.unserialize(info) handle_list = source.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(source.remove_note, bad_handles) - self.db.commit_source(source, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + source.replace_note_references(None, new_handle) + self.db.commit_source(source, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.media_map.keys(): self.progress.step() @@ -1503,15 +1623,15 @@ class CheckIntegrity(object): obj = gen.lib.MediaObject() obj.unserialize(info) handle_list = obj.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(obj.remove_note, bad_handles) - self.db.commit_media_object(obj, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is 
None: + new_handle = Utils.create_id() + obj.replace_note_references(None, new_handle) + self.db.commit_media_object(obj, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.event_map.keys(): self.progress.step() @@ -1519,15 +1639,15 @@ class CheckIntegrity(object): event = gen.lib.Event() event.unserialize(info) handle_list = event.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(event.remove_note, bad_handles) - self.db.commit_event(event, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + event.replace_note_references(None, new_handle) + self.db.commit_event(event, self.trans) + self.invalid_note_references.add(new_handle) + elif item[1] not in known_handles: + self.invalid_note_references.add(item[1]) for handle in self.db.repository_map.keys(): self.progress.step() @@ -1535,18 +1655,25 @@ class CheckIntegrity(object): repo = gen.lib.Repository() repo.unserialize(info) handle_list = repo.get_referenced_handles_recursively() - bad_handles = [ item[1] for item in handle_list - if item[0] == 'Note' and - item[1] not in known_handles ] - if bad_handles: - map(repo.remove_note, bad_handles) - self.db.commit_repository(repo, self.trans) - new_bad_handles = [handle for handle in bad_handles if handle - not in self.invalid_note_references] - self.invalid_note_references += new_bad_handles + for item in handle_list: + if item[0] == 'Note': + if item[1] is None: + new_handle = Utils.create_id() + repo.replace_note_references(None, new_handle) + self.db.commit_repository(repo, self.trans) + 
self.invalid_note_references.add(new_handle)
+                    elif item[1] not in known_handles:
+                        self.invalid_note_references.add(item[1])
+
+        for bad_handle in self.invalid_note_references:
+            Utils.make_unknown(bad_handle, self.explanation.handle,
+                               self.class_note, self.commit_note, self.trans)
 
         if len (self.invalid_note_references) == 0:
             LOG(' OK: no note reference problems found')
+        else:
+            if not missing_references:
+                self.db.add_note(self.explanation, self.trans, set_gid=True)
 
     def check_tag_references(self):
         known_handles = self.db.get_tag_handles()
@@ -1560,6 +1687,7 @@ class CheckIntegrity(object):
 
         self.progress.set_pass(_('Looking for tag reference problems'),
                                total)
+        LOG('Looking for tag reference problems')
 
         for handle in self.db.person_map.keys():
             self.progress.step()
@@ -1567,15 +1695,15 @@ class CheckIntegrity(object):
             person = gen.lib.Person()
             person.unserialize(info)
             handle_list = person.get_referenced_handles_recursively()
-            bad_handles = [ item[1] for item in handle_list
-                            if item[0] == 'Tag' and
-                            item[1] not in known_handles ]
-            if bad_handles:
-                map(person.remove_tag, bad_handles)
-                self.db.commit_person(person, self.trans)
-                new_bad_handles = [handle for handle in bad_handles if handle
-                                   not in self.invalid_tag_references]
-                self.invalid_tag_references += new_bad_handles
+            for item in handle_list:
+                if item[0] == 'Tag':
+                    if item[1] is None:
+                        new_handle = Utils.create_id()
+                        person.replace_tag_references(None, new_handle)
+                        self.db.commit_person(person, self.trans)
+                        self.invalid_tag_references.add(new_handle)
+                    elif item[1] not in known_handles:
+                        self.invalid_tag_references.add(item[1])
 
         for handle in self.db.family_map.keys():
             self.progress.step()
@@ -1583,15 +1711,15 @@ class CheckIntegrity(object):
             family = gen.lib.Family()
             family.unserialize(info)
             handle_list = family.get_referenced_handles_recursively()
-            bad_handles = [ item[1] for item in handle_list
-                            if item[0] == 'Tag' and
-                            item[1] not in known_handles ]
-            if bad_handles:
-                map(family.remove_tag, bad_handles)
-                self.db.commit_family(family, self.trans)
-                new_bad_handles = [handle for handle in bad_handles if handle
-                                   not in self.invalid_tag_references]
-                self.invalid_tag_references += new_bad_handles
+            for item in handle_list:
+                if item[0] == 'Tag':
+                    if item[1] is None:
+                        new_handle = Utils.create_id()
+                        family.replace_tag_references(None, new_handle)
+                        self.db.commit_family(family, self.trans)
+                        self.invalid_tag_references.add(new_handle)
+                    elif item[1] not in known_handles:
+                        self.invalid_tag_references.add(item[1])
 
         for handle in self.db.media_map.keys():
             self.progress.step()
@@ -1599,15 +1727,15 @@ class CheckIntegrity(object):
             obj = gen.lib.MediaObject()
             obj.unserialize(info)
             handle_list = obj.get_referenced_handles_recursively()
-            bad_handles = [ item[1] for item in handle_list
-                            if item[0] == 'Tag' and
-                            item[1] not in known_handles ]
-            if bad_handles:
-                map(obj.remove_tag, bad_handles)
-                self.db.commit_object(obj, self.trans)
-                new_bad_handles = [handle for handle in bad_handles if handle
-                                   not in self.invalid_tag_references]
-                self.invalid_tag_references += new_bad_handles
+            for item in handle_list:
+                if item[0] == 'Tag':
+                    if item[1] is None:
+                        new_handle = Utils.create_id()
+                        obj.replace_tag_references(None, new_handle)
+                        self.db.commit_media_object(obj, self.trans)
+                        self.invalid_tag_references.add(new_handle)
+                    elif item[1] not in known_handles:
+                        self.invalid_tag_references.add(item[1])
 
         for handle in self.db.note_map.keys():
             self.progress.step()
@@ -1615,16 +1743,102 @@ class CheckIntegrity(object):
             note = gen.lib.Note()
             note.unserialize(info)
             handle_list = note.get_referenced_handles_recursively()
-            bad_handles = [ item[1] for item in handle_list
-                            if item[0] == 'Tag' and
-                            item[1] not in known_handles ]
-            if bad_handles:
-                map(note.remove_tag, bad_handles)
-                self.db.commit_note(note, self.trans)
-                new_bad_handles = [handle for handle in bad_handles if handle
-                                   not in self.invalid_tag_references]
-                self.invalid_tag_references += new_bad_handles
+            for item in handle_list:
+                if item[0] == 'Tag':
+                    if item[1] is None:
+                        new_handle = Utils.create_id()
+                        note.replace_tag_references(None, new_handle)
+                        self.db.commit_note(note, self.trans)
+                        self.invalid_tag_references.add(new_handle)
+                    elif item[1] not in known_handles:
+                        self.invalid_tag_references.add(item[1])
 
+        for bad_handle in self.invalid_tag_references:
+            Utils.make_unknown(bad_handle, None, self.class_tag,
+                               self.commit_tag, self.trans)
+
+        if len(self.invalid_tag_references) == 0:
+            LOG(' OK: no tag reference problems found')
+
+    def class_person(self, handle):
+        person = gen.lib.Person()
+        person.set_handle(handle)
+        return person
+
+    def commit_person(self, person, trans, change):
+        self.db.add_person(person, trans, set_gid=True)
+
+    def class_family(self, handle):
+        family = gen.lib.Family()
+        family.set_handle(handle)
+        return family
+
+    def commit_family(self, family, trans, change):
+        self.db.add_family(family, trans, set_gid=True)
+
+    def class_event(self, handle):
+        event = gen.lib.Event()
+        event.set_handle(handle)
+        return event
+
+    def commit_event(self, event, trans, change):
+        self.db.add_event(event, trans, set_gid=True)
+
+    def class_place(self, handle):
+        place = gen.lib.Place()
+        place.set_handle(handle)
+        return place
+
+    def commit_place(self, place, trans, change):
+        self.db.add_place(place, trans, set_gid=True)
+
+    def class_source(self, handle):
+        source = gen.lib.Source()
+        source.set_handle(handle)
+        return source
+
+    def commit_source(self, source, trans, change):
+        self.db.add_source(source, trans, set_gid=True)
+
+    def class_citation(self, handle):
+        citation = gen.lib.Citation()
+        citation.set_handle(handle)
+        return citation
+
+    def commit_citation(self, citation, trans, change):
+        self.db.add_citation(citation, trans, set_gid=True)
+
+    def class_repo(self, handle):
+        repo = gen.lib.Repository()
+        repo.set_handle(handle)
+        return repo
+
+    def commit_repo(self, repo, trans, change):
+        self.db.add_repository(repo, trans, set_gid=True)
+
+    def class_object(self, handle):
+        media = gen.lib.MediaObject()
+        media.set_handle(handle)
+        return media
+
+    def commit_object(self, object, trans, change):
+        self.db.add_object(object, trans, set_gid=True)
+
+    def class_note(self, handle):
+        note = gen.lib.Note()
+        note.set_handle(handle)
+        return note
+
+    def commit_note(self, note, trans, change):
+        self.db.add_note(note, trans, set_gid=True)
+
+    def class_tag(self, handle):
+        tag = gen.lib.Tag()
+        tag.set_handle(handle)
+        return tag
+
+    def commit_tag(self, tag, trans, change):
+        self.db.add_tag(tag, trans)
 
     def build_report(self, uistate=None):
         self.progress.close()
@@ -1814,8 +2028,8 @@ class CheckIntegrity(object):
 
         if event_invalid:
             self.text.write(
-                ngettext("%(quantity)d invalid event reference was removed\n",
-                         "%(quantity)d invalid event references were removed\n",
+                ngettext("%(quantity)d event was referenced but not found\n",
+                         "%(quantity)d events were referenced, but not found\n",
                          event_invalid) % {'quantity': event_invalid}
                 )
 