From dd6be5ad7242dba8c40466f9e075862f2528e658 Mon Sep 17 00:00:00 2001 From: Don Allingham Date: Wed, 28 Feb 2007 05:20:30 +0000 Subject: [PATCH] * src/GrampsDbUtils/_GedcomChar.py: handle stripping of newlines * src/GrampsDbUtils/_GedcomLex.py: handle newlines and eof the declaration line, give notes and id/handle svn: r8256 --- ChangeLog | 5 +- src/Editors/_EditSourceRef.py | 4 +- src/GrampsDb/_GrampsInMemDB.py | 2 +- src/GrampsDbUtils/_GedcomChar.py | 8 +- src/GrampsDbUtils/_GedcomLex.py | 17 +- src/GrampsDbUtils/_GedcomParse.py | 367 ++++++++++++++++-------------- src/RelLib/_Source.py | 4 +- 7 files changed, 223 insertions(+), 184 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3de07d533..0e66a7972 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,6 @@ 2007-02-27 Don Allingham + * src/GrampsDbUtils/_GedcomChar.py: handle stripping of newlines + * src/GrampsDbUtils/_GedcomLex.py: handle newlines and eof * src/GrampsDbUtils/_GedcomParse.py (GedcomParser.parse_fam): use parse_level @@ -21,7 +23,7 @@ 2007-02-26 Don Allingham * src/GrampsDbUtils/_GedcomParse.py: handle EVENTS with argument on - the declaration line + the declaration line, give notes and id/handle * src/GrampsDbUtils/_GedcomLex.py: handle EVENTS with argument on the declaration line @@ -32,7 +34,6 @@ data; Parse MAP/LATI/LONG; Fix creation of Nobility Title * src/GrampsDbUtils/_GedcomTokens.py: Add MAP,LATI,LONG - 2007-02-26 Don Allingham * src/GrampsDbUtils/_GedcomParse.py: encode file name properly diff --git a/src/Editors/_EditSourceRef.py b/src/Editors/_EditSourceRef.py index eda1a69d4..404051b18 100644 --- a/src/Editors/_EditSourceRef.py +++ b/src/Editors/_EditSourceRef.py @@ -79,8 +79,6 @@ class EditSourceRef(EditReference): # that problem is fixed, but _cleanup_on_exit SHOULD NOT be run # in close(), because close() is called on OK. # Until this is fixed, notes, text and comments are not saved!!! - self.note_tab.cancel() - self.text_tab.cancel() self.comment_tab.cancel() def _setup_fields(self): @@ -185,7 +183,7 @@ class EditSourceRef(EditReference): self.comment_tab = self._add_tab( notebook_ref, NoteTab(self.dbstate, self.uistate, self.track, - self.source_ref.get_note_list(),_('Comments'))) + self.source_ref.get_note_list())) self._setup_notebook_tabs( notebook_src) self._setup_notebook_tabs( notebook_ref) diff --git a/src/GrampsDb/_GrampsInMemDB.py b/src/GrampsDb/_GrampsInMemDB.py index b7090b79d..dcc1979dd 100644 --- a/src/GrampsDb/_GrampsInMemDB.py +++ b/src/GrampsDb/_GrampsInMemDB.py @@ -312,7 +312,7 @@ class GrampsInMemDB(GrampsDbBase): GrampsDbBase.commit_repository(self,repository,transaction,change_time) def commit_note(self,note,transaction,change_time=None): - if not self._commit_inmem_base(note,self.note_map, + if not self._commit_inmem_base(note, self.note_map, self.nid_trans): return GrampsDbBase.commit_note(self,note,transaction,change_time) diff --git a/src/GrampsDbUtils/_GedcomChar.py b/src/GrampsDbUtils/_GedcomChar.py index 1e68410da..0d19078ed 100644 --- a/src/GrampsDbUtils/_GedcomChar.py +++ b/src/GrampsDbUtils/_GedcomChar.py @@ -31,7 +31,7 @@ class BaseReader: def readline(self): return unicode(self.ifile.readline(), encoding=self.enc, - errors='replace').strip('\n\r') + errors='replace') class UTF8Reader(BaseReader): @@ -47,7 +47,7 @@ class UTF8Reader(BaseReader): def readline(self): return unicode(self.ifile.readline(), encoding=self.enc, - errors='replace').strip('\n\r') + errors='replace') class UTF16Reader(BaseReader): @@ -71,6 +71,4 @@ class AnselReader(BaseReader): BaseReader.__init__(self, ifile, "") def readline(self): - return ansel_to_utf8(self.ifile.readline().strip('\n\r')) - - + return ansel_to_utf8(self.ifile.readline()) diff --git a/src/GrampsDbUtils/_GedcomLex.py b/src/GrampsDbUtils/_GedcomLex.py index fb3eaf45d..de9e48165 100644 --- a/src/GrampsDbUtils/_GedcomLex.py +++ b/src/GrampsDbUtils/_GedcomLex.py @@ -44,6 +44,14 @@ from _GedcomTokens import * import RelLib from DateHandler._DateParser import DateParser +#------------------------------------------------------------------------ +# +# Set up logging +# +#------------------------------------------------------------------------ +import logging +LOG = logging.getLogger(".GedcomImport") + #------------------------------------------------------------------------- # # constants # @@ -355,16 +363,13 @@ class Reader: self.eof = True return - line = line.split(None, 2) + [''] - - val = line[2] - try: + line = line.strip('\n\r').split(None, 2) + [''] level = int(line[0]) except: - level = 0 + continue - data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1], + data = (level, tokens.get(line[1], TOKEN_UNKNOWN), line[2], line[1], self.index) func = self.func_map.get(data[1]) diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py index e6f978636..aa17c3024 100644 --- a/src/GrampsDbUtils/_GedcomParse.py +++ b/src/GrampsDbUtils/_GedcomParse.py @@ -248,7 +248,8 @@ class StageOne: level = 0 key = key.strip() except: - raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt) + LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt) + #raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt) if level == 0 and key[0] == '@': if value == ("FAM", "FAMILY") : @@ -318,7 +319,6 @@ class GedcomParser(UpdateCallback): self.fams_map = stage_one.get_fams_map() self.place_parser = GedcomUtils.PlaceParser() - self.debug = False self.inline_srcs = {} self.media_map = {} self.refn = {} @@ -355,6 +355,10 @@ class GedcomParser(UpdateCallback): self.dbase.rid_trans, self.dbase.find_next_repository_gramps_id, self.dbase.get_number_of_repositories()) + self.nid_map = GedcomUtils.IdMapper( + self.dbase.nid_trans, + self.dbase.find_next_note_gramps_id, + self.dbase.get_number_of_notes()) self.gid2id = {} self.oid2id = {} @@ -409,7 +413,7 @@ class GedcomParser(UpdateCallback): TOKEN_OBJE : self.func_person_object, # +1 <> {0:M} TOKEN_NOTE : self.func_person_note, - TOKEN_RNOTE : self.func_person_rnote, + TOKEN_RNOTE : self.func_person_note, TOKEN__COMM : self.func_person_note, # +1 RFN {0:1} TOKEN_RFN : self.func_person_attr, @@ -500,6 +504,9 @@ class GedcomParser(UpdateCallback): TOKEN_HUSB : self.func_event_husb, TOKEN_WIFE : self.func_event_wife, TOKEN_FAMC : self.func_person_birth_famc, + # Not legal, but inserted by Ultimate Family Tree + TOKEN_CHAN : self.func_ignore, + TOKEN_QUAY : self.func_ignore, } self.adopt_parse_tbl = { @@ -526,6 +533,9 @@ class GedcomParser(UpdateCallback): TOKEN_TEMP : self.func_ignore, TOKEN_OBJE : self.func_event_object, TOKEN_FAMC : self.func_person_adopt_famc, + # Not legal, but inserted by Ultimate Family Tree + TOKEN_CHAN : self.func_ignore, + TOKEN_QUAY : self.func_ignore, } self.famc_parse_tbl = { @@ -664,6 +674,7 @@ class GedcomParser(UpdateCallback): # +1 RIN {0:1} # +1 <> {0:1} TOKEN_CHAN : self.func_family_chan, + TOKEN_ENDL : self.func_ignore, TOKEN_ADDR : self.func_family_addr, TOKEN_RIN : self.func_family_cust_attr, @@ -698,6 +709,8 @@ class GedcomParser(UpdateCallback): TOKEN_DATA : self.func_ignore, TOKEN_TYPE : self.func_source_attr, TOKEN_CALN : self.func_ignore, + # not legal, but Ultimate Family Tree does this + TOKEN_DATE : self.func_ignore, TOKEN_IGNORE: self.func_ignore, } @@ -743,6 +756,8 @@ class GedcomParser(UpdateCallback): TOKEN_SOUR : self.func_event_place_sour, TOKEN__LOC : self.func_ignore, TOKEN_MAP : self.func_place_map, + # Not legal, but generated by Ultimate Family Tree + TOKEN_QUAY : self.func_ignore, } self.place_map_tbl = { @@ -760,6 +775,43 @@ class GedcomParser(UpdateCallback): TOKEN_ADOP : self.func_person_adopt_famc_adopt, } + self.opt_note_tbl = { + TOKEN_RNOTE: self.func_optional_note, + TOKEN_NOTE: self.func_optional_note, + } + + self.srcref_data_tbl = { + TOKEN_DATE : self.func_source_data_date, + TOKEN_TEXT : self.func_source_data_text, + TOKEN_RNOTE: self.func_source_data_note, + TOKEN_NOTE : self.func_source_data_note, + } + + self.header_sour = { + TOKEN_SOUR : self.func_header_sour, + TOKEN_NAME : self.func_ignore, + TOKEN_VERS : self.func_header_vers, + TOKEN_FILE : self.func_header_file, + TOKEN_COPR : self.func_header_copr, + TOKEN_SUBM : self.func_header_subm, + TOKEN_CORP : self.func_ignore, + TOKEN_DATA : self.func_ignore, + TOKEN_SUBN : self.func_ignore, + TOKEN_LANG : self.func_ignore, + TOKEN_TIME : self.func_ignore, + TOKEN_DEST : self.func_header_dest, + TOKEN_CHAR : self.func_ignore, + TOKEN_GEDC : self.func_ignore, + TOKEN__SCHEMA: self.func_ignore, + TOKEN_PLAC : self.func_header_plac, + TOKEN_DATE : self.func_header_date, + TOKEN_NOTE : self.func_header_note, + } + + self.header_subm = { + TOKEN_NAME : self.func_header_subm_name, + } + # look for existing place titles, build a map self.place_names = {} cursor = dbase.get_place_cursor() @@ -1045,8 +1097,8 @@ class GedcomParser(UpdateCallback): def backup(self): """ - Sets the backup flag so that the current line can be accessed by the next - level up. + Sets the backup flag so that the current line can be accessed by the + next level up. """ self.backoff = True @@ -1057,6 +1109,7 @@ class GedcomParser(UpdateCallback): no_magic = self.maxpeople < 1000 self.trans = self.dbase.transaction_begin("", not use_trans, no_magic) + self.debug = False self.dbase.disable_signals() self.parse_header_head() self.parse_header_source() @@ -1161,7 +1214,7 @@ class GedcomParser(UpdateCallback): else: func = func_map.get(line.token, default) if self.debug: - print line + print line, func func(line, state) def func_undefined(self, line, state): @@ -2232,7 +2285,10 @@ class GedcomParser(UpdateCallback): """ handle = self.find_family_handle(self.fid_map[line.data]) state.person.add_family_handle(handle) - self.parse_optional_note(self.person, 2) + + sub_state = GedcomUtils.CurrentState(level=state.level+1) + sub_state.obj = state.person + self.parse_level(sub_state, self.opt_note_tbl, self.func_ignore) def func_person_asso(self, line, state): """ @@ -2508,7 +2564,7 @@ class GedcomParser(UpdateCallback): sub_state.mrel = None sub_state.frel = None - self.parse_level(sub_state, self.family_rel_tbl, self.func_undefined) + self.parse_level(sub_state, self.family_rel_tbl, self.func_ignore) child = self.find_or_create_person(self.pid_map[line.data]) @@ -3200,7 +3256,7 @@ class GedcomParser(UpdateCallback): @param state: The current state @type state: CurrentState """ - handle = self.find_family_handle(line.data.strip()[1:-1]) + handle = self.find_family_handle(self.fid_map[line.data]) if state.person.get_main_parents_family_handle() == handle: state.person.set_main_parent_family_handle(None) @@ -3342,10 +3398,19 @@ class GedcomParser(UpdateCallback): @param state: The current state @type state: CurrentState """ - date, text = self.parse_source_data(state.level+2) - if date: - state.src_ref.set_date_object(date) - state.src_ref.set_text(text) + sub_state = GedcomUtils.CurrentState(level=state.level+1) + sub_state.src_ref = state.src_ref + + self.parse_level(sub_state, self.srcref_data_tbl, self.func_undefined) + + def func_source_data_date(self, line, state): + state.src_ref.set_date_object(line.data) + + def func_source_data_text(self, line, state): + state.src_ref.set_text(line.data) + + def func_source_data_note(self, line, state): + self.parse_note(line, state.src_ref, state.level) def func_srcref_obje(self, line, state): """ @@ -3982,181 +4047,153 @@ class GedcomParser(UpdateCallback): famc_handle = self.find_family_handle(mapped_id) self.person.add_parent_family_handle(famc_handle) + def func_optional_note(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + self.parse_note(line, state.obj, state.level) + + def parse_header_source(self): + state = GedcomUtils.CurrentState(level=1) + self.parse_level(state, self.header_sour, self.func_undefined) + + def func_header_sour(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + self.gedsource = self.gedmap.get_from_source_tag(line.data) + self.lexer.set_broken_conc(self.gedsource.get_conc()) + if line.data == "FTW": + self.is_ftw = True + elif line.data == "Ancestry.com Family Trees": + self.is_ancestry_com = True + state.genby = line.data + + def func_header_vers(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + self.def_src.set_data_item('Generated by',"%s %s" % + (state.genby, line.data)) + + def func_header_file(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + filename = os.path.basename(line.data).split('\\')[-1] + self.def_src.set_title(_("Import from %s") % filename) + + def func_header_copr(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + self.def_src.set_publication_info(line.data) + + def func_header_subm(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + sub_state = GedcomUtils.CurrentState(level=state.level+1) + self.parse_level(sub_state, self.header_subm, self.func_ignore) + + def func_header_dest(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if state.genby == "GRAMPS": + self.gedsource = self.gedmap.get_from_source_tag(line.data) + self.lexer.set_broken_conc(self.gedsource.get_conc()) + + def func_header_plac(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + self.parse_place_form(2) + + def func_header_date(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + self.def_src.set_data_item('Creation date', line.data) + + def func_header_note(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + self.parse_note(line, self.def_src, 2, '') + + def func_header_subm_name(self, line, state): + """ + @param line: The current line in GedLine format + @type line: GedLine + @param state: The current state + @type state: CurrentState + """ + if self.use_def_src: + self.def_src.set_author(line.data) + ############################################################################### def parse_note(self, line, obj, level): # reference to a named note defined elsewhere if line.token == TOKEN_RNOTE: - obj.add_note(line.data.strip()) + obj.add_note(self.nid_map[line.data.strip()]) else: new_note = RelLib.Note(line.data) + new_note.set_gramps_id(self.dbase.find_next_note_gramps_id()) + new_note.set_handle(Utils.create_id()) self.dbase.commit_note(new_note,self.trans) obj.add_note(new_note.handle) self.skip_subordinate_levels(level+1) - def parse_comment(self, line, obj, level, old_note): - return self.parse_note_base(line, obj, level, old_note, obj.add_note) - - def parse_optional_note(self, obj, level): - note = "" - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - return note - elif line.token == TOKEN_NOTE or line.token == TOKEN_RNOTE: - self.parse_note(line, obj, level) - else: - self.not_recognized(level+1) - return None - def parse_source_reference(self, src_ref, level, handle): """Reads the data associated with a SOUR reference""" state = GedcomUtils.CurrentState() - state.level = level+1 + state.level = level state.src_ref = src_ref state.handle = handle self.parse_level(state, self.srcref_parse_tbl, self.func_ignore) - - def parse_source_data(self, level): - """Parses the source data""" - date = "" - note = "" - while True: - line = self.get_next() - if self.level_is_finished(line, level): - break - elif line.token == TOKEN_DATE: - date = line.data - elif line.token == TOKEN_TEXT: - note = line.data - elif line.token == TOKEN_NOTE: - continue - elif line.token == TOKEN_RNOTE: - continue - else: - self.not_recognized(level+1) - return (date, note) def parse_header_head(self): """validiates that this is a valid GEDCOM file""" line = self.lexer.readline() if line.token != TOKEN_HEAD: raise Errors.GedcomError("%s is not a GEDCOM file" % self.filename) - - def parse_header_source(self): - genby = "" - while True: - line = self.get_next() - if self.level_is_finished(line, 1): - return - elif line.token == TOKEN_SOUR: - self.gedsource = self.gedmap.get_from_source_tag(line.data) - self.lexer.set_broken_conc(self.gedsource.get_conc()) - if line.data == "FTW": - self.is_ftw = True - if line.data == "Ancestry.com Family Trees": - self.is_ancestry_com = True - genby = line.data - elif line.token == TOKEN_NAME: - pass - elif line.token == TOKEN_VERS: - if self.use_def_src: - self.def_src.set_data_item('Generated by',"%s %s" % - (genby, line.data)) - elif line.token == TOKEN_FILE: - if self.use_def_src: - filename = os.path.basename(line.data).split('\\')[-1] - self.def_src.set_title(_("Import from %s") % filename) - elif line.token == TOKEN_COPR: - if self.use_def_src: - self.def_src.set_publication_info(line.data) - elif line.token == TOKEN_SUBM: - self.parse_subm(2) - elif line.token in (TOKEN_CORP, TOKEN_DATA, TOKEN_SUBN, - TOKEN_LANG, TOKEN_TIME): - self.skip_subordinate_levels(2) - elif line.token == TOKEN_DEST: - if genby == "GRAMPS": - self.gedsource = self.gedmap.get_from_source_tag(line.data) - self.lexer.set_broken_conc(self.gedsource.get_conc()) - elif line.token == TOKEN_CHAR: - pass - self.skip_subordinate_levels(2) - elif line.token == TOKEN_GEDC: - self.skip_subordinate_levels(2) - elif line.token == TOKEN__SCHEMA: - self.parse_ftw_schema(2) - elif line.token == TOKEN_PLAC: - self.parse_place_form(2) - elif line.token == TOKEN_DATE: - self.parse_date(2) - if self.use_def_src: - self.def_src.set_data_item('Creation date', line.data) - elif line.token == TOKEN_NOTE: - if self.use_def_src: - self.parse_note(line, self.def_src, 2, '') - elif line.token == TOKEN_UNKNOWN: - self.skip_subordinate_levels(2) - else: - self.not_recognized(2) - - def parse_subm(self, level): - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - break - elif line.token == TOKEN_NAME: - if self.use_def_src: - self.def_src.set_author(line.data) - else: - self.skip_subordinate_levels(2) - - def parse_ftw_schema(self, level): - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - break - elif line.token == TOKEN_INDI: - self.parse_ftw_indi_schema(level+1) - elif line.token == TOKEN_FAM: - self.parse_ftw_fam_schema(level+1) - else: - self.not_recognized(2) - - def parse_ftw_indi_schema(self, level): - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - break - else: - GED_2_GRAMPS[line.token] = self.parse_label(level+1) - - def parse_label(self, level): - value = None - - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - break - elif line.token == TOKEN_LABL: - value = line.data - else: - self.not_recognized(2) - return value - - def parse_ftw_fam_schema(self, level): - while True: - line = self.get_next() - - if self.level_is_finished(line, level): - break - else: - GED_2_FAMILY_CUSTOM[line.token_text] = self.parse_label(level+1) def skip_subordinate_levels(self, level): while True: diff --git a/src/RelLib/_Source.py b/src/RelLib/_Source.py index 30e0ddf6c..0189252b4 100644 --- a/src/RelLib/_Source.py +++ b/src/RelLib/_Source.py @@ -66,9 +66,9 @@ class Source(MediaBase, NoteBase, PrimaryObject): unicode(self.author), unicode(self.pubinfo), NoteBase.serialize(self), MediaBase.serialize(self), unicode(self.abbrev), - self.change,self.datamap, + self.change, self.datamap, [rr.serialize() for rr in self.reporef_list], - self.marker.serialize(),self.private) + self.marker.serialize(), self.private) def unserialize(self, data): """