* src/GrampsDbUtils/_GedcomChar.py: handle stripping of newlines

* src/GrampsDbUtils/_GedcomLex.py: handle newlines and eof
* src/GrampsDbUtils/_GedcomParse.py: handle EVENTS with argument on
	the declaration line, give notes and id/handle


svn: r8256
This commit is contained in:
Don Allingham 2007-02-28 05:20:30 +00:00
parent 0a75738eb5
commit dd6be5ad72
7 changed files with 223 additions and 184 deletions

View File

@ -1,4 +1,6 @@
2007-02-27 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomChar.py: handle stripping of newlines
* src/GrampsDbUtils/_GedcomLex.py: handle newlines and eof
* src/GrampsDbUtils/_GedcomParse.py (GedcomParser.parse_fam): use
parse_level
@ -21,7 +23,7 @@
2007-02-26 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py: handle EVENTS with argument on
the declaration line
the declaration line, give notes and id/handle
* src/GrampsDbUtils/_GedcomLex.py: handle EVENTS with argument on
the declaration line
@ -32,7 +34,6 @@
data; Parse MAP/LATI/LONG; Fix creation of Nobility Title
* src/GrampsDbUtils/_GedcomTokens.py: Add MAP,LATI,LONG
2007-02-26 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py: encode file name properly

View File

@ -79,8 +79,6 @@ class EditSourceRef(EditReference):
# that problem is fixed, but _cleanup_on_exit SHOULD NOT be run
# in close(), because close() is called on OK.
# Until this is fixed, notes, text and comments are not saved!!!
self.note_tab.cancel()
self.text_tab.cancel()
self.comment_tab.cancel()
def _setup_fields(self):
@ -185,7 +183,7 @@ class EditSourceRef(EditReference):
self.comment_tab = self._add_tab(
notebook_ref,
NoteTab(self.dbstate, self.uistate, self.track,
self.source_ref.get_note_list(),_('Comments')))
self.source_ref.get_note_list()))
self._setup_notebook_tabs( notebook_src)
self._setup_notebook_tabs( notebook_ref)

View File

@ -312,7 +312,7 @@ class GrampsInMemDB(GrampsDbBase):
GrampsDbBase.commit_repository(self,repository,transaction,change_time)
def commit_note(self,note,transaction,change_time=None):
if not self._commit_inmem_base(note,self.note_map,
if not self._commit_inmem_base(note, self.note_map,
self.nid_trans):
return
GrampsDbBase.commit_note(self,note,transaction,change_time)

View File

@ -31,7 +31,7 @@ class BaseReader:
def readline(self):
return unicode(self.ifile.readline(),
encoding=self.enc,
errors='replace').strip('\n\r')
errors='replace')
class UTF8Reader(BaseReader):
@ -47,7 +47,7 @@ class UTF8Reader(BaseReader):
def readline(self):
return unicode(self.ifile.readline(),
encoding=self.enc,
errors='replace').strip('\n\r')
errors='replace')
class UTF16Reader(BaseReader):
@ -71,6 +71,4 @@ class AnselReader(BaseReader):
BaseReader.__init__(self, ifile, "")
def readline(self):
return ansel_to_utf8(self.ifile.readline().strip('\n\r'))
return ansel_to_utf8(self.ifile.readline())

View File

@ -44,6 +44,14 @@ from _GedcomTokens import *
import RelLib
from DateHandler._DateParser import DateParser
#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
LOG = logging.getLogger(".GedcomImport")
#-------------------------------------------------------------------------
#
# constants #
@ -355,16 +363,13 @@ class Reader:
self.eof = True
return
line = line.split(None, 2) + ['']
val = line[2]
try:
line = line.strip('\n\r').split(None, 2) + ['']
level = int(line[0])
except:
level = 0
continue
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1],
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), line[2], line[1],
self.index)
func = self.func_map.get(data[1])

View File

@ -248,7 +248,8 @@ class StageOne:
level = 0
key = key.strip()
except:
raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt)
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
#raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt)
if level == 0 and key[0] == '@':
if value == ("FAM", "FAMILY") :
@ -318,7 +319,6 @@ class GedcomParser(UpdateCallback):
self.fams_map = stage_one.get_fams_map()
self.place_parser = GedcomUtils.PlaceParser()
self.debug = False
self.inline_srcs = {}
self.media_map = {}
self.refn = {}
@ -355,6 +355,10 @@ class GedcomParser(UpdateCallback):
self.dbase.rid_trans,
self.dbase.find_next_repository_gramps_id,
self.dbase.get_number_of_repositories())
self.nid_map = GedcomUtils.IdMapper(
self.dbase.nid_trans,
self.dbase.find_next_note_gramps_id,
self.dbase.get_number_of_notes())
self.gid2id = {}
self.oid2id = {}
@ -409,7 +413,7 @@ class GedcomParser(UpdateCallback):
TOKEN_OBJE : self.func_person_object,
# +1 <<NOTE_STRUCTURE>> {0:M}
TOKEN_NOTE : self.func_person_note,
TOKEN_RNOTE : self.func_person_rnote,
TOKEN_RNOTE : self.func_person_note,
TOKEN__COMM : self.func_person_note,
# +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1}
TOKEN_RFN : self.func_person_attr,
@ -500,6 +504,9 @@ class GedcomParser(UpdateCallback):
TOKEN_HUSB : self.func_event_husb,
TOKEN_WIFE : self.func_event_wife,
TOKEN_FAMC : self.func_person_birth_famc,
# Not legal, but inserted by Ultimate Family Tree
TOKEN_CHAN : self.func_ignore,
TOKEN_QUAY : self.func_ignore,
}
self.adopt_parse_tbl = {
@ -526,6 +533,9 @@ class GedcomParser(UpdateCallback):
TOKEN_TEMP : self.func_ignore,
TOKEN_OBJE : self.func_event_object,
TOKEN_FAMC : self.func_person_adopt_famc,
# Not legal, but inserted by Ultimate Family Tree
TOKEN_CHAN : self.func_ignore,
TOKEN_QUAY : self.func_ignore,
}
self.famc_parse_tbl = {
@ -664,6 +674,7 @@ class GedcomParser(UpdateCallback):
# +1 RIN <AUTOMATED_RECORD_ID> {0:1}
# +1 <<CHANGE_DATE>> {0:1}
TOKEN_CHAN : self.func_family_chan,
TOKEN_ENDL : self.func_ignore,
TOKEN_ADDR : self.func_family_addr,
TOKEN_RIN : self.func_family_cust_attr,
@ -698,6 +709,8 @@ class GedcomParser(UpdateCallback):
TOKEN_DATA : self.func_ignore,
TOKEN_TYPE : self.func_source_attr,
TOKEN_CALN : self.func_ignore,
# not legal, but Ultimate Family Tree does this
TOKEN_DATE : self.func_ignore,
TOKEN_IGNORE: self.func_ignore,
}
@ -743,6 +756,8 @@ class GedcomParser(UpdateCallback):
TOKEN_SOUR : self.func_event_place_sour,
TOKEN__LOC : self.func_ignore,
TOKEN_MAP : self.func_place_map,
# Not legal, but generated by Ultimate Family Tree
TOKEN_QUAY : self.func_ignore,
}
self.place_map_tbl = {
@ -760,6 +775,43 @@ class GedcomParser(UpdateCallback):
TOKEN_ADOP : self.func_person_adopt_famc_adopt,
}
self.opt_note_tbl = {
TOKEN_RNOTE: self.func_optional_note,
TOKEN_NOTE: self.func_optional_note,
}
self.srcref_data_tbl = {
TOKEN_DATE : self.func_source_data_date,
TOKEN_TEXT : self.func_source_data_text,
TOKEN_RNOTE: self.func_source_data_note,
TOKEN_NOTE : self.func_source_data_note,
}
self.header_sour = {
TOKEN_SOUR : self.func_header_sour,
TOKEN_NAME : self.func_ignore,
TOKEN_VERS : self.func_header_vers,
TOKEN_FILE : self.func_header_file,
TOKEN_COPR : self.func_header_copr,
TOKEN_SUBM : self.func_header_subm,
TOKEN_CORP : self.func_ignore,
TOKEN_DATA : self.func_ignore,
TOKEN_SUBN : self.func_ignore,
TOKEN_LANG : self.func_ignore,
TOKEN_TIME : self.func_ignore,
TOKEN_DEST : self.func_header_dest,
TOKEN_CHAR : self.func_ignore,
TOKEN_GEDC : self.func_ignore,
TOKEN__SCHEMA: self.func_ignore,
TOKEN_PLAC : self.func_header_plac,
TOKEN_DATE : self.func_header_date,
TOKEN_NOTE : self.func_header_note,
}
self.header_subm = {
TOKEN_NAME : self.func_header_subm_name,
}
# look for existing place titles, build a map
self.place_names = {}
cursor = dbase.get_place_cursor()
@ -1045,8 +1097,8 @@ class GedcomParser(UpdateCallback):
def backup(self):
"""
Sets the backup flag so that the current line can be accessed by the next
level up.
Sets the backup flag so that the current line can be accessed by the
next level up.
"""
self.backoff = True
@ -1057,6 +1109,7 @@ class GedcomParser(UpdateCallback):
no_magic = self.maxpeople < 1000
self.trans = self.dbase.transaction_begin("", not use_trans, no_magic)
self.debug = False
self.dbase.disable_signals()
self.parse_header_head()
self.parse_header_source()
@ -1161,7 +1214,7 @@ class GedcomParser(UpdateCallback):
else:
func = func_map.get(line.token, default)
if self.debug:
print line
print line, func
func(line, state)
def func_undefined(self, line, state):
@ -2232,7 +2285,10 @@ class GedcomParser(UpdateCallback):
"""
handle = self.find_family_handle(self.fid_map[line.data])
state.person.add_family_handle(handle)
self.parse_optional_note(self.person, 2)
sub_state = GedcomUtils.CurrentState(level=state.level+1)
sub_state.obj = state.person
self.parse_level(sub_state, self.opt_note_tbl, self.func_ignore)
def func_person_asso(self, line, state):
"""
@ -2508,7 +2564,7 @@ class GedcomParser(UpdateCallback):
sub_state.mrel = None
sub_state.frel = None
self.parse_level(sub_state, self.family_rel_tbl, self.func_undefined)
self.parse_level(sub_state, self.family_rel_tbl, self.func_ignore)
child = self.find_or_create_person(self.pid_map[line.data])
@ -3200,7 +3256,7 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
handle = self.find_family_handle(line.data.strip()[1:-1])
handle = self.find_family_handle(self.fid_map[line.data])
if state.person.get_main_parents_family_handle() == handle:
state.person.set_main_parent_family_handle(None)
@ -3342,10 +3398,19 @@ class GedcomParser(UpdateCallback):
@param state: The current state
@type state: CurrentState
"""
date, text = self.parse_source_data(state.level+2)
if date:
state.src_ref.set_date_object(date)
state.src_ref.set_text(text)
sub_state = GedcomUtils.CurrentState(level=state.level+1)
sub_state.src_ref = state.src_ref
self.parse_level(sub_state, self.srcref_data_tbl, self.func_undefined)
def func_source_data_date(self, line, state):
    """
    Hand the data of a DATE line to the source reference being built.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    source_ref = state.src_ref
    source_ref.set_date_object(line.data)
def func_source_data_text(self, line, state):
    """
    Hand the data of a TEXT line to the source reference being built.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    source_ref = state.src_ref
    source_ref.set_text(line.data)
def func_source_data_note(self, line, state):
    """
    Pass a NOTE/RNOTE line to parse_note(), targeting the source
    reference held in the state, at the state's level.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    target = state.src_ref
    self.parse_note(line, target, state.level)
def func_srcref_obje(self, line, state):
"""
@ -3982,181 +4047,153 @@ class GedcomParser(UpdateCallback):
famc_handle = self.find_family_handle(mapped_id)
self.person.add_parent_family_handle(famc_handle)
def func_optional_note(self, line, state):
    """
    Pass a NOTE/RNOTE line to parse_note(), targeting the object stored
    in state.obj, at the state's level.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    target = state.obj
    self.parse_note(line, target, state.level)
def parse_header_source(self):
    """
    Run parse_level() over the header with a fresh level-1 state,
    dispatching through the header_sour table; tokens missing from the
    table go to func_undefined.
    """
    header_state = GedcomUtils.CurrentState(level=1)
    self.parse_level(header_state, self.header_sour, self.func_undefined)
def func_header_sour(self, line, state):
    """
    Handle the header SOUR line: look up the matching entry in the
    gedmap, pass its CONC setting to the lexer and flag the known
    generators (FTW, Ancestry.com Family Trees).

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    generator = line.data
    self.gedsource = self.gedmap.get_from_source_tag(generator)
    self.lexer.set_broken_conc(self.gedsource.get_conc())

    if generator == "FTW":
        self.is_ftw = True
    elif generator == "Ancestry.com Family Trees":
        self.is_ancestry_com = True

    # Always recorded: the VERS and DEST handlers read state.genby.
    state.genby = generator
def func_header_vers(self, line, state):
    """
    Store "<generator> <version>" as the 'Generated by' data item of
    the default source. Does nothing when no default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not self.use_def_src:
        return
    generated_by = "%s %s" % (state.genby, line.data)
    self.def_src.set_data_item('Generated by', generated_by)
def func_header_file(self, line, state):
    """
    Title the default source "Import from <file>", where <file> is the
    last path component of the header FILE value. Does nothing when no
    default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not self.use_def_src:
        return
    base = os.path.basename(line.data)
    # basename() only strips the local separator; also drop anything
    # before the last backslash.
    filename = base.rsplit('\\', 1)[-1]
    self.def_src.set_title(_("Import from %s") % filename)
def func_header_copr(self, line, state):
    """
    Store the header COPR value as the publication info of the default
    source. Does nothing when no default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not self.use_def_src:
        return
    self.def_src.set_publication_info(line.data)
def func_header_subm(self, line, state):
    """
    Parse the lines below the header SUBM line, one level deeper than
    the current state, through the header_subm table; tokens missing
    from the table go to func_ignore.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    child_level = state.level + 1
    subm_state = GedcomUtils.CurrentState(level=child_level)
    self.parse_level(subm_state, self.header_subm, self.func_ignore)
def func_header_dest(self, line, state):
    """
    Handle the header DEST line. Only when the file was generated by
    GRAMPS is the DEST value used to select the gedmap entry and the
    lexer's CONC setting.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if state.genby != "GRAMPS":
        return
    self.gedsource = self.gedmap.get_from_source_tag(line.data)
    self.lexer.set_broken_conc(self.gedsource.get_conc())
def func_header_plac(self, line, state):
    """
    Handle the header PLAC line by calling parse_place_form() with
    level 2.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    form_level = 2
    self.parse_place_form(form_level)
def func_header_date(self, line, state):
    """
    Store the header DATE value as the 'Creation date' data item of the
    default source. Does nothing when no default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not self.use_def_src:
        return
    self.def_src.set_data_item('Creation date', line.data)
def func_header_note(self, line, state):
    """
    Attach the header NOTE to the default source. Does nothing when no
    default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if self.use_def_src:
        # parse_note() accepts (line, obj, level) only; the trailing ''
        # that used to be passed here made the call raise TypeError.
        # NOTE(review): the level is kept at the literal 2 used before;
        # confirm whether state.level is the intended value.
        self.parse_note(line, self.def_src, 2)
def func_header_subm_name(self, line, state):
    """
    Store the submitter NAME value as the author of the default source.
    Does nothing when no default source is used.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not self.use_def_src:
        return
    self.def_src.set_author(line.data)
###############################################################################
def parse_note(self, line, obj, level):
# reference to a named note defined elsewhere
if line.token == TOKEN_RNOTE:
obj.add_note(line.data.strip())
obj.add_note(self.nid_map[line.data.strip()])
else:
new_note = RelLib.Note(line.data)
new_note.set_gramps_id(self.dbase.find_next_note_gramps_id())
new_note.set_handle(Utils.create_id())
self.dbase.commit_note(new_note,self.trans)
obj.add_note(new_note.handle)
self.skip_subordinate_levels(level+1)
def parse_comment(self, line, obj, level, old_note):
    """
    Forward to parse_note_base(), passing obj.add_note as the final
    argument, and return whatever parse_note_base() returns.
    """
    add_note = obj.add_note
    return self.parse_note_base(line, obj, level, old_note, add_note)
def parse_optional_note(self, obj, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level`, handing every NOTE or RNOTE line to parse_note() for `obj`.
Any other token is reported through not_recognized(level+1).

NOTE(review): `note` is never reassigned, so the value returned when the
level finishes is always the empty string.
"""
note = ""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
return note
elif line.token == TOKEN_NOTE or line.token == TOKEN_RNOTE:
self.parse_note(line, obj, level)
else:
self.not_recognized(level+1)
# NOTE(review): indentation is lost in this view; if this return follows
# the `while True` loop it can never be reached -- confirm.
return None
def parse_source_reference(self, src_ref, level, handle):
"""Reads the data associated with a SOUR reference"""
state = GedcomUtils.CurrentState()
state.level = level+1
state.level = level
state.src_ref = src_ref
state.handle = handle
self.parse_level(state, self.srcref_parse_tbl, self.func_ignore)
def parse_source_data(self, level):
"""
Parses the source data.

Reads lines with get_next() until level_is_finished() reports the end of
`level`. The data of the DATE and TEXT lines is kept (a later line of the
same kind overwrites an earlier one); NOTE and RNOTE lines are skipped;
any other token is reported through not_recognized(level+1).

@param level: level handed to level_is_finished()
@return: tuple (date, note), each "" when no matching line was seen
"""
date = ""
note = ""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
elif line.token == TOKEN_DATE:
date = line.data
elif line.token == TOKEN_TEXT:
# the TEXT value is what gets returned as `note`
note = line.data
elif line.token == TOKEN_NOTE:
continue
elif line.token == TOKEN_RNOTE:
continue
else:
self.not_recognized(level+1)
return (date, note)
def parse_header_head(self):
    """Validates that this is a valid GEDCOM file"""
    first_line = self.lexer.readline()
    if first_line.token == TOKEN_HEAD:
        return
    # The very first line must carry the HEAD token.
    raise Errors.GedcomError("%s is not a GEDCOM file" % self.filename)
def parse_header_source(self):
"""
Loop-based header parser: reads lines with get_next() until
level_is_finished(line, 1) and handles each header token inline.

SOUR selects the gedmap entry and the lexer's CONC setting and records the
generator name; VERS, FILE, COPR, DATE and NOTE update the default source
when use_def_src is set; the remaining known tokens are skipped or handed
to their own parse_* helper; anything else goes to not_recognized(2).
"""
# name of the generating program, taken from the SOUR line
genby = ""
while True:
line = self.get_next()
if self.level_is_finished(line, 1):
return
elif line.token == TOKEN_SOUR:
self.gedsource = self.gedmap.get_from_source_tag(line.data)
self.lexer.set_broken_conc(self.gedsource.get_conc())
if line.data == "FTW":
self.is_ftw = True
if line.data == "Ancestry.com Family Trees":
self.is_ancestry_com = True
genby = line.data
elif line.token == TOKEN_NAME:
pass
elif line.token == TOKEN_VERS:
if self.use_def_src:
self.def_src.set_data_item('Generated by',"%s %s" %
(genby, line.data))
elif line.token == TOKEN_FILE:
if self.use_def_src:
# keep only the part after the last path separator or backslash
filename = os.path.basename(line.data).split('\\')[-1]
self.def_src.set_title(_("Import from %s") % filename)
elif line.token == TOKEN_COPR:
if self.use_def_src:
self.def_src.set_publication_info(line.data)
elif line.token == TOKEN_SUBM:
self.parse_subm(2)
elif line.token in (TOKEN_CORP, TOKEN_DATA, TOKEN_SUBN,
TOKEN_LANG, TOKEN_TIME):
self.skip_subordinate_levels(2)
elif line.token == TOKEN_DEST:
# DEST only overrides the gedmap entry for files written by GRAMPS
if genby == "GRAMPS":
self.gedsource = self.gedmap.get_from_source_tag(line.data)
self.lexer.set_broken_conc(self.gedsource.get_conc())
elif line.token == TOKEN_CHAR:
# NOTE(review): `pass` directly followed by a call; the two lines
# probably come from different revisions of this branch -- confirm.
pass
self.skip_subordinate_levels(2)
elif line.token == TOKEN_GEDC:
self.skip_subordinate_levels(2)
elif line.token == TOKEN__SCHEMA:
self.parse_ftw_schema(2)
elif line.token == TOKEN_PLAC:
self.parse_place_form(2)
elif line.token == TOKEN_DATE:
self.parse_date(2)
if self.use_def_src:
self.def_src.set_data_item('Creation date', line.data)
elif line.token == TOKEN_NOTE:
if self.use_def_src:
# NOTE(review): parse_note() in this file is declared as
# (self, line, obj, level); the extra '' argument looks stale.
self.parse_note(line, self.def_src, 2, '')
elif line.token == TOKEN_UNKNOWN:
self.skip_subordinate_levels(2)
else:
self.not_recognized(2)
def parse_subm(self, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level`. A NAME line sets the author of the default source when
use_def_src is set.
"""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
elif line.token == TOKEN_NAME:
if self.use_def_src:
self.def_src.set_author(line.data)
# NOTE(review): indentation is lost in this view; presumably this `else`
# pairs with the token test above, so other tokens are skipped -- confirm.
else:
self.skip_subordinate_levels(2)
def parse_ftw_schema(self, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level`. INDI lines are handed to parse_ftw_indi_schema(level+1) and FAM
lines to parse_ftw_fam_schema(level+1); any other token is reported
through not_recognized(2).
"""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
elif line.token == TOKEN_INDI:
self.parse_ftw_indi_schema(level+1)
elif line.token == TOKEN_FAM:
self.parse_ftw_fam_schema(level+1)
else:
self.not_recognized(2)
def parse_ftw_indi_schema(self, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level`, storing for each line the result of parse_label(level+1) in
GED_2_GRAMPS under the line's token.
"""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
else:
# keyed by line.token, whereas parse_ftw_fam_schema keys by token_text
GED_2_GRAMPS[line.token] = self.parse_label(level+1)
def parse_label(self, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level` and returns the data of the last LABL line seen, or None when
there was none. Any other token is reported through not_recognized(2).
"""
value = None
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
elif line.token == TOKEN_LABL:
value = line.data
else:
self.not_recognized(2)
return value
def parse_ftw_fam_schema(self, level):
"""
Reads lines with get_next() until level_is_finished() reports the end of
`level`, storing for each line the result of parse_label(level+1) in
GED_2_FAMILY_CUSTOM under the line's token_text.
"""
while True:
line = self.get_next()
if self.level_is_finished(line, level):
break
else:
# keyed by line.token_text, whereas parse_ftw_indi_schema keys by token
GED_2_FAMILY_CUSTOM[line.token_text] = self.parse_label(level+1)
def skip_subordinate_levels(self, level):
while True:

View File

@ -66,9 +66,9 @@ class Source(MediaBase, NoteBase, PrimaryObject):
unicode(self.author), unicode(self.pubinfo),
NoteBase.serialize(self),
MediaBase.serialize(self), unicode(self.abbrev),
self.change,self.datamap,
self.change, self.datamap,
[rr.serialize() for rr in self.reporef_list],
self.marker.serialize(),self.private)
self.marker.serialize(), self.private)
def unserialize(self, data):
"""