Cleaner method of handling whitespace. Contributed by Tim Lyons (tim.g.lyons@googlemail.com)
svn: r11601
This commit is contained in:
parent
4f07e2ed52
commit
ba48b9a6df
@ -255,7 +255,10 @@ def extract_date(text):
|
|||||||
"""
|
"""
|
||||||
dateobj = gen.lib.Date()
|
dateobj = gen.lib.Date()
|
||||||
|
|
||||||
text = text.replace('BET ABT','EST BET') # Horrible hack for Tim Lyons
|
text = text.replace('BET ABT','EST BET') # Horrible hack for importing
|
||||||
|
# illegal GEDCOM from
|
||||||
|
# Apple Macintosh Classic
|
||||||
|
# 'Gene' program
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# extract out the MOD line
|
# extract out the MOD line
|
||||||
@ -352,9 +355,13 @@ class Reader:
|
|||||||
def __fix_token_conc(self, data):
|
def __fix_token_conc(self, data):
|
||||||
line = self.current_list[0]
|
line = self.current_list[0]
|
||||||
if len(line[2]) == 4:
|
if len(line[2]) == 4:
|
||||||
|
# This deals with lines of the form
|
||||||
|
# 0 @<XREF:NOTE>@ NOTE
|
||||||
|
# 1 CONC <SUBMITTER TEXT>
|
||||||
# The previous line contains only a tag and no data so concat a
|
# The previous line contains only a tag and no data so concat a
|
||||||
# space to separate the new line from the tag. This prevents the
|
# space to separate the new line from the tag. This prevents the
|
||||||
# first letter of the new line being lost later.
|
# first letter of the new line being lost later
|
||||||
|
# in _GedcomParse.__parse_record
|
||||||
new_value = line[2] + ' ' + data[2]
|
new_value = line[2] + ' ' + data[2]
|
||||||
else:
|
else:
|
||||||
new_value = line[2] + data[2]
|
new_value = line[2] + data[2]
|
||||||
@ -362,29 +369,34 @@ class Reader:
|
|||||||
|
|
||||||
def __readahead(self):
|
def __readahead(self):
|
||||||
while len(self.current_list) < 5:
|
while len(self.current_list) < 5:
|
||||||
linetmp = self.ifile.readline()
|
line = self.ifile.readline()
|
||||||
self.index += 1
|
self.index += 1
|
||||||
if not linetmp:
|
if not line:
|
||||||
self.eof = True
|
self.eof = True
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# the space ensures no trailing whitespace on last parm
|
# According to the GEDCOM 5.5 standard,
|
||||||
line = linetmp.strip(' \n\r').split(None, 2) + ['']
|
# Chapter 1 subsection Grammar
|
||||||
# however keep trailing whitespace on notes only
|
#"leading whitespace preceding a GEDCOM line should be ignored"
|
||||||
if line[1] == 'CONC' or line[2].startswith('NOTE'):
|
# We will also strip the terminator which is any combination
|
||||||
line = linetmp.strip('\n\r').split(None, 2) + ['']
|
# of carriage_return and line_feed
|
||||||
elif line[1] == 'CONT':
|
line = line.lstrip(' ').rstrip('\n\r')
|
||||||
# Make sure that whitespace is preserved at start and
|
# split into level+delim+rest
|
||||||
# end of CONT data
|
line = line.partition(' ')
|
||||||
part_line = linetmp.strip('\n\r').partition(' CONT ')
|
|
||||||
line = [part_line[0]] + ['CONT'] + [part_line[2]] + ['']
|
|
||||||
level = int(line[0])
|
level = int(line[0])
|
||||||
|
# there should only be one space after the level,
|
||||||
|
# but we can ignore more,
|
||||||
|
# then split into tag+delim+line_value
|
||||||
|
# or xref_id+delim+rest
|
||||||
|
line = line[2].lstrip(' ').partition(' ')
|
||||||
|
tag = line[0]
|
||||||
|
line_value = line[2]
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
token = GedcomTokens.TOKENS.get(line[1], GedcomTokens.TOKEN_UNKNOWN)
|
token = GedcomTokens.TOKENS.get(tag, GedcomTokens.TOKEN_UNKNOWN)
|
||||||
data = (level, token, line[2], line[1], self.index)
|
data = (level, token, line_value, tag, self.index)
|
||||||
|
|
||||||
func = self.func_map.get(data[1])
|
func = self.func_map.get(data[1])
|
||||||
if func:
|
if func:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user