* src/GrampsDbUtils/_GedcomParse.py: pylint fixes

2007-09-06  Don Allingham  <don@gramps-project.org>


svn: r8938
This commit is contained in:
Don Allingham 2007-09-07 04:58:02 +00:00
parent 19e136f0f8
commit e314d3b99d
2 changed files with 74 additions and 25 deletions

View File

@ -1,3 +1,6 @@
2007-09-06 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py: pylint fixes
2007-09-06 Don Allingham <don@gramps-project.org> 2007-09-06 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_WriteGedcom.py: more pylint fixes * src/GrampsDbUtils/_WriteGedcom.py: more pylint fixes

View File

@ -237,8 +237,33 @@ CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$") CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$") PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
class StageOne: #-------------------------------------------------------------------------
#
# is_xref_value
#
#-------------------------------------------------------------------------
def is_xref_value(value):
    """
    Returns True if value is in the form of an XREF value, False otherwise.

    We assume that if we have a leading '@' character, then we are okay.
    An empty string or None is never an XREF value.
    """
    # bool() keeps the result a real boolean; a bare "value and ..." would
    # hand back the falsy operand itself ('' or None) for empty input.
    return bool(value) and value[0] == '@'
#-------------------------------------------------------------------------
#
# StageOne
#
#-------------------------------------------------------------------------
class StageOne:
"""
The StageOne parser scans the file quickly, looking for a few things. This
includes:
1. Character set encoding
2. Number of people and families in the list
3. Child to family references, since Ancestry.com creates GEDCOM files
without the FAMC references.
"""
def __init__(self, ifile): def __init__(self, ifile):
self.ifile = ifile self.ifile = ifile
self.famc = {} self.famc = {}
@ -247,25 +272,37 @@ class StageOne:
self.pcnt = 0 self.pcnt = 0
self.lcnt = 0 self.lcnt = 0
def parse(self): def __detect_file_decoder(self, input_file):
current = "" """
Detects the file encoding of the file by looking for a BOM
line = self.ifile.read(2) (byte order marker) in the GEDCOM file. If we detect a UTF-16
encoded file, we must connect to a wrapper using the codecs
package.
"""
line = input_file.read(2)
if line == "\xef\xbb": if line == "\xef\xbb":
self.ifile.read(1) input_file.read(1)
self.enc = "UTF8" self.enc = "UTF8"
self.reader = self.ifile return input_file
elif line == "\xff\xfe": elif line == "\xff\xfe":
self.enc = "UTF16" self.enc = "UTF16"
self.ifile.seek(0) input_file.seek(0)
self.reader = codecs.EncodedFile(self.ifile, 'utf8', 'utf16') return codecs.EncodedFile(input_file, 'utf8', 'utf16')
elif line[0] == "\x00" or line[1] == "\x00": elif line[0] == "\x00" or line[1] == "\x00":
raise Errors.GedcomError(BAD_UTF16) raise Errors.GedcomError(BAD_UTF16)
else: else:
self.ifile.seek(0) input_file.seek(0)
self.reader = self.ifile return input_file
for line in self.reader: def parse(self):
"""
Parse the input file.
"""
current = ""
reader = self.__detect_file_decoder(self.ifile)
for line in reader:
line = line.strip() line = line.strip()
if not line: if not line:
continue continue
@ -275,10 +312,7 @@ class StageOne:
try: try:
(level, key, value) = data[:3] (level, key, value) = data[:3]
value = value.strip() value = value.strip()
try: level = int(level)
level = int(level)
except:
level = 0
key = key.strip() key = key.strip()
except: except:
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt) LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
@ -286,17 +320,16 @@ class StageOne:
if level == 0 and key[0] == '@': if level == 0 and key[0] == '@':
if value == ("FAM", "FAMILY") : if value == ("FAM", "FAMILY") :
current = key.strip() current = key.strip()[1:-1]
current = current[1:-1]
elif value == ("INDI", "INDIVIDUAL"): elif value == ("INDI", "INDIVIDUAL"):
self.pcnt += 1 self.pcnt += 1
elif key in ("HUSB", "HUSBAND", "WIFE") and value and value[0] == '@': elif key in ("HUSB", "HUSBAND", "WIFE") and is_xref_value(value):
value = value[1:-1] value = value[1:-1]
if self.fams.has_key(value): if self.fams.has_key(value):
self.fams[value].append(current) self.fams[value].append(current)
else: else:
self.fams[value] = [current] self.fams[value] = [current]
elif key in ("CHIL", "CHILD") and value and value[0] == '@': elif key in ("CHIL", "CHILD") and is_xref_value(value):
value = value[1:-1] value = value[1:-1]
if self.famc.has_key(value): if self.famc.has_key(value):
self.famc[value].append(current) self.famc[value].append(current)
@ -307,22 +340,40 @@ class StageOne:
self.enc = value self.enc = value
def get_famc_map(self): def get_famc_map(self):
"""
Returns the Person to Child Family map
"""
return self.famc return self.famc
def get_fams_map(self): def get_fams_map(self):
"""
Returns the Person to Family map (where the person is a spouse)
"""
return self.fams return self.fams
def get_encoding(self): def get_encoding(self):
"""
Returns the detected encoding
"""
return self.enc.upper() return self.enc.upper()
def set_encoding(self, enc): def set_encoding(self, enc):
"""
Forces the encoding
"""
assert(type(enc) == str or type(enc) == unicode) assert(type(enc) == str or type(enc) == unicode)
self.enc = enc self.enc = enc
def get_person_count(self): def get_person_count(self):
"""
Returns the number of INDI records found
"""
return self.pcnt return self.pcnt
def get_line_count(self): def get_line_count(self):
"""
Returns the number of lines in the file
"""
return self.lcnt return self.lcnt
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
@ -921,11 +972,6 @@ class GedcomParser(UpdateCallback):
self.dbase.enable_signals() self.dbase.enable_signals()
self.dbase.request_rebuild() self.dbase.request_rebuild()
#-------------------------------------------------------------------------
#
# Create new objects
#
#-------------------------------------------------------------------------
def __find_from_handle(self, gramps_id, table): def __find_from_handle(self, gramps_id, table):
""" """
Finds a handle corresponding the the specified GRAMPS ID. The passed Finds a handle corresponding the the specified GRAMPS ID. The passed