* src/GrampsDbUtils/_GedcomParse.py: pylint fixes
2007-09-06 Don Allingham <don@gramps-project.org> svn: r8938
This commit is contained in:
parent
19e136f0f8
commit
e314d3b99d
@ -1,3 +1,6 @@
|
|||||||
|
2007-09-06 Don Allingham <don@gramps-project.org>
|
||||||
|
* src/GrampsDbUtils/_GedcomParse.py: pylint fixes
|
||||||
|
|
||||||
2007-09-06 Don Allingham <don@gramps-project.org>
|
2007-09-06 Don Allingham <don@gramps-project.org>
|
||||||
* src/GrampsDbUtils/_WriteGedcom.py: more pylint fixes
|
* src/GrampsDbUtils/_WriteGedcom.py: more pylint fixes
|
||||||
|
|
||||||
|
@ -237,8 +237,33 @@ CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
|
|||||||
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
||||||
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
|
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
|
||||||
|
|
||||||
class StageOne:
|
#-------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# is_xref_value
|
||||||
|
#
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
def is_xref_value(value):
|
||||||
|
"""
|
||||||
|
Returns True if value is in the form of a XREF value. We assume that
|
||||||
|
if we have a leading '@' character, then we are okay.
|
||||||
|
"""
|
||||||
|
return value and value[0] == '@'
|
||||||
|
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# StageOne
|
||||||
|
#
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
class StageOne:
|
||||||
|
"""
|
||||||
|
The StageOne parser scans the file quickly, looking for a few things. This
|
||||||
|
includes:
|
||||||
|
|
||||||
|
1. Character set encoding
|
||||||
|
2. Number of people and families in the list
|
||||||
|
3. Child to family references, since Ancestry.com creates GEDCOM files
|
||||||
|
without the FAMC references.
|
||||||
|
"""
|
||||||
def __init__(self, ifile):
|
def __init__(self, ifile):
|
||||||
self.ifile = ifile
|
self.ifile = ifile
|
||||||
self.famc = {}
|
self.famc = {}
|
||||||
@ -247,25 +272,37 @@ class StageOne:
|
|||||||
self.pcnt = 0
|
self.pcnt = 0
|
||||||
self.lcnt = 0
|
self.lcnt = 0
|
||||||
|
|
||||||
def parse(self):
|
def __detect_file_decoder(self, input_file):
|
||||||
current = ""
|
"""
|
||||||
|
Detects the file encoding of the file by looking for a BOM
|
||||||
line = self.ifile.read(2)
|
(byte order marker) in the GEDCOM file. If we detect a UTF-16
|
||||||
|
encoded file, we must connect to a wrapper using the codecs
|
||||||
|
package.
|
||||||
|
"""
|
||||||
|
line = input_file.read(2)
|
||||||
if line == "\xef\xbb":
|
if line == "\xef\xbb":
|
||||||
self.ifile.read(1)
|
input_file.read(1)
|
||||||
self.enc = "UTF8"
|
self.enc = "UTF8"
|
||||||
self.reader = self.ifile
|
return input_file
|
||||||
elif line == "\xff\xfe":
|
elif line == "\xff\xfe":
|
||||||
self.enc = "UTF16"
|
self.enc = "UTF16"
|
||||||
self.ifile.seek(0)
|
input_file.seek(0)
|
||||||
self.reader = codecs.EncodedFile(self.ifile, 'utf8', 'utf16')
|
return codecs.EncodedFile(input_file, 'utf8', 'utf16')
|
||||||
elif line[0] == "\x00" or line[1] == "\x00":
|
elif line[0] == "\x00" or line[1] == "\x00":
|
||||||
raise Errors.GedcomError(BAD_UTF16)
|
raise Errors.GedcomError(BAD_UTF16)
|
||||||
else:
|
else:
|
||||||
self.ifile.seek(0)
|
input_file.seek(0)
|
||||||
self.reader = self.ifile
|
return input_file
|
||||||
|
|
||||||
for line in self.reader:
|
def parse(self):
|
||||||
|
"""
|
||||||
|
Parse the input file.
|
||||||
|
"""
|
||||||
|
current = ""
|
||||||
|
|
||||||
|
reader = self.__detect_file_decoder(self.ifile)
|
||||||
|
|
||||||
|
for line in reader:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
@ -275,10 +312,7 @@ class StageOne:
|
|||||||
try:
|
try:
|
||||||
(level, key, value) = data[:3]
|
(level, key, value) = data[:3]
|
||||||
value = value.strip()
|
value = value.strip()
|
||||||
try:
|
level = int(level)
|
||||||
level = int(level)
|
|
||||||
except:
|
|
||||||
level = 0
|
|
||||||
key = key.strip()
|
key = key.strip()
|
||||||
except:
|
except:
|
||||||
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
|
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
|
||||||
@ -286,17 +320,16 @@ class StageOne:
|
|||||||
|
|
||||||
if level == 0 and key[0] == '@':
|
if level == 0 and key[0] == '@':
|
||||||
if value == ("FAM", "FAMILY") :
|
if value == ("FAM", "FAMILY") :
|
||||||
current = key.strip()
|
current = key.strip()[1:-1]
|
||||||
current = current[1:-1]
|
|
||||||
elif value == ("INDI", "INDIVIDUAL"):
|
elif value == ("INDI", "INDIVIDUAL"):
|
||||||
self.pcnt += 1
|
self.pcnt += 1
|
||||||
elif key in ("HUSB", "HUSBAND", "WIFE") and value and value[0] == '@':
|
elif key in ("HUSB", "HUSBAND", "WIFE") and is_xref_value(value):
|
||||||
value = value[1:-1]
|
value = value[1:-1]
|
||||||
if self.fams.has_key(value):
|
if self.fams.has_key(value):
|
||||||
self.fams[value].append(current)
|
self.fams[value].append(current)
|
||||||
else:
|
else:
|
||||||
self.fams[value] = [current]
|
self.fams[value] = [current]
|
||||||
elif key in ("CHIL", "CHILD") and value and value[0] == '@':
|
elif key in ("CHIL", "CHILD") and is_xref_value(value):
|
||||||
value = value[1:-1]
|
value = value[1:-1]
|
||||||
if self.famc.has_key(value):
|
if self.famc.has_key(value):
|
||||||
self.famc[value].append(current)
|
self.famc[value].append(current)
|
||||||
@ -307,22 +340,40 @@ class StageOne:
|
|||||||
self.enc = value
|
self.enc = value
|
||||||
|
|
||||||
def get_famc_map(self):
|
def get_famc_map(self):
|
||||||
|
"""
|
||||||
|
Returns the Person to Child Family map
|
||||||
|
"""
|
||||||
return self.famc
|
return self.famc
|
||||||
|
|
||||||
def get_fams_map(self):
|
def get_fams_map(self):
|
||||||
|
"""
|
||||||
|
Returns the Person to Family map (where the person is a spouse)
|
||||||
|
"""
|
||||||
return self.fams
|
return self.fams
|
||||||
|
|
||||||
def get_encoding(self):
|
def get_encoding(self):
|
||||||
|
"""
|
||||||
|
Returns the detected encoding
|
||||||
|
"""
|
||||||
return self.enc.upper()
|
return self.enc.upper()
|
||||||
|
|
||||||
def set_encoding(self, enc):
|
def set_encoding(self, enc):
|
||||||
|
"""
|
||||||
|
Forces the encoding
|
||||||
|
"""
|
||||||
assert(type(enc) == str or type(enc) == unicode)
|
assert(type(enc) == str or type(enc) == unicode)
|
||||||
self.enc = enc
|
self.enc = enc
|
||||||
|
|
||||||
def get_person_count(self):
|
def get_person_count(self):
|
||||||
|
"""
|
||||||
|
Returns the number of INDI records found
|
||||||
|
"""
|
||||||
return self.pcnt
|
return self.pcnt
|
||||||
|
|
||||||
def get_line_count(self):
|
def get_line_count(self):
|
||||||
|
"""
|
||||||
|
Returns the number of lines in the file
|
||||||
|
"""
|
||||||
return self.lcnt
|
return self.lcnt
|
||||||
|
|
||||||
#-------------------------------------------------------------------------
|
#-------------------------------------------------------------------------
|
||||||
@ -921,11 +972,6 @@ class GedcomParser(UpdateCallback):
|
|||||||
self.dbase.enable_signals()
|
self.dbase.enable_signals()
|
||||||
self.dbase.request_rebuild()
|
self.dbase.request_rebuild()
|
||||||
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Create new objects
|
|
||||||
#
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
def __find_from_handle(self, gramps_id, table):
|
def __find_from_handle(self, gramps_id, table):
|
||||||
"""
|
"""
|
||||||
Finds a handle corresponding to the specified GRAMPS ID. The passed
|
Finds a handle corresponding to the specified GRAMPS ID. The passed
|
||||||
|
Loading…
x
Reference in New Issue
Block a user