Enhanced GEDCOM parsing
svn: r568
This commit is contained in:
parent
fa1d21eca1
commit
bc63d8932e
@ -45,13 +45,10 @@ db = None
|
|||||||
callback = None
|
callback = None
|
||||||
glade_file = None
|
glade_file = None
|
||||||
clear_data = 0
|
clear_data = 0
|
||||||
is_ftw = 0
|
|
||||||
|
|
||||||
def nocnv(s):
|
def nocnv(s):
|
||||||
return s
|
return s
|
||||||
|
|
||||||
_cnv = nocnv
|
|
||||||
|
|
||||||
photo_types = [ "jpeg", "bmp", "pict", "pntg", "tpic", "png", "gif",
|
photo_types = [ "jpeg", "bmp", "pict", "pntg", "tpic", "png", "gif",
|
||||||
"jpg", "tiff", "pcx" ]
|
"jpg", "tiff", "pcx" ]
|
||||||
|
|
||||||
@ -71,8 +68,6 @@ lineRE = re.compile(r"\s*(\d+)\s+(\S+)\s*(.*)$")
|
|||||||
headRE = re.compile(r"\s*(\d+)\s+HEAD")
|
headRE = re.compile(r"\s*(\d+)\s+HEAD")
|
||||||
nameRegexp = re.compile(r"([\S\s]*\S)?\s*/([^/]+)?/\s*,?\s*([\S]+)?")
|
nameRegexp = re.compile(r"([\S\s]*\S)?\s*/([^/]+)?/\s*,?\s*([\S]+)?")
|
||||||
|
|
||||||
placemap = {}
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------
|
#-------------------------------------------------------------------------
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
@ -168,10 +163,12 @@ class GedcomParser:
|
|||||||
self.nmap = {}
|
self.nmap = {}
|
||||||
self.dir_path = os.path.dirname(file)
|
self.dir_path = os.path.dirname(file)
|
||||||
self.localref = 0
|
self.localref = 0
|
||||||
|
self.placemap = {}
|
||||||
|
|
||||||
self.f = open(file,"r")
|
self.f = open(file,"r")
|
||||||
self.index = 0
|
self.index = 0
|
||||||
self.backoff = 0
|
self.backoff = 0
|
||||||
|
self.cnv = nocnv
|
||||||
|
|
||||||
self.file_obj = window.get_widget("file")
|
self.file_obj = window.get_widget("file")
|
||||||
self.encoding_obj = window.get_widget("encoding")
|
self.encoding_obj = window.get_widget("encoding")
|
||||||
@ -201,7 +198,7 @@ class GedcomParser:
|
|||||||
|
|
||||||
def get_next(self):
|
def get_next(self):
|
||||||
if self.backoff == 0:
|
if self.backoff == 0:
|
||||||
self.text = _cnv(string.strip(self.f.readline()))
|
self.text = self.cnv(string.strip(self.f.readline()))
|
||||||
self.index = self.index + 1
|
self.index = self.index + 1
|
||||||
l = string.split(self.text, None, 2)
|
l = string.split(self.text, None, 2)
|
||||||
ln = len(l)
|
ln = len(l)
|
||||||
@ -260,11 +257,6 @@ class GedcomParser:
|
|||||||
self.parse_header_head()
|
self.parse_header_head()
|
||||||
self.parse_header_source()
|
self.parse_header_source()
|
||||||
|
|
||||||
#---------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#---------------------------------------------------------------------
|
|
||||||
def parse_submitter(self):
|
def parse_submitter(self):
|
||||||
matches = self.get_next()
|
matches = self.get_next()
|
||||||
|
|
||||||
@ -382,15 +374,24 @@ class GedcomParser:
|
|||||||
if int(matches[0]) < level:
|
if int(matches[0]) < level:
|
||||||
self.backup()
|
self.backup()
|
||||||
return (mrel,frel)
|
return (mrel,frel)
|
||||||
|
# FTW
|
||||||
elif matches[1] == "_FREL":
|
elif matches[1] == "_FREL":
|
||||||
if string.lower(matches[2]) != "natural":
|
if string.lower(matches[2]) != "natural":
|
||||||
frel = string.capitalize(matches[2])
|
frel = string.capitalize(matches[2])
|
||||||
|
# FTW
|
||||||
elif matches[1] == "_MREL":
|
elif matches[1] == "_MREL":
|
||||||
if string.lower(matches[2]) != "natural":
|
if string.lower(matches[2]) != "natural":
|
||||||
mrel = matches[2]
|
mrel = matches[2]
|
||||||
elif matches[1] == "ADOP":
|
elif matches[1] == "ADOP":
|
||||||
mrel = "Adopted"
|
mrel = "Adopted"
|
||||||
frel = "Adopted"
|
frel = "Adopted"
|
||||||
|
# Legacy
|
||||||
|
elif matches[1] == "_STAT":
|
||||||
|
mrel = matches[2]
|
||||||
|
frel = matches[2]
|
||||||
|
# Legacy _PREF
|
||||||
|
elif matches[1][0] == "_":
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
self.barf(level+1)
|
self.barf(level+1)
|
||||||
|
|
||||||
@ -416,21 +417,23 @@ class GedcomParser:
|
|||||||
mrel,frel = self.parse_ftw_relations(2)
|
mrel,frel = self.parse_ftw_relations(2)
|
||||||
child = self.db.findPerson(matches[2],self.pmap)
|
child = self.db.findPerson(matches[2],self.pmap)
|
||||||
self.family.addChild(child)
|
self.family.addChild(child)
|
||||||
if (mrel == "Birth" or mrel == "") and \
|
|
||||||
(frel == "Birth" or frel == "") :
|
for f in child.getAltFamilyList():
|
||||||
|
if f[0] == self.family:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if (mrel=="Birth" or mrel=="") and (frel=="Birth" or frel==""):
|
||||||
child.setMainFamily(self.family)
|
child.setMainFamily(self.family)
|
||||||
else:
|
else:
|
||||||
if child.getMainFamily() == self.family:
|
if child.getMainFamily() == self.family:
|
||||||
child.setMainFamily(None)
|
child.setMainFamily(None)
|
||||||
child.addAltFamily(self.family,mrel,frel)
|
child.addAltFamily(self.family,mrel,frel)
|
||||||
elif matches[1] == "NCHI" or matches[1] == "RIN" or matches[1] == "SUBM":
|
elif matches[1] == "NCHI":
|
||||||
a = Attribute()
|
a = Attribute()
|
||||||
a.setType("Number of Children")
|
a.setType("Number of Children")
|
||||||
a.setValue(matches[2])
|
a.setValue(matches[2])
|
||||||
self.family.addAttribute(a)
|
self.family.addAttribute(a)
|
||||||
elif matches[1] == "RIN" or matches[1] == "SUBM":
|
elif matches[1] in ["RIN", "SUBM", "REFN","CHAN","SOUR"]:
|
||||||
pass
|
|
||||||
elif matches[1] in ["REFN","CHAN","SOUR"]:
|
|
||||||
self.ignore_sub_junk(2)
|
self.ignore_sub_junk(2)
|
||||||
elif matches[1] == "OBJE":
|
elif matches[1] == "OBJE":
|
||||||
if matches[2] and matches[2][0] == '@':
|
if matches[2] and matches[2][0] == '@':
|
||||||
@ -488,10 +491,16 @@ class GedcomParser:
|
|||||||
self.parse_name(name,2)
|
self.parse_name(name,2)
|
||||||
elif matches[1] == "_UID":
|
elif matches[1] == "_UID":
|
||||||
self.person.setPafUid(matches[2])
|
self.person.setPafUid(matches[2])
|
||||||
elif matches[1] in ["AFN","CHAN","REFN","SOUR"]:
|
elif matches[1] == "ALIA":
|
||||||
self.ignore_sub_junk(2)
|
aka = Name()
|
||||||
elif matches[1] in ["ALIA", "ANCI","DESI","RIN","RFN"]:
|
names = nameRegexp.match(matches[2]).groups()
|
||||||
pass
|
if names[0]:
|
||||||
|
aka.setFirstName(names[0])
|
||||||
|
if names[1]:
|
||||||
|
aka.setSurname(names[1])
|
||||||
|
if names[2]:
|
||||||
|
aka.setSuffix(names[2])
|
||||||
|
self.person.addAlternateName(aka)
|
||||||
elif matches[1] == "OBJE":
|
elif matches[1] == "OBJE":
|
||||||
if matches[2] and matches[2][0] == '@':
|
if matches[2] and matches[2][0] == '@':
|
||||||
self.barf(2)
|
self.barf(2)
|
||||||
@ -523,12 +532,19 @@ class GedcomParser:
|
|||||||
elif matches[1] == "FAMC":
|
elif matches[1] == "FAMC":
|
||||||
type,note = self.parse_famc_type(2)
|
type,note = self.parse_famc_type(2)
|
||||||
family = self.db.findFamily(matches[2],self.fmap)
|
family = self.db.findFamily(matches[2],self.fmap)
|
||||||
|
|
||||||
|
for f in self.person.getAltFamilyList():
|
||||||
|
if f[0] == family:
|
||||||
|
break
|
||||||
|
else:
|
||||||
if type == "" or type == "Birth":
|
if type == "" or type == "Birth":
|
||||||
if self.person.getMainFamily() == None:
|
if self.person.getMainFamily() == None:
|
||||||
self.person.setMainFamily(family)
|
self.person.setMainFamily(family)
|
||||||
else:
|
else:
|
||||||
self.person.addAltFamily(family,"Unknown","Unknown")
|
self.person.addAltFamily(family,"Unknown","Unknown")
|
||||||
else:
|
else:
|
||||||
|
if self.person.getMainFamily() == family:
|
||||||
|
self.person.setMainFamily(None)
|
||||||
self.person.addAltFamily(family,type,type)
|
self.person.addAltFamily(family,type,type)
|
||||||
elif matches[1] == "RESI":
|
elif matches[1] == "RESI":
|
||||||
addr = Address()
|
addr = Address()
|
||||||
@ -555,7 +571,7 @@ class GedcomParser:
|
|||||||
event = Event()
|
event = Event()
|
||||||
event.setName("Adopted")
|
event.setName("Adopted")
|
||||||
self.person.addEvent(event)
|
self.person.addEvent(event)
|
||||||
self.parse_person_event(event,2)
|
self.parse_adopt_event(event,2)
|
||||||
elif matches[1] == "DEAT":
|
elif matches[1] == "DEAT":
|
||||||
event = Event()
|
event = Event()
|
||||||
if self.person.getDeath().getDate() != "" or \
|
if self.person.getDeath().getDate() != "" or \
|
||||||
@ -577,6 +593,10 @@ class GedcomParser:
|
|||||||
self.person.addAttribute(attr)
|
self.person.addAttribute(attr)
|
||||||
else:
|
else:
|
||||||
self.person.addEvent(event)
|
self.person.addEvent(event)
|
||||||
|
elif matches[1] in ["AFN","CHAN","REFN","SOUR"]:
|
||||||
|
self.ignore_sub_junk(2)
|
||||||
|
elif matches[1] in ["ANCI","DESI","RIN","RFN"]:
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
event = Event()
|
event = Event()
|
||||||
n = string.strip(matches[1])
|
n = string.strip(matches[1])
|
||||||
@ -624,7 +644,7 @@ class GedcomParser:
|
|||||||
self.backup()
|
self.backup()
|
||||||
return (string.capitalize(type),note)
|
return (string.capitalize(type),note)
|
||||||
elif matches[1] == "PEDI":
|
elif matches[1] == "PEDI":
|
||||||
type = string.capitalize(matches[2])
|
type = matches[2]
|
||||||
elif matches[1] == "_PRIMARY":
|
elif matches[1] == "_PRIMARY":
|
||||||
type = matches[1]
|
type = matches[1]
|
||||||
elif matches[1] == "NOTE":
|
elif matches[1] == "NOTE":
|
||||||
@ -858,28 +878,20 @@ class GedcomParser:
|
|||||||
source_ref.setBase(self.db.findSource(matches[2],self.smap))
|
source_ref.setBase(self.db.findSource(matches[2],self.smap))
|
||||||
self.parse_source_reference(source_ref,level+1)
|
self.parse_source_reference(source_ref,level+1)
|
||||||
event.addSourceRef(source_ref)
|
event.addSourceRef(source_ref)
|
||||||
elif matches[1] == "FAMC":
|
|
||||||
family = self.db.findFamily(matches[2],self.fmap)
|
|
||||||
if event.getName() == "Birth":
|
|
||||||
self.person.setMainFamily(family)
|
|
||||||
else:
|
|
||||||
type = string.capitalize(event.getName())
|
|
||||||
self.person.addAltFamily(family,type,type)
|
|
||||||
self.ignore_sub_junk(level+1)
|
|
||||||
elif matches[1] == "PLAC":
|
elif matches[1] == "PLAC":
|
||||||
val = matches[2]
|
val = matches[2]
|
||||||
n = string.strip(event.getName())
|
n = string.strip(event.getName())
|
||||||
if is_ftw and n in ["Occupation","Degree","SSN"]:
|
if self.is_ftw and n in ["Occupation","Degree","SSN"]:
|
||||||
event.setDescription(val)
|
event.setDescription(val)
|
||||||
self.ignore_sub_junk(level+1)
|
self.ignore_sub_junk(level+1)
|
||||||
else:
|
else:
|
||||||
if placemap.has_key(val):
|
if self.placemap.has_key(val):
|
||||||
place = placemap[val]
|
place = self.placemap[val]
|
||||||
else:
|
else:
|
||||||
place = Place()
|
place = Place()
|
||||||
place.set_title(matches[2])
|
place.set_title(matches[2])
|
||||||
self.db.addPlace(place)
|
self.db.addPlace(place)
|
||||||
placemap[val] = place
|
self.placemap[val] = place
|
||||||
event.setPlace(place)
|
event.setPlace(place)
|
||||||
self.ignore_sub_junk(level+1)
|
self.ignore_sub_junk(level+1)
|
||||||
elif matches[1] == "CAUS":
|
elif matches[1] == "CAUS":
|
||||||
@ -898,6 +910,87 @@ class GedcomParser:
|
|||||||
else:
|
else:
|
||||||
self.barf(level+1)
|
self.barf(level+1)
|
||||||
|
|
||||||
|
def parse_adopt_event(self,event,level):
|
||||||
|
note = ""
|
||||||
|
while 1:
|
||||||
|
matches = self.get_next()
|
||||||
|
if int(matches[0]) < level:
|
||||||
|
if note != "":
|
||||||
|
event.setNote(note)
|
||||||
|
self.backup()
|
||||||
|
break
|
||||||
|
elif matches[1] == "DATE":
|
||||||
|
event.setDate(matches[2])
|
||||||
|
elif matches[1] == ["TIME","ADDR","AGE","AGNC","STAT","TEMP","OBJE"]:
|
||||||
|
self.ignore_sub_junk(level+1)
|
||||||
|
elif matches[1] == "SOUR":
|
||||||
|
source_ref = SourceRef()
|
||||||
|
if matches[2] and matches[2][0] != "@":
|
||||||
|
self.localref = self.localref + 1
|
||||||
|
ref = "gsr%d" % self.localref
|
||||||
|
s = self.db.findSource(ref,self.smap)
|
||||||
|
source_ref.setBase(s)
|
||||||
|
s.setTitle('Imported Source #%d' % self.localref)
|
||||||
|
s.setNote(matches[2] + self.parse_continue_data(1))
|
||||||
|
self.ignore_sub_junk(2)
|
||||||
|
else:
|
||||||
|
source_ref.setBase(self.db.findSource(matches[2],self.smap))
|
||||||
|
self.parse_source_reference(source_ref,level+1)
|
||||||
|
event.addSourceRef(source_ref)
|
||||||
|
elif matches[1] == "FAMC":
|
||||||
|
family = self.db.findFamily(matches[2],self.fmap)
|
||||||
|
mrel,frel = self.parse_adopt_famc(level+1);
|
||||||
|
if self.person.getMainFamily() == family:
|
||||||
|
self.person.setMainFamily(None)
|
||||||
|
self.person.addAltFamily(family,mrel,frel)
|
||||||
|
elif matches[1] == "PLAC":
|
||||||
|
val = matches[2]
|
||||||
|
n = string.strip(event.getName())
|
||||||
|
if self.is_ftw and n in ["Occupation","Degree","SSN"]:
|
||||||
|
event.setDescription(val)
|
||||||
|
self.ignore_sub_junk(level+1)
|
||||||
|
else:
|
||||||
|
if self.placemap.has_key(val):
|
||||||
|
place = self.placemap[val]
|
||||||
|
else:
|
||||||
|
place = Place()
|
||||||
|
place.set_title(matches[2])
|
||||||
|
self.db.addPlace(place)
|
||||||
|
self.placemap[val] = place
|
||||||
|
event.setPlace(place)
|
||||||
|
self.ignore_sub_junk(level+1)
|
||||||
|
elif matches[1] == "CAUS":
|
||||||
|
info = matches[2] + self.parse_continue_data(level+1)
|
||||||
|
event.setCause(info)
|
||||||
|
elif matches[1] == "NOTE":
|
||||||
|
info = matches[2] + self.parse_continue_data(level+1)
|
||||||
|
if note == "":
|
||||||
|
note = info
|
||||||
|
else:
|
||||||
|
note = "\n%s" % info
|
||||||
|
elif matches[1] == "CONC":
|
||||||
|
event.setDescription( "%s %s" % (event.getDescription(), matches[2]))
|
||||||
|
elif matches[1] == "CONT":
|
||||||
|
event.setDescription("%s\n%s" % (event.getDescription(),matches[2]))
|
||||||
|
else:
|
||||||
|
self.barf(level+1)
|
||||||
|
|
||||||
|
def parse_adopt_famc(self,level):
|
||||||
|
mrel = "Adopted"
|
||||||
|
frel = "Adopted"
|
||||||
|
while 1:
|
||||||
|
matches = self.get_next()
|
||||||
|
if int(matches[0]) < level:
|
||||||
|
self.backup()
|
||||||
|
return (mrel,frel)
|
||||||
|
elif matches[1] == "ADOP":
|
||||||
|
if matches[2] == "HUSB":
|
||||||
|
mrel = "Birth"
|
||||||
|
elif matches[2] == "WIFE":
|
||||||
|
frel = "Birth"
|
||||||
|
else:
|
||||||
|
self.barf(level+1)
|
||||||
|
|
||||||
def parse_person_attr(self,attr,level):
|
def parse_person_attr(self,attr,level):
|
||||||
note = ""
|
note = ""
|
||||||
while 1:
|
while 1:
|
||||||
@ -982,13 +1075,13 @@ class GedcomParser:
|
|||||||
event.addSourceRef(source_ref)
|
event.addSourceRef(source_ref)
|
||||||
elif matches[1] == "PLAC":
|
elif matches[1] == "PLAC":
|
||||||
val = matches[2]
|
val = matches[2]
|
||||||
if placemap.has_key(val):
|
if self.placemap.has_key(val):
|
||||||
place = placemap[val]
|
place = self.placemap[val]
|
||||||
else:
|
else:
|
||||||
place = Place()
|
place = Place()
|
||||||
place.set_title(matches[2])
|
place.set_title(matches[2])
|
||||||
self.db.addPlace(place)
|
self.db.addPlace(place)
|
||||||
placemap[val] = place
|
self.placemap[val] = place
|
||||||
event.setPlace(place)
|
event.setPlace(place)
|
||||||
self.ignore_sub_junk(level+1)
|
self.ignore_sub_junk(level+1)
|
||||||
elif matches[1] == "NOTE":
|
elif matches[1] == "NOTE":
|
||||||
@ -1091,8 +1184,18 @@ class GedcomParser:
|
|||||||
name.setSurname(matches[2])
|
name.setSurname(matches[2])
|
||||||
elif matches[1] == "NSFX":
|
elif matches[1] == "NSFX":
|
||||||
name.setSuffix(matches[2])
|
name.setSuffix(matches[2])
|
||||||
elif matches[1] == "NICK" or matches[1] == "_AKA":
|
elif matches[1] == "NICK":
|
||||||
self.person.setNickName(matches[2])
|
self.person.setNickName(matches[2])
|
||||||
|
elif matches[1] == "_AKA":
|
||||||
|
lname = string.split(matches[2])
|
||||||
|
l = len(lname)
|
||||||
|
if l == 1:
|
||||||
|
self.person.setNickName(matches[2])
|
||||||
|
else:
|
||||||
|
name = Name()
|
||||||
|
name.setSurname(lname[-1])
|
||||||
|
name.setFirstName(string.join(lname[0:l-1]))
|
||||||
|
self.person.addAlternateName(name)
|
||||||
elif matches[1] == "SOUR":
|
elif matches[1] == "SOUR":
|
||||||
source_ref = SourceRef()
|
source_ref = SourceRef()
|
||||||
source_ref.setBase(self.db.findSource(matches[2],self.smap))
|
source_ref.setBase(self.db.findSource(matches[2],self.smap))
|
||||||
@ -1122,8 +1225,6 @@ class GedcomParser:
|
|||||||
self.index = self.index + 1
|
self.index = self.index + 1
|
||||||
|
|
||||||
def parse_header_source(self):
|
def parse_header_source(self):
|
||||||
global is_ftw
|
|
||||||
global _cnv
|
|
||||||
|
|
||||||
while 1:
|
while 1:
|
||||||
matches = self.get_next()
|
matches = self.get_next()
|
||||||
@ -1135,7 +1236,7 @@ class GedcomParser:
|
|||||||
if self.created_obj.get_text() == "":
|
if self.created_obj.get_text() == "":
|
||||||
self.update(self.created_obj,matches[2])
|
self.update(self.created_obj,matches[2])
|
||||||
if matches[2] == "FTW":
|
if matches[2] == "FTW":
|
||||||
is_ftw = 1
|
self.is_ftw = 1
|
||||||
elif matches[1] == "NAME":
|
elif matches[1] == "NAME":
|
||||||
self.update(self.created_obj,matches[2])
|
self.update(self.created_obj,matches[2])
|
||||||
elif matches[1] == "VERS":
|
elif matches[1] == "VERS":
|
||||||
@ -1158,10 +1259,10 @@ class GedcomParser:
|
|||||||
if matches[2] == "UNICODE" or matches[2] == "UTF-8" or \
|
if matches[2] == "UNICODE" or matches[2] == "UTF-8" or \
|
||||||
matches[2] == "UTF8":
|
matches[2] == "UTF8":
|
||||||
self.code = UNICODE
|
self.code = UNICODE
|
||||||
_cnv = latin_utf8.utf8_to_latin
|
self.cnv = latin_utf8.utf8_to_latin
|
||||||
elif matches[2] == "ANSEL":
|
elif matches[2] == "ANSEL":
|
||||||
self.code = ANSEL
|
self.code = ANSEL
|
||||||
_cnv = latin_ansel.ansel_to_latin
|
self.cnv = latin_ansel.ansel_to_latin
|
||||||
self.ignore_sub_junk(2)
|
self.ignore_sub_junk(2)
|
||||||
self.update(self.encoding_obj,matches[2])
|
self.update(self.encoding_obj,matches[2])
|
||||||
elif matches[1] == "GEDC":
|
elif matches[1] == "GEDC":
|
||||||
|
Loading…
Reference in New Issue
Block a user