Faster GEDCOM imports, source lists

svn: r448
This commit is contained in:
Don Allingham
2001-10-06 18:25:31 +00:00
parent 17ab259c4a
commit ebe4af348d
18 changed files with 738 additions and 560 deletions

View File

@ -48,6 +48,11 @@ glade_file = None
clear_data = 0
is_ftw = 0
# Identity converter: hands back its argument unchanged.
def nocnv(s):
return s
# Module-level converter that GedcomParser.get_next applies to each line it
# reads. It starts as the identity function; parse_header_source rebinds it
# to latin_utf8.utf8_to_latin (UNICODE / UTF-8 / UTF8) or
# latin_ansel.ansel_to_latin (ANSEL).
_cnv = nocnv
photo_types = [ "jpeg", "bmp", "pict", "pntg", "tpic", "png", "gif",
"tiff", "pcx" ]
@ -69,6 +74,8 @@ lineRE = re.compile(r"\s*(\d+)\s+(\S+)\s*(.*)$")
headRE = re.compile(r"\s*(\d+)\s+HEAD")
nameRegexp = re.compile(r"([\S\s]*\S)?\s*/([^/]+)?/\s*,?\s*([\S]+)?")
# Module-level dict consulted by the PLAC handlers: when the place string is
# already a key, the stored object is reused instead of building a new Place.
# NOTE(review): the code that inserts entries is not visible in this view.
placemap = {}
#-------------------------------------------------------------------------
#
#
@ -96,7 +103,9 @@ def find_file(fullname,altpath):
#
#
#-------------------------------------------------------------------------
def importData(database, filename):
global callback
global topDialog
global glade_file
@ -125,12 +134,17 @@ def importData(database, filename):
GnomeErrorDialog(_("%s could not be opened\n") % filename)
return
import time
t1 = time.time()
g.parse_gedcom_file()
t2 = time.time()
print t2-t1
statusTop.get_widget("close").set_sensitive(1)
utils.modified()
callback(1)
if callback:
callback(1)
#-------------------------------------------------------------------------
#
@ -183,9 +197,9 @@ class GedcomParser:
self.dir_path = os.path.dirname(file)
self.localref = 0
f = open(file,"r")
self.lines = f.readlines()
f.close()
self.f = open(file,"r")
self.index = 0
self.backoff = 0
self.file_obj = window.get_widget("file")
self.encoding_obj = window.get_widget("encoding")
@ -200,8 +214,6 @@ class GedcomParser:
self.error_text_obj.set_word_wrap(0)
self.update(self.file_obj,file)
self.index = 0
self.code = 0
#---------------------------------------------------------------------
@ -220,24 +232,27 @@ class GedcomParser:
#
#---------------------------------------------------------------------
def get_next(self):
# Returns the next record as a 3-tuple (number, tag, rest-of-line).
#
# NOTE(review): this view is a rendered diff with the +/- markers and the
# indentation stripped, so two bodies appear back to back. The statements
# from here through `return match.groups()` look like the removed
# implementation (regex match against the preloaded self.lines list);
# confirm against the repository before treating either body as live.
line = string.replace(self.lines[self.index],'\r','')
if self.code == ANSEL:
line = latin_ansel.ansel_to_latin(line)
elif self.code == UNICODE:
line = latin_utf8.utf8_to_latin(line)
match = lineRE.match(line)
if not match:
msg = _("Warning: line %d was not understood, so it was ignored.") % self.index
self.error_text_obj.insert_defaults(msg)
msg = "\n\t%s\n" % self.lines[self.index-1]
self.error_text_obj.insert_defaults(msg)
self.error_count = self.error_count + 1
self.update(self.errors_obj,str(self.error_count))
match = lineRE.match("999 XXX XXX")
self.index = self.index + 1
return match.groups()
# NOTE(review): what follows looks like the replacement implementation,
# which reads from the open file object self.f one line at a time.
#
# A new line is read only when backup() has not set self.backoff; otherwise
# the previously stored self.groups is handed out again.
if self.backoff == 0:
# Strip surrounding whitespace, then pass the text through the
# module-level _cnv converter.
self.text = _cnv(string.strip(self.f.readline()))
self.index = self.index + 1
# Split on whitespace into at most three fields.
l = string.split(self.text, None, 2)
ln = len(l)
# Any failure while building the tuple (fewer than two fields, or a first
# field that int() rejects) is reported as an unreadable line below.
# NOTE(review): the bare except also hides unrelated errors, and an empty
# string from readline() at end of file takes this path too - confirm the
# callers always stop at TRLR.
try:
if ln == 2:
self.groups = (int(l[0]),l[1],"")
else:
self.groups = (int(l[0]),l[1],l[2])
except:
msg = _("Warning: line %d was not understood, so it was ignored.") % self.index
self.error_text_obj.insert_defaults(msg)
msg = "\n\t%s\n" % self.text
self.error_text_obj.insert_defaults(msg)
self.error_count = self.error_count + 1
self.update(self.errors_obj,str(self.error_count))
# Placeholder record returned in place of the unreadable line.
self.groups = (999, "XXX", "XXX")
# Clear the re-delivery flag so the next call reads a fresh line.
self.backoff = 0
return self.groups
#---------------------------------------------------------------------
#
#
@ -246,7 +261,7 @@ class GedcomParser:
def barf(self,level):
msg = _("Warning: line %d was not understood, so it was ignored.") % self.index
self.error_text_obj.insert_defaults(msg)
msg = "\n\t%s\n" % self.lines[self.index-1]
msg = "\n\t%s\n" % self.text
self.error_text_obj.insert_defaults(msg)
self.error_count = self.error_count + 1
self.update(self.errors_obj,str(self.error_count))
@ -268,7 +283,7 @@ class GedcomParser:
#
#---------------------------------------------------------------------
def backup(self):
# Pushes the most recently returned record back so that the next get_next()
# call hands it out again.
# NOTE(review): the diff markers are stripped in this view. The index
# decrement looks like the removed statement and the backoff flag its
# replacement - confirm against the repository.
self.index = self.index - 1
self.backoff = 1
#---------------------------------------------------------------------
#
@ -296,7 +311,8 @@ class GedcomParser:
if matches[1] != "TRLR":
self.barf(0)
self.f.close()
#---------------------------------------------------------------------
#
#
@ -325,7 +341,7 @@ class GedcomParser:
def parse_submitter(self):
matches = self.get_next()
if matches[2] != "SUBN":
if matches[2] != "SUBM":
self.backup()
return
else:
@ -414,7 +430,6 @@ class GedcomParser:
noteobj.set(text + self.parse_continue_data(1))
self.parse_note_data(1)
elif matches[2] == "OBJE":
print "OBJE",matches[1]
self.ignore_sub_junk(1)
elif matches[1] == "TRLR":
self.backup()
@ -505,6 +520,11 @@ class GedcomParser:
child.setMainFamily(None)
child.addAltFamily(self.family,mrel,frel)
elif matches[1] == "NCHI" or matches[1] == "RIN" or matches[1] == "SUBM":
a = Attribute()
a.setType("Number of Children")
a.setValue(matches[2])
self.family.addAttribute(a)
elif matches[1] == "RIN" or matches[1] == "SUBM":
pass
elif matches[1] == "REFN" or matches[1] == "CHAN":
self.ignore_sub_junk(2)
@ -601,7 +621,9 @@ class GedcomParser:
self.nmap[matches[2]] = noteobj
self.person.setNoteObj(noteobj)
elif matches[1] == "SEX":
if matches[2][0] == "M":
if matches[2] == '':
self.person.setGender(Person.unknown)
elif matches[2][0] == "M":
self.person.setGender(Person.male)
else:
self.person.setGender(Person.female)
@ -838,7 +860,7 @@ class GedcomParser:
elif matches[1] == "SOUR":
source_ref = SourceRef()
source_ref.setBase(self.db.findSource(matches[2],self.smap))
address.setSourceRef(source_ref)
address.addSourceRef(source_ref)
self.parse_source_reference(source_ref,level+1)
elif matches[1] == "PLAC":
address.setStreet(matches[2])
@ -937,7 +959,7 @@ class GedcomParser:
else:
source_ref.setBase(self.db.findSource(matches[2],self.smap))
self.parse_source_reference(source_ref,level+1)
event.setSourceRef(source_ref)
event.addSourceRef(source_ref)
elif matches[1] == "FAMC":
family = self.db.findFamily(matches[2],self.fmap)
if event.getName() == "Birth":
@ -953,11 +975,8 @@ class GedcomParser:
event.setDescription(val)
self.ignore_sub_junk(level+1)
else:
place = None
for p in self.db.getPlaceMap().values():
if val == p.get_title():
place = p
break
if placemap.has_key(val):
place = placemap[val]
else:
place = Place()
place.set_title(matches[2])
@ -1028,14 +1047,11 @@ class GedcomParser:
else:
source_ref.setBase(self.db.findSource(matches[2],self.smap))
self.parse_source_reference(source_ref,level+1)
event.setSourceRef(source_ref)
event.addSourceRef(source_ref)
elif matches[1] == "PLAC":
val = matches[2]
place = None
for p in self.db.getPlaceMap().values():
if val == p.get_title():
place = p
break
if placemap.has_key(val):
place = placemap[val]
else:
place = Place()
place.set_title(matches[2])
@ -1145,7 +1161,7 @@ class GedcomParser:
elif matches[1] == "SOUR":
source_ref = SourceRef()
source_ref.setBase(self.db.findSource(matches[2],self.smap))
name.setSourceRef(source_ref)
name.addSourceRef(source_ref)
self.parse_source_reference(source_ref,level+1)
elif matches[1][0:4] == "NOTE":
if matches[2] and matches[2][0] != "@":
@ -1164,10 +1180,10 @@ class GedcomParser:
def parse_header_head(self):
"""Validates that this is a GEDCOM file: the line read must match headRE
(a number followed by HEAD), otherwise GedcomParser.BadFile is raised
with the offending line."""
# NOTE(review): the diff markers are stripped in this view. Each of the two
# statement pairs below (the `line = ...` assignments and the `raise`
# statements) looks like a removed self.lines version followed by its
# self.f.readline() replacement - confirm against the repository.
line = string.replace(self.lines[self.index],'\r','')
line = string.replace(self.f.readline(),'\r','')
match = headRE.search(line)
if not match:
raise GedcomParser.BadFile, self.lines[self.index]
raise GedcomParser.BadFile, line
# Count the header line as consumed.
self.index = self.index + 1
#---------------------------------------------------------------------
@ -1177,6 +1193,7 @@ class GedcomParser:
#---------------------------------------------------------------------
def parse_header_source(self):
global is_ftw
global _cnv
while 1:
matches = self.get_next()
@ -1211,8 +1228,10 @@ class GedcomParser:
if matches[2] == "UNICODE" or matches[2] == "UTF-8" or \
matches[2] == "UTF8":
self.code = UNICODE
_cnv = latin_utf8.utf8_to_latin
elif matches[2] == "ANSEL":
self.code = ANSEL
_cnv = latin_ansel.ansel_to_latin
self.ignore_sub_junk(2)
self.update(self.encoding_obj,matches[2])
elif matches[1] == "GEDC":
@ -1598,3 +1617,13 @@ def readData(database,active_person,cb):
from Plugins import register_import
register_import(readData,_("Import from GEDCOM"))
# Script entry point: runs importData under the profiler on the file named by
# the first command-line argument.
if __name__ == "__main__":
import profile
import sys
# NOTE(review): if this block sits at module level (indentation is not
# visible here), the `global` statement has no effect.
global db
glade_file = "plugins/gedcomimport.glade"
db = RelDataBase()
# NOTE(review): sys.argv[1] is read without checking that an argument was
# supplied; running with no argument would raise IndexError.
profile.run('importData(db,sys.argv[1])')