Tried to allow the parser to work under Python 2.X and SAX2

svn: r23
This commit is contained in:
Don Allingham 2001-05-17 23:08:27 +00:00
parent 0eb1e57435
commit 61e2601680
3 changed files with 129 additions and 67 deletions

View File

@@ -26,6 +26,19 @@ import os
from xml.sax import handler from xml.sax import handler
#-------------------------------------------------------------------------
#
# Try to abstract SAX1 from SAX2
#
#-------------------------------------------------------------------------
try:
import xml.sax.saxexts
sax = 1
except:
sax = 2
from latin_utf8 import utf8_to_latin
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
# Remove extraneous spaces # Remove extraneous spaces
@@ -201,7 +214,7 @@ class GrampsParser(handler.ContentHandler):
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def start_bmark(self,attrs): def start_bmark(self,attrs):
if self.is_import: if self.is_import:
person = self.database.findPerson("x" + attrs["ref"],self.pmap) person = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else: else:
person = self.database.findPersonNoMap(attrs["ref"]) person = self.database.findPersonNoMap(attrs["ref"])
self.database.bookmarks.append(person) self.database.bookmarks.append(person)
@@ -216,7 +229,7 @@ class GrampsParser(handler.ContentHandler):
self.callback(float(self.count)/float(self.entries)) self.callback(float(self.count)/float(self.entries))
self.count = self.count + 1 self.count = self.count + 1
if self.is_import: if self.is_import:
self.person = self.database.findPerson("x" + attrs["id"],self.pmap) self.person = self.database.findPerson("x%s" % attrs["id"],self.pmap)
else: else:
self.person = self.database.findPersonNoMap(attrs["id"]) self.person = self.database.findPersonNoMap(attrs["id"])
@@ -239,7 +252,7 @@ class GrampsParser(handler.ContentHandler):
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def start_father(self,attrs): def start_father(self,attrs):
if self.is_import: if self.is_import:
father = self.database.findPerson("x" + attrs["ref"],self.pmap) father = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else: else:
father = self.database.findPersonNoMap(attrs["ref"]) father = self.database.findPersonNoMap(attrs["ref"])
self.family.setFather(father) self.family.setFather(father)
@@ -251,7 +264,7 @@ class GrampsParser(handler.ContentHandler):
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def start_mother(self,attrs): def start_mother(self,attrs):
if self.is_import: if self.is_import:
mother = self.database.findPerson("x" + attrs["ref"],self.pmap) mother = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else: else:
mother = self.database.findPersonNoMap(attrs["ref"]) mother = self.database.findPersonNoMap(attrs["ref"])
self.family.setMother(mother) self.family.setMother(mother)
@@ -263,7 +276,7 @@ class GrampsParser(handler.ContentHandler):
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def start_child(self,attrs): def start_child(self,attrs):
if self.is_import: if self.is_import:
child = self.database.findPerson("x" + attrs["ref"],self.pmap) child = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else: else:
child = self.database.findPersonNoMap(attrs["ref"]) child = self.database.findPersonNoMap(attrs["ref"])
self.family.addChild(child) self.family.addChild(child)
@@ -277,14 +290,16 @@ class GrampsParser(handler.ContentHandler):
if not attrs.has_key("href"): if not attrs.has_key("href"):
return return
if attrs.has_key("description"): try:
desc = attrs["description"] desc = attrs["description"]
else: except KeyError:
desc = "" desc = ""
url = Url(attrs["href"],desc) try:
url = Url(attrs["href"],desc)
self.person.addUrl(url) self.person.addUrl(url)
except KeyError:
return
#--------------------------------------------------------------------- #---------------------------------------------------------------------
# #
@@ -753,12 +768,10 @@ class GrampsParser(handler.ContentHandler):
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def startElement(self,tag,attrs): def startElement(self,tag,attrs):
self.data[tag] = ""
self.active = tag self.active = tag
self.data[tag] = ""
if GrampsParser.start.has_key(tag): if GrampsParser.start.has_key(tag):
GrampsParser.start[tag](self,attrs) GrampsParser.start[tag](self,attrs)
return
#--------------------------------------------------------------------- #---------------------------------------------------------------------
# #
@@ -766,16 +779,23 @@ class GrampsParser(handler.ContentHandler):
# #
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def endElement(self,tag): def endElement(self,tag):
import latin_utf8
if GrampsParser.stop.has_key(tag): if GrampsParser.stop.has_key(tag):
data = latin_utf8.utf8_to_latin(self.data[tag]) if sax == 1:
data = utf8_to_latin(self.data[tag])
else:
data = self.data[tag]
GrampsParser.stop[tag](self,data) GrampsParser.stop[tag](self,data)
if sax == 1:
def characters(self, data, offset, length):
self.data[self.active] = self.data[self.active] + data
else:
def characters(self, data):
self.data[self.active] = self.data[self.active] + data
#--------------------------------------------------------------------- #---------------------------------------------------------------------
# #
# #
# #
#--------------------------------------------------------------------- #---------------------------------------------------------------------
def characters(self, data, offset, length):
self.data[self.active] = self.data[self.active] + data

View File

@@ -21,6 +21,8 @@
from RelLib import * from RelLib import *
from GrampsParser import * from GrampsParser import *
import intl import intl
_ = intl.gettext
import string import string
import time import time
@@ -29,11 +31,19 @@ import os
from gnome.ui import * from gnome.ui import *
import xml.sax import xml.sax
import xml.sax.saxexts
import xml.sax.saxutils import xml.sax.saxutils
import xml.parsers.expat
_ = intl.gettext #-------------------------------------------------------------------------
#
# Try to abstract SAX1 from SAX2
#
#-------------------------------------------------------------------------
try:
import xml.sax.saxexts
sax = 1
except:
from codecs import *
sax = 2
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
@@ -43,15 +53,23 @@ _ = intl.gettext
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
def importData(database, filename, callback): def importData(database, filename, callback):
parser = xml.sax.saxexts.make_parser()
basefile = os.path.dirname(filename) basefile = os.path.dirname(filename)
database.smap = {} database.smap = {}
database.pmap = {} database.pmap = {}
database.fmap = {} database.fmap = {}
parser.setDocumentHandler(GrampsParser(database,callback,basefile,1))
xml_file = gzip.open(filename,"rb")
parser.parseFile(xml_file) if sax == 1:
parser = xml.sax.saxexts.make_parser()
parser.setDocumentHandler(GrampsParser(database,callback,basefile,1))
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
xml_file = gzip.open(filename,"rb")
parser.parseFile(xml_file)
else:
parser = xml.sax.make_parser()
parser.setContentHandler(GrampsParser(database,callback,basefile,1))
xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1')
parser.parse(xml_file)
xml_file.close() xml_file.close()
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
@@ -62,19 +80,26 @@ def importData(database, filename, callback):
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
def loadData(database, filename, callback): def loadData(database, filename, callback):
parser = xml.sax.saxexts.make_parser()
basefile = os.path.dirname(filename) basefile = os.path.dirname(filename)
database.smap = {} database.smap = {}
database.pmap = {} database.pmap = {}
database.fmap = {} database.fmap = {}
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
parser.setDocumentHandler(GrampsParser(database,callback,basefile,0))
filename = os.path.normpath(filename) filename = os.path.normpath(filename)
if sax == 1:
parser = xml.sax.saxexts.make_parser()
parser.setDocumentHandler(GrampsParser(database,callback,basefile,0))
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
else:
parser = xml.sax.make_parser()
parser.setContentHandler(GrampsParser(database,callback,basefile,0))
try: try:
xml_file = gzip.open(filename,"rb") if sax == 1:
xml_file = gzip.open(filename,"rb")
else:
xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1')
except IOError,msg: except IOError,msg:
GnomeErrorDialog(filename + _(" could not be opened\n") + str(msg)) GnomeErrorDialog(filename + _(" could not be opened\n") + str(msg))
return 0 return 0
@@ -83,13 +108,13 @@ def loadData(database, filename, callback):
return 0 return 0
try: try:
parser.parseFile(xml_file) if sax == 1:
parser.parseFile(xml_file)
else:
parser.parse(xml_file)
except xml.sax.SAXParseException: except xml.sax.SAXParseException:
GnomeErrorDialog(filename + _(" is a corrupt file")) GnomeErrorDialog(filename + _(" is a corrupt file"))
return 0 return 0
except xml.parsers.expat.ExpatError:
GnomeErrorDialog(filename + _(" is a corrupt file"))
return 0
except IOError,msg: except IOError,msg:
GnomeErrorDialog(filename + _(" is not a valid gramps file\n") + \ GnomeErrorDialog(filename + _(" is not a valid gramps file\n") + \
str(msg)) str(msg))
@@ -97,12 +122,29 @@ def loadData(database, filename, callback):
except: except:
GnomeErrorDialog(_("Could not read ") + filename) GnomeErrorDialog(_("Could not read ") + filename)
return 0 return 0
xml_file.close() xml_file.close()
return 1 return 1
if __name__ == "__main__":
import sys
import time
import profile
def lcb(val):
pass
db = RelDataBase()
file = sys.argv[1]
t1 = time.time()
loadData(db,file,lcb)
t2 = time.time()
print t2 - t1

View File

@@ -1,34 +1,34 @@
from xml.unicode.utf8_iso import utf8_to_code, code_to_utf8
import cStringIO import cStringIO
def utf8_to_latin(s): try:
buff = cStringIO.StringIO() from xml.unicode.utf8_iso import code_to_utf8
while s: from xml.unicode.iso8859 import UTF8String
try:
head,s = utf8_to_code(1,s)
except Exception,e:
from traceback import print_exc
print_exc()
head = ''
s = s[1:0]
buff.write(head)
ans = buff.getvalue()
buff.close()
return ans
def latin_to_utf8(s): def utf8_to_latin(s):
buff = cStringIO.StringIO() y = UTF8String(s)
for c in s: return y.encode("iso-8859-1")
try:
cv = code_to_utf8(1,c) def latin_to_utf8(s):
except Exception,e: buff = cStringIO.StringIO()
from traceback import print_exc for c in s:
print_exc() try:
cv = '' cv = code_to_utf8(1,c)
buff.write(cv) except Exception,e:
ans = buff.getvalue() from traceback import print_exc
buff.close() print_exc()
return ans cv = ''
buff.write(cv)
ans = buff.getvalue()
buff.close()
return ans
except:
def utf8_to_latin(s):
return s
def latin_to_utf8(s):
return s