Tried to allow the parser to work under Python 2.X and SAX2

svn: r23
This commit is contained in:
Don Allingham 2001-05-17 23:08:27 +00:00
parent 0eb1e57435
commit 61e2601680
3 changed files with 129 additions and 67 deletions

View File

@ -26,6 +26,19 @@ import os
from xml.sax import handler
#-------------------------------------------------------------------------
#
# Try to abstract SAX1 from SAX2
#
#-------------------------------------------------------------------------
# Record which SAX generation is usable: `sax` is 1 when xml.sax.saxexts can
# be imported and 2 otherwise.  Only ImportError is caught, so an unrelated
# failure raised while importing is reported instead of being silently
# treated as "SAX1 is not installed".
try:
    import xml.sax.saxexts
    sax = 1
except ImportError:
    sax = 2
from latin_utf8 import utf8_to_latin
#-------------------------------------------------------------------------
#
# Remove extraneous spaces
@ -201,7 +214,7 @@ class GrampsParser(handler.ContentHandler):
#---------------------------------------------------------------------
def start_bmark(self,attrs):
if self.is_import:
person = self.database.findPerson("x" + attrs["ref"],self.pmap)
person = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else:
person = self.database.findPersonNoMap(attrs["ref"])
self.database.bookmarks.append(person)
@ -216,7 +229,7 @@ class GrampsParser(handler.ContentHandler):
self.callback(float(self.count)/float(self.entries))
self.count = self.count + 1
if self.is_import:
self.person = self.database.findPerson("x" + attrs["id"],self.pmap)
self.person = self.database.findPerson("x%s" % attrs["id"],self.pmap)
else:
self.person = self.database.findPersonNoMap(attrs["id"])
@ -239,7 +252,7 @@ class GrampsParser(handler.ContentHandler):
#---------------------------------------------------------------------
def start_father(self,attrs):
if self.is_import:
father = self.database.findPerson("x" + attrs["ref"],self.pmap)
father = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else:
father = self.database.findPersonNoMap(attrs["ref"])
self.family.setFather(father)
@ -251,7 +264,7 @@ class GrampsParser(handler.ContentHandler):
#---------------------------------------------------------------------
def start_mother(self,attrs):
if self.is_import:
mother = self.database.findPerson("x" + attrs["ref"],self.pmap)
mother = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else:
mother = self.database.findPersonNoMap(attrs["ref"])
self.family.setMother(mother)
@ -263,7 +276,7 @@ class GrampsParser(handler.ContentHandler):
#---------------------------------------------------------------------
def start_child(self,attrs):
if self.is_import:
child = self.database.findPerson("x" + attrs["ref"],self.pmap)
child = self.database.findPerson("x%s" % attrs["ref"],self.pmap)
else:
child = self.database.findPersonNoMap(attrs["ref"])
self.family.addChild(child)
@ -277,14 +290,16 @@ class GrampsParser(handler.ContentHandler):
if not attrs.has_key("href"):
return
if attrs.has_key("description"):
try:
desc = attrs["description"]
else:
except KeyError:
desc = ""
url = Url(attrs["href"],desc)
self.person.addUrl(url)
try:
url = Url(attrs["href"],desc)
self.person.addUrl(url)
except KeyError:
return
#---------------------------------------------------------------------
#
@ -753,12 +768,10 @@ class GrampsParser(handler.ContentHandler):
#---------------------------------------------------------------------
def startElement(self,tag,attrs):
self.data[tag] = ""
self.active = tag
self.data[tag] = ""
if GrampsParser.start.has_key(tag):
GrampsParser.start[tag](self,attrs)
return
#---------------------------------------------------------------------
#
@ -766,16 +779,23 @@ class GrampsParser(handler.ContentHandler):
#
#---------------------------------------------------------------------
def endElement(self,tag):
import latin_utf8
if GrampsParser.stop.has_key(tag):
data = latin_utf8.utf8_to_latin(self.data[tag])
if sax == 1:
data = utf8_to_latin(self.data[tag])
else:
data = self.data[tag]
GrampsParser.stop[tag](self,data)
if sax == 1:
def characters(self, data, offset, length):
self.data[self.active] = self.data[self.active] + data
else:
def characters(self, data):
self.data[self.active] = self.data[self.active] + data
#---------------------------------------------------------------------
#
#
#
#---------------------------------------------------------------------
def characters(self, data, offset, length):
self.data[self.active] = self.data[self.active] + data

View File

@ -21,6 +21,8 @@
from RelLib import *
from GrampsParser import *
import intl
_ = intl.gettext
import string
import time
@ -29,11 +31,19 @@ import os
from gnome.ui import *
import xml.sax
import xml.sax.saxexts
import xml.sax.saxutils
import xml.parsers.expat
_ = intl.gettext
#-------------------------------------------------------------------------
#
# Try to abstract SAX1 from SAX2
#
#-------------------------------------------------------------------------
# Record which SAX generation is usable: `sax` is 1 when xml.sax.saxexts can
# be imported and 2 otherwise.  Only ImportError is caught, so an unrelated
# failure raised while importing is not mistaken for a missing SAX1 module.
try:
    import xml.sax.saxexts
    sax = 1
except ImportError:
    # The SAX2 code path wraps the input file with codecs.EncodedFile.
    # NOTE(review): the star import is kept so the module namespace is
    # unchanged, but it also rebinds `open` to codecs.open in this module;
    # consider importing EncodedFile explicitly once callers are checked.
    from codecs import *
    sax = 2
#-------------------------------------------------------------------------
#
@ -43,15 +53,23 @@ _ = intl.gettext
#-------------------------------------------------------------------------
def importData(database, filename, callback):
parser = xml.sax.saxexts.make_parser()
basefile = os.path.dirname(filename)
database.smap = {}
database.pmap = {}
database.fmap = {}
parser.setDocumentHandler(GrampsParser(database,callback,basefile,1))
xml_file = gzip.open(filename,"rb")
parser.parseFile(xml_file)
if sax == 1:
parser = xml.sax.saxexts.make_parser()
parser.setDocumentHandler(GrampsParser(database,callback,basefile,1))
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
xml_file = gzip.open(filename,"rb")
parser.parseFile(xml_file)
else:
parser = xml.sax.make_parser()
parser.setContentHandler(GrampsParser(database,callback,basefile,1))
xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1')
parser.parse(xml_file)
xml_file.close()
#-------------------------------------------------------------------------
@ -62,19 +80,26 @@ def importData(database, filename, callback):
#-------------------------------------------------------------------------
def loadData(database, filename, callback):
parser = xml.sax.saxexts.make_parser()
basefile = os.path.dirname(filename)
database.smap = {}
database.pmap = {}
database.fmap = {}
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
parser.setDocumentHandler(GrampsParser(database,callback,basefile,0))
filename = os.path.normpath(filename)
if sax == 1:
parser = xml.sax.saxexts.make_parser()
parser.setDocumentHandler(GrampsParser(database,callback,basefile,0))
parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser())
else:
parser = xml.sax.make_parser()
parser.setContentHandler(GrampsParser(database,callback,basefile,0))
try:
xml_file = gzip.open(filename,"rb")
if sax == 1:
xml_file = gzip.open(filename,"rb")
else:
xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1')
except IOError,msg:
GnomeErrorDialog(filename + _(" could not be opened\n") + str(msg))
return 0
@ -83,13 +108,13 @@ def loadData(database, filename, callback):
return 0
try:
parser.parseFile(xml_file)
if sax == 1:
parser.parseFile(xml_file)
else:
parser.parse(xml_file)
except xml.sax.SAXParseException:
GnomeErrorDialog(filename + _(" is a corrupt file"))
return 0
except xml.parsers.expat.ExpatError:
GnomeErrorDialog(filename + _(" is a corrupt file"))
return 0
except IOError,msg:
GnomeErrorDialog(filename + _(" is not a valid gramps file\n") + \
str(msg))
@ -98,11 +123,28 @@ def loadData(database, filename, callback):
GnomeErrorDialog(_("Could not read ") + filename)
return 0
xml_file.close()
return 1
if __name__ == "__main__":
    # Ad-hoc timing harness: load the file named by the first command-line
    # argument into a fresh database and print how many seconds it took.
    import profile
    import sys
    import time

    def lcb(val):
        # Progress callback that deliberately ignores the reported value.
        pass

    file = sys.argv[1]
    db = RelDataBase()

    t1 = time.time()
    loadData(db, file, lcb)
    t2 = time.time()

    elapsed = t2 - t1
    print(elapsed)

View File

@ -1,34 +1,34 @@
from xml.unicode.utf8_iso import utf8_to_code, code_to_utf8
import cStringIO
def utf8_to_latin(s):
    """Convert `s` piece by piece with utf8_to_code and return the result.

    Each pass hands the unconsumed input to utf8_to_code(1, s), which yields
    the converted head and the remaining input.  (Presumably the first
    argument selects ISO-8859-1 -- confirm against xml.unicode.utf8_iso.)

    When a pass raises, the traceback is printed, nothing is emitted for
    that position, the leading byte is dropped, and conversion continues
    with the rest of the input.
    """
    buff = cStringIO.StringIO()
    while s:
        try:
            head,s = utf8_to_code(1,s)
        except Exception:
            from traceback import print_exc
            print_exc()
            head = ''
            # Skip only the byte that failed.  The earlier slice s[1:0] is
            # always empty, so a single bad byte threw away all of the
            # remaining input.
            s = s[1:]
        buff.write(head)
    ans = buff.getvalue()
    buff.close()
    return ans
# Use the xml.unicode converters when they can be imported; otherwise fall
# back to pass-through versions that return their argument unchanged.  Only
# ImportError selects the fallback, so other errors are not hidden.
try:
    from xml.unicode.utf8_iso import code_to_utf8
    from xml.unicode.iso8859 import UTF8String

    def utf8_to_latin(s):
        """Wrap `s` in UTF8String and return its "iso-8859-1" encoding."""
        y = UTF8String(s)
        return y.encode("iso-8859-1")

    def latin_to_utf8(s):
        """Convert `s` one character at a time with code_to_utf8.

        A character whose conversion raises is reported with a printed
        traceback and contributes nothing to the result.
        """
        buff = cStringIO.StringIO()
        for c in s:
            try:
                cv = code_to_utf8(1,c)
            except Exception:
                from traceback import print_exc
                print_exc()
                cv = ''
            buff.write(cv)
        ans = buff.getvalue()
        buff.close()
        return ans
except ImportError:
    def utf8_to_latin(s):
        """Fallback when xml.unicode is unavailable: return `s` unchanged."""
        return s

    def latin_to_utf8(s):
        """Fallback when xml.unicode is unavailable: return `s` unchanged."""
        return s
def latin_to_utf8(s):
    """Convert `s` one character at a time with code_to_utf8.

    Every character is passed to code_to_utf8(1, ch) and the pieces are
    collected in order.  If a conversion raises, the traceback is printed
    and an empty string is collected for that character.
    """
    out = cStringIO.StringIO()
    for ch in s:
        try:
            piece = code_to_utf8(1, ch)
        except Exception:
            import traceback
            traceback.print_exc()
            piece = ''
        out.write(piece)
    converted = out.getvalue()
    out.close()
    return converted