diff --git a/gramps/src/GrampsParser.py b/gramps/src/GrampsParser.py index c69028a75..2338aee81 100644 --- a/gramps/src/GrampsParser.py +++ b/gramps/src/GrampsParser.py @@ -26,6 +26,19 @@ import os from xml.sax import handler +#------------------------------------------------------------------------- +# +# Try to abstract SAX1 from SAX2 +# +#------------------------------------------------------------------------- +try: + import xml.sax.saxexts + sax = 1 +except: + sax = 2 + +from latin_utf8 import utf8_to_latin + #------------------------------------------------------------------------- # # Remove extraneous spaces @@ -201,7 +214,7 @@ class GrampsParser(handler.ContentHandler): #--------------------------------------------------------------------- def start_bmark(self,attrs): if self.is_import: - person = self.database.findPerson("x" + attrs["ref"],self.pmap) + person = self.database.findPerson("x%s" % attrs["ref"],self.pmap) else: person = self.database.findPersonNoMap(attrs["ref"]) self.database.bookmarks.append(person) @@ -216,7 +229,7 @@ class GrampsParser(handler.ContentHandler): self.callback(float(self.count)/float(self.entries)) self.count = self.count + 1 if self.is_import: - self.person = self.database.findPerson("x" + attrs["id"],self.pmap) + self.person = self.database.findPerson("x%s" % attrs["id"],self.pmap) else: self.person = self.database.findPersonNoMap(attrs["id"]) @@ -239,7 +252,7 @@ class GrampsParser(handler.ContentHandler): #--------------------------------------------------------------------- def start_father(self,attrs): if self.is_import: - father = self.database.findPerson("x" + attrs["ref"],self.pmap) + father = self.database.findPerson("x%s" % attrs["ref"],self.pmap) else: father = self.database.findPersonNoMap(attrs["ref"]) self.family.setFather(father) @@ -251,7 +264,7 @@ class GrampsParser(handler.ContentHandler): #--------------------------------------------------------------------- def start_mother(self,attrs): if self.is_import: - mother = self.database.findPerson("x" + attrs["ref"],self.pmap) + mother = self.database.findPerson("x%s" % attrs["ref"],self.pmap) else: mother = self.database.findPersonNoMap(attrs["ref"]) self.family.setMother(mother) @@ -263,7 +276,7 @@ class GrampsParser(handler.ContentHandler): #--------------------------------------------------------------------- def start_child(self,attrs): if self.is_import: - child = self.database.findPerson("x" + attrs["ref"],self.pmap) + child = self.database.findPerson("x%s" % attrs["ref"],self.pmap) else: child = self.database.findPersonNoMap(attrs["ref"]) self.family.addChild(child) @@ -277,14 +290,16 @@ class GrampsParser(handler.ContentHandler): if not attrs.has_key("href"): return - if attrs.has_key("description"): + try: desc = attrs["description"] - else: + except KeyError: desc = "" - - url = Url(attrs["href"],desc) - - self.person.addUrl(url) + + try: + url = Url(attrs["href"],desc) + self.person.addUrl(url) + except KeyError: + return #--------------------------------------------------------------------- # @@ -753,12 +768,10 @@ class GrampsParser(handler.ContentHandler): #--------------------------------------------------------------------- def startElement(self,tag,attrs): - self.data[tag] = "" self.active = tag - + self.data[tag] = "" if GrampsParser.start.has_key(tag): GrampsParser.start[tag](self,attrs) - return #--------------------------------------------------------------------- # @@ -766,16 +779,23 @@ class GrampsParser(handler.ContentHandler): # #--------------------------------------------------------------------- def endElement(self,tag): - import latin_utf8 - + if GrampsParser.stop.has_key(tag): - data = latin_utf8.utf8_to_latin(self.data[tag]) + if sax == 1: + data = utf8_to_latin(self.data[tag]) + else: + data = self.data[tag] GrampsParser.stop[tag](self,data) + if sax == 1: + def characters(self, data, offset, length): + self.data[self.active] = self.data[self.active] + data + else: + def characters(self, data): + self.data[self.active] = self.data[self.active] + data + #--------------------------------------------------------------------- # # # #--------------------------------------------------------------------- - def characters(self, data, offset, length): - self.data[self.active] = self.data[self.active] + data diff --git a/gramps/src/ReadXML.py b/gramps/src/ReadXML.py index 03cf8fab7..3cb8e08b3 100644 --- a/gramps/src/ReadXML.py +++ b/gramps/src/ReadXML.py @@ -21,6 +21,8 @@ from RelLib import * from GrampsParser import * import intl +_ = intl.gettext + import string import time @@ -29,11 +31,19 @@ import os from gnome.ui import * import xml.sax -import xml.sax.saxexts import xml.sax.saxutils -import xml.parsers.expat -_ = intl.gettext +#------------------------------------------------------------------------- +# +# Try to abstract SAX1 from SAX2 +# +#------------------------------------------------------------------------- +try: + import xml.sax.saxexts + sax = 1 +except: + from codecs import * + sax = 2 #------------------------------------------------------------------------- # @@ -43,15 +53,23 @@ _ = intl.gettext #------------------------------------------------------------------------- def importData(database, filename, callback): - parser = xml.sax.saxexts.make_parser() basefile = os.path.dirname(filename) database.smap = {} database.pmap = {} database.fmap = {} - parser.setDocumentHandler(GrampsParser(database,callback,basefile,1)) - xml_file = gzip.open(filename,"rb") - parser.parseFile(xml_file) + if sax == 1: + parser = xml.sax.saxexts.make_parser() + parser.setDocumentHandler(GrampsParser(database,callback,basefile,1)) + parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser()) + xml_file = gzip.open(filename,"rb") + parser.parseFile(xml_file) + else: + parser = xml.sax.make_parser() + parser.setContentHandler(GrampsParser(database,callback,basefile,1)) + xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1') + parser.parse(xml_file) + xml_file.close() #------------------------------------------------------------------------- @@ -62,19 +80,26 @@ def importData(database, filename, callback): #------------------------------------------------------------------------- def loadData(database, filename, callback): - parser = xml.sax.saxexts.make_parser() - basefile = os.path.dirname(filename) database.smap = {} database.pmap = {} database.fmap = {} - parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser()) - parser.setDocumentHandler(GrampsParser(database,callback,basefile,0)) filename = os.path.normpath(filename) + if sax == 1: + parser = xml.sax.saxexts.make_parser() + parser.setDocumentHandler(GrampsParser(database,callback,basefile,0)) + parser.setErrorHandler(xml.sax.saxutils.ErrorRaiser()) + else: + parser = xml.sax.make_parser() + parser.setContentHandler(GrampsParser(database,callback,basefile,0)) + try: - xml_file = gzip.open(filename,"rb") + if sax == 1: + xml_file = gzip.open(filename,"rb") + else: + xml_file = EncodedFile(gzip.open(filename,"rb"),'utf-8','latin-1') except IOError,msg: GnomeErrorDialog(filename + _(" could not be opened\n") + str(msg)) return 0 @@ -83,13 +108,13 @@ def loadData(database, filename, callback): return 0 try: - parser.parseFile(xml_file) + if sax == 1: + parser.parseFile(xml_file) + else: + parser.parse(xml_file) except xml.sax.SAXParseException: GnomeErrorDialog(filename + _(" is a corrupt file")) return 0 - except xml.parsers.expat.ExpatError: - GnomeErrorDialog(filename + _(" is a corrupt file")) - return 0 except IOError,msg: GnomeErrorDialog(filename + _(" is not a valid gramps file\n") + \ str(msg)) @@ -97,12 +122,29 @@ def loadData(database, filename, callback): except: GnomeErrorDialog(_("Could not read ") + filename) return 0 - + + xml_file.close() return 1 - +if __name__ == "__main__": + + import sys + import time + import profile + + def lcb(val): + pass + + db = RelDataBase() + file = sys.argv[1] + + t1 = time.time() + + loadData(db,file,lcb) + t2 = time.time() + print t2 - t1 diff --git a/gramps/src/latin_utf8.py b/gramps/src/latin_utf8.py index c631d4fa3..a954d2f61 100644 --- a/gramps/src/latin_utf8.py +++ b/gramps/src/latin_utf8.py @@ -1,34 +1,34 @@ - -from xml.unicode.utf8_iso import utf8_to_code, code_to_utf8 - import cStringIO -def utf8_to_latin(s): - buff = cStringIO.StringIO() - while s: - try: - head,s = utf8_to_code(1,s) - except Exception,e: - from traceback import print_exc - print_exc() - head = '' - s = s[1:0] - buff.write(head) - ans = buff.getvalue() - buff.close() - return ans +try: + from xml.unicode.utf8_iso import code_to_utf8 + from xml.unicode.iso8859 import UTF8String -def latin_to_utf8(s): - buff = cStringIO.StringIO() - for c in s: - try: - cv = code_to_utf8(1,c) - except Exception,e: - from traceback import print_exc - print_exc() - cv = '' - buff.write(cv) - ans = buff.getvalue() - buff.close() - return ans + def utf8_to_latin(s): + y = UTF8String(s) + return y.encode("iso-8859-1") + + def latin_to_utf8(s): + buff = cStringIO.StringIO() + for c in s: + try: + cv = code_to_utf8(1,c) + except Exception,e: + from traceback import print_exc + print_exc() + cv = '' + buff.write(cv) + ans = buff.getvalue() + buff.close() + return ans + +except: + + def utf8_to_latin(s): + return s + + def latin_to_utf8(s): + return s + +