diff --git a/src/GrampsDbUtils/Makefile.am b/src/GrampsDbUtils/Makefile.am index 90c460bb1..ba4e97852 100644 --- a/src/GrampsDbUtils/Makefile.am +++ b/src/GrampsDbUtils/Makefile.am @@ -6,7 +6,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/GrampsDbUtils pkgdata_PYTHON = \ - _GedcomChar.py\ _GedcomInfo.py\ _GedcomLex.py\ _GedcomParse.py\ diff --git a/src/GrampsDbUtils/_GedcomChar.py b/src/GrampsDbUtils/_GedcomChar.py deleted file mode 100644 index 7efc8abc8..000000000 --- a/src/GrampsDbUtils/_GedcomChar.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Gramps - a GTK+/GNOME based genealogy program -# -# Copyright (C) 2000-2005 Donald N. Allingham -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -from ansel_utf8 import ansel_to_utf8 -import codecs - -class BaseReader(object): - def __init__(self, ifile, encoding): - self.ifile = ifile - self.enc = encoding - - def reset(self): - self.ifile.seek(0) - - def readline(self): - return unicode(self.ifile.readline(), - encoding=self.enc, - errors='replace') - -class UTF8Reader(BaseReader): - - def __init__(self, ifile): - BaseReader.__init__(self, ifile, 'utf8') - self.reset() - - def reset(self): - self.ifile.seek(0) - data = self.ifile.read(3) - if data != "\xef\xbb\xbf": - self.ifile.seek(0) - - def readline(self): - return unicode(self.ifile.readline(), - encoding=self.enc, - errors='replace') - -class UTF16Reader(BaseReader): - - def __init__(self, ifile): - new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16') - BaseReader.__init__(self, new_file, 'utf16') - self.reset() - - def readline(self): - l = self.ifile.readline() - if l.strip(): - return l - else: - return self.ifile.readline() - -class AnsiReader(BaseReader): - - def __init__(self, ifile): - BaseReader.__init__(self, ifile, 'latin1') - -class AnselReader(BaseReader): - - def __init__(self, ifile): - BaseReader.__init__(self, ifile, "") - - def readline(self): - return ansel_to_utf8(self.ifile.readline()) diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py index 3cc2314f7..f0527d7f4 100644 --- a/src/GrampsDbUtils/_GedcomParse.py +++ b/src/GrampsDbUtils/_GedcomParse.py @@ -88,9 +88,9 @@ all lines until the next level 2 token is found (in this case, skipping the # #------------------------------------------------------------------------- import os -import sys import re import time +import codecs from gettext import gettext as _ #------------------------------------------------------------------------ @@ 
#-------------------------------------------------------------------------
#
# File Readers
#
#-------------------------------------------------------------------------
class BaseReader(object):
    """
    Wrap a byte-oriented file object and hand back decoded lines.

    ifile    -- file-like object; seek() and readline() are called on it
    encoding -- codec name used by readline() to decode each raw line
    """

    def __init__(self, ifile, encoding):
        self.ifile = ifile
        self.enc = encoding

    def reset(self):
        """Rewind the wrapped file to offset 0."""
        self.ifile.seek(0)

    def readline(self):
        """
        Return the next line of the wrapped file decoded with self.enc.

        Bytes that are not valid in that encoding are substituted with the
        replacement character instead of raising.
        """
        # For a byte string this is the same operation as
        # unicode(raw, encoding=self.enc, errors='replace').
        return self.ifile.readline().decode(self.enc, 'replace')


class UTF8Reader(BaseReader):
    """
    Reader for UTF-8 input that steps over a leading byte order mark.

    Line decoding is inherited unchanged from BaseReader.
    """

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'utf8')
        self.reset()

    def reset(self):
        """
        Position the file just after the UTF-8 BOM, or at offset 0 when
        the file does not start with one.
        """
        self.ifile.seek(0)
        if self.ifile.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            # No BOM: the bytes just consumed are real data, so rewind.
            self.ifile.seek(0)


class UTF16Reader(BaseReader):
    """
    Reader for UTF-16 input, recoded to UTF-8 through codecs.EncodedFile.
    """

    def __init__(self, ifile):
        # EncodedFile decodes the UTF-16 bytes read from ifile and
        # re-encodes them as UTF-8, so self.ifile yields UTF-8 byte strings.
        new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16')
        # self.enc records the encoding of the underlying file; it is not
        # used by the readline() defined below.
        BaseReader.__init__(self, new_file, 'utf16')
        self.reset()

    def readline(self):
        """
        Return the next line from the recoded stream.

        If that line is empty or whitespace only, it is dropped and the
        line after it is returned instead (only one line is skipped).
        """
        # NOTE(review): unlike BaseReader.readline() this returns the
        # recoded byte string without decoding it -- confirm that the
        # consumer of this reader expects that.
        line = self.ifile.readline()
        if line.strip():
            return line
        return self.ifile.readline()


class AnsiReader(BaseReader):
    """Reader that decodes every line as latin1."""

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'latin1')


class AnselReader(BaseReader):
    """Reader that converts every raw line with ansel_to_utf8()."""

    def __init__(self, ifile):
        # No codec name is needed: readline() below never consults self.enc.
        BaseReader.__init__(self, ifile, "")

    def readline(self):
        """Return the next raw line passed through ansel_to_utf8()."""
        return ansel_to_utf8(self.ifile.readline())
#------------------------------------------------------------------------- # # GedcomParser @@ -783,13 +845,13 @@ class GedcomParser(UpdateCallback): enc = stage_one.get_encoding() if enc == "ANSEL": - rdr = GedcomChar.AnselReader(ifile) + rdr = AnselReader(ifile) elif enc in ("UTF-8", "UTF8"): - rdr = GedcomChar.UTF8Reader(ifile) + rdr = UTF8Reader(ifile) elif enc in ("UTF-16", "UTF16", "UNICODE"): - rdr = GedcomChar.UTF16Reader(ifile) + rdr = UTF16Reader(ifile) else: - rdr = GedcomChar.AnsiReader(ifile) + rdr = AnsiReader(ifile) self.lexer = GedcomLex.Reader(rdr) self.filename = filename