2007-02-24  Don Allingham <don@gramps-project.org>

* src/DisplayTabs/_NoteModel.py: added
* src/DisplayTabs/_NoteTab.py: support new list
* src/GrampsDbUtils/_GedcomParse.py: enhancements to parsing
* src/GrampsDbUtils/_ReadGedcom.py: handle encoding properly
* src/GrampsDbUtils/_GedcomChar.py: new encoding interface
* src/GrampsDbUtils/_GedcomLex.py: cleanup

svn: r8231
src/GrampsDbUtils/_GedcomChar.py  (new file, 76 lines)
@@ -0,0 +1,76 @@
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2005 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

from ansel_utf8 import ansel_to_utf8

class BaseReader:
    def __init__(self, ifile, encoding):
        self.ifile = ifile
        self.enc = encoding

    def reset(self):
        self.ifile.seek(0)

    def readline(self):
        return unicode(self.ifile.readline(),
                       encoding=self.enc,
                       errors='replace').strip('\n\r')

class UTF8Reader(BaseReader):

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'utf8')

    def reset(self):
        self.ifile.seek(0)
        data = self.ifile.read(3)
        if data != "\xef\xbb\xbf":
            self.ifile.seek(0)

    def readline(self):
        return unicode(self.ifile.readline(),
                       encoding=self.enc,
                       errors='replace').strip('\n\r')

class UTF16Reader(BaseReader):

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'utf16')

    def reset(self):
        self.ifile.seek(0)
        data = self.ifile.read(2)
        if data != "\xff\xfe":
            self.ifile.seek(0)

class AnsiReader(BaseReader):

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, 'latin1')

class AnselReader(BaseReader):

    def __init__(self, ifile):
        BaseReader.__init__(self, ifile, "")

    def readline(self):
        return ansel_to_utf8(self.ifile.readline().strip('\n\r'))
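A minimal usage sketch of the new reader interface (not part of the commit): each reader wraps an already-open file object and hands back decoded unicode lines, with reset() skipping a byte-order mark where one applies. The file name below is hypothetical, and the snippet is Python 2 like the rest of the module.

# Sketch only: exercising the new _GedcomChar readers by hand.
ifile = open("example.ged", "rU")      # hypothetical GEDCOM file
reader = UTF8Reader(ifile)
reader.reset()                         # skips a leading UTF-8 BOM if present
while True:
    line = reader.readline()           # unicode, trailing newline stripped
    if not line:
        break
    print line
ifile.close()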
@@ -22,21 +22,22 @@

"Import from GEDCOM"

__revision__ = "$Revision: $"
__author__ = "Don Allingham"

#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------

import re
import string
from gettext import gettext as _

#-------------------------------------------------------------------------
#
# GRAMPS modules
#
#-------------------------------------------------------------------------
from ansel_utf8 import ansel_to_utf8

from _GedcomInfo import *
from _GedcomTokens import *
@@ -45,60 +46,25 @@ from DateHandler._DateParser import DateParser

#-------------------------------------------------------------------------
#
# latin/utf8 conversions
#
# constants #
#-------------------------------------------------------------------------

def utf8_to_latin(msg):
    """
    Converts a string from unicode to iso-8859-1. If any illegal characters
    are found, they are converted to ?

    @param msg: unicode string to convert
    @type level: unicode
    @return: Returns the string, converted to a ISO-8859-1 object
    @rtype: str
    """
    return msg.encode('iso-8859-1', 'replace')

def latin_to_utf8(s):
    if type(s) == unicode:
        return s
    else:
        return unicode(s,'iso-8859-1')

def nocnv(s):
    return unicode(s,errors='replace')

#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
ANSEL = 1
UNICODE = 2
UPDATE = 25

_transtable = string.maketrans('','')
_delc = _transtable[0:8] + _transtable[10:31]
_transtable2 = _transtable[0:128] + ('?' * 128)

ged2gramps = {}
GED2GRAMPS = {}
for _val in personalConstantEvents.keys():
    _key = personalConstantEvents[_val]
    if _key != "":
        ged2gramps[_key] = _val
        GED2GRAMPS[_key] = _val

for _val in familyConstantEvents.keys():
    _key = familyConstantEvents[_val]
    if _key != "":
        ged2gramps[_key] = _val
        GED2GRAMPS[_key] = _val

ged2attr = {}
GED2ATTR = {}
for _val in personalConstantAttributes.keys():
    _key = personalConstantAttributes[_val]
    if _key != "":
        ged2attr[_key] = _val
        GED2ATTR[_key] = _val

#-------------------------------------------------------------------------
#
@@ -106,26 +72,24 @@ for _val in personalConstantAttributes.keys():
#
#-------------------------------------------------------------------------

intRE = re.compile(r"\s*(\d+)\s*$")
modRegexp = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
calRegexp = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
rangeRegexp = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
spanRegexp = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
intRegexp = re.compile(r"\s*INT\s+([^(]+)\((.*)\)$")
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
SPAN = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")

_calendar_map = {
CALENDAR_MAP = {
    "FRENCH R" : RelLib.Date.CAL_FRENCH,
    "JULIAN" : RelLib.Date.CAL_JULIAN,
    "HEBREW" : RelLib.Date.CAL_HEBREW,
    }

_quality_map = {
QUALITY_MAP = {
    'CAL' : RelLib.Date.QUAL_CALCULATED,
    'INT' : RelLib.Date.QUAL_CALCULATED,
    'EST' : RelLib.Date.QUAL_ESTIMATED,
    }

_sex_map = {
SEX_MAP = {
    'F' : RelLib.Person.FEMALE,
    'M' : RelLib.Person.MALE,
    }
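A short sketch (not part of the commit) of what the renamed date expressions pull out of a GEDCOM date value; the pattern is copied from RANGE above and the sample string is made up.

# Sketch only: RANGE splits a calendar-escaped "BET ... AND ..." value
# into (calendar1, date1, calendar2, date2).
import re

RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
match = RANGE.match("BET @#DJULIAN@ 1 JAN 1750 AND @#DJULIAN@ 20 FEB 1750")
if match:
    print match.groups()   # ('JULIAN', '1 JAN 1750', 'JULIAN', '20 FEB 1750')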
@@ -185,20 +149,21 @@ class GedLine:
        self.data = data[2]

        if self.level == 0:
            if self.token_text and self.token_text[0] == '@' and self.token_text[-1] == '@':
            if self.token_text and self.token_text[0] == '@' \
                    and self.token_text[-1] == '@':
                self.token = TOKEN_ID
                self.token_text = self.token_text[1:-1]
                self.data = self.data.strip()
        else:
            f = MAP_DATA.get(self.token)
            if f:
                f(self)
            func = MAP_DATA.get(self.token)
            if func:
                func(self)

    def calc_sex(self):
        """
        Converts the data field to a RelLib token indicating the gender
        """
        self.data = _sex_map.get(self.data.strip(),RelLib.Person.UNKNOWN)
        self.data = SEX_MAP.get(self.data.strip(), RelLib.Person.UNKNOWN)

    def calc_date(self):
        """
@@ -212,12 +177,12 @@ class GedLine:
        change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and
        the data is assigned to the associated GRAMPS EventType
        """
        token = ged2gramps.get(self.token_text)
        token = GED2GRAMPS.get(self.token_text)
        if token:
            self.token = TOKEN_GEVENT
            self.data = token
        else:
            token = ged2attr.get(self.token_text)
            token = GED2ATTR.get(self.token_text)
            if token:
                attr = RelLib.Attribute()
                attr.set_value(self.data)
@@ -226,10 +191,10 @@ class GedLine:
                self.data = attr

    def calc_note(self):
        d = self.data.strip()
        if len(d) > 2 and d[0] == '@' and d[-1] == '@':
        gid = self.data.strip()
        if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
            self.token = TOKEN_RNOTE
            self.data = d[1:-1]
            self.data = gid[1:-1]

    def calc_nchi(self):
        attr = RelLib.Attribute()
@@ -245,10 +210,6 @@ class GedLine:
        self.data = attr
        self.token = TOKEN_ATTR

    def calc_lds(self):
        self.data = _
        self.token = TOKEN_ATTR

    def __repr__(self):
        return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
                                      self.token_text, self.data)
@@ -276,7 +237,7 @@ MAP_DATA = {
#
#-------------------------------------------------------------------------

_dp = GedcomDateParser()
DATE_CNV = GedcomDateParser()

def extract_date(text):
    """
@@ -285,54 +246,55 @@ def extract_date(text):
    dateobj = RelLib.Date()
    try:
        # extract out the MOD line
        match = modRegexp.match(text)
        match = MOD.match(text)
        if match:
            (mod, text) = match.groups()
            qual = _quality_map.get(mod, RelLib.Date.QUAL_NONE)
            qual = QUALITY_MAP.get(mod, RelLib.Date.QUAL_NONE)
        else:
            qual = RelLib.Date.QUAL_NONE

        # parse the range if we match, if so, return
        match = rangeRegexp.match(text)
        match = RANGE.match(text)
        if match:
            (cal1,data1,cal2,data2) = match.groups()
            (cal1, data1, cal2, data2) = match.groups()

            cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
            cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)

            start = _dp.parse(data1)
            stop = _dp.parse(data2)
            start = DATE_CNV.parse(data1)
            stop = DATE_CNV.parse(data2)
            dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_RANGE, cal,
                        start.get_start_date() + stop.get_start_date())
            dateobj.set_quality(qual)
            return dateobj

        # parse a span if we match
        match = spanRegexp.match(text)
        match = SPAN.match(text)
        if match:
            (cal1,data1,cal2,data2) = match.groups()
            (cal1, data1, cal2, data2) = match.groups()

            cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
            cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)

            start = _dp.parse(data1)
            stop = _dp.parse(data2)
            start = DATE_CNV.parse(data1)
            stop = DATE_CNV.parse(data2)
            dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_SPAN, cal,
                        start.get_start_date() + stop.get_start_date())
            dateobj.set_quality(qual)
            return dateobj

        match = calRegexp.match(text)
        match = CAL.match(text)
        if match:
            (abt,cal,data) = match.groups()
            dateobj = _dp.parse("%s %s" % (abt, data))
            dateobj.set_calendar(_calendar_map.get(cal, RelLib.Date.CAL_GREGORIAN))
            (abt, cal, data) = match.groups()
            dateobj = DATE_CNV.parse("%s %s" % (abt, data))
            dateobj.set_calendar(CALENDAR_MAP.get(cal,
                                                  RelLib.Date.CAL_GREGORIAN))
            dateobj.set_quality(qual)
            return dateobj

        dateobj = _dp.parse(text)
        dateobj = DATE_CNV.parse(text)
        dateobj.set_quality(qual)
        return dateobj
    except IOError:
        return self.dp.set_text(text)
        return DATE_CNV.set_text(text)

#-------------------------------------------------------------------------
#
@@ -341,8 +303,8 @@ def extract_date(text):
#-------------------------------------------------------------------------
class Reader:

    def __init__(self, f):
        self.f = f
    def __init__(self, ifile):
        self.ifile = ifile
        self.current_list = []
        self.eof = False
        self.cnv = None
@@ -353,11 +315,7 @@ class Reader:
            TOKEN_CONC : self._fix_token_conc,
            }

    def set_charset_fn(self,cnv):
        print "Character set changed", cnv
        self.cnv = cnv

    def set_broken_conc(self,broken):
    def set_broken_conc(self, broken):
        self.func_map = {
            TOKEN_CONT : self._fix_token_cont,
            TOKEN_CONC : self._fix_token_broken_conc,
@@ -372,46 +330,39 @@ class Reader:
        return None

    def _fix_token_cont(self, data):
        l = self.current_list[0]
        new_value = l[2]+'\n'+data[2]
        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
        line = self.current_list[0]
        new_value = line[2]+'\n'+data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_conc(self, data):
        l = self.current_list[0]
        new_value = l[2] + data[2]
        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
        line = self.current_list[0]
        new_value = line[2] + data[2]
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def _fix_token_broken_conc(self, data):
        l = self.current_list[0]
        new_value = u"%s %s" % (l[2], data[2])
        self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
        line = self.current_list[0]
        new_value = u"%s %s" % (line[2], data[2])
        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])

    def readahead(self):
        while len(self.current_list) < 5:
            line = self.f.readline()
            line = self.ifile.readline()
            self.index += 1
            if not line:
                self.eof = True
                return

            if self.cnv:
                try:
                    line = self.cnv(line)
                except:
                    line = self.cnv(line.translate(_transtable2))
            else:
                line = unicode(line,errors='replace')
            line = line.split(None, 2) + ['']

            line = line.split(None,2) + ['']

            val = line[2].rstrip('\r\n')
            val = line[2]

            try:
                level = int(line[0])
            except:
                level = 0

            data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1], self.index)
            data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1],
                    self.index)

            func = self.func_map.get(data[1])
            if func:
@@ -419,25 +370,3 @@ class Reader:
        else:
            self.current_list.insert(0, data)

if __name__ == "__main__":
    import sys

    def run():
        print "Reading", sys.argv[1]
        a = Reader(sys.argv[1])
        while True:
            line = a.readline()
            print line
            if not line: break

#    import Utils
#    Utils.profile(run)
    run()

    print extract_date("20 JAN 2000")
    print extract_date("EST 20 JAN 2000")
    print extract_date("CAL 20 JAN 2000")
    print extract_date("ABT 20 JAN 2000")
    print extract_date("INT 20 JAN 2000")
    print extract_date("BET 20 JAN 2000 AND FEB 2000")
    print extract_date("FROM 20 JAN 2000 TO FEB 2000")
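For orientation (not part of the commit): the lexer queues five-element tuples of the form (level, token, value, token_text, line_number), and the _fix_token_* handlers above fold a CONT or CONC line into the value of the entry queued just before it. A made-up example:

# Sketch only: folding a CONC continuation into the previous entry,
# the same way _fix_token_conc does. Token values here are placeholders.
current_list = [(1, 'TOKEN_NOTE', u'He was born in a sma', 'NOTE', 42)]
conc = (2, 'TOKEN_CONC', u'll village', 'CONC', 43)

line = current_list[0]
new_value = line[2] + conc[2]
current_list[0] = (line[0], line[1], new_value, line[3], line[4])
print current_list[0][2]   # u'He was born in a small village'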
@@ -64,13 +64,11 @@ all tokens at the lower level.

For example:


1 BIRT
2 DATE 1 JAN 2000
2 UKNOWN TAG
3 NOTE DATA


The function parsing the individual at level 1, would encounter the BIRT tag.
It would look up the BIRT token in the table to see if a function as defined
for this TOKEN, and pass control to this function. This function would then
@@ -81,7 +79,6 @@ the level 2 parser, which would then encounter the "UKNOWN" tag. Since this is
not a valid token, it would not be in the table, and a function that would skip
all lines until the next level 2 token is found (in this case, skipping the
"3 NOTE DATA" line.

"""

__revision__ = "$Revision: $"
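A minimal sketch (not part of the commit) of the table-driven dispatch the docstring above describes: a known token is looked up in a map and handed to its handler, while an unknown token falls back to a handler that skips everything at deeper levels. All names here are made up for illustration; the real tables live in the parser below.

# Sketch only: table-driven token dispatch with a skip fallback.
def parse_birth(line):
    print "BIRT handler gets", line

def skip_unknown(line):
    print "unknown tag, skipping this level and everything below it"

LEVEL1_MAP = {
    'BIRT' : parse_birth,
}

def parse_level1(tag, line):
    func = LEVEL1_MAP.get(tag, skip_unknown)
    func(line)

parse_level1('BIRT', '1 BIRT')          # dispatches to parse_birth
parse_level1('UKNOWN', '2 UKNOWN TAG')  # falls back to the skip handler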
@@ -94,10 +91,8 @@ __author__ = "Don Allingham"
#-------------------------------------------------------------------------
import os
import re
import string
import time
from gettext import gettext as _
import copy

#------------------------------------------------------------------------
#
@@ -114,20 +109,19 @@ LOG = logging.getLogger(".GedcomImport")
#-------------------------------------------------------------------------
import Errors
import RelLib
from BasicUtils import NameDisplay
from BasicUtils import NameDisplay, UpdateCallback
import Utils
import Mime
import LdsUtils
from ansel_utf8 import ansel_to_utf8

from _GedcomInfo import *
from _GedcomTokens import *
from _GedcomLex import Reader
from _GedcomChar import *

import _GedcomUtils as GedcomUtils

from GrampsDb._GrampsDbConst import EVENT_KEY
from BasicUtils import UpdateCallback

try:
    import Config
@@ -145,53 +139,14 @@ ADDR_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)\s*(.*)')
ADDR2_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)')
ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)')


TRUNC_MSG = _("Your GEDCOM file is corrupted. "
              "It appears to have been truncated.")

#-------------------------------------------------------------------------
#
# latin/utf8 conversions
#
#-------------------------------------------------------------------------


def latin_to_utf8(msg):
    """
    Converts a string from iso-8859-1 to unicode. If the string is already
    unicode, we do nothing.

    @param msg: string to convert
    @type level: str
    @return: Returns the string, converted to a unicode object
    @rtype: unicode
    """
    if type(msg) == unicode:
        return msg
    else:
        return unicode(msg, 'iso-8859-1')

def nocnv(msg):
    """
    Null operation that makes sure that a unicode string remains a unicode
    string

    @param msg: unicode to convert
    @type level: unicode
    @return: Returns the string, converted to a unicode object
    @rtype: unicode
    """
    return unicode(msg)

#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
ANSEL = 1
UNICODE = 2
UPDATE = 25

TYPE_BIRTH = RelLib.ChildRefType()
TYPE_ADOPT = RelLib.ChildRefType(RelLib.ChildRefType.ADOPTED)
TYPE_FOSTER = RelLib.ChildRefType(RelLib.ChildRefType.FOSTER)
@@ -224,10 +179,6 @@ MIME_MAP = {
EVENT_FAMILY_STR = _("%(event_name)s of %(family)s")
EVENT_PERSON_STR = _("%(event_name)s of %(person)s")

TRANS_TABLE = string.maketrans('', '')
DEL_CHARS = TRANS_TABLE[0:8] + TRANS_TABLE[10:31]
TRANS_TABLE2 = TRANS_TABLE[0:128] + ('?' * 128)

FTW_BAD_PLACE = [
    RelLib.EventType.OCCUPATION,
    RelLib.EventType.RELIGION,
@@ -265,6 +216,7 @@ CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")

class StageOne:

    def __init__(self, ifile):
        self.ifile = ifile
        self.famc = {}
@@ -275,44 +227,47 @@ class StageOne:

    def parse(self):
        current = ""

        line = self.ifile.read(3)
        if line == "\xef\xbb":
            self.ifile.read(1)
            self.enc = "UTF8"
        else:
            self.ifile.seek(0)

        for line in self.ifile:
            self.lcnt +=1

            data = line.split(None,2) + ['']
            try:
                (level, key, value) = data[:3]
                value = value.strip()
                # convert the first value to an integer. We have to be a bit
                # careful here, since some GEDCOM files have garbage characters
                # at the front of the first file if they are unicode encoded.
                # So, if we have a failure to convert, check the last character
                # of the string, which shoul de a '0'
                try:
                    level = int(level)
                except:
                    level = int(level[-1])
                    level = 0
                key = key.strip()
            except:
                raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt)

            if level == 0 and key[0] == '@':
                if value == "FAM":
                if value == ("FAM", "FAMILY") :
                    current = key.strip()
                    current = current[1:-1]
                elif value == "INDI":
                elif value == ("INDI", "INDIVIDUAL"):
                    self.pcnt += 1
            elif key in ("HUSB", "WIFE") and value and value[0] == '@':
            elif key in ("HUSB", "HUSBAND", "WIFE") and value and value[0] == '@':
                value = value[1:-1]
                if self.fams.has_key(value):
                    self.fams[value].append(current)
                else:
                    self.fams[value] = [current]
            elif key == "CHIL" and value and value[0] == '@':
            elif key in ("CHIL", "CHILD") and value and value[0] == '@':
                value = value[1:-1]
                if self.famc.has_key(value):
                    self.famc[value].append(current)
                else:
                    self.famc[value] = [current]
            elif key == 'CHAR':
            elif key == 'CHAR' and not self.enc:
                self.enc = value

    def get_famc_map(self):
@@ -322,7 +277,10 @@ class StageOne:
        return self.fams

    def get_encoding(self):
        return self.enc
        return self.enc.upper()

    def set_encoding(self, enc):
        self.enc = enc

    def get_person_count(self):
        return self.pcnt
@@ -806,16 +764,20 @@ class GedcomParser(UpdateCallback):
            data = cursor.next()
        cursor.close()

        self.lexer = Reader(ifile)
        enc = stage_one.get_encoding()

        if enc == "ANSEL":
            rdr = AnselReader(ifile)
        elif enc in ("UTF-8", "UTF8"):
            rdr = UTF8Reader(ifile)
        elif enc in ("UTF-16", "UTF16", "UNICODE"):
            rdr = UTF16Reader(ifile)
        else:
            rdr = AnsiReader(ifile)

        self.lexer = Reader(rdr)
        self.filename = filename
        self.backoff = False
        self.override = False
#
#        if self.override != 0:
#            if self.override == 1:
#                self.lexer.set_charset_fn(ansel_to_utf8)
#            elif self.override == 2:
#                self.lexer.set_charset_fn(latin_to_utf8)

        fullpath = os.path.normpath(os.path.abspath(filename))
        self.geddir = os.path.dirname(fullpath)
@@ -1064,9 +1026,6 @@ class GedcomParser(UpdateCallback):
        """
        text = self.groups.line
        msg = _("Line %d was not understood, so it was ignored.") % text
        import traceback
        traceback.print_stack()
        print self.groups
        self.warn(msg)
        self.error_count += 1
        self.skip_subordinate_levels(level)
@@ -4039,11 +3998,8 @@ class GedcomParser(UpdateCallback):
        if genby == "GRAMPS":
            self.gedsource = self.gedmap.get_from_source_tag(line.data)
            self.lexer.set_broken_conc(self.gedsource.get_conc())
        elif line.token == TOKEN_CHAR and not self.override:
            if line.data == "ANSEL":
                self.lexer.set_charset_fn(ansel_to_utf8)
            elif line.data not in ("UNICODE","UTF-8","UTF8"):
                self.lexer.set_charset_fn(latin_to_utf8)
        elif line.token == TOKEN_CHAR:
            pass
            self.skip_subordinate_levels(2)
        elif line.token == TOKEN_GEDC:
            self.skip_subordinate_levels(2)
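Pieced together from the hunks above and the _ReadGedcom.py changes below (not part of the commit itself), the new import flow is: a StageOne pass sniffs the declared encoding, then the matching reader from _GedcomChar wraps the file and feeds already-decoded lines to the lexer. A condensed sketch, with a hypothetical file name:

# Sketch only: the two-pass flow this commit moves to.
ifile = open("example.ged", "rU")
stage_one = StageOne(ifile)
stage_one.parse()                      # pass 1: BOM/CHAR detection, FAM/INDI maps

enc = stage_one.get_encoding()         # already upper-cased
if enc == "ANSEL":
    rdr = AnselReader(ifile)
elif enc in ("UTF-8", "UTF8"):
    rdr = UTF8Reader(ifile)
elif enc in ("UTF-16", "UTF16", "UNICODE"):
    rdr = UTF16Reader(ifile)
else:
    rdr = AnsiReader(ifile)

ifile.seek(0)
lexer = Reader(rdr)                    # pass 2: lexer sees decoded unicode lines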
@@ -66,6 +66,7 @@ def importData(database, filename, callback=None, use_trans=False):
        dialog.destroy()
    else:
        code_set = None

    import2(database, filename, callback, code_set, use_trans)

def import2(database, filename, callback, code_set, use_trans):
@@ -74,7 +75,10 @@ def import2(database, filename, callback, code_set, use_trans):
        ifile = open(filename,"rU")
        np = StageOne(ifile)
        np.parse()
        print np.get_encoding()

        if code_set:
            np.set_encoding(code_set)

        ifile.seek(0)
        gedparse = GedcomParser(database, ifile, filename, callback, np)
    except IOError, msg:
@@ -85,7 +89,6 @@ def import2(database, filename, callback, code_set, use_trans):
            _("%s could not be imported") % filename + "\n" + str(msg))
        return


    if database.get_number_of_people() == 0:
        use_trans = False