2007-03-08 Don Allingham <don@gramps-project.org>

* src/GrampsDbUtils/_GedcomParse.py * src/GrampsDbUtils/_GedcomChar.py * src/GrampsDbUtils/_GedcomLex.py svn: r8286
2007-03-09 06:58:45 +00:00
parent c1c6e32085
commit 32e6699bff
4 changed files with 62 additions and 43 deletions
--- a/5
+++ b/5
@@ -1,3 +1,8 @@
 2007-03-08  Don Allingham  <don@gramps-project.org>
 	* src/GrampsDbUtils/_GedcomParse.py
 	* src/GrampsDbUtils/_GedcomChar.py
 	* src/GrampsDbUtils/_GedcomLex.py
 2007-03-06  Brian Matherly  <brian@gramps-project.org>
 	* src/plugins/AncestorChart.py: Deleted - just an old version of 
 	  AncestorChart2.py
--- a/src/GrampsDbUtils/_GedcomChar.py
+++ b/src/GrampsDbUtils/_GedcomChar.py
@@ -19,6 +19,7 @@
 #
 from ansel_utf8 import ansel_to_utf8
 import codecs
 class BaseReader:
    def __init__(self, ifile, encoding):
@@ -53,13 +54,16 @@ class UTF8Reader(BaseReader):
 class UTF16Reader(BaseReader):
    def __init__(self, ifile):
-        BaseReader.__init__(self, ifile, 'utf16')
+        new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16')
        BaseReader.__init__(self, new_file, 'utf16')
        self.reset()
-    def reset(self):
+    def readline(self):
-        self.ifile.seek(0)
+        l = self.ifile.readline()
-        data = self.ifile.read(2)
+        if l.strip():
-        if data != "\xff\xfe":
+            return l
-            self.ifile.seek(0)
+        else:
            return self.ifile.readline()
 class AnsiReader(BaseReader):
--- a/src/GrampsDbUtils/_GedcomLex.py
+++ b/src/GrampsDbUtils/_GedcomLex.py
@@ -18,8 +18,6 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #
 # $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
 "Import from GEDCOM"
 __revision__ = "$Revision: $"
@@ -131,9 +129,9 @@ class GedLine:
    Line Number, Level, Token Value, Token Text, and Data
-    Data is dependent on the context the Token Value. For most of tokens, this is
+    Data is dependent on the context the Token Value. For most of tokens, 
-    just a text string. However, for certain tokens where we know the context, we
+    this is just a text string. However, for certain tokens where we know 
-    can provide some value. The current parsed tokens are:
+    the context, we can provide some value. The current parsed tokens are:
    TOKEN_DATE   - RelLib.Date
    TOKEN_SEX    - RelLib.Person gender item
@@ -142,13 +140,13 @@ class GedLine:
    def __init__(self, data):
        """
-        If the level is 0, then this is a top level instance. In this case, we may
+        If the level is 0, then this is a top level instance. In this case, 
-        find items in the form of:
+        we may find items in the form of:
        <LEVEL> @ID@ <ITEM>
-        If this is not the top level, we check the MAP_DATA array to see if there is
+        If this is not the top level, we check the MAP_DATA array to see if 
-        a conversion function for the data.
+        there is a conversion function for the data.
        """
        self.line = data[4]
        self.level = data[0]
@@ -369,8 +367,8 @@ class Reader:
            except:
                continue
-            data = (level, tokens.get(line[1], TOKEN_UNKNOWN), line[2], line[1], 
+            token = tokens.get(line[1], TOKEN_UNKNOWN)
-                    self.index)
+            data = (level, token, line[2], line[1], self.index)
            func = self.func_map.get(data[1])
            if func:
--- a/src/GrampsDbUtils/_GedcomParse.py
+++ b/src/GrampsDbUtils/_GedcomParse.py
@@ -93,6 +93,7 @@ import os
 import sys
 import re
 import time
 import codecs
 from gettext import gettext as _
 #------------------------------------------------------------------------
@@ -227,7 +228,6 @@ for _val in familyConstantEvents.keys():
 # regular expressions
 #
 #-------------------------------------------------------------------------
 INT_RE	   = re.compile(r"\s*(\d+)\s*$")
 NOTE_RE	   = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
 CONT_RE	   = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
 CONC_RE	   = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
@@ -246,14 +246,23 @@ class StageOne:
    def parse(self):
 	current = ""
-	line = self.ifile.read(3)
+	line = self.ifile.read(2)
 	if line == "\xef\xbb":
 	    self.ifile.read(1)
 	    self.enc = "UTF8"
            self.reader = self.ifile
        elif line == "\xff\xfe":
 	    self.enc = "UTF16"
            self.ifile.seek(0)
            self.reader = codecs.EncodedFile(self.ifile, 'utf8', 'utf16')
 	else:
 	    self.ifile.seek(0)
            self.reader = self.ifile
-	for line in self.ifile:
+	for line in self.reader:
            line = line.strip()
            if not line:
                continue
 	    self.lcnt += 1
 	    data = line.split(None, 2) + ['']
@@ -267,6 +276,7 @@ class StageOne:
 		key = key.strip()
 	    except:
 		LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
                continue
 	    if level == 0 and key[0] == '@':
 		if value == ("FAM", "FAMILY") :
@@ -908,8 +918,8 @@ class GedcomParser(UpdateCallback):
    def __find_from_handle(self, gramps_id, table):
 	"""
 	Finds a handle corresponding the the specified GRAMPS ID. The passed
-	table contains the mapping. If the value is found, we return it, otherwise
+	table contains the mapping. If the value is found, we return it, 
-	we create a new handle, store it, and return it.
+        otherwise we create a new handle, store it, and return it.
 	"""
 	intid = table.get(gramps_id)
 	if not intid:
@@ -1011,8 +1021,8 @@ class GedcomParser(UpdateCallback):
 	already used (is in the db), we return the item in the db. Otherwise,
 	we create a new repository, assign the handle and GRAMPS ID.
-	Some GEDCOM "flavors" destroy the specification, and declare the repository
+	Some GEDCOM "flavors" destroy the specification, and declare the 
-	inline instead of in a object. 
+        repository inline instead of in a object. 
 	"""
 	repository = RelLib.Repository()
 	if not gramps_id:
@@ -1038,8 +1048,8 @@ class GedcomParser(UpdateCallback):
 	already used (is in the db), we return the item in the db. Otherwise,
 	we create a new repository, assign the handle and GRAMPS ID.
-	Some GEDCOM "flavors" destroy the specification, and declare the repository
+	Some GEDCOM "flavors" destroy the specification, and declare the 
-	inline instead of in a object. 
+        repository inline instead of in a object. 
 	"""
 	note = RelLib.Note()
 	if not gramps_id:
@@ -1262,9 +1272,9 @@ class GedcomParser(UpdateCallback):
    def __parse_level(self, state, __map, default):
 	"""
-	Loops trough the current GEDCOM level level, calling the appropriate functions
+	Loops trough the current GEDCOM level level, calling the appropriate 
-	associated with the TOKEN. If no matching function for the token is found, the
+        functions associated with the TOKEN. If no matching function for the 
-	default function is called instead.
+        token is found, the default function is called instead.
 	"""
 	while True:
 	    line = self.__get_next_line()
@@ -1322,7 +1332,8 @@ class GedcomParser(UpdateCallback):
 	"""
 	# find the person
-	self.person = self.__find_or_create_person(self.pid_map[line.token_text])
+        real_id = self.pid_map[line.token_text]
 	self.person = self.__find_or_create_person(real_id)
 	# set up the state for the parsing
 	state = GedcomUtils.CurrentState(person=self.person, level=1)
@@ -1597,8 +1608,8 @@ class GedcomParser(UpdateCallback):
    def __person_birt(self, line, state):
 	"""
 	Parses GEDCOM BIRT tag into a GRAMPS birth event. Additional work
-	must be done, since additional handling must be done by GRAMPS to set this up
+	must be done, since additional handling must be done by GRAMPS to set 
-	as a birth reference event.
+        this up	as a birth reference event.
 	   n  BIRT [Y|<NULL>] {1:1}
 	   +1 <<EVENT_DETAIL>> {0:1} p.*
@@ -1642,8 +1653,8 @@ class GedcomParser(UpdateCallback):
    def __person_deat(self, line, state):
 	"""
 	Parses GEDCOM DEAT tag into a GRAMPS birth event. Additional work
-	must be done, since additional handling must be done by GRAMPS to set this up
+	must be done, since additional handling must be done by GRAMPS to set 
-	as a death reference event.
+        this up	as a death reference event.
 	   n  DEAT [Y|<NULL>] {1:1}
 	   +1 <<EVENT_DETAIL>> {0:1} p.*
@@ -2147,8 +2158,8 @@ class GedcomParser(UpdateCallback):
    def __lds_form(self, line, state): 
 	"""
-	Parses the FORM tag thate defines the place structure for a place. This
+	Parses the FORM tag thate defines the place structure for a place. 
-	tag, if found, will override any global place structure.
+        This tag, if found, will override any global place structure.
 	@param line: The current line in GedLine format
 	@type line: GedLine
@@ -2384,8 +2395,8 @@ class GedcomParser(UpdateCallback):
    def __person_asso_type(self, line, state): 
 	"""
 	Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when
-	the tag represents an INDI. So if the data is not INDI, we set the ignore
+	the tag represents an INDI. So if the data is not INDI, we set the 
-	flag, so that we ignore the record.
+        ignore flag, so that we ignore the record.
 	@param line: The current line in GedLine format
 	@type line: GedLine
@@ -2942,8 +2953,8 @@ class GedcomParser(UpdateCallback):
    def __event_place(self, line, state):
 	"""
 	Parse the place portion of a event. A special case has to be made for
-	Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC field
+	Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC 
-	to store the description or value assocated with the event.
+        field to store the description or value assocated with the event.
 	 n  PLAC <PLACE_VALUE> {1:1}
 	 +1 FORM <PLACE_HIERARCHY> {0:1}
@@ -3644,7 +3655,8 @@ class GedcomParser(UpdateCallback):
    def __repo_ref_medi(self, line, state):
 	name = line.data
-	mtype = MEDIA_MAP.get(name.lower(), (RelLib.SourceMediaType.CUSTOM, name))
+	mtype = MEDIA_MAP.get(name.lower(), 
                              (RelLib.SourceMediaType.CUSTOM, name))
 	state.repo_ref.set_media_type(mtype)
    def __repo_ref_note(self, line, state):