2007-03-08 Don Allingham <don@gramps-project.org>

* src/GrampsDbUtils/_GedcomParse.py
* src/GrampsDbUtils/_GedcomChar.py
* src/GrampsDbUtils/_GedcomLex.py



svn: r8286
This commit is contained in:
Don Allingham
2007-03-09 06:58:45 +00:00
parent c1c6e32085
commit 32e6699bff
4 changed files with 62 additions and 43 deletions

View File

@ -1,3 +1,8 @@
2007-03-08 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py
* src/GrampsDbUtils/_GedcomChar.py
* src/GrampsDbUtils/_GedcomLex.py
2007-03-06 Brian Matherly <brian@gramps-project.org> 2007-03-06 Brian Matherly <brian@gramps-project.org>
* src/plugins/AncestorChart.py: Deleted - just an old version of * src/plugins/AncestorChart.py: Deleted - just an old version of
AncestorChart2.py AncestorChart2.py

View File

@ -19,6 +19,7 @@
# #
from ansel_utf8 import ansel_to_utf8 from ansel_utf8 import ansel_to_utf8
import codecs
class BaseReader: class BaseReader:
def __init__(self, ifile, encoding): def __init__(self, ifile, encoding):
@ -53,13 +54,16 @@ class UTF8Reader(BaseReader):
class UTF16Reader(BaseReader): class UTF16Reader(BaseReader):
def __init__(self, ifile): def __init__(self, ifile):
BaseReader.__init__(self, ifile, 'utf16') new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16')
BaseReader.__init__(self, new_file, 'utf16')
self.reset()
def reset(self): def readline(self):
self.ifile.seek(0) l = self.ifile.readline()
data = self.ifile.read(2) if l.strip():
if data != "\xff\xfe": return l
self.ifile.seek(0) else:
return self.ifile.readline()
class AnsiReader(BaseReader): class AnsiReader(BaseReader):

View File

@ -18,8 +18,6 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# #
# $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
"Import from GEDCOM" "Import from GEDCOM"
__revision__ = "$Revision: $" __revision__ = "$Revision: $"
@ -131,9 +129,9 @@ class GedLine:
Line Number, Level, Token Value, Token Text, and Data Line Number, Level, Token Value, Token Text, and Data
Data is dependent on the context the Token Value. For most of tokens, this is Data is dependent on the context the Token Value. For most of tokens,
just a text string. However, for certain tokens where we know the context, we this is just a text string. However, for certain tokens where we know
can provide some value. The current parsed tokens are: the context, we can provide some value. The current parsed tokens are:
TOKEN_DATE - RelLib.Date TOKEN_DATE - RelLib.Date
TOKEN_SEX - RelLib.Person gender item TOKEN_SEX - RelLib.Person gender item
@ -142,13 +140,13 @@ class GedLine:
def __init__(self, data): def __init__(self, data):
""" """
If the level is 0, then this is a top level instance. In this case, we may If the level is 0, then this is a top level instance. In this case,
find items in the form of: we may find items in the form of:
<LEVEL> @ID@ <ITEM> <LEVEL> @ID@ <ITEM>
If this is not the top level, we check the MAP_DATA array to see if there is If this is not the top level, we check the MAP_DATA array to see if
a conversion function for the data. there is a conversion function for the data.
""" """
self.line = data[4] self.line = data[4]
self.level = data[0] self.level = data[0]
@ -369,8 +367,8 @@ class Reader:
except: except:
continue continue
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), line[2], line[1], token = tokens.get(line[1], TOKEN_UNKNOWN)
self.index) data = (level, token, line[2], line[1], self.index)
func = self.func_map.get(data[1]) func = self.func_map.get(data[1])
if func: if func:

View File

@ -93,6 +93,7 @@ import os
import sys import sys
import re import re
import time import time
import codecs
from gettext import gettext as _ from gettext import gettext as _
#------------------------------------------------------------------------ #------------------------------------------------------------------------
@ -227,7 +228,6 @@ for _val in familyConstantEvents.keys():
# regular expressions # regular expressions
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
INT_RE = re.compile(r"\s*(\d+)\s*$")
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$") NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$") CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$") CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
@ -246,14 +246,23 @@ class StageOne:
def parse(self): def parse(self):
current = "" current = ""
line = self.ifile.read(3) line = self.ifile.read(2)
if line == "\xef\xbb": if line == "\xef\xbb":
self.ifile.read(1) self.ifile.read(1)
self.enc = "UTF8" self.enc = "UTF8"
self.reader = self.ifile
elif line == "\xff\xfe":
self.enc = "UTF16"
self.ifile.seek(0)
self.reader = codecs.EncodedFile(self.ifile, 'utf8', 'utf16')
else: else:
self.ifile.seek(0) self.ifile.seek(0)
self.reader = self.ifile
for line in self.ifile: for line in self.reader:
line = line.strip()
if not line:
continue
self.lcnt += 1 self.lcnt += 1
data = line.split(None, 2) + [''] data = line.split(None, 2) + ['']
@ -267,6 +276,7 @@ class StageOne:
key = key.strip() key = key.strip()
except: except:
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt) LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
continue
if level == 0 and key[0] == '@': if level == 0 and key[0] == '@':
if value == ("FAM", "FAMILY") : if value == ("FAM", "FAMILY") :
@ -908,8 +918,8 @@ class GedcomParser(UpdateCallback):
def __find_from_handle(self, gramps_id, table): def __find_from_handle(self, gramps_id, table):
""" """
Finds a handle corresponding to the specified GRAMPS ID. The passed Finds a handle corresponding to the specified GRAMPS ID. The passed
table contains the mapping. If the value is found, we return it, otherwise table contains the mapping. If the value is found, we return it,
we create a new handle, store it, and return it. otherwise we create a new handle, store it, and return it.
""" """
intid = table.get(gramps_id) intid = table.get(gramps_id)
if not intid: if not intid:
@ -1011,8 +1021,8 @@ class GedcomParser(UpdateCallback):
already used (is in the db), we return the item in the db. Otherwise, already used (is in the db), we return the item in the db. Otherwise,
we create a new repository, assign the handle and GRAMPS ID. we create a new repository, assign the handle and GRAMPS ID.
Some GEDCOM "flavors" destroy the specification, and declare the repository Some GEDCOM "flavors" destroy the specification, and declare the
inline instead of in an object. repository inline instead of in an object.
""" """
repository = RelLib.Repository() repository = RelLib.Repository()
if not gramps_id: if not gramps_id:
@ -1038,8 +1048,8 @@ class GedcomParser(UpdateCallback):
already used (is in the db), we return the item in the db. Otherwise, already used (is in the db), we return the item in the db. Otherwise,
we create a new repository, assign the handle and GRAMPS ID. we create a new repository, assign the handle and GRAMPS ID.
Some GEDCOM "flavors" destroy the specification, and declare the repository Some GEDCOM "flavors" destroy the specification, and declare the
inline instead of in an object. repository inline instead of in an object.
""" """
note = RelLib.Note() note = RelLib.Note()
if not gramps_id: if not gramps_id:
@ -1262,9 +1272,9 @@ class GedcomParser(UpdateCallback):
def __parse_level(self, state, __map, default): def __parse_level(self, state, __map, default):
""" """
Loops through the current GEDCOM level, calling the appropriate functions Loops through the current GEDCOM level, calling the appropriate
associated with the TOKEN. If no matching function for the token is found, the functions associated with the TOKEN. If no matching function for the
default function is called instead. token is found, the default function is called instead.
""" """
while True: while True:
line = self.__get_next_line() line = self.__get_next_line()
@ -1322,7 +1332,8 @@ class GedcomParser(UpdateCallback):
""" """
# find the person # find the person
self.person = self.__find_or_create_person(self.pid_map[line.token_text]) real_id = self.pid_map[line.token_text]
self.person = self.__find_or_create_person(real_id)
# set up the state for the parsing # set up the state for the parsing
state = GedcomUtils.CurrentState(person=self.person, level=1) state = GedcomUtils.CurrentState(person=self.person, level=1)
@ -1597,8 +1608,8 @@ class GedcomParser(UpdateCallback):
def __person_birt(self, line, state): def __person_birt(self, line, state):
""" """
Parses GEDCOM BIRT tag into a GRAMPS birth event. Additional work Parses GEDCOM BIRT tag into a GRAMPS birth event. Additional work
must be done, since additional handling must be done by GRAMPS to set this up must be done, since additional handling must be done by GRAMPS to set
as a birth reference event. this up as a birth reference event.
n BIRT [Y|<NULL>] {1:1} n BIRT [Y|<NULL>] {1:1}
+1 <<EVENT_DETAIL>> {0:1} p.* +1 <<EVENT_DETAIL>> {0:1} p.*
@ -1642,8 +1653,8 @@ class GedcomParser(UpdateCallback):
def __person_deat(self, line, state): def __person_deat(self, line, state):
""" """
Parses GEDCOM DEAT tag into a GRAMPS death event. Additional work Parses GEDCOM DEAT tag into a GRAMPS death event. Additional work
must be done, since additional handling must be done by GRAMPS to set this up must be done, since additional handling must be done by GRAMPS to set
as a death reference event. this up as a death reference event.
n DEAT [Y|<NULL>] {1:1} n DEAT [Y|<NULL>] {1:1}
+1 <<EVENT_DETAIL>> {0:1} p.* +1 <<EVENT_DETAIL>> {0:1} p.*
@ -2147,8 +2158,8 @@ class GedcomParser(UpdateCallback):
def __lds_form(self, line, state): def __lds_form(self, line, state):
""" """
Parses the FORM tag that defines the place structure for a place. This Parses the FORM tag that defines the place structure for a place.
tag, if found, will override any global place structure. This tag, if found, will override any global place structure.
@param line: The current line in GedLine format @param line: The current line in GedLine format
@type line: GedLine @type line: GedLine
@ -2384,8 +2395,8 @@ class GedcomParser(UpdateCallback):
def __person_asso_type(self, line, state): def __person_asso_type(self, line, state):
""" """
Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when
the tag represents an INDI. So if the data is not INDI, we set the ignore the tag represents an INDI. So if the data is not INDI, we set the
flag, so that we ignore the record. ignore flag, so that we ignore the record.
@param line: The current line in GedLine format @param line: The current line in GedLine format
@type line: GedLine @type line: GedLine
@ -2942,8 +2953,8 @@ class GedcomParser(UpdateCallback):
def __event_place(self, line, state): def __event_place(self, line, state):
""" """
Parse the place portion of an event. A special case has to be made for Parse the place portion of an event. A special case has to be made for
Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC field Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC
to store the description or value associated with the event. field to store the description or value associated with the event.
n PLAC <PLACE_VALUE> {1:1} n PLAC <PLACE_VALUE> {1:1}
+1 FORM <PLACE_HIERARCHY> {0:1} +1 FORM <PLACE_HIERARCHY> {0:1}
@ -3644,7 +3655,8 @@ class GedcomParser(UpdateCallback):
def __repo_ref_medi(self, line, state): def __repo_ref_medi(self, line, state):
name = line.data name = line.data
mtype = MEDIA_MAP.get(name.lower(), (RelLib.SourceMediaType.CUSTOM, name)) mtype = MEDIA_MAP.get(name.lower(),
(RelLib.SourceMediaType.CUSTOM, name))
state.repo_ref.set_media_type(mtype) state.repo_ref.set_media_type(mtype)
def __repo_ref_note(self, line, state): def __repo_ref_note(self, line, state):