2007-03-08 Don Allingham <don@gramps-project.org>
* src/GrampsDbUtils/_GedcomParse.py * src/GrampsDbUtils/_GedcomChar.py * src/GrampsDbUtils/_GedcomLex.py svn: r8286
This commit is contained in:
@ -1,3 +1,8 @@
|
|||||||
|
2007-03-08 Don Allingham <don@gramps-project.org>
|
||||||
|
* src/GrampsDbUtils/_GedcomParse.py
|
||||||
|
* src/GrampsDbUtils/_GedcomChar.py
|
||||||
|
* src/GrampsDbUtils/_GedcomLex.py
|
||||||
|
|
||||||
2007-03-06 Brian Matherly <brian@gramps-project.org>
|
2007-03-06 Brian Matherly <brian@gramps-project.org>
|
||||||
* src/plugins/AncestorChart.py: Deleted - just an old version of
|
* src/plugins/AncestorChart.py: Deleted - just an old version of
|
||||||
AncestorChart2.py
|
AncestorChart2.py
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
from ansel_utf8 import ansel_to_utf8
|
from ansel_utf8 import ansel_to_utf8
|
||||||
|
import codecs
|
||||||
|
|
||||||
class BaseReader:
|
class BaseReader:
|
||||||
def __init__(self, ifile, encoding):
|
def __init__(self, ifile, encoding):
|
||||||
@ -53,13 +54,16 @@ class UTF8Reader(BaseReader):
|
|||||||
class UTF16Reader(BaseReader):
|
class UTF16Reader(BaseReader):
|
||||||
|
|
||||||
def __init__(self, ifile):
|
def __init__(self, ifile):
|
||||||
BaseReader.__init__(self, ifile, 'utf16')
|
new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16')
|
||||||
|
BaseReader.__init__(self, new_file, 'utf16')
|
||||||
|
self.reset()
|
||||||
|
|
||||||
def reset(self):
|
def readline(self):
|
||||||
self.ifile.seek(0)
|
l = self.ifile.readline()
|
||||||
data = self.ifile.read(2)
|
if l.strip():
|
||||||
if data != "\xff\xfe":
|
return l
|
||||||
self.ifile.seek(0)
|
else:
|
||||||
|
return self.ifile.readline()
|
||||||
|
|
||||||
class AnsiReader(BaseReader):
|
class AnsiReader(BaseReader):
|
||||||
|
|
||||||
|
@ -18,8 +18,6 @@
|
|||||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
#
|
#
|
||||||
|
|
||||||
# $Id: _ReadGedcom.py 8032 2007-02-03 17:11:05Z hippy $
|
|
||||||
|
|
||||||
"Import from GEDCOM"
|
"Import from GEDCOM"
|
||||||
|
|
||||||
__revision__ = "$Revision: $"
|
__revision__ = "$Revision: $"
|
||||||
@ -131,9 +129,9 @@ class GedLine:
|
|||||||
|
|
||||||
Line Number, Level, Token Value, Token Text, and Data
|
Line Number, Level, Token Value, Token Text, and Data
|
||||||
|
|
||||||
Data is dependent on the context of the Token Value. For most tokens, this is
|
Data is dependent on the context of the Token Value. For most tokens,
|
||||||
just a text string. However, for certain tokens where we know the context, we
|
this is just a text string. However, for certain tokens where we know
|
||||||
can provide some value. The current parsed tokens are:
|
the context, we can provide some value. The current parsed tokens are:
|
||||||
|
|
||||||
TOKEN_DATE - RelLib.Date
|
TOKEN_DATE - RelLib.Date
|
||||||
TOKEN_SEX - RelLib.Person gender item
|
TOKEN_SEX - RelLib.Person gender item
|
||||||
@ -142,13 +140,13 @@ class GedLine:
|
|||||||
|
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
"""
|
"""
|
||||||
If the level is 0, then this is a top level instance. In this case, we may
|
If the level is 0, then this is a top level instance. In this case,
|
||||||
find items in the form of:
|
we may find items in the form of:
|
||||||
|
|
||||||
<LEVEL> @ID@ <ITEM>
|
<LEVEL> @ID@ <ITEM>
|
||||||
|
|
||||||
If this is not the top level, we check the MAP_DATA array to see if there is
|
If this is not the top level, we check the MAP_DATA array to see if
|
||||||
a conversion function for the data.
|
there is a conversion function for the data.
|
||||||
"""
|
"""
|
||||||
self.line = data[4]
|
self.line = data[4]
|
||||||
self.level = data[0]
|
self.level = data[0]
|
||||||
@ -369,8 +367,8 @@ class Reader:
|
|||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), line[2], line[1],
|
token = tokens.get(line[1], TOKEN_UNKNOWN)
|
||||||
self.index)
|
data = (level, token, line[2], line[1], self.index)
|
||||||
|
|
||||||
func = self.func_map.get(data[1])
|
func = self.func_map.get(data[1])
|
||||||
if func:
|
if func:
|
||||||
|
@ -93,6 +93,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
import codecs
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
|
|
||||||
#------------------------------------------------------------------------
|
#------------------------------------------------------------------------
|
||||||
@ -227,7 +228,6 @@ for _val in familyConstantEvents.keys():
|
|||||||
# regular expressions
|
# regular expressions
|
||||||
#
|
#
|
||||||
#-------------------------------------------------------------------------
|
#-------------------------------------------------------------------------
|
||||||
INT_RE = re.compile(r"\s*(\d+)\s*$")
|
|
||||||
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
|
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
|
||||||
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
|
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
|
||||||
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
||||||
@ -246,14 +246,23 @@ class StageOne:
|
|||||||
def parse(self):
|
def parse(self):
|
||||||
current = ""
|
current = ""
|
||||||
|
|
||||||
line = self.ifile.read(3)
|
line = self.ifile.read(2)
|
||||||
if line == "\xef\xbb":
|
if line == "\xef\xbb":
|
||||||
self.ifile.read(1)
|
self.ifile.read(1)
|
||||||
self.enc = "UTF8"
|
self.enc = "UTF8"
|
||||||
|
self.reader = self.ifile
|
||||||
|
elif line == "\xff\xfe":
|
||||||
|
self.enc = "UTF16"
|
||||||
|
self.ifile.seek(0)
|
||||||
|
self.reader = codecs.EncodedFile(self.ifile, 'utf8', 'utf16')
|
||||||
else:
|
else:
|
||||||
self.ifile.seek(0)
|
self.ifile.seek(0)
|
||||||
|
self.reader = self.ifile
|
||||||
|
|
||||||
for line in self.ifile:
|
for line in self.reader:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
self.lcnt += 1
|
self.lcnt += 1
|
||||||
|
|
||||||
data = line.split(None, 2) + ['']
|
data = line.split(None, 2) + ['']
|
||||||
@ -267,6 +276,7 @@ class StageOne:
|
|||||||
key = key.strip()
|
key = key.strip()
|
||||||
except:
|
except:
|
||||||
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
|
LOG.warn(_("Invalid line %d in GEDCOM file.") % self.lcnt)
|
||||||
|
continue
|
||||||
|
|
||||||
if level == 0 and key[0] == '@':
|
if level == 0 and key[0] == '@':
|
||||||
if value == ("FAM", "FAMILY") :
|
if value == ("FAM", "FAMILY") :
|
||||||
@ -908,8 +918,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
def __find_from_handle(self, gramps_id, table):
|
def __find_from_handle(self, gramps_id, table):
|
||||||
"""
|
"""
|
||||||
Finds a handle corresponding to the specified GRAMPS ID. The passed
|
Finds a handle corresponding to the specified GRAMPS ID. The passed
|
||||||
table contains the mapping. If the value is found, we return it, otherwise
|
table contains the mapping. If the value is found, we return it,
|
||||||
we create a new handle, store it, and return it.
|
otherwise we create a new handle, store it, and return it.
|
||||||
"""
|
"""
|
||||||
intid = table.get(gramps_id)
|
intid = table.get(gramps_id)
|
||||||
if not intid:
|
if not intid:
|
||||||
@ -1011,8 +1021,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
already used (is in the db), we return the item in the db. Otherwise,
|
already used (is in the db), we return the item in the db. Otherwise,
|
||||||
we create a new repository, assign the handle and GRAMPS ID.
|
we create a new repository, assign the handle and GRAMPS ID.
|
||||||
|
|
||||||
Some GEDCOM "flavors" destroy the specification, and declare the repository
|
Some GEDCOM "flavors" destroy the specification, and declare the
|
||||||
inline instead of in an object.
|
repository inline instead of in an object.
|
||||||
"""
|
"""
|
||||||
repository = RelLib.Repository()
|
repository = RelLib.Repository()
|
||||||
if not gramps_id:
|
if not gramps_id:
|
||||||
@ -1038,8 +1048,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
already used (is in the db), we return the item in the db. Otherwise,
|
already used (is in the db), we return the item in the db. Otherwise,
|
||||||
we create a new repository, assign the handle and GRAMPS ID.
|
we create a new repository, assign the handle and GRAMPS ID.
|
||||||
|
|
||||||
Some GEDCOM "flavors" destroy the specification, and declare the repository
|
Some GEDCOM "flavors" destroy the specification, and declare the
|
||||||
inline instead of in an object.
|
repository inline instead of in an object.
|
||||||
"""
|
"""
|
||||||
note = RelLib.Note()
|
note = RelLib.Note()
|
||||||
if not gramps_id:
|
if not gramps_id:
|
||||||
@ -1262,9 +1272,9 @@ class GedcomParser(UpdateCallback):
|
|||||||
|
|
||||||
def __parse_level(self, state, __map, default):
|
def __parse_level(self, state, __map, default):
|
||||||
"""
|
"""
|
||||||
Loops through the current GEDCOM level, calling the appropriate functions
|
Loops through the current GEDCOM level, calling the appropriate
|
||||||
associated with the TOKEN. If no matching function for the token is found, the
|
functions associated with the TOKEN. If no matching function for the
|
||||||
default function is called instead.
|
token is found, the default function is called instead.
|
||||||
"""
|
"""
|
||||||
while True:
|
while True:
|
||||||
line = self.__get_next_line()
|
line = self.__get_next_line()
|
||||||
@ -1322,7 +1332,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# find the person
|
# find the person
|
||||||
self.person = self.__find_or_create_person(self.pid_map[line.token_text])
|
real_id = self.pid_map[line.token_text]
|
||||||
|
self.person = self.__find_or_create_person(real_id)
|
||||||
|
|
||||||
# set up the state for the parsing
|
# set up the state for the parsing
|
||||||
state = GedcomUtils.CurrentState(person=self.person, level=1)
|
state = GedcomUtils.CurrentState(person=self.person, level=1)
|
||||||
@ -1597,8 +1608,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
def __person_birt(self, line, state):
|
def __person_birt(self, line, state):
|
||||||
"""
|
"""
|
||||||
Parses GEDCOM BIRT tag into a GRAMPS birth event. Additional work
|
Parses GEDCOM BIRT tag into a GRAMPS birth event. Additional work
|
||||||
must be done, since additional handling must be done by GRAMPS to set this up
|
must be done, since additional handling must be done by GRAMPS to set
|
||||||
as a birth reference event.
|
this up as a birth reference event.
|
||||||
|
|
||||||
n BIRT [Y|<NULL>] {1:1}
|
n BIRT [Y|<NULL>] {1:1}
|
||||||
+1 <<EVENT_DETAIL>> {0:1} p.*
|
+1 <<EVENT_DETAIL>> {0:1} p.*
|
||||||
@ -1642,8 +1653,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
def __person_deat(self, line, state):
|
def __person_deat(self, line, state):
|
||||||
"""
|
"""
|
||||||
Parses GEDCOM DEAT tag into a GRAMPS death event. Additional work
|
Parses GEDCOM DEAT tag into a GRAMPS death event. Additional work
|
||||||
must be done, since additional handling must be done by GRAMPS to set this up
|
must be done, since additional handling must be done by GRAMPS to set
|
||||||
as a death reference event.
|
this up as a death reference event.
|
||||||
|
|
||||||
n DEAT [Y|<NULL>] {1:1}
|
n DEAT [Y|<NULL>] {1:1}
|
||||||
+1 <<EVENT_DETAIL>> {0:1} p.*
|
+1 <<EVENT_DETAIL>> {0:1} p.*
|
||||||
@ -2147,8 +2158,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
|
|
||||||
def __lds_form(self, line, state):
|
def __lds_form(self, line, state):
|
||||||
"""
|
"""
|
||||||
Parses the FORM tag that defines the place structure for a place. This
|
Parses the FORM tag that defines the place structure for a place.
|
||||||
tag, if found, will override any global place structure.
|
This tag, if found, will override any global place structure.
|
||||||
|
|
||||||
@param line: The current line in GedLine format
|
@param line: The current line in GedLine format
|
||||||
@type line: GedLine
|
@type line: GedLine
|
||||||
@ -2384,8 +2395,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
def __person_asso_type(self, line, state):
|
def __person_asso_type(self, line, state):
|
||||||
"""
|
"""
|
||||||
Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when
|
Parses the INDI.ASSO.TYPE tag. GRAMPS only supports the ASSO tag when
|
||||||
the tag represents an INDI. So if the data is not INDI, we set the ignore
|
the tag represents an INDI. So if the data is not INDI, we set the
|
||||||
flag, so that we ignore the record.
|
ignore flag, so that we ignore the record.
|
||||||
|
|
||||||
@param line: The current line in GedLine format
|
@param line: The current line in GedLine format
|
||||||
@type line: GedLine
|
@type line: GedLine
|
||||||
@ -2942,8 +2953,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
def __event_place(self, line, state):
|
def __event_place(self, line, state):
|
||||||
"""
|
"""
|
||||||
Parse the place portion of an event. A special case has to be made for
|
Parse the place portion of an event. A special case has to be made for
|
||||||
Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC field
|
Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC
|
||||||
to store the description or value associated with the event.
|
field to store the description or value associated with the event.
|
||||||
|
|
||||||
n PLAC <PLACE_VALUE> {1:1}
|
n PLAC <PLACE_VALUE> {1:1}
|
||||||
+1 FORM <PLACE_HIERARCHY> {0:1}
|
+1 FORM <PLACE_HIERARCHY> {0:1}
|
||||||
@ -3644,7 +3655,8 @@ class GedcomParser(UpdateCallback):
|
|||||||
|
|
||||||
def __repo_ref_medi(self, line, state):
|
def __repo_ref_medi(self, line, state):
|
||||||
name = line.data
|
name = line.data
|
||||||
mtype = MEDIA_MAP.get(name.lower(), (RelLib.SourceMediaType.CUSTOM, name))
|
mtype = MEDIA_MAP.get(name.lower(),
|
||||||
|
(RelLib.SourceMediaType.CUSTOM, name))
|
||||||
state.repo_ref.set_media_type(mtype)
|
state.repo_ref.set_media_type(mtype)
|
||||||
|
|
||||||
def __repo_ref_note(self, line, state):
|
def __repo_ref_note(self, line, state):
|
||||||
|
Reference in New Issue
Block a user