From f12196cf80237be672eec6a12904296eebe319f9 Mon Sep 17 00:00:00 2001 From: Brian Matherly Date: Mon, 28 Dec 2009 05:07:21 +0000 Subject: [PATCH] Merge GedcomInfo.py and GedcomLex.py into GedcomParse.py. svn: r13930 --- po/POTFILES.in | 2 - src/GrampsDbUtils/Makefile.am | 2 - src/GrampsDbUtils/_GedcomInfo.py | 350 --------------- src/GrampsDbUtils/_GedcomLex.py | 403 ----------------- src/GrampsDbUtils/_GedcomParse.py | 683 ++++++++++++++++++++++++++++- src/GrampsDbUtils/__init__.py | 3 - src/plugins/export/ExportGedcom.py | 10 +- 7 files changed, 664 insertions(+), 789 deletions(-) delete mode 100644 src/GrampsDbUtils/_GedcomInfo.py delete mode 100644 src/GrampsDbUtils/_GedcomLex.py diff --git a/po/POTFILES.in b/po/POTFILES.in index 2fb30fbe7..70f7ef356 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -342,9 +342,7 @@ src/docgen/SpreadSheetDoc.py src/docgen/TextBufDoc.py # GrampsDbUtils package -src/GrampsDbUtils/_GedcomInfo.py src/GrampsDbUtils/_GedcomStageOne.py -#src/GrampsDbUtils/_GedcomLex.py src/GrampsDbUtils/_GedcomParse.py src/GrampsDbUtils/_GedcomTokens.py src/GrampsDbUtils/__init__.py diff --git a/src/GrampsDbUtils/Makefile.am b/src/GrampsDbUtils/Makefile.am index ba4e97852..3f7d589cb 100644 --- a/src/GrampsDbUtils/Makefile.am +++ b/src/GrampsDbUtils/Makefile.am @@ -6,8 +6,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/GrampsDbUtils pkgdata_PYTHON = \ - _GedcomInfo.py\ - _GedcomLex.py\ _GedcomParse.py\ _GedcomStageOne.py\ _GedcomTokens.py\ diff --git a/src/GrampsDbUtils/_GedcomInfo.py b/src/GrampsDbUtils/_GedcomInfo.py deleted file mode 100644 index d4905e781..000000000 --- a/src/GrampsDbUtils/_GedcomInfo.py +++ /dev/null @@ -1,350 +0,0 @@ -# -# Gramps - a GTK+/GNOME based genealogy program -# -# Copyright (C) 2000-2005 Donald N. Allingham -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -# $Id:_GedcomInfo.py 9912 2008-01-22 09:17:46Z acraphae $ - -#------------------------------------------------------------------------- -# -# python modules -# -#------------------------------------------------------------------------- -import os -#------------------------------------------------------------------------- -# -# Gramps modules -# -#------------------------------------------------------------------------- -import const -import gen.lib - -ADOPT_NONE = 0 -ADOPT_EVENT = 1 -ADOPT_FTW = 2 -ADOPT_LEGACY = 3 -ADOPT_PEDI = 4 -ADOPT_STD = 5 -CONC_OK = 0 -CONC_BROKEN = 1 -ALT_NAME_NONE = 0 -ALT_NAME_STD = 1 -ALT_NAME_ALIAS = 2 -ALT_NAME_AKA = 3 -ALT_NAME_EVENT_AKA = 4 -ALT_NAME_UALIAS = 5 -CALENDAR_NO = 0 -CALENDAR_YES = 1 -OBJE_NO = 0 -OBJE_YES = 1 -PREFIX_NO = 0 -PREFIX_YES = 1 -RESIDENCE_ADDR = 0 -RESIDENCE_PLAC = 1 -SOURCE_REFS_NO = 0 -SOURCE_REFS_YES = 1 - -#------------------------------------------------------------------------- -# -# Integer to GEDCOM tag mappings for constants -# -#------------------------------------------------------------------------- -familyConstantEvents = { - gen.lib.EventType.ANNULMENT : "ANUL", - gen.lib.EventType.DIV_FILING : "DIVF", - gen.lib.EventType.DIVORCE : "DIV", - gen.lib.EventType.CENSUS : "CENS", - gen.lib.EventType.ENGAGEMENT : "ENGA", - gen.lib.EventType.MARR_BANNS : "MARB", - gen.lib.EventType.MARR_CONTR : "MARC", - gen.lib.EventType.MARR_LIC : "MARL", - gen.lib.EventType.MARR_SETTL : "MARS", - gen.lib.EventType.MARRIAGE : "MARR" - } - -personalConstantEvents = { - gen.lib.EventType.ADOPT : "ADOP", - gen.lib.EventType.ADULT_CHRISTEN : "CHRA", - gen.lib.EventType.BIRTH : "BIRT", - gen.lib.EventType.DEATH : "DEAT", - gen.lib.EventType.BAPTISM : "BAPM", - gen.lib.EventType.BAR_MITZVAH : "BARM", - gen.lib.EventType.BAS_MITZVAH : "BASM", - gen.lib.EventType.BLESS : "BLES", - gen.lib.EventType.BURIAL : "BURI", - gen.lib.EventType.CAUSE_DEATH : "CAUS", - gen.lib.EventType.ORDINATION : "ORDN", - gen.lib.EventType.CENSUS : "CENS", - gen.lib.EventType.CHRISTEN : "CHR" , - gen.lib.EventType.CONFIRMATION : "CONF", - gen.lib.EventType.CREMATION : "CREM", - gen.lib.EventType.DEGREE : "_DEG", - gen.lib.EventType.DIV_FILING : "DIVF", - gen.lib.EventType.EDUCATION : "EDUC", - gen.lib.EventType.ELECTED : "", - gen.lib.EventType.EMIGRATION : "EMIG", - gen.lib.EventType.FIRST_COMMUN : "FCOM", - gen.lib.EventType.GRADUATION : "GRAD", - gen.lib.EventType.MED_INFO : "_MDCL", - gen.lib.EventType.MILITARY_SERV : "_MILT", - gen.lib.EventType.NATURALIZATION : "NATU", - gen.lib.EventType.NOB_TITLE : "TITL", - gen.lib.EventType.NUM_MARRIAGES : "NMR", - gen.lib.EventType.IMMIGRATION : "IMMI", - gen.lib.EventType.OCCUPATION : "OCCU", - gen.lib.EventType.PROBATE : "PROB", - gen.lib.EventType.PROPERTY : "PROP", - gen.lib.EventType.RELIGION : "RELI", - gen.lib.EventType.RESIDENCE : "RESI", - gen.lib.EventType.RETIREMENT : "RETI", - gen.lib.EventType.WILL : "WILL", - } - -familyConstantAttributes = { - gen.lib.AttributeType.NUM_CHILD : "NCHI", - } - -personalConstantAttributes = { - gen.lib.AttributeType.CASTE : "CAST", - gen.lib.AttributeType.DESCRIPTION : "DSCR", - gen.lib.AttributeType.ID : "IDNO", - gen.lib.AttributeType.NATIONAL : "NATI", - gen.lib.AttributeType.NUM_CHILD : "NCHI", - gen.lib.AttributeType.SSN : "SSN", - } - -#------------------------------------------------------------------------- -# -# Gedcom to int constants -# -#------------------------------------------------------------------------- -lds_status = { - "BIC" : gen.lib.LdsOrd.STATUS_BIC, - "CANCELED" : gen.lib.LdsOrd.STATUS_CANCELED, - "CHILD" : gen.lib.LdsOrd.STATUS_CHILD, - "CLEARED" : gen.lib.LdsOrd.STATUS_CLEARED, - "COMPLETED": gen.lib.LdsOrd.STATUS_COMPLETED, - "DNS" : gen.lib.LdsOrd.STATUS_DNS, - "INFANT" : gen.lib.LdsOrd.STATUS_INFANT, - "PRE-1970" : gen.lib.LdsOrd.STATUS_PRE_1970, - "QUALIFIED": gen.lib.LdsOrd.STATUS_QUALIFIED, - "DNS/CAN" : gen.lib.LdsOrd.STATUS_DNS_CAN, - "STILLBORN": gen.lib.LdsOrd.STATUS_STILLBORN, - "SUBMITTED": gen.lib.LdsOrd.STATUS_SUBMITTED, - "UNCLEARED": gen.lib.LdsOrd.STATUS_UNCLEARED, - } - -#------------------------------------------------------------------------- -# -# XML parser -# -#------------------------------------------------------------------------- -from xml.parsers.expat import ParserCreate - -class GedcomDescription(object): - def __init__(self, name): - self.name = name - self.dest = "" - self.adopt = ADOPT_STD - self.conc = CONC_OK - self.altname = ALT_NAME_STD - self.cal = CALENDAR_YES - self.obje = OBJE_YES - self.resi = RESIDENCE_ADDR - self.source_refs = SOURCE_REFS_YES - self.gramps2tag_map = {} - self.tag2gramps_map = {} - self.prefix = PREFIX_YES - self.endl = "\n" - - def set_dest(self,val): - self.dest = val - - def get_dest(self): - return self.dest - - def set_endl(self,val): - self.endl = val.replace('\\r','\r').replace('\\n','\n') - - def get_endl(self): - return self.endl - - def set_adopt(self,val): - self.adopt = val - - def get_adopt(self): - return self.adopt - - def set_prefix(self,val): - self.prefix=val - - def get_prefix(self): - return self.prefix - - def set_conc(self,val): - self.conc = val - - def get_conc(self): - return self.conc - - def set_alt_name(self,val): - self.altname = val - - def get_alt_name(self): - return self.altname - - def set_alt_calendar(self,val): - self.cal = val - - def get_alt_calendar(self): - return self.cal - - def set_obje(self,val): - self.obje = val - - def get_obje(self): - return self.obje - - def set_resi(self,val): - self.resi = val - - def get_resi(self): - return self.resi - - def set_source_refs(self,val): - self.source_refs = val - - def get_source_refs(self): - return self.source_refs - - def add_tag_value(self,tag,value): - self.gramps2tag_map[value] = tag - self.tag2gramps_map[tag] = value - - def gramps2tag(self,key): - if key in self.gramps2tag_map: - return self.gramps2tag_map[key] - return "" - - def tag2gramps(self,key): - if key in self.tag2gramps_map: - return self.tag2gramps_map[key] - return key - -class GedcomInfoDB(object): - def __init__(self): - self.map = {} - - self.standard = GedcomDescription("GEDCOM 5.5 standard") - self.standard.set_dest("GEDCOM 5.5") - - try: - filepath = os.path.join(const.DATA_DIR,"gedcom.xml") - f = open(filepath.encode('iso8859-1'),"r") - except: - return - - parser = GedInfoParser(self) - parser.parse(f) - f.close() - - def add_description(self, name, obj): - self.map[name] = obj - - def get_description(self, name): - if name in self.map: - return self.map[name] - return self.standard - - def get_from_source_tag(self, name): - for k, val in self.map.iteritems(): - if val.get_dest() == name: - return val - return self.standard - - def get_name_list(self): - return ["GEDCOM 5.5 standard"] + sorted(self.map) - -#------------------------------------------------------------------------- -# -# -# -#------------------------------------------------------------------------- -class GedInfoParser(object): - def __init__(self,parent): - self.parent = parent - self.current = None - - def parse(self,file): - p = ParserCreate() - p.StartElementHandler = self.startElement - p.ParseFile(file) - - def startElement(self,tag,attrs): - if tag == "target": - name = attrs['name'] - self.current = GedcomDescription(name) - self.parent.add_description(name,self.current) - elif tag == "dest": - self.current.set_dest(attrs['val']) - elif tag == "endl": - self.current.set_endl(attrs['val']) - elif tag == "adopt": - val = attrs['val'] - if val == 'none': - self.current.set_adopt(ADOPT_NONE) - elif val == 'event': - self.current.set_adopt(ADOPT_EVENT) - elif val == 'ftw': - self.current.set_adopt(ADOPT_FTW) - elif val == 'legacy': - self.current.set_adopt(ADOPT_LEGACY) - elif val == 'pedigree': - self.current.set_adopt(ADOPT_PEDI) - elif tag == "conc": - if attrs['val'] == 'broken': - self.current.set_conc(CONC_BROKEN) - elif tag == "alternate_names": - val = attrs['val'] - if val == 'none': - self.current.set_alt_name(ALT_NAME_NONE) - elif val == 'event_aka': - self.current.set_alt_name(ALT_NAME_EVENT_AKA) - elif val == 'alias': - self.current.set_alt_name(ALT_NAME_ALIAS) - elif val == 'aka': - self.current.set_alt_name(ALT_NAME_AKA) - elif val == '_alias': - self.current.set_alt_name(ALT_NAME_UALIAS) - elif tag == "calendars": - if attrs['val'] == 'no': - self.current.set_alt_calendar(CALENDAR_NO) - elif tag == "event": - self.current.add_tag_value(attrs['tag'],attrs['value']) - elif tag == "object_support": - if attrs['val'] == 'no': - self.current.set_obje(OBJE_NO) - elif tag == "prefix": - if attrs['val'] == 'no': - self.current.set_obje(PREFIX_NO) - elif tag == "residence": - if attrs['val'] == 'place': - self.current.set_resi(RESIDENCE_PLAC) - elif tag == "source_refs": - if attrs['val'] == 'no': - self.current.set_source_refs(SOURCE_REFS_NO) diff --git a/src/GrampsDbUtils/_GedcomLex.py b/src/GrampsDbUtils/_GedcomLex.py deleted file mode 100644 index 434efed1b..000000000 --- a/src/GrampsDbUtils/_GedcomLex.py +++ /dev/null @@ -1,403 +0,0 @@ -# -# Gramps - a GTK+/GNOME based genealogy program -# -# Copyright (C) 2000-2006 Donald N. Allingham -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -"Import from GEDCOM" - -#------------------------------------------------------------------------- -# -# standard python modules -# -#------------------------------------------------------------------------- - -import re - -#------------------------------------------------------------------------- -# -# GRAMPS modules -# -#------------------------------------------------------------------------- - -from GrampsDbUtils import (personalConstantEvents, personalConstantAttributes, - familyConstantEvents) -import _GedcomTokens as GedcomTokens -import gen.lib -from DateHandler._DateParser import DateParser - -#------------------------------------------------------------------------ -# -# Set up logging -# -#------------------------------------------------------------------------ -import logging -LOG = logging.getLogger(".GedcomImport") - -#------------------------------------------------------------------------- -# -# constants # -#------------------------------------------------------------------------- - -GED2GRAMPS = {} -for __val, __key in personalConstantEvents.iteritems(): - if __key != "": - GED2GRAMPS[__key] = __val - -for __val, __key in familyConstantEvents.iteritems(): - if __key != "": - GED2GRAMPS[__key] = __val - -GED2ATTR = {} -for __val, __key in personalConstantAttributes.iteritems(): - if __key != "": - GED2ATTR[__key] = __val - -#------------------------------------------------------------------------- -# -# -# -#------------------------------------------------------------------------- - -MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$") -CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D?([^@]+)@\s*(.*)$") -RANGE = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$") -SPAN = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$") - -CALENDAR_MAP = { - "FRENCH R" : gen.lib.Date.CAL_FRENCH, - "JULIAN" : gen.lib.Date.CAL_JULIAN, - "HEBREW" : gen.lib.Date.CAL_HEBREW, -} - -QUALITY_MAP = { - 'CAL' : gen.lib.Date.QUAL_CALCULATED, - 'INT' : gen.lib.Date.QUAL_CALCULATED, - 'EST' : gen.lib.Date.QUAL_ESTIMATED, -} - -SEX_MAP = { - 'F' : gen.lib.Person.FEMALE, - 'M' : gen.lib.Person.MALE, -} - -#----------------------------------------------------------------------- -# -# -# -#----------------------------------------------------------------------- -class GedcomDateParser(DateParser): - - month_to_int = { - 'jan' : 1, 'feb' : 2, 'mar' : 3, 'apr' : 4, - 'may' : 5, 'jun' : 6, 'jul' : 7, 'aug' : 8, - 'sep' : 9, 'oct' : 10, 'nov' : 11, 'dec' : 12, - } - -#----------------------------------------------------------------------- -# -# GedLine - represents a tokenized version of a GEDCOM line -# -#----------------------------------------------------------------------- -class GedLine(object): - """ - GedLine is a class the represents a GEDCOM line. The form of a GEDCOM line - is: - - - - This gets parsed into - - Line Number, Level, Token Value, Token Text, and Data - - Data is dependent on the context the Token Value. For most of tokens, - this is just a text string. However, for certain tokens where we know - the context, we can provide some value. The current parsed tokens are: - - TOKEN_DATE - gen.lib.Date - TOKEN_SEX - gen.lib.Person gender item - TOEKN_UKNOWN - Check to see if this is a known event - """ - - def __init__(self, data): - """ - If the level is 0, then this is a top level instance. In this case, - we may find items in the form of: - - @ID@ - - If this is not the top level, we check the MAP_DATA array to see if - there is a conversion function for the data. - """ - self.line = data[4] - self.level = data[0] - self.token = data[1] - self.token_text = data[3].strip() - self.data = data[2] - - if self.level == 0: - if self.token_text and self.token_text[0] == '@' \ - and self.token_text[-1] == '@': - self.token = GedcomTokens.TOKEN_ID - self.token_text = self.token_text[1:-1] - self.data = self.data.strip() - else: - func = MAP_DATA.get(self.token) - if func: - func(self) - - def calc_sex(self): - """ - Converts the data field to a gen.lib token indicating the gender - """ - try: - self.data = SEX_MAP.get(self.data.strip()[0], gen.lib.Person.UNKNOWN) - except: - self.data = gen.lib.Person.UNKNOWN - - def calc_date(self): - """ - Converts the data field to a gen.lib.Date object - """ - self.data = extract_date(self.data) - - def calc_unknown(self): - """ - Checks to see if the token maps a known GEDCOM event. If so, we - change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and - the data is assigned to the associated GRAMPS EventType - """ - token = GED2GRAMPS.get(self.token_text) - if token: - event = gen.lib.Event() - event.set_description(self.data) - event.set_type(token) - self.token = GedcomTokens.TOKEN_GEVENT - self.data = event - else: - token = GED2ATTR.get(self.token_text) - if token: - attr = gen.lib.Attribute() - attr.set_value(self.data) - attr.set_type(token) - self.token = GedcomTokens.TOKEN_ATTR - self.data = attr - - def calc_note(self): - gid = self.data.strip() - if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@': - self.token = GedcomTokens.TOKEN_RNOTE - self.data = gid[1:-1] - - def calc_nchi(self): - attr = gen.lib.Attribute() - attr.set_value(self.data) - attr.set_type(gen.lib.AttributeType.NUM_CHILD) - self.data = attr - self.token = GedcomTokens.TOKEN_ATTR - - def calc_attr(self): - attr = gen.lib.Attribute() - attr.set_value(self.data) - attr.set_type((gen.lib.AttributeType.CUSTOM, self.token_text)) - self.data = attr - self.token = GedcomTokens.TOKEN_ATTR - - def __repr__(self): - return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token, - self.token_text, self.data) - -#------------------------------------------------------------------------- -# -# MAP_DATA - kept as a separate table, so that it is static, and does not -# have to be initialized every time in the GedLine constructor -# -#------------------------------------------------------------------------- -MAP_DATA = { - GedcomTokens.TOKEN_UNKNOWN : GedLine.calc_unknown, - GedcomTokens.TOKEN_DATE : GedLine.calc_date, - GedcomTokens.TOKEN_SEX : GedLine.calc_sex, - GedcomTokens.TOKEN_NOTE : GedLine.calc_note, - GedcomTokens.TOKEN_NCHI : GedLine.calc_nchi, - GedcomTokens.TOKEN__STAT : GedLine.calc_attr, - GedcomTokens.TOKEN__UID : GedLine.calc_attr, - GedcomTokens.TOKEN_AFN : GedLine.calc_attr, - } - -#------------------------------------------------------------------------- -# -# extract_date -# -#------------------------------------------------------------------------- - -DATE_CNV = GedcomDateParser() - -def extract_date(text): - """ - Converts the specified text to a gen.lib.Date object. - """ - dateobj = gen.lib.Date() - - text = text.replace('BET ABT','EST BET') # Horrible hack for importing - # illegal GEDCOM from - # Apple Macintosh Classic - # 'Gene' program - - try: - # extract out the MOD line - match = MOD.match(text) - if match: - (mod, text) = match.groups() - qual = QUALITY_MAP.get(mod, gen.lib.Date.QUAL_NONE) - else: - qual = gen.lib.Date.QUAL_NONE - - # parse the range if we match, if so, return - match = RANGE.match(text) - if match: - (cal1, data1, cal2, data2) = match.groups() - - cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) - - start = DATE_CNV.parse(data1) - stop = DATE_CNV.parse(data2) - dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_RANGE, cal, - start.get_start_date() + stop.get_start_date()) - dateobj.set_quality(qual) - return dateobj - - # parse a span if we match - match = SPAN.match(text) - if match: - (cal1, data1, cal2, data2) = match.groups() - - cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) - - start = DATE_CNV.parse(data1) - stop = DATE_CNV.parse(data2) - dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_SPAN, cal, - start.get_start_date() + stop.get_start_date()) - dateobj.set_quality(qual) - return dateobj - - match = CAL.match(text) - if match: - (abt, cal, data) = match.groups() - if abt: - dateobj = DATE_CNV.parse("%s %s" % (abt, data)) - else: - dateobj = DATE_CNV.parse(data) - dateobj.set_calendar(CALENDAR_MAP.get(cal, - gen.lib.Date.CAL_GREGORIAN)) - dateobj.set_quality(qual) - return dateobj - - dateobj = DATE_CNV.parse(text) - dateobj.set_quality(qual) - return dateobj - - # FIXME: explain where/why an IOError might arise - # and also: is such a long try-clause needed - # having this fallback invites "what about other exceptions?" - except IOError: - # fallback strategy (evidently) - return DATE_CNV.set_text(text) - -#------------------------------------------------------------------------- -# -# Reader - serves as the lexical analysis engine -# -#------------------------------------------------------------------------- -class Reader(object): - - def __init__(self, ifile): - self.ifile = ifile - self.current_list = [] - self.eof = False - self.cnv = None - self.cnt = 0 - self.index = 0 - self.func_map = { - GedcomTokens.TOKEN_CONT : self.__fix_token_cont, - GedcomTokens.TOKEN_CONC : self.__fix_token_conc, - } - - def readline(self): - if len(self.current_list) <= 1 and not self.eof: - self.__readahead() - try: - return GedLine(self.current_list.pop()) - except: - return None - - def __fix_token_cont(self, data): - line = self.current_list[0] - new_value = line[2] + '\n' + data[2] - self.current_list[0] = (line[0], line[1], new_value, line[3], line[4]) - - def __fix_token_conc(self, data): - line = self.current_list[0] - if len(line[2]) == 4: - # This deals with lines of the form - # 0 @@ NOTE - # 1 CONC - # The previous line contains only a tag and no data so concat a - # space to separate the new line from the tag. This prevents the - # first letter of the new line being lost later - # in _GedcomParse.__parse_record - new_value = line[2] + ' ' + data[2] - else: - new_value = line[2] + data[2] - self.current_list[0] = (line[0], line[1], new_value, line[3], line[4]) - - def __readahead(self): - while len(self.current_list) < 5: - line = self.ifile.readline() - self.index += 1 - if not line: - self.eof = True - return - - try: - # According to the GEDCOM 5.5 standard, - # Chapter 1 subsection Grammar - #"leading whitespace preceeding a GEDCOM line should be ignored" - # We will also strip the terminator which is any combination - # of carriage_return and line_feed - line = line.lstrip(' ').rstrip('\n\r') - # split into level+delim+rest - line = line.partition(' ') - level = int(line[0]) - # there should only be one space after the level, - # but we can ignore more, - # then split into tag+delim+line_value - # or xfef_id+delim+rest - line = line[2].lstrip(' ').partition(' ') - tag = line[0] - line_value = line[2] - except: - continue - - token = GedcomTokens.TOKENS.get(tag, GedcomTokens.TOKEN_UNKNOWN) - data = (level, token, line_value, tag, self.index) - - func = self.func_map.get(data[1]) - if func: - func(data) - else: - self.current_list.insert(0, data) - diff --git a/src/GrampsDbUtils/_GedcomParse.py b/src/GrampsDbUtils/_GedcomParse.py index f0527d7f4..3b3b63c51 100644 --- a/src/GrampsDbUtils/_GedcomParse.py +++ b/src/GrampsDbUtils/_GedcomParse.py @@ -44,7 +44,7 @@ cynical, one might believe that the commercial programs were trying to make it difficult to transfer your data to another application. This parser takes a different approach to parsing a GEDCOM file. The first -state, GedcomLex, reads lines from the file, and does some basic lexical +state, Lexer, reads lines from the file, and does some basic lexical analysis on each line (actually several lines, since it automatically combines CONT and CONC tagged lines). Each logical line returned to this parser contains: @@ -92,6 +92,7 @@ import re import time import codecs from gettext import gettext as _ +from xml.parsers.expat import ParserCreate #------------------------------------------------------------------------ # @@ -107,18 +108,18 @@ LOG = logging.getLogger(".GedcomImport") # #------------------------------------------------------------------------- import Errors +import const import gen.lib from BasicUtils import UpdateCallback import Mime import LdsUtils import Utils from ansel_utf8 import ansel_to_utf8 +from DateHandler._DateParser import DateParser from _GedcomTokens import * -import _GedcomInfo as GedcomInfo import _GedcomUtils as GedcomUtils -import _GedcomLex as GedcomLex from gen.db.dbconst import EVENT_KEY @@ -139,6 +140,31 @@ TRUNC_MSG = _("Your GEDCOM file is corrupted. " # constants # #------------------------------------------------------------------------- +ADOPT_NONE = 0 +ADOPT_EVENT = 1 +ADOPT_FTW = 2 +ADOPT_LEGACY = 3 +ADOPT_PEDI = 4 +ADOPT_STD = 5 +CONC_OK = 0 +CONC_BROKEN = 1 +ALT_NAME_NONE = 0 +ALT_NAME_STD = 1 +ALT_NAME_ALIAS = 2 +ALT_NAME_AKA = 3 +ALT_NAME_EVENT_AKA = 4 +ALT_NAME_UALIAS = 5 +CALENDAR_NO = 0 +CALENDAR_YES = 1 +OBJE_NO = 0 +OBJE_YES = 1 +PREFIX_NO = 0 +PREFIX_YES = 1 +RESIDENCE_ADDR = 0 +RESIDENCE_PLAC = 1 +SOURCE_REFS_NO = 0 +SOURCE_REFS_YES = 1 + TYPE_BIRTH = gen.lib.ChildRefType() TYPE_ADOPT = gen.lib.ChildRefType(gen.lib.ChildRefType.ADOPTED) TYPE_FOSTER = gen.lib.ChildRefType(gen.lib.ChildRefType.FOSTER) @@ -192,22 +218,132 @@ MEDIA_MAP = { 'video' : gen.lib.SourceMediaType.VIDEO, } +#------------------------------------------------------------------------- +# +# Integer to GEDCOM tag mappings for constants +# +#------------------------------------------------------------------------- +CALENDAR_MAP = { + "FRENCH R" : gen.lib.Date.CAL_FRENCH, + "JULIAN" : gen.lib.Date.CAL_JULIAN, + "HEBREW" : gen.lib.Date.CAL_HEBREW, +} + +QUALITY_MAP = { + 'CAL' : gen.lib.Date.QUAL_CALCULATED, + 'INT' : gen.lib.Date.QUAL_CALCULATED, + 'EST' : gen.lib.Date.QUAL_ESTIMATED, +} + +SEX_MAP = { + 'F' : gen.lib.Person.FEMALE, + 'M' : gen.lib.Person.MALE, +} + +familyConstantEvents = { + gen.lib.EventType.ANNULMENT : "ANUL", + gen.lib.EventType.DIV_FILING : "DIVF", + gen.lib.EventType.DIVORCE : "DIV", + gen.lib.EventType.CENSUS : "CENS", + gen.lib.EventType.ENGAGEMENT : "ENGA", + gen.lib.EventType.MARR_BANNS : "MARB", + gen.lib.EventType.MARR_CONTR : "MARC", + gen.lib.EventType.MARR_LIC : "MARL", + gen.lib.EventType.MARR_SETTL : "MARS", + gen.lib.EventType.MARRIAGE : "MARR" + } + +personalConstantEvents = { + gen.lib.EventType.ADOPT : "ADOP", + gen.lib.EventType.ADULT_CHRISTEN : "CHRA", + gen.lib.EventType.BIRTH : "BIRT", + gen.lib.EventType.DEATH : "DEAT", + gen.lib.EventType.BAPTISM : "BAPM", + gen.lib.EventType.BAR_MITZVAH : "BARM", + gen.lib.EventType.BAS_MITZVAH : "BASM", + gen.lib.EventType.BLESS : "BLES", + gen.lib.EventType.BURIAL : "BURI", + gen.lib.EventType.CAUSE_DEATH : "CAUS", + gen.lib.EventType.ORDINATION : "ORDN", + gen.lib.EventType.CENSUS : "CENS", + gen.lib.EventType.CHRISTEN : "CHR" , + gen.lib.EventType.CONFIRMATION : "CONF", + gen.lib.EventType.CREMATION : "CREM", + gen.lib.EventType.DEGREE : "_DEG", + gen.lib.EventType.DIV_FILING : "DIVF", + gen.lib.EventType.EDUCATION : "EDUC", + gen.lib.EventType.ELECTED : "", + gen.lib.EventType.EMIGRATION : "EMIG", + gen.lib.EventType.FIRST_COMMUN : "FCOM", + gen.lib.EventType.GRADUATION : "GRAD", + gen.lib.EventType.MED_INFO : "_MDCL", + gen.lib.EventType.MILITARY_SERV : "_MILT", + gen.lib.EventType.NATURALIZATION : "NATU", + gen.lib.EventType.NOB_TITLE : "TITL", + gen.lib.EventType.NUM_MARRIAGES : "NMR", + gen.lib.EventType.IMMIGRATION : "IMMI", + gen.lib.EventType.OCCUPATION : "OCCU", + gen.lib.EventType.PROBATE : "PROB", + gen.lib.EventType.PROPERTY : "PROP", + gen.lib.EventType.RELIGION : "RELI", + gen.lib.EventType.RESIDENCE : "RESI", + gen.lib.EventType.RETIREMENT : "RETI", + gen.lib.EventType.WILL : "WILL", + } + +familyConstantAttributes = { + gen.lib.AttributeType.NUM_CHILD : "NCHI", + } + +personalConstantAttributes = { + gen.lib.AttributeType.CASTE : "CAST", + gen.lib.AttributeType.DESCRIPTION : "DSCR", + gen.lib.AttributeType.ID : "IDNO", + gen.lib.AttributeType.NATIONAL : "NATI", + gen.lib.AttributeType.NUM_CHILD : "NCHI", + gen.lib.AttributeType.SSN : "SSN", + } + +#------------------------------------------------------------------------- +# +# Gedcom to int constants +# +#------------------------------------------------------------------------- +lds_status = { + "BIC" : gen.lib.LdsOrd.STATUS_BIC, + "CANCELED" : gen.lib.LdsOrd.STATUS_CANCELED, + "CHILD" : gen.lib.LdsOrd.STATUS_CHILD, + "CLEARED" : gen.lib.LdsOrd.STATUS_CLEARED, + "COMPLETED": gen.lib.LdsOrd.STATUS_COMPLETED, + "DNS" : gen.lib.LdsOrd.STATUS_DNS, + "INFANT" : gen.lib.LdsOrd.STATUS_INFANT, + "PRE-1970" : gen.lib.LdsOrd.STATUS_PRE_1970, + "QUALIFIED": gen.lib.LdsOrd.STATUS_QUALIFIED, + "DNS/CAN" : gen.lib.LdsOrd.STATUS_DNS_CAN, + "STILLBORN": gen.lib.LdsOrd.STATUS_STILLBORN, + "SUBMITTED": gen.lib.LdsOrd.STATUS_SUBMITTED, + "UNCLEARED": gen.lib.LdsOrd.STATUS_UNCLEARED, + } + + #------------------------------------------------------------------------- # # GEDCOM events to GRAMPS events conversion # #------------------------------------------------------------------------- -GED_2_GRAMPS = {} -for _val, _key in GedcomInfo.personalConstantEvents.iteritems(): - #_key = GedcomInfo.personalConstantEvents[_val] - if _key != "": - GED_2_GRAMPS[_key] = _val +GED_TO_GRAMPS_EVENT = {} +for __val, __key in personalConstantEvents.iteritems(): + if __key != "": + GED_TO_GRAMPS_EVENT[__key] = __val -GED_2_FAMILY = {} -for _val, _key in GedcomInfo.familyConstantEvents.iteritems(): - #_key = GedcomInfo.familyConstantEvents[_val] - if _key != "": - GED_2_FAMILY[_key] = _val +for __val, __key in familyConstantEvents.iteritems(): + if __key != "": + GED_TO_GRAMPS_EVENT[__key] = __val + +GED_TO_GRAMPS_ATTR = {} +for __val, __key in personalConstantAttributes.iteritems(): + if __key != "": + GED_TO_GRAMPS_ATTR[__key] = __val #------------------------------------------------------------------------- # @@ -218,6 +354,10 @@ NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$") CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$") CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$") PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$") +MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$") +CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D?([^@]+)@\s*(.*)$") +RANGE = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$") +SPAN = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$") #------------------------------------------------------------------------- # @@ -238,6 +378,502 @@ def find_from_handle(gramps_id, table): table[gramps_id] = intid return intid +#----------------------------------------------------------------------- +# +# GedcomDateParser +# +#----------------------------------------------------------------------- +class GedcomDateParser(DateParser): + + month_to_int = { + 'jan' : 1, 'feb' : 2, 'mar' : 3, 'apr' : 4, + 'may' : 5, 'jun' : 6, 'jul' : 7, 'aug' : 8, + 'sep' : 9, 'oct' : 10, 'nov' : 11, 'dec' : 12, + } + +DATE_CNV = GedcomDateParser() + +def extract_date(text): + """ + Converts the specified text to a gen.lib.Date object. + """ + dateobj = gen.lib.Date() + + text = text.replace('BET ABT','EST BET') # Horrible hack for importing + # illegal GEDCOM from + # Apple Macintosh Classic + # 'Gene' program + + try: + # extract out the MOD line + match = MOD.match(text) + if match: + (mod, text) = match.groups() + qual = QUALITY_MAP.get(mod, gen.lib.Date.QUAL_NONE) + else: + qual = gen.lib.Date.QUAL_NONE + + # parse the range if we match, if so, return + match = RANGE.match(text) + if match: + (cal1, data1, cal2, data2) = match.groups() + + cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) + + start = DATE_CNV.parse(data1) + stop = DATE_CNV.parse(data2) + dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_RANGE, cal, + start.get_start_date() + stop.get_start_date()) + dateobj.set_quality(qual) + return dateobj + + # parse a span if we match + match = SPAN.match(text) + if match: + (cal1, data1, cal2, data2) = match.groups() + + cal = CALENDAR_MAP.get(cal1, gen.lib.Date.CAL_GREGORIAN) + + start = DATE_CNV.parse(data1) + stop = DATE_CNV.parse(data2) + dateobj.set(gen.lib.Date.QUAL_NONE, gen.lib.Date.MOD_SPAN, cal, + start.get_start_date() + stop.get_start_date()) + dateobj.set_quality(qual) + return dateobj + + match = CAL.match(text) + if match: + (abt, cal, data) = match.groups() + if abt: + dateobj = DATE_CNV.parse("%s %s" % (abt, data)) + else: + dateobj = DATE_CNV.parse(data) + dateobj.set_calendar(CALENDAR_MAP.get(cal, + gen.lib.Date.CAL_GREGORIAN)) + dateobj.set_quality(qual) + return dateobj + + dateobj = DATE_CNV.parse(text) + dateobj.set_quality(qual) + return dateobj + + # FIXME: explain where/why an IOError might arise + # and also: is such a long try-clause needed + # having this fallback invites "what about other exceptions?" + except IOError: + # fallback strategy (evidently) + return DATE_CNV.set_text(text) + +#------------------------------------------------------------------------- +# +# Lexer - serves as the lexical analysis engine +# +#------------------------------------------------------------------------- +class Lexer(object): + + def __init__(self, ifile): + self.ifile = ifile + self.current_list = [] + self.eof = False + self.cnv = None + self.cnt = 0 + self.index = 0 + self.func_map = { + TOKEN_CONT : self.__fix_token_cont, + TOKEN_CONC : self.__fix_token_conc, + } + + def readline(self): + if len(self.current_list) <= 1 and not self.eof: + self.__readahead() + try: + return GedLine(self.current_list.pop()) + except: + return None + + def __fix_token_cont(self, data): + line = self.current_list[0] + new_value = line[2] + '\n' + data[2] + self.current_list[0] = (line[0], line[1], new_value, line[3], line[4]) + + def __fix_token_conc(self, data): + line = self.current_list[0] + if len(line[2]) == 4: + # This deals with lines of the form + # 0 @@ NOTE + # 1 CONC + # The previous line contains only a tag and no data so concat a + # space to separate the new line from the tag. This prevents the + # first letter of the new line being lost later + # in _GedcomParse.__parse_record + new_value = line[2] + ' ' + data[2] + else: + new_value = line[2] + data[2] + self.current_list[0] = (line[0], line[1], new_value, line[3], line[4]) + + def __readahead(self): + while len(self.current_list) < 5: + line = self.ifile.readline() + self.index += 1 + if not line: + self.eof = True + return + + try: + # According to the GEDCOM 5.5 standard, + # Chapter 1 subsection Grammar + #"leading whitespace preceeding a GEDCOM line should be ignored" + # We will also strip the terminator which is any combination + # of carriage_return and line_feed + line = line.lstrip(' ').rstrip('\n\r') + # split into level+delim+rest + line = line.partition(' ') + level = int(line[0]) + # there should only be one space after the level, + # but we can ignore more, + # then split into tag+delim+line_value + # or xfef_id+delim+rest + line = line[2].lstrip(' ').partition(' ') + tag = line[0] + line_value = line[2] + except: + continue + + token = TOKENS.get(tag, TOKEN_UNKNOWN) + data = (level, token, line_value, tag, self.index) + + func = self.func_map.get(data[1]) + if func: + func(data) + else: + self.current_list.insert(0, data) + +#----------------------------------------------------------------------- +# +# GedLine - represents a tokenized version of a GEDCOM line +# +#----------------------------------------------------------------------- +class GedLine(object): + """ + GedLine is a class the represents a GEDCOM line. The form of a GEDCOM line + is: + + + + This gets parsed into + + Line Number, Level, Token Value, Token Text, and Data + + Data is dependent on the context the Token Value. For most of tokens, + this is just a text string. However, for certain tokens where we know + the context, we can provide some value. The current parsed tokens are: + + TOKEN_DATE - gen.lib.Date + TOKEN_SEX - gen.lib.Person gender item + TOEKN_UKNOWN - Check to see if this is a known event + """ + def __init__(self, data): + """ + If the level is 0, then this is a top level instance. In this case, + we may find items in the form of: + + @ID@ + + If this is not the top level, we check the MAP_DATA array to see if + there is a conversion function for the data. + """ + self.line = data[4] + self.level = data[0] + self.token = data[1] + self.token_text = data[3].strip() + self.data = data[2] + + if self.level == 0: + if self.token_text and self.token_text[0] == '@' \ + and self.token_text[-1] == '@': + self.token = TOKEN_ID + self.token_text = self.token_text[1:-1] + self.data = self.data.strip() + else: + func = _MAP_DATA.get(self.token) + if func: + func(self) + + def calc_sex(self): + """ + Converts the data field to a gen.lib token indicating the gender + """ + try: + self.data = SEX_MAP.get(self.data.strip()[0], gen.lib.Person.UNKNOWN) + except: + self.data = gen.lib.Person.UNKNOWN + + def calc_date(self): + """ + Converts the data field to a gen.lib.Date object + """ + self.data = extract_date(self.data) + + def calc_unknown(self): + """ + Checks to see if the token maps a known GEDCOM event. If so, we + change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and + the data is assigned to the associated GRAMPS EventType + """ + token = GED_TO_GRAMPS_EVENT.get(self.token_text) + if token: + event = gen.lib.Event() + event.set_description(self.data) + event.set_type(token) + self.token = TOKEN_GEVENT + self.data = event + else: + token = GED_TO_GRAMPS_ATTR.get(self.token_text) + if token: + attr = gen.lib.Attribute() + attr.set_value(self.data) + attr.set_type(token) + self.token = TOKEN_ATTR + self.data = attr + + def calc_note(self): + gid = self.data.strip() + if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@': + self.token = TOKEN_RNOTE + self.data = gid[1:-1] + + def calc_nchi(self): + attr = gen.lib.Attribute() + attr.set_value(self.data) + attr.set_type(gen.lib.AttributeType.NUM_CHILD) + self.data = attr + self.token = TOKEN_ATTR + + def calc_attr(self): + attr = gen.lib.Attribute() + attr.set_value(self.data) + attr.set_type((gen.lib.AttributeType.CUSTOM, self.token_text)) + self.data = attr + self.token = TOKEN_ATTR + + def __repr__(self): + return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token, + self.token_text, self.data) + +_MAP_DATA = { + TOKEN_UNKNOWN : GedLine.calc_unknown, + TOKEN_DATE : GedLine.calc_date, + TOKEN_SEX : GedLine.calc_sex, + TOKEN_NOTE : GedLine.calc_note, + TOKEN_NCHI : GedLine.calc_nchi, + TOKEN__STAT : GedLine.calc_attr, + TOKEN__UID : GedLine.calc_attr, + TOKEN_AFN : GedLine.calc_attr, + } + +#------------------------------------------------------------------------- +# +# GedcomDescription +# +#------------------------------------------------------------------------- +class GedcomDescription(object): + def __init__(self, name): + self.name = name + self.dest = "" + self.adopt = ADOPT_STD + self.conc = CONC_OK + self.altname = ALT_NAME_STD + self.cal = CALENDAR_YES + self.obje = OBJE_YES + self.resi = RESIDENCE_ADDR + self.source_refs = SOURCE_REFS_YES + self.gramps2tag_map = {} + self.tag2gramps_map = {} + self.prefix = PREFIX_YES + self.endl = "\n" + + def set_dest(self,val): + self.dest = val + + def get_dest(self): + return self.dest + + def set_endl(self,val): + self.endl = val.replace('\\r','\r').replace('\\n','\n') + + def get_endl(self): + return self.endl + + def set_adopt(self,val): + self.adopt = val + + def get_adopt(self): + return self.adopt + + def set_prefix(self,val): + self.prefix=val + + def get_prefix(self): + return self.prefix + + def set_conc(self,val): + self.conc = val + + def get_conc(self): + return self.conc + + def set_alt_name(self,val): + self.altname = val + + def get_alt_name(self): + return self.altname + + def set_alt_calendar(self,val): + self.cal = val + + def get_alt_calendar(self): + return self.cal + + def set_obje(self,val): + self.obje = val + + def get_obje(self): + return self.obje + + def set_resi(self,val): + self.resi = val + + def get_resi(self): + return self.resi + + def set_source_refs(self,val): + self.source_refs = val + + def get_source_refs(self): + return self.source_refs + + def add_tag_value(self,tag,value): + self.gramps2tag_map[value] = tag + self.tag2gramps_map[tag] = value + + def gramps2tag(self,key): + if key in self.gramps2tag_map: + return self.gramps2tag_map[key] + return "" + + def tag2gramps(self,key): + if key in self.tag2gramps_map: + return self.tag2gramps_map[key] + return key + +#------------------------------------------------------------------------- +# +# GedcomInfoDB +# +#------------------------------------------------------------------------- +class GedcomInfoDB(object): + def __init__(self): + self.map = {} + + self.standard = GedcomDescription("GEDCOM 5.5 standard") + self.standard.set_dest("GEDCOM 5.5") + + try: + filepath = os.path.join(const.DATA_DIR,"gedcom.xml") + f = open(filepath.encode('iso8859-1'),"r") + except: + return + + parser = GedInfoParser(self) + parser.parse(f) + f.close() + + def add_description(self, name, obj): + self.map[name] = obj + + def get_description(self, name): + if name in self.map: + return self.map[name] + return self.standard + + def get_from_source_tag(self, name): + for k, val in self.map.iteritems(): + if val.get_dest() == name: + return val + return self.standard + + def get_name_list(self): + return ["GEDCOM 5.5 standard"] + sorted(self.map) + +#------------------------------------------------------------------------- +# +# GedInfoParser +# +#------------------------------------------------------------------------- +class GedInfoParser(object): + def __init__(self,parent): + self.parent = parent + self.current = None + + def parse(self,file): + p = ParserCreate() + p.StartElementHandler = self.startElement + p.ParseFile(file) + + def startElement(self,tag,attrs): + if tag == "target": + name = attrs['name'] + self.current = GedcomDescription(name) + self.parent.add_description(name,self.current) + elif tag == "dest": + self.current.set_dest(attrs['val']) + elif tag == "endl": + self.current.set_endl(attrs['val']) + elif tag == "adopt": + val = attrs['val'] + if val == 'none': + self.current.set_adopt(ADOPT_NONE) + elif val == 'event': + self.current.set_adopt(ADOPT_EVENT) + elif val == 'ftw': + self.current.set_adopt(ADOPT_FTW) + elif val == 'legacy': + self.current.set_adopt(ADOPT_LEGACY) + elif val == 'pedigree': + self.current.set_adopt(ADOPT_PEDI) + elif tag == "conc": + if attrs['val'] == 'broken': + self.current.set_conc(CONC_BROKEN) + elif tag == "alternate_names": + val = attrs['val'] + if val == 'none': + self.current.set_alt_name(ALT_NAME_NONE) + elif val == 'event_aka': + self.current.set_alt_name(ALT_NAME_EVENT_AKA) + elif val == 'alias': + self.current.set_alt_name(ALT_NAME_ALIAS) + elif val == 'aka': + self.current.set_alt_name(ALT_NAME_AKA) + elif val == '_alias': + self.current.set_alt_name(ALT_NAME_UALIAS) + elif tag == "calendars": + if attrs['val'] == 'no': + self.current.set_alt_calendar(CALENDAR_NO) + elif tag == "event": + self.current.add_tag_value(attrs['tag'],attrs['value']) + elif tag == "object_support": + if attrs['val'] == 'no': + self.current.set_obje(OBJE_NO) + elif tag == "prefix": + if attrs['val'] == 'no': + self.current.set_obje(PREFIX_NO) + elif tag == "residence": + if attrs['val'] == 'place': + self.current.set_resi(RESIDENCE_PLAC) + elif tag == "source_refs": + if attrs['val'] == 'no': + self.current.set_source_refs(SOURCE_REFS_NO) + #------------------------------------------------------------------------- # # File Readers @@ -330,7 +966,7 @@ class GedcomParser(UpdateCallback): self.place_parser = GedcomUtils.PlaceParser() self.inline_srcs = {} self.media_map = {} - self.gedmap = GedcomInfo.GedcomInfoDB() + self.gedmap = GedcomInfoDB() self.gedsource = self.gedmap.get_from_source_tag('GEDCOM 5.5') self.use_def_src = default_source if self.use_def_src: @@ -853,7 +1489,7 @@ class GedcomParser(UpdateCallback): else: rdr = AnsiReader(ifile) - self.lexer = GedcomLex.Reader(rdr) + self.lexer = Lexer(rdr) self.filename = filename self.backoff = False @@ -861,7 +1497,7 @@ class GedcomParser(UpdateCallback): self.geddir = os.path.dirname(fullpath) self.error_count = 0 - amap = GedcomInfo.personalConstantAttributes + amap = personalConstantAttributes self.attrs = amap.values() self.gedattr = {} @@ -2095,7 +2731,7 @@ class GedcomParser(UpdateCallback): @param state: The current state @type state: CurrentState """ - status = GedcomInfo.lds_status.get(line.data, gen.lib.LdsOrd.STATUS_NONE) + status = lds_status.get(line.data, gen.lib.LdsOrd.STATUS_NONE) state.lds_ord.set_status(status) def __person_famc(self, line, state): @@ -2794,8 +3430,8 @@ class GedcomParser(UpdateCallback): @type state: CurrentState """ if state.event.get_type().is_custom(): - if line.data in GED_2_GRAMPS: - name = gen.lib.EventType(GED_2_GRAMPS[line.data]) + if line.data in GED_TO_GRAMPS_EVENT: + name = gen.lib.EventType(GED_TO_GRAMPS_EVENT[line.data]) else: val = self.gedsource.tag2gramps(line.data) if val: @@ -2809,8 +3445,7 @@ class GedcomParser(UpdateCallback): state.event.set_type(name) else: try: - if line.data not in GED_2_GRAMPS and \ - line.data not in GED_2_FAMILY and \ + if line.data not in GED_TO_GRAMPS_EVENT and \ line.data[0] != 'Y': state.event.set_description(line.data) except IndexError: @@ -3769,8 +4404,8 @@ class GedcomParser(UpdateCallback): @type state: CurrentState """ if state.attr.get_type() == "": - if line.data in GED_2_GRAMPS: - name = GED_2_GRAMPS[line.data] + if line.data in GED_TO_GRAMPS_EVENT: + name = GED_TO_GRAMPS_EVENT[line.data] else: val = self.gedsource.tag2gramps(line.data) if val: @@ -4229,7 +4864,7 @@ class GedcomParser(UpdateCallback): elif line.token == TOKEN_TIME: tstr = line.data elif line.token == TOKEN_DATE: - #GedcomLex converted already to Date object + #Lexer converted already to Date object dobj = line.data elif line.token == TOKEN_NOTE: self.__skip_subordinate_levels(level+1) diff --git a/src/GrampsDbUtils/__init__.py b/src/GrampsDbUtils/__init__.py index 8c1796cd3..b5315614b 100644 --- a/src/GrampsDbUtils/__init__.py +++ b/src/GrampsDbUtils/__init__.py @@ -23,9 +23,6 @@ """ This package implements additions to the the GrampsDb database. """ - -from _GedcomInfo import (personalConstantEvents, familyConstantEvents, - personalConstantAttributes) from _GedcomParse import GedcomParser diff --git a/src/plugins/export/ExportGedcom.py b/src/plugins/export/ExportGedcom.py index ff3f0ebce..8537158de 100644 --- a/src/plugins/export/ExportGedcom.py +++ b/src/plugins/export/ExportGedcom.py @@ -41,7 +41,7 @@ import time import gen.lib from gen.lib.date import make_gedcom_date, MONTH import const -import GrampsDbUtils._GedcomInfo as GedcomInfo +import GrampsDbUtils._GedcomParse as libgedcom import Errors from ExportOptions import WriterOptionBox import BasicUtils @@ -607,7 +607,7 @@ class GedcomWriter(BasicUtils.UpdateCallback): if etype in (gen.lib.EventType.BIRTH, gen.lib.EventType.DEATH): continue - val = GedcomInfo.personalConstantEvents.get(etype, "").strip() + val = libgedcom.personalConstantEvents.get(etype, "").strip() if val and val.strip(): if val in NEEDS_PARAMETER: @@ -685,7 +685,7 @@ class GedcomWriter(BasicUtils.UpdateCallback): for attr in attr_list: attr_type = int(attr.get_type()) - name = GedcomInfo.personalConstantAttributes.get(attr_type) + name = libgedcom.personalConstantAttributes.get(attr_type) key = str(attr.get_type()) value = attr.get_value().strip().replace('\r', ' ') @@ -876,7 +876,7 @@ class GedcomWriter(BasicUtils.UpdateCallback): for event_ref in [ ref for ref in family.get_event_ref_list()]: event = self.dbase.get_event_from_handle(event_ref.ref) etype = int(event.get_type()) - val = GedcomInfo.familyConstantEvents.get(etype) + val = libgedcom.familyConstantEvents.get(etype) if val: if event_has_subordinate_data(event, event_ref): @@ -931,7 +931,7 @@ class GedcomWriter(BasicUtils.UpdateCallback): for attr in attr_list: attr_type = int(attr.get_type()) - name = GedcomInfo.familyConstantAttributes.get(attr_type) + name = libgedcom.familyConstantAttributes.get(attr_type) value = attr.get_value().replace('\r', ' ') if attr_type in ("AFN", "RFN", "REFN", "_UID"):