2007-02-24 Don Allingham <don@gramps-project.org>
* src/DisplayTabs/_NoteModel.py: added * src/DisplayTabs/_NoteTab.py: support new list * src/GrampsDbUtils/_GedcomParse.py: enhancements to parsing * src/GrampsDbUtils/_ReadGedcom.py: handle encoding properly * src/GrampsDbUtils/_GedcomChar.py: new encoding interface * src/GrampsDbUtils/_GedcomLex.py: cleanup svn: r8231
This commit is contained in:
parent
a8ad1dcdcf
commit
706916af15
@ -1,3 +1,11 @@
|
||||
2007-02-24 Don Allingham <don@gramps-project.org>
|
||||
* src/DisplayTabs/_NoteModel.py: added
|
||||
* src/DisplayTabs/_NoteTab.py: support new list
|
||||
* src/GrampsDbUtils/_GedcomParse.py: enhancements to parsing
|
||||
* src/GrampsDbUtils/_ReadGedcom.py: handle encoding properly
|
||||
* src/GrampsDbUtils/_GedcomChar.py: new encoding interface
|
||||
* src/GrampsDbUtils/_GedcomLex.py: cleanup
|
||||
|
||||
2007-02-24 Brian Matherly <brian@gramps-project.org>
|
||||
* src/docgen/SvgDrawDoc.py: Fix XML error in draw_text.
|
||||
|
||||
|
46
src/DisplayTabs/_NoteModel.py
Normal file
46
src/DisplayTabs/_NoteModel.py
Normal file
@ -0,0 +1,46 @@
|
||||
#
|
||||
# Gramps - a GTK+/GNOME based genealogy program
|
||||
#
|
||||
# Copyright (C) 2000-2006 Donald N. Allingham
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
# $Id: _NoteModel.py 7068 2006-07-24 23:06:49Z rshura $
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# GTK libraries
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
import gtk
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# NoteModel
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
class NoteModel(gtk.ListStore):
|
||||
|
||||
def __init__(self, note_list, db):
|
||||
gtk.ListStore.__init__(self, str, str, object)
|
||||
self.db = db
|
||||
for handle in note_list:
|
||||
note = self.db.get_note_from_handle(handle)
|
||||
self.append(row=[
|
||||
str(note.get_type()),
|
||||
note.get().replace('\n', ' ')[:80],
|
||||
handle,
|
||||
])
|
@ -27,169 +27,60 @@
|
||||
#-------------------------------------------------------------------------
|
||||
from gettext import gettext as _
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# GTK libraries
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
import gtk
|
||||
import pango
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# GRAMPS classes
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
import Spell
|
||||
from _GrampsTab import GrampsTab
|
||||
from DisplayTabs import log
|
||||
from MarkupText import EditorBuffer
|
||||
from _NoteModel import NoteModel
|
||||
from _EmbeddedList import EmbeddedList
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# NoteTab
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
class NoteTab(GrampsTab):
|
||||
class NoteTab(EmbeddedList):
|
||||
|
||||
def __init__(self, dbstate, uistate, track, note_list, title=_('Note')):
|
||||
self.note_list = note_list
|
||||
self.original = note_list[:]
|
||||
_HANDLE_COL = 2
|
||||
|
||||
GrampsTab.__init__(self, dbstate, uistate, track, title)
|
||||
self.show_all()
|
||||
_column_names = [
|
||||
(_('Type'), 0, 100),
|
||||
(_('Preview'), 1, 200),
|
||||
]
|
||||
|
||||
def get_icon_name(self):
|
||||
return 'gramps-notes'
|
||||
def __init__(self, dbstate, uistate, track, data):
|
||||
self.data = data
|
||||
EmbeddedList.__init__(self, dbstate, uistate, track,
|
||||
_("Notes"), NoteModel)
|
||||
|
||||
def _update_label(self, *obj):
|
||||
cc = self.buf.get_char_count()
|
||||
if cc == 0 and not self.empty:
|
||||
self.empty = True
|
||||
self._set_label()
|
||||
elif cc != 0 and self.empty:
|
||||
self.empty = False
|
||||
self._set_label()
|
||||
def get_editor(self):
|
||||
pass
|
||||
|
||||
def is_empty(self):
|
||||
"""
|
||||
Indicates if the tab contains any data. This is used to determine
|
||||
how the label should be displayed.
|
||||
"""
|
||||
return self.buf.get_char_count() == 0
|
||||
def get_user_values(self):
|
||||
return []
|
||||
|
||||
def build_interface(self):
|
||||
BUTTON = [(_('Italic'),gtk.STOCK_ITALIC,'<i>i</i>','<Control>I'),
|
||||
(_('Bold'),gtk.STOCK_BOLD,'<b>b</b>','<Control>B'),
|
||||
(_('Underline'),gtk.STOCK_UNDERLINE,'<u>u</u>','<Control>U'),
|
||||
#('Separator', None, None, None),
|
||||
]
|
||||
def get_data(self):
|
||||
return self.data
|
||||
|
||||
vbox = gtk.VBox()
|
||||
def column_order(self):
|
||||
return ((1, 0), (1, 1))
|
||||
|
||||
self.text = gtk.TextView()
|
||||
self.text.set_accepts_tab(True)
|
||||
# Accelerator dictionary used for formatting shortcuts
|
||||
# key: tuple(key, modifier)
|
||||
# value: widget, to emit 'activate' signal on
|
||||
self.accelerator = {}
|
||||
self.text.connect('key-press-event', self._on_key_press_event)
|
||||
def add_button_clicked(self, obj):
|
||||
pass
|
||||
|
||||
self.flowed = gtk.RadioButton(None, _('Flowed'))
|
||||
self.format = gtk.RadioButton(self.flowed, _('Formatted'))
|
||||
|
||||
# if self.note_obj and self.note_obj.get_format():
|
||||
# self.format.set_active(True)
|
||||
# self.text.set_wrap_mode(gtk.WRAP_NONE)
|
||||
# else:
|
||||
# self.flowed.set_active(True)
|
||||
# self.text.set_wrap_mode(gtk.WRAP_WORD)
|
||||
self.spellcheck = Spell.Spell(self.text)
|
||||
|
||||
self.flowed.connect('toggled', self.flow_changed)
|
||||
|
||||
scroll = gtk.ScrolledWindow()
|
||||
scroll.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
|
||||
scroll.add(self.text)
|
||||
# FIXME: is this signal called at all
|
||||
scroll.connect('focus-out-event', self.update)
|
||||
|
||||
vbox.pack_start(scroll, True)
|
||||
vbox.set_spacing(6)
|
||||
vbox.set_border_width(6)
|
||||
|
||||
hbox = gtk.HBox()
|
||||
hbox.set_spacing(12)
|
||||
hbox.set_border_width(6)
|
||||
hbox.pack_start(self.flowed, False)
|
||||
hbox.pack_start(self.format, False)
|
||||
vbox.pack_start(hbox, False)
|
||||
self.pack_start(vbox, True)
|
||||
|
||||
self.buf = EditorBuffer()
|
||||
self.text.set_buffer(self.buf)
|
||||
tooltips = gtk.Tooltips()
|
||||
for tip, stock, markup, accel in BUTTON:
|
||||
if markup:
|
||||
button = gtk.ToggleButton()
|
||||
image = gtk.Image()
|
||||
image.set_from_stock(stock, gtk.ICON_SIZE_MENU)
|
||||
button.set_image(image)
|
||||
button.set_relief(gtk.RELIEF_NONE)
|
||||
tooltips.set_tip(button, tip)
|
||||
self.buf.setup_widget_from_xml(button, markup)
|
||||
key, mod = gtk.accelerator_parse(accel)
|
||||
self.accelerator[(key, mod)] = button
|
||||
hbox.pack_start(button, False)
|
||||
else:
|
||||
hbox.pack_start(gtk.VSeparator(), False)
|
||||
hbox.pack_start(gtk.Label(_('Additional Notes:')),False)
|
||||
self.menu = gtk.ComboBox()
|
||||
hbox.pack_start(self.menu, True)
|
||||
|
||||
# if self.note_obj:
|
||||
# self.empty = False
|
||||
# self.buf.set_text(self.note_obj.get(markup=True))
|
||||
# log.debug("Text: %s" % self.buf.get_text())
|
||||
# else:
|
||||
# self.empty = True
|
||||
|
||||
self.buf.connect('changed', self.update)
|
||||
self.buf.connect_after('apply-tag', self.update)
|
||||
self.buf.connect_after('remove-tag', self.update)
|
||||
def add_callback(self, name):
|
||||
self.get_data().append(name)
|
||||
self.changed = True
|
||||
self.rebuild()
|
||||
|
||||
def _on_key_press_event(self, widget, event):
|
||||
log.debug("Key %s (%d) was pressed on %s" %
|
||||
(gtk.gdk.keyval_name(event.keyval), event.keyval, widget))
|
||||
key = event.keyval
|
||||
mod = event.state
|
||||
if self.accelerator.has_key((key, mod)):
|
||||
self.accelerator[(key, mod)].emit('activate')
|
||||
return True
|
||||
def edit_button_clicked(self, obj):
|
||||
note = self.get_selected()
|
||||
if note:
|
||||
print note
|
||||
|
||||
def update(self, obj, *args):
|
||||
# if self.note_obj:
|
||||
# start = self.buf.get_start_iter()
|
||||
# stop = self.buf.get_end_iter()
|
||||
# text = self.buf.get_text(start, stop)
|
||||
# self.note_obj.set(text)
|
||||
# else:
|
||||
# print "NOTE OBJ DOES NOT EXIST"
|
||||
self._update_label(obj)
|
||||
return False
|
||||
|
||||
def flow_changed(self, obj):
|
||||
if obj.get_active():
|
||||
self.text.set_wrap_mode(gtk.WRAP_WORD)
|
||||
# self.note_obj.set_format(0)
|
||||
else:
|
||||
self.text.set_wrap_mode(gtk.WRAP_NONE)
|
||||
# self.note_obj.set_format(1)
|
||||
|
||||
def rebuild(self):
|
||||
self._set_label()
|
||||
|
||||
def cancel(self):
|
||||
pass
|
||||
# self.note_obj.unserialize(self.original)
|
||||
def edit_callback(self, name):
|
||||
self.changed = True
|
||||
self.rebuild()
|
||||
|
76
src/GrampsDbUtils/_GedcomChar.py
Normal file
76
src/GrampsDbUtils/_GedcomChar.py
Normal file
@ -0,0 +1,76 @@
|
||||
#
|
||||
# Gramps - a GTK+/GNOME based genealogy program
|
||||
#
|
||||
# Copyright (C) 2000-2005 Donald N. Allingham
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
|
||||
from ansel_utf8 import ansel_to_utf8
|
||||
|
||||
class BaseReader:
|
||||
def __init__(self, ifile, encoding):
|
||||
self.ifile = ifile
|
||||
self.enc = encoding
|
||||
|
||||
def reset(self):
|
||||
self.ifile.seek(0)
|
||||
|
||||
def readline(self):
|
||||
return unicode(self.ifile.readline(),
|
||||
encoding=self.enc,
|
||||
errors='replace').strip('\n\r')
|
||||
|
||||
class UTF8Reader(BaseReader):
|
||||
|
||||
def __init__(self, ifile):
|
||||
BaseReader.__init__(self, ifile, 'utf8')
|
||||
|
||||
def reset(self):
|
||||
self.ifile.seek(0)
|
||||
data = self.ifile.read(3)
|
||||
if data != "\xef\xbb\xbf":
|
||||
self.ifile.seek(0)
|
||||
|
||||
def readline(self):
|
||||
return unicode(self.ifile.readline(),
|
||||
encoding=self.enc,
|
||||
errors='replace').strip('\n\r')
|
||||
|
||||
class UTF16Reader(BaseReader):
|
||||
|
||||
def __init__(self, ifile):
|
||||
BaseReader.__init__(self, ifile, 'utf16')
|
||||
|
||||
def reset(self):
|
||||
self.ifile.seek(0)
|
||||
data = self.ifile.read(2)
|
||||
if data != "\xff\xfe":
|
||||
self.ifile.seek(0)
|
||||
|
||||
class AnsiReader(BaseReader):
|
||||
|
||||
def __init__(self, ifile):
|
||||
BaseReader.__init__(self, ifile, 'latin1')
|
||||
|
||||
class AnselReader(BaseReader):
|
||||
|
||||
def __init__(self, ifile):
|
||||
BaseReader.__init__(self, ifile, "")
|
||||
|
||||
def readline(self):
|
||||
return ansel_to_utf8(self.ifile.readline().strip('\n\r'))
|
||||
|
||||
|
@ -22,21 +22,22 @@
|
||||
|
||||
"Import from GEDCOM"
|
||||
|
||||
__revision__ = "$Revision: $"
|
||||
__author__ = "Don Allingham"
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# standard python modules
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import string
|
||||
from gettext import gettext as _
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# GRAMPS modules
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
from ansel_utf8 import ansel_to_utf8
|
||||
|
||||
from _GedcomInfo import *
|
||||
from _GedcomTokens import *
|
||||
@ -45,60 +46,25 @@ from DateHandler._DateParser import DateParser
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# latin/utf8 conversions
|
||||
#
|
||||
# constants #
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
def utf8_to_latin(msg):
|
||||
"""
|
||||
Converts a string from unicode to iso-8859-1. If any illegal characters
|
||||
are found, they are converted to ?
|
||||
|
||||
@param msg: unicode string to convert
|
||||
@type level: unicode
|
||||
@return: Returns the string, converted to a ISO-8859-1 object
|
||||
@rtype: str
|
||||
"""
|
||||
return msg.encode('iso-8859-1', 'replace')
|
||||
|
||||
def latin_to_utf8(s):
|
||||
if type(s) == unicode:
|
||||
return s
|
||||
else:
|
||||
return unicode(s,'iso-8859-1')
|
||||
|
||||
def nocnv(s):
|
||||
return unicode(s,errors='replace')
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# constants
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
ANSEL = 1
|
||||
UNICODE = 2
|
||||
UPDATE = 25
|
||||
|
||||
_transtable = string.maketrans('','')
|
||||
_delc = _transtable[0:8] + _transtable[10:31]
|
||||
_transtable2 = _transtable[0:128] + ('?' * 128)
|
||||
|
||||
ged2gramps = {}
|
||||
GED2GRAMPS = {}
|
||||
for _val in personalConstantEvents.keys():
|
||||
_key = personalConstantEvents[_val]
|
||||
if _key != "":
|
||||
ged2gramps[_key] = _val
|
||||
GED2GRAMPS[_key] = _val
|
||||
|
||||
for _val in familyConstantEvents.keys():
|
||||
_key = familyConstantEvents[_val]
|
||||
if _key != "":
|
||||
ged2gramps[_key] = _val
|
||||
GED2GRAMPS[_key] = _val
|
||||
|
||||
ged2attr = {}
|
||||
GED2ATTR = {}
|
||||
for _val in personalConstantAttributes.keys():
|
||||
_key = personalConstantAttributes[_val]
|
||||
if _key != "":
|
||||
ged2attr[_key] = _val
|
||||
GED2ATTR[_key] = _val
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
@ -106,26 +72,24 @@ for _val in personalConstantAttributes.keys():
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
intRE = re.compile(r"\s*(\d+)\s*$")
|
||||
modRegexp = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
|
||||
calRegexp = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
|
||||
rangeRegexp = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
|
||||
spanRegexp = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
|
||||
intRegexp = re.compile(r"\s*INT\s+([^(]+)\((.*)\)$")
|
||||
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
|
||||
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D([^@]+)@\s*(.*)$")
|
||||
RANGE = re.compile(r"\s*BET\s+@#D([^@]+)@\s*(.*)\s+AND\s+@#D([^@]+)@\s*(.*)$")
|
||||
SPAN = re.compile(r"\s*FROM\s+@#D([^@]+)@\s*(.*)\s+TO\s+@#D([^@]+)@\s*(.*)$")
|
||||
|
||||
_calendar_map = {
|
||||
CALENDAR_MAP = {
|
||||
"FRENCH R" : RelLib.Date.CAL_FRENCH,
|
||||
"JULIAN" : RelLib.Date.CAL_JULIAN,
|
||||
"HEBREW" : RelLib.Date.CAL_HEBREW,
|
||||
}
|
||||
|
||||
_quality_map = {
|
||||
QUALITY_MAP = {
|
||||
'CAL' : RelLib.Date.QUAL_CALCULATED,
|
||||
'INT' : RelLib.Date.QUAL_CALCULATED,
|
||||
'EST' : RelLib.Date.QUAL_ESTIMATED,
|
||||
}
|
||||
|
||||
_sex_map = {
|
||||
SEX_MAP = {
|
||||
'F' : RelLib.Person.FEMALE,
|
||||
'M' : RelLib.Person.MALE,
|
||||
}
|
||||
@ -185,20 +149,21 @@ class GedLine:
|
||||
self.data = data[2]
|
||||
|
||||
if self.level == 0:
|
||||
if self.token_text and self.token_text[0] == '@' and self.token_text[-1] == '@':
|
||||
if self.token_text and self.token_text[0] == '@' \
|
||||
and self.token_text[-1] == '@':
|
||||
self.token = TOKEN_ID
|
||||
self.token_text = self.token_text[1:-1]
|
||||
self.data = self.data.strip()
|
||||
else:
|
||||
f = MAP_DATA.get(self.token)
|
||||
if f:
|
||||
f(self)
|
||||
func = MAP_DATA.get(self.token)
|
||||
if func:
|
||||
func(self)
|
||||
|
||||
def calc_sex(self):
|
||||
"""
|
||||
Converts the data field to a RelLib token indicating the gender
|
||||
"""
|
||||
self.data = _sex_map.get(self.data.strip(),RelLib.Person.UNKNOWN)
|
||||
self.data = SEX_MAP.get(self.data.strip(), RelLib.Person.UNKNOWN)
|
||||
|
||||
def calc_date(self):
|
||||
"""
|
||||
@ -212,12 +177,12 @@ class GedLine:
|
||||
change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and
|
||||
the data is assigned to the associated GRAMPS EventType
|
||||
"""
|
||||
token = ged2gramps.get(self.token_text)
|
||||
token = GED2GRAMPS.get(self.token_text)
|
||||
if token:
|
||||
self.token = TOKEN_GEVENT
|
||||
self.data = token
|
||||
else:
|
||||
token = ged2attr.get(self.token_text)
|
||||
token = GED2ATTR.get(self.token_text)
|
||||
if token:
|
||||
attr = RelLib.Attribute()
|
||||
attr.set_value(self.data)
|
||||
@ -226,10 +191,10 @@ class GedLine:
|
||||
self.data = attr
|
||||
|
||||
def calc_note(self):
|
||||
d = self.data.strip()
|
||||
if len(d) > 2 and d[0] == '@' and d[-1] == '@':
|
||||
gid = self.data.strip()
|
||||
if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
|
||||
self.token = TOKEN_RNOTE
|
||||
self.data = d[1:-1]
|
||||
self.data = gid[1:-1]
|
||||
|
||||
def calc_nchi(self):
|
||||
attr = RelLib.Attribute()
|
||||
@ -245,10 +210,6 @@ class GedLine:
|
||||
self.data = attr
|
||||
self.token = TOKEN_ATTR
|
||||
|
||||
def calc_lds(self):
|
||||
self.data = _
|
||||
self.token = TOKEN_ATTR
|
||||
|
||||
def __repr__(self):
|
||||
return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
|
||||
self.token_text, self.data)
|
||||
@ -276,7 +237,7 @@ MAP_DATA = {
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
_dp = GedcomDateParser()
|
||||
DATE_CNV = GedcomDateParser()
|
||||
|
||||
def extract_date(text):
|
||||
"""
|
||||
@ -285,54 +246,55 @@ def extract_date(text):
|
||||
dateobj = RelLib.Date()
|
||||
try:
|
||||
# extract out the MOD line
|
||||
match = modRegexp.match(text)
|
||||
match = MOD.match(text)
|
||||
if match:
|
||||
(mod, text) = match.groups()
|
||||
qual = _quality_map.get(mod, RelLib.Date.QUAL_NONE)
|
||||
qual = QUALITY_MAP.get(mod, RelLib.Date.QUAL_NONE)
|
||||
else:
|
||||
qual = RelLib.Date.QUAL_NONE
|
||||
|
||||
# parse the range if we match, if so, return
|
||||
match = rangeRegexp.match(text)
|
||||
match = RANGE.match(text)
|
||||
if match:
|
||||
(cal1,data1,cal2,data2) = match.groups()
|
||||
(cal1, data1, cal2, data2) = match.groups()
|
||||
|
||||
cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
|
||||
cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
|
||||
|
||||
start = _dp.parse(data1)
|
||||
stop = _dp.parse(data2)
|
||||
start = DATE_CNV.parse(data1)
|
||||
stop = DATE_CNV.parse(data2)
|
||||
dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_RANGE, cal,
|
||||
start.get_start_date() + stop.get_start_date())
|
||||
dateobj.set_quality(qual)
|
||||
return dateobj
|
||||
|
||||
# parse a span if we match
|
||||
match = spanRegexp.match(text)
|
||||
match = SPAN.match(text)
|
||||
if match:
|
||||
(cal1,data1,cal2,data2) = match.groups()
|
||||
(cal1, data1, cal2, data2) = match.groups()
|
||||
|
||||
cal = _calendar_map.get(cal1, RelLib.Date.CAL_GREGORIAN)
|
||||
cal = CALENDAR_MAP.get(cal1, RelLib.Date.CAL_GREGORIAN)
|
||||
|
||||
start = _dp.parse(data1)
|
||||
stop = _dp.parse(data2)
|
||||
start = DATE_CNV.parse(data1)
|
||||
stop = DATE_CNV.parse(data2)
|
||||
dateobj.set(RelLib.Date.QUAL_NONE, RelLib.Date.MOD_SPAN, cal,
|
||||
start.get_start_date() + stop.get_start_date())
|
||||
dateobj.set_quality(qual)
|
||||
return dateobj
|
||||
|
||||
match = calRegexp.match(text)
|
||||
match = CAL.match(text)
|
||||
if match:
|
||||
(abt,cal,data) = match.groups()
|
||||
dateobj = _dp.parse("%s %s" % (abt, data))
|
||||
dateobj.set_calendar(_calendar_map.get(cal, RelLib.Date.CAL_GREGORIAN))
|
||||
(abt, cal, data) = match.groups()
|
||||
dateobj = DATE_CNV.parse("%s %s" % (abt, data))
|
||||
dateobj.set_calendar(CALENDAR_MAP.get(cal,
|
||||
RelLib.Date.CAL_GREGORIAN))
|
||||
dateobj.set_quality(qual)
|
||||
return dateobj
|
||||
|
||||
dateobj = _dp.parse(text)
|
||||
dateobj = DATE_CNV.parse(text)
|
||||
dateobj.set_quality(qual)
|
||||
return dateobj
|
||||
except IOError:
|
||||
return self.dp.set_text(text)
|
||||
return DATE_CNV.set_text(text)
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
@ -341,8 +303,8 @@ def extract_date(text):
|
||||
#-------------------------------------------------------------------------
|
||||
class Reader:
|
||||
|
||||
def __init__(self, f):
|
||||
self.f = f
|
||||
def __init__(self, ifile):
|
||||
self.ifile = ifile
|
||||
self.current_list = []
|
||||
self.eof = False
|
||||
self.cnv = None
|
||||
@ -353,11 +315,7 @@ class Reader:
|
||||
TOKEN_CONC : self._fix_token_conc,
|
||||
}
|
||||
|
||||
def set_charset_fn(self,cnv):
|
||||
print "Character set changed", cnv
|
||||
self.cnv = cnv
|
||||
|
||||
def set_broken_conc(self,broken):
|
||||
def set_broken_conc(self, broken):
|
||||
self.func_map = {
|
||||
TOKEN_CONT : self._fix_token_cont,
|
||||
TOKEN_CONC : self._fix_token_broken_conc,
|
||||
@ -372,46 +330,39 @@ class Reader:
|
||||
return None
|
||||
|
||||
def _fix_token_cont(self, data):
|
||||
l = self.current_list[0]
|
||||
new_value = l[2]+'\n'+data[2]
|
||||
self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
|
||||
line = self.current_list[0]
|
||||
new_value = line[2]+'\n'+data[2]
|
||||
self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
|
||||
|
||||
def _fix_token_conc(self, data):
|
||||
l = self.current_list[0]
|
||||
new_value = l[2] + data[2]
|
||||
self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
|
||||
line = self.current_list[0]
|
||||
new_value = line[2] + data[2]
|
||||
self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
|
||||
|
||||
def _fix_token_broken_conc(self, data):
|
||||
l = self.current_list[0]
|
||||
new_value = u"%s %s" % (l[2], data[2])
|
||||
self.current_list[0] = (l[0], l[1], new_value, l[3], l[4])
|
||||
line = self.current_list[0]
|
||||
new_value = u"%s %s" % (line[2], data[2])
|
||||
self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
|
||||
|
||||
def readahead(self):
|
||||
while len(self.current_list) < 5:
|
||||
line = self.f.readline()
|
||||
line = self.ifile.readline()
|
||||
self.index += 1
|
||||
if not line:
|
||||
self.eof = True
|
||||
return
|
||||
|
||||
if self.cnv:
|
||||
try:
|
||||
line = self.cnv(line)
|
||||
except:
|
||||
line = self.cnv(line.translate(_transtable2))
|
||||
else:
|
||||
line = unicode(line,errors='replace')
|
||||
line = line.split(None, 2) + ['']
|
||||
|
||||
line = line.split(None,2) + ['']
|
||||
|
||||
val = line[2].rstrip('\r\n')
|
||||
val = line[2]
|
||||
|
||||
try:
|
||||
level = int(line[0])
|
||||
except:
|
||||
level = 0
|
||||
|
||||
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1], self.index)
|
||||
data = (level, tokens.get(line[1], TOKEN_UNKNOWN), val, line[1],
|
||||
self.index)
|
||||
|
||||
func = self.func_map.get(data[1])
|
||||
if func:
|
||||
@ -419,25 +370,3 @@ class Reader:
|
||||
else:
|
||||
self.current_list.insert(0, data)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
def run():
|
||||
print "Reading", sys.argv[1]
|
||||
a = Reader(sys.argv[1])
|
||||
while True:
|
||||
line = a.readline()
|
||||
print line
|
||||
if not line: break
|
||||
|
||||
# import Utils
|
||||
# Utils.profile(run)
|
||||
run()
|
||||
|
||||
print extract_date("20 JAN 2000")
|
||||
print extract_date("EST 20 JAN 2000")
|
||||
print extract_date("CAL 20 JAN 2000")
|
||||
print extract_date("ABT 20 JAN 2000")
|
||||
print extract_date("INT 20 JAN 2000")
|
||||
print extract_date("BET 20 JAN 2000 AND FEB 2000")
|
||||
print extract_date("FROM 20 JAN 2000 TO FEB 2000")
|
||||
|
@ -64,13 +64,11 @@ all tokens at the lower level.
|
||||
|
||||
For example:
|
||||
|
||||
|
||||
1 BIRT
|
||||
2 DATE 1 JAN 2000
|
||||
2 UKNOWN TAG
|
||||
3 NOTE DATA
|
||||
|
||||
|
||||
The function parsing the individual at level 1, would encounter the BIRT tag.
|
||||
It would look up the BIRT token in the table to see if a function is defined
|
||||
for this TOKEN, and pass control to this function. This function would then
|
||||
@ -81,7 +79,6 @@ the level 2 parser, which would then encounter the "UKNOWN" tag. Since this is
|
||||
not a valid token, it would not be in the table, and a function that would skip
|
||||
all lines until the next level 2 token is found (in this case, skipping the
|
||||
"3 NOTE DATA" line).
|
||||
|
||||
"""
|
||||
|
||||
__revision__ = "$Revision: $"
|
||||
@ -94,10 +91,8 @@ __author__ = "Don Allingham"
|
||||
#-------------------------------------------------------------------------
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
from gettext import gettext as _
|
||||
import copy
|
||||
|
||||
#------------------------------------------------------------------------
|
||||
#
|
||||
@ -114,20 +109,19 @@ LOG = logging.getLogger(".GedcomImport")
|
||||
#-------------------------------------------------------------------------
|
||||
import Errors
|
||||
import RelLib
|
||||
from BasicUtils import NameDisplay
|
||||
from BasicUtils import NameDisplay, UpdateCallback
|
||||
import Utils
|
||||
import Mime
|
||||
import LdsUtils
|
||||
from ansel_utf8 import ansel_to_utf8
|
||||
|
||||
from _GedcomInfo import *
|
||||
from _GedcomTokens import *
|
||||
from _GedcomLex import Reader
|
||||
from _GedcomChar import *
|
||||
|
||||
import _GedcomUtils as GedcomUtils
|
||||
|
||||
from GrampsDb._GrampsDbConst import EVENT_KEY
|
||||
from BasicUtils import UpdateCallback
|
||||
|
||||
try:
|
||||
import Config
|
||||
@ -145,53 +139,14 @@ ADDR_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)\s*(.*)')
|
||||
ADDR2_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)\s+(\d+)')
|
||||
ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*,(.+)')
|
||||
|
||||
|
||||
TRUNC_MSG = _("Your GEDCOM file is corrupted. "
|
||||
"It appears to have been truncated.")
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# latin/utf8 conversions
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
|
||||
def latin_to_utf8(msg):
|
||||
"""
|
||||
Converts a string from iso-8859-1 to unicode. If the string is already
|
||||
unicode, we do nothing.
|
||||
|
||||
@param msg: string to convert
|
||||
@type level: str
|
||||
@return: Returns the string, converted to a unicode object
|
||||
@rtype: unicode
|
||||
"""
|
||||
if type(msg) == unicode:
|
||||
return msg
|
||||
else:
|
||||
return unicode(msg, 'iso-8859-1')
|
||||
|
||||
def nocnv(msg):
|
||||
"""
|
||||
Null operation that makes sure that a unicode string remains a unicode
|
||||
string
|
||||
|
||||
@param msg: unicode to convert
|
||||
@type level: unicode
|
||||
@return: Returns the string, converted to a unicode object
|
||||
@rtype: unicode
|
||||
"""
|
||||
return unicode(msg)
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
# constants
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
ANSEL = 1
|
||||
UNICODE = 2
|
||||
UPDATE = 25
|
||||
|
||||
TYPE_BIRTH = RelLib.ChildRefType()
|
||||
TYPE_ADOPT = RelLib.ChildRefType(RelLib.ChildRefType.ADOPTED)
|
||||
TYPE_FOSTER = RelLib.ChildRefType(RelLib.ChildRefType.FOSTER)
|
||||
@ -224,10 +179,6 @@ MIME_MAP = {
|
||||
EVENT_FAMILY_STR = _("%(event_name)s of %(family)s")
|
||||
EVENT_PERSON_STR = _("%(event_name)s of %(person)s")
|
||||
|
||||
TRANS_TABLE = string.maketrans('', '')
|
||||
DEL_CHARS = TRANS_TABLE[0:8] + TRANS_TABLE[10:31]
|
||||
TRANS_TABLE2 = TRANS_TABLE[0:128] + ('?' * 128)
|
||||
|
||||
FTW_BAD_PLACE = [
|
||||
RelLib.EventType.OCCUPATION,
|
||||
RelLib.EventType.RELIGION,
|
||||
@ -265,6 +216,7 @@ CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
||||
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
|
||||
|
||||
class StageOne:
|
||||
|
||||
def __init__(self, ifile):
|
||||
self.ifile = ifile
|
||||
self.famc = {}
|
||||
@ -275,44 +227,47 @@ class StageOne:
|
||||
|
||||
def parse(self):
|
||||
current = ""
|
||||
|
||||
line = self.ifile.read(3)
|
||||
if line == "\xef\xbb":
|
||||
self.ifile.read(1)
|
||||
self.enc = "UTF8"
|
||||
else:
|
||||
self.ifile.seek(0)
|
||||
|
||||
for line in self.ifile:
|
||||
self.lcnt +=1
|
||||
|
||||
data = line.split(None,2) + ['']
|
||||
try:
|
||||
(level, key, value) = data[:3]
|
||||
value = value.strip()
|
||||
# convert the first value to an integer. We have to be a bit
|
||||
# careful here, since some GEDCOM files have garbage characters
|
||||
# at the front of the first line if they are unicode encoded.
|
||||
# So, if we have a failure to convert, check the last character
|
||||
# of the string, which should be a '0'
|
||||
try:
|
||||
level = int(level)
|
||||
except:
|
||||
level = int(level[-1])
|
||||
level = 0
|
||||
key = key.strip()
|
||||
except:
|
||||
raise Errors.GedcomError("Corrupted file at line %d" % self.lcnt)
|
||||
|
||||
if level == 0 and key[0] == '@':
|
||||
if value == "FAM":
|
||||
if value == ("FAM", "FAMILY") :
|
||||
current = key.strip()
|
||||
current = current[1:-1]
|
||||
elif value == "INDI":
|
||||
elif value == ("INDI", "INDIVIDUAL"):
|
||||
self.pcnt += 1
|
||||
elif key in ("HUSB", "WIFE") and value and value[0] == '@':
|
||||
elif key in ("HUSB", "HUSBAND", "WIFE") and value and value[0] == '@':
|
||||
value = value[1:-1]
|
||||
if self.fams.has_key(value):
|
||||
self.fams[value].append(current)
|
||||
else:
|
||||
self.fams[value] = [current]
|
||||
elif key == "CHIL" and value and value[0] == '@':
|
||||
elif key in ("CHIL", "CHILD") and value and value[0] == '@':
|
||||
value = value[1:-1]
|
||||
if self.famc.has_key(value):
|
||||
self.famc[value].append(current)
|
||||
else:
|
||||
self.famc[value] = [current]
|
||||
elif key == 'CHAR':
|
||||
elif key == 'CHAR' and not self.enc:
|
||||
self.enc = value
|
||||
|
||||
def get_famc_map(self):
|
||||
@ -322,7 +277,10 @@ class StageOne:
|
||||
return self.fams
|
||||
|
||||
def get_encoding(self):
|
||||
return self.enc
|
||||
return self.enc.upper()
|
||||
|
||||
def set_encoding(self, enc):
|
||||
self.enc = enc
|
||||
|
||||
def get_person_count(self):
|
||||
return self.pcnt
|
||||
@ -806,16 +764,20 @@ class GedcomParser(UpdateCallback):
|
||||
data = cursor.next()
|
||||
cursor.close()
|
||||
|
||||
self.lexer = Reader(ifile)
|
||||
enc = stage_one.get_encoding()
|
||||
|
||||
if enc == "ANSEL":
|
||||
rdr = AnselReader(ifile)
|
||||
elif enc in ("UTF-8", "UTF8"):
|
||||
rdr = UTF8Reader(ifile)
|
||||
elif enc in ("UTF-16", "UTF16", "UNICODE"):
|
||||
rdr = UTF16Reader(ifile)
|
||||
else:
|
||||
rdr = AnsiReader(ifile)
|
||||
|
||||
self.lexer = Reader(rdr)
|
||||
self.filename = filename
|
||||
self.backoff = False
|
||||
self.override = False
|
||||
#
|
||||
# if self.override != 0:
|
||||
# if self.override == 1:
|
||||
# self.lexer.set_charset_fn(ansel_to_utf8)
|
||||
# elif self.override == 2:
|
||||
# self.lexer.set_charset_fn(latin_to_utf8)
|
||||
|
||||
fullpath = os.path.normpath(os.path.abspath(filename))
|
||||
self.geddir = os.path.dirname(fullpath)
|
||||
@ -1064,9 +1026,6 @@ class GedcomParser(UpdateCallback):
|
||||
"""
|
||||
text = self.groups.line
|
||||
msg = _("Line %d was not understood, so it was ignored.") % text
|
||||
import traceback
|
||||
traceback.print_stack()
|
||||
print self.groups
|
||||
self.warn(msg)
|
||||
self.error_count += 1
|
||||
self.skip_subordinate_levels(level)
|
||||
@ -4039,11 +3998,8 @@ class GedcomParser(UpdateCallback):
|
||||
if genby == "GRAMPS":
|
||||
self.gedsource = self.gedmap.get_from_source_tag(line.data)
|
||||
self.lexer.set_broken_conc(self.gedsource.get_conc())
|
||||
elif line.token == TOKEN_CHAR and not self.override:
|
||||
if line.data == "ANSEL":
|
||||
self.lexer.set_charset_fn(ansel_to_utf8)
|
||||
elif line.data not in ("UNICODE","UTF-8","UTF8"):
|
||||
self.lexer.set_charset_fn(latin_to_utf8)
|
||||
elif line.token == TOKEN_CHAR:
|
||||
pass
|
||||
self.skip_subordinate_levels(2)
|
||||
elif line.token == TOKEN_GEDC:
|
||||
self.skip_subordinate_levels(2)
|
||||
|
@ -66,6 +66,7 @@ def importData(database, filename, callback=None, use_trans=False):
|
||||
dialog.destroy()
|
||||
else:
|
||||
code_set = None
|
||||
|
||||
import2(database, filename, callback, code_set, use_trans)
|
||||
|
||||
def import2(database, filename, callback, code_set, use_trans):
|
||||
@ -74,7 +75,10 @@ def import2(database, filename, callback, code_set, use_trans):
|
||||
ifile = open(filename,"rU")
|
||||
np = StageOne(ifile)
|
||||
np.parse()
|
||||
print np.get_encoding()
|
||||
|
||||
if code_set:
|
||||
np.set_encoding(code_set)
|
||||
|
||||
ifile.seek(0)
|
||||
gedparse = GedcomParser(database, ifile, filename, callback, np)
|
||||
except IOError, msg:
|
||||
@ -85,7 +89,6 @@ def import2(database, filename, callback, code_set, use_trans):
|
||||
_("%s could not be imported") % filename + "\n" + str(msg))
|
||||
return
|
||||
|
||||
|
||||
if database.get_number_of_people() == 0:
|
||||
use_trans = False
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user