2004-09-17 03:30:04 +00:00
|
|
|
#
|
|
|
|
# Gramps - a GTK+/GNOME based genealogy program
|
|
|
|
#
|
|
|
|
# Copyright (C) 2004 Donald N. Allingham
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
#
|
|
|
|
|
|
|
|
"""
|
|
|
|
U.S. English date parsing class. Serves as the base class for any localized
|
|
|
|
date parsing class.
|
|
|
|
"""
|
|
|
|
|
|
|
|
__author__ = "Donald N. Allingham"
|
|
|
|
__version__ = "$Revision$"
|
|
|
|
|
|
|
|
import re
|
2004-09-17 23:52:09 +00:00
|
|
|
import time
|
|
|
|
import locale
|
|
|
|
|
2004-09-17 03:30:04 +00:00
|
|
|
import Date
|
|
|
|
|
2004-09-17 23:52:09 +00:00
|
|
|
|
2004-09-17 03:30:04 +00:00
|
|
|
def _alternation(keys):
    """
    Builds a single-group regular expression alternation, '(a|b|c)',
    from the given keys.

    Each key is escaped so that characters such as '.' are matched
    literally, and longer keys are listed first so that an abbreviation
    (e.g. 'jan') can never win over the full word it is a prefix of
    (e.g. 'january').
    """
    # decorate-sort-undecorate: longest key first, ties broken by the key
    by_length = [(-len(key), key) for key in keys]
    by_length.sort()
    return '(' + '|'.join([re.escape(key) for (_rank, key) in by_length]) + ')'


class DateParser:
    """
    Converts a text string into a Date object. If the date cannot be
    converted, the text string is assigned.

    The lookup tables and regular expressions are class attributes.
    Month names are taken from the current locale when the class is
    defined.
    """

    # determine the code set returned by nl_langinfo
    _codeset = locale.nl_langinfo(locale.CODESET)

    # lower-cased full and abbreviated month names of the current locale,
    # mapped to the month number (1-12)
    month_to_int = {}
    for _number in range(1, 13):
        for _item in ('MON_%d', 'ABMON_%d'):
            _name = locale.nl_langinfo(getattr(locale, _item % _number))
            month_to_int[unicode(_name, _codeset).lower()] = _number
    # do not leave the loop temporaries behind as class attributes
    del _number, _item, _name

    # leading keyword -> date modifier
    modifier_to_int = {
        'before' : Date.MOD_BEFORE,
        'bef'    : Date.MOD_BEFORE,
        'bef.'   : Date.MOD_BEFORE,
        'after'  : Date.MOD_AFTER,
        'aft'    : Date.MOD_AFTER,
        'aft.'   : Date.MOD_AFTER,
        'about'  : Date.MOD_ABOUT,
        'abt.'   : Date.MOD_ABOUT,
        'abt'    : Date.MOD_ABOUT,
        'circa'  : Date.MOD_ABOUT,
        'c.'     : Date.MOD_ABOUT,
        'around' : Date.MOD_ABOUT,
        }

    # leading keyword -> date quality
    quality_to_int = {
        'estimated'  : Date.QUAL_ESTIMATED,
        'est.'       : Date.QUAL_ESTIMATED,
        'est'        : Date.QUAL_ESTIMATED,
        'calc.'      : Date.QUAL_CALCULATED,
        'calc'       : Date.QUAL_CALCULATED,
        'calculated' : Date.QUAL_CALCULATED,
        }

    # one capturing group each, matching any key of the table
    _qual_str = _alternation(quality_to_int.keys())
    _mod_str = _alternation(modifier_to_int.keys())
    _mon_str = _alternation(month_to_int.keys())

    # "<quality> <rest>"
    _qual = re.compile(r"%s\s+(.*)" % _qual_str, re.IGNORECASE)
    # "from <date> to <date>"
    _span = re.compile(r"from\s+(.*)\s+to\s+(.*)", re.IGNORECASE)
    # "between <date> and <date>"; the dot of the abbreviation is literal
    # and optional, mirroring the 'bef'/'bef.' style of the modifiers
    _range = re.compile(r"(bet\.?|between)\s+(.*)\s+and\s+(.*)", re.IGNORECASE)
    # "<modifier> <date>"
    _modifier = re.compile(r'%s\s+(.*)' % _mod_str, re.IGNORECASE)
    # "<month> [day][,] [year[/NN]]"
    _text = re.compile(r'%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _mon_str,
                       re.IGNORECASE)
    # "[day] <month> [year[/NN]]"
    _text2 = re.compile(r'(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _mon_str,
                        re.IGNORECASE)
    # "[month/][day/]year", with '/' or '.' as separator
    _numeric = re.compile(r"((\d+)[/\.])?((\d+)[/\.])?(\d+)")
    # "year-month-day"
    _iso = re.compile(r"(\d+)-(\d+)-(\d+)")

    def _get_int(self, val):
        """
        Converts the string to an integer if the value is not None. If the
        value is None, a zero is returned
        """
        if val is None:
            return 0
        return int(val)

    def _month_day_year(self, month, day, groups):
        """
        Builds the (day, month, year, slash) tuple for a match of _text or
        _text2.

        month and day are the matched strings (or None). groups is the
        complete group tuple of the match; its entries 2, 3 and 4 hold the
        whole year expression, the year digits and the '/NN' suffix.
        """
        if month is None:
            m = 0
        else:
            m = self.month_to_int[month.lower()]

        d = self._get_int(day)

        if groups[2] is None:
            y = 0
            # no year means no slash year either; False keeps the flag the
            # same type as in every other return path
            s = False
        else:
            y = int(groups[3])
            s = groups[4] is not None
        return (d, m, y, s)

    def _parse_subdate(self, text):
        """
        Converts only the date portion of a date.

        The formats are tried in this order: the default time.strptime
        format, "<month> [day] [year]", "[day] <month> [year]",
        "year-month-day" and finally "[month/][day/]year". The patterns are
        anchored at the start of the text only.

        Returns a (day, month, year, slash) tuple, in which missing parts
        are zero and slash is True when the year was followed by a '/NN'
        suffix, or Date.EMPTY if no format matches.
        """
        try:
            value = time.strptime(text)
        except ValueError:
            pass
        else:
            return (value[2], value[1], value[0], False)

        match = self._text.match(text)
        if match:
            groups = match.groups()
            return self._month_day_year(groups[0], groups[1], groups)

        match = self._text2.match(text)
        if match:
            groups = match.groups()
            return self._month_day_year(groups[1], groups[0], groups)

        match = self._iso.match(text)
        if match:
            groups = match.groups()
            y = self._get_int(groups[0])
            m = self._get_int(groups[1])
            d = self._get_int(groups[2])
            return (d, m, y, False)

        match = self._numeric.match(text)
        if match:
            groups = match.groups()
            # the first number is taken as the month, the second as the day
            m = self._get_int(groups[1])
            d = self._get_int(groups[3])
            y = self._get_int(groups[4])
            return (d, m, y, False)

        return Date.EMPTY

    def _is_unparsed(self, subdate, text):
        """
        Returns true if text held something, but _parse_subdate could not
        turn it into a date.
        """
        return subdate == Date.EMPTY and text.strip() != ""

    def _set_compound(self, date, qual, mod, text, start_text, stop_text):
        """
        Sets date to a two-date value (span or range) built from the two
        sub-date strings. If either of them cannot be parsed, the date is
        set as plain text instead of storing a half-empty compound date.
        """
        start = self._parse_subdate(start_text)
        stop = self._parse_subdate(stop_text)
        if (self._is_unparsed(start, start_text)
            or self._is_unparsed(stop, stop_text)):
            date.set_as_text(text)
        else:
            date.set(qual, mod, Date.CAL_GREGORIAN, start + stop)

    def set_date(self, date, text):
        """
        Parses the text and sets the date according to the parsing.

        An optional leading quality keyword is stripped first. The rest is
        then tried as a span ("from ... to ..."), a range ("between ... and
        ..."), a modified date ("before ...", "about ...") and finally as a
        plain date. Text that cannot be converted is assigned to the date
        with set_as_text.
        """
        date.set_text_value(text)
        qual = Date.QUAL_NONE

        match = self._qual.match(text)
        if match:
            grps = match.groups()
            qual = self.quality_to_int[grps[0].lower()]
            # NOTE(review): from here on text no longer contains the quality
            # keyword, so the set_as_text calls below receive the stripped
            # text - confirm that this is intended
            text = grps[1]

        match = self._span.match(text)
        if match:
            grps = match.groups()
            self._set_compound(date, qual, Date.MOD_SPAN, text,
                               grps[0], grps[1])
            return

        match = self._range.match(text)
        if match:
            grps = match.groups()
            # grps[0] is the "between" keyword itself
            self._set_compound(date, qual, Date.MOD_RANGE, text,
                               grps[1], grps[2])
            return

        match = self._modifier.match(text)
        if match:
            grps = match.groups()
            start = self._parse_subdate(grps[1])
            if self._is_unparsed(start, grps[1]):
                date.set_as_text(text)
                return
            mod = self.modifier_to_int.get(grps[0].lower(), Date.MOD_NONE)
            date.set(qual, mod, Date.CAL_GREGORIAN, start)
            return

        subdate = self._parse_subdate(text)
        if subdate == Date.EMPTY:
            date.set_as_text(text)
        else:
            date.set(qual, Date.MOD_NONE, Date.CAL_GREGORIAN, subdate)

    def parse(self, text):
        """
        Parses the text, returning a Date object.
        """
        new_date = Date.Date()
        self.set_date(new_date, text)
        return new_date
|