223 lines
6.7 KiB
Python
223 lines
6.7 KiB
Python
|
#
|
||
|
# Gramps - a GTK+/GNOME based genealogy program
|
||
|
#
|
||
|
# Copyright (C) 2004 Donald N. Allingham
|
||
|
#
|
||
|
# This program is free software; you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation; either version 2 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
#
|
||
|
# This program is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with this program; if not, write to the Free Software
|
||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
#
|
||
|
|
||
|
"""
|
||
|
U.S. English date parsing class. Serves as the base class for any localized
|
||
|
date parsing class.
|
||
|
"""
|
||
|
|
||
|
__author__ = "Donald N. Allingham"
|
||
|
__version__ = "$Revision$"
|
||
|
|
||
|
import string
|
||
|
import re
|
||
|
import Date
|
||
|
|
||
|
class DateParser:
    """
    Converts a text string into a Date object. If the date cannot be
    converted, the text string is assigned.

    The lookup tables and compiled patterns below are class attributes so
    that they are built once, when the class is defined.
    """

    # Lower-case month names and abbreviations -> month number (1-12).
    month_to_int = {
        'jan'      : 1,
        'january'  : 1,
        'feb'      : 2,
        'february' : 2,
        'mar'      : 3,
        'march'    : 3,
        'apr'      : 4,
        'april'    : 4,
        'may'      : 5,
        'june'     : 6,
        'jun'      : 6,
        'july'     : 7,
        'jul'      : 7,
        'august'   : 8,
        'aug'      : 8,
        'september': 9,
        'sep'      : 9,
        'sept'     : 9,
        'oct'      : 10,
        'october'  : 10,
        'nov'      : 11,
        'november' : 11,
        'dec'      : 12,
        'december' : 12,
        }

    # Lower-case leading keyword -> Date modifier constant.
    modifier_to_int = {
        'before' : Date.MOD_BEFORE,
        'bef'    : Date.MOD_BEFORE,
        'bef.'   : Date.MOD_BEFORE,
        'after'  : Date.MOD_AFTER,
        'aft'    : Date.MOD_AFTER,
        'aft.'   : Date.MOD_AFTER,
        'about'  : Date.MOD_ABOUT,
        'abt.'   : Date.MOD_ABOUT,
        'abt'    : Date.MOD_ABOUT,
        'circa'  : Date.MOD_ABOUT,
        'c.'     : Date.MOD_ABOUT,
        'around' : Date.MOD_ABOUT,
        }

    # Lower-case leading keyword -> Date quality constant.
    quality_to_int = {
        'estimated'  : Date.QUAL_ESTIMATED,
        'est.'       : Date.QUAL_ESTIMATED,
        'est'        : Date.QUAL_ESTIMATED,
        'calc.'      : Date.QUAL_CALCULATED,
        'calc'       : Date.QUAL_CALCULATED,
        'calculated' : Date.QUAL_CALCULATED,
        }

    # Regex alternations built from the table keys.
    #  * Keys are escaped so that the '.' in 'est.', 'bef.', 'c.' ... only
    #    matches a literal period; otherwise the pattern can match text
    #    that is not a key of the table it is later looked up in.
    #  * Keys are ordered longest first.  Alternation takes the first
    #    alternative that lets the whole pattern succeed, so a short key
    #    such as 'jan' listed before 'january' would win whenever the rest
    #    of the pattern is optional (as in _text2), dropping the year.
    _qual_str = '(' + '|'.join(
        map(re.escape, sorted(quality_to_int, key=len, reverse=True))) + ')'
    _mod_str = '(' + '|'.join(
        map(re.escape, sorted(modifier_to_int, key=len, reverse=True))) + ')'
    _mon_str = '(' + '|'.join(
        map(re.escape, sorted(month_to_int, key=len, reverse=True))) + ')'

    # "<quality> <rest>"
    _qual     = re.compile(r"%s\s+(.*)" % _qual_str, re.IGNORECASE)
    # "from <start> to <stop>"
    _span     = re.compile(r"from\s+(.*)\s+to\s+(.*)", re.IGNORECASE)
    # "between <start> and <stop>"; "bet." and "bet" are accepted as well
    _range    = re.compile(r"(bet\.?|between)\s+(.*)\s+and\s+(.*)",
                           re.IGNORECASE)
    # "<modifier> <rest>"
    _modifier = re.compile(r'%s\s+(.*)' % _mod_str, re.IGNORECASE)
    # "<month> [number][,] [year[/digits]]"
    _text     = re.compile(r'%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _mon_str,
                           re.IGNORECASE)
    # "[day] <month> [year[/digits]]"
    _text2    = re.compile(r'(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _mon_str,
                           re.IGNORECASE)
    # "[number(/|.)][number(/|.)]number"
    _numeric  = re.compile(r"((\d+)[/\.])?((\d+)[/\.])?(\d+)")
    # "number-number-number", read as year-month-day
    _iso      = re.compile(r"(\d+)-(\d+)-(\d+)")

    def _get_int(self, val):
        """
        Converts the string to an integer if the value is not None. If the
        value is None, a zero is returned
        """
        if val is None:
            return 0
        return int(val)

    def _month_number(self, name):
        """
        Returns the month number for a matched month name, looked up
        case-insensitively in month_to_int. None yields zero.
        """
        if name is None:
            return 0
        return self.month_to_int[name.lower()]

    def _parse_subdate(self, text):
        """
        Converts only the date portion of a date.

        The patterns are tried in this order: month name first, day before
        month name, dash separated (year-month-day), then slash or period
        separated (month/day/year).  Every pattern is anchored at the start
        of the text only, so trailing text is ignored.

        Returns a (day, month, year, slash) tuple in which missing parts
        are zero, or Date.EMPTY if no pattern matches.  The slash element
        is True when the year is followed by "/digits", False when it is
        not, and None when a month-name form carries no year.
        """
        match = self._text.match(text)
        if match:
            groups = match.groups()
            m = self._month_number(groups[0])
            number = self._get_int(groups[1])
            if groups[2] is not None:
                return (number, m, int(groups[3]), groups[4] is not None)
            # Only one number (or none) follows the month.  The pattern
            # captures it in the day group, so "January 1900" used to come
            # back as day 1900 with no year.  No day exceeds 31, so such
            # a value is taken as the year instead.
            if number > 31:
                return (0, m, number, False)
            return (number, m, 0, None)

        match = self._text2.match(text)
        if match:
            groups = match.groups()
            m = self._month_number(groups[1])
            d = self._get_int(groups[0])
            if groups[2] is None:
                return (d, m, 0, None)
            return (d, m, int(groups[3]), groups[4] is not None)

        match = self._iso.match(text)
        if match:
            groups = match.groups()
            y = self._get_int(groups[0])
            m = self._get_int(groups[1])
            d = self._get_int(groups[2])
            return (d, m, y, False)

        match = self._numeric.match(text)
        if match:
            groups = match.groups()
            m = self._get_int(groups[1])
            d = self._get_int(groups[3])
            y = self._get_int(groups[4])
            return (d, m, y, False)

        return Date.EMPTY

    def set_date(self, date, text):
        """
        Parses the text and stores the result in the supplied Date object.

        A leading quality keyword is stripped first.  The remainder is
        then tried as a span ("from ... to ..."), a range
        ("between ... and ..."), a modified date ("before ...") and
        finally as a plain date.  If the date portion cannot be converted,
        the text is assigned instead.  The Gregorian calendar constant is
        always used.

        The Date object that was passed in is returned.
        """
        date.set_text_value(text)
        qual = Date.QUAL_NONE

        match = self._qual.match(text)
        if match:
            grps = match.groups()
            qual = self.quality_to_int[grps[0].lower()]
            # NOTE(review): from here on only the remainder is used, so a
            # textual fallback below loses the quality keyword - confirm
            # that this is intended.
            text = grps[1]

        match = self._span.match(text)
        if match:
            grps = match.groups()
            start = self._parse_subdate(grps[0])
            stop = self._parse_subdate(grps[1])
            if start == Date.EMPTY or stop == Date.EMPTY:
                # An endpoint could not be converted: keep the text rather
                # than storing a span made of empty values.
                date.set_as_text(text)
            else:
                # presumably Date.EMPTY and the parsed values are tuples,
                # so "+" yields the combined start/stop value - confirm
                date.set(qual, Date.MOD_SPAN, Date.CAL_GREGORIAN,
                         start + stop)
            return date

        match = self._range.match(text)
        if match:
            grps = match.groups()
            start = self._parse_subdate(grps[1])
            stop = self._parse_subdate(grps[2])
            if start == Date.EMPTY or stop == Date.EMPTY:
                date.set_as_text(text)
            else:
                date.set(qual, Date.MOD_RANGE, Date.CAL_GREGORIAN,
                         start + stop)
            return date

        match = self._modifier.match(text)
        if match:
            grps = match.groups()
            start = self._parse_subdate(grps[1])
            if start == Date.EMPTY:
                date.set_as_text(text)
            else:
                mod = self.modifier_to_int.get(grps[0].lower(),
                                               Date.MOD_NONE)
                date.set(qual, mod, Date.CAL_GREGORIAN, start)
            return date

        subdate = self._parse_subdate(text)
        if subdate == Date.EMPTY:
            date.set_as_text(text)
        else:
            date.set(qual, Date.MOD_NONE, Date.CAL_GREGORIAN, subdate)
        return date

    def parse(self, text):
        """
        Parses the text, returning a new Date object.
        """
        new_date = Date.Date()
        self.set_date(new_date, text)
        return new_date
|
||
|
|
||
|
|