#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2004 Donald N. Allingham
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
#

# $Id$

"""
U.S. English date parsing class. Serves as the base class for any localized
date parsing class.
"""
|
|
|
|
|
|
|
|
__author__ = "Donald N. Allingham"
|
|
|
|
__version__ = "$Revision$"
|
|
|
|
|
2004-09-27 20:15:38 +00:00
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Python modules
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2004-09-17 03:30:04 +00:00
|
|
|
import re
|
2004-09-17 23:52:09 +00:00
|
|
|
import locale
|
2004-11-15 02:07:21 +00:00
|
|
|
import calendar
|
2004-09-17 23:52:09 +00:00
|
|
|
|
2004-09-27 20:15:38 +00:00
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# GRAMPS modules
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2004-09-17 03:30:04 +00:00
|
|
|
import Date
|
|
|
|
|
2004-11-10 01:55:30 +00:00
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Top-level module functions
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2004-11-15 02:07:21 +00:00
|
|
|
# Number of days in each month, indexed by (month - 1), for ordinary
# years and for leap years respectively.
_max_days  = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ]
_leap_days = [ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ]

def gregorian_valid(date_tuple):
    """
    Checks a (day, month, year) tuple against the Gregorian calendar.

    The first three items of date_tuple are read as day, month and year.
    A day or month of 0 is accepted, since the parser uses 0 for a part
    of the date that was not given.

    Returns True if the month is in the range 0-12 and the day does not
    exceed the length of that month (February has 29 days only in leap
    years). Returns False otherwise, including when the month or day is
    negative, when the year is missing from the tuple, or when the
    values cannot be used as numbers.
    """
    day = date_tuple[0]
    month = date_tuple[1]
    try:
        # Negative values would silently wrap around the tables below
        # through negative indexing, so reject them explicitly.
        if month < 0 or month > 12 or day < 0:
            return False
        if calendar.isleap(date_tuple[2]):
            limits = _leap_days
        else:
            limits = _max_days
        # A month of 0 (unknown) indexes limits[-1], i.e. 31 days, so
        # any day from 0 to 31 is accepted for an unknown month.
        return day <= limits[month-1]
    except (TypeError, IndexError):
        # Non-numeric members or a tuple without a year: not a valid date.
        return False
|
|
|
|
|
2004-09-27 20:15:38 +00:00
|
|
|
#-------------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Parser class
|
|
|
|
#
|
|
|
|
#-------------------------------------------------------------------------
|
2004-09-17 03:30:04 +00:00
|
|
|
class DateParser:
    """
    Converts a text string into a Date object. If the date cannot be
    converted, the text string is assigned.

    The class-level dictionaries map lower-case names (months, modifiers,
    qualities, calendars) to the integer values used by the Date module.
    Localized parsers are expected to derive from this class and override
    the tables and/or init_strings().
    """

    # determine the code set returned by nl_langinfo
    _codeset = locale.nl_langinfo(locale.CODESET)

    # picks the three strftime directive letters out of the locale's D_FMT
    _fmt_parse = re.compile(r".*%(\S).*%(\S).*%(\S).*")

    # RFC-2822 only uses capitalized English abbreviated names, no locales.
    _rfc_days = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat')
    _rfc_mons_to_int = {
        'Jan' : 1,  'Feb' : 2,  'Mar' : 3,  'Apr' : 4,
        'May' : 5,  'Jun' : 6,  'Jul' : 7,  'Aug' : 8,
        'Sep' : 9,  'Oct' : 10, 'Nov' : 11, 'Dec' : 12,
        }

    # full and abbreviated month names of the current locale, lower-cased
    month_to_int = {
        unicode(locale.nl_langinfo(locale.MON_1),_codeset).lower()   : 1,
        unicode(locale.nl_langinfo(locale.ABMON_1),_codeset).lower() : 1,
        unicode(locale.nl_langinfo(locale.MON_2),_codeset).lower()   : 2,
        unicode(locale.nl_langinfo(locale.ABMON_2),_codeset).lower() : 2,
        unicode(locale.nl_langinfo(locale.MON_3),_codeset).lower()   : 3,
        unicode(locale.nl_langinfo(locale.ABMON_3),_codeset).lower() : 3,
        unicode(locale.nl_langinfo(locale.MON_4),_codeset).lower()   : 4,
        unicode(locale.nl_langinfo(locale.ABMON_4),_codeset).lower() : 4,
        unicode(locale.nl_langinfo(locale.MON_5),_codeset).lower()   : 5,
        unicode(locale.nl_langinfo(locale.ABMON_5),_codeset).lower() : 5,
        unicode(locale.nl_langinfo(locale.MON_6),_codeset).lower()   : 6,
        unicode(locale.nl_langinfo(locale.ABMON_6),_codeset).lower() : 6,
        unicode(locale.nl_langinfo(locale.MON_7),_codeset).lower()   : 7,
        unicode(locale.nl_langinfo(locale.ABMON_7),_codeset).lower() : 7,
        unicode(locale.nl_langinfo(locale.MON_8),_codeset).lower()   : 8,
        unicode(locale.nl_langinfo(locale.ABMON_8),_codeset).lower() : 8,
        unicode(locale.nl_langinfo(locale.MON_9),_codeset).lower()   : 9,
        unicode(locale.nl_langinfo(locale.ABMON_9),_codeset).lower() : 9,
        unicode(locale.nl_langinfo(locale.MON_10),_codeset).lower()  : 10,
        unicode(locale.nl_langinfo(locale.ABMON_10),_codeset).lower(): 10,
        unicode(locale.nl_langinfo(locale.MON_11),_codeset).lower()  : 11,
        unicode(locale.nl_langinfo(locale.ABMON_11),_codeset).lower(): 11,
        unicode(locale.nl_langinfo(locale.MON_12),_codeset).lower()  : 12,
        unicode(locale.nl_langinfo(locale.ABMON_12),_codeset).lower(): 12,
        }

    modifier_to_int = {
        'before' : Date.MOD_BEFORE, 'bef'    : Date.MOD_BEFORE,
        'bef.'   : Date.MOD_BEFORE, 'after'  : Date.MOD_AFTER,
        'aft'    : Date.MOD_AFTER,  'aft.'   : Date.MOD_AFTER,
        'about'  : Date.MOD_ABOUT,  'abt.'   : Date.MOD_ABOUT,
        'abt'    : Date.MOD_ABOUT,  'circa'  : Date.MOD_ABOUT,
        'c.'     : Date.MOD_ABOUT,  'around' : Date.MOD_ABOUT,
        }

    hebrew_to_int = {
        "tishri"  : 1,   "heshvan" : 2,   "kislev"  : 3,
        "tevet"   : 4,   "shevat"  : 5,   "adari"   : 6,
        "adarii"  : 7,   "nisan"   : 8,   "iyyar"   : 9,
        "sivan"   : 10,  "tammuz"  : 11,  "av"      : 12,
        "elul"    : 13,
        }

    # Accented letters are written as unicode code points (\xe9 = e acute,
    # \xf4 = o circumflex). UTF-8 byte pairs inside a unicode literal
    # would produce two unrelated characters and never match real input.
    french_to_int = {
        u'vend\xe9miaire' : 1,   'brumaire'     : 2,
        'frimaire'        : 3,   u'niv\xf4se'   : 4,
        u'pluvi\xf4se'    : 5,   u'vent\xf4se'  : 6,
        'germinal'        : 7,   u'flor\xe9al'  : 8,
        'prairial'        : 9,   'messidor'     : 10,
        'thermidor'       : 11,  'fructidor'    : 12,
        'extra'           : 13
        }

    islamic_to_int = {
        "muharram"           : 1,  "muharram ul haram"  : 1,
        "safar"              : 2,  "rabi`al-awwal"      : 3,
        "rabi'l"             : 3,  "rabi`ul-akhir"      : 4,
        "rabi`ath-thani"     : 4,  "rabi` ath-thani"    : 4,
        "rabi`al-thaany"     : 4,  "rabi` al-thaany"    : 4,
        "rabi' ii"           : 4,  "jumada l-ula"       : 5,
        "jumaada-ul-awwal"   : 5,  "jumaada i"          : 5,
        "jumada t-tania"     : 6,  "jumaada-ul-akhir"   : 6,
        "jumaada al-thaany"  : 6,  "jumaada ii"         : 6,
        "rajab"              : 7,  "sha`ban"            : 8,
        "sha`aban"           : 8,  "ramadan"            : 9,
        "ramadhan"           : 9,  "shawwal"            : 10,
        "dhu l-qa`da"        : 11, "dhu qadah"          : 11,
        "thw al-qi`dah"      : 11, "dhu l-hijja"        : 12,
        "dhu hijja"          : 12, "thw al-hijjah"      : 12,
        }

    # Keys must be lower case: _parse_calendar() looks month names up
    # after lower-casing the text.
    persian_to_int = {
        "farvardin"   : 1,  "ordibehesht" : 2,
        "khordad"     : 3,  "tir"         : 4,
        "mordad"      : 5,  "shahrivar"   : 6,
        "mehr"        : 7,  "aban"        : 8,
        "azar"        : 9,  "dey"         : 10,
        "bahman"      : 11, "esfand"      : 12,
        }

    # regular expression fragments recognized as a B.C.E. suffix
    bce = ["BC", r"B\.C", r"B\.C\.", "BCE", r"B\.C\.E", r"B\.C\.E\."]

    calendar_to_int = {
        'gregorian'        : Date.CAL_GREGORIAN,
        'g'                : Date.CAL_GREGORIAN,
        'julian'           : Date.CAL_JULIAN,
        'j'                : Date.CAL_JULIAN,
        'hebrew'           : Date.CAL_HEBREW,
        'h'                : Date.CAL_HEBREW,
        'islamic'          : Date.CAL_ISLAMIC,
        'i'                : Date.CAL_ISLAMIC,
        'french'           : Date.CAL_FRENCH,
        'french republican': Date.CAL_FRENCH,
        'f'                : Date.CAL_FRENCH,
        'persian'          : Date.CAL_PERSIAN,
        'p'                : Date.CAL_PERSIAN,
        }

    quality_to_int = {
        'estimated'  : Date.QUAL_ESTIMATED,
        'est.'       : Date.QUAL_ESTIMATED,
        'est'        : Date.QUAL_ESTIMATED,
        'calc.'      : Date.QUAL_CALCULATED,
        'calc'       : Date.QUAL_CALCULATED,
        'calculated' : Date.QUAL_CALCULATED,
        }

    def __init__(self):
        """
        Compiles the regular expressions, builds the table of
        calendar-specific text parsers and determines from the locale
        whether numeric dates are written day-month-year.
        """
        self.init_strings()

        # calendar -> method parsing "month day year" style text in it;
        # used by match_span() and match_range()
        self.parser = {
            Date.CAL_GREGORIAN : self._parse_greg_julian,
            Date.CAL_JULIAN    : self._parse_greg_julian,
            Date.CAL_PERSIAN   : self._parse_persian,
            Date.CAL_HEBREW    : self._parse_hebrew,
            Date.CAL_ISLAMIC   : self._parse_islamic,
            Date.CAL_FRENCH    : self._parse_french,
            }

        # self.dmy is True only if the locale's date format lists day,
        # month, year in exactly that order. If the format cannot be
        # analysed, day-month-year is assumed.
        fmt = locale.nl_langinfo(locale.D_FMT)
        match = self._fmt_parse.match(fmt.lower())
        if match:
            self.dmy = (match.groups() == ('d','m','y'))
        else:
            self.dmy = True

    def _alternatives(self,keys):
        """
        Builds a parenthesized regular expression group matching any of
        the given strings.

        The strings are reverse-sorted, so that a longer name always comes
        before a name that is its prefix (April before Apr). Otherwise,
        in patterns where nothing mandatory follows the name,
        'april 2000' would be matched as 'apr' + garbage ('il 2000').
        """
        names = list(keys)
        names.sort()
        names.reverse()
        return '(' + '|'.join(names) + ')'

    def init_strings(self):
        """
        This method compiles regular expression strings for matching dates.

        Most of the re's in most languages can stay as is. span and range
        most likely will need to change. Whatever change is done, this method
        may be called first as DateParser.init_strings(self) so that the
        invariant expressions don't need to be repeatedly coded. All
        differences can be coded after DateParser.init_strings(self) call,
        that way they override stuff from this method. See DateParserRU()
        as an example.
        """
        self._rfc_mon_str = self._alternatives(self._rfc_mons_to_int.keys())
        self._rfc_day_str = '(' + '|'.join(self._rfc_days) + ')'

        self._bce_str = '(' + '|'.join(self.bce) + ')'

        # keys may contain dots, which must be taken literally
        self._qual_str = self._alternatives(
            [ key.replace('.',r'\.') for key in self.quality_to_int.keys() ])
        self._mod_str  = self._alternatives(
            [ key.replace('.',r'\.') for key in self.modifier_to_int.keys() ])

        self._mon_str  = self._alternatives(self.month_to_int.keys())
        self._jmon_str = self._alternatives(self.hebrew_to_int.keys())
        self._fmon_str = self._alternatives(self.french_to_int.keys())
        self._pmon_str = self._alternatives(self.persian_to_int.keys())
        self._cal_str  = self._alternatives(self.calendar_to_int.keys())
        self._imon_str = self._alternatives(self.islamic_to_int.keys())

        self._bce_re   = re.compile(r"(.+)\s+%s" % self._bce_str)

        self._cal      = re.compile(r"(.+)\s\(%s\)" % self._cal_str,
                                    re.IGNORECASE)
        self._qual     = re.compile(r"%s\s+(.+)" % self._qual_str,
                                    re.IGNORECASE)
        self._span     = re.compile(r"(from)\s+(.+)\s+(to)\s+(.+)",
                                    re.IGNORECASE)
        self._range    = re.compile(r"(bet|bet\.|between)\s+(.+)\s+(and)\s+(.+)",
                                    re.IGNORECASE)
        self._modifier = re.compile(r'%s\s+(.*)' % self._mod_str,
                                    re.IGNORECASE)

        # Two layouts are tried for every calendar.
        #   month first: groups() = (month, number, year[/n], year, /n)
        #   day first:   groups() = (day, month, year[/n], year, /n)
        month_first = r'%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?'
        day_first   = r'(\d+)?\s+?%s\s*((\d+)(/\d+)?)?'

        self._text   = re.compile(month_first % self._mon_str, re.IGNORECASE)
        self._text2  = re.compile(day_first % self._mon_str, re.IGNORECASE)
        self._jtext  = re.compile(month_first % self._jmon_str, re.IGNORECASE)
        self._jtext2 = re.compile(day_first % self._jmon_str, re.IGNORECASE)
        self._ftext  = re.compile(month_first % self._fmon_str, re.IGNORECASE)
        self._ftext2 = re.compile(day_first % self._fmon_str, re.IGNORECASE)
        self._ptext  = re.compile(month_first % self._pmon_str, re.IGNORECASE)
        self._ptext2 = re.compile(day_first % self._pmon_str, re.IGNORECASE)
        self._itext  = re.compile(month_first % self._imon_str, re.IGNORECASE)
        self._itext2 = re.compile(day_first % self._imon_str, re.IGNORECASE)

        # "month d1-d2, year":
        #   groups() = (month, d1, d2, year[/n], year, /n)
        self._range2 = re.compile(
            r'%s\s+(\d+)-(\d+)\s*,?\s*((\d+)(/\d+)?)?' % self._mon_str,
            re.IGNORECASE)

        self._numeric = re.compile(r"((\d+)[/\.])?((\d+)[/\.])?(\d+)")
        self._iso     = re.compile(r"(\d+)-(\d+)-(\d+)")
        self._rfc     = re.compile(
            r"(%s,)?\s+(\d|\d\d)\s+%s\s+(\d+)\s+\d\d:\d\d(:\d\d)?\s+(\+|-)\d\d\d\d"
            % (self._rfc_day_str,self._rfc_mon_str))

    def _get_int(self,val):
        """
        Converts the string to an integer if the value is not None. If the
        value is None, a zero is returned
        """
        if val is None:
            return 0
        return int(val)

    def _parse_hebrew(self,text):
        """
        Parses text as a Hebrew calendar date; see _parse_calendar().
        """
        return self._parse_calendar(text,self._jtext,self._jtext2,
                                    self.hebrew_to_int)

    def _parse_islamic(self,text):
        """
        Parses text as an Islamic calendar date; see _parse_calendar().
        """
        return self._parse_calendar(text,self._itext,self._itext2,
                                    self.islamic_to_int)

    def _parse_persian(self,text):
        """
        Parses text as a Persian calendar date; see _parse_calendar().
        """
        return self._parse_calendar(text,self._ptext,self._ptext2,
                                    self.persian_to_int)

    def _parse_french(self,text):
        """
        Parses text as a French Republican calendar date; see
        _parse_calendar().
        """
        return self._parse_calendar(text,self._ftext,self._ftext2,
                                    self.french_to_int)

    def _parse_greg_julian(self,text):
        """
        Parses text using the locale's month names. The result is
        additionally validated with gregorian_valid(); see
        _parse_calendar().
        """
        return self._parse_calendar(text,self._text,self._text2,
                                    self.month_to_int,gregorian_valid)

    def _parse_calendar(self,text,regex1,regex2,mmap,check=None):
        """
        Parses a date written with a month name.

        text   - the text to parse; it is lower-cased before matching
        regex1 - compiled "month first" expression
        regex2 - compiled "day first" expression, tried if regex1 fails
        mmap   - dictionary mapping lower-case month names to numbers
        check  - optional function taking a (day,month,year) tuple and
                 returning a false value if the date should be rejected

        Returns a (day,month,year,slash) tuple, or Date.EMPTY if neither
        expression matches or the check rejects the date. Missing parts
        are returned as 0.
        """
        lowered = text.lower()

        # month first: groups = (month, number, year[/n], year, /n)
        match = regex1.match(lowered)
        if match:
            groups = match.groups()
            if groups[0] is None:
                m = 0
            else:
                m = mmap[groups[0].lower()]

            if groups[2] is None:
                # a single number after the month is the year, not the day
                y = self._get_int(groups[1])
                d = 0
                s = None
            else:
                d = self._get_int(groups[1])
                y = int(groups[3])
                s = groups[4] is not None
            if check and not check((d,m,y)):
                return Date.EMPTY
            return (d,m,y,s)

        # day first: groups = (day, month, year[/n], year, /n)
        match = regex2.match(lowered)
        if match:
            groups = match.groups()
            if groups[1] is None:
                m = 0
            else:
                m = mmap[groups[1].lower()]

            d = self._get_int(groups[0])

            if groups[2] is None:
                y = 0
                s = None
            else:
                y = int(groups[3])
                s = groups[4] is not None
            if check and not check((d,m,y)):
                return Date.EMPTY
            return (d,m,y,s)

        return Date.EMPTY

    def _parse_subdate(self,text,subparser=None):
        """
        Converts only the date portion of a date.

        text      - the text holding a single date
        subparser - method used for the first attempt; if None,
                    _parse_greg_julian is used

        If the subparser fails, ISO (y-m-d), RFC-2822 and plain numeric
        formats are tried, in that order. ISO and RFC dates are always
        validated with gregorian_valid(); numeric dates are validated only
        if no subparser was given. Returns a (day,month,year,slash) tuple
        or Date.EMPTY.
        """
        if subparser is None:
            subparser = self._parse_greg_julian
            check = gregorian_valid
        else:
            check = None

        value = subparser(text)
        if value != Date.EMPTY:
            return value

        match = self._iso.match(text)
        if match:
            groups = match.groups()
            y = self._get_int(groups[0])
            m = self._get_int(groups[1])
            d = self._get_int(groups[2])
            if gregorian_valid((d,m,y)):
                return (d,m,y,False)
            return Date.EMPTY

        match = self._rfc.match(text)
        if match:
            # groups = (day name + comma, day name, day, month, year, ...)
            groups = match.groups()
            d = self._get_int(groups[2])
            m = self._rfc_mons_to_int[groups[3]]
            y = self._get_int(groups[4])
            if gregorian_valid((d,m,y)):
                return (d,m,y,False)
            return Date.EMPTY

        match = self._numeric.match(text)
        if match:
            # groups = (n1 + sep, n1, n2 + sep, n2, year); the locale
            # decides which of n1 and n2 is the day
            groups = match.groups()
            if self.dmy:
                m = self._get_int(groups[3])
                d = self._get_int(groups[1])
            else:
                m = self._get_int(groups[1])
                d = self._get_int(groups[3])
            y = self._get_int(groups[4])
            if check and not check((d,m,y)):
                return Date.EMPTY
            return (d,m,y,False)

        return Date.EMPTY

    def match_calendar(self,text,cal):
        """
        Try parsing calendar.

        If the text ends with a calendar name in parentheses, return the
        text before it and the matching calendar index. Otherwise return
        the text and cal unchanged. The result is a (text,cal) tuple.
        """
        match = self._cal.match(text)
        if match:
            grps = match.groups()
            cal = self.calendar_to_int[grps[1].lower()]
            text = grps[0]
        return (text,cal)

    def match_quality(self,text,qual):
        """
        Try matching quality.

        If the text starts with a quality word, return the text after it
        and the matching quality index. Otherwise return the text and qual
        unchanged. The result is a (text,qual) tuple.
        """
        match = self._qual.match(text)
        if match:
            grps = match.groups()
            qual = self.quality_to_int[grps[0].lower()]
            text = grps[1]
        return (text,qual)

    def match_span(self,text,cal,qual,date):
        """
        Try matching span date.

        On success, set the date and return 1. On failure return 0.
        """
        match = self._span.match(text)
        if not match:
            return 0
        # grps = ('from', start text, 'to', stop text)
        grps = match.groups()
        text_parser = self.parser[cal]
        start = self._parse_subdate(grps[1],text_parser)
        stop = self._parse_subdate(grps[3],text_parser)
        date.set(qual,Date.MOD_SPAN,cal,start + stop)
        return 1

    def match_range(self,text,cal,qual,date):
        """
        Try matching range date.

        On success, set the date and return 1. On failure return 0.
        """
        match = self._range.match(text)
        if not match:
            return 0
        # grps = ('between', start text, 'and', stop text)
        grps = match.groups()
        text_parser = self.parser[cal]
        start = self._parse_subdate(grps[1],text_parser)
        stop = self._parse_subdate(grps[3],text_parser)
        date.set(qual,Date.MOD_RANGE,cal,start + stop)
        return 1

    def match_range2(self,text,cal,qual,date):
        """
        Try matching numerical range date, such as "month d1-d2, year".

        The month is looked up in month_to_int and the date is always set
        in the Gregorian calendar; the cal argument is not used.

        On success, set the date and return 1. On failure return 0.
        """
        match = self._range2.match(text)
        if not match:
            return 0
        # grps = (month, d1, d2, year[/n], year, /n)
        grps = match.groups()
        m = self.month_to_int[grps[0].lower()]

        d0 = self._get_int(grps[1])
        d1 = self._get_int(grps[2])

        if grps[3] is None:
            y = 0
            s = None
        else:
            # grps[3] may carry the "/n" part, so the bare year digits
            # and the slash marker are taken from the inner groups
            y = int(grps[4])
            s = grps[5] is not None
        date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN,
                 (d0,m,y,s,d1,m,y,s))
        return 1

    def match_bce(self,text):
        """
        Try matching BCE qualifier.

        Return a (text,bc) tuple. If a BCE marker follows the date, text
        is the part before the marker and bc is True. Otherwise the text
        is returned unchanged and bc is False.
        """
        match = self._bce_re.match(text)
        bc = False
        if match:
            text = match.groups()[0]
            bc = True
        return (text,bc)

    def match_modifier(self,text,cal,qual,bc,date):
        """
        Try matching date with modifier.

        If bc is true, the year of the parsed date is negated before the
        date is set.

        On success, set the date and return 1. On failure return 0.
        """
        match = self._modifier.match(text)
        if not match:
            return 0
        # grps = (modifier word, date text)
        grps = match.groups()
        start = self._parse_subdate(grps[1])
        mod = self.modifier_to_int.get(grps[0].lower(),Date.MOD_NONE)
        if bc:
            date.set(qual,mod,cal,self.invert_year(start))
        else:
            date.set(qual,mod,cal,start)
        return 1

    def set_date(self,date,text):
        """
        Parses the text and sets the date according to the parsing.

        The calendar and quality are stripped from the text first. Then
        span, range and numerical range are tried, followed by a date
        with a modifier and finally a plain date. A plain date that the
        default parser rejects is tried as Hebrew, Persian, Islamic and
        French, in that order. If nothing matches, the date is set as
        text.
        """
        date.set_text_value(text)
        qual = Date.QUAL_NONE
        cal = Date.CAL_GREGORIAN

        (text,cal) = self.match_calendar(text,cal)
        (text,qual) = self.match_quality(text,qual)

        if self.match_span(text,cal,qual,date):
            return
        if self.match_range(text,cal,qual,date):
            return
        if self.match_range2(text,cal,qual,date):
            return

        (text,bc) = self.match_bce(text)
        if self.match_modifier(text,cal,qual,bc,date):
            return

        subdate = self._parse_subdate(text)
        if subdate == Date.EMPTY:
            # fall back on the month names of the other calendars; the
            # first parser that succeeds also determines the calendar
            fallbacks = (
                (self._parse_hebrew,  Date.CAL_HEBREW),
                (self._parse_persian, Date.CAL_PERSIAN),
                (self._parse_islamic, Date.CAL_ISLAMIC),
                (self._parse_french,  Date.CAL_FRENCH),
                )
            for (alt_parser,alt_cal) in fallbacks:
                subdate = alt_parser(text)
                if subdate != Date.EMPTY:
                    cal = alt_cal
                    break
            else:
                date.set_as_text(text)
                return

        if bc:
            date.set(qual,Date.MOD_NONE,cal,self.invert_year(subdate))
        else:
            date.set(qual,Date.MOD_NONE,cal,subdate)

    def invert_year(self,subdate):
        """
        Returns a copy of the (day,month,year,slash) tuple with the year
        negated.
        """
        return (subdate[0],subdate[1],-subdate[2],subdate[3])

    def parse(self,text):
        """
        Parses the text, returning a Date object.
        """
        new_date = Date.Date()
        self.set_date(new_date,text)
        return new_date
|