From 30013bfbacac4b30ff61bab9ac98fea10cbbb6a2 Mon Sep 17 00:00:00 2001 From: Don Allingham Date: Thu, 23 Sep 2004 21:32:07 +0000 Subject: [PATCH] * src/Date.py: display calendar * src/DateParser.py: parse based on calendars * src/DateDisplay.py: use unicode encodings for french months svn: r3570 --- ChangeLog | 5 + src/Date.py | 8 +- src/DateDisplay.py | 22 ++--- src/DateParser.py | 237 +++++++++++++++++++++++++++++++++++++-------- 4 files changed, 213 insertions(+), 59 deletions(-) diff --git a/ChangeLog b/ChangeLog index a0cff8a40..765e2d5c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2004-09-23 Don Allingham + * src/Date.py: display calendar + * src/DateParser.py: parse based on calendars + * src/DateDisplay.py: use unicode encodings for french months + 2004-09-22 Don Allingham * src/EditPerson.py: change sort mechanism to new Date sort value * src/FamilyView.py: change sort mechanism to new Date sort valu diff --git a/src/Date.py b/src/Date.py index 1668b2ff6..7ffe62fd0 100644 --- a/src/Date.py +++ b/src/Date.py @@ -174,6 +174,12 @@ class Date: pref = "abt " else: pref = "" + + if self.calendar != CAL_GREGORIAN: + cal = " (%s)" % self.calendar_names[self.calendar] + else: + cal = "" + if self.modifier == MOD_TEXTONLY: val = self.text @@ -184,7 +190,7 @@ class Date: else: val = "%04d-%02d-%02d" % ( self.dateval[_POS_YR],self.dateval[_POS_MON],self.dateval[_POS_DAY]) - return "%s%s%s" % (qual,pref,val) + return "%s%s%s%s" % (qual,pref,val,cal) def get_sort_value(self): """ diff --git a/src/DateDisplay.py b/src/DateDisplay.py index 70af1f354..38bb3b237 100644 --- a/src/DateDisplay.py +++ b/src/DateDisplay.py @@ -96,20 +96,11 @@ class DateDisplay: ) _french = ( - "", - unicode("Vendémiaire",'latin-1'), - unicode("Brumaire",'latin-1'), - unicode("Frimaire",'latin-1'), - unicode("Nivôse",'latin-1'), - unicode("Pluviôse",'latin-1'), - unicode("Ventôse",'latin-1'), - unicode("Germinal",'latin-1'), - unicode("Floréal",'latin-1'), - 
unicode("Prairial",'latin-1'), - unicode("Messidor",'latin-1'), - unicode("Thermidor",'latin-1'), - unicode("Fructidor",'latin-1'), - unicode("Extra",'latin-1'), + u'', u'Vend\xe9miaire', u'Brumaire', + u'Frimaire', u'Niv\xf4se', u'Pluvi\xf4se', + u'Vent\xf4se', u'Germinal', u'Flor\xe9al', + u'Prairial', u'Messidor', u'Thermidor', + u'Fructidor', u'Extra' ) _persian = ( @@ -150,7 +141,8 @@ class DateDisplay: """ Verifies that the format value is within the correct range. """ - assert(format < len(self.formats)-1) + pass + #assert(format < len(self.formats)-1) def quote_display(self,date): """ diff --git a/src/DateParser.py b/src/DateParser.py index 8602f08cf..1670ec3e8 100644 --- a/src/DateParser.py +++ b/src/DateParser.py @@ -70,18 +70,74 @@ class DateParser: } modifier_to_int = { - 'before' : Date.MOD_BEFORE, - 'bef' : Date.MOD_BEFORE, - 'bef.' : Date.MOD_BEFORE, - 'after' : Date.MOD_AFTER, - 'aft' : Date.MOD_AFTER, - 'aft.' : Date.MOD_AFTER, - 'about' : Date.MOD_ABOUT, - 'abt.' : Date.MOD_ABOUT, - 'abt' : Date.MOD_ABOUT, - 'circa' : Date.MOD_ABOUT, - 'c.' : Date.MOD_ABOUT, - 'around' : Date.MOD_ABOUT, + 'before' : Date.MOD_BEFORE, 'bef' : Date.MOD_BEFORE, + 'bef.' : Date.MOD_BEFORE, 'after' : Date.MOD_AFTER, + 'aft' : Date.MOD_AFTER, 'aft.' : Date.MOD_AFTER, + 'about' : Date.MOD_ABOUT, 'abt.' : Date.MOD_ABOUT, + 'abt' : Date.MOD_ABOUT, 'circa' : Date.MOD_ABOUT, + 'c.' 
: Date.MOD_ABOUT, 'around' : Date.MOD_ABOUT, + } + + hebrew_to_int = { + "tishri" : 1, "heshvan" : 2, "kislev" : 3, + "tevet" : 4, "shevat" : 5, "adari" : 6, + "adarii" : 7, "nisan" : 8, "iyyar" : 9, + "sivan" : 10, "tammuz" : 11, "av" : 12, + "elul" : 13, + } + + french_to_int = { + u'vend\xe9miaire' : 1, u'brumaire' : 2, + u'frimaire' : 3, u'niv\xf4se ': 4, + u'pluvi\xf4se' : 5, u'vent\xf4se' : 6, + u'germinal' : 7, u'flor\xe9al' : 8, + u'prairial' : 9, u'messidor' : 10, + u'thermidor' : 11, u'fructidor' : 12, + u'extra' : 13 + } + + islamic_to_int = { + "muharram" : 1, "muharram ul haram" : 1, + "safar" : 2, "rabi`al-awwal" : 3, + "rabi'l" : 3, "rabi`ul-akhir" : 4, + "rabi`ath-thani" : 4, "rabi` ath-thani" : 4, + "rabi`al-thaany" : 4, "rabi` al-thaany" : 4, + "rabi' ii" : 4, "jumada l-ula" : 5, + "jumaada-ul-awwal" : 5, "jumaada i" : 5, + "jumada t-tania" : 6, "jumaada-ul-akhir" : 6, + "jumaada al-thaany" : 6, "jumaada ii" : 5, + "rajab" : 7, "sha`ban" : 8, + "sha`aban" : 8, "ramadan" : 9, + "ramadhan" : 9, "shawwal" : 10, + "dhu l-qa`da" : 11, "dhu qadah" : 11, + "thw al-qi`dah" : 11, "dhu l-hijja" : 12, + "dhu hijja" : 12, "thw al-hijjah" : 12, + } + + persian_to_int = { + "Farvardin" : 1, "Ordibehesht" : 2, + "Khordad" : 3, "Tir" : 4, + "Mordad" : 5, "Shahrivar" : 6, + "Mehr" : 7, "Aban" : 8, + "Azar" : 9, "Dey" : 10, + "Bahman" : 11, "Esfand" : 12, + } + + + calendar_to_int = { + 'gregorian' : Date.CAL_GREGORIAN, + 'g' : Date.CAL_GREGORIAN, + 'julian' : Date.CAL_JULIAN, + 'j' : Date.CAL_JULIAN, + 'hebrew' : Date.CAL_HEBREW, + 'h' : Date.CAL_HEBREW, + 'islamic' : Date.CAL_ISLAMIC, + 'i' : Date.CAL_ISLAMIC, + 'french' : Date.CAL_FRENCH, + 'french republican': Date.CAL_FRENCH, + 'f' : Date.CAL_FRENCH, + 'persian' : Date.CAL_PERSIAN, + 'p' : Date.CAL_PERSIAN, } quality_to_int = { @@ -100,17 +156,57 @@ class DateParser: [ key.replace('.','\.') for key in modifier_to_int.keys() ] ) + ')' _mon_str = '(' + '|'.join(month_to_int.keys()) + ')' + _jmon_str = '(' + 
'|'.join(hebrew_to_int.keys()) + ')' + _fmon_str = '(' + '|'.join(french_to_int.keys()) + ')' + _pmon_str = '(' + '|'.join(persian_to_int.keys()) + ')' + _cal_str = '(' + '|'.join(calendar_to_int.keys()) + ')' + _imon_str = '(' + '|'.join(islamic_to_int.keys()) + ')' - _qual = re.compile("%s\s+(.+)" % _qual_str,re.IGNORECASE) - _span = re.compile("from\s+(.+)\s+to\s+(.+)",re.IGNORECASE) - _range = re.compile("(bet.|between)\s+(.+)\s+and\s+(.+)",re.IGNORECASE) - _modifier = re.compile('%s\s+(.*)' % _mod_str,re.IGNORECASE) - _text = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _mon_str,re.IGNORECASE) - _text2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _mon_str,re.IGNORECASE) - _range2 = re.compile('%s\s+(\d+)-(\d+)\s*,?\s*((\d+)(/\d+)?)?' % _mon_str,re.IGNORECASE) + _cal = re.compile("(.+)\s\(%s\)" % _cal_str, + re.IGNORECASE) + _qual = re.compile("%s\s+(.+)" % _qual_str, + re.IGNORECASE) + _span = re.compile("from\s+(.+)\s+to\s+(.+)", + re.IGNORECASE) + _range = re.compile("(bet.|between)\s+(.+)\s+and\s+(.+)", + re.IGNORECASE) + _modifier = re.compile('%s\s+(.*)' % _mod_str, + re.IGNORECASE) + _text = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _mon_str, + re.IGNORECASE) + _text2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _mon_str, + re.IGNORECASE) + _jtext = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _jmon_str, + re.IGNORECASE) + _jtext2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _jmon_str, + re.IGNORECASE) + _ftext = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _fmon_str, + re.IGNORECASE) + _ftext2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _fmon_str, + re.IGNORECASE) + _ptext = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _pmon_str, + re.IGNORECASE) + _ptext2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' % _pmon_str, + re.IGNORECASE) + _itext = re.compile('%s\s+(\d+)?\s*,?\s*((\d+)(/\d+)?)?' % _imon_str, + re.IGNORECASE) + _itext2 = re.compile('(\d+)?\s+?%s\s*((\d+)(/\d+)?)?' 
% _imon_str, + re.IGNORECASE) + _range2 = re.compile('%s\s+(\d+)-(\d+)\s*,?\s*((\d+)(/\d+)?)?' % _mon_str, + re.IGNORECASE) _numeric = re.compile("((\d+)[/\.])?((\d+)[/\.])?(\d+)") _iso = re.compile("(\d+)-(\d+)-(\d+)") + + def __init__(self): + self.parser = { + Date.CAL_GREGORIAN : self._parse_greg_julian, + Date.CAL_JULIAN : self._parse_greg_julian, + Date.CAL_PERSIAN : self._parse_persian, + Date.CAL_HEBREW : self._parse_hebrew, + Date.CAL_ISLAMIC : self._parse_islamic, + } + def _get_int(self,val): """ Converts the string to an integer if the value is not None. If the @@ -121,23 +217,34 @@ class DateParser: else: return int(val) - def _parse_subdate(self,text): - """ - Converts only the date portion of a date. - """ - try: - value = time.strptime(text) - return (value[2],value[1],value[0],False) - except ValueError: - pass + def _parse_hebrew(self,text): + return self._parse_calendar(text,self._jtext,self._jtext2, + self.hebrew_to_int) - match = self._text.match(text) + def _parse_islamic(self,text): + return self._parse_calendar(text,self._itext,self._itext2, + self.islamic_to_int) + + def _parse_persian(self,text): + return self._parse_calendar(text,self._ptext,self._ptext2, + self.persian_to_int) + + def _parse_french(self,text): + return self._parse_calendar(text,self._ftext,self._ftext2, + self.french_to_int) + + def _parse_greg_julian(self,text): + return self._parse_calendar(text,self._text,self._text2, + self.month_to_int) + + def _parse_calendar(self,text,regex1,regex2,mmap): + match = regex1.match(text) if match: groups = match.groups() if groups[0] == None: m = 0 else: - m = self.month_to_int[groups[0].lower()] + m = mmap[groups[0].lower()] d = self._get_int(groups[1]) @@ -149,13 +256,13 @@ class DateParser: s = groups[4] != None return (d,m,y,s) - match = self._text2.match(text) + match = regex2.match(text) if match: groups = match.groups() if groups[1] == None: m = 0 else: - m = self.month_to_int[groups[1].lower()] + m = mmap[groups[1].lower()] d 
= self._get_int(groups[0]) @@ -166,7 +273,25 @@ class DateParser: y = int(groups[3]) s = groups[4] != None return (d,m,y,s) + return Date.EMPTY + + def _parse_subdate(self,text,subparser=None): + """ + Converts only the date portion of a date. + """ + if subparser == None: + subparser = self._parse_greg_julian + + try: + value = time.strptime(text) + return (value[2],value[1],value[0],False) + except ValueError: + pass + value = subparser(text) + if value != Date.EMPTY: + return value + match = self._iso.match(text) if match: groups = match.groups() @@ -191,27 +316,36 @@ class DateParser: """ date.set_text_value(text) qual = Date.QUAL_NONE + cal = Date.CAL_GREGORIAN + + match = self._cal.match(text) + if match: + grps = match.groups() + cal = self.calendar_to_int[grps[1].lower()] + text = grps[0] + + text_parser = self.parser[cal] match = self._qual.match(text) if match: grps = match.groups() qual = self.quality_to_int[grps[0].lower()] text = grps[1] - + match = self._span.match(text) if match: grps = match.groups() - start = self._parse_subdate(grps[0]) - stop = self._parse_subdate(grps[1]) - date.set(qual,Date.MOD_SPAN,Date.CAL_GREGORIAN,start + stop) + start = self._parse_subdate(grps[0],text_parser) + stop = self._parse_subdate(grps[1],text_parser) + date.set(qual,Date.MOD_SPAN,cal,start + stop) return match = self._range.match(text) if match: grps = match.groups() - start = self._parse_subdate(grps[1]) - stop = self._parse_subdate(grps[2]) - date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN,start + stop) + start = self._parse_subdate(grps[1],text_parser) + stop = self._parse_subdate(grps[2],text_parser) + date.set(qual,Date.MOD_RANGE,cal,start + stop) return match = self._range2.match(text) @@ -237,14 +371,31 @@ class DateParser: grps = match.groups() start = self._parse_subdate(grps[1]) mod = self.modifier_to_int.get(grps[0].lower(),Date.MOD_NONE) - date.set(qual,mod,Date.CAL_GREGORIAN,start) + date.set(qual,mod,cal,start) return subdate = 
self._parse_subdate(text) if subdate == Date.EMPTY: - date.set_as_text(text) - else: - date.set(qual,Date.MOD_NONE,Date.CAL_GREGORIAN,subdate) + subdate = self._parse_hebrew(text) + if subdate == Date.EMPTY: + subdate = self._parse_persian(text) + if subdate == Date.EMPTY: + subdate = self._parse_islamic(text) + if subdate == Date.EMPTY: + subdate = self._parse_french(text) + if subdate == Date.EMPTY: + date.set_as_text(text) + return + else: + cal = Date.CAL_FRENCH + else: + cal = Date.CAL_ISLAMIC + else: + cal = Date.CAL_PERSIAN + else: + cal = Date.CAL_HEBREW + + date.set(qual,Date.MOD_NONE,cal,subdate) def parse(self,text): """