* src/DateParser.py: Factor out small matching chunks.

svn: r3734
This commit is contained in:
Alex Roitman 2004-11-17 05:07:49 +00:00
parent 8dd7840293
commit 284cc849c3
2 changed files with 78 additions and 15 deletions

View File

@ -9,6 +9,8 @@
* src/dates/Date_ru.py: Switch from utf8 strings to unicode. * src/dates/Date_ru.py: Switch from utf8 strings to unicode.
* src/dates/Date_fr.py: Switch from utf8 strings to unicode. * src/dates/Date_fr.py: Switch from utf8 strings to unicode.
* src/DateParser.py: Factor out small matching chunks.
2004-11-15 Alex Roitman <shura@alex.neuro.umn.edu> 2004-11-15 Alex Roitman <shura@alex.neuro.umn.edu>
* src/DateDisplay.py: Remove localized displayers. * src/DateDisplay.py: Remove localized displayers.
* src/DateParser.py: Remove localized parsers. * src/DateParser.py: Remove localized parsers.

View File

@ -345,7 +345,6 @@ class DateParser:
match = regex2.match(text.lower()) match = regex2.match(text.lower())
if match: if match:
groups = match.groups() groups = match.groups()
print groups #[ g.encode('utf8') for g in groups ]
if groups[1] == None: if groups[1] == None:
m = 0 m = 0
else: else:
@ -419,44 +418,70 @@ class DateParser:
return Date.EMPTY return Date.EMPTY
def match_calendar(self,text,cal):
    """
    Try to find a calendar name in the text.

    The text is matched against self._cal.  On a match the second
    group, lower-cased, is looked up in self.calendar_to_int to get the
    calendar index, and the first group becomes the remaining text.
    Without a match both arguments are passed through unchanged.

    Return a (text,cal) tuple: the remainder of the text first, the
    calendar index second.
    """
    match = self._cal.match(text)
    if match:
        grps = match.groups()
        cal = self.calendar_to_int[grps[1].lower()]
        text = grps[0]
    return (text,cal)
def match_quality(self,text,qual):
    """
    Try to find a quality word at the start of the text.

    The text is matched against self._qual.  On a match the first
    group, lower-cased, is looked up in self.quality_to_int to get the
    quality index, and the second group becomes the remaining text.
    Without a match both arguments are passed through unchanged.

    Return a (text,qual) tuple: the remainder of the text first, the
    quality index second.
    """
    match = self._qual.match(text)
    if match:
        grps = match.groups()
        qual = self.quality_to_int[grps[0].lower()]
        text = grps[1]
    return (text,qual)
def match_span(self,text,cal,qual,date):
    """
    Try matching a span date.

    The text is matched against self._span.  On a match, groups 1 and 3
    are parsed as the start and stop sub-dates with the text parser
    registered for cal in self.parser, and the date is set with
    Date.MOD_SPAN and the concatenated start + stop value.

    On success, set the date and return 1. On failure return 0 and
    leave the date untouched.
    """
    match = self._span.match(text)
    if match:
        grps = match.groups()
        text_parser = self.parser[cal]
        start = self._parse_subdate(grps[1],text_parser)
        stop = self._parse_subdate(grps[3],text_parser)
        date.set(qual,Date.MOD_SPAN,cal,start + stop)
        return 1
    return 0
def match_range(self,text,cal,qual,date):
    """
    Try matching a range date.

    The text is matched against self._range.  On a match, groups 1 and 3
    are parsed as the start and stop sub-dates with the text parser
    registered for cal in self.parser, and the date is set with
    Date.MOD_RANGE and the concatenated start + stop value.

    On success, set the date and return 1. On failure return 0 and
    leave the date untouched.
    """
    match = self._range.match(text)
    if match:
        grps = match.groups()
        text_parser = self.parser[cal]
        start = self._parse_subdate(grps[1],text_parser)
        stop = self._parse_subdate(grps[3],text_parser)
        date.set(qual,Date.MOD_RANGE,cal,start + stop)
        return 1
    return 0
def match_range2(self,text,cal,qual,date):
"""
Try matching numerical range date.
On success, set the date and return 1. On failure return 0.
"""
match = self._range2.match(text) match = self._range2.match(text)
if match: if match:
grps = match.groups() grps = match.groups()
@ -473,14 +498,28 @@ class DateParser:
s = grps[4] != None s = grps[4] != None
date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN, date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN,
(d0,m,y,s,d1,m,y,s)) (d0,m,y,s,d1,m,y,s))
return return 1
return 0
def match_bce(self,text):
    """
    Try matching the BCE qualifier.

    The text is matched against self._bce_re.  On a match the first
    group becomes the remaining text and the BCE flag is True;
    otherwise the text is passed through unchanged and the flag is
    False.

    Return a (text,bc) tuple: the remainder of the text first, the
    BCE flag (True/False) second.
    """
    match = self._bce_re.match(text)
    bc = False
    if match:
        text = match.groups()[0]
        bc = True
    return (text,bc)
def match_modifier(self,text,cal,qual,bc,date):
"""
Try matching date with modifier.
On success, set the date and return 1. On failure return 0.
"""
match = self._modifier.match(text) match = self._modifier.match(text)
if match: if match:
grps = match.groups() grps = match.groups()
@ -490,6 +529,28 @@ class DateParser:
date.set(qual,mod,cal,self.invert_year(start)) date.set(qual,mod,cal,self.invert_year(start))
else: else:
date.set(qual,mod,cal,start) date.set(qual,mod,cal,start)
return 1
return 0
def set_date(self,date,text):
"""
Parses the text and sets the date according to the parsing.
"""
date.set_text_value(text)
qual = Date.QUAL_NONE
cal = Date.CAL_GREGORIAN
(text,cal) = self.match_calendar(text,cal)
(text,qual) = self.match_quality(text,qual)
if self.match_span(text,cal,qual,date):
return
if self.match_range(text,cal,qual,date):
return
if self.match_range2(text,cal,qual,date):
return
(text,bc) = self.match_bce(text)
if self.match_modifier(text,cal,qual,bc,date):
return return
subdate = self._parse_subdate(text) subdate = self._parse_subdate(text)