* src/DateParser.py: Factor out small matching chunks.
svn: r3734
This commit is contained in:
parent
8dd7840293
commit
284cc849c3
@ -9,6 +9,8 @@
|
|||||||
* src/dates/Date_ru.py: Switch from utf8 strings to unicode.
|
* src/dates/Date_ru.py: Switch from utf8 strings to unicode.
|
||||||
* src/dates/Date_fr.py: Switch from utf8 strings to unicode.
|
* src/dates/Date_fr.py: Switch from utf8 strings to unicode.
|
||||||
|
|
||||||
|
* src/DateParser.py: Factor out small matching chunks.
|
||||||
|
|
||||||
2004-11-15 Alex Roitman <shura@alex.neuro.umn.edu>
|
2004-11-15 Alex Roitman <shura@alex.neuro.umn.edu>
|
||||||
* src/DateDisplay.py: Remove localized displayers.
|
* src/DateDisplay.py: Remove localized displayers.
|
||||||
* src/DateParser.py: Remove localized parsers.
|
* src/DateParser.py: Remove localized parsers.
|
||||||
|
@ -345,7 +345,6 @@ class DateParser:
|
|||||||
match = regex2.match(text.lower())
|
match = regex2.match(text.lower())
|
||||||
if match:
|
if match:
|
||||||
groups = match.groups()
|
groups = match.groups()
|
||||||
print groups #[ g.encode('utf8') for g in groups ]
|
|
||||||
if groups[1] == None:
|
if groups[1] == None:
|
||||||
m = 0
|
m = 0
|
||||||
else:
|
else:
|
||||||
@ -419,44 +418,70 @@ class DateParser:
|
|||||||
|
|
||||||
return Date.EMPTY
|
return Date.EMPTY
|
||||||
|
|
||||||
def set_date(self,date,text):
|
def match_calendar(self,text,cal):
|
||||||
"""
|
"""
|
||||||
Parses the text and sets the date according to the parsing.
|
Try parsing calendar.
|
||||||
"""
|
|
||||||
date.set_text_value(text)
|
|
||||||
qual = Date.QUAL_NONE
|
|
||||||
cal = Date.CAL_GREGORIAN
|
|
||||||
|
|
||||||
|
Return calendar index and the remainder of text.
|
||||||
|
"""
|
||||||
match = self._cal.match(text)
|
match = self._cal.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
cal = self.calendar_to_int[grps[1].lower()]
|
cal = self.calendar_to_int[grps[1].lower()]
|
||||||
text = grps[0]
|
text = grps[0]
|
||||||
|
return (text,cal)
|
||||||
|
|
||||||
text_parser = self.parser[cal]
|
def match_quality(self,text,qual):
|
||||||
|
"""
|
||||||
|
Try matching quality.
|
||||||
|
|
||||||
|
Return quality index and the remainder of text.
|
||||||
|
"""
|
||||||
match = self._qual.match(text)
|
match = self._qual.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
qual = self.quality_to_int[grps[0].lower()]
|
qual = self.quality_to_int[grps[0].lower()]
|
||||||
text = grps[1]
|
text = grps[1]
|
||||||
|
return (text,qual)
|
||||||
|
|
||||||
|
def match_span(self,text,cal,qual,date):
|
||||||
|
"""
|
||||||
|
Try matching span date.
|
||||||
|
|
||||||
|
On success, set the date and return 1. On failure return 0.
|
||||||
|
"""
|
||||||
match = self._span.match(text)
|
match = self._span.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
|
text_parser = self.parser[cal]
|
||||||
start = self._parse_subdate(grps[1],text_parser)
|
start = self._parse_subdate(grps[1],text_parser)
|
||||||
stop = self._parse_subdate(grps[3],text_parser)
|
stop = self._parse_subdate(grps[3],text_parser)
|
||||||
date.set(qual,Date.MOD_SPAN,cal,start + stop)
|
date.set(qual,Date.MOD_SPAN,cal,start + stop)
|
||||||
return
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def match_range(self,text,cal,qual,date):
|
||||||
|
"""
|
||||||
|
Try matching range date.
|
||||||
|
|
||||||
|
On success, set the date and return 1. On failure return 0.
|
||||||
|
"""
|
||||||
match = self._range.match(text)
|
match = self._range.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
|
text_parser = self.parser[cal]
|
||||||
start = self._parse_subdate(grps[1],text_parser)
|
start = self._parse_subdate(grps[1],text_parser)
|
||||||
stop = self._parse_subdate(grps[3],text_parser)
|
stop = self._parse_subdate(grps[3],text_parser)
|
||||||
date.set(qual,Date.MOD_RANGE,cal,start + stop)
|
date.set(qual,Date.MOD_RANGE,cal,start + stop)
|
||||||
return
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def match_range2(self,text,cal,qual,date):
|
||||||
|
"""
|
||||||
|
Try matching numerical range date.
|
||||||
|
|
||||||
|
On success, set the date and return 1. On failure return 0.
|
||||||
|
"""
|
||||||
match = self._range2.match(text)
|
match = self._range2.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
@ -473,14 +498,28 @@ class DateParser:
|
|||||||
s = grps[4] != None
|
s = grps[4] != None
|
||||||
date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN,
|
date.set(qual,Date.MOD_RANGE,Date.CAL_GREGORIAN,
|
||||||
(d0,m,y,s,d1,m,y,s))
|
(d0,m,y,s,d1,m,y,s))
|
||||||
return
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def match_bce(self,text):
|
||||||
|
"""
|
||||||
|
Try matching BCE qualifier.
|
||||||
|
|
||||||
|
Return BCE (True/False) and the remainder of text.
|
||||||
|
"""
|
||||||
match = self._bce_re.match(text)
|
match = self._bce_re.match(text)
|
||||||
bc = False
|
bc = False
|
||||||
if match:
|
if match:
|
||||||
text = match.groups()[0]
|
text = match.groups()[0]
|
||||||
bc = True
|
bc = True
|
||||||
|
return (text,bc)
|
||||||
|
|
||||||
|
def match_modifier(self,text,cal,qual,bc,date):
|
||||||
|
"""
|
||||||
|
Try matching date with modifier.
|
||||||
|
|
||||||
|
On success, set the date and return 1. On failure return 0.
|
||||||
|
"""
|
||||||
match = self._modifier.match(text)
|
match = self._modifier.match(text)
|
||||||
if match:
|
if match:
|
||||||
grps = match.groups()
|
grps = match.groups()
|
||||||
@ -490,8 +529,30 @@ class DateParser:
|
|||||||
date.set(qual,mod,cal,self.invert_year(start))
|
date.set(qual,mod,cal,self.invert_year(start))
|
||||||
else:
|
else:
|
||||||
date.set(qual,mod,cal,start)
|
date.set(qual,mod,cal,start)
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def set_date(self,date,text):
|
||||||
|
"""
|
||||||
|
Parses the text and sets the date according to the parsing.
|
||||||
|
"""
|
||||||
|
date.set_text_value(text)
|
||||||
|
qual = Date.QUAL_NONE
|
||||||
|
cal = Date.CAL_GREGORIAN
|
||||||
|
|
||||||
|
(text,cal) = self.match_calendar(text,cal)
|
||||||
|
(text,qual) = self.match_quality(text,qual)
|
||||||
|
if self.match_span(text,cal,qual,date):
|
||||||
|
return
|
||||||
|
if self.match_range(text,cal,qual,date):
|
||||||
|
return
|
||||||
|
if self.match_range2(text,cal,qual,date):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
(text,bc) = self.match_bce(text)
|
||||||
|
if self.match_modifier(text,cal,qual,bc,date):
|
||||||
|
return
|
||||||
|
|
||||||
subdate = self._parse_subdate(text)
|
subdate = self._parse_subdate(text)
|
||||||
if subdate == Date.EMPTY:
|
if subdate == Date.EMPTY:
|
||||||
subdate = self._parse_hebrew(text)
|
subdate = self._parse_hebrew(text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user