From f97b98c2cb5f4a4069518f8631f660539710fb4f Mon Sep 17 00:00:00 2001 From: Benny Malengier Date: Thu, 20 Sep 2007 19:59:46 +0000 Subject: [PATCH] * src/Config/_GrampsConfigKeys.py: add config keys for fuzzy date match * src/RelLib/_CalSdn.py: correct type in comment * src/RelLib/_Date.py: add new match function to match two dates with fuzzy date support (about 2000 matches about 2005). * src/Filters/Rules/_HasEventBase.py: use new match * src/Filters/Rules/MediaObject/_HasMedia.py: use new match * src/Filters/Rules/Person/_HasBirth.py: use new match * src/Filters/Rules/Person/_HasDeath.py: use new match * src/Filters/Rules/Person/_HasFamilyEvent.py: use new match * src/Filters/Rules/_RuleUtils.py: delete, contained old match date routines * src/Filters/Rules/Makefile.am: remove RuleUtils * po/POTFILES.in: remove RuleUtils Based on patch of Douglas S. Blank , ticket #1219 svn: r8995 --- ChangeLog | 15 ++ po/POTFILES.in | 1 - src/Config/_GrampsConfigKeys.py | 6 + src/Filters/Rules/Makefile.am | 1 - src/Filters/Rules/MediaObject/_HasMedia.py | 3 +- src/Filters/Rules/Person/_HasBirth.py | 3 +- src/Filters/Rules/Person/_HasDeath.py | 3 +- src/Filters/Rules/Person/_HasFamilyEvent.py | 3 +- src/Filters/Rules/_HasEventBase.py | 5 +- src/Filters/Rules/_RuleUtils.py | 72 ------ src/RelLib/_CalSdn.py | 2 +- src/RelLib/_Date.py | 273 +++++++++++++++++++- 12 files changed, 300 insertions(+), 87 deletions(-) delete mode 100644 src/Filters/Rules/_RuleUtils.py diff --git a/ChangeLog b/ChangeLog index 741c8af68..75e3db80d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2007-09-20 Benny Malengier + * src/Config/_GrampsConfigKeys.py: add config keys for fuzzy date match + * src/RelLib/_CalSdn.py: correct type in comment + * src/RelLib/_Date.py: add new match function to match two dates with + fuzzy date support (about 2000 matches about 2005). 
+ * src/Filters/Rules/_HasEventBase.py: use new match + * src/Filters/Rules/MediaObject/_HasMedia.py: use new match + * src/Filters/Rules/Person/_HasBirth.py: use new match + * src/Filters/Rules/Person/_HasDeath.py: use new match + * src/Filters/Rules/Person/_HasFamilyEvent.py: use new match + * src/Filters/Rules/_RuleUtils.py: delete, contained old match date routines + * src/Filters/Rules/Makefile.am: remove RuleUtils + * po/POTFILES.in: remove RuleUtils + Based on patch of Douglas S. Blank , ticket #1219 + 2007-09-20 Benny Malengier * src/DateHandler/_DateParser.py: apply rev 8351 to 3.0 branch: text date with correct modifier diff --git a/po/POTFILES.in b/po/POTFILES.in index a4d22ce20..d270cffdd 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -470,7 +470,6 @@ src/Filters/Rules/_IsPrivate.py src/Filters/Rules/_MatchesFilterBase.py src/Filters/Rules/_RegExpIdBase.py src/Filters/Rules/_Rule.py -src/Filters/Rules/_RuleUtils.py src/Filters/Rules/_HasMarkerBase.py # Filters.Rules.Person package diff --git a/src/Config/_GrampsConfigKeys.py b/src/Config/_GrampsConfigKeys.py index 05c8cab99..da6605234 100644 --- a/src/Config/_GrampsConfigKeys.py +++ b/src/Config/_GrampsConfigKeys.py @@ -148,6 +148,9 @@ PERSON_REF_WIDTH = ('interface', 'person-ref-width', 1) REPO_REF_HEIGHT = ('interface', 'repo-ref-height', 1) REPO_REF_WIDTH = ('interface', 'repo-ref-width', 1) OWNER_WARN = ('behavior', 'owner-warn', 0) +DATE_BEFORE_RANGE = ('behavior', 'date-before-range', 1) +DATE_AFTER_RANGE = ('behavior', 'date-after-range', 1) +DATE_ABOUT_RANGE = ('behavior', 'date-about-range', 1) default_value = { @@ -253,4 +256,7 @@ default_value = { OWNER_WARN : False, EXPORT_NO_PRIVATE : True, EXPORT_RESTRICT : True, + DATE_BEFORE_RANGE : 9999, + DATE_AFTER_RANGE : 9999, + DATE_ABOUT_RANGE : 10, } diff --git a/src/Filters/Rules/Makefile.am b/src/Filters/Rules/Makefile.am index 1816d2db7..7f2a2d10d 100644 --- a/src/Filters/Rules/Makefile.am +++ b/src/Filters/Rules/Makefile.am @@ -18,7 
+18,6 @@ pkgdata_PYTHON = \ _IsPrivate.py\ _RegExpIdBase.py\ _Rule.py\ - _RuleUtils.py\ _MatchesFilterBase.py\ _HasMarkerBase.py diff --git a/src/Filters/Rules/MediaObject/_HasMedia.py b/src/Filters/Rules/MediaObject/_HasMedia.py index dedb1ac11..72c9781af 100644 --- a/src/Filters/Rules/MediaObject/_HasMedia.py +++ b/src/Filters/Rules/MediaObject/_HasMedia.py @@ -34,7 +34,6 @@ from gettext import gettext as _ #------------------------------------------------------------------------- import DateHandler from Filters.Rules._Rule import Rule -from Filters.Rules._RuleUtils import loose_date_cmp #------------------------------------------------------------------------- # @@ -73,7 +72,7 @@ class HasMedia(Rule): return False if self.date: - if not loose_date_cmp(self.date,obj.get_date_object()): + if not obj.get_date_object().match(self.date): return False return True diff --git a/src/Filters/Rules/Person/_HasBirth.py b/src/Filters/Rules/Person/_HasBirth.py index bd0e27001..c978f565b 100644 --- a/src/Filters/Rules/Person/_HasBirth.py +++ b/src/Filters/Rules/Person/_HasBirth.py @@ -35,7 +35,6 @@ from gettext import gettext as _ import DateHandler from RelLib import EventType,EventRoleType from Filters.Rules._Rule import Rule -from Filters.Rules._RuleUtils import loose_date_cmp #------------------------------------------------------------------------- # @@ -72,7 +71,7 @@ class HasBirth(Rule): # No match: wrong description continue if self.date: - if loose_date_cmp(self.date,event.get_date_object()) == 0: + if not event.get_date_object().match(self.date): # No match: wrong date continue if self.list[1]: diff --git a/src/Filters/Rules/Person/_HasDeath.py b/src/Filters/Rules/Person/_HasDeath.py index ad52e7962..d2e725c89 100644 --- a/src/Filters/Rules/Person/_HasDeath.py +++ b/src/Filters/Rules/Person/_HasDeath.py @@ -35,7 +35,6 @@ from gettext import gettext as _ import DateHandler from RelLib import EventType,EventRoleType from Filters.Rules._Rule import Rule -from 
Filters.Rules._RuleUtils import loose_date_cmp #------------------------------------------------------------------------- # @@ -72,7 +71,7 @@ class HasDeath(Rule): # No match: wrong description continue if self.date: - if loose_date_cmp(self.date,event.get_date_object()) == 0: + if not event.get_date_object().match(self.date): # No match: wrong date continue if self.list[1]: diff --git a/src/Filters/Rules/Person/_HasFamilyEvent.py b/src/Filters/Rules/Person/_HasFamilyEvent.py index de50dcc2e..cafef9f1f 100644 --- a/src/Filters/Rules/Person/_HasFamilyEvent.py +++ b/src/Filters/Rules/Person/_HasFamilyEvent.py @@ -35,7 +35,6 @@ from gettext import gettext as _ import DateHandler from RelLib import EventType from Filters.Rules._Rule import Rule -from Filters.Rules._RuleUtils import date_cmp #------------------------------------------------------------------------- # @@ -80,7 +79,7 @@ class HasFamilyEvent(Rule): if v and event.get_description().upper().find(v.upper())==-1: val = 0 if self.date: - if date_cmp(self.date,event.get_date_object()): + if not event.get_date_object().match(self.date): val = 0 if self.list[2]: pl_id = event.get_place_handle() diff --git a/src/Filters/Rules/_HasEventBase.py b/src/Filters/Rules/_HasEventBase.py index 2453563b2..87754cd44 100644 --- a/src/Filters/Rules/_HasEventBase.py +++ b/src/Filters/Rules/_HasEventBase.py @@ -35,7 +35,6 @@ from gettext import gettext as _ import DateHandler from RelLib import EventType from Filters.Rules._Rule import Rule -from Filters.Rules._RuleUtils import loose_date_cmp #------------------------------------------------------------------------- # @@ -74,9 +73,11 @@ class HasEventBase(Rule): if self.list[3] and event.get_description().upper().find( self.list[3].upper())==-1: return False + if self.date: - if not loose_date_cmp(self.date,event.get_date_object()): + if not event.get_date_object().match(self.date): return False + if self.list[2]: pl_id = event.get_place_handle() if pl_id: diff --git
a/src/Filters/Rules/_RuleUtils.py b/src/Filters/Rules/_RuleUtils.py deleted file mode 100644 index f6e10304a..000000000 --- a/src/Filters/Rules/_RuleUtils.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Gramps - a GTK+/GNOME based genealogy program -# -# Copyright (C) 2002-2006 Donald N. Allingham -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -# $Id$ - -import RelLib -import DateHandler - -#------------------------------------------------------------------------- -# -# Useful functions used by some rules -# -#------------------------------------------------------------------------- -def date_cmp(rule,value): - s = rule.get_modifier() - if s == RelLib.Date.MOD_TEXTONLY: - # If the entered date did not parse, then we can only compare - # the text against the textual representation of the tested date - value_text = DateHandler.displayer.display(value) - return (value_text.upper().find(rule.text.upper()) != -1) - - sd = rule.get_start_date() - od = value.get_start_date() - cmp_rule = (sd[2],sd[1],sd[0]) - cmp_value = (od[2],od[1],od[0]) - if s == RelLib.Date.MOD_BEFORE: - return cmp_rule > cmp_value - elif s == RelLib.Date.MOD_AFTER: - return cmp_rule < cmp_value - else: - return cmp_rule == cmp_value - -def loose_date_cmp(rule,value): - s = rule.get_modifier() - if s == RelLib.Date.MOD_TEXTONLY: - # If the entered 
date did not parse, then we can only compare - # the text against the textual representation of the tested date - value_text = DateHandler.displayer.display(value) - return (value_text.upper().find(rule.text.upper()) != -1) - - sd = rule.get_start_date() - od = value.get_start_date() - cmp_rule = (sd[2],sd[1],sd[0]) - cmp_value = (od[2],od[1],od[0]) - - if s == RelLib.Date.MOD_BEFORE: - return cmp_rule > cmp_value - elif s == RelLib.Date.MOD_AFTER: - return cmp_rule < cmp_value - elif cmp_rule[0] and not cmp_rule[1] and not cmp_rule[2]: - return cmp_rule[0] == cmp_value[0] - elif cmp_rule[0] and cmp_rule[1] and not cmp_rule[2]: - return cmp_rule[0:2] == cmp_value[0:2] - else: - return cmp_rule == cmp_value diff --git a/src/RelLib/_CalSdn.py b/src/RelLib/_CalSdn.py index 6365b8636..c4ce64499 100644 --- a/src/RelLib/_CalSdn.py +++ b/src/RelLib/_CalSdn.py @@ -436,7 +436,7 @@ def gregorian_sdn(year, month, day): - _GRG_SDN_OFFSET ) def gregorian_ymd(sdn): - """Converts an SDN number to a gregorial date""" + """Converts an SDN number to a gregorian date""" temp = (_GRG_SDN_OFFSET + sdn) * 4 - 1 # Calculate the century (year/100) diff --git a/src/RelLib/_Date.py b/src/RelLib/_Date.py index 10ba70a0c..780681645 100644 --- a/src/RelLib/_Date.py +++ b/src/RelLib/_Date.py @@ -1,7 +1,7 @@ # # Gramps - a GTK+/GNOME based genealogy program # -# Copyright (C) 2000-2006 Donald N. Allingham +# Copyright (C) 2000-2007 Donald N. Allingham # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,14 +20,59 @@ # $Id$ -"Support for dates" +"""Support for dates +""" __author__ = "Donald N. 
Allingham" __revision__ = "$Revision$" +#------------------------------------------------------------------------ +# +# Python modules +# +#------------------------------------------------------------------------ from gettext import gettext as _ + +#------------------------------------------------------------------------ +# +# Set up logging +# +#------------------------------------------------------------------------ +import logging +log = logging.getLogger(".Date") + +#------------------------------------------------------------------------- +# +# Gnome/GTK modules +# +#------------------------------------------------------------------------- + + +#------------------------------------------------------------------------ +# +# Gramps modules +# +#------------------------------------------------------------------------ from _CalSdn import * +#------------------------------------------------------------------------ +# +# Constants +# +#------------------------------------------------------------------------ + +#obtain the ranges once, they do not change! +try: + import Config + _DATE_BEFORE_RANGE = Config.get(Config.DATE_BEFORE_RANGE) + _DATE_AFTER_RANGE = Config.get(Config.DATE_AFTER_RANGE) + _DATE_ABOUT_RANGE = Config.get(Config.DATE_ABOUT_RANGE) +except ImportError: + # RelLib used as module not part of GRAMPS + _DATE_BEFORE_RANGE = 9999 + _DATE_AFTER_RANGE = 9999 + _DATE_ABOUT_RANGE = 10 + #------------------------------------------------------------------------- # # DateError exception @@ -192,6 +237,109 @@ class Date: self.quality == other.quality and self.dateval == other.dateval) return value + + def get_start_stop_range(self): + """ + Returns the minimal start_date, and a maximal + stop_date corresponding to this date, given in Gregorian calendar. + Useful in doing range overlap comparisons between different dates. 
+ + Note that we stay in (YR,MON,DAY) + """ + + def yr_mon_day(dateval): + """ Local function to swap order for easy comparisons, + and correct year of slash date. + Slash date is given as year1/year2, where year1 is Julian + year, and year2=year1+1 the Gregorian year + """ + if dateval[Date._POS_SL] : + return (dateval[Date._POS_YR]+1, dateval[Date._POS_MON], + dateval[Date._POS_DAY]) + else : + return (dateval[Date._POS_YR], dateval[Date._POS_MON], + dateval[Date._POS_DAY]) + def date_offset(dateval, offset): + """ Local function to do date arithmetic: add the offset, + return (year,month,day) in the Gregorian calendar + """ + new_date = Date() + new_date.set_yr_mon_day(dateval[0], dateval[1], dateval[2]) + return Date._calendar_change[Date.CAL_GREGORIAN]( + new_date.sortval + offset) + + datecopy = Date(self) + #we do all calculation in Gregorian calendar + datecopy.convert_calendar(Date.CAL_GREGORIAN) + start = yr_mon_day(datecopy.get_start_date()) + stop = yr_mon_day(datecopy.get_stop_date()) + + if stop == (0, 0, 0): + stop = start + + stopmax = list(stop) + if stopmax[0] == 0: # then use start_year, if one + stopmax[0] = start[0] # start is already (yr, mon, day) + if stopmax[1] == 0: + stopmax[1] = 12 + if stopmax[2] == 0: + stopmax[2] = 31 + startmin = list(start) + if startmin[1] == 0: + startmin[1] = 1 + if startmin[2] == 0: + startmin[2] = 1 + # if BEFORE, AFTER, or ABOUT/EST, adjust: + if self.modifier == Date.MOD_BEFORE: + stopmax = date_offset(startmin, -1) + f = _DATE_BEFORE_RANGE + startmin = (stopmax[0] - f, stopmax[1], stopmax[2]) + elif self.modifier == Date.MOD_AFTER: + startmin = date_offset(stopmax, 1) + f = _DATE_AFTER_RANGE + stopmax = (startmin[0] + f, startmin[1], startmin[2]) + elif (self.modifier == Date.MOD_ABOUT or + self.quality == Date.QUAL_ESTIMATED): + f = _DATE_ABOUT_RANGE + startmin = (startmin[0] - f, startmin[1], startmin[2]) + stopmax = (stopmax[0] + f, stopmax[1], stopmax[2]) + # return tuples not lists, for comparisons + return (tuple(startmin),
tuple(stopmax)) + + def match(self, other_date): + """ + The other comparisons for Date don't actually look for anything + other than a straight match, or a simple comparison of the sortval. + This method allows a more sophisticated comparison looking for + any overlap between two possible dates, date spans, and qualities. + + Returns True if part of other_date matches part of the date-span + defined by self + """ + if (other_date.modifier == Date.MOD_TEXTONLY or + self.modifier == Date.MOD_TEXTONLY): + import DateHandler + # If either date is just text, then we can only compare textual + # representations + self_text = DateHandler.displayer.display(self) + ##DEBUG: print ' TEXT COMPARE ONLY ' + return (self_text.upper().find(other_date.text.upper()) != -1) + + # Obtain minimal start and maximal stop in Gregorian calendar + other_start, other_stop = other_date.get_start_stop_range() + self_start, self_stop = self.get_start_stop_range() + + ##DEBUG print " date compare:", self_start, self_stop, other_start, + ##DEBUG other_stop + + # If some overlap then match is True, otherwise False. + if ((self_start <= other_start <= self_stop) or + (self_start <= other_stop <= self_stop) or + (other_start <= self_start <= other_stop) or + (other_start <= self_stop <= other_stop)): + return True + else: + return False def __str__(self): """ @@ -379,6 +527,17 @@ class Date: """ return self._get_low_item(Date._POS_YR) + def set_yr_mon_day(self, year, month, day): + """ + Sets the year, month, and day values + """ + dv = list(self.dateval) + dv[Date._POS_YR] = year + dv[Date._POS_MON] = month + dv[Date._POS_DAY] = day + self.dateval = tuple(dv) + self._calc_sort_value() + def set_year(self, year): """ Sets the year value @@ -595,3 +754,113 @@ class Date: and self.quality == Date.QUAL_NONE \ and self.get_year_valid() and self.get_month_valid() \ and self.get_day_valid() + +if __name__ == "__main__": + """ Test function. 
Call it as follows from the command line (so as to find + imported modules): + export PYTHONPATH=/path/to/gramps/src python src/RelLib/_Date.py + """ + import DateHandler + df = DateHandler._DateParser.DateParser() # date factory + def test_date(d1, d2, expected1, expected2 = None): + if expected2 == None: + expected2 = expected1 + pos1 = 1 + if expected1 : + pos1 = 0 + pos2 = 1 + if expected2 : + pos2 = 0 + date1 = df.parse(d1) + date2 = df.parse(d2) + wrong = 0 + print "Testing '%s' and '%s'" % (d1, d2) + val = date2.match(date1) + try: + assert(val == expected1) + print [" correct: they match!" + ," correct: they do not match!"][pos1] + except: + print " Wrong! got %s" % (not expected1) + wrong += 1 + val = date1.match(date2) + try: + assert(val == expected2) + print [" correct: they match!" + ," correct: they do not match!"][pos2] + except: + print " Wrong! got %s" % (not expected2) + wrong += 1 + return {"incorrect": wrong, "correct": 2 - wrong } + + stats = {'incorrect':0, 'correct':0} + # create a bunch of tests: + # most are symmetric: #date1, date2, does d1 match d2? does d2 match d1? 
+ tests = [("before 1960", "before 1961", True), + ("before 1960", "before 1960", True), + ("before 1961", "before 1961", True), + ("jan 1, 1960", "jan 1, 1960", True), + ("dec 31, 1959", "dec 31, 1959", True), + ("before 1960", "jan 1, 1960", False), + ("before 1960", "dec 31, 1959", True), + ("abt 1960", "1960", True), + ("abt 1960", "before 1960", True), + ("1960", "1960", True), + ("1960", "after 1960", False), + ("1960", "before 1960", False), + ("abt 1960", "abt 1960", True), + ("before 1960", "after 1960", False), + ("after jan 1, 1900", "jan 2, 1900", True), + ("abt jan 1, 1900", "jan 1, 1900", True), + ("from 1950 to 1955", "1950", True), + ("from 1950 to 1955", "1951", True), + ("from 1950 to 1955", "1952", True), + ("from 1950 to 1955", "1953", True), + ("from 1950 to 1955", "1954", True), + ("from 1950 to 1955", "1955", True), + ("from 1950 to 1955", "1956", False), + ("from 1950 to 1955", "dec 31, 1955", True), + ("from 1950 to 1955", "jan 1, 1955", True), + ("from 1950 to 1955", "dec 31, 1949", False), + ("from 1950 to 1955", "jan 1, 1956", False), + ("after jul 4, 1980", "jul 4, 1980", False), + ("after jul 4, 1980", "before jul 4, 1980", False), + ("after jul 4, 1980", "about jul 4, 1980", True), + ("after jul 4, 1980", "after jul 4, 1980", True), + ("between 1750 and 1752", "1750", True), + ("between 1750 and 1752", "about 1750", True), + ("between 1750 and 1752", "between 1749 and 1750", True), + ("between 1750 and 1752", "1749", False), + ("invalid date", "invalid date", True), + ("invalid date", "invalid", False, True), + ("invalid date 1", "invalid date 2", False), + ("abt jan 1, 2000", "dec 31, 1999", True), + ("jan 1, 2000", "dec 31, 1999", False), + ("aft jan 1, 2000", "dec 31, 1999", False), + ("after jan 1, 2000", "after dec 31, 1999", True), + ("after dec 31, 1999", "after jan 1, 2000", True), + ("1 31, 2000", "jan 1, 2000", False), + ("dec 31, 1999", "jan 1, 2000", False), + ("jan 1, 2000", "before dec 31, 1999", False), + ("aft jan 1, 
2000", "before dec 31, 1999", False), + ("before jan 1, 2000", "after dec 31, 1999", False), + ("jan 1, 2000/1", "jan 1, 2000", False), + ("jan 1, 2000/1", "jan 1, 2001", True), + ("about 1984", "about 2005", False), + ("about 1990", "about 2005", True), + ("about 2007", "about 2006", True), + ("about 1995", "after 2000", True), + ("about 1995", "after 2005", False), + ("about 2007", "about 2003", True), + ("before 2007", "2000", True), + # different calendar, same date + ("Aug 3, 1982", "14 Thermidor 190 (French Republican)", True), + ("after Aug 3, 1982", "before 14 Thermidor 190 (French Republican)", False), + ] + # test them: + for data in tests: + results = test_date(*data) + for result in results: + stats[result] += results[result] + for result in stats: + print result, stats[result]