From 7d96db40edafb06a6233e3045bc8aaf481549f53 Mon Sep 17 00:00:00 2001 From: Benny Malengier Date: Fri, 31 Aug 2012 13:35:14 +0000 Subject: [PATCH] Gender stats are wrong on some imports. Added tool to rebuild the gender statistics svn: r20302 --- po/POTFILES.in | 1 + src/gen/lib/genderstats.py | 33 +++++-- src/plugins/tool/Makefile.am | 1 + src/plugins/tool/dumpgenderstats.py | 12 +-- src/plugins/tool/rebuildgenderstat.py | 129 ++++++++++++++++++++++++++ src/plugins/tool/tools.gpr.py | 24 ++++- 6 files changed, 187 insertions(+), 13 deletions(-) create mode 100644 src/plugins/tool/rebuildgenderstat.py diff --git a/po/POTFILES.in b/po/POTFILES.in index 9e2f3818e..8d95c85bf 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -677,6 +677,7 @@ src/plugins/tool/ownereditor.py src/plugins/tool/patchnames.py src/plugins/tool/rebuild.py src/plugins/tool/rebuildrefmap.py +src/plugins/tool/rebuildgenderstat.py src/plugins/tool/relcalc.py src/plugins/tool/removeunused.py src/plugins/tool/reorderids.py diff --git a/src/gen/lib/genderstats.py b/src/gen/lib/genderstats.py index 150dac3cf..c8207cfbb 100644 --- a/src/gen/lib/genderstats.py +++ b/src/gen/lib/genderstats.py @@ -2,6 +2,7 @@ # Gramps - a GTK+/GNOME based genealogy program # # Copyright (C) 2000-2005 Donald N. Allingham +# Copyright (C) 2012 Benny Malengier # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -53,6 +54,10 @@ class GenderStats(object): def save_stats(self): return self.stats + def clear_stats(self): + self.stats = {} + return self.stats + def _get_key (self, person): name = person.get_primary_name().get_first_name() return self._get_key_from_name (name) @@ -65,17 +70,29 @@ class GenderStats(object): return self.stats[name] return (0, 0, 0) + def count_name (self, name, gender): + """count a given name under gender in the gender stats + """ + keyname = self._get_key_from_name(name) + if not keyname: + return + + self._set_stats(keyname, gender) + def count_person (self, person, undo = 0): if not person: return # Let the Person do their own counting later - name = self._get_key (person) - if not name: + keyname = self._get_key (person) + if not keyname: return gender = person.get_gender() - (male, female, unknown) = self.name_stats (name) + self._set_stats(keyname, gender, undo) + + def _set_stats (self, keyname, gender, undo=0): + (male, female, unknown) = self.name_stats (keyname) if not undo: increment = 1 else: @@ -83,13 +100,18 @@ class GenderStats(object): if gender == Person.MALE: male += increment + if male < 0: + male = 0 elif gender == Person.FEMALE: female += increment + if female < 0: + female = 0 elif gender == Person.UNKNOWN: unknown += increment + if unknown < 0: + unknown = 0 - self.stats[name] = (male, female, unknown) - return + self.stats[keyname] = (male, female, unknown) def uncount_person (self, person): return self.count_person (person, undo = 1) @@ -113,4 +135,3 @@ class GenderStats(object): return Person.FEMALE return Person.UNKNOWN - diff --git a/src/plugins/tool/Makefile.am b/src/plugins/tool/Makefile.am index aa0065989..ea7304daf 100644 --- a/src/plugins/tool/Makefile.am +++ b/src/plugins/tool/Makefile.am @@ -22,6 +22,7 @@ pkgpython_PYTHON = \ ownereditor.py \ patchnames.py \ rebuild.py \ + rebuildgenderstat.py \ rebuildrefmap.py \ relcalc.py \ removeunused.py \ diff --git a/src/plugins/tool/dumpgenderstats.py b/src/plugins/tool/dumpgenderstats.py index 3421a01a4..87e74d465 100644 --- a/src/plugins/tool/dumpgenderstats.py +++ b/src/plugins/tool/dumpgenderstats.py @@ -67,17 +67,17 @@ class DumpGenderStats(tool.Tool, ManagedWindow): ] treeview = Gtk.TreeView() - model = ListModel(treeview,titles) + model = ListModel(treeview, titles) for entry in stats_list: - model.add(entry,entry[0]) + model.add(entry, entry[0]) window = Gtk.Window() - window.set_default_size(400,300) + window.set_default_size(400, 300) s = Gtk.ScrolledWindow() s.add(treeview) window.add(s) window.show_all() - self.set_window(window,None,self.label) + self.set_window(window, None, self.label) self.show() else: @@ -99,5 +99,5 @@ class DumpGenderStatsOptions(tool.ToolOptions): Defines options and provides handling interface. """ - def __init__(self, name,person_id=None): - tool.ToolOptions.__init__(self, name,person_id) + def __init__(self, name, person_id=None): + tool.ToolOptions.__init__(self, name, person_id) diff --git a/src/plugins/tool/rebuildgenderstat.py b/src/plugins/tool/rebuildgenderstat.py new file mode 100644 index 000000000..3ee75abbd --- /dev/null +++ b/src/plugins/tool/rebuildgenderstat.py @@ -0,0 +1,129 @@ +# +# Gramps - a GTK+/GNOME based genealogy program +# +# Copyright (C) 2012 Benny Malengier +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +# $Id$ + +# Written by Alex Roitman + +"Rebuild gender stat values" + +#------------------------------------------------------------------------- +# +# python modules +# +#------------------------------------------------------------------------- +from gen.ggettext import gettext as _ + +#------------------------------------------------------------------------ +# +# Set up logging +# +#------------------------------------------------------------------------ +import logging +log = logging.getLogger(".RebuildGenderStat") + +#------------------------------------------------------------------------- +# +# gtk modules +# +#------------------------------------------------------------------------- + +#------------------------------------------------------------------------- +# +# GRAMPS modules +# +#------------------------------------------------------------------------- +from gui.plug import tool +from gui.dialog import OkDialog +from gen.updatecallback import UpdateCallback +from gen.lib import Name + +#------------------------------------------------------------------------- +# +# runTool +# +#------------------------------------------------------------------------- + +COLUMN_GENDER = 2 +COLUMN_NAME = 3 +COLUMN_ALTNAMES = 4 + +class RebuildGenderStat(tool.Tool, UpdateCallback): + + def __init__(self, dbstate, uistate, options_class, name, callback=None): + + tool.Tool.__init__(self, dbstate, options_class, name) + + if self.db.readonly: + return + + self.db.disable_signals() + if uistate: + self.callback = uistate.pulse_progressbar + uistate.set_busy_cursor(True) + uistate.progress.show() + uistate.push_message(dbstate, _("Rebuilding gender statistics for name gender guessing...")) + else: + self.callback = None + print "Rebuilding gender statistics for name gender guessing..." + + UpdateCallback.__init__(self, self.callback) + self.set_total(self.db.get_number_of_people()) + self.rebuild_genderstats() + self.reset() + + if uistate: + uistate.set_busy_cursor(False) + uistate.progress.hide() + OkDialog(_("Gender statistics rebuilt"), + _('Gender statistics for name gender guessing have been rebuilt.'), + parent=uistate.window) + else: + print "Gender statistics for name gender guessing have been rebuilt." + self.db.enable_signals() + + def rebuild_genderstats(self): + """ + Function to rebuild the gender stats + """ + self.db.genderStats.clear_stats() + with self.db.get_person_cursor() as cursor: + #loop over database and store the sort field, and the handle, and + #allow for a third iter + for key, data in cursor: + rawprimname = data[COLUMN_NAME] + rawaltnames = data[COLUMN_ALTNAMES] + primary_name = Name().unserialize(rawprimname).get_first_name() + alternate_names = [Name().unserialize(name).get_first_name() + for name in rawaltnames] + self.db.genderStats.count_name(primary_name, data[COLUMN_GENDER]) + +#------------------------------------------------------------------------ +# +# +# +#------------------------------------------------------------------------ +class RebuildGenderStatOptions(tool.ToolOptions): + """ + Defines options and provides handling interface. + """ + + def __init__(self, name, person_id=None): + tool.ToolOptions.__init__(self, name, person_id) diff --git a/src/plugins/tool/tools.gpr.py b/src/plugins/tool/tools.gpr.py index a8ba75fb8..8709e8877 100644 --- a/src/plugins/tool/tools.gpr.py +++ b/src/plugins/tool/tools.gpr.py @@ -365,7 +365,7 @@ tool_modes = [TOOL_MODE_GUI, TOOL_MODE_CLI] #------------------------------------------------------------------------ # -# Rebuild Secondary Indices +# Rebuild Reference Maps # #------------------------------------------------------------------------ @@ -385,6 +385,28 @@ optionclass = 'RebuildRefMapOptions', tool_modes = [TOOL_MODE_GUI, TOOL_MODE_CLI] ) +#------------------------------------------------------------------------ +# +# Rebuild Gender Statistics +# +#------------------------------------------------------------------------ + +register(TOOL, +id = 'rebuild_genstats', +name = _("Rebuild Gender Statistics"), +description = _("Rebuilds gender statistics for name gender guessing..."), +version = '1.0', +gramps_target_version = '4.0', +status = STABLE, +fname = 'rebuildgenderstat.py', +authors = ["Benny Malengier"], +authors_email = ["benny.malengier@gramps-project.org"], +category = TOOL_DBFIX, +toolclass = 'RebuildGenderStat', +optionclass = 'RebuildGenderStatOptions', +tool_modes = [TOOL_MODE_GUI, TOOL_MODE_CLI] + ) + #------------------------------------------------------------------------ # # Relationship Calculator