new patch names tool version 1

svn: r16028
This commit is contained in:
Benny Malengier 2010-10-23 14:50:25 +00:00
parent ee91b33a43
commit d7178e96d7

View File

@ -70,25 +70,19 @@ WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')
# List of possible surname prefixes. Notice that you must run the tool # List of possible surname prefixes. Notice that you must run the tool
# multiple times for prefixes such as "van der". # multiple times for prefixes such as "van der".
prefix_list = [ PREFIX_LIST = [
"de", "van", "von", "di", "le", "du", "dela", "della", "de", "van", "von", "di", "le", "du", "dela", "della",
"des", "vande", "ten", "da", "af", "den", "das", "dello", "des", "vande", "ten", "da", "af", "den", "das", "dello",
"del", "en", "ein", "el" "et", "les", "lo", "los", "un", "del", "en", "ein", "el" "et", "les", "lo", "los", "un",
"um", "una", "uno", "der", "ter", "te", "die", "um", "una", "uno", "der", "ter", "te", "die",
] ]
connector_list = ['e', 'y', ] CONNECTOR_LIST = ['e', 'y', ]
CONNECTOR_LIST_NONSPLIT = ['de', 'van']
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE) _title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE) _nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)
# Find a prefix in the first_name
_fn_prefix_re = re.compile("(\S+)\s+(%s)\s*$" % '|'.join(prefix_list),
re.IGNORECASE)
# Find a prefix in the surname
_sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(prefix_list),
re.IGNORECASE)
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
@ -102,7 +96,11 @@ _sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(prefix_list),
class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow): class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
titleid = 1
nickid = 2
pref1id = 3
compid = 4
def __init__(self, dbstate, uistate, options_class, name, callback=None): def __init__(self, dbstate, uistate, options_class, name, callback=None):
self.label = _('Name and title extraction tool') self.label = _('Name and title extraction tool')
ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__) ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__)
@ -111,12 +109,63 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
tool.BatchTool.__init__(self, dbstate, options_class, name) tool.BatchTool.__init__(self, dbstate, options_class, name)
if self.fail: if self.fail:
return return
winprefix = gtk.Dialog("Default prefix and connector settings",
self.uistate.window,
gtk.DIALOG_MODAL|gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT))
winprefix.set_has_separator(False)
winprefix.vbox.set_spacing(5)
hboxpref = gtk.HBox()
hboxpref.pack_start(gtk.Label(_('Prefixes to search for:')),
expand=False, padding=5)
self.prefixbox = gtk.Entry()
self.prefixbox.set_text(', '.join(PREFIX_LIST))
hboxpref.pack_start(self.prefixbox)
winprefix.vbox.pack_start(hboxpref)
hboxcon = gtk.HBox()
hboxcon.pack_start(gtk.Label(_('Connectors splitting surnames:')),
expand=False, padding=5)
self.conbox = gtk.Entry()
self.conbox.set_text(', '.join(CONNECTOR_LIST))
hboxcon.pack_start(self.conbox)
winprefix.vbox.pack_start(hboxcon)
hboxconns = gtk.HBox()
hboxconns.pack_start(gtk.Label(_('Connectors non-splitting surnames:')),
expand=False, padding=5)
self.connsbox = gtk.Entry()
self.connsbox.set_text(', '.join(CONNECTOR_LIST_NONSPLIT))
hboxconns.pack_start(self.connsbox)
winprefix.vbox.pack_start(hboxconns)
winprefix.show_all()
winprefix.resize(700, 100)
response = winprefix.run()
self.prefix_list = self.prefixbox.get_text().split(',')
self.prefix_list = map(strip, self.prefix_list)
self.prefixbox = None
self.connector_list = self.conbox.get_text().split(',')
self.connector_list = map(strip, self.connector_list)
self.conbox = None
self.connector_list_nonsplit = self.connsbox.get_text().split(',')
self.connector_list_nonsplit = map(strip, self.connector_list_nonsplit)
self.connsbox = None
# Find a prefix in the first_name
self._fn_prefix_re = re.compile("(\S+)\s+(%s)\s*$" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a prefix in the surname
self._sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a connector in the surname
self._sn_con_re = re.compile("^\s*(.+)\s+(%s)\s+(.+)" % '|'.join(self.connector_list),
re.IGNORECASE)
winprefix.destroy()
self.cb = callback self.cb = callback
self.title_list = [] self.handle_to_action = {}
self.nick_list = []
self.prefix1_list = []
self.prefix2_list = []
self.progress = ProgressMeter( self.progress = ProgressMeter(
_('Extracting Information from Names'), '') _('Extracting Information from Names'), '')
@ -128,6 +177,18 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
name = person.get_primary_name() name = person.get_primary_name()
first = name.get_first_name() first = name.get_first_name()
sname = name.get_surname() sname = name.get_surname()
old_prefix = []
old_surn = []
old_con = []
old_prim = []
old_orig = []
for surn in name.get_surname_list():
old_prefix.append(surn.get_prefix())
old_surn.append(surn.get_surname())
old_con.append(surn.get_connector())
old_prim.append(surn.get_primary())
old_orig.append(surn.get_origintype())
if name.get_title(): if name.get_title():
old_title = [name.get_title()] old_title = [name.get_title()]
@ -141,46 +202,153 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
first = groups[1] first = groups[1]
new_title.append(groups[0]) new_title.append(groups[0])
match = _title_re.match(first) match = _title_re.match(first)
matchnick = _nick_re.match(first)
if new_title: if new_title:
self.title_list.append((key, " ".join(old_title+new_title), titleval = (" ".join(old_title+new_title), first)
first)) if key in self.handle_to_action:
continue self.handle_to_action[key][self.titleid] = titleval
match = _nick_re.match(first)
if match:
groups = match.groups()
self.nick_list.append((key, groups[0], groups[1]))
continue
old_prefix = name.get_surname_prefix()
# First try to find the name prefix in the first_name
match = _fn_prefix_re.match(first)
if match:
groups = match.groups()
if old_prefix:
# Put the found prefix before the old prefix
new_prefix = " ".join([groups[1], old_prefix])
else: else:
new_prefix = groups[1] self.handle_to_action[key] = {self.titleid: titleval}
self.prefix1_list.append((key, groups[0], new_prefix)) elif matchnick:
continue # we check for nick, which changes given name like title
groups = matchnick.groups()
# Next, try to find the name prefix in the surname nickval = (groups[0], groups[1])
match = _sn_prefix_re.match(sname) if key in self.handle_to_action:
if match: self.handle_to_action[key][self.nickid] = nickval
groups = match.groups()
if old_prefix:
# Put the found prefix after the old prefix
new_prefix = " ".join([old_prefix, groups[0]])
else: else:
new_prefix = groups[0] self.handle_to_action[key] = {self.nickid: nickval}
self.prefix2_list.append((key, groups[1], new_prefix)) else:
# Try to find the name prefix in the given name, also this
# changes given name
match = self._fn_prefix_re.match(first)
if match:
groups = match.groups()
if old_prefix[0]:
# Put the found prefix before the old prefix
new_prefix = " ".join([groups[1], old_prefix[0]])
else:
new_prefix = groups[1]
pref1val = (groups[0], new_prefix, groups[1])
if key in self.handle_to_action:
self.handle_to_action[key][self.pref1id] = pref1val
else:
self.handle_to_action[key] = {self.pref1id: pref1val}
#check for Gedcom import of compound surnames
if len(old_surn) == 1 and old_con[0] == '':
prefixes = old_prefix[0].split(',')
surnames = old_surn[0].split(',')
if len(prefixes) > 1 and len(prefixes) == len(surnames):
#assume a list of prefix and a list of surnames
prefixes = map(strip, prefixes)
surnames = map(strip, surnames)
primaries = [False] * len(prefixes)
primaries[0] = True
origs = []
for ind in range(len(prefixes)):
origs.append(gen.lib.NameOriginType())
origs[0] = old_orig[0]
compoundval = (surnames, prefixes, ['']*len(prefixes),
primaries, origs)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
#we cannot check compound surnames, so continue the loop
continue
# Next, try to split surname in compounds: prefix surname connector
found = False
new_prefix_list = []
new_surname_list = []
new_connector_list = []
new_prim_list = []
new_orig_list = []
ind = 0
cont = True
for pref, surn, con, prim, orig in zip(old_prefix, old_surn,
old_con, old_prim, old_orig):
while cont:
new_prefix_list.append(pref)
new_surname_list.append('')
new_connector_list.append(con)
new_prim_list.append(prim)
new_orig_list.append(orig)
surnval = surn.split()
if surnval == []:
continue
val = surnval.pop(0)
while cont and (val.lower() in self.prefix_list):
found = True
if new_prefix_list[-1]:
new_prefix_list[-1] += ' ' + val
else:
new_prefix_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#after prefix we have a surname
if cont:
new_surname_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if value after surname indicates continue, then continue
if cont and ((val.lower() in self.connector_list_nonsplit) or
(new_surname_list[-1].lower() in self.connector_list_nonsplit)):
#add this val to the current surname
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
# if previous is non-splitting connector, then add new val to
# current surname
if cont and (new_surname_list[-1].lower() in self.connector_list_nonsplit):
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if next is a connector, add it to the surname
if cont and val.lower() in self.connector_list:
found = True
if new_connector_list[-1]:
new_connector_list[-1] = ' ' + val
else:
new_prefix_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#initialize for a next surname in case there are still
#val
if cont:
found = True # we split surname
pref=''
con = ''
prim = False
orig = gen.lib.NameOriginType()
ind += 1
if found:
compoundval = (new_surname_list, new_prefix_list,
new_connector_list, new_prim_list, new_orig_list)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
self.progress.step() self.progress.step()
if self.nick_list or self.title_list or self.prefix1_list or self.prefix2_list: if self.handle_to_action:
self.display() self.display()
else: else:
self.progress.close() self.progress.close()
@ -229,7 +397,7 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
c = gtk.TreeViewColumn(_('Value'), gtk.CellRendererText(), text=3) c = gtk.TreeViewColumn(_('Value'), gtk.CellRendererText(), text=3)
self.list.append_column(c) self.list.append_column(c)
c = gtk.TreeViewColumn(_('Name'), gtk.CellRendererText(), text=4) c = gtk.TreeViewColumn(_('Current Name'), gtk.CellRendererText(), text=4)
self.list.append_column(c) self.list.append_column(c)
self.list.set_model(self.model) self.list.set_model(self.model)
@ -237,58 +405,65 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
self.nick_hash = {} self.nick_hash = {}
self.title_hash = {} self.title_hash = {}
self.prefix1_hash = {} self.prefix1_hash = {}
self.prefix2_hash = {} self.compound_hash = {}
self.progress.set_pass(_('Building display'), self.progress.set_pass(_('Building display'),
len(self.nick_list)+len(self.title_list) len(self.handle_to_action.keys()))
+len(self.prefix1_list)+len(self.prefix2_list))
for (pid, name, nick) in self.nick_list: for key, data in self.handle_to_action.items():
p = self.db.get_person_from_handle(pid) p = self.db.get_person_from_handle(key)
gid = p.get_gramps_id() gid = p.get_gramps_id()
handle = self.model.append() if self.nickid in data:
self.model.set_value(handle, 0, 1) given, nick = data[self.nickid]
self.model.set_value(handle, 1, gid) handle = self.model.append()
self.model.set_value(handle, 2, _('Nickname')) self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 3, nick) self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 4, p.get_primary_name().get_name()) self.model.set_value(handle, 2, _('Nickname'))
self.nick_hash[pid] = handle self.model.set_value(handle, 3, nick)
self.progress.step() self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.nick_hash[key] = handle
for (pid, title, name) in self.title_list:
p = self.db.get_person_from_handle(pid) if self.titleid in data:
gid = p.get_gramps_id() title, given = data[self.titleid]
handle = self.model.append() handle = self.model.append()
self.model.set_value(handle, 0, 1) self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid) self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Person|Title')) self.model.set_value(handle, 2, _('Person|Title'))
self.model.set_value(handle, 3, title) self.model.set_value(handle, 3, title)
self.model.set_value(handle, 4, p.get_primary_name().get_name()) self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.title_hash[pid] = handle self.title_hash[key] = handle
self.progress.step()
if self.pref1id in data:
for (pid, fname, prefix) in self.prefix1_list: given, prefixtotal, new_prefix = data[self.pref1id]
p = self.db.get_person_from_handle(pid) handle = self.model.append()
gid = p.get_gramps_id() self.model.set_value(handle, 0, 1)
handle = self.model.append() self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 0, 1) self.model.set_value(handle, 2, _('Prefix in given name'))
self.model.set_value(handle, 1, gid) self.model.set_value(handle, 3, prefixtotal)
self.model.set_value(handle, 2, _('Prefix')) self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.model.set_value(handle, 3, prefix) self.prefix1_hash[key] = handle
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.prefix1_hash[pid] = handle if self.compid in data:
self.progress.step() surn_list, pref_list, con_list, prims, origs = data[self.compid]
handle = self.model.append()
for (pid, sname, prefix) in self.prefix2_list: self.model.set_value(handle, 0, 1)
p = self.db.get_person_from_handle(pid) self.model.set_value(handle, 1, gid)
gid = p.get_gramps_id() self.model.set_value(handle, 2, _('Compound surname'))
handle = self.model.append() newval = ''
self.model.set_value(handle, 0, 1) for sur, pre, con in zip(surn_list, pref_list, con_list):
self.model.set_value(handle, 1, gid) if newval:
self.model.set_value(handle, 2, _('Prefix')) newval += '-['
self.model.set_value(handle, 3, prefix) else:
self.model.set_value(handle, 4, p.get_primary_name().get_name()) newval = '['
self.prefix2_hash[pid] = handle newval += pre + ',' + sur
if con:
newval += ',' + con + ']'
else:
newval += ']'
self.model.set_value(handle, 3, newval)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.compound_hash[key] = handle
self.progress.step() self.progress.step()
self.progress.close() self.progress.close()
@ -301,46 +476,58 @@ class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
def on_ok_clicked(self, obj): def on_ok_clicked(self, obj):
trans = self.db.transaction_begin("", batch=True) trans = self.db.transaction_begin("", batch=True)
self.db.disable_signals() self.db.disable_signals()
for grp in self.nick_list:
handle = self.nick_hash[grp[0]]
val = self.model.get_value(handle, 0)
if val:
p = self.db.get_person_from_handle(grp[0])
name = p.get_primary_name()
name.set_first_name(grp[1].strip())
nick_name = grp[2].strip()
name.set_nick_name(nick_name)
self.db.commit_person(p, trans)
for grp in self.title_list: for key, data in self.handle_to_action.items():
handle = self.title_hash[grp[0]] p = self.db.get_person_from_handle(key)
val = self.model.get_value(handle, 0) if self.nickid in data:
if val: modelhandle = self.nick_hash[key]
p = self.db.get_person_from_handle(grp[0]) val = self.model.get_value(modelhandle, 0)
name = p.get_primary_name() if val:
name.set_first_name(grp[2].strip()) given, nick = data[self.nickid]
name.set_title(grp[1].strip()) name = p.get_primary_name()
self.db.commit_person(p, trans) name.set_first_name(given.strip())
name.set_nick_name(nick.strip())
for grp in self.prefix1_list:
handle = self.prefix1_hash[grp[0]] if self.titleid in data:
val = self.model.get_value(handle, 0) modelhandle = self.title_hash[key]
if val: val = self.model.get_value(modelhandle, 0)
p = self.db.get_person_from_handle(grp[0]) if val:
name = p.get_primary_name() title, given = data[self.titleid]
name.set_first_name(grp[1].strip()) name = p.get_primary_name()
name.set_surname_prefix(grp[2].strip()) name.set_first_name(given.strip())
self.db.commit_person(p, trans) name.set_title(title.strip())
for grp in self.prefix2_list: if self.pref1id in data:
handle = self.prefix2_hash[grp[0]] modelhandle = self.prefix1_hash[key]
val = self.model.get_value(handle, 0) val = self.model.get_value(modelhandle, 0)
if val: if val:
p = self.db.get_person_from_handle(grp[0]) given, prefixtotal, prefix = data[self.pref1id]
name = p.get_primary_name() name = p.get_primary_name()
name.set_surname(grp[1].strip()) name.set_first_name(given.strip())
name.set_surname_prefix(grp[2].strip()) oldpref = name.get_surname_list()[0].get_prefix().strip()
self.db.commit_person(p, trans) if oldpref == '' or oldpref == prefix.strip():
name.get_surname_list()[0].set_prefix(prefix)
else:
name.get_surname_list()[0].set_prefix('%s %s' % (prefix, oldpref))
if self.compid in data:
modelhandle = self.compound_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
surns, prefs, cons, prims, origs = data[self.compid]
name = p.get_primary_name()
new_surn_list = []
for surn, pref, con, prim, orig in zip(surns, prefs, cons,
prims, origs):
new_surn_list.append(gen.lib.Surname())
new_surn_list[-1].set_surname(surn.strip())
new_surn_list[-1].set_prefix(pref.strip())
new_surn_list[-1].set_connector(con.strip())
new_surn_list[-1].set_primary(prim)
new_surn_list[-1].set_origintype(orig)
name.set_surname_list(new_surn_list)
self.db.commit_person(p, trans)
self.db.transaction_commit(trans, self.db.transaction_commit(trans,
_("Extract information from names")) _("Extract information from names"))
@ -357,3 +544,7 @@ class PatchNamesOptions(tool.ToolOptions):
def __init__(self, name, person_id=None): def __init__(self, name, person_id=None):
tool.ToolOptions.__init__(self, name, person_id) tool.ToolOptions.__init__(self, name, person_id)
def strip(arg):
    """Return ``arg`` with leading and trailing whitespace removed.

    Plain-function wrapper around the ``strip`` method of ``arg`` so that it
    can be passed to ``map`` when cleaning up the pieces of a comma-separated
    string (for example the prefix and connector lists typed into the
    settings dialog).
    """
    return arg.strip()