From fe13b9775b1400cb81e3f216ae6d80cfb79022ae Mon Sep 17 00:00:00 2001 From: Don Allingham Date: Wed, 25 Jul 2007 03:54:31 +0000 Subject: [PATCH] 2007-07-24 Don Allingham * src/plugins/ExtractCity.py: fix regular expression matching svn: r8772 --- ChangeLog | 3 +++ src/plugins/ExtractCity.py | 47 ++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index a10a984a5..3947eb909 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +2007-07-24 Don Allingham + * src/plugins/ExtractCity.py: fix regular expression matching + 2007-07-24 Doug Blank * src/Mime/_PythonMime.py: Add CSV mime type * src/plugins/ImportCSV.py: Added diff --git a/src/plugins/ExtractCity.py b/src/plugins/ExtractCity.py index 7abf61f20..496dabdc0 100644 --- a/src/plugins/ExtractCity.py +++ b/src/plugins/ExtractCity.py @@ -51,7 +51,9 @@ import GrampsDisplay from PluginUtils import Tool, register_tool import Utils -CITY_STATE = re.compile("^(.+),\s*([\w\s\.]+),?\s*([\d-])?") +CITY_STATE_ZIP = re.compile("((\w|\s)+)\s*,\s*((\w|\s)+)\s*(,\s*((\d|-)+))") +CITY_STATE = re.compile("((\w|\s)+)\s*,\s*((\w|\s)+)") +STATE_ZIP = re.compile("(.+)\s+([\d-]+)") COUNTRY = ( _(u"United States of America"), _(u"Canada"), _(u"France")) @@ -333,7 +335,7 @@ STATE_MAP = { u"RHONE-ALPES" : (u"Rhône-Alpes", 2), u"RAL" : (u"RAL-Rhône-Alpes", 2), u"AOM" : (u"AOM-Autres Territoires d'Outre-Mer", 2), - u"COM" : (u"DOM-Collectivité Territoriale d'Outre-Mer", 2), + u"COM" : (u"COM-Collectivité Territoriale d'Outre-Mer", 2), u"DOM" : (u"DOM-Départements d'Outre-Mer", 2), u"TOM" : (u"TOM-Territoires d'Outre-Mer", 2), } @@ -396,9 +398,14 @@ class ExtractCity(Tool.BatchTool, ManagedWindow.ManagedWindow): if loc.get_street() == "" and loc.get_city() == "" \ and loc.get_state() == "" and \ loc.get_postal_code() == "": - match = CITY_STATE.match(descr.strip()) + + match = CITY_STATE_ZIP.match(descr.strip()) if match: - (city, state, postal) = match.groups() + data = match.groups() + city = data[0] + state = data[2] + postal = data[5] + val = " ".join(state.strip().split()).upper() if state: new_state = STATE_MAP.get(val.upper()) @@ -406,13 +413,35 @@ class ExtractCity(Tool.BatchTool, ManagedWindow.ManagedWindow): self.name_list.append( (handle, (city, new_state[0], postal, COUNTRY[new_state[1]]))) - else: - val = " ".join(descr.strip().split()).upper() - new_state = STATE_MAP.get(val) + continue + + match = CITY_STATE.match(descr.strip()) + if match: + data = match.groups() + city = data[0] + state = data[2] + postal = None + + if state: + m0 = STATE_ZIP.match(state) + if m0: + (state, postal) = m0.groups() + + val = " ".join(state.strip().split()).upper() + if state: + new_state = STATE_MAP.get(val.upper()) if new_state: self.name_list.append( - (handle, (None, new_state[0], None, + (handle, (city, new_state[0], postal, COUNTRY[new_state[1]]))) + continue + + val = " ".join(descr.strip().split()).upper() + new_state = STATE_MAP.get(val) + if new_state: + self.name_list.append( + (handle, (None, new_state[0], None, + COUNTRY[new_state[1]]))) self.progress.close() if self.name_list: @@ -519,7 +548,7 @@ class ExtractCity(Tool.BatchTool, ManagedWindow.ManagedWindow): if state: place.get_main_location().set_state(state) if postal: - place.get_main_location().set_postal(postal) + place.get_main_location().set_postal_code(postal) if country: place.get_main_location().set_country(country) self.db.commit_place(place, self.trans)