Exportgedcom and importgedcom. 0006382: ADDR tag in GEDCOM export does not contain a full address. On export, all the elements of the structured address are also output as ADDR/CONT lines. On import, the structured address takes precedence (as at present), but a warning is given only if an element of the free-form address is missing from the structured address.
svn: r21610
This commit is contained in:
parent
709eb0ebcd
commit
7055827260
@ -356,39 +356,19 @@ class GedcomWriter(UpdateCallback):
|
||||
"""
|
||||
owner = self.dbase.get_researcher()
|
||||
name = owner.get_name()
|
||||
addr = owner.get_address()
|
||||
adr2 = owner.get_locality()
|
||||
city = owner.get_city()
|
||||
state = owner.get_state()
|
||||
ctry = owner.get_country()
|
||||
post = owner.get_postal_code()
|
||||
phon = owner.get_phone()
|
||||
mail = owner.get_email()
|
||||
|
||||
if not name :
|
||||
name = cuni('Not Provided')
|
||||
if not addr :
|
||||
addr = cuni('Not Provided')
|
||||
|
||||
self._writeln(0, "@SUBM@", "SUBM")
|
||||
self._writeln(1, "NAME", name)
|
||||
self._writeln(1, "ADDR", addr)
|
||||
if city and state and post:
|
||||
self._writeln(2, "CONT", "%s, %s %s" % (city, state, post))
|
||||
else:
|
||||
self._writeln(2, "CONT", cuni("Not Provided"))
|
||||
if addr:
|
||||
self._writeln(2, "ADR1", addr)
|
||||
if adr2:
|
||||
self._writeln(2, "ADR2", adr2)
|
||||
if city:
|
||||
self._writeln(2, "CITY", city)
|
||||
if state:
|
||||
self._writeln(2, "STAE", state)
|
||||
if post:
|
||||
self._writeln(2, "POST", post)
|
||||
if ctry:
|
||||
self._writeln(2, "CTRY", ctry)
|
||||
|
||||
# Researcher is a sub-type of LocationBase, so get_city etc. which are
|
||||
# used in __write_addr work fine. However, the database owner street is
|
||||
# stored in address, so we need to temporarily copy it into street so
|
||||
# __write_addr works properly
|
||||
owner.set_street(owner.get_address())
|
||||
self.__write_addr(1, owner)
|
||||
|
||||
if phon:
|
||||
self._writeln(1, "PHON", phon)
|
||||
if mail:
|
||||
@ -683,20 +663,8 @@ class GedcomWriter(UpdateCallback):
|
||||
"""
|
||||
for addr in person.get_address_list():
|
||||
self._writeln(1, 'RESI')
|
||||
self._date(2, addr.get_date_object())
|
||||
self._writeln(2, "ADDR", addr.get_street())
|
||||
if addr.get_street():
|
||||
self._writeln(3, 'ADR1', addr.get_street())
|
||||
if addr.get_locality():
|
||||
self._writeln(3, 'ADR2', addr.get_locality())
|
||||
if addr.get_city():
|
||||
self._writeln(3, 'CITY', addr.get_city())
|
||||
if addr.get_state():
|
||||
self._writeln(3, 'STAE', addr.get_state())
|
||||
if addr.get_postal_code():
|
||||
self._writeln(3, 'POST', addr.get_postal_code())
|
||||
if addr.get_country():
|
||||
self._writeln(3, 'CTRY', addr.get_country())
|
||||
self.__date(2, addr.get_date_object())
|
||||
self._write_addr(2, addr)
|
||||
if addr.get_phone():
|
||||
self._writeln(2, 'PHON', addr.get_phone())
|
||||
|
||||
@ -1019,19 +987,7 @@ class GedcomWriter(UpdateCallback):
|
||||
if repo.get_name():
|
||||
self._writeln(1, 'NAME', repo.get_name())
|
||||
for addr in repo.get_address_list():
|
||||
self._writeln(1, "ADDR", addr.get_street())
|
||||
if addr.get_street():
|
||||
self._writeln(2, 'ADR1', addr.get_street())
|
||||
if addr.get_locality():
|
||||
self._writeln(2, 'ADR2', addr.get_locality())
|
||||
if addr.get_city():
|
||||
self._writeln(2, 'CITY', addr.get_city())
|
||||
if addr.get_state():
|
||||
self._writeln(2, 'STAE', addr.get_state())
|
||||
if addr.get_postal_code():
|
||||
self._writeln(2, 'POST', addr.get_postal_code())
|
||||
if addr.get_country():
|
||||
self._writeln(2, 'CTRY', addr.get_country())
|
||||
self.__write_addr(1, addr)
|
||||
if addr.get_phone():
|
||||
self._writeln(1, 'PHON', addr.get_phone())
|
||||
for url in repo.get_url_list():
|
||||
@ -1432,6 +1388,58 @@ class GedcomWriter(UpdateCallback):
|
||||
|
||||
self._note_references(place.get_note_list(), level+1)
|
||||
|
||||
def __write_addr(self, level, addr):
    """
    Write an address both as free-form ADDR/CONT lines and as the
    structured ADR1/ADR2/CITY/STAE/POST/CTRY sub-records.

    n ADDR <ADDRESS_LINE> {0:1}
    +1 CONT <ADDRESS_LINE> {0:M}
    +1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
    +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
    +1 CITY <ADDRESS_CITY> {0:1}
    +1 STAE <ADDRESS_STATE> {0:1}
    +1 POST <ADDRESS_POSTAL_CODE> {0:1}
    +1 CTRY <ADDRESS_COUNTRY> {0:1}

    This is done along the lines suggested by Tamura Jones in
    http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382.
    "GEDCOM writers should always use the structured address format,
    and use it for all addresses, including the submitter address and
    their own corporate address." "Vendors that want their product to pass
    even the strictest GEDCOM validation, should include export to the old
    free-form format..." [This goes on to say the free-form should be an
    option, but we have not made it an option in Gramps].

    Nothing at all is written when every component of the address is
    empty.

    @param level: The level number for the ADDR tag
    @type level: Integer
    @param addr: The location or address; it must provide get_street,
                 get_locality, get_city, get_state, get_postal_code and
                 get_country
    @type addr: LocationBase (or a sub-type of it)
    """
    street = addr.get_street()
    locality = addr.get_locality()
    city = addr.get_city()
    state = addr.get_state()
    postal_code = addr.get_postal_code()
    country = addr.get_country()

    # Every getter has to be *called* here: testing the bound method
    # itself is always true and would emit a bare ADDR line for an
    # address that has no content.
    if not (street or locality or city or state or postal_code or country):
        return

    # Free-form representation: the street goes on the ADDR line itself,
    # every other non-empty component on its own CONT line.
    self._writeln(level, 'ADDR', street)
    for value in (locality, city, state, postal_code, country):
        if value:
            self._writeln(level + 1, 'CONT', value)

    # Structured representation of the same data.
    structured = (
        ('ADR1', street),
        ('ADR2', locality),
        ('CITY', city),
        ('STAE', state),
        ('POST', postal_code),
        ('CTRY', country),
    )
    for tag, value in structured:
        if value:
            self._writeln(level + 1, tag, value)
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
|
@ -49,7 +49,12 @@ from gramps.gen.errors import DbError, GedcomError
|
||||
from gramps.gui.glade import Glade
|
||||
from gramps.plugins.lib.libmixin import DbMixin
|
||||
from gramps.plugins.lib import libgedcom
|
||||
module = __import__("gramps.plugins.lib.libgedcom") # why o why ??
|
||||
# The following code is necessary to ensure that when Help->Plugin
|
||||
# Manager->Reload is executed, not only is the top-level exportgedcom file
|
||||
# reloaded, but also the dependent libgedcom. This ensures that testing can have
|
||||
# a quick turnround, without having to restart Gramps.
|
||||
module = __import__("gramps.plugins.lib.libgedcom",
|
||||
fromlist=["gramps.plugins.lib"]) # why o why ?? as above!
|
||||
if sys.version_info[0] < 3:
|
||||
reload (module)
|
||||
else:
|
||||
|
@ -98,6 +98,7 @@ import time
|
||||
import codecs
|
||||
from xml.parsers.expat import ParserCreate
|
||||
from collections import defaultdict
|
||||
import string
|
||||
if sys.version_info[0] < 3:
|
||||
from cStringIO import StringIO
|
||||
else:
|
||||
@ -646,9 +647,6 @@ DATE_QUALITY = {
|
||||
# regular expressions
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
ADDR_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)\s+(\d+)\s*(.*)')
|
||||
ADDR2_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)\s+(\d+)')
|
||||
ADDR3_RE = re.compile('(.+)([\n\r]+)(.+)\s*, (.+)')
|
||||
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
|
||||
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
|
||||
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
|
||||
@ -2134,6 +2132,7 @@ class GedcomParser(UpdateCallback):
|
||||
TOKEN_SOUR : self.__event_source,
|
||||
TOKEN_PLAC : self.__event_place,
|
||||
TOKEN_ADDR : self.__event_addr,
|
||||
TOKEN_PHON : self.__event_phon,
|
||||
TOKEN_CAUS : self.__event_cause,
|
||||
TOKEN_AGNC : self.__event_agnc,
|
||||
TOKEN_AGE : self.__event_age,
|
||||
@ -2245,14 +2244,15 @@ class GedcomParser(UpdateCallback):
|
||||
self.func_list.append(self.object_parse_tbl)
|
||||
|
||||
self.parse_loc_tbl = {
|
||||
TOKEN_ADDR : self.__location_addr,
|
||||
TOKEN_ADR1 : self.__location_adr1,
|
||||
TOKEN_ADR2 : self.__location_adr2,
|
||||
TOKEN_DATE : self.__location_date,
|
||||
TOKEN_CITY : self.__location_city,
|
||||
TOKEN_STAE : self.__location_stae,
|
||||
TOKEN_POST : self.__location_post,
|
||||
TOKEN_CTRY : self.__location_ctry,
|
||||
# Not legal GEDCOM - not clear why these are included at this level
|
||||
TOKEN_ADDR : self.__ignore,
|
||||
TOKEN_DATE : self.__location_date,
|
||||
TOKEN_NOTE : self.__location_note,
|
||||
TOKEN_RNOTE : self.__location_note,
|
||||
TOKEN__LOC : self.__ignore,
|
||||
@ -2310,8 +2310,7 @@ class GedcomParser(UpdateCallback):
|
||||
# +1 <<CHANGE_DATE>> {0:1}
|
||||
TOKEN_CHAN : self.__family_chan,
|
||||
TOKEN_ENDL : self.__ignore,
|
||||
|
||||
TOKEN_ADDR : self.__family_addr,
|
||||
TOKEN_ADDR : self.__ignore,
|
||||
TOKEN_RIN : self.__family_cust_attr,
|
||||
TOKEN_SUBM : self.__ignore,
|
||||
TOKEN_ATTR : self.__family_attr,
|
||||
@ -3171,6 +3170,69 @@ class GedcomParser(UpdateCallback):
|
||||
self.__add_msg(txt)
|
||||
self.number_of_errors -= 1
|
||||
|
||||
def __merge_address(self, free_form_address, addr, line, state):
    """
    Merge free-form and structured addresses.

    n ADDR <ADDRESS_LINE> {0:1}
    +1 CONT <ADDRESS_LINE> {0:M}
    +1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
    +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality)
    +1 CITY <ADDRESS_CITY> {0:1}
    +1 STAE <ADDRESS_STATE> {0:1}
    +1 POST <ADDRESS_POSTAL_CODE> {0:1}
    +1 CTRY <ADDRESS_COUNTRY> {0:1}

    This is done along the lines suggested by Tamura Jones in
    http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382.
    "When a GEDCOM reader encounters a double address, it should read the
    structured address. ... A GEDCOM reader that does verify that the
    addresses are the same should issue an error if they are not".

    This is called for SUBMitter addresses (__subm_addr), INDIvidual
    addresses (__person_addr), REPO addresses and HEADer corp address
    (__repo_addr) and EVENt addresses (__event_addr).

    The structured address (if any) will have been accumulated into an
    object of type LocationBase, which will either be a Location, or an
    Address object.

    If ADDR is provided, but none of ADR1, ADR2, CITY, STAE, or POST (not
    CTRY), then Street is set to the free-form address. N.B. this is a
    change for Repository addresses and HEADer Corp address where
    previously the free-form address was deconstructed into different
    structured components. N.B. PAF provides a free-form address and a
    country, so this allows for that case.

    If both forms of address are provided, then the structured address is
    used, and if the ADDR/CONT contains anything not in the structured
    address, a warning is issued.

    If just ADR1, ADR2, CITY, STAE, POST or CTRY are provided (this is not
    actually legal GEDCOM syntax, but may be possible by GEDCOM extensions)
    then just the structured address is used.

    @param free_form_address: The free-form ADDR text; it is split on
                              newlines to obtain the individual lines
    @type free_form_address: str
    @param addr: The structured address accumulated so far; its street is
                 overwritten when no structured component is present
    @type addr: LocationBase
    @param line: The current line in GedLine format (used for warnings)
    @type line: GedLine
    @param state: The current state (used for warnings)
    @type state: CurrentState
    """
    # Country is deliberately not part of this test (see the PAF note in
    # the docstring).
    has_structured = (addr.get_street() or addr.get_locality() or
                      addr.get_city() or addr.get_state() or
                      addr.get_postal_code())
    if not has_structured:
        addr.set_street(free_form_address)
        return

    # Structured address provided: collect every line of every structured
    # component, ignoring surrounding commas and whitespace.
    strip_chars = ',' + string.whitespace
    known = set()
    for component in (addr.get_street(), addr.get_locality(),
                      addr.get_city(), addr.get_state(),
                      addr.get_postal_code(), addr.get_country()):
        known.update(part.strip(strip_chars)
                     for part in component.split("\n"))

    for elmn in free_form_address.split("\n"):
        cleaned = elmn.strip(strip_chars)
        if not cleaned:
            # A blank free-form line carries no information, so there is
            # nothing that could have been lost.
            continue
        if cleaned not in known:
            # message means that the element %s was ignored, but
            # expressed the wrong way round because the message is
            # truncated for output.
            # The template is translated first and interpolated
            # afterwards, so that the catalogue lookup sees the constant
            # msgid rather than the already-formatted text.
            self.__add_msg(_("ADDR element ignored '%s'") % elmn,
                           line, state)
    # The free-form address ADDR is discarded
|
||||
|
||||
def __parse_trailer(self):
|
||||
"""
|
||||
Looks for the expected TRLR token
|
||||
@ -3810,7 +3872,7 @@ class GedcomParser(UpdateCallback):
|
||||
|
||||
def __person_addr(self, line, state):
|
||||
"""
|
||||
Parses the Address structure
|
||||
Parses the INDIvidual <ADDRESS_STRUCTURE>
|
||||
|
||||
n ADDR <ADDRESS_LINE> {0:1}
|
||||
+1 CONT <ADDRESS_LINE> {0:M}
|
||||
@ -3827,13 +3889,16 @@ class GedcomParser(UpdateCallback):
|
||||
@param state: The current state
|
||||
@type state: CurrentState
|
||||
"""
|
||||
sub_state = CurrentState()
|
||||
sub_state.level = state.level+1
|
||||
free_form = line.data
|
||||
|
||||
sub_state = CurrentState(level=state.level + 1)
|
||||
sub_state.addr = Address()
|
||||
sub_state.addr.set_street(line.data)
|
||||
state.person.add_address(sub_state.addr)
|
||||
|
||||
self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
|
||||
state.msg += sub_state.msg
|
||||
|
||||
self.__merge_address(free_form, sub_state.addr, line, state)
|
||||
state.person.add_address(sub_state.addr)
|
||||
|
||||
def __person_phon(self, line, state):
|
||||
"""
|
||||
@ -4928,17 +4993,6 @@ class GedcomParser(UpdateCallback):
|
||||
"""
|
||||
self.__parse_change(line, state.family, state.level+1, state)
|
||||
|
||||
def __family_addr(self, line, state):
    """
    Start a new Address for the family from an ADDR line and parse its
    sub-records into it.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    # The text of the ADDR line itself is stored as the street; the
    # lower-level lines are then handled through the address parse table.
    family_address = Address()
    family_address.set_street(line.data)
    state.addr = family_address
    self.__parse_level(state, self.parse_addr_tbl, self.__ignore)
|
||||
|
||||
def __family_attr(self, line, state):
|
||||
"""
|
||||
@param line: The current line in GedLine format
|
||||
@ -5294,15 +5348,18 @@ class GedcomParser(UpdateCallback):
|
||||
@param state: The current state
|
||||
@type state: CurrentState
|
||||
"""
|
||||
free_form = line.data
|
||||
|
||||
sub_state = CurrentState(level=state.level+1)
|
||||
sub_state.location = Location()
|
||||
sub_state.location.set_street(line.data)
|
||||
sub_state.note = []
|
||||
sub_state.event = state.event
|
||||
|
||||
self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
|
||||
state.msg += sub_state.msg
|
||||
|
||||
self.__merge_address(free_form, sub_state.location, line, state)
|
||||
|
||||
location = sub_state.location
|
||||
note_list = sub_state.note
|
||||
|
||||
@ -5703,6 +5760,7 @@ class GedcomParser(UpdateCallback):
|
||||
@type state: CurrentState
|
||||
"""
|
||||
# The ADDR may already have been parsed by the level above
|
||||
assert state.addr.get_street() == ""
|
||||
if state.addr.get_street() != "":
|
||||
self.__add_msg(_("Warn: ADDR overwritten"), line, state)
|
||||
state.addr.set_street(line.data)
|
||||
@ -5973,6 +6031,7 @@ class GedcomParser(UpdateCallback):
|
||||
|
||||
state = CurrentState()
|
||||
state.source = self.__find_or_create_source(self.sid_map[name])
|
||||
# SOURce with the given gramps_id had no title
|
||||
state.source.set_title(_("No title - ID %s") %
|
||||
state.source.get_gramps_id())
|
||||
state.level = level
|
||||
@ -6436,6 +6495,8 @@ class GedcomParser(UpdateCallback):
|
||||
|
||||
def __repo_addr(self, line, state):
|
||||
"""
|
||||
Parses the REPOsitory and HEADer CORP <ADDRESS_STRUCTURE>
|
||||
|
||||
n ADDR <ADDRESS_LINE> {0:1}
|
||||
+1 CONT <ADDRESS_LINE> {0:M}
|
||||
+1 ADR1 <ADDRESS_LINE1> {0:1} (Street)
|
||||
@ -6450,46 +6511,16 @@ class GedcomParser(UpdateCallback):
|
||||
instead they put everything on a single line. Try to determine
|
||||
if this happened, and try to fix it.
|
||||
"""
|
||||
free_form = line.data
|
||||
|
||||
addr = Address()
|
||||
addr.set_street(line.data)
|
||||
|
||||
sub_state = CurrentState()
|
||||
sub_state.level = state.level+1
|
||||
sub_state.addr = addr
|
||||
sub_state = CurrentState(level=state.level + 1)
|
||||
sub_state.addr = Address()
|
||||
|
||||
self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
|
||||
state.msg += sub_state.msg
|
||||
|
||||
text = addr.get_street()
|
||||
if not (addr.get_city() or addr.get_state() or
|
||||
addr.get_postal_code() or addr.get_country()):
|
||||
|
||||
match = ADDR_RE.match(text)
|
||||
if match:
|
||||
groups = match.groups()
|
||||
addr.set_street(groups[0].strip())
|
||||
addr.set_city(groups[2].strip())
|
||||
addr.set_state(groups[3].strip())
|
||||
addr.set_postal_code(groups[4].strip())
|
||||
addr.set_country(groups[5].strip())
|
||||
|
||||
match = ADDR2_RE.match(text)
|
||||
if match:
|
||||
groups = match.groups()
|
||||
addr.set_street(groups[0].strip())
|
||||
addr.set_city(groups[2].strip())
|
||||
addr.set_state(groups[3].strip())
|
||||
addr.set_postal_code(groups[4].strip())
|
||||
|
||||
match = ADDR3_RE.match(text)
|
||||
if match:
|
||||
groups = match.groups()
|
||||
addr.set_street(groups[0].strip())
|
||||
addr.set_city(groups[2].strip())
|
||||
addr.set_state(groups[3].strip())
|
||||
|
||||
state.repo.add_address(addr)
|
||||
self.__merge_address(free_form, sub_state.addr, line, state)
|
||||
state.repo.add_address(sub_state.addr)
|
||||
|
||||
def __repo_phon(self, line, state):
|
||||
"""
|
||||
@ -6526,22 +6557,6 @@ class GedcomParser(UpdateCallback):
|
||||
url.set_type(UrlType(UrlType.EMAIL))
|
||||
state.repo.add_url(url)
|
||||
|
||||
def __location_addr(self, line, state):
    """
    Append the text of an ADDR line to the street of the current location,
    creating the location first if there is none yet.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not state.location:
        state.location = Location()
    location = state.location

    existing = location.get_street()
    addition = line.data.strip()
    # An already present street is kept and the new text is appended after
    # a comma; otherwise the new text becomes the street.
    combined = "%s, %s" % (existing, addition) if existing else addition
    # The street is stored as a single line.
    location.set_street(combined.replace('\n', ' '))
|
||||
|
||||
def __location_date(self, line, state):
|
||||
"""
|
||||
@param line: The current line in GedLine format
|
||||
@ -7394,20 +7409,20 @@ class GedcomParser(UpdateCallback):
|
||||
@param state: The current state
|
||||
@type state: CurrentState
|
||||
"""
|
||||
free_form = line.data
|
||||
|
||||
sub_state = CurrentState(level=state.level + 1)
|
||||
sub_state.location = Location()
|
||||
sub_state.location.set_street(line.data)
|
||||
sub_state.location = state.res
|
||||
|
||||
self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
|
||||
state.msg += sub_state.msg
|
||||
|
||||
location = sub_state.location
|
||||
state.res.set_address(location.get_street())
|
||||
state.res.set_locality(location.get_locality())
|
||||
state.res.set_city(location.get_city())
|
||||
state.res.set_state(location.get_state())
|
||||
state.res.set_country(location.get_country())
|
||||
state.res.set_postal_code(location.get_postal_code())
|
||||
self.__merge_address(free_form, state.res, line, state)
|
||||
# Researcher is a sub-type of LocationBase, so get_street and set_street
|
||||
# which are used in routines called from self.parse_loc_tbl work fine.
|
||||
# Unfortunately, Researcher also has get_address and set_address, so we
|
||||
# need to copy the street into that.
|
||||
state.res.set_address(state.res.get_street())
|
||||
|
||||
def __subm_phon(self, line, state):
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user