Merge pull request #261 from prculley/csv_42

Fix bug 9733 (CSV import fails) and bug 9676 (failure for certain types of cross references), etc.
This commit is contained in:
Sam Manzi 2016-10-29 14:20:38 +11:00 committed by GitHub
commit b0fd9bcceb

View File

@ -44,7 +44,7 @@ LOG = logging.getLogger(".ImportCSV")
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
# GRAMPS modules # Gramps modules
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale from gramps.gen.const import GRAMPS_LOCALE as glocale
@ -89,7 +89,7 @@ def rd(line_number, row, col, key, default = None):
""" Return Row data by column name """ """ Return Row data by column name """
if key in col: if key in col:
if col[key] >= len(row): if col[key] >= len(row):
LOG.warn("missing '%s, on line %d" % (key, line_number)) LOG.warning("missing '%s, on line %d" % (key, line_number))
return default return default
retval = row[col[key]].strip() retval = row[col[key]].strip()
if retval == "": if retval == "":
@ -107,7 +107,7 @@ def importData(dbase, filename, user):
parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if
config.get('preferences.tag-on-import') else None)) config.get('preferences.tag-on-import') else None))
try: try:
with open(filename, 'r') as filehandle: with open(filename, 'rb') as filehandle:
line = filehandle.read(3) line = filehandle.read(3)
if line == codecs.BOM_UTF8: if line == codecs.BOM_UTF8:
filehandle.seek(0) filehandle.seek(0)
@ -128,7 +128,7 @@ def importData(dbase, filename, user):
# CSV Parser # CSV Parser
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
class CSVParser(object): class CSVParser:
"""Class to read data in CSV format from a file object.""" """Class to read data in CSV format from a file object."""
def __init__(self, dbase, user, default_tag_format=None): def __init__(self, dbase, user, default_tag_format=None):
self.db = dbase self.db = dbase
@ -154,94 +154,83 @@ class CSVParser(object):
self.place_types[custom_type] = 0 self.place_types[custom_type] = 0
self.place_types[custom_type.lower()] = 0 self.place_types[custom_type.lower()] = 0
column2label = { column2label = {
"surname": ("Lastname", "Surname", _("Surname"), "lastname", "surname": ("lastname", "last_name", "surname", _("surname"),
"last_name", "surname", _("surname")), _("Surname")),
"firstname": ("Firstname", "Given name", _("Given name"), "Given", "firstname": ("firstname", "first_name", "given_name", "given",
_("Given"), "firstname", "first_name", "given_name", "given name", _("given name"), _("given"),
"given name", _("given name"), "given", _("given")), _("Given"), _("Given name")),
"callname": ("Callname", "Call name", _("Call name"), "Call", "callname": ("call name", _("Call name"), "callname", "call_name",
_("Call"), "callname", "call_name", "call name", "call", "call", _("Call"), _("call")),
_("call")), "title": ("title", _("title"), _("Person or Place|title")),
"title": ("Title", _("Person or Place|Title"), "title", _("Person or Place|title")), "prefix": ("prefix", _("prefix"), _("Prefix")),
"prefix": ("Prefix", _("Prefix"), "prefix", _("prefix")), "suffix": ("suffix", _("suffix"), _("Suffix")),
"suffix": ("Suffix", _("Suffix"), "suffix", _("suffix")), "gender": ("gender", _("gender"), _("Gender")),
"gender": ("Gender", _("Gender"), "gender", _("gender")), "source": ("source", _("source"), _("Source")),
"source": ("Source", _("Source"), "source", _("source")), "note": ("note", _("note"), _("Note")),
"note": ("Note", _("Note"), "note", _("note")), "birthplace": ("birthplace", "birth_place", "birth place",
"birthplace": ("Birthplace", "Birth place", _("Birth place"), _("birth place"), _("Birth place")),
"birthplace", "birth_place", "birth place", _("birth place")), "birthplace_id": ("birthplaceid", "birth_place_id",
"birthplace_id": ("Birthplaceid", "Birth place id", _("Birth place id"), "birth place id", _("birth place id"),
"birthplaceid", "birth_place_id", "birth place id", _("birth place id"),
"birthplace_id"), "birthplace_id"),
"birthdate": ("Birthdate", "Birth date", _("Birth date"), "birthdate": ("birthdate", "birth_date", "birth date",
"birthdate", "birth_date", "birth date", _("birth date")), _("birth date")),
"birthsource": ("Birthsource", "Birth source", _("Birth source"), "birthsource": ("birthsource", "birth_source", "birth source",
"birthsource", "birth_source", "birth source",
_("birth source")), _("birth source")),
"baptismplace": ("Baptismplace", "Baptism place", "baptismplace": ("baptismplace", "baptism place",
_("Baptism place"), "baptismplace", "baptism place",
_("baptism place")), _("baptism place")),
"baptismplace_id": ("Baptismplaceid", "Baptism place id", "baptismplace_id": ("baptismplaceid", "baptism place id",
_("Baptism place id"), "baptismplaceid", "baptism place id",
_("baptism place id"), "baptism_place_id", _("baptism place id"), "baptism_place_id",
"baptismplace_id"), "baptismplace_id"),
"baptismdate": ("Baptismdate", "Baptism date", _("Baptism date"), "baptismdate": ("baptismdate", "baptism date", _("baptism date")),
"baptismdate", "baptism date", _("baptism date")), "baptismsource": ("baptismsource", "baptism source",
"baptismsource": ("Baptismsource", "Baptism source",
_("Baptism source"), "baptismsource", "baptism source",
_("baptism source")), _("baptism source")),
"burialplace": ("Burialplace", "Burial place", _("Burial place"), "burialplace": ("burialplace", "burial place", _("burial place")),
"burialplace", "burial place", _("burial place")), "burialplace_id": ("burialplaceid", "burial place id",
"burialplace_id": ("Burialplaceid", "Burial place id", _("Burial place id"), _("burial place id"), "burial_place_id",
"burialplaceid", "burial place id", _("burial place id"), "burialplace_id"),
"burial_place_id", "burialplace_id"), "burialdate": ("burialdate", "burial date", _("burial date")),
"burialdate": ("Burialdate", "Burial date", _("Burial date"), "burialsource": ("burialsource", "burial source",
"burialdate", "burial date", _("burial date")),
"burialsource": ("Burialsource", "Burial source",
_("Burial source"), "burialsource", "burial source",
_("burial source")), _("burial source")),
"deathplace": ("Deathplace", "Death place", _("Death place"), "deathplace": ("deathplace", "death_place", "death place",
"deathplace", "death_place", "death place", _("death place")), _("death place")),
"deathplace_id": ("Deathplaceid", "Death place id", _("Death place id"), "deathplace_id": ("deathplaceid", "death place id",
"deathplaceid", "death_place_id", "death place id", _("death place id"), _("death place id"), "death_place_id",
"death_place_id", "deathplace_id"), "deathplace_id"),
"deathdate": ("Deathdate", "Death date", _("Death date"), "deathdate": ("deathdate", "death_date", "death date",
"deathdate", "death_date", "death date", _("death date")), _("death date")),
"deathsource": ("Deathsource", "Death source", _("Death source"), "deathsource": ("deathsource", "death_source", "death source",
"deathsource", "death_source", "death source",
_("death source")), _("death source")),
"deathcause": ("Deathcause", "Death cause", _("Death cause"), "deathcause": ("deathcause", "death_cause", "death cause",
"deathcause", "death_cause", "death cause", _("death cause")), _("death cause")),
"grampsid": ("Grampsid", "ID", "Gramps id", _("Gramps ID"), "grampsid": (_("Gramps ID"), "grampsid", "id", "gramps_id",
"grampsid", "id", "gramps_id", "gramps id", _("Gramps id")), "gramps id"),
"person": ("Person", _("Person"), "person", _("person")), "person": ("person", _("person"), _("Person")),
# ---------------------------------- # ----------------------------------
"child": ("Child", _("Child"), "child", _("child")), "child": ("child", _("child"), _("Child")),
"family": ("Family", _("Family"), "family", _("family")), "family": ("family", _("family"), _("Family")),
# ---------------------------------- # ----------------------------------
"wife": ("Mother", _("Mother"), "Wife", _("Wife"), "Parent2", "wife": ("mother", _("mother"), _("Mother"),
_("Parent2"), "mother", _("mother"), "wife", _("wife"), "wife", _("wife"), _("Wife"),
"parent2", _("parent2")), "parent2", _("parent2")),
"husband": ("Father", _("Father"), "Husband", _("Husband"), "husband": ("father", _("father"), _("Father"),
"Parent1", _("Parent1"), "father", _("father"), "husband", "husband", _("husband"), _("Husband"),
_("husband"), "parent1", _("parent1")), "parent1", _("parent1")),
"marriage": ("Marriage", _("Marriage"), "marriage", _("marriage")), "marriage": ("marriage", _("marriage"), _("Marriage")),
"date": ("Date", _("Date"), "date", _("date")), "date": ("date", _("date"), _("Date")),
"place": ("Place", _("Place"), "place", _("place")), "place": ("place", _("place"), _("Place")),
"place_id": ("Placeid", "place id", "Place id", "place_id", "placeid"), "place_id": ("place id", "place_id", "placeid", _("place id")),
"name": ("Name", _("Name"), "name", _("name")), "name": ("name", _("name"), _("Name")),
"type": ("Type", _("Type"), "type", _("type")), "type": ("type", _("type"), _("Type")),
"latitude": ("Latitude", _("latitude"), "latitude", _("latitude")), "latitude": ("latitude", _("latitude")),
"longitude": ("Longitude", _("Longitude"), "longitude", _("longitude")), "longitude": ("longitude", _("longitude")),
"code": ("Code", _("Code"), "code", _("code")), "code": ("code", _("code"), _("Code")),
"enclosed_by": ("Enclosed by", _("Enclosed by"), "enclosed by", _("enclosed by"), "enclosed_by": ("enclosed by", _("enclosed by"),
"enclosed_by", _("enclosed_by"), "Enclosed_by", _("Enclosed_by"), "enclosed_by", _("enclosed_by"), "enclosedby")
"enclosedby")
} }
lab2col_dict = [] lab2col_dict = []
for key in list(column2label.keys()): for key in list(column2label.keys()):
for val in column2label[key]: for val in column2label[key]:
lab2col_dict.append((val, key)) lab2col_dict.append((val.lower(), key))
self.label2column = dict(lab2col_dict) self.label2column = dict(lab2col_dict)
if default_tag_format: if default_tag_format:
name = time.strftime(default_tag_format) name = time.strftime(default_tag_format)
@ -283,7 +272,9 @@ class CSVParser(object):
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.fref: else:
id_ = self.db.fid2user_format(id_)
if id_.lower() in self.fref:
return self.fref[id_.lower()] return self.fref[id_.lower()]
else: else:
return None return None
@ -295,24 +286,28 @@ class CSVParser(object):
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.pref: else:
id_ = self.db.id2user_format(id_)
if id_.lower() in self.pref:
return self.pref[id_.lower()] return self.pref[id_.lower()]
else: else:
return None return None
elif type_ == "place": elif type_ == "place":
if id_.startswith("[") and id_.endswith("]"): if id_.startswith("[") and id_.endswith("]"):
id_ = self.db.id2user_format(id_[1:-1]) id_ = self.db.pid2user_format(id_[1:-1])
db_lookup = self.db.get_place_from_gramps_id(id_) db_lookup = self.db.get_place_from_gramps_id(id_)
if db_lookup is None: if db_lookup is None:
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.placeref: else:
id_ = self.db.pid2user_format(id_)
if id_.lower() in self.placeref:
return self.placeref[id_.lower()] return self.placeref[id_.lower()]
else: else:
return None return None
else: else:
LOG.warn("invalid lookup type in CSV import: '%s'" % type_) LOG.warning("invalid lookup type in CSV import: '%s'" % type_)
return None return None
def storeup(self, type_, id_, object_): def storeup(self, type_, id_, object_):
@ -330,7 +325,7 @@ class CSVParser(object):
id_ = self.db.pid2user_format(id_) id_ = self.db.pid2user_format(id_)
self.placeref[id_.lower()] = object_ self.placeref[id_.lower()] = object_
else: else:
LOG.warn("invalid storeup type in CSV import: '%s'" % type_) LOG.warning("invalid storeup type in CSV import: '%s'" % type_)
def parse(self, filehandle): def parse(self, filehandle):
""" """
@ -382,7 +377,7 @@ class CSVParser(object):
continue continue
###################################### ######################################
if header is None: if header is None:
header = [self.cleanup_column_name(r) for r in row] header = [self.cleanup_column_name(r.lower()) for r in row]
col = {} col = {}
count = 0 count = 0
for key in header: for key in header:
@ -401,7 +396,7 @@ class CSVParser(object):
elif "place" in header: elif "place" in header:
self._parse_place(line_number, row, col) self._parse_place(line_number, row, col)
else: else:
LOG.warn("ignoring line %d" % line_number) LOG.warning("ignoring line %d" % line_number)
return None return None
def _parse_marriage(self, line_number, row, col): def _parse_marriage(self, line_number, row, col):
@ -418,7 +413,7 @@ class CSVParser(object):
husband = self.lookup("person", husband) husband = self.lookup("person", husband)
if husband is None and wife is None: if husband is None and wife is None:
# might have children, so go ahead and add # might have children, so go ahead and add
LOG.warn("no parents on line %d; adding family anyway" % LOG.warning("no parents on line %d; adding family anyway" %
line_number) line_number)
family = self.get_or_create_family(marriage_ref, husband, wife) family = self.get_or_create_family(marriage_ref, husband, wife)
# adjust gender, if not already provided # adjust gender, if not already provided
@ -433,7 +428,7 @@ class CSVParser(object):
wife.set_gender(Person.FEMALE) wife.set_gender(Person.FEMALE)
self.db.commit_person(wife, self.trans) self.db.commit_person(wife, self.trans)
if marriage_ref: if marriage_ref:
self.storeup("family", marriage_ref.lower(), family) self.storeup("family", marriage_ref, family)
if marriagesource: if marriagesource:
# add, if new # add, if new
new, marriagesource = self.get_or_create_source(marriagesource) new, marriagesource = self.get_or_create_source(marriagesource)
@ -489,7 +484,7 @@ class CSVParser(object):
"Parse the content of a family line" "Parse the content of a family line"
family_ref = rd(line_number, row, col, "family") family_ref = rd(line_number, row, col, "family")
if family_ref is None: if family_ref is None:
LOG.warn("no family reference found for family on line %d" % LOG.warning("no family reference found for family on line %d" %
line_number) line_number)
return # required return # required
child = rd(line_number, row, col, "child") child = rd(line_number, row, col, "child")
@ -499,11 +494,11 @@ class CSVParser(object):
child = self.lookup("person", child) child = self.lookup("person", child)
family = self.lookup("family", family_ref) family = self.lookup("family", family_ref)
if family is None: if family is None:
LOG.warn("no matching family reference found for family " LOG.warning("no matching family reference found for family "
"on line %d" % line_number) "on line %d" % line_number)
return return
if child is None: if child is None:
LOG.warn("no matching child reference found for family " LOG.warning("no matching child reference found for family "
"on line %d" % line_number) "on line %d" % line_number)
return return
# is this child already in this family? If so, don't add # is this child already in this family? If so, don't add
@ -596,7 +591,7 @@ class CSVParser(object):
person = self.lookup("person", person_ref) person = self.lookup("person", person_ref)
if person is None: if person is None:
if surname is None: if surname is None:
LOG.warn("empty surname for new person on line %d" % LOG.warning("empty surname for new person on line %d" %
line_number) line_number)
surname = "" surname = ""
# new person # new person
@ -786,8 +781,8 @@ class CSVParser(object):
place = self.create_place() place = self.create_place()
if place_id is not None: if place_id is not None:
if place_id.startswith("[") and place_id.endswith("]"): if place_id.startswith("[") and place_id.endswith("]"):
place.gramps_id = self.db.id2user_format(place_id[1:-1]) place.gramps_id = self.db.pid2user_format(place_id[1:-1])
self.storeup("place", place_id.lower(), place) self.storeup("place", place_id, place)
if place_title is not None: if place_title is not None:
place.title = place_title place.title = place_title
if place_name is not None: if place_name is not None: