Merge pull request #261 from prculley/csv_42

Fix bug 9733 (CSV import fails) and bug 9676 (failure for certain types of cross references), etc.
This commit is contained in:
Sam Manzi 2016-10-29 14:20:38 +11:00 committed by GitHub
commit b0fd9bcceb

View File

@ -44,15 +44,15 @@ LOG = logging.getLogger(".ImportCSV")
#-------------------------------------------------------------------------
#
# GRAMPS modules
# Gramps modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.sgettext
ngettext = glocale.translation.ngettext # else "nearby" comments are ignored
from gramps.gen.lib import (ChildRef, Citation, Event, EventRef, EventType,
Family, FamilyRelType, Name, NameType, Note,
NoteType, Person, Place, Source, Surname, Tag,
from gramps.gen.lib import (ChildRef, Citation, Event, EventRef, EventType,
Family, FamilyRelType, Name, NameType, Note,
NoteType, Person, Place, Source, Surname, Tag,
PlaceName, PlaceType, PlaceRef)
from gramps.gen.db import DbTxn
from gramps.gen.datehandler import parser as _dp
@ -89,7 +89,7 @@ def rd(line_number, row, col, key, default = None):
""" Return Row data by column name """
if key in col:
if col[key] >= len(row):
LOG.warn("missing '%s, on line %d" % (key, line_number))
LOG.warning("missing '%s, on line %d" % (key, line_number))
return default
retval = row[col[key]].strip()
if retval == "":
@ -104,10 +104,10 @@ def importData(dbase, filename, user):
if dbase.get_feature("skip-import-additions"): # don't add source or tags
parser = CSVParser(dbase, user, None)
else:
parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if
parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if
config.get('preferences.tag-on-import') else None))
try:
with open(filename, 'r') as filehandle:
with open(filename, 'rb') as filehandle:
line = filehandle.read(3)
if line == codecs.BOM_UTF8:
filehandle.seek(0)
@ -125,10 +125,10 @@ def importData(dbase, filename, user):
#-------------------------------------------------------------------------
#
# CSV Parser
# CSV Parser
#
#-------------------------------------------------------------------------
class CSVParser(object):
class CSVParser:
"""Class to read data in CSV format from a file object."""
def __init__(self, dbase, user, default_tag_format=None):
self.db = dbase
@ -140,7 +140,7 @@ class CSVParser(object):
self.indi_count = 0
self.place_count = 0
self.pref = {} # person ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet
self.placeref = {}
self.place_types = {}
# Build reverse dictionary, name to type number
@ -154,94 +154,83 @@ class CSVParser(object):
self.place_types[custom_type] = 0
self.place_types[custom_type.lower()] = 0
column2label = {
"surname": ("Lastname", "Surname", _("Surname"), "lastname",
"last_name", "surname", _("surname")),
"firstname": ("Firstname", "Given name", _("Given name"), "Given",
_("Given"), "firstname", "first_name", "given_name",
"given name", _("given name"), "given", _("given")),
"callname": ("Callname", "Call name", _("Call name"), "Call",
_("Call"), "callname", "call_name", "call name", "call",
_("call")),
"title": ("Title", _("Person or Place|Title"), "title", _("Person or Place|title")),
"prefix": ("Prefix", _("Prefix"), "prefix", _("prefix")),
"suffix": ("Suffix", _("Suffix"), "suffix", _("suffix")),
"gender": ("Gender", _("Gender"), "gender", _("gender")),
"source": ("Source", _("Source"), "source", _("source")),
"note": ("Note", _("Note"), "note", _("note")),
"birthplace": ("Birthplace", "Birth place", _("Birth place"),
"birthplace", "birth_place", "birth place", _("birth place")),
"birthplace_id": ("Birthplaceid", "Birth place id", _("Birth place id"),
"birthplaceid", "birth_place_id", "birth place id", _("birth place id"),
"surname": ("lastname", "last_name", "surname", _("surname"),
_("Surname")),
"firstname": ("firstname", "first_name", "given_name", "given",
"given name", _("given name"), _("given"),
_("Given"), _("Given name")),
"callname": ("call name", _("Call name"), "callname", "call_name",
"call", _("Call"), _("call")),
"title": ("title", _("title"), _("Person or Place|title")),
"prefix": ("prefix", _("prefix"), _("Prefix")),
"suffix": ("suffix", _("suffix"), _("Suffix")),
"gender": ("gender", _("gender"), _("Gender")),
"source": ("source", _("source"), _("Source")),
"note": ("note", _("note"), _("Note")),
"birthplace": ("birthplace", "birth_place", "birth place",
_("birth place"), _("Birth place")),
"birthplace_id": ("birthplaceid", "birth_place_id",
"birth place id", _("birth place id"),
"birthplace_id"),
"birthdate": ("Birthdate", "Birth date", _("Birth date"),
"birthdate", "birth_date", "birth date", _("birth date")),
"birthsource": ("Birthsource", "Birth source", _("Birth source"),
"birthsource", "birth_source", "birth source",
_("birth source")),
"baptismplace": ("Baptismplace", "Baptism place",
_("Baptism place"), "baptismplace", "baptism place",
_("baptism place")),
"baptismplace_id": ("Baptismplaceid", "Baptism place id",
_("Baptism place id"), "baptismplaceid", "baptism place id",
"birthdate": ("birthdate", "birth_date", "birth date",
_("birth date")),
"birthsource": ("birthsource", "birth_source", "birth source",
_("birth source")),
"baptismplace": ("baptismplace", "baptism place",
_("baptism place")),
"baptismplace_id": ("baptismplaceid", "baptism place id",
_("baptism place id"), "baptism_place_id",
"baptismplace_id"),
"baptismdate": ("Baptismdate", "Baptism date", _("Baptism date"),
"baptismdate", "baptism date", _("baptism date")),
"baptismsource": ("Baptismsource", "Baptism source",
_("Baptism source"), "baptismsource", "baptism source",
_("baptism source")),
"burialplace": ("Burialplace", "Burial place", _("Burial place"),
"burialplace", "burial place", _("burial place")),
"burialplace_id": ("Burialplaceid", "Burial place id", _("Burial place id"),
"burialplaceid", "burial place id", _("burial place id"),
"burial_place_id", "burialplace_id"),
"burialdate": ("Burialdate", "Burial date", _("Burial date"),
"burialdate", "burial date", _("burial date")),
"burialsource": ("Burialsource", "Burial source",
_("Burial source"), "burialsource", "burial source",
_("burial source")),
"deathplace": ("Deathplace", "Death place", _("Death place"),
"deathplace", "death_place", "death place", _("death place")),
"deathplace_id": ("Deathplaceid", "Death place id", _("Death place id"),
"deathplaceid", "death_place_id", "death place id", _("death place id"),
"death_place_id", "deathplace_id"),
"deathdate": ("Deathdate", "Death date", _("Death date"),
"deathdate", "death_date", "death date", _("death date")),
"deathsource": ("Deathsource", "Death source", _("Death source"),
"deathsource", "death_source", "death source",
_("death source")),
"deathcause": ("Deathcause", "Death cause", _("Death cause"),
"deathcause", "death_cause", "death cause", _("death cause")),
"grampsid": ("Grampsid", "ID", "Gramps id", _("Gramps ID"),
"grampsid", "id", "gramps_id", "gramps id", _("Gramps id")),
"person": ("Person", _("Person"), "person", _("person")),
"baptismdate": ("baptismdate", "baptism date", _("baptism date")),
"baptismsource": ("baptismsource", "baptism source",
_("baptism source")),
"burialplace": ("burialplace", "burial place", _("burial place")),
"burialplace_id": ("burialplaceid", "burial place id",
_("burial place id"), "burial_place_id",
"burialplace_id"),
"burialdate": ("burialdate", "burial date", _("burial date")),
"burialsource": ("burialsource", "burial source",
_("burial source")),
"deathplace": ("deathplace", "death_place", "death place",
_("death place")),
"deathplace_id": ("deathplaceid", "death place id",
_("death place id"), "death_place_id",
"deathplace_id"),
"deathdate": ("deathdate", "death_date", "death date",
_("death date")),
"deathsource": ("deathsource", "death_source", "death source",
_("death source")),
"deathcause": ("deathcause", "death_cause", "death cause",
_("death cause")),
"grampsid": (_("Gramps ID"), "grampsid", "id", "gramps_id",
"gramps id"),
"person": ("person", _("person"), _("Person")),
# ----------------------------------
"child": ("Child", _("Child"), "child", _("child")),
"family": ("Family", _("Family"), "family", _("family")),
"child": ("child", _("child"), _("Child")),
"family": ("family", _("family"), _("Family")),
# ----------------------------------
"wife": ("Mother", _("Mother"), "Wife", _("Wife"), "Parent2",
_("Parent2"), "mother", _("mother"), "wife", _("wife"),
"parent2", _("parent2")),
"husband": ("Father", _("Father"), "Husband", _("Husband"),
"Parent1", _("Parent1"), "father", _("father"), "husband",
_("husband"), "parent1", _("parent1")),
"marriage": ("Marriage", _("Marriage"), "marriage", _("marriage")),
"date": ("Date", _("Date"), "date", _("date")),
"place": ("Place", _("Place"), "place", _("place")),
"place_id": ("Placeid", "place id", "Place id", "place_id", "placeid"),
"name": ("Name", _("Name"), "name", _("name")),
"type": ("Type", _("Type"), "type", _("type")),
"latitude": ("Latitude", _("latitude"), "latitude", _("latitude")),
"longitude": ("Longitude", _("Longitude"), "longitude", _("longitude")),
"code": ("Code", _("Code"), "code", _("code")),
"enclosed_by": ("Enclosed by", _("Enclosed by"), "enclosed by", _("enclosed by"),
"enclosed_by", _("enclosed_by"), "Enclosed_by", _("Enclosed_by"),
"enclosedby")
"wife": ("mother", _("mother"), _("Mother"),
"wife", _("wife"), _("Wife"),
"parent2", _("parent2")),
"husband": ("father", _("father"), _("Father"),
"husband", _("husband"), _("Husband"),
"parent1", _("parent1")),
"marriage": ("marriage", _("marriage"), _("Marriage")),
"date": ("date", _("date"), _("Date")),
"place": ("place", _("place"), _("Place")),
"place_id": ("place id", "place_id", "placeid", _("place id")),
"name": ("name", _("name"), _("Name")),
"type": ("type", _("type"), _("Type")),
"latitude": ("latitude", _("latitude")),
"longitude": ("longitude", _("longitude")),
"code": ("code", _("code"), _("Code")),
"enclosed_by": ("enclosed by", _("enclosed by"),
"enclosed_by", _("enclosed_by"), "enclosedby")
}
lab2col_dict = []
for key in list(column2label.keys()):
for val in column2label[key]:
lab2col_dict.append((val, key))
lab2col_dict.append((val.lower(), key))
self.label2column = dict(lab2col_dict)
if default_tag_format:
name = time.strftime(default_tag_format)
@ -283,10 +272,12 @@ class CSVParser(object):
return self.lookup(type_, id_)
else:
return db_lookup
elif id_.lower() in self.fref:
return self.fref[id_.lower()]
else:
return None
id_ = self.db.fid2user_format(id_)
if id_.lower() in self.fref:
return self.fref[id_.lower()]
else:
return None
elif type_ == "person":
if id_.startswith("[") and id_.endswith("]"):
id_ = self.db.id2user_format(id_[1:-1])
@ -295,24 +286,28 @@ class CSVParser(object):
return self.lookup(type_, id_)
else:
return db_lookup
elif id_.lower() in self.pref:
return self.pref[id_.lower()]
else:
return None
id_ = self.db.id2user_format(id_)
if id_.lower() in self.pref:
return self.pref[id_.lower()]
else:
return None
elif type_ == "place":
if id_.startswith("[") and id_.endswith("]"):
id_ = self.db.id2user_format(id_[1:-1])
id_ = self.db.pid2user_format(id_[1:-1])
db_lookup = self.db.get_place_from_gramps_id(id_)
if db_lookup is None:
return self.lookup(type_, id_)
else:
return db_lookup
elif id_.lower() in self.placeref:
return self.placeref[id_.lower()]
else:
return None
id_ = self.db.pid2user_format(id_)
if id_.lower() in self.placeref:
return self.placeref[id_.lower()]
else:
return None
else:
LOG.warn("invalid lookup type in CSV import: '%s'" % type_)
LOG.warning("invalid lookup type in CSV import: '%s'" % type_)
return None
def storeup(self, type_, id_, object_):
@ -330,7 +325,7 @@ class CSVParser(object):
id_ = self.db.pid2user_format(id_)
self.placeref[id_.lower()] = object_
else:
LOG.warn("invalid storeup type in CSV import: '%s'" % type_)
LOG.warning("invalid storeup type in CSV import: '%s'" % type_)
def parse(self, filehandle):
"""
@ -339,11 +334,11 @@ class CSVParser(object):
:param filehandle: open file handle positioned at start of the file
"""
progress_title = _('CSV Import')
with self.user.progress(progress_title,
with self.user.progress(progress_title,
_('Reading data...'), 1) as step:
data = self.read_csv(filehandle)
with self.user.progress(progress_title,
with self.user.progress(progress_title,
_('Importing data...'), len(data)) as step:
tym = time.time()
self.db.disable_signals()
@ -370,7 +365,7 @@ class CSVParser(object):
self.indi_count = 0
self.place_count = 0
self.pref = {} # person ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet
self.placeref = {}
header = None
line_number = 0
@ -382,7 +377,7 @@ class CSVParser(object):
continue
######################################
if header is None:
header = [self.cleanup_column_name(r) for r in row]
header = [self.cleanup_column_name(r.lower()) for r in row]
col = {}
count = 0
for key in header:
@ -401,7 +396,7 @@ class CSVParser(object):
elif "place" in header:
self._parse_place(line_number, row, col)
else:
LOG.warn("ignoring line %d" % line_number)
LOG.warning("ignoring line %d" % line_number)
return None
def _parse_marriage(self, line_number, row, col):
@ -418,8 +413,8 @@ class CSVParser(object):
husband = self.lookup("person", husband)
if husband is None and wife is None:
# might have children, so go ahead and add
LOG.warn("no parents on line %d; adding family anyway" %
line_number)
LOG.warning("no parents on line %d; adding family anyway" %
line_number)
family = self.get_or_create_family(marriage_ref, husband, wife)
# adjust gender, if not already provided
if husband:
@ -433,7 +428,7 @@ class CSVParser(object):
wife.set_gender(Person.FEMALE)
self.db.commit_person(wife, self.trans)
if marriage_ref:
self.storeup("family", marriage_ref.lower(), family)
self.storeup("family", marriage_ref, family)
if marriagesource:
# add, if new
new, marriagesource = self.get_or_create_source(marriagesource)
@ -489,8 +484,8 @@ class CSVParser(object):
"Parse the content of a family line"
family_ref = rd(line_number, row, col, "family")
if family_ref is None:
LOG.warn("no family reference found for family on line %d" %
line_number)
LOG.warning("no family reference found for family on line %d" %
line_number)
return # required
child = rd(line_number, row, col, "child")
source = rd(line_number, row, col, "source")
@ -499,12 +494,12 @@ class CSVParser(object):
child = self.lookup("person", child)
family = self.lookup("family", family_ref)
if family is None:
LOG.warn("no matching family reference found for family "
"on line %d" % line_number)
LOG.warning("no matching family reference found for family "
"on line %d" % line_number)
return
if child is None:
LOG.warn("no matching child reference found for family "
"on line %d" % line_number)
LOG.warning("no matching child reference found for family "
"on line %d" % line_number)
return
# is this child already in this family? If so, don't add
LOG.debug("children: %s", [ref.ref for ref in
@ -596,8 +591,8 @@ class CSVParser(object):
person = self.lookup("person", person_ref)
if person is None:
if surname is None:
LOG.warn("empty surname for new person on line %d" %
line_number)
LOG.warning("empty surname for new person on line %d" %
line_number)
surname = ""
# new person
person = self.create_person()
@ -681,8 +676,8 @@ class CSVParser(object):
if birthsource is not None:
new, birthsource = self.get_or_create_source(birthsource)
if birthdate or birthplace or birthsource:
new, birth = self.get_or_create_event(person,
EventType.BIRTH, birthdate,
new, birth = self.get_or_create_event(person,
EventType.BIRTH, birthdate,
birthplace, birthsource)
birth_ref = person.get_birth_ref()
if birth_ref is None:
@ -703,8 +698,8 @@ class CSVParser(object):
if baptismsource is not None:
new, baptismsource = self.get_or_create_source(baptismsource)
if baptismdate or baptismplace or baptismsource:
new, baptism = self.get_or_create_event(person,
EventType.BAPTISM, baptismdate,
new, baptism = self.get_or_create_event(person,
EventType.BAPTISM, baptismdate,
baptismplace, baptismsource)
baptism_ref = get_primary_event_ref_from_type(self.db, person,
"Baptism")
@ -751,8 +746,8 @@ class CSVParser(object):
if burialsource is not None:
new, burialsource = self.get_or_create_source(burialsource)
if burialdate or burialplace or burialsource:
new, burial = self.get_or_create_event(person,
EventType.BURIAL, burialdate,
new, burial = self.get_or_create_event(person,
EventType.BURIAL, burialdate,
burialplace, burialsource)
burial_ref = get_primary_event_ref_from_type(self.db, person,
"Burial")
@ -786,8 +781,8 @@ class CSVParser(object):
place = self.create_place()
if place_id is not None:
if place_id.startswith("[") and place_id.endswith("]"):
place.gramps_id = self.db.id2user_format(place_id[1:-1])
self.storeup("place", place_id.lower(), place)
place.gramps_id = self.db.pid2user_format(place_id[1:-1])
self.storeup("place", place_id, place)
if place_title is not None:
place.title = place_title
if place_name is not None:
@ -866,7 +861,7 @@ class CSVParser(object):
self.db.commit_person(wife, self.trans)
self.fam_count += 1
return family
def get_or_create_event(self, object_, type_, date=None, place=None,
source=None):
""" Add or find a type event on object """
@ -903,7 +898,7 @@ class CSVParser(object):
self.find_and_set_citation(event, source)
self.db.add_event(event, self.trans)
return (1, event)
def create_person(self):
""" Used to create a new person we know doesn't exist """
person = Person()
@ -973,6 +968,6 @@ class CSVParser(object):
LOG.debug(" creating citation")
citation.set_reference_handle(source.get_handle())
self.db.add_citation(citation, self.trans)
LOG.debug(" created citation, citation %s %s" %
LOG.debug(" created citation, citation %s %s" %
(citation, citation.get_gramps_id()))
obj.add_citation(citation.get_handle())