Merge pull request #261 from prculley/csv_42

Fix bug 9733 (CSV import fails) and bug 9676 (failure for certain types of cross references), etc.
This commit is contained in:
Sam Manzi 2016-10-29 14:20:38 +11:00 committed by GitHub
commit b0fd9bcceb

View File

@ -44,15 +44,15 @@ LOG = logging.getLogger(".ImportCSV")
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
# GRAMPS modules # Gramps modules
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.sgettext _ = glocale.translation.sgettext
ngettext = glocale.translation.ngettext # else "nearby" comments are ignored ngettext = glocale.translation.ngettext # else "nearby" comments are ignored
from gramps.gen.lib import (ChildRef, Citation, Event, EventRef, EventType, from gramps.gen.lib import (ChildRef, Citation, Event, EventRef, EventType,
Family, FamilyRelType, Name, NameType, Note, Family, FamilyRelType, Name, NameType, Note,
NoteType, Person, Place, Source, Surname, Tag, NoteType, Person, Place, Source, Surname, Tag,
PlaceName, PlaceType, PlaceRef) PlaceName, PlaceType, PlaceRef)
from gramps.gen.db import DbTxn from gramps.gen.db import DbTxn
from gramps.gen.datehandler import parser as _dp from gramps.gen.datehandler import parser as _dp
@ -89,7 +89,7 @@ def rd(line_number, row, col, key, default = None):
""" Return Row data by column name """ """ Return Row data by column name """
if key in col: if key in col:
if col[key] >= len(row): if col[key] >= len(row):
LOG.warn("missing '%s, on line %d" % (key, line_number)) LOG.warning("missing '%s, on line %d" % (key, line_number))
return default return default
retval = row[col[key]].strip() retval = row[col[key]].strip()
if retval == "": if retval == "":
@ -104,10 +104,10 @@ def importData(dbase, filename, user):
if dbase.get_feature("skip-import-additions"): # don't add source or tags if dbase.get_feature("skip-import-additions"): # don't add source or tags
parser = CSVParser(dbase, user, None) parser = CSVParser(dbase, user, None)
else: else:
parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if parser = CSVParser(dbase, user, (config.get('preferences.tag-on-import-format') if
config.get('preferences.tag-on-import') else None)) config.get('preferences.tag-on-import') else None))
try: try:
with open(filename, 'r') as filehandle: with open(filename, 'rb') as filehandle:
line = filehandle.read(3) line = filehandle.read(3)
if line == codecs.BOM_UTF8: if line == codecs.BOM_UTF8:
filehandle.seek(0) filehandle.seek(0)
@ -125,10 +125,10 @@ def importData(dbase, filename, user):
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
# #
# CSV Parser # CSV Parser
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
class CSVParser(object): class CSVParser:
"""Class to read data in CSV format from a file object.""" """Class to read data in CSV format from a file object."""
def __init__(self, dbase, user, default_tag_format=None): def __init__(self, dbase, user, default_tag_format=None):
self.db = dbase self.db = dbase
@ -140,7 +140,7 @@ class CSVParser(object):
self.indi_count = 0 self.indi_count = 0
self.place_count = 0 self.place_count = 0
self.pref = {} # person ref, internal to this sheet self.pref = {} # person ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet self.fref = {} # family ref, internal to this sheet
self.placeref = {} self.placeref = {}
self.place_types = {} self.place_types = {}
# Build reverse dictionary, name to type number # Build reverse dictionary, name to type number
@ -154,94 +154,83 @@ class CSVParser(object):
self.place_types[custom_type] = 0 self.place_types[custom_type] = 0
self.place_types[custom_type.lower()] = 0 self.place_types[custom_type.lower()] = 0
column2label = { column2label = {
"surname": ("Lastname", "Surname", _("Surname"), "lastname", "surname": ("lastname", "last_name", "surname", _("surname"),
"last_name", "surname", _("surname")), _("Surname")),
"firstname": ("Firstname", "Given name", _("Given name"), "Given", "firstname": ("firstname", "first_name", "given_name", "given",
_("Given"), "firstname", "first_name", "given_name", "given name", _("given name"), _("given"),
"given name", _("given name"), "given", _("given")), _("Given"), _("Given name")),
"callname": ("Callname", "Call name", _("Call name"), "Call", "callname": ("call name", _("Call name"), "callname", "call_name",
_("Call"), "callname", "call_name", "call name", "call", "call", _("Call"), _("call")),
_("call")), "title": ("title", _("title"), _("Person or Place|title")),
"title": ("Title", _("Person or Place|Title"), "title", _("Person or Place|title")), "prefix": ("prefix", _("prefix"), _("Prefix")),
"prefix": ("Prefix", _("Prefix"), "prefix", _("prefix")), "suffix": ("suffix", _("suffix"), _("Suffix")),
"suffix": ("Suffix", _("Suffix"), "suffix", _("suffix")), "gender": ("gender", _("gender"), _("Gender")),
"gender": ("Gender", _("Gender"), "gender", _("gender")), "source": ("source", _("source"), _("Source")),
"source": ("Source", _("Source"), "source", _("source")), "note": ("note", _("note"), _("Note")),
"note": ("Note", _("Note"), "note", _("note")), "birthplace": ("birthplace", "birth_place", "birth place",
"birthplace": ("Birthplace", "Birth place", _("Birth place"), _("birth place"), _("Birth place")),
"birthplace", "birth_place", "birth place", _("birth place")), "birthplace_id": ("birthplaceid", "birth_place_id",
"birthplace_id": ("Birthplaceid", "Birth place id", _("Birth place id"), "birth place id", _("birth place id"),
"birthplaceid", "birth_place_id", "birth place id", _("birth place id"),
"birthplace_id"), "birthplace_id"),
"birthdate": ("Birthdate", "Birth date", _("Birth date"), "birthdate": ("birthdate", "birth_date", "birth date",
"birthdate", "birth_date", "birth date", _("birth date")), _("birth date")),
"birthsource": ("Birthsource", "Birth source", _("Birth source"), "birthsource": ("birthsource", "birth_source", "birth source",
"birthsource", "birth_source", "birth source", _("birth source")),
_("birth source")), "baptismplace": ("baptismplace", "baptism place",
"baptismplace": ("Baptismplace", "Baptism place", _("baptism place")),
_("Baptism place"), "baptismplace", "baptism place", "baptismplace_id": ("baptismplaceid", "baptism place id",
_("baptism place")),
"baptismplace_id": ("Baptismplaceid", "Baptism place id",
_("Baptism place id"), "baptismplaceid", "baptism place id",
_("baptism place id"), "baptism_place_id", _("baptism place id"), "baptism_place_id",
"baptismplace_id"), "baptismplace_id"),
"baptismdate": ("Baptismdate", "Baptism date", _("Baptism date"), "baptismdate": ("baptismdate", "baptism date", _("baptism date")),
"baptismdate", "baptism date", _("baptism date")), "baptismsource": ("baptismsource", "baptism source",
"baptismsource": ("Baptismsource", "Baptism source", _("baptism source")),
_("Baptism source"), "baptismsource", "baptism source", "burialplace": ("burialplace", "burial place", _("burial place")),
_("baptism source")), "burialplace_id": ("burialplaceid", "burial place id",
"burialplace": ("Burialplace", "Burial place", _("Burial place"), _("burial place id"), "burial_place_id",
"burialplace", "burial place", _("burial place")), "burialplace_id"),
"burialplace_id": ("Burialplaceid", "Burial place id", _("Burial place id"), "burialdate": ("burialdate", "burial date", _("burial date")),
"burialplaceid", "burial place id", _("burial place id"), "burialsource": ("burialsource", "burial source",
"burial_place_id", "burialplace_id"), _("burial source")),
"burialdate": ("Burialdate", "Burial date", _("Burial date"), "deathplace": ("deathplace", "death_place", "death place",
"burialdate", "burial date", _("burial date")), _("death place")),
"burialsource": ("Burialsource", "Burial source", "deathplace_id": ("deathplaceid", "death place id",
_("Burial source"), "burialsource", "burial source", _("death place id"), "death_place_id",
_("burial source")), "deathplace_id"),
"deathplace": ("Deathplace", "Death place", _("Death place"), "deathdate": ("deathdate", "death_date", "death date",
"deathplace", "death_place", "death place", _("death place")), _("death date")),
"deathplace_id": ("Deathplaceid", "Death place id", _("Death place id"), "deathsource": ("deathsource", "death_source", "death source",
"deathplaceid", "death_place_id", "death place id", _("death place id"), _("death source")),
"death_place_id", "deathplace_id"), "deathcause": ("deathcause", "death_cause", "death cause",
"deathdate": ("Deathdate", "Death date", _("Death date"), _("death cause")),
"deathdate", "death_date", "death date", _("death date")), "grampsid": (_("Gramps ID"), "grampsid", "id", "gramps_id",
"deathsource": ("Deathsource", "Death source", _("Death source"), "gramps id"),
"deathsource", "death_source", "death source", "person": ("person", _("person"), _("Person")),
_("death source")),
"deathcause": ("Deathcause", "Death cause", _("Death cause"),
"deathcause", "death_cause", "death cause", _("death cause")),
"grampsid": ("Grampsid", "ID", "Gramps id", _("Gramps ID"),
"grampsid", "id", "gramps_id", "gramps id", _("Gramps id")),
"person": ("Person", _("Person"), "person", _("person")),
# ---------------------------------- # ----------------------------------
"child": ("Child", _("Child"), "child", _("child")), "child": ("child", _("child"), _("Child")),
"family": ("Family", _("Family"), "family", _("family")), "family": ("family", _("family"), _("Family")),
# ---------------------------------- # ----------------------------------
"wife": ("Mother", _("Mother"), "Wife", _("Wife"), "Parent2", "wife": ("mother", _("mother"), _("Mother"),
_("Parent2"), "mother", _("mother"), "wife", _("wife"), "wife", _("wife"), _("Wife"),
"parent2", _("parent2")), "parent2", _("parent2")),
"husband": ("Father", _("Father"), "Husband", _("Husband"), "husband": ("father", _("father"), _("Father"),
"Parent1", _("Parent1"), "father", _("father"), "husband", "husband", _("husband"), _("Husband"),
_("husband"), "parent1", _("parent1")), "parent1", _("parent1")),
"marriage": ("Marriage", _("Marriage"), "marriage", _("marriage")), "marriage": ("marriage", _("marriage"), _("Marriage")),
"date": ("Date", _("Date"), "date", _("date")), "date": ("date", _("date"), _("Date")),
"place": ("Place", _("Place"), "place", _("place")), "place": ("place", _("place"), _("Place")),
"place_id": ("Placeid", "place id", "Place id", "place_id", "placeid"), "place_id": ("place id", "place_id", "placeid", _("place id")),
"name": ("Name", _("Name"), "name", _("name")), "name": ("name", _("name"), _("Name")),
"type": ("Type", _("Type"), "type", _("type")), "type": ("type", _("type"), _("Type")),
"latitude": ("Latitude", _("latitude"), "latitude", _("latitude")), "latitude": ("latitude", _("latitude")),
"longitude": ("Longitude", _("Longitude"), "longitude", _("longitude")), "longitude": ("longitude", _("longitude")),
"code": ("Code", _("Code"), "code", _("code")), "code": ("code", _("code"), _("Code")),
"enclosed_by": ("Enclosed by", _("Enclosed by"), "enclosed by", _("enclosed by"), "enclosed_by": ("enclosed by", _("enclosed by"),
"enclosed_by", _("enclosed_by"), "Enclosed_by", _("Enclosed_by"), "enclosed_by", _("enclosed_by"), "enclosedby")
"enclosedby")
} }
lab2col_dict = [] lab2col_dict = []
for key in list(column2label.keys()): for key in list(column2label.keys()):
for val in column2label[key]: for val in column2label[key]:
lab2col_dict.append((val, key)) lab2col_dict.append((val.lower(), key))
self.label2column = dict(lab2col_dict) self.label2column = dict(lab2col_dict)
if default_tag_format: if default_tag_format:
name = time.strftime(default_tag_format) name = time.strftime(default_tag_format)
@ -283,10 +272,12 @@ class CSVParser(object):
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.fref:
return self.fref[id_.lower()]
else: else:
return None id_ = self.db.fid2user_format(id_)
if id_.lower() in self.fref:
return self.fref[id_.lower()]
else:
return None
elif type_ == "person": elif type_ == "person":
if id_.startswith("[") and id_.endswith("]"): if id_.startswith("[") and id_.endswith("]"):
id_ = self.db.id2user_format(id_[1:-1]) id_ = self.db.id2user_format(id_[1:-1])
@ -295,24 +286,28 @@ class CSVParser(object):
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.pref:
return self.pref[id_.lower()]
else: else:
return None id_ = self.db.id2user_format(id_)
if id_.lower() in self.pref:
return self.pref[id_.lower()]
else:
return None
elif type_ == "place": elif type_ == "place":
if id_.startswith("[") and id_.endswith("]"): if id_.startswith("[") and id_.endswith("]"):
id_ = self.db.id2user_format(id_[1:-1]) id_ = self.db.pid2user_format(id_[1:-1])
db_lookup = self.db.get_place_from_gramps_id(id_) db_lookup = self.db.get_place_from_gramps_id(id_)
if db_lookup is None: if db_lookup is None:
return self.lookup(type_, id_) return self.lookup(type_, id_)
else: else:
return db_lookup return db_lookup
elif id_.lower() in self.placeref:
return self.placeref[id_.lower()]
else: else:
return None id_ = self.db.pid2user_format(id_)
if id_.lower() in self.placeref:
return self.placeref[id_.lower()]
else:
return None
else: else:
LOG.warn("invalid lookup type in CSV import: '%s'" % type_) LOG.warning("invalid lookup type in CSV import: '%s'" % type_)
return None return None
def storeup(self, type_, id_, object_): def storeup(self, type_, id_, object_):
@ -330,7 +325,7 @@ class CSVParser(object):
id_ = self.db.pid2user_format(id_) id_ = self.db.pid2user_format(id_)
self.placeref[id_.lower()] = object_ self.placeref[id_.lower()] = object_
else: else:
LOG.warn("invalid storeup type in CSV import: '%s'" % type_) LOG.warning("invalid storeup type in CSV import: '%s'" % type_)
def parse(self, filehandle): def parse(self, filehandle):
""" """
@ -339,11 +334,11 @@ class CSVParser(object):
:param filehandle: open file handle positioned at start of the file :param filehandle: open file handle positioned at start of the file
""" """
progress_title = _('CSV Import') progress_title = _('CSV Import')
with self.user.progress(progress_title, with self.user.progress(progress_title,
_('Reading data...'), 1) as step: _('Reading data...'), 1) as step:
data = self.read_csv(filehandle) data = self.read_csv(filehandle)
with self.user.progress(progress_title, with self.user.progress(progress_title,
_('Importing data...'), len(data)) as step: _('Importing data...'), len(data)) as step:
tym = time.time() tym = time.time()
self.db.disable_signals() self.db.disable_signals()
@ -370,7 +365,7 @@ class CSVParser(object):
self.indi_count = 0 self.indi_count = 0
self.place_count = 0 self.place_count = 0
self.pref = {} # person ref, internal to this sheet self.pref = {} # person ref, internal to this sheet
self.fref = {} # family ref, internal to this sheet self.fref = {} # family ref, internal to this sheet
self.placeref = {} self.placeref = {}
header = None header = None
line_number = 0 line_number = 0
@ -382,7 +377,7 @@ class CSVParser(object):
continue continue
###################################### ######################################
if header is None: if header is None:
header = [self.cleanup_column_name(r) for r in row] header = [self.cleanup_column_name(r.lower()) for r in row]
col = {} col = {}
count = 0 count = 0
for key in header: for key in header:
@ -401,7 +396,7 @@ class CSVParser(object):
elif "place" in header: elif "place" in header:
self._parse_place(line_number, row, col) self._parse_place(line_number, row, col)
else: else:
LOG.warn("ignoring line %d" % line_number) LOG.warning("ignoring line %d" % line_number)
return None return None
def _parse_marriage(self, line_number, row, col): def _parse_marriage(self, line_number, row, col):
@ -418,8 +413,8 @@ class CSVParser(object):
husband = self.lookup("person", husband) husband = self.lookup("person", husband)
if husband is None and wife is None: if husband is None and wife is None:
# might have children, so go ahead and add # might have children, so go ahead and add
LOG.warn("no parents on line %d; adding family anyway" % LOG.warning("no parents on line %d; adding family anyway" %
line_number) line_number)
family = self.get_or_create_family(marriage_ref, husband, wife) family = self.get_or_create_family(marriage_ref, husband, wife)
# adjust gender, if not already provided # adjust gender, if not already provided
if husband: if husband:
@ -433,7 +428,7 @@ class CSVParser(object):
wife.set_gender(Person.FEMALE) wife.set_gender(Person.FEMALE)
self.db.commit_person(wife, self.trans) self.db.commit_person(wife, self.trans)
if marriage_ref: if marriage_ref:
self.storeup("family", marriage_ref.lower(), family) self.storeup("family", marriage_ref, family)
if marriagesource: if marriagesource:
# add, if new # add, if new
new, marriagesource = self.get_or_create_source(marriagesource) new, marriagesource = self.get_or_create_source(marriagesource)
@ -489,8 +484,8 @@ class CSVParser(object):
"Parse the content of a family line" "Parse the content of a family line"
family_ref = rd(line_number, row, col, "family") family_ref = rd(line_number, row, col, "family")
if family_ref is None: if family_ref is None:
LOG.warn("no family reference found for family on line %d" % LOG.warning("no family reference found for family on line %d" %
line_number) line_number)
return # required return # required
child = rd(line_number, row, col, "child") child = rd(line_number, row, col, "child")
source = rd(line_number, row, col, "source") source = rd(line_number, row, col, "source")
@ -499,12 +494,12 @@ class CSVParser(object):
child = self.lookup("person", child) child = self.lookup("person", child)
family = self.lookup("family", family_ref) family = self.lookup("family", family_ref)
if family is None: if family is None:
LOG.warn("no matching family reference found for family " LOG.warning("no matching family reference found for family "
"on line %d" % line_number) "on line %d" % line_number)
return return
if child is None: if child is None:
LOG.warn("no matching child reference found for family " LOG.warning("no matching child reference found for family "
"on line %d" % line_number) "on line %d" % line_number)
return return
# is this child already in this family? If so, don't add # is this child already in this family? If so, don't add
LOG.debug("children: %s", [ref.ref for ref in LOG.debug("children: %s", [ref.ref for ref in
@ -596,8 +591,8 @@ class CSVParser(object):
person = self.lookup("person", person_ref) person = self.lookup("person", person_ref)
if person is None: if person is None:
if surname is None: if surname is None:
LOG.warn("empty surname for new person on line %d" % LOG.warning("empty surname for new person on line %d" %
line_number) line_number)
surname = "" surname = ""
# new person # new person
person = self.create_person() person = self.create_person()
@ -681,8 +676,8 @@ class CSVParser(object):
if birthsource is not None: if birthsource is not None:
new, birthsource = self.get_or_create_source(birthsource) new, birthsource = self.get_or_create_source(birthsource)
if birthdate or birthplace or birthsource: if birthdate or birthplace or birthsource:
new, birth = self.get_or_create_event(person, new, birth = self.get_or_create_event(person,
EventType.BIRTH, birthdate, EventType.BIRTH, birthdate,
birthplace, birthsource) birthplace, birthsource)
birth_ref = person.get_birth_ref() birth_ref = person.get_birth_ref()
if birth_ref is None: if birth_ref is None:
@ -703,8 +698,8 @@ class CSVParser(object):
if baptismsource is not None: if baptismsource is not None:
new, baptismsource = self.get_or_create_source(baptismsource) new, baptismsource = self.get_or_create_source(baptismsource)
if baptismdate or baptismplace or baptismsource: if baptismdate or baptismplace or baptismsource:
new, baptism = self.get_or_create_event(person, new, baptism = self.get_or_create_event(person,
EventType.BAPTISM, baptismdate, EventType.BAPTISM, baptismdate,
baptismplace, baptismsource) baptismplace, baptismsource)
baptism_ref = get_primary_event_ref_from_type(self.db, person, baptism_ref = get_primary_event_ref_from_type(self.db, person,
"Baptism") "Baptism")
@ -751,8 +746,8 @@ class CSVParser(object):
if burialsource is not None: if burialsource is not None:
new, burialsource = self.get_or_create_source(burialsource) new, burialsource = self.get_or_create_source(burialsource)
if burialdate or burialplace or burialsource: if burialdate or burialplace or burialsource:
new, burial = self.get_or_create_event(person, new, burial = self.get_or_create_event(person,
EventType.BURIAL, burialdate, EventType.BURIAL, burialdate,
burialplace, burialsource) burialplace, burialsource)
burial_ref = get_primary_event_ref_from_type(self.db, person, burial_ref = get_primary_event_ref_from_type(self.db, person,
"Burial") "Burial")
@ -786,8 +781,8 @@ class CSVParser(object):
place = self.create_place() place = self.create_place()
if place_id is not None: if place_id is not None:
if place_id.startswith("[") and place_id.endswith("]"): if place_id.startswith("[") and place_id.endswith("]"):
place.gramps_id = self.db.id2user_format(place_id[1:-1]) place.gramps_id = self.db.pid2user_format(place_id[1:-1])
self.storeup("place", place_id.lower(), place) self.storeup("place", place_id, place)
if place_title is not None: if place_title is not None:
place.title = place_title place.title = place_title
if place_name is not None: if place_name is not None:
@ -866,7 +861,7 @@ class CSVParser(object):
self.db.commit_person(wife, self.trans) self.db.commit_person(wife, self.trans)
self.fam_count += 1 self.fam_count += 1
return family return family
def get_or_create_event(self, object_, type_, date=None, place=None, def get_or_create_event(self, object_, type_, date=None, place=None,
source=None): source=None):
""" Add or find a type event on object """ """ Add or find a type event on object """
@ -903,7 +898,7 @@ class CSVParser(object):
self.find_and_set_citation(event, source) self.find_and_set_citation(event, source)
self.db.add_event(event, self.trans) self.db.add_event(event, self.trans)
return (1, event) return (1, event)
def create_person(self): def create_person(self):
""" Used to create a new person we know doesn't exist """ """ Used to create a new person we know doesn't exist """
person = Person() person = Person()
@ -973,6 +968,6 @@ class CSVParser(object):
LOG.debug(" creating citation") LOG.debug(" creating citation")
citation.set_reference_handle(source.get_handle()) citation.set_reference_handle(source.get_handle())
self.db.add_citation(citation, self.trans) self.db.add_citation(citation, self.trans)
LOG.debug(" created citation, citation %s %s" % LOG.debug(" created citation, citation %s %s" %
(citation, citation.get_gramps_id())) (citation, citation.get_gramps_id()))
obj.add_citation(citation.get_handle()) obj.add_citation(citation.get_handle())