6194: Database corrupted - TypeError: unhashable type: 'list'

Fixed upgrade to deal with sourceref in media references in Sources.
Also fixed Check and Repair to repair previously broken databases.
This commit is contained in:
kulath 2014-03-04 17:38:58 +00:00
parent b971373182
commit 56c67576d5
2 changed files with 142 additions and 32 deletions

View File

@ -27,7 +27,7 @@ from gen.lib.markertype import MarkerType
from gen.lib.tag import Tag from gen.lib.tag import Tag
import time import time
import logging import logging
LOG = logging.getLogger(".citation") LOG = logging.getLogger(".upgrade")
from gen.ggettext import gettext as _ from gen.ggettext import gettext as _
@ -43,7 +43,7 @@ from gen.db import BSDDBTxn
from gen.lib.nameorigintype import NameOriginType from gen.lib.nameorigintype import NameOriginType
from gen.db.write import _mkname, SURNAMES from gen.db.write import _mkname, SURNAMES
from gen.db.dbconst import (PERSON_KEY, FAMILY_KEY, EVENT_KEY, from gen.db.dbconst import (PERSON_KEY, FAMILY_KEY, EVENT_KEY,
MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY) MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY)
from QuestionDialog import (InfoDialog) from QuestionDialog import (InfoDialog)
def gramps_upgrade_16(self): def gramps_upgrade_16(self):
@ -58,26 +58,30 @@ def gramps_upgrade_16(self):
(4) add backlinks for references from object to Citations (4) add backlinks for references from object to Citations
(5) add backlinks for references from Citation to Source (5) add backlinks for references from Citation to Source
the backlinks are all updated at the end by calling the backlinks are all updated on return to write.py gramps_upgrade by
reindex_reference_map calling reindex_reference_map
""" """
length = (len(self.note_map) + len(self.person_map) + # Only People, Families, Events, Media Objects, Places, Sources and
# Repositories need to be updated, because these are the only primary
# objects that can have source citations.
length = (len(self.person_map) +
len(self.event_map) + len(self.family_map) + len(self.event_map) + len(self.family_map) +
len(self.repository_map) + len(self.media_map) + len(self.repository_map) + len(self.media_map) +
len(self.place_map) + len(self.source_map)) + 10 len(self.place_map) + len(self.source_map))
self.set_total(length) self.set_total(length)
# Setup data for upgrade statistics information dialogue # Setup data for upgrade statistics information dialogue
keyorder = [PERSON_KEY, FAMILY_KEY, EVENT_KEY, MEDIA_KEY, keyorder = [PERSON_KEY, FAMILY_KEY, EVENT_KEY, MEDIA_KEY,
PLACE_KEY, REPOSITORY_KEY] PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY]
key2data = { key2data = {
PERSON_KEY : 0, PERSON_KEY : 0,
FAMILY_KEY : 1, FAMILY_KEY : 1,
EVENT_KEY: 2, EVENT_KEY: 2,
MEDIA_KEY: 3, MEDIA_KEY: 3,
PLACE_KEY: 4, PLACE_KEY: 4,
REPOSITORY_KEY: 5, REPOSITORY_KEY: 5,
SOURCE_KEY : 6,
} }
key2string = { key2string = {
PERSON_KEY : _('%6d People upgraded with %6d citations in %6d secs\n'), PERSON_KEY : _('%6d People upgraded with %6d citations in %6d secs\n'),
@ -86,8 +90,9 @@ def gramps_upgrade_16(self):
MEDIA_KEY : _('%6d Media Objects upgraded with %6d citations in %6d secs\n'), MEDIA_KEY : _('%6d Media Objects upgraded with %6d citations in %6d secs\n'),
PLACE_KEY : _('%6d Places upgraded with %6d citations in %6d secs\n'), PLACE_KEY : _('%6d Places upgraded with %6d citations in %6d secs\n'),
REPOSITORY_KEY : _('%6d Repositories upgraded with %6d citations in %6d secs\n'), REPOSITORY_KEY : _('%6d Repositories upgraded with %6d citations in %6d secs\n'),
SOURCE_KEY : _('%6d Sources upgraded with %6d citations in %6d secs\n'),
} }
data_upgradeobject = [0] * 6 data_upgradeobject = [0] * 7
# Initialise the citation gramps ID number # Initialise the citation gramps ID number
self.cmap_index = 0 self.cmap_index = 0
@ -99,6 +104,15 @@ def gramps_upgrade_16(self):
start_time = time.time() start_time = time.time()
for person_handle in self.person_map.keys(): for person_handle in self.person_map.keys():
person = self.person_map[person_handle] person = self.person_map[person_handle]
try:
# The parameters are evaluated before deciding whether logging is on
# or not. Since the retrieval of names is so complex, I think it is
# safer to protect this with a try except block, even though it
# seems to work for names being present and not.
LOG.debug("upgrade person %s %s" % (person[3][4],
" ".join([name[0] for name in person[3][5]])))
except:
pass
(handle, gramps_id, gender, primary_name, alternate_names, (handle, gramps_id, gender, primary_name, alternate_names,
death_ref_index, birth_ref_index, event_ref_list, family_list, death_ref_index, birth_ref_index, event_ref_list, family_list,
parent_family_list, media_list, address_list, attribute_list, parent_family_list, media_list, address_list, attribute_list,
@ -141,6 +155,7 @@ def gramps_upgrade_16(self):
attribute_list, urls, lds_seal_list, attribute_list, urls, lds_seal_list,
new_citation_list, note_list, change, tag_list, new_citation_list, note_list, change, tag_list,
private, person_ref_list) private, person_ref_list)
LOG.debug(" upgrade new_person %s" % [new_person])
with BSDDBTxn(self.env, self.person_map) as txn: with BSDDBTxn(self.env, self.person_map) as txn:
txn.put(str(handle), new_person) txn.put(str(handle), new_person)
self.update() self.update()
@ -160,7 +175,7 @@ def gramps_upgrade_16(self):
start_time = time.time() start_time = time.time()
for media_handle in self.media_map.keys(): for media_handle in self.media_map.keys():
media = self.media_map[media_handle] media = self.media_map[media_handle]
LOG.debug("upgrade media %s" % media[4]) LOG.debug("upgrade media object %s" % media[4])
(handle, gramps_id, path, mime, desc, (handle, gramps_id, path, mime, desc,
attribute_list, source_list, note_list, change, attribute_list, source_list, note_list, change,
date, tag_list, private) = media date, tag_list, private) = media
@ -175,12 +190,11 @@ def gramps_upgrade_16(self):
LOG.debug(" upgrade new_media %s" % [new_media]) LOG.debug(" upgrade new_media %s" % [new_media])
with BSDDBTxn(self.env, self.media_map) as txn: with BSDDBTxn(self.env, self.media_map) as txn:
txn.put(str(handle), new_media) txn.put(str(handle), new_media)
LOG.debug(" update ref map media %s" % [handle,
self.get_object_from_handle(handle) ])
self.update() self.update()
LOG.debug("Media upgrade %d citations upgraded in %d seconds" % LOG.debug("%d media objects upgraded with %d citations in %d seconds" %
(self.cmap_index - start_num_citations, (len(self.media_map.keys()),
self.cmap_index - start_num_citations,
int(time.time() - start_time))) int(time.time() - start_time)))
data_upgradeobject[key2data[MEDIA_KEY]] = (len(self.media_map.keys()), data_upgradeobject[key2data[MEDIA_KEY]] = (len(self.media_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
@ -193,6 +207,7 @@ def gramps_upgrade_16(self):
start_time = time.time() start_time = time.time()
for place_handle in self.place_map.keys(): for place_handle in self.place_map.keys():
place = self.place_map[place_handle] place = self.place_map[place_handle]
LOG.debug("upgrade place %s" % place[2])
(handle, gramps_id, title, long, lat, (handle, gramps_id, title, long, lat,
main_loc, alt_loc, urls, media_list, source_list, note_list, main_loc, alt_loc, urls, media_list, source_list, note_list,
change, private) = place change, private) = place
@ -209,6 +224,7 @@ def gramps_upgrade_16(self):
long, lat, main_loc, alt_loc, urls, long, lat, main_loc, alt_loc, urls,
media_list, new_citation_list, note_list, media_list, new_citation_list, note_list,
change, private) change, private)
LOG.debug(" upgrade new_place %s" % [new_place])
with BSDDBTxn(self.env, self.place_map) as txn: with BSDDBTxn(self.env, self.place_map) as txn:
txn.put(str(handle), new_place) txn.put(str(handle), new_place)
self.update() self.update()
@ -228,6 +244,7 @@ def gramps_upgrade_16(self):
start_time = time.time() start_time = time.time()
for family_handle in self.family_map.keys(): for family_handle in self.family_map.keys():
family = self.family_map[family_handle] family = self.family_map[family_handle]
LOG.debug("upgrade family (gramps_id) %s" % family[1])
(handle, gramps_id, father_handle, mother_handle, (handle, gramps_id, father_handle, mother_handle,
child_ref_list, the_type, event_ref_list, media_list, child_ref_list, the_type, event_ref_list, media_list,
attribute_list, lds_seal_list, source_list, note_list, attribute_list, lds_seal_list, source_list, note_list,
@ -257,11 +274,12 @@ def gramps_upgrade_16(self):
child_ref_list, the_type, event_ref_list, media_list, child_ref_list, the_type, event_ref_list, media_list,
attribute_list, lds_seal_list, new_citation_list, attribute_list, lds_seal_list, new_citation_list,
note_list, change, tag_list, private) note_list, change, tag_list, private)
LOG.debug(" upgrade new_family %s" % [new_family])
with BSDDBTxn(self.env, self.family_map) as txn: with BSDDBTxn(self.env, self.family_map) as txn:
txn.put(str(handle), new_family) txn.put(str(handle), new_family)
self.update() self.update()
LOG.debug("%d familys upgraded with %d citations in %d seconds. " % LOG.debug("%d families upgraded with %d citations in %d seconds. " %
(len(self.family_map.keys()), (len(self.family_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
time.time() - start_time)) time.time() - start_time))
@ -271,13 +289,11 @@ def gramps_upgrade_16(self):
# --------------------------------- # ---------------------------------
# Modify Events # Modify Events
# --------------------------------- # ---------------------------------
upgrade_time = 0
backlink_time = 0
start_num_citations = self.cmap_index start_num_citations = self.cmap_index
start_time = time.time() start_time = time.time()
for event_handle in self.event_map.keys(): for event_handle in self.event_map.keys():
t1 = time.time()
event = self.event_map[event_handle] event = self.event_map[event_handle]
LOG.debug("upgrade event %s" % event[4])
(handle, gramps_id, the_type, date, description, place, (handle, gramps_id, the_type, date, description, place,
source_list, note_list, media_list, attribute_list, source_list, note_list, media_list, attribute_list,
change, private) = event change, private) = event
@ -297,19 +313,15 @@ def gramps_upgrade_16(self):
new_citation_list, note_list, media_list, new_citation_list, note_list, media_list,
attribute_list, attribute_list,
change, private) change, private)
LOG.debug(" upgrade new_event %s" % [new_event])
with BSDDBTxn(self.env, self.event_map) as txn: with BSDDBTxn(self.env, self.event_map) as txn:
txn.put(str(handle), new_event) txn.put(str(handle), new_event)
t2 = time.time()
upgrade_time += t2 - t1
t3 = time.time()
backlink_time += t3 - t2
self.update() self.update()
LOG.debug("%d events upgraded with %d citations in %d seconds. " LOG.debug("%d events upgraded with %d citations in %d seconds. " %
"Backlinks took %d seconds" %
(len(self.event_map.keys()), (len(self.event_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
int(upgrade_time), int(backlink_time))) time.time() - start_time))
data_upgradeobject[key2data[EVENT_KEY]] = (len(self.event_map.keys()), data_upgradeobject[key2data[EVENT_KEY]] = (len(self.event_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
time.time() - start_time) time.time() - start_time)
@ -321,6 +333,7 @@ def gramps_upgrade_16(self):
start_time = time.time() start_time = time.time()
for repository_handle in self.repository_map.keys(): for repository_handle in self.repository_map.keys():
repository = self.repository_map[repository_handle] repository = self.repository_map[repository_handle]
LOG.debug("upgrade repository %s" % repository[3])
(handle, gramps_id, the_type, name, note_list, (handle, gramps_id, the_type, name, note_list,
address_list, urls, change, private) = repository address_list, urls, change, private) = repository
if address_list: if address_list:
@ -329,17 +342,52 @@ def gramps_upgrade_16(self):
if address_list: if address_list:
new_repository = (handle, gramps_id, the_type, name, note_list, new_repository = (handle, gramps_id, the_type, name, note_list,
address_list, urls, change, private) address_list, urls, change, private)
LOG.debug(" upgrade new_repository %s" % [new_repository])
with BSDDBTxn(self.env, self.repository_map) as txn: with BSDDBTxn(self.env, self.repository_map) as txn:
txn.put(str(handle), new_repository) txn.put(str(handle), new_repository)
self.update() self.update()
LOG.debug("%d repositorys upgraded with %d citations in %d seconds. " % LOG.debug("%d repositories upgraded with %d citations in %d seconds. " %
(len(self.repository_map.keys()), (len(self.repository_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
time.time() - start_time)) time.time() - start_time))
data_upgradeobject[key2data[REPOSITORY_KEY]] = (len(self.repository_map.keys()), data_upgradeobject[key2data[REPOSITORY_KEY]] = (len(self.repository_map.keys()),
self.cmap_index - start_num_citations, self.cmap_index - start_num_citations,
time.time() - start_time) time.time() - start_time)
# ---------------------------------
# Modify Source
# ---------------------------------
start_num_citations = self.cmap_index
start_time = time.time()
for source_handle in self.source_map.keys():
source = self.source_map[source_handle]
LOG.debug("upgrade source %s" % source[2])
(handle, gramps_id, title, author,
pubinfo, note_list, media_list,
abbrev, change, datamap, reporef_list,
private) = source
if media_list:
media_list = upgrade_media_list_16(
self, media_list)
new_source = (handle, gramps_id, title, author,
pubinfo, note_list, media_list,
abbrev, change, datamap, reporef_list,
private)
LOG.debug(" upgrade new_source %s" % [new_source])
with BSDDBTxn(self.env, self.source_map) as txn:
txn.put(str(handle), new_source)
self.update()
LOG.debug("%d sources upgraded with %d citations in %d seconds" %
(len(self.source_map.keys()),
self.cmap_index - start_num_citations,
int(time.time() - start_time)))
data_upgradeobject[key2data[SOURCE_KEY]] = (len(self.source_map.keys()),
self.cmap_index - start_num_citations,
time.time() - start_time)
# --------------------------------- # ---------------------------------
@ -535,12 +583,6 @@ def convert_source_list_to_citation_list_16(self, source_list):
with BSDDBTxn(self.env, self.citation_map) as txn: with BSDDBTxn(self.env, self.citation_map) as txn:
txn.put(str(new_handle), new_citation) txn.put(str(new_handle), new_citation)
self.cmap_index += 1 self.cmap_index += 1
# # add backlinks for references from Citation to Source
# with BSDDBTxn(self.env) as txn:
# self.update_reference_map(
# self.get_citation_from_handle(new_handle),
# transaction,
# txn.txn)
citation_list.append((new_handle)) citation_list.append((new_handle))
return citation_list return citation_list

View File

@ -183,6 +183,7 @@ class Check(tool.BatchTool):
checker.check_repo_references() checker.check_repo_references()
checker.check_note_references() checker.check_note_references()
checker.check_tag_references() checker.check_tag_references()
checker.check_media_sourceref()
self.db.enable_signals() self.db.enable_signals()
self.db.request_rebuild() self.db.request_rebuild()
@ -223,6 +224,7 @@ class CheckIntegrity(object):
self.invalid_dates = [] self.invalid_dates = []
self.removed_name_format = [] self.removed_name_format = []
self.empty_objects = defaultdict(list) self.empty_objects = defaultdict(list)
self.replaced_sourceref = []
self.last_img_dir = config.get('behavior.addmedia-image-dir') self.last_img_dir = config.get('behavior.addmedia-image-dir')
self.progress = ProgressMeter(_('Checking Database'),'') self.progress = ProgressMeter(_('Checking Database'),'')
self.explanation = gen.lib.Note(_('Objects referenced by this note ' self.explanation = gen.lib.Note(_('Objects referenced by this note '
@ -1813,6 +1815,64 @@ class CheckIntegrity(object):
if len(self.invalid_tag_references) == 0: if len(self.invalid_tag_references) == 0:
logging.info(' OK: no tag reference problems found') logging.info(' OK: no tag reference problems found')
def check_media_sourceref(self):
    """
    Convert leftover sourcerefs on the media references of sources.

    This repairs a problem with database upgrade from database schema
    version 15 to 16. Mediarefs on source primary objects can contain
    sourcerefs, and these were not converted to citations.

    Every source in the database is unserialized and each entry in the
    citation list of each of its media references is examined. An entry
    of length 6 is treated as an unconverted sourceref: a new Citation is
    built from it, added to the database, and the new citation handle
    replaces the entry. Any other entry is kept unchanged. The handle of
    the owning source is appended to self.replaced_sourceref once per
    converted sourceref, and every source is committed back to the
    database.
    """
    self.progress.set_pass(_('Looking for media source reference problems'),
                           self.db.get_number_of_sources())
    logging.info('Looking for media source reference problems')

    for handle in self.db.source_map.keys():
        self.progress.step()
        source = gen.lib.Source()
        source.unserialize(self.db.source_map[handle])

        new_media_ref_list = []
        for media_ref in source.get_media_list():
            new_citation_list = []
            for citation_handle in media_ref.get_citation_list():
                # Either citation_handle is a handle, in which case it has
                # been converted, or it is a 6-tuple, in which case it now
                # needs to be converted.
                # NOTE(review): the test relies on real handles never
                # having length 6 -- confirm against Utils.create_id().
                if len(citation_handle) == 6:
                    sourceref = citation_handle
                    new_citation = gen.lib.Citation()
                    new_citation.set_date_object(sourceref[0])
                    new_citation.set_privacy(sourceref[1])
                    new_citation.set_note_list(sourceref[2])
                    new_citation.set_confidence_level(sourceref[3])
                    new_citation.set_reference_handle(sourceref[4])
                    new_citation.set_page(sourceref[5])
                    # From here on citation_handle names the handle of
                    # the replacement citation, not the old sourceref.
                    citation_handle = Utils.create_id()
                    new_citation.set_handle(citation_handle)
                    self.replaced_sourceref.append(handle)
                    logging.warning(' FAIL: the source "%s" has a media '
                                    'reference with a source citation '
                                    'which is invalid' % (source.gramps_id))
                    self.db.add_citation(new_citation, self.trans)
                new_citation_list.append(citation_handle)
            media_ref.set_citation_list(new_citation_list)
            new_media_ref_list.append(media_ref)

        source.set_media_list(new_media_ref_list)
        self.db.commit_source(source, self.trans)

    # Report success only when nothing had to be replaced.
    if not self.replaced_sourceref:
        logging.info(' OK: no broken source citations on mediarefs found')
def class_person(self, handle): def class_person(self, handle):
person = gen.lib.Person() person = gen.lib.Person()
person.set_handle(handle) person.set_handle(handle)
@ -1919,6 +1979,7 @@ class CheckIntegrity(object):
note_references = len(self.invalid_note_references) note_references = len(self.invalid_note_references)
tag_references = len(self.invalid_tag_references) tag_references = len(self.invalid_tag_references)
name_format = len(self.removed_name_format) name_format = len(self.removed_name_format)
replaced_sourcerefs = len(self.replaced_sourceref)
empty_objs = sum(len(obj) for obj in self.empty_objects.itervalues()) empty_objs = sum(len(obj) for obj in self.empty_objects.itervalues())
errors = (photos + efam + blink + plink + slink + rel + errors = (photos + efam + blink + plink + slink + rel +
@ -2153,6 +2214,13 @@ class CheckIntegrity(object):
name_format) % {'quantity' : name_format} name_format) % {'quantity' : name_format}
) )
if replaced_sourcerefs:
self.text.write(
ngettext("%(quantity)d invalid source citation was fixed\n",
"%(quantity)d invalid source citations were fixed\n",
replaced_sourcerefs) % {'quantity' : replaced_sourcerefs}
)
if empty_objs > 0 : if empty_objs > 0 :
self.text.write(_("%(empty_obj)d empty objects removed:\n" self.text.write(_("%(empty_obj)d empty objects removed:\n"
" %(person)d person objects\n" " %(person)d person objects\n"