6194: Database corrupted - TypeError: unhashable type: 'list'

Fixed upgrade to deal with sourceref in media references in Sources.
Also fixed Check and Repair to repair previously broken databases.
This commit is contained in:
kulath 2014-03-04 17:39:16 +00:00
parent f3d993be38
commit 427ee2d0fd
2 changed files with 145 additions and 37 deletions

View File

@ -28,7 +28,7 @@ from ..lib.markertype import MarkerType
from ..lib.tag import Tag
import time
import logging
LOG = logging.getLogger(".citation")
LOG = logging.getLogger(".upgrade")
from ..const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
@ -46,7 +46,7 @@ from . import BSDDBTxn
from ..lib.nameorigintype import NameOriginType
from .write import _mkname, SURNAMES
from .dbconst import (PERSON_KEY, FAMILY_KEY, EVENT_KEY,
MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY)
MEDIA_KEY, PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY)
from gramps.gui.dialog import (InfoDialog)
def gramps_upgrade_16(self):
@ -61,26 +61,30 @@ def gramps_upgrade_16(self):
(4) add backlinks for references from object to Citations
(5) add backlinks for references from Citation to Source
the backlinks are all updated at the end by calling
reindex_reference_map
the backlinks are all updated on return to write.py gramps_upgrade by
calling reindex_reference_map
"""
length = (len(self.note_map) + len(self.person_map) +
# Only People, Families, Events, Media Objects, Places, Sources and
# Repositories need to be updated, because these are the only primary
# objects that can have source citations.
length = (len(self.person_map) +
len(self.event_map) + len(self.family_map) +
len(self.repository_map) + len(self.media_map) +
len(self.place_map) + len(self.source_map)) + 10
len(self.place_map) + len(self.source_map))
self.set_total(length)
# Setup data for upgrade statistics information dialogue
keyorder = [PERSON_KEY, FAMILY_KEY, EVENT_KEY, MEDIA_KEY,
PLACE_KEY, REPOSITORY_KEY]
PLACE_KEY, REPOSITORY_KEY, SOURCE_KEY]
key2data = {
PERSON_KEY : 0,
FAMILY_KEY : 1,
EVENT_KEY: 2,
MEDIA_KEY: 3,
PLACE_KEY: 4,
REPOSITORY_KEY: 5,
REPOSITORY_KEY: 5,
SOURCE_KEY : 6,
}
key2string = {
PERSON_KEY : _('%6d People upgraded with %6d citations in %6d secs\n'),
@ -89,8 +93,9 @@ def gramps_upgrade_16(self):
MEDIA_KEY : _('%6d Media Objects upgraded with %6d citations in %6d secs\n'),
PLACE_KEY : _('%6d Places upgraded with %6d citations in %6d secs\n'),
REPOSITORY_KEY : _('%6d Repositories upgraded with %6d citations in %6d secs\n'),
SOURCE_KEY : _('%6d Sources upgraded with %6d citations in %6d secs\n'),
}
data_upgradeobject = [0] * 6
data_upgradeobject = [0] * 7
# Initialise the citation gramps ID number
self.cmap_index = 0
@ -102,6 +107,15 @@ def gramps_upgrade_16(self):
start_time = time.time()
for person_handle in self.person_map.keys():
person = self.person_map[person_handle]
try:
# The parameters are evaluated before deciding whether logging is on
# or not. Since the retrieval of names is so complex, I think it is
# safer to protect this with a try except block, even though it
# seems to work for names being present and not.
LOG.debug("upgrade person %s %s" % (person[3][4],
" ".join([name[0] for name in person[3][5]])))
except:
pass
(handle, gramps_id, gender, primary_name, alternate_names,
death_ref_index, birth_ref_index, event_ref_list, family_list,
parent_family_list, media_list, address_list, attribute_list,
@ -144,6 +158,7 @@ def gramps_upgrade_16(self):
attribute_list, urls, lds_seal_list,
new_citation_list, note_list, change, tag_list,
private, person_ref_list)
LOG.debug(" upgrade new_person %s" % [new_person])
with BSDDBTxn(self.env, self.person_map) as txn:
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
@ -165,7 +180,7 @@ def gramps_upgrade_16(self):
start_time = time.time()
for media_handle in self.media_map.keys():
media = self.media_map[media_handle]
LOG.debug("upgrade media %s" % media[4])
LOG.debug("upgrade media object %s" % media[4])
(handle, gramps_id, path, mime, desc,
attribute_list, source_list, note_list, change,
date, tag_list, private) = media
@ -182,12 +197,11 @@ def gramps_upgrade_16(self):
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
txn.put(handle, new_media)
LOG.debug(" update ref map media %s" % [handle,
self.get_object_from_handle(handle) ])
self.update()
LOG.debug("Media upgrade %d citations upgraded in %d seconds" %
(self.cmap_index - start_num_citations,
LOG.debug("%d media objects upgraded with %d citations in %d seconds" %
(len(self.media_map.keys()),
self.cmap_index - start_num_citations,
int(time.time() - start_time)))
data_upgradeobject[key2data[MEDIA_KEY]] = (len(list(self.media_map.keys())),
self.cmap_index - start_num_citations,
@ -200,6 +214,7 @@ def gramps_upgrade_16(self):
start_time = time.time()
for place_handle in self.place_map.keys():
place = self.place_map[place_handle]
LOG.debug("upgrade place %s" % place[2])
(handle, gramps_id, title, longi, lat,
main_loc, alt_loc, urls, media_list, source_list, note_list,
change, private) = place
@ -216,6 +231,7 @@ def gramps_upgrade_16(self):
longi, lat, main_loc, alt_loc, urls,
media_list, new_citation_list, note_list,
change, private)
LOG.debug(" upgrade new_place %s" % [new_place])
with BSDDBTxn(self.env, self.place_map) as txn:
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
@ -237,6 +253,7 @@ def gramps_upgrade_16(self):
start_time = time.time()
for family_handle in self.family_map.keys():
family = self.family_map[family_handle]
LOG.debug("upgrade family (gramps_id) %s" % family[1])
(handle, gramps_id, father_handle, mother_handle,
child_ref_list, the_type, event_ref_list, media_list,
attribute_list, lds_seal_list, source_list, note_list,
@ -266,13 +283,12 @@ def gramps_upgrade_16(self):
child_ref_list, the_type, event_ref_list, media_list,
attribute_list, lds_seal_list, new_citation_list,
note_list, change, tag_list, private)
LOG.debug(" upgrade new_family %s" % [new_family])
with BSDDBTxn(self.env, self.family_map) as txn:
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
txn.put(handle, new_family)
txn.put(str(handle), new_family)
self.update()
LOG.debug("%d familys upgraded with %d citations in %d seconds. " %
LOG.debug("%d families upgraded with %d citations in %d seconds. " %
(len(list(self.family_map.keys())),
self.cmap_index - start_num_citations,
time.time() - start_time))
@ -282,13 +298,11 @@ def gramps_upgrade_16(self):
# ---------------------------------
# Modify Events
# ---------------------------------
upgrade_time = 0
backlink_time = 0
start_num_citations = self.cmap_index
start_time = time.time()
for event_handle in self.event_map.keys():
t1 = time.time()
event = self.event_map[event_handle]
LOG.debug("upgrade event %s" % event[4])
(handle, gramps_id, the_type, date, description, place,
source_list, note_list, media_list, attribute_list,
change, private) = event
@ -308,21 +322,17 @@ def gramps_upgrade_16(self):
new_citation_list, note_list, media_list,
attribute_list,
change, private)
LOG.debug(" upgrade new_event %s" % [new_event])
with BSDDBTxn(self.env, self.event_map) as txn:
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
txn.put(handle, new_event)
t2 = time.time()
upgrade_time += t2 - t1
t3 = time.time()
backlink_time += t3 - t2
self.update()
LOG.debug("%d events upgraded with %d citations in %d seconds. "
"Backlinks took %d seconds" %
(len(list(self.event_map.keys())),
LOG.debug("%d events upgraded with %d citations in %d seconds. " %
(len(self.event_map.keys()),
self.cmap_index - start_num_citations,
int(upgrade_time), int(backlink_time)))
time.time() - start_time))
data_upgradeobject[key2data[EVENT_KEY]] = (len(list(self.event_map.keys())),
self.cmap_index - start_num_citations,
time.time() - start_time)
@ -334,6 +344,7 @@ def gramps_upgrade_16(self):
start_time = time.time()
for repository_handle in self.repository_map.keys():
repository = self.repository_map[repository_handle]
LOG.debug("upgrade repository %s" % repository[3])
(handle, gramps_id, the_type, name, note_list,
address_list, urls, change, private) = repository
if address_list:
@ -342,19 +353,54 @@ def gramps_upgrade_16(self):
if address_list:
new_repository = (handle, gramps_id, the_type, name, note_list,
address_list, urls, change, private)
LOG.debug(" upgrade new_repository %s" % [new_repository])
with BSDDBTxn(self.env, self.repository_map) as txn:
if isinstance(handle, UNITYPE):
handle = handle.encode('utf-8')
txn.put(handle, new_repository)
self.update()
LOG.debug("%d repositorys upgraded with %d citations in %d seconds. " %
LOG.debug("%d repositories upgraded with %d citations in %d seconds. " %
(len(list(self.repository_map.keys())),
self.cmap_index - start_num_citations,
time.time() - start_time))
data_upgradeobject[key2data[REPOSITORY_KEY]] = (len(list(self.repository_map.keys())),
self.cmap_index - start_num_citations,
time.time() - start_time)
# ---------------------------------
# Modify Source
# ---------------------------------
start_num_citations = self.cmap_index
start_time = time.time()
for source_handle in self.source_map.keys():
source = self.source_map[source_handle]
LOG.debug("upgrade source %s" % source[2])
(handle, gramps_id, title, author,
pubinfo, note_list, media_list,
abbrev, change, datamap, reporef_list,
private) = source
if media_list:
media_list = upgrade_media_list_16(
self, media_list)
new_source = (handle, gramps_id, title, author,
pubinfo, note_list, media_list,
abbrev, change, datamap, reporef_list,
private)
LOG.debug(" upgrade new_source %s" % [new_source])
with BSDDBTxn(self.env, self.source_map) as txn:
txn.put(str(handle), new_source)
self.update()
LOG.debug("%d sources upgraded with %d citations in %d seconds" %
(len(self.source_map.keys()),
self.cmap_index - start_num_citations,
int(time.time() - start_time)))
data_upgradeobject[key2data[SOURCE_KEY]] = (len(self.source_map.keys()),
self.cmap_index - start_num_citations,
time.time() - start_time)
# ---------------------------------
@ -552,12 +598,6 @@ def convert_source_list_to_citation_list_16(self, source_list):
new_handle = new_handle.encode('utf-8')
txn.put(new_handle, new_citation)
self.cmap_index += 1
# # add backlinks for references from Citation to Source
# with BSDDBTxn(self.env) as txn:
# self.update_reference_map(
# self.get_citation_from_handle(new_handle),
# transaction,
# txn.txn)
citation_list.append((new_handle))
return citation_list

View File

@ -201,6 +201,7 @@ class Check(tool.BatchTool):
checker.check_repo_references()
checker.check_note_references()
checker.check_tag_references()
checker.check_media_sourceref()
self.db.enable_signals()
self.db.request_rebuild()
@ -241,6 +242,7 @@ class CheckIntegrity(object):
self.invalid_dates = []
self.removed_name_format = []
self.empty_objects = defaultdict(list)
self.replaced_sourceref = []
self.last_img_dir = config.get('behavior.addmedia-image-dir')
self.progress = ProgressMeter(_('Checking Database'),'')
self.explanation = Note(_('Objects referenced by this note '
@ -1876,6 +1878,64 @@ class CheckIntegrity(object):
if len(self.invalid_tag_references) == 0:
logging.info(' OK: no tag reference problems found')
def check_media_sourceref(self):
    """
    Repair a problem left by the database upgrade from schema version 15
    to 16. Mediarefs on source primary objects can contain sourcerefs,
    and these were not converted to citations.

    Every source in ``self.db.source_map`` is scanned. For each media
    reference, any entry of its citation list that is still a 6-item
    sourceref sequence is turned into a new Citation, which is added to
    the database; the entry is replaced by the new citation's handle.
    The handle of each repaired source is appended to
    ``self.replaced_sourceref`` (once per replaced sourceref) so that the
    report can count the fixes. Only sources that were actually repaired
    are committed back to the database.
    """
    total = self.db.get_number_of_sources()

    self.progress.set_pass(_('Looking for media source reference problems'),
                           total)
    logging.info('Looking for media source reference problems')

    # Take a snapshot of the keys: repaired sources are committed inside
    # the loop, so the map must not be iterated live.
    for handle in list(self.db.source_map.keys()):
        self.progress.step()
        info = self.db.source_map[handle]
        source = gen.lib.Source()
        source.unserialize(info)

        source_changed = False
        new_media_ref_list = []
        for media_ref in source.get_media_list():
            new_citation_list = []
            for citation_handle in media_ref.get_citation_list():
                # Either citation_handle is a handle (a string), in which
                # case it has been converted, or it is a 6-item sequence,
                # in which case it now needs to be converted. The type
                # check keeps a 6-character string handle from being
                # mistaken for an unconverted sourceref.
                if (isinstance(citation_handle, (tuple, list))
                        and len(citation_handle) == 6):
                    sourceref = citation_handle
                    # NOTE(review): the sourceref items are passed to the
                    # setters exactly as stored; confirm that item 0 is in
                    # the form set_date_object expects.
                    new_citation = gen.lib.Citation()
                    new_citation.set_date_object(sourceref[0])
                    new_citation.set_privacy(sourceref[1])
                    new_citation.set_note_list(sourceref[2])
                    new_citation.set_confidence_level(sourceref[3])
                    new_citation.set_reference_handle(sourceref[4])
                    new_citation.set_page(sourceref[5])
                    citation_handle = Utils.create_id()
                    new_citation.set_handle(citation_handle)
                    self.replaced_sourceref.append(handle)
                    logging.warning(' FAIL: the source "%s" has a media '
                                    'reference with a source citation '
                                    'which is invalid', source.gramps_id)
                    self.db.add_citation(new_citation, self.trans)
                    source_changed = True

                new_citation_list.append(citation_handle)

            media_ref.set_citation_list(new_citation_list)
            new_media_ref_list.append(media_ref)

        # Only write back sources that were repaired, so that untouched
        # sources are not re-committed for no reason.
        if source_changed:
            source.set_media_list(new_media_ref_list)
            self.db.commit_source(source, self.trans)

    # Report success only when nothing had to be repaired.
    if not self.replaced_sourceref:
        logging.info(' OK: no broken source citations on mediarefs found')
def class_person(self, handle):
person = Person()
person.set_handle(handle)
@ -1982,7 +2042,8 @@ class CheckIntegrity(object):
note_references = len(self.invalid_note_references)
tag_references = len(self.invalid_tag_references)
name_format = len(self.removed_name_format)
empty_objs = sum(len(obj) for obj in self.empty_objects.values())
replaced_sourcerefs = len(self.replaced_sourceref)
empty_objs = sum(len(obj) for obj in self.empty_objects.itervalues())
errors = (photos + efam + blink + plink + slink + rel +
event_invalid + person +
@ -2216,6 +2277,13 @@ class CheckIntegrity(object):
name_format) % {'quantity' : name_format}
)
if replaced_sourcerefs:
self.text.write(
ngettext("%(quantity)d invalid source citation was fixed\n",
"%(quantity)d invalid source citations were fixed\n",
replaced_sourcerefs) % {'quantity' : replaced_sourcerefs}
)
if empty_objs > 0 :
self.text.write(_("%(empty_obj)d empty objects removed:\n"
" %(person)d person objects\n"