7212: convert invalid dates to text on import

Back-port from gramps40 [cca72aa] and [befcd21]

Unit test run as follows (4/4 passed):
	LANG=en_GB.utf8 python plugins/import/test/ImportXml_test.py -v
This commit is contained in:
Vassilii Khachaturov 2013-11-23 21:23:01 +02:00
parent f527a82380
commit 36d91fc68b
2 changed files with 230 additions and 6 deletions

View File

@ -33,6 +33,7 @@ import os
import sys
import time
from xml.parsers.expat import ExpatError, ParserCreate
from xml.sax.saxutils import escape
from gen.ggettext import gettext as _
import re
import logging
@ -2240,10 +2241,17 @@ class GrampsParser(UpdateCallback):
else:
newyear = gen.lib.Date.newyear_to_code(newyear)
date_value.set(qual, mode, cal,
(day, month, year, dualdated,
rng_day, rng_month, rng_year, dualdated),
newyear=newyear)
try:
date_value.set(qual, mode, cal,
(day, month, year, dualdated,
rng_day, rng_month, rng_year, dualdated),
newyear=newyear)
except gen.lib.DateError as e:
self._set_date_to_xml_text(date_value, e,
xml_element_name = ("datespan" if
mode == gen.lib.Date.MOD_SPAN
else "daterange"),
xml_attrs = attrs)
def start_dateval(self, attrs):
if self.citation:
@ -2321,8 +2329,27 @@ class GrampsParser(UpdateCallback):
else:
newyear = gen.lib.Date.newyear_to_code(newyear)
date_value.set(qual, mod, cal, (day, month, year, dualdated),
newyear=newyear)
try:
date_value.set(qual, mod, cal, (day, month, year, dualdated),
newyear=newyear)
except gen.lib.DateError as e:
self._set_date_to_xml_text(date_value, e, 'dateval', attrs)
def _set_date_to_xml_text(self, date_value, date_error, xml_element_name, xml_attrs):
"""
Common handling of invalid dates for the date... element handlers.
Prints warning on console and sets date_value to a text-only date
with the problematic XML inside.
"""
xml = "<{element_name} {attrs}/>".format(
element_name = xml_element_name,
attrs = " ".join(
['{}="{}"'.format(k,escape(v, entities={'"' : "&quot;"}))
for k,v in xml_attrs.iteritems()]))
LOG.warning(_("Invalid date {} in XML {}, preserving XML as text"
).format(date_error.date.dateval, xml))
date_value.set(modifier=gen.lib.Date.MOD_TEXTONLY, text=xml)
def start_datestr(self, attrs):
if self.citation:

View File

@ -0,0 +1,197 @@
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2011 Michiel D. Nauta
# Copyright (C) 2013 Vassilii Khachaturov
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
Test XML import.
"""
from __future__ import print_function, unicode_literals
import unittest
import time
import sys
import os
sys.path.append(os.curdir)
sys.path.append(os.path.join(os.curdir, 'plugins', 'lib'))
import subprocess
import libxml2
import libxslt
from libgrampsxml import GRAMPS_XML_VERSION
from const import ROOT_DIR, VERSION, USER_PLUGINS
HAS_EXPORTRAW = os.path.isdir(os.path.join(USER_PLUGINS, 'ExportRaw'))
class CopiedDoc(object):
"""Context manager that creates a deep copy of an libxml-xml document."""
def __init__(self, xmldoc):
self.xmldoc = xmldoc
self.copy = libxml2.readDoc(str(self.xmldoc), '', None,
libxml2.XML_PARSE_NONET)
def __enter__(self):
return self.copy
def __exit__(self, exc_type, exc_val, exc_tb):
self.copy.freeDoc()
return False
class XpathContext(object):
"""Context manager that creates a libxml2 xpath context that allows
evaluation of xpath expressions."""
def __init__(self, xmldoc):
self.xmldoc = xmldoc
self.ctxt = self.xmldoc.xpathNewContext()
self.ctxt.xpathRegisterNs('g', 'http://gramps-project.org/xml/%s/' %
GRAMPS_XML_VERSION)
def __enter__(self):
return self.ctxt
def __exit__(self, exc_type, exc_val, exc_tb):
self.ctxt.xpathFreeContext()
return False
@unittest.skipUnless(HAS_EXPORTRAW,
'These tests need the 3rd-party plugin "ExportRaw".')
class BaseImportTest(unittest.TestCase):
def base_setup(self):
"""Set up code needed by all tests."""
date = time.localtime(time.time())
libxml2.keepBlanksDefault(0)
styledoc = libxml2.parseFile(os.path.join(ROOT_DIR,
"../data/gramps_canonicalize.xsl"))
self.style = libxslt.parseStylesheetDoc(styledoc)
self.basedoc = None
self.base_str = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE database PUBLIC "-//GRAMPS//DTD GRAMPS XML %s//EN"
"http://gramps-project.org/xml/%s/grampsxml.dtd">
<database xmlns="http://gramps-project.org/xml/%s/">
<header>
<created date="%04d-%02d-%02d" version="%s"/>
<researcher>\n </researcher>
</header>
""" % (GRAMPS_XML_VERSION, GRAMPS_XML_VERSION, GRAMPS_XML_VERSION,
date[0], date[1], date[2], VERSION)
def tearDown(self):
self.style.freeStylesheet()
self.basedoc.freeDoc()
def canonicalize(self, doctxt):
"""
Return a canonicalized string representation
:param doctxt: the text to bring in canonical form.
:type doctxt: either a string or an Xml document.
:returns: The text but in canonical form.
:rtype: string
"""
result = ''
if isinstance(doctxt, basestring):
doc = libxml2.readDoc(doctxt, '', None, libxml2.XML_PARSE_NONET)
elif isinstance(doctxt, libxml2.xmlDoc):
doc = doctxt
else:
raise TypeError
param = {}
canonical_doc = self.style.applyStylesheet(doc, param)
result = self.style.saveResultToString(canonical_doc)
canonical_doc.freeDoc()
if isinstance(doctxt, basestring):
doc.freeDoc()
return result
def do_test(self, input_doc, expect_doc,
test_error_str='', debug=False):
"""Do the import and "assert" the result."""
process = subprocess.Popen('python gramps.py -d .Date -d .ImportXML '
'--config=preferences.eprefix:DEFAULT '
'-i - -f gramps '
'-e - -f gramps',
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True)
result_str, err_str = process.communicate(str(input_doc))
if err_str:
if test_error_str:
self.assertIn(test_error_str, err_str)
return
else:
if "Traceback (most recent call last):" in err_str:
raise Exception(err_str)
if debug:
print('err_str:', err_str)
print('input :', self.canonicalize(input_doc))
print('result:', self.canonicalize(result_str))
print('expect:', self.canonicalize(expect_doc))
self.assertEqual(self.canonicalize(result_str),
self.canonicalize(expect_doc))
class DateTest(BaseImportTest):
def setUp(self):
self.base_setup()
self.events_str = """
<events>
<event handle="_e0000" id="E0000">
<type>Birth</type>
{datexml}
<description>Event 0</description>
</event>
</events>
</database>"""
self.datexml_src = self.datexml_trg = None
def tearDown(self):
self.basedoc = libxml2.readDoc(
self.base_str + self.events_str.format(datexml=self.datexml_src),
'', None, libxml2.XML_PARSE_NONET)
expect = libxml2.readDoc(
self.base_str + self.events_str.format(datexml=self.datexml_trg),
'', None, libxml2.XML_PARSE_NONET)
try:
self.do_test(self.basedoc, expect)
except:
raise
finally:
expect.freeDoc()
def test_correct_dateval_passed_verbatim(self):
self.datexml_trg = self.datexml_src = \
'<dateval val="1787-05-20"/>'
def test_correct_daterange_passed_verbatim(self):
self.datexml_trg = self.datexml_src = \
'<daterange start="1746" stop="1755"/>'
def test_dateval_long_Feb_converted_to_datestr(self):
self.datexml_src = '<dateval val="1787-02-30"/>'
self.datexml_trg = '<datestr val="&lt;dateval val=&quot;1787-02-30&quot;/&gt;"/>'
def test_datespan_long_Feb_converted_to_datestr(self):
self.datexml_src = '<datespan start="1746-02-30" stop="2000"/>'
self.datexml_trg = '<datestr val="&lt;datespan start=&quot;1746-02-30&quot; stop=&quot;2000&quot;/&gt;"/>'
if __name__ == "__main__":
import sys
if not HAS_EXPORTRAW:
print('This program needs the third party "ExportRaw" plugin.', file=sys.stderr)
sys.exit(1)
unittest.main()