From 36d91fc68bc777f13447df9573169e2ad2a338fa Mon Sep 17 00:00:00 2001 From: Vassilii Khachaturov Date: Sat, 23 Nov 2013 21:23:01 +0200 Subject: [PATCH] 7212: convert invalid dates to text on import Back-port from gramps40 [cca72aa] and [befcd21] Unit test run as follows (4/4 passed): LANG=en_GB.utf8 python plugins/import/test/ImportXml_test.py -v --- src/plugins/import/ImportXml.py | 39 ++++- src/plugins/import/test/ImportXml_test.py | 197 ++++++++++++++++++++++ 2 files changed, 230 insertions(+), 6 deletions(-) create mode 100644 src/plugins/import/test/ImportXml_test.py diff --git a/src/plugins/import/ImportXml.py b/src/plugins/import/ImportXml.py index 9833035ee..29eddabb4 100644 --- a/src/plugins/import/ImportXml.py +++ b/src/plugins/import/ImportXml.py @@ -33,6 +33,7 @@ import os import sys import time from xml.parsers.expat import ExpatError, ParserCreate +from xml.sax.saxutils import escape from gen.ggettext import gettext as _ import re import logging @@ -2240,10 +2241,17 @@ class GrampsParser(UpdateCallback): else: newyear = gen.lib.Date.newyear_to_code(newyear) - date_value.set(qual, mode, cal, - (day, month, year, dualdated, - rng_day, rng_month, rng_year, dualdated), - newyear=newyear) + try: + date_value.set(qual, mode, cal, + (day, month, year, dualdated, + rng_day, rng_month, rng_year, dualdated), + newyear=newyear) + except gen.lib.DateError as e: + self._set_date_to_xml_text(date_value, e, + xml_element_name = ("datespan" if + mode == gen.lib.Date.MOD_SPAN + else "daterange"), + xml_attrs = attrs) def start_dateval(self, attrs): if self.citation: @@ -2321,8 +2329,27 @@ class GrampsParser(UpdateCallback): else: newyear = gen.lib.Date.newyear_to_code(newyear) - date_value.set(qual, mod, cal, (day, month, year, dualdated), - newyear=newyear) + try: + date_value.set(qual, mod, cal, (day, month, year, dualdated), + newyear=newyear) + except gen.lib.DateError as e: + self._set_date_to_xml_text(date_value, e, 'dateval', attrs) + + def 
_set_date_to_xml_text(self, date_value, date_error, xml_element_name, xml_attrs): + """ + Common handling of invalid dates for the date... element handlers. + + Prints warning on console and sets date_value to a text-only date + with the problematic XML inside. + """ + xml = "<{element_name} {attrs}/>".format( + element_name = xml_element_name, + attrs = " ".join( + ['{}="{}"'.format(k,escape(v, entities={'"' : "&quot;"})) + for k,v in xml_attrs.iteritems()])) + LOG.warning(_("Invalid date {} in XML {}, preserving XML as text" + ).format(date_error.date.dateval, xml)) + date_value.set(modifier=gen.lib.Date.MOD_TEXTONLY, text=xml) def start_datestr(self, attrs): if self.citation: diff --git a/src/plugins/import/test/ImportXml_test.py b/src/plugins/import/test/ImportXml_test.py new file mode 100644 index 000000000..ded9c2455 --- /dev/null +++ b/src/plugins/import/test/ImportXml_test.py @@ -0,0 +1,197 @@ +# Gramps - a GTK+/GNOME based genealogy program +# +# Copyright (C) 2011 Michiel D. Nauta +# Copyright (C) 2013 Vassilii Khachaturov +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Test XML import. 
+""" +from __future__ import print_function, unicode_literals + +import unittest +import time +import sys +import os +sys.path.append(os.curdir) +sys.path.append(os.path.join(os.curdir, 'plugins', 'lib')) +import subprocess +import libxml2 +import libxslt + +from libgrampsxml import GRAMPS_XML_VERSION +from const import ROOT_DIR, VERSION, USER_PLUGINS + +HAS_EXPORTRAW = os.path.isdir(os.path.join(USER_PLUGINS, 'ExportRaw')) + +class CopiedDoc(object): + """Context manager that creates a deep copy of an libxml-xml document.""" + def __init__(self, xmldoc): + self.xmldoc = xmldoc + self.copy = libxml2.readDoc(str(self.xmldoc), '', None, + libxml2.XML_PARSE_NONET) + + def __enter__(self): + return self.copy + + def __exit__(self, exc_type, exc_val, exc_tb): + self.copy.freeDoc() + return False + +class XpathContext(object): + """Context manager that creates a libxml2 xpath context that allows + evaluation of xpath expressions.""" + def __init__(self, xmldoc): + self.xmldoc = xmldoc + self.ctxt = self.xmldoc.xpathNewContext() + self.ctxt.xpathRegisterNs('g', 'http://gramps-project.org/xml/%s/' % + GRAMPS_XML_VERSION) + + def __enter__(self): + return self.ctxt + + def __exit__(self, exc_type, exc_val, exc_tb): + self.ctxt.xpathFreeContext() + return False + +@unittest.skipUnless(HAS_EXPORTRAW, + 'These tests need the 3rd-party plugin "ExportRaw".') +class BaseImportTest(unittest.TestCase): + def base_setup(self): + """Set up code needed by all tests.""" + date = time.localtime(time.time()) + libxml2.keepBlanksDefault(0) + styledoc = libxml2.parseFile(os.path.join(ROOT_DIR, + "../data/gramps_canonicalize.xsl")) + self.style = libxslt.parseStylesheetDoc(styledoc) + self.basedoc = None + self.base_str = """ + + +
+ + \n +
+ """ % (GRAMPS_XML_VERSION, GRAMPS_XML_VERSION, GRAMPS_XML_VERSION, + date[0], date[1], date[2], VERSION) + + def tearDown(self): + self.style.freeStylesheet() + self.basedoc.freeDoc() + + def canonicalize(self, doctxt): + """ + Return a canonicalized string representation + + :param doctxt: the text to bring in canonical form. + :type doctxt: either a string or an Xml document. + :returns: The text but in canonical form. + :rtype: string + """ + result = '' + if isinstance(doctxt, basestring): + doc = libxml2.readDoc(doctxt, '', None, libxml2.XML_PARSE_NONET) + elif isinstance(doctxt, libxml2.xmlDoc): + doc = doctxt + else: + raise TypeError + param = {} + canonical_doc = self.style.applyStylesheet(doc, param) + result = self.style.saveResultToString(canonical_doc) + canonical_doc.freeDoc() + if isinstance(doctxt, basestring): + doc.freeDoc() + return result + + def do_test(self, input_doc, expect_doc, + test_error_str='', debug=False): + """Do the import and "assert" the result.""" + process = subprocess.Popen('python gramps.py -d .Date -d .ImportXML ' + '--config=preferences.eprefix:DEFAULT ' + '-i - -f gramps ' + '-e - -f gramps', + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True) + result_str, err_str = process.communicate(str(input_doc)) + if err_str: + if test_error_str: + self.assertIn(test_error_str, err_str) + return + else: + if "Traceback (most recent call last):" in err_str: + raise Exception(err_str) + if debug: + print('err_str:', err_str) + print('input :', self.canonicalize(input_doc)) + print('result:', self.canonicalize(result_str)) + print('expect:', self.canonicalize(expect_doc)) + self.assertEqual(self.canonicalize(result_str), + self.canonicalize(expect_doc)) + +class DateTest(BaseImportTest): + def setUp(self): + self.base_setup() + self.events_str = """ + + + Birth + {datexml} + Event 0 + + +
""" + self.datexml_src = self.datexml_trg = None + + def tearDown(self): + self.basedoc = libxml2.readDoc( + self.base_str + self.events_str.format(datexml=self.datexml_src), + '', None, libxml2.XML_PARSE_NONET) + expect = libxml2.readDoc( + self.base_str + self.events_str.format(datexml=self.datexml_trg), + '', None, libxml2.XML_PARSE_NONET) + try: + self.do_test(self.basedoc, expect) + except: + raise + finally: + expect.freeDoc() + + def test_correct_dateval_passed_verbatim(self): + self.datexml_trg = self.datexml_src = \ + '' + + def test_correct_daterange_passed_verbatim(self): + self.datexml_trg = self.datexml_src = \ + '' + + def test_dateval_long_Feb_converted_to_datestr(self): + self.datexml_src = '' + self.datexml_trg = '' + + def test_datespan_long_Feb_converted_to_datestr(self): + self.datexml_src = '' + self.datexml_trg = '' + + +if __name__ == "__main__": + import sys + if not HAS_EXPORTRAW: + print('This program needs the third party "ExportRaw" plugin.', file=sys.stderr) + sys.exit(1) + unittest.main()