diff --git a/ChangeLog b/ChangeLog index beb7096d0..9418398a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2007-11-13 Jim Sack + * src/test/regrtest.py: undo removal of module imports in scan-for-tests + * src/GrampsDbUtils/test/_GrampsDbWRFactories_test.py: minor improve. + * src/GrampsDbUtils/test/_GedcomChar_test.py: add tests for utf8 + The regrtest had an unwise "optimization" that actually broke the + ..Factories test. A comment was left in place where the un-doing was + done because it seems like a good thing to document. + 2007-11-13 Martin Hawlisch * src/TipOfDay.py: Catch exceptions when parsing the tips file. diff --git a/src/GrampsDbUtils/test/_GedcomChar_test.py b/src/GrampsDbUtils/test/_GedcomChar_test.py index abf27dc00..e7f4d88a2 100644 --- a/src/GrampsDbUtils/test/_GedcomChar_test.py +++ b/src/GrampsDbUtils/test/_GedcomChar_test.py @@ -31,9 +31,12 @@ def gen_chars(filename, encoding): """write generic test chars as given file and encoding""" if not os.path.exists(filename): codecs.open(filename, "wb", encoding).write(utest_chars) - + +### class Test1_ansi(unittest.TestCase): + """Test original "ANSI" reader and codecs: latin, cp1252""" enc = "latin-1" + cp = "cp1252" fil = os.path.join(cdir,enc) exp = utest_chars @@ -50,7 +53,13 @@ class Test1_ansi(unittest.TestCase): got=codecs.open(s.fil, encoding=s.enc).read() s.assertEquals(got,s.exp, m(got,s.exp, "using codec %s" % s.enc)) + def test1c_read_codec_cp1252(s): + got=codecs.open(s.fil, encoding=s.cp).read() + s.assertEquals(got,s.exp, m(got,s.exp, "using codec %s" % s.cp)) + +### class Test2_ansel(unittest.TestCase): + """Test original AnselReader (later: ansel codec)""" enc = "ansel" afil = os.path.join(cdir,enc) exp = a2u @@ -63,7 +72,91 @@ class Test2_ansel(unittest.TestCase): ra = G.AnselReader(f) got = ra.readline() s.assertEquals(got,s.exp, m(got,s.exp, "AnselReader")) - + +### +class Test3(unittest.TestCase): + """Test original UTF8Reader and codecs: utf_8, utf_8_sig + with no 
'BOM' (sig) in input (the common case) + + out of curiosity, verify behavior reading a 1-byte file + """ + enc = "utf_8" + enc_sig = enc + "_sig" + ufil = os.path.join(cdir, "chars.utf8") + f1byte = os.path.join(cdir, "1byte") + exp = utest_chars + + def setUp(s): + gen_chars(s.ufil, s.enc) + if not os.path.exists(s.f1byte): + open(s.f1byte, "wb").write("1") + + def test3a_u8_UTF8Reader_NO_BOM_sig(s): + f=open(s.ufil) + ra=G.UTF8Reader(f) + g = ra.readline() + s.assertEquals(g,s.exp, m(g,s.exp, "orig UTF8Reader")) + r2 = G.UTF8Reader(open(s.f1byte)) + g = r2.readline() + s.assertEquals(g,"1", + m(g,"1", "read 1-byte file")) + + # NB: utf_8 reads data and never expects a BOM-sig + def test3b_utf8_codec_NO_BOM_sig_as_expected(s): + g=codecs.open(s.ufil, encoding=s.enc).read() + s.assertEquals(g,s.exp, m(g,s.exp, "codec utf8")) + g=codecs.open(s.f1byte, encoding=s.enc).read() + s.assertEquals(g,"1", m(g,"1", "codec utf8")) + + # NB: utf_8_sig reads data even absent a BOM-sig (GOOD!) + def test3c_utf8_sig_codec_NO_BOM_sig_tolerated_GOOD(s): + g=codecs.open(s.ufil, encoding=s.enc_sig).read() + s.assertEquals(g,s.exp, + m(g,s.exp, "codec utf_8_sig NO sig input")) + g=codecs.open(s.f1byte, encoding=s.enc_sig).read() + s.assertEquals(g,"1", + m(g,"1", "codec utf_8_sig NO sig input")) + +### +class Test4(unittest.TestCase): + """Test original UTF8Reader and codecs: utf_8, utf_8_sig + with 'BOM' (sig) in input (uncommon, [some?] MS Windows only?) 
+ """ + enc = "utf_8" + enc_sig = enc + "_sig" + ufil = os.path.join(cdir, "chars.utf8_sig") + exp = utest_chars + + def setUp(s): + gen_chars(s.ufil, s.enc_sig) + + def test4a_u8_UTF8Reader_WITH_BOM_sig(s): + f=open(s.ufil) + ra=G.UTF8Reader(f) + g = ra.readline() + s.assertEquals(g,s.exp, m(g,s.exp, "orig UTF8Reader")) + + # utf_8 reads an initial BOM-sig as data -- oops, pity + # write the test to verify this known codec behavior + # ==> Recommend: do not use utf8 as input codec (use utf_8_sig) + def test4b_utf8_codec_WITH_BOM_sig_reads_as_data_PITY(s): + g=codecs.open(s.ufil, encoding=s.enc).read() + e0=u'\ufeff' + s.assertEquals(g[0], e0, + m(g[0],e0, "codec utf8 reads 'BOM'-sig as data" )) + g = g[1:] + s.assertEquals(g,s.exp, + m(g,s.exp, "codec utf8 reads rest of data ok")) + + # utf_8_sig reads and ignores the BOM-sig + def test4c_utf8_sig_codec_WITH_BOM_sig_as_expected(s): + g=codecs.open(s.ufil, encoding=s.enc_sig).read() + s.assertEquals(g,s.exp, + m(g,s.exp, "codec utf_8_sig NO sig input")) + +### + + if __name__ == "__main__": unittest.main() diff --git a/src/GrampsDbUtils/test/_GrampsDbWRFactories_test.py b/src/GrampsDbUtils/test/_GrampsDbWRFactories_test.py index 9a09806af..14fe79a07 100644 --- a/src/GrampsDbUtils/test/_GrampsDbWRFactories_test.py +++ b/src/GrampsDbUtils/test/_GrampsDbWRFactories_test.py @@ -2,7 +2,7 @@ import unittest from test import test_util as tu -tu.path_append_parent() +par = tu.path_append_parent() ### class Test1(unittest.TestCase): @@ -13,6 +13,9 @@ class Test1(unittest.TestCase): """ def test1a_buried_imports(s): + import sys + s.assertTrue(par in sys.path, + "par %r has to be in path!" 
% par) ilist = ( "_WriteGrdb", "_WriteXML", diff --git a/src/test/regrtest.py b/src/test/regrtest.py index f78335965..34cb12f54 100755 --- a/src/test/regrtest.py +++ b/src/test/regrtest.py @@ -80,7 +80,8 @@ def getTestSuites(loc=gramps_root): paths = [(path,files) for path,dirs,files in os.walk(loc) \ if test_mod(path,dirs) and match_mod(files)] - oldpath = list(sys.path) + ## NO -- see explanation below + ## oldpath = list(sys.path) for (dir,test_modules) in paths: sys.path.append(dir) @@ -96,8 +97,16 @@ def getTestSuites(loc=gramps_root): perf_suites.append(mod.perfSuite()) except: pass - # remove temporary paths added - sys.path = list(oldpath) + # NO: was: remove temporary paths added + # this seems like it should be reasonable, + # but it causes failure in _GrampsDbWRFactories_test.py + # (I suspect it is an actual bug in the runner + # but the easiest fix is to keep the imports, + # which is what other loaders seem to do) + # ==> this aspect of test frameworks is *hard* + ## NO -- do NOT: + ## remove temporary paths added + ## sys.path = list(oldpath) return (test_suites,perf_suites)