Remove GEDCOM tests - they are broken and not used.
svn: r13914
This commit is contained in:
		| @@ -1,146 +0,0 @@ | ||||
| """ GR_test.py | ||||
|  | ||||
| This is a first try at some gedcom read testing that does not | ||||
| require running a gramps CLI | ||||
|  | ||||
| The biggest difficulty is that every test fragment needs custom | ||||
| test code. Maybe that's unavoidable, and the best that can be | ||||
| done is to group similar tests together, so that setUp can be | ||||
| shared. | ||||
|  | ||||
| Maybe more can be shared: one commonly used test recipe is | ||||
| to develop a data structure that can be looped over to test | ||||
| similar fragments with the same piece of test code, putting | ||||
| fragments and possible control or other validation information | ||||
| in the data structure. | ||||
|  | ||||
| A controlling principle for such structures is that they should be | ||||
| designed for maximum ease (and intuitiveness) of data declaration | ||||
| """ | ||||
|  | ||||
| import os.path as op | ||||
|  | ||||
| import unittest as U | ||||
| import re | ||||
|  | ||||
| from test import test_util as tu | ||||
| from test import gedread_util as gr | ||||
|  | ||||
|  | ||||
| # NoteSource_frag | ||||
| #  tests structure: NOTE > SOUR | ||||
| #  using the 2 formats of the SOUR element | ||||
| # bug #(?) does not properly ignore the SOUR | ||||
| # test by looking for warning messages resulting | ||||
| # from parse_record seeing the non-skipped SOUR  | ||||
| # | ||||
| # SB: the NOTE data should contain the SOUR or xref | ||||
| #  but this is NYI (SOUR is ignored within NOTE) | ||||
| # ----------------------------------------------- | ||||
|  | ||||
|  | ||||
| # | ||||
| # numcheck based testing | ||||
| #   verifies the number of db items via a get_number_of_X() call | ||||
| #   returns an error string or None  | ||||
| # | ||||
| # ? candidate for inclusion in gedread_util.py | ||||
| # | ||||
class nc():
    """nc object -- creates a numcheck function

    instantiate a nc object as follows
      c = nc("people", 4)
    and call, passing the database, as follows
      err = c(db)
    which will check for exactly 4 people in the db
    and return a displayable message on error, else None

    NB: name _must_ match the X names in db get_number_of_X
    """
    def dbncheck(self, dbcall):
        """Run dbcall() and compare the result against the expected count.

        Returns an error string on mismatch, else None.
        """
        err = None
        got = dbcall()
        if got != self.num:
            err = "%s: got %d, expected %d" % (self.name, got, self.num)
        return err
    def __init__(self, name, num):
        self.name = name   # plural item name, eg "people", "notes"
        self.num = num     # expected count
        self.getname = "get_number_of_" + name
    def __call__(self, db):
        dbcall = getattr(db, self.getname)
        # BUGFIX: the check result was computed but never returned, so
        # every check appeared to pass; propagate it to the caller
        return self.dbncheck(dbcall)
|  | ||||
class fnci():
    """fnci (frag-numcheckset item): bundle one gedcom fragment with
    the sequence of nc checks that validate it after a read.
    """
    def __init__(self, frag, ncset):
        # store both exactly as given; consumers read the attributes
        self.frag = frag
        self.ncset = ncset
# test data table for Test1.test1a_numchecks
# each fnci pairs a gedcom fragment with the nc count-checks that
# must hold after the fragment has been read into a fresh db
# (NB: the fragment literals contain significant trailing whitespace)
fnumchecks = (
    fnci("""0 @N1@ NOTE Note containing embedded source
1 SOUR embedded source""", 
            (nc("notes", 1),)
        ),
    fnci("""0 @N2@ NOTE Note containing referenced source
1 SOUR @SOUR1@
0 @SOUR1@ SOUR 
1 TITL Phoney source title""",
            (nc("notes", 1), nc("sources",1),)
        ),
    )#end fnumchecks
|  | ||||
|  | ||||
| # | ||||
| # ? candidate for inclusion in test_util.py | ||||
| # | ||||
| def _checklog(tlogger, pat=None): | ||||
|     # look for absence of specific messages in log | ||||
|     matches = 0 | ||||
|     ltext = tlogger.logfile_getlines() | ||||
|     if ltext: | ||||
|         if pat is None: | ||||
|             matches = len(ltext) | ||||
|         else: | ||||
|             pat = re.compile(pat) | ||||
|             for l in ltext: | ||||
|                 match = re.match(pat, l)  | ||||
|                 if match: | ||||
|                     matches += 1 | ||||
|                     # debugging | ||||
|                     print "(%d) %r" % (matches, match) | ||||
|     return matches | ||||
|          | ||||
|  | ||||
|  | ||||
class Test1(U.TestCase):
    """Drive the fnumchecks data table through a gedcom read cycle.

    NOTE(review): depends on the project test utilities tu (subdir and
    logger helpers) and gr (gedcom read helpers); not stdlib-runnable.
    """
    def setUp(self):
        # make a test subdir and compose some pathnames
        self.tdir = tu.make_subdir("RG_test")
        self.tdb = op.join(self.tdir,"test_db")
        self.ifil = op.join(self.tdir,"test_in.ged")
        self.lfil = op.join(self.tdir,"test.log")

    def test1a_numchecks(self):
        # one logger instance reused across all table entries
        tl = tu.TestLogger()
        for i,f in enumerate(fnumchecks):
            # write the fragment (wrapped in the canned head/tail),
            # then read it into a fresh empty db, capturing log output
            gr.make_gedcom_input(self.ifil, f.frag)
            db = gr.create_empty_db(self.tdb)
            tl.logfile_init(self.lfil)
            gr.gread(db,self.ifil)
            # "Line NNN" warnings indicate non-skipped/misparsed input
            errs = _checklog(tl, r"Line \d+")
            self.assertEquals(errs, 0,
                "ncset(%d): got %d unexpected log messages" %
                (i,errs))
            # ok, no log error message, check db counts
            for call in f.ncset:
                err = call(db)
                self.assertFalse(err, err)

if __name__ == "__main__":
    U.main()
| @@ -1,164 +0,0 @@ | ||||
| #!/usr/bin/env python | ||||
| import unittest | ||||
| import os.path | ||||
| import codecs | ||||
| import struct | ||||
|  | ||||
| from test import test_util as tu | ||||
| m = tu.msg | ||||
|  | ||||
| par = tu.path_append_parent() | ||||
| here = tu.absdir() | ||||
|  | ||||
| import _GedcomChar as G | ||||
|  | ||||
# directory that holds the generated test-data files
cdir = tu.make_subdir("test_data")

# unicode block "latin1 supplement" chars (U+00A0..U+00FF) plus newline
utest_chars = "".join(map(unichr, range(0xA0,0x100))) + "\n"

# 12 ansel test chars (raw 8-bit bytes, here)
# NB: the list holds 13 entries -- 12 ansel codes plus the 0x0a newline
atest_list = range(0xa1,0xa7) + range(0xb1,0xb7) + [0x0a,]
atest_bytes = struct.pack("B"*13, *atest_list)

# unicode mappings of above (http://www.gymel.com/charsets/ANSEL.html)
a2u = u"".join(map(unichr, (
    0x141, 0xd8, 0x110, 0xde, 0xc6, 0x152,
    0x142, 0xf8, 0x111, 0xfe, 0xe6, 0x153,
    0x0a, )))
|  | ||||
def gen_chars(filename, encoding):
    """write generic test chars as given file and encoding

    The file is created only if it does not already exist, so
    repeated test runs reuse the same data file.
    """
    if not os.path.exists(filename):
        # context manager closes the handle even if the write fails
        # (the original relied on refcounting to close the file)
        with codecs.open(filename, "wb", encoding) as f:
            f.write(utest_chars)
|  | ||||
###
class Test1_ansi(unittest.TestCase):
    """Test original "ANSI" reader and codecs: latin, cp1252"""
    # class-level fixtures: input file named after the encoding,
    # expected text is the shared latin1-supplement sample
    enc = "latin-1"
    cp = "cp1252"
    fil = os.path.join(cdir,enc)
    exp  = utest_chars
    
    def setUp(self):
        # (re)create the latin-1 encoded input file if absent
        gen_chars(self.fil, self.enc)

    def test1a_read_ansi(self):        
        # the reader under test wraps an already-open byte stream
        f = open(self.fil)
        ra= G.AnsiReader(f)
        got = ra.readline()
        self.assertEquals(got,self.exp, m(got,self.exp, "AnsiReader"))

    def test1b_read_codec_latin1(self):
        got=codecs.open(self.fil, encoding=self.enc).read()
        self.assertEquals(got,self.exp, m(got,self.exp, "using codec %s" % self.enc))

    def test1c_read_codec_cp1252(self):
        # NOTE(review): expects cp1252 to decode these bytes identically
        # to latin-1 (the sample stays in the 0xA0-0xFF range)
        got=codecs.open(self.fil, encoding=self.cp).read()
        self.assertEquals(got,self.exp, m(got,self.exp, "using codec %s" % self.cp))
|  | ||||
###
class Test2_ansel(unittest.TestCase):
    """Test original AnselReader (later: ansel codec)"""
    enc = "ansel"
    afil = os.path.join(cdir,enc)
    # expected unicode for the raw ansel bytes written in setUp
    exp  = a2u
    
    def setUp(self):
        # write the raw ansel byte sample; rewritten before every test
        open(self.afil, "wb").write(atest_bytes)

    def test2a_read_ansel(self):
        f = open(self.afil)
        ra = G.AnselReader(f)
        got = ra.readline()
        self.assertEquals(got,self.exp, m(got,self.exp, "AnselReader"))
|  | ||||
###
class Test3(unittest.TestCase):
    """Test original UTF8Reader and codecs: utf_8, utf_8_sig 
    with no 'BOM' (sig) in input (the common case) 

    out of curiosity, verify behavior reading a 1-byte file
    """
    enc = "utf_8"
    enc_sig = enc + "_sig"
    ufil = os.path.join(cdir, "chars.utf8")
    f1byte = os.path.join(cdir, "1byte")
    exp  = utest_chars
    
    def setUp(self):
        gen_chars(self.ufil, self.enc)
        # 1-byte edge-case file, created once and reused
        if not os.path.exists(self.f1byte):
            open(self.f1byte, "wb").write("1")
 
    def test3a_u8_UTF8Reader_NO_BOM_sig(self):
        f=open(self.ufil)
        ra=G.UTF8Reader(f)
        g = ra.readline()
        self.assertEquals(g,self.exp, m(g,self.exp, "orig UTF8Reader"))
        # also verify the reader copes with a 1-byte file
        r2 = G.UTF8Reader(open(self.f1byte))
        g = r2.readline()
        self.assertEquals(g,"1", 
            m(g,"1", "read 1-byte file"))
  
    # NB: utf_8 reads data and never expects a BOM-sig
    def test3b_utf8_codec_NO_BOM_sig_as_expected(self):
        g=codecs.open(self.ufil, encoding=self.enc).read()
        self.assertEquals(g,self.exp, m(g,self.exp, "codec utf8"))
        g=codecs.open(self.f1byte, encoding=self.enc).read()
        self.assertEquals(g,"1", m(g,"1", "codec utf8"))
  
    # NB: utf_8_sig reads data even absent a BOM-sig (GOOD!)
    def test3c_utf8_sig_codec_NO_BOM_sig_tolerated_GOOD(self):
        g=codecs.open(self.ufil, encoding=self.enc_sig).read()
        self.assertEquals(g,self.exp, 
            m(g,self.exp, "codec utf_8_sig NO sig input"))
        g=codecs.open(self.f1byte, encoding=self.enc_sig).read()
        self.assertEquals(g,"1", 
            m(g,"1", "codec utf_8_sig NO sig input"))
|  | ||||
###
class Test4(unittest.TestCase):
    """Test original UTF8Reader and codecs: utf_8, utf_8_sig
    with 'BOM' (sig) in input (uncommon, [some?] MS Windows only?) 
    """
    enc = "utf_8"
    enc_sig = enc + "_sig"
    ufil = os.path.join(cdir, "chars.utf8_sig")
    exp  = utest_chars
    
    def setUp(self):
        # the utf_8_sig encoding writes a leading BOM into the file
        gen_chars(self.ufil, self.enc_sig)
 
    def test4a_u8_UTF8Reader_WITH_BOM_sig(self):
        f=open(self.ufil)
        ra=G.UTF8Reader(f)
        g = ra.readline()
        self.assertEquals(g,self.exp, m(g,self.exp, "orig UTF8Reader"))
   
    # utf_8 reads an initial BOM-sig as data -- oops, pity
    #  write the test to verify this known codec behavior 
    # ==> Recommend: do not use utf8 as input codec (use utf_8_sig)
    def test4b_utf8_codec_WITH_BOM_sig_reads_as_data_PITY(self):
        g=codecs.open(self.ufil, encoding=self.enc).read()
        e0=u'\ufeff'
        self.assertEquals(g[0], e0, 
            m(g[0],e0, "codec utf8 reads 'BOM'-sig as data" ))
        g = g[1:]
        self.assertEquals(g,self.exp, 
            m(g,self.exp, "codec utf8 reads rest of data ok"))
  
    # utf_8_sig reads and ignores the BOM-sig
    def test4c_utf8_sig_codec_WITH_BOM_sig_as_expected(self):
        g=codecs.open(self.ufil, encoding=self.enc_sig).read()
        self.assertEquals(g,self.exp, 
            m(g,self.exp, "codec utf_8_sig NO sig input"))

###



if __name__ == "__main__":
    unittest.main()

#===eof===
| @@ -1,84 +0,0 @@ | ||||
| """test for GedcomParse empty notes and skipped subordinates  | ||||
|  | ||||
| Empty notes are discarded -- with a warning | ||||
| Skipped subordinate data also produce warnings | ||||
| """ | ||||
|  | ||||
| import os | ||||
| import unittest as U | ||||
| import logging | ||||
|  | ||||
| from test import test_util as tu | ||||
| from test import gedread_util as gr | ||||
|  | ||||
|  | ||||
class Test(U.TestCase):
    """ this test verifies fix for bug 1851 

    Reads a gedcom fragment containing empty notes and junk
    subordinate lines, then checks warning counts and surviving
    note counts against expectations.
    """
    def setUp(self):
        # make a dir to hold an input gedcom file
        self.tdir = tu.make_subdir("gsn_test")

    def test1(self):
        # NB: the line numbers cited in comments below ("lines 32,39,40"
        # etc.) refer to source lines of the original test file
        test_fragment="""
0 @I1@ INDI
1 NAME Adam /TheFirst/
1 DEAT
2 DATE EST 2 FEB 2000
2 PLAC Bdorf
2 ADDR Haus Nr. 44
2 NOTE this one should stay
2 NOTE 
3 CONT this should stay too, but
3 CONT next one SB skipped (empty)
2 NOTE
0 @N101@ NOTE a real note
0 @N102@ NOTE
1 CONT a real continuation-only note
1 CONT should skip next (N103 empty)
2 JUNK2
3 JUNK3
0 @N103@ NOTE
0 @N199@ NOTE
1 SOUR @S987@
"""
        # expect warnings for discarded empty notes above
        wNotes=3 # lines 32,39,40 [here]
        # expect warnings for skipped subordinates (blocks) above
        # remember consecutive lines skipped give 1 message
        wSubs=2  # lines (37+38), 41 

        # create a gedcom input file
        #  from canned head/tail -- see gedread_util
        infil = os.path.join(self.tdir,"test_in.ged")
        gr.make_gedcom_input(infil, test_fragment)
        self.assertTrue(os.path.isfile(infil), 
            "create input file %s" % infil)

        # create an empty database
        dbpath = os.path.join(self.tdir,"test_db")
        db = gr.create_empty_db(dbpath)
        self.assertTrue(os.path.isdir(dbpath),
            "create database (dir) %s" % dbpath)

        # create logfile to test for read log-messages
        # (note: uses recently added test_util)
        log = os.path.join(self.tdir, "test_log")
        tl = tu.TestLogger()
        tl.logfile_init(log)
        # now read the gedcom
        gr.gread(db, infil)
        loglines = tl.logfile_getlines()
        #NB incorrect SUBM handling causes one extraneous warning
        xWarns = 1
        ll = len(loglines)
        tot = xWarns+wNotes+wSubs
        self.assertEquals(ll,tot, tu.msg(ll,tot,
            "log has expected warning content"))
        # verify notes that survive
        numNotes = 4
        nn = db.get_number_of_notes()
        self.assertEquals(nn,numNotes, 
            tu.msg(nn,numNotes, "db has right number of notes"))

if __name__ == "__main__":
    U.main()
| @@ -1,257 +0,0 @@ | ||||
| #!/usr/bin/python -tt | ||||
|  | ||||
| # Instructions for use | ||||
| # -------------------- | ||||
| # Eventually, this code might use a testing infrastructure (conventions TBD) | ||||
| # but, at present this is intended for use as a manual operation by placing | ||||
| # this file (temporarily) in the same dir as the module it tests. | ||||
| # | ||||
| # Running  | ||||
| #   ./test-ansel_utf8.py [-v] | ||||
| # should report 'OK' | ||||
| #   the -v option shows individual results for each test function | ||||
| # --------------------------------------------------------------------------- | ||||
|  | ||||
| # TODO | ||||
| # --------------------------------------------------------- | ||||
| # make table of test cases for readability | ||||
| # ansel U+xxxx UTF8 char-name string (char where appl) | ||||
| # --------------------------------------------------------- | ||||
|  | ||||
| import ansel_utf8 as A | ||||
| import unittest | ||||
|  | ||||
# debugging provision to capture some strings for external examination
# note that this debug output is ASCII, by virtue of using `` (repr)
OUT=0
if OUT > 0:
    import sys
#  set output levels 1,2,4 (or-ing ok) for string (repr) in tests 1a,1b,2a
#  then manipulate that data with separate tools for additional validation
# tools refs:
#    http://search.cpan.org/~esummers/MARC-Charset-0.98/lib/MARC/Charset.pm
#    http://pypi.python.org/pypi/pymarc
# --- 
# (perl) MARC::Charset
# (python) pymarc omits eszett,euro (patchable); only does ansel-to-utf8 
# shell: echo -e 'utf-8 encoded chars' works well
# ==> NB: when examining unicode characters (rather than hexdump) externally,
# it is absolutely essential to use a good unicode terminal for correct
# display of combining forms (other than precomposed)    
#    (eg: use xterm rather than konsole or gnome-terminal)
# ==> and of course, use a locale with the UTF-8 charset
|  | ||||
# test convenience utility extends python by showing got & expected (like perl)
#  useful at least for the commonly used assertEquals()
# conventions:
#  dup the expected and got parms from the assertEquals and add a message
#  (and an optional prefix to distinguish sub-tests)
# ==> code the assert as assertEquals(got, exp, msg(got,exp,mess,pfx))
def msg(got, expect, msgbase, prefix=""):
    """Return a got/expected failure-report string for assert messages.

    Uses repr() rather than the deprecated backtick repr syntax: the
    output is unchanged, but the code also parses on modern Pythons.
    """
    if prefix:
        prefix += ": "
    return "%s%s\n .....got %s\n expected %s" % (
        prefix, msgbase, repr(got), repr(expect))
|  | ||||
|  | ||||
class Test1(unittest.TestCase):
    """ test basic ansel_to_unicode and inversion """

    def test_1a(self):
        """ 1a: map ansel onebyte to unicode and inverse """
        # no combining chars here .. see later test for those
        # build parallel ansel/unicode strings from the module's
        # single-byte mapping table, then round-trip both ways
        count = 0
        sans  = ""
        suni  = u""
        for acode in sorted(A._onebyte.keys()):
            count += 1
            sans += acode
            suni += A._onebyte[acode] 
        if OUT & 1:    
            print "test1a: %d codes" % count
            print " ansel:%s" % `sans`
            print " utf-8:%s" % `suni.encode("utf-8")`  # U8 for debugging
            sys.stdout.flush()
        a2u = A.ansel_to_utf8(sans)
        self.assertEquals(a2u,suni, msg(a2u,suni, "map onebyte ansel to unicode"))
        u2a = A.utf8_to_ansel(suni)
        self.assertEquals(u2a,sans, msg(u2a, sans, "invert onebyte to unicode mapping"))

    def test_1b(self):
        """ 1b: map ansel twobyte to unicode and inverse """
        # these are the precomposed combining forms
        count = 0
        sans  = ""
        suni  = u""
        for acode in sorted(A._twobyte.keys()):
            count += 1
            sans += acode
            suni += A._twobyte[acode] 
        if OUT & 2:    
            print "test1b: %d codes" % count
            print " ansel:%s" % `sans`
            print " utf-8:%s" % `suni.encode("utf-8")` # U8
            sys.stdout.flush()
        a2u = A.ansel_to_utf8(sans)
        self.assertEquals(a2u,suni, msg(a2u,suni,"map twobyte ansel to unicode"))
        u2a = A.utf8_to_ansel(suni)
        self.assertEquals(u2a,sans, msg(u2a,sans, "invert twobyte to unicode mapping"))
|          | ||||
class Test2(unittest.TestCase):
    """ test unicode_to_ansel (basic precomposed forms) and inversion """

    def test_2a(self):
        """ 2a: unicode to ansel and inverse """
        # build parallel strings from the unicode-to-ansel table,
        # then round-trip in both directions
        count = 0
        sans  = ""
        suni  = u""
        for ucode in sorted(A._utoa.keys()):
            count += 1
            suni += ucode
            sans += A._utoa[ucode] 
        if OUT & 4:    
            print "test2a: %d codes" % count
            print " utf-8:%s" % `suni.encode("utf-8")` # U8
            print " ansel:%s" % `sans`
            sys.stdout.flush()
        u2a = A.utf8_to_ansel(suni)
        self.assertEquals(u2a,sans, msg(u2a,sans, "map unicode to ansel"))
        a2u = A.ansel_to_utf8(sans)
        self.assertEquals(a2u,suni, msg(a2u,suni, "invert unicode to ansel mapping"))
|  | ||||
class Test3(unittest.TestCase):
    """ test pass-through for matches with ansel ascii-subset """

    def test3a(self):
        """ 3a: ansel to unicode for matches with ascii and inverse """
        ascii_ok = "".join(A._use_ASCII)
        ascii_uni =  unicode(ascii_ok)
        a2u = A.ansel_to_utf8(ascii_ok)
        # could match with lengths wrong? can't hurt to test
        la = len(ascii_ok)
        la2u = len(a2u)
        self.assertEquals(la2u, la, msg(la2u, la, "ascii subset ansel to unicode lengths match"))
        self.assertEquals(a2u, ascii_uni, 
            msg(a2u, ascii_uni, "ascii subset ansel to unicode strings match"))
        a2u2a = A.utf8_to_ansel(a2u)
        self.assertEquals(a2u2a, ascii_ok, 
            msg(a2u2a, ascii_ok, "invert ascii subset ansel to unicode"))

    def test3b(self):
        """ 3b: (sample) non-matching ascii control chars map to space """
        # sample of control codes outside the allowed ascii subset
        for x in [0,1,8,9,11,26,28,127]:
            a2u = A.ansel_to_utf8(chr(x))
            self.assertEquals(a2u, unicode(' '), 
                msg(a2u, unicode(' '), "map disallowed ASCII to unicode space"))
            u2a = A.utf8_to_ansel(unichr(x))
            self.assertEquals(u2a, ' ',
                msg(u2a, ' ', "map unicode to space for disallowed ASCII"))
    
    def test3c(self):
        """ 3c: (sample) no-match ansel to unicode cases """
        # unmapped ansel bytes should yield U+FFFD (replacement char)
        for x in [0x80,0x87,0x9F,0xFF]:
            a2u = A.ansel_to_utf8(chr(x))
            self.assertEquals(a2u, u'\ufffd',
                msg(a2u, u'\ufffd', "ansel no-match should return unicode Replacement Char"))

    def test3d(self):
        """ 3d: (sample) no-match unicode to ansel cases """
        for x in [1024,4096, 65535]:
            u2a = A.utf8_to_ansel(unichr(x))
            self.assertEquals(u2a, '?', 
                msg(u2a, '?', "unicode no-match should return question mark"))
|  | ||||
class Test4(unittest.TestCase):
    """ test some special cases """

    def test4a(self):
        """ 4a: empty strings should return empty strings """
        self.assertEquals(A.ansel_to_utf8(""), u"", "empty a2u")
        self.assertEquals(A.utf8_to_ansel(u""), "", "empty u2a")

    def test4b_unmapped_combos(self):
        """ 4b: (sample) unmapped (non-precomposed) combinations """
        samples = ( 
            # ansel, unicode, failure-report-message .. see function msg()
            ("b\xE5Ze", u"bZ\u0304e", "b Z+macron e"),
            ( "\xE5Z",   u"Z\u0304", "Z+macron"),
            ("b\xE5Z\xE9Xe", u"bZ\u0304X\u030ce", "b Z+macron X+caron e"),
            ( "\xE5Z\xE9X",   u"Z\u0304X\u030c", "Z+macron X+caron"),
        )
        for a,u,m in samples:
            # ansel to unicode and inverse
            a2u=A.ansel_to_utf8(a)
            self.assertEquals(a2u, u, msg(a2u, u, m, "a2u"))
            a2u2a = A.utf8_to_ansel(a2u)
            self.assertEquals(a2u2a, a, msg(a2u2a, a, m, "a2u2a"))

            # unicode to ansel and inverse
            u2a = A.utf8_to_ansel(u)
            self.assertEquals(u2a, a, msg(u2a, a, m, "u2a"))
            u2a2u = A.ansel_to_utf8(u2a)
            self.assertEquals(u2a2u, u, msg(u2a2u, u, m, "u2a2u"))
        
    def test4c_multiple_combos(self):
        """ 4c: (a2u) ignore multiple combinations (include precomposed) """
        samples = (
            ("b\xF0\xE5Ze", u"bZ\u0304e", "b <cedilla> Z+macron e"),
            ( "\xF0\xE5Z",   u"Z\u0304", "<cedilla> Z+macron"),
            ("\xF0\xE5Z\xE9X", u"Z\u0304X\u030c", "<cedilla> Z+macron X+caron"),
            ("\xE5Z\xF0\xE9X", u"Z\u0304X\u030c", "Z+macron <cedilla> X+caron"),
            ('\xF0\xE5A', u'\u0100', "<cedilla> A+macron"),
            ("\xE5Z\xE5\xF0\xE9X", u"Z\u0304X\u030c", "Z+macron <macron> <cedilla> X+caron"),
        )
        for a,u,m in samples:
            a2u=A.ansel_to_utf8(a)
            self.assertEquals(a2u, u, msg(a2u,u,m, "a2u drop extra <combiners>"))

    def test4d_multiple_combos(self):
        """ 4d: (u2a) ignore multiple combinations (include precomposed) """
        samples = (
            ("b\xE5Ze", u"bZ\u0304\u0327e", "b Z+macron <cedilla> e"),
            ("\xE5Z\xE5A", u"Z\u0304\u0327\u0100", "Z+macron <cedilla> A+macron"),
            ("\xE5A\xE5Z", u"\u0100\u0327\u030cZ\u0304", "A+macron <cedilla> <caron> Z+macron"),
        )
        for a,u,m in samples:
            u2a=A.utf8_to_ansel(u)
            self.assertEquals(u2a, a, msg(u2a,a,m, "u2a drop extra <combiners>"))
|  | ||||
class Test99(unittest.TestCase):
    """ test regression cases """
    
    def test_99a(self):
        """ 99a: sanity check on counts """
        # the forward and reverse mapping tables must be the same size
        n1B= len(A._onebyte)
        n2B= len(A._twobyte)
        na = n1B+n2B
        nu = len(A._utoa)
        self.assertEquals(na, nu, msg(na, nu, "basic counts: a2u=u2a"))
        nac = len(A._acombiners)
        nuc = len(A._ucombiners)
        self.assertEquals(nac, nuc, msg(nac, nuc, "combiner counts: a2u=u2a"))

    def test_99b(self):
        """ 99b: fix incorrect mapping for ansel 0xAE
        
        It used-to-be U+02be but was changed March 2005 to U+02bc
        Note : the other revs per notes make double-wide combining
        char halves into an ambiguous mess -- let's ignore that!
            http://lcweb2.loc.gov/diglib/codetables/45.html
        might as well add validation of other additions, though
        """
        
        # (ansel, uni, msg)
        revs = (
            ('\xAE', u'\u02bc', "modifier right-half ring is now modifier Apostrophe"),
            ('\xC7', u'\xdf',   "added code for eszet"),
            ('\xC8', u'\u20ac', "added code for euro"),
        )
        for a, u, m in revs:
            g = A.ansel_to_utf8(a)
            self.assertEquals(g,u, 
            msg(g, u, m, "spec change"))

if __name__ == '__main__':
    unittest.main()

#===eof===
| @@ -1,130 +0,0 @@ | ||||
| """unittest support utilities for reading gedcom | ||||
|  | ||||
| see gedread_test.py for sample usage | ||||
|  | ||||
| """ | ||||
|  | ||||
| import os.path | ||||
| import shutil | ||||
|  | ||||
| from test import test_util as tu | ||||
| from GrampsDbUtils import _ReadGedcom as RG | ||||
| import DbState | ||||
| import gen.db | ||||
| import config | ||||
|  | ||||
# extraneous leading newlines do not seem to cause problems
# (and actually make it convenient reading the test files!)
# future: may need to remove such lines here if problems develop

# These ged-chunks provide/observe the following requirements
# - minimum required header elements
# - a trailer
# - and one record (spec minimum), using a SUBM 
# Note: not all specified requirements seem strongly enforced
#    eg: at least one record, also nonexistent references seem
#    ok by design, so the SUBM could have been missing 
# Also note that the 'tail' containing the SUBM record referenced
#  in the header causes a line of console output because we
#  presently do not process SUBM records at all
#  (seems like a bug to me -- to be dealt with later)
# ---------------------------------------------------------------

# _head is presently simply a header with minimum content
_head ="""
0 HEAD
1 SOUR test_gedread_System_ID
1 SUBM @SUBM1@
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 CHAR ASCII
"""

# _tail is presently a single (SUBM) record plus the trailer
# to satisfy the "one or more records" in the spec
# it also provides a target for the xref in the header
# it also gives a "skipping 1 subordinate.." message error
#   which presumably will be fixed someday
_tail = """
0 @SUBM1@ SUBM
1 NAME test /gedread/
0 TRLR
"""
|  | ||||
def make_gedcom_input(gfile, fragment):
    """create gedcom file with 'fragment' between our head & tail

    fragment would normally be 1 or more complete records
    fragment could be an empty string ("")

    """
    # context manager guarantees the handle is closed even if a
    # write fails (the original leaked the handle on error)
    with open(gfile, "w") as fh:
        for txt in (_head, fragment, _tail):
            fh.write(txt)
|  | ||||
|  | ||||
# code patterned after contents of ReadGedcom.import2,
#  but avoiding the occurrence of a popup DialogError.
# NOTE: may need rewrite to track mods in ReadGedcom 
#  test this code via src/test/test/gedread_util_test.py 
# -------------------------------------------------------
def gread(db, fname):
    """read gedcom file into a test db

    NB: test modules may want to consider also, the simplified 
    test logging (from test_util) which is especially helpful
    for testing gedcom support

    Raises tu.TestError (wrapping the original exception) if any
    of the three read stages fails.
    """
    cback = None      # no progress callback needed in tests
    DEF_SRC = False   # do not attach a default source
    ifile = open(fname,"rU")
    try:
        # stage 1: quick pre-scan of the file (encoding etc.)
        try:
            s1 = RG.StageOne(ifile)
            s1.parse()
        except Exception as e:
            # 'except ... as' replaces the py2-only 'except X, e' form
            raise tu.TestError("stage1 error %r" % e)

        useTrans = False
        ifile.seek(0)
        # stage 2: build the parser over the rewound file
        try:
            gp = RG.GedcomParser(db, ifile, fname, cback, s1, DEF_SRC)
        except Exception as e:
            raise tu.TestError("parser init error %r" % e)

        # stage 3: the actual read into db
        # (dropped a dead 'err = ""' and commented-out readonly toggles)
        try:
            gp.parse_gedcom_file(useTrans)
        except Exception as e:
            raise tu.TestError("parse error %r" % e)
    finally:
        ifile.close()
|  | ||||
|  | ||||
# test db creation
#
#   This may deserve its own module, but for now it is only used here
#
#    state doesn't seem to be necessary for testing
#    let's try just returning the db
#----------------------------------------------------
def create_empty_db(dbpath):
    """create an empty db for the test caller

    NOTE(review): relies on project modules DbState and gen.db;
    any existing db dir at dbpath is destroyed first.
    """
    state =  DbState.DbState()
    dbclass = gen.db.dbdir.GrampsDBDir
    state.change_database(dbclass())
    # create empty db (files) via load()
    cback = None
    mode = "rw"
    if os.path.isdir(dbpath):
        shutil.rmtree(dbpath)
    state.db.load(dbpath, cback, mode)
    return state.db

#===eof===
| @@ -52,6 +52,8 @@ class Test(U.TestCase): | ||||
|         np = db.get_number_of_people() | ||||
|         self.assertEquals(np,1,  | ||||
|             tu.msg(np,1, "db has exactly one person")) | ||||
|         db.close() | ||||
|         del tl | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     U.main() | ||||
|   | ||||
| @@ -181,6 +181,7 @@ class Test4(U.TestCase): | ||||
|             nl = len(ll) | ||||
|             self.assertEquals(nl,2,  | ||||
|                 tu.msg(nl,2, "pass %d: expected line count" % i)) | ||||
|             del tl | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|   | ||||
		Reference in New Issue
	
	Block a user