Remove GEDCOM tests - they are broken and not used.
svn: r13914
This commit is contained in:
parent
862d1446d0
commit
8b2da469cf
@ -1,146 +0,0 @@
|
|||||||
""" GR_test.py
|
|
||||||
|
|
||||||
This is a first try at some gedcom read testing that does not
|
|
||||||
require running a gramps CLI
|
|
||||||
|
|
||||||
The biggest difficulty is that every test fragment needs custom
|
|
||||||
test code. Maybe that's unavoidable, and the best that can be
|
|
||||||
done is to group similar tests together, so that setUp can be
|
|
||||||
shared.
|
|
||||||
|
|
||||||
Maybe more can be shared: one commonly used test recipe is
|
|
||||||
to develop a data structure that can be looped over to test
|
|
||||||
similar fragments with the same piece of test code, putting
|
|
||||||
fragments and possible control or other validation information
|
|
||||||
in the data structure.
|
|
||||||
|
|
||||||
A controlling principle for such structures is that they should be
|
|
||||||
designed for maximum ease (and intuitiveness) of data declaration
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os.path as op
|
|
||||||
|
|
||||||
import unittest as U
|
|
||||||
import re
|
|
||||||
|
|
||||||
from test import test_util as tu
|
|
||||||
from test import gedread_util as gr
|
|
||||||
|
|
||||||
|
|
||||||
# NoteSource_frag
|
|
||||||
# tests structure: NOTE > SOUR
|
|
||||||
# using the 2 formats of the SOUR element
|
|
||||||
# bug #(?) does not properly ignore the SOUR
|
|
||||||
# test by looking for warning messages resulting
|
|
||||||
# from parse_record seeing the non-skipped SOUR
|
|
||||||
#
|
|
||||||
# SB: the NOTE data should contain the SOUR or xref
|
|
||||||
# but this is NYI (SOUR is ignored within NOTE)
|
|
||||||
# -----------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# numcheck based testing
|
|
||||||
# verifies the number of db items via a get_number_of_X() call
|
|
||||||
# returns an error string or None
|
|
||||||
#
|
|
||||||
# ? candidate for inclusion in gedread_util.py
|
|
||||||
#
|
|
||||||
class nc():
    """nc object -- creates a numcheck function

    instantiate a nc object as follows
        c = nc("people", 4)
    and call, passing the database, as follows
        err = c(db)
    which will check for exactly 4 people in the db
    and return a displayable message on error, else None

    NB: name _must_ match the X names in db get_number_of_X
    """

    def __init__(self, name, num):
        self.name = name
        self.num = num
        # name of the db method that reports the count, looked up at call time
        self.getname = "get_number_of_" + name

    def dbncheck(self, dbcall):
        """Call dbcall() and compare the result with the expected count.

        Returns None when the counts agree, otherwise a message of the
        form "<name>: got <n>, expected <m>".
        """
        got = dbcall()
        if got == self.num:
            return None
        return "%s: got %d, expected %d" % (self.name, got, self.num)

    def __call__(self, db):
        """Run the check against db; return an error string or None."""
        dbcall = getattr(db, self.getname)
        # The result has to be handed back: callers do `err = c(db)` and
        # assert on it, so dropping it would make every check pass silently.
        return self.dbncheck(dbcall)
|
|
||||||
|
|
||||||
class fnci():
    """fnci (frag-numcheckset item): plain data holder pairing

        frag  -- a fragment of gedcom
        ncset -- a sequence of nc items to check
    """

    def __init__(self, frag, ncset):
        # both values are stored untouched
        self.frag, self.ncset = frag, ncset
|
|
||||||
|
|
||||||
# test data table for Test1.test1a_numchecks
|
|
||||||
fnumchecks = (
    # NOTE record carrying an embedded (inline text) SOUR
    fnci(frag="""0 @N1@ NOTE Note containing embedded source
1 SOUR embedded source""",
         ncset=(nc("notes", 1),)),
    # NOTE record whose SOUR is an xref to a separate SOUR record
    fnci(frag="""0 @N2@ NOTE Note containing referenced source
1 SOUR @SOUR1@
0 @SOUR1@ SOUR
1 TITL Phoney source title""",
         ncset=(nc("notes", 1), nc("sources", 1))),
)  # end fnumchecks
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# ? candidate for inclusion in test_util.py
|
|
||||||
#
|
|
||||||
def _checklog(tlogger, pat=None):
|
|
||||||
# look for absence of specific messages in log
|
|
||||||
matches = 0
|
|
||||||
ltext = tlogger.logfile_getlines()
|
|
||||||
if ltext:
|
|
||||||
if pat is None:
|
|
||||||
matches = len(ltext)
|
|
||||||
else:
|
|
||||||
pat = re.compile(pat)
|
|
||||||
for l in ltext:
|
|
||||||
match = re.match(pat, l)
|
|
||||||
if match:
|
|
||||||
matches += 1
|
|
||||||
# debugging
|
|
||||||
print "(%d) %r" % (matches, match)
|
|
||||||
return matches
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Test1(U.TestCase):
    """Import each gedcom fragment of fnumchecks and verify the outcome."""

    def setUp(self):
        # make a test subdir and compose some pathnames
        self.tdir = tu.make_subdir("RG_test")
        self.tdb = op.join(self.tdir, "test_db")
        self.ifil = op.join(self.tdir, "test_in.ged")
        self.lfil = op.join(self.tdir, "test.log")

    def test1a_numchecks(self):
        """For every fragment: no "Line N" log messages, expected db counts."""
        tl = tu.TestLogger()
        for i, f in enumerate(fnumchecks):
            gr.make_gedcom_input(self.ifil, f.frag)
            db = gr.create_empty_db(self.tdb)
            try:
                tl.logfile_init(self.lfil)
                gr.gread(db, self.ifil)
                errs = _checklog(tl, r"Line \d+")
                self.assertEqual(errs, 0,
                                 "ncset(%d): got %d unexpected log messages" %
                                 (i, errs))
                # ok, no log error message, check db counts
                for call in f.ncset:
                    err = call(db)
                    self.assertFalse(err, err)
            finally:
                # the same db path is recreated on the next pass, so release
                # this db first (also when an assertion above fails)
                db.close()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
U.main()
|
|
@ -1,164 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
import unittest
|
|
||||||
import os.path
|
|
||||||
import codecs
|
|
||||||
import struct
|
|
||||||
|
|
||||||
from test import test_util as tu
|
|
||||||
m = tu.msg
|
|
||||||
|
|
||||||
par = tu.path_append_parent()
|
|
||||||
here = tu.absdir()
|
|
||||||
|
|
||||||
import _GedcomChar as G
|
|
||||||
|
|
||||||
cdir = tu.make_subdir("test_data")
|
|
||||||
|
|
||||||
# unicode block "latin1 supplement" chars
|
|
||||||
utest_chars = "".join(map(unichr, range(0xA0,0x100))) + "\n"
|
|
||||||
|
|
||||||
# 12 ansel test chars (raw 8-bit bytes, here)
|
|
||||||
atest_list = range(0xa1,0xa7) + range(0xb1,0xb7) + [0x0a,]
|
|
||||||
atest_bytes = struct.pack("B"*13, *atest_list)
|
|
||||||
|
|
||||||
# unicode mappings of above (http://www.gymel.com/charsets/ANSEL.html)
|
|
||||||
a2u = u"".join(map(unichr, (
|
|
||||||
0x141, 0xd8, 0x110, 0xde, 0xc6, 0x152,
|
|
||||||
0x142, 0xf8, 0x111, 0xfe, 0xe6, 0x153,
|
|
||||||
0x0a, )))
|
|
||||||
|
|
||||||
def gen_chars(filename, encoding):
    """write generic test chars as given file and encoding

    Nothing is written when filename already exists.  The output file is
    closed explicitly so the data is flushed before a test reads it back.
    """
    if os.path.exists(filename):
        return
    out = codecs.open(filename, "wb", encoding)
    try:
        out.write(utest_chars)
    finally:
        out.close()
|
|
||||||
|
|
||||||
###
|
|
||||||
class Test1_ansi(unittest.TestCase):
    """Test original "ANSI" reader and codecs: latin, cp1252"""
    enc = "latin-1"
    cp = "cp1252"
    fil = os.path.join(cdir, enc)
    exp = utest_chars

    def setUp(self):
        gen_chars(self.fil, self.enc)

    def _read_all(self, encoding):
        """Return the whole test file decoded with the given codec."""
        f = codecs.open(self.fil, encoding=encoding)
        try:
            return f.read()
        finally:
            f.close()

    def test1a_read_ansi(self):
        f = open(self.fil)
        try:
            got = G.AnsiReader(f).readline()
        finally:
            f.close()
        self.assertEqual(got, self.exp, m(got, self.exp, "AnsiReader"))

    def test1b_read_codec_latin1(self):
        got = self._read_all(self.enc)
        self.assertEqual(got, self.exp,
                         m(got, self.exp, "using codec %s" % self.enc))

    def test1c_read_codec_cp1252(self):
        got = self._read_all(self.cp)
        self.assertEqual(got, self.exp,
                         m(got, self.exp, "using codec %s" % self.cp))
|
|
||||||
|
|
||||||
###
|
|
||||||
class Test2_ansel(unittest.TestCase):
    """Test original AnselReader (later: ansel codec)"""
    enc = "ansel"
    afil = os.path.join(cdir, enc)
    exp = a2u

    def setUp(self):
        # (re)write the raw ansel bytes; close so they are on disk
        # before the test opens the file for reading
        out = open(self.afil, "wb")
        try:
            out.write(atest_bytes)
        finally:
            out.close()

    def test2a_read_ansel(self):
        f = open(self.afil)
        try:
            got = G.AnselReader(f).readline()
        finally:
            f.close()
        self.assertEqual(got, self.exp, m(got, self.exp, "AnselReader"))
|
|
||||||
|
|
||||||
###
|
|
||||||
class Test3(unittest.TestCase):
    """Test original UTF8Reader and codecs: utf_8, utf_8_sig
    with no 'BOM' (sig) in input (the common case)

    out of curiosity, verify behavior reading a 1-byte file
    """
    enc = "utf_8"
    enc_sig = enc + "_sig"
    ufil = os.path.join(cdir, "chars.utf8")
    f1byte = os.path.join(cdir, "1byte")
    exp = utest_chars

    def setUp(self):
        gen_chars(self.ufil, self.enc)
        if not os.path.exists(self.f1byte):
            out = open(self.f1byte, "wb")
            try:
                out.write("1")
            finally:
                out.close()

    def _reader_line(self, path):
        """Return the first line of path as read by G.UTF8Reader."""
        f = open(path)
        try:
            return G.UTF8Reader(f).readline()
        finally:
            f.close()

    def _codec_read(self, path, encoding):
        """Return the whole of path decoded with the given codec."""
        f = codecs.open(path, encoding=encoding)
        try:
            return f.read()
        finally:
            f.close()

    def test3a_u8_UTF8Reader_NO_BOM_sig(self):
        g = self._reader_line(self.ufil)
        self.assertEqual(g, self.exp, m(g, self.exp, "orig UTF8Reader"))
        g = self._reader_line(self.f1byte)
        self.assertEqual(g, "1",
                         m(g, "1", "read 1-byte file"))

    # NB: utf_8 reads data and never expects a BOM-sig
    def test3b_utf8_codec_NO_BOM_sig_as_expected(self):
        g = self._codec_read(self.ufil, self.enc)
        self.assertEqual(g, self.exp, m(g, self.exp, "codec utf8"))
        g = self._codec_read(self.f1byte, self.enc)
        self.assertEqual(g, "1", m(g, "1", "codec utf8"))

    # NB: utf_8_sig reads data even absent a BOM-sig (GOOD!)
    def test3c_utf8_sig_codec_NO_BOM_sig_tolerated_GOOD(self):
        g = self._codec_read(self.ufil, self.enc_sig)
        self.assertEqual(g, self.exp,
                         m(g, self.exp, "codec utf_8_sig NO sig input"))
        g = self._codec_read(self.f1byte, self.enc_sig)
        self.assertEqual(g, "1",
                         m(g, "1", "codec utf_8_sig NO sig input"))
|
|
||||||
|
|
||||||
###
|
|
||||||
class Test4(unittest.TestCase):
    """Test original UTF8Reader and codecs: utf_8, utf_8_sig
    with 'BOM' (sig) in input (uncommon, [some?] MS Windows only?)
    """
    enc = "utf_8"
    enc_sig = enc + "_sig"
    ufil = os.path.join(cdir, "chars.utf8_sig")
    exp = utest_chars

    def setUp(self):
        # written with the utf_8_sig codec, so the file starts with a BOM
        gen_chars(self.ufil, self.enc_sig)

    def _codec_read(self, encoding):
        """Return the whole test file decoded with the given codec."""
        f = codecs.open(self.ufil, encoding=encoding)
        try:
            return f.read()
        finally:
            f.close()

    def test4a_u8_UTF8Reader_WITH_BOM_sig(self):
        f = open(self.ufil)
        try:
            g = G.UTF8Reader(f).readline()
        finally:
            f.close()
        self.assertEqual(g, self.exp, m(g, self.exp, "orig UTF8Reader"))

    # utf_8 reads an initial BOM-sig as data -- oops, pity
    # write the test to verify this known codec behavior
    # ==> Recommend: do not use utf8 as input codec (use utf_8_sig)
    def test4b_utf8_codec_WITH_BOM_sig_reads_as_data_PITY(self):
        g = self._codec_read(self.enc)
        e0 = u'\ufeff'
        self.assertEqual(g[0], e0,
                         m(g[0], e0, "codec utf8 reads 'BOM'-sig as data"))
        g = g[1:]
        self.assertEqual(g, self.exp,
                         m(g, self.exp, "codec utf8 reads rest of data ok"))

    # utf_8_sig reads and ignores the BOM-sig
    def test4c_utf8_sig_codec_WITH_BOM_sig_as_expected(self):
        g = self._codec_read(self.enc_sig)
        # the input here does carry a sig, so the message says WITH
        self.assertEqual(g, self.exp,
                         m(g, self.exp, "codec utf_8_sig WITH sig input"))
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
|
|
||||||
#===eof===
|
|
@ -1,84 +0,0 @@
|
|||||||
"""test for GedcomParse empty notes and skipped subordinates
|
|
||||||
|
|
||||||
Empty notes are discarded -- with a warning
|
|
||||||
Skipped subordinate data also produce warnings
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import unittest as U
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from test import test_util as tu
|
|
||||||
from test import gedread_util as gr
|
|
||||||
|
|
||||||
|
|
||||||
class Test(U.TestCase):
    """ this test verifies fix for bug 1851 """

    def setUp(self):
        # make a dir to hold an input gedcom file
        self.tdir = tu.make_subdir("gsn_test")

    def test1(self):
        """Empty notes are dropped and skipped subordinates warned about."""
        test_fragment="""
0 @I1@ INDI
1 NAME Adam /TheFirst/
1 DEAT
2 DATE EST 2 FEB 2000
2 PLAC Bdorf
2 ADDR Haus Nr. 44
2 NOTE this one should stay
2 NOTE
3 CONT this should stay too, but
3 CONT next one SB skipped (empty)
2 NOTE
0 @N101@ NOTE a real note
0 @N102@ NOTE
1 CONT a real continuation-only note
1 CONT should skip next (N103 empty)
2 JUNK2
3 JUNK3
0 @N103@ NOTE
0 @N199@ NOTE
1 SOUR @S987@
"""
        # expect warnings for discarded empty notes above:
        # the last bare "2 NOTE" under DEAT, @N103@ and @N199@
        wNotes = 3
        # expect warnings for skipped subordinates (blocks) above
        # remember consecutive lines skipped give 1 message:
        # (JUNK2 + JUNK3) under @N102@, and the SOUR under @N199@
        wSubs = 2

        # create a gedcom input file
        # from canned head/tail -- see gedread_util
        infil = os.path.join(self.tdir, "test_in.ged")
        gr.make_gedcom_input(infil, test_fragment)
        self.assertTrue(os.path.isfile(infil),
                        "create input file %s" % infil)

        # create an empty database
        dbpath = os.path.join(self.tdir, "test_db")
        db = gr.create_empty_db(dbpath)
        try:
            self.assertTrue(os.path.isdir(dbpath),
                            "create database (dir) %s" % dbpath)

            # create logfile to test for read log-messages
            # (uses the TestLogger support in test_util)
            log = os.path.join(self.tdir, "test_log")
            tl = tu.TestLogger()
            tl.logfile_init(log)
            # now read the gedcom
            gr.gread(db, infil)
            loglines = tl.logfile_getlines()
            # NB incorrect SUBM handling causes one extraneous warning
            xWarns = 1
            ll = len(loglines)
            tot = xWarns + wNotes + wSubs
            self.assertEqual(ll, tot,
                             tu.msg(ll, tot,
                                    "log has expected warning content"))
            # verify notes that survive
            numNotes = 4
            nn = db.get_number_of_notes()
            self.assertEqual(nn, numNotes,
                             tu.msg(nn, numNotes,
                                    "db has right number of notes"))
        finally:
            # release the database even when an assertion fails
            db.close()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
U.main()
|
|
@ -1,257 +0,0 @@
|
|||||||
#!/usr/bin/python -tt
|
|
||||||
|
|
||||||
# Instructions for use
|
|
||||||
# --------------------
|
|
||||||
# Eventually, this code might use a testing infrastructure (conventions TBD)
|
|
||||||
# but, at present this is intended for use as a manual operation by placing
|
|
||||||
# this file (temporarily) in the same dir as the module it tests.
|
|
||||||
#
|
|
||||||
# Running
|
|
||||||
# ./test-ansel_utf8.py [-v]
|
|
||||||
# should report 'OK'
|
|
||||||
# the -v option shows individual results for each test function
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# TODO
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# make table of test cases for readability
|
|
||||||
# ansel U+xxxx UTF8 char-name string (char where appl)
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
|
|
||||||
import ansel_utf8 as A
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
# debugging provision to capture some strings for external examination
|
|
||||||
# note that this debug output is ASCII, by virtue of using `` (repr)
|
|
||||||
OUT=0
|
|
||||||
if OUT > 0:
|
|
||||||
import sys
|
|
||||||
# set output levels 1,2,4 (or-ing ok) for string (repr) in tests 1a,1b,2a
|
|
||||||
# then manipulate that data with separate tools for additional validation
|
|
||||||
# tools refs:
|
|
||||||
# http://search.cpan.org/~esummers/MARC-Charset-0.98/lib/MARC/Charset.pm
|
|
||||||
# http://pypi.python.org/pypi/pymarc
|
|
||||||
# ---
|
|
||||||
# (perl) MARC::Charset
|
|
||||||
# (python) pymarc omits eszett,euro (patchable); only does ansel-to-utf8
|
|
||||||
# shell: echo -e 'utf-8 encoded chars' works well
|
|
||||||
# ==> NB: when examining unicode characters (rather than hexdump) externally,
|
|
||||||
# it is absolutely essential to use a good unicode terminal for correct
|
|
||||||
# display of combining forms (other than precomposed)
|
|
||||||
# (eg: use xterm rather than konsole or gnome-terminal)
|
|
||||||
# ==> and of course, use a locale with the UTF-8 charset
|
|
||||||
|
|
||||||
|
|
||||||
# test convenience utility extends python by showing got & expected (like perl)
|
|
||||||
# useful at least for the commonly used assertEquals()
|
|
||||||
# conventions:
|
|
||||||
# dup the expected and got parms from the assertEquals and add a message
|
|
||||||
# (and an optional prefix to distinguish sub-tests)
|
|
||||||
# ==> code the assert as assertEquals(got, exp, msg(got,exp,mess,pfx))
|
|
||||||
def msg(got, expect, msgbase, prefix=""):
    """Build an assertion failure message showing got and expected values.

    got, expect -- the two values handed to the assert; shown via repr()
    msgbase     -- text describing what was being checked
    prefix      -- optional label to distinguish sub-tests; when given it
                   is placed in front of msgbase, followed by ": "
    """
    if prefix:
        prefix += ": "
    # repr() is the function form of the deprecated `...` backtick syntax
    return "%s%s\n .....got %s\n expected %s" % (
        prefix, msgbase, repr(got), repr(expect))
|
|
||||||
|
|
||||||
|
|
||||||
class Test1(unittest.TestCase):
    """ test basic ansel_to_unicode and inversion """

    def _check_table(self, table, width, tag, debug_bit):
        """Round-trip every entry of one ansel-to-unicode table.

        table     -- mapping of ansel code to unicode string
        width     -- "onebyte" or "twobyte", used in failure messages
        tag       -- test label used in the optional debug output
        debug_bit -- bit of OUT that enables the debug output
        """
        codes = sorted(table.keys())
        sans = "".join(codes)
        suni = u"".join([table[acode] for acode in codes])
        if OUT & debug_bit:
            print("%s: %d codes" % (tag, len(codes)))
            print(" ansel:%s" % repr(sans))
            print(" utf-8:%s" % repr(suni.encode("utf-8")))  # U8 for debugging
            sys.stdout.flush()
        a2u = A.ansel_to_utf8(sans)
        self.assertEqual(a2u, suni,
                         msg(a2u, suni, "map %s ansel to unicode" % width))
        u2a = A.utf8_to_ansel(suni)
        self.assertEqual(u2a, sans,
                         msg(u2a, sans,
                             "invert %s to unicode mapping" % width))

    def test_1a(self):
        """ 1a: map ansel onebyte to unicode and inverse """
        # no combining chars here .. see later test for those
        self._check_table(A._onebyte, "onebyte", "test1a", 1)

    def test_1b(self):
        """ 1b: map ansel twobyte to unicode and inverse """
        # these are the precomposed combining forms
        self._check_table(A._twobyte, "twobyte", "test1b", 2)
|
|
||||||
|
|
||||||
class Test2(unittest.TestCase):
    """ test unicode_to_ansel (basic precomposed forms) and inversion """

    def test_2a(self):
        """ 2a: unicode to ansel and inverse """
        # concatenate all keys of the unicode-to-ansel table, in sorted
        # order, alongside the ansel strings they map to
        ucodes = sorted(A._utoa.keys())
        suni = u"".join(ucodes)
        sans = "".join([A._utoa[ucode] for ucode in ucodes])
        if OUT & 4:
            print("test2a: %d codes" % len(ucodes))
            print(" utf-8:%s" % repr(suni.encode("utf-8")))  # U8
            print(" ansel:%s" % repr(sans))
            sys.stdout.flush()
        u2a = A.utf8_to_ansel(suni)
        self.assertEqual(u2a, sans, msg(u2a, sans, "map unicode to ansel"))
        a2u = A.ansel_to_utf8(sans)
        self.assertEqual(a2u, suni,
                         msg(a2u, suni, "invert unicode to ansel mapping"))
|
|
||||||
|
|
||||||
class Test3(unittest.TestCase):
    """ test pass-through for matches with ansel ascii-subset """

    def test3a(self):
        """ 3a: ansel to unicode for matches with ascii and inverse """
        ascii_ok = "".join(A._use_ASCII)
        ascii_uni = unicode(ascii_ok)
        converted = A.ansel_to_utf8(ascii_ok)
        # could match with lengths wrong? can't hurt to test
        want_len = len(ascii_ok)
        got_len = len(converted)
        self.assertEqual(
            got_len, want_len,
            msg(got_len, want_len,
                "ascii subset ansel to unicode lengths match"))
        self.assertEqual(
            converted, ascii_uni,
            msg(converted, ascii_uni,
                "ascii subset ansel to unicode strings match"))
        back = A.utf8_to_ansel(converted)
        self.assertEqual(
            back, ascii_ok,
            msg(back, ascii_ok, "invert ascii subset ansel to unicode"))

    def test3b(self):
        """ 3b: (sample) non-matching ascii control chars map to space """
        uspace = unicode(' ')
        for code in (0, 1, 8, 9, 11, 26, 28, 127):
            a2u = A.ansel_to_utf8(chr(code))
            self.assertEqual(
                a2u, uspace,
                msg(a2u, uspace, "map disallowed ASCII to unicode space"))
            u2a = A.utf8_to_ansel(unichr(code))
            self.assertEqual(
                u2a, ' ',
                msg(u2a, ' ', "map unicode to space for disallowed ASCII"))

    def test3c(self):
        """ 3c: (sample) no-match ansel to unicode cases """
        replacement = u'\ufffd'
        for code in (0x80, 0x87, 0x9F, 0xFF):
            a2u = A.ansel_to_utf8(chr(code))
            self.assertEqual(
                a2u, replacement,
                msg(a2u, replacement,
                    "ansel no-match should return unicode Replacement Char"))

    def test3d(self):
        """ 3d: (sample) no-match unicode to ansel cases """
        for code in (1024, 4096, 65535):
            u2a = A.utf8_to_ansel(unichr(code))
            self.assertEqual(
                u2a, '?',
                msg(u2a, '?', "unicode no-match should return question mark"))
|
|
||||||
|
|
||||||
class Test4(unittest.TestCase):
    """ test some special cases """

    def test4a(self):
        """ 4a: empty strings should return empty strings """
        self.assertEqual(A.ansel_to_utf8(""), u"", "empty a2u")
        self.assertEqual(A.utf8_to_ansel(u""), "", "empty u2a")

    def test4b_unmapped_combos(self):
        """ 4b: (sample) unmapped (non-precomposed) combinations """
        samples = (
            # ansel, unicode, failure-report-message .. see function msg()
            ("b\xE5Ze", u"bZ\u0304e", "b Z+macron e"),
            ("\xE5Z", u"Z\u0304", "Z+macron"),
            ("b\xE5Z\xE9Xe", u"bZ\u0304X\u030ce", "b Z+macron X+caron e"),
            ("\xE5Z\xE9X", u"Z\u0304X\u030c", "Z+macron X+caron"),
        )
        for a, u, m in samples:
            # ansel to unicode and inverse
            a2u = A.ansel_to_utf8(a)
            self.assertEqual(a2u, u, msg(a2u, u, m, "a2u"))
            a2u2a = A.utf8_to_ansel(a2u)
            self.assertEqual(a2u2a, a, msg(a2u2a, a, m, "a2u2a"))

            # unicode to ansel and inverse
            u2a = A.utf8_to_ansel(u)
            self.assertEqual(u2a, a, msg(u2a, a, m, "u2a"))
            u2a2u = A.ansel_to_utf8(u2a)
            self.assertEqual(u2a2u, u, msg(u2a2u, u, m, "u2a2u"))

    def test4c_multiple_combos(self):
        """ 4c: (a2u) ignore multiple combinations (include precomposed) """
        samples = (
            ("b\xF0\xE5Ze", u"bZ\u0304e", "b <cedilla> Z+macron e"),
            ("\xF0\xE5Z", u"Z\u0304", "<cedilla> Z+macron"),
            ("\xF0\xE5Z\xE9X", u"Z\u0304X\u030c", "<cedilla> Z+macron X+caron"),
            ("\xE5Z\xF0\xE9X", u"Z\u0304X\u030c", "Z+macron <cedilla> X+caron"),
            ('\xF0\xE5A', u'\u0100', "<cedilla> A+macron"),
            ("\xE5Z\xE5\xF0\xE9X", u"Z\u0304X\u030c", "Z+macron <macron> <cedilla> X+caron"),
        )
        for a, u, m in samples:
            a2u = A.ansel_to_utf8(a)
            self.assertEqual(a2u, u,
                             msg(a2u, u, m, "a2u drop extra <combiners>"))

    def test4d_multiple_combos(self):
        """ 4d: (u2a) ignore multiple combinations (include precomposed) """
        samples = (
            ("b\xE5Ze", u"bZ\u0304\u0327e", "b Z+macron <cedilla> e"),
            ("\xE5Z\xE5A", u"Z\u0304\u0327\u0100", "Z+macron <cedilla> A+macron"),
            ("\xE5A\xE5Z", u"\u0100\u0327\u030cZ\u0304", "A+macron <cedilla> <caron> Z+macron"),
        )
        for a, u, m in samples:
            u2a = A.utf8_to_ansel(u)
            self.assertEqual(u2a, a,
                             msg(u2a, a, m, "u2a drop extra <combiners>"))
|
|
||||||
|
|
||||||
class Test99(unittest.TestCase):
    """ test regression cases """

    def test_99a(self):
        """ 99a: sanity check on counts """
        # every ansel code (one- or two-byte) needs a reverse entry
        na = len(A._onebyte) + len(A._twobyte)
        nu = len(A._utoa)
        self.assertEqual(na, nu, msg(na, nu, "basic counts: a2u=u2a"))
        # and likewise for the two combiner tables
        nac, nuc = len(A._acombiners), len(A._ucombiners)
        self.assertEqual(nac, nuc,
                         msg(nac, nuc, "combiner counts: a2u=u2a"))

    def test_99b(self):
        """ 99b: fix incorrect mapping for ansel 0xAE

        It used-to-be U+02be but was changed March 2005 to U+02bc
        Note : the other revs per notes make double-wide combining
        char halves into an ambiguous mess -- let's ignore that!
        http://lcweb2.loc.gov/diglib/codetables/45.html
        might as well add validation of other additions, though
        """
        # each entry: (ansel, uni, msg)
        revs = (
            ('\xAE', u'\u02bc', "modifier right-half ring is now modifier Apostrophe"),
            ('\xC7', u'\xdf', "added code for eszet"),
            ('\xC8', u'\u20ac', "added code for euro"),
        )
        for ansel, expected, what in revs:
            got = A.ansel_to_utf8(ansel)
            self.assertEqual(got, expected,
                             msg(got, expected, what, "spec change"))
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
||||||
|
|
||||||
#===eof===
|
|
@ -1,130 +0,0 @@
|
|||||||
"""unittest support utilities for reading gedcom
|
|
||||||
|
|
||||||
see gedread_test.py for sample usage
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os.path
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
from test import test_util as tu
|
|
||||||
from GrampsDbUtils import _ReadGedcom as RG
|
|
||||||
import DbState
|
|
||||||
import gen.db
|
|
||||||
import config
|
|
||||||
|
|
||||||
# extraneous leading newlines do not seem to cause problems
|
|
||||||
# (and actually make it convenient reading the test files!)
|
|
||||||
# future: may need to remove such lines here if problems develop
|
|
||||||
|
|
||||||
# These ged-chunks provide/observe the following requirements
|
|
||||||
# - minimum required header elements
|
|
||||||
# - a trailer
|
|
||||||
# - and one record (spec minimum), using a SUBM
|
|
||||||
# Note: not all specified requirements seem strongly enforced
|
|
||||||
# eg: at least one record, also nonexistent references seem
|
|
||||||
# ok by design, so the SUBM could have been missing
|
|
||||||
# Also note that the 'tail' containing the SUBM record referenced
|
|
||||||
# in the header causes a line of console output because we
|
|
||||||
# presently do not process SUBM records at all
|
|
||||||
# (seems like a bug to me -- to be dealt with later)
|
|
||||||
# ---------------------------------------------------------------
|
|
||||||
|
|
||||||
# _head is presently simply a header with minimum content
|
|
||||||
_head ="""
|
|
||||||
0 HEAD
|
|
||||||
1 SOUR test_gedread_System_ID
|
|
||||||
1 SUBM @SUBM1@
|
|
||||||
1 GEDC
|
|
||||||
2 VERS 5.5
|
|
||||||
2 FORM LINEAGE-LINKED
|
|
||||||
1 CHAR ASCII
|
|
||||||
"""
|
|
||||||
|
|
||||||
# _tail is presently a single (SUBM) record plus the trailer
|
|
||||||
# to satisfy the "one or more records" in the spec
|
|
||||||
# it also provides a target for the xref in the header
|
|
||||||
# it also gives a "skipping 1 subordinate.." message error
|
|
||||||
# which presumably will be fixed someday
|
|
||||||
_tail = """
|
|
||||||
0 @SUBM1@ SUBM
|
|
||||||
1 NAME test /gedread/
|
|
||||||
0 TRLR
|
|
||||||
"""
|
|
||||||
|
|
||||||
def make_gedcom_input(gfile, fragment):
    """create gedcom file with 'fragment' between our head & tail

    gfile    -- path of the file to (over)write
    fragment -- gedcom text written between _head and _tail

    fragment would normally be 1 or more complete records
    fragment could be an empty string ("")

    The file is closed even when one of the writes raises.
    """
    fh = open(gfile, "w")
    try:
        for txt in (_head, fragment, _tail):
            fh.write(txt)
    finally:
        fh.close()
|
|
||||||
|
|
||||||
|
|
||||||
# code patterned after contents of ReadGedcom.import2,
|
|
||||||
# but avoiding the occurrence of a popup DialogError.
|
|
||||||
# NOTE: may need rewrite to track mods in ReadGedcom
|
|
||||||
# test this code via src/test/test/gedread_util_test.py
|
|
||||||
# -------------------------------------------------------
|
|
||||||
def gread(db, fname):
    """read gedcom file into a test db

    db    -- database handed to RG.GedcomParser
    fname -- path of the gedcom file to read

    Runs RG.StageOne over the file, rewinds it, builds an RG.GedcomParser
    and calls its parse_gedcom_file().  An exception raised by any of these
    three stages is re-raised as tu.TestError with a message naming the
    stage.  The input file is always closed.

    NB: test modules may want to consider also, the simplified
    test logging (from test_util) which is especially helpful
    for testing gedcom support
    """
    cback = None
    DEF_SRC = False
    useTrans = False
    ifile = open(fname, "rU")
    try:
        try:
            s1 = RG.StageOne(ifile)
            s1.parse()
        except Exception as e:
            raise tu.TestError("stage1 error %r" % e)

        # the parser reads the same file again from the start
        ifile.seek(0)
        try:
            gp = RG.GedcomParser(db, ifile, fname, cback, s1, DEF_SRC)
        except Exception as e:
            raise tu.TestError("parser init error %r" % e)

        try:
            gp.parse_gedcom_file(useTrans)
        except Exception as e:
            raise tu.TestError("parse error %r" % e)
    finally:
        ifile.close()
|
|
||||||
|
|
||||||
|
|
||||||
# test db creation
|
|
||||||
#
|
|
||||||
# This may deserve its own module, but for now it is only used here
|
|
||||||
#
|
|
||||||
# state doesn't seem to be necessary for testing
|
|
||||||
# let's try just returning the db
|
|
||||||
#----------------------------------------------------
|
|
||||||
def create_empty_db(dbpath):
    """create an empty db for the test caller

    Any directory already present at dbpath is removed first.  Returns
    the db object held by a fresh DbState after loading it from dbpath.
    """
    state = DbState.DbState()
    state.change_database(gen.db.dbdir.GrampsDBDir())
    if os.path.isdir(dbpath):
        # drop whatever an earlier run left behind
        shutil.rmtree(dbpath)
    # create empty db (files) via load(): no callback, read-write mode
    state.db.load(dbpath, None, "rw")
    return state.db
|
|
||||||
|
|
||||||
#===eof===
|
|
@ -52,6 +52,8 @@ class Test(U.TestCase):
|
|||||||
np = db.get_number_of_people()
|
np = db.get_number_of_people()
|
||||||
self.assertEquals(np,1,
|
self.assertEquals(np,1,
|
||||||
tu.msg(np,1, "db has exactly one person"))
|
tu.msg(np,1, "db has exactly one person"))
|
||||||
|
db.close()
|
||||||
|
del tl
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
U.main()
|
U.main()
|
||||||
|
@ -181,6 +181,7 @@ class Test4(U.TestCase):
|
|||||||
nl = len(ll)
|
nl = len(ll)
|
||||||
self.assertEquals(nl,2,
|
self.assertEquals(nl,2,
|
||||||
tu.msg(nl,2, "pass %d: expected line count" % i))
|
tu.msg(nl,2, "pass %d: expected line count" % i))
|
||||||
|
del tl
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user