unicode: exclude FDD0..FDEF range too

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko
2010-01-31 16:34:37 +01:00
parent 40e4e88a28
commit b1edf20f18
2 changed files with 52 additions and 52 deletions

View File

@ -90,13 +90,13 @@
* until Unicode committee assigns something there.
*/
#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR > 0x30000
# define LAST_SUPPORTED_WCHAR 0x30000
#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
# define LAST_SUPPORTED_WCHAR 0x2ffff
#else
# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
#endif
#if LAST_SUPPORTED_WCHAR >= 0x0300
#if LAST_SUPPORTED_WCHAR >= 0x300
struct interval {
uint16_t first;
uint16_t last;
@ -185,7 +185,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
*/
static int wcwidth(unsigned ucs)
{
#if LAST_SUPPORTED_WCHAR >= 0x0300
#if LAST_SUPPORTED_WCHAR >= 0x300
/* sorted list of non-overlapping intervals of non-spacing characters */
/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
static const struct interval combining[] = {
@ -460,75 +460,75 @@ static int wcwidth(unsigned ucs)
#undef BIG_
#undef PAIR
};
# if LAST_SUPPORTED_WCHAR >= 0x10000
/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
static const struct interval combining0x10000[] = {
{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
{ 0xD242, 0xD244 }
};
# endif
#endif
if (ucs == 0)
return 0;
/* test for 8-bit control characters (00-1f, 80-9f, 7f) */
/* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
return -1;
if (ucs < 0x0300) /* optimization */
/* Quick abort if it is an obviously invalid char */
if (ucs > LAST_SUPPORTED_WCHAR)
return -1;
/* Optimization: no combining chars below 0x300 */
if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
return 1;
#if LAST_SUPPORTED_WCHAR < 0x0300
return -1;
#else
/* binary search in table of non-spacing characters */
#if LAST_SUPPORTED_WCHAR >= 0x300
/* Binary search in table of non-spacing characters */
if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
return 0;
if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
return 0;
if (ucs < 0x1100) /* optimization */
/* Optimization: all chars below 0x1100 are not double-width */
if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
return 1;
# if LAST_SUPPORTED_WCHAR < 0x1100
return -1;
# else
if (ucs >= LAST_SUPPORTED_WCHAR)
return -1;
/* High (d800..dbff) and low (dc00..dfff) surrogates are invalid (used only by UTF16) */
/* We also exclude Private Use Area (e000..f8ff) */
if (LAST_SUPPORTED_WCHAR >= 0xd800
&& (ucs >= 0xd800 || ucs <= 0xf8ff)
# if LAST_SUPPORTED_WCHAR >= 0x1100
/* Invalid code points: */
/* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
/* Private Use Area (e000..f8ff) */
/* Noncharacters fdd0..fdef */
if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
|| (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
) {
return -1;
}
/* 0xfffe and 0xffff in every plane are invalid */
if (LAST_SUPPORTED_WCHAR >= 0xfffe
&& (ucs & 0xfffe) == 0xfffe
) {
if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
return -1;
}
# if LAST_SUPPORTED_WCHAR >= 0x10000
/* binary search in table of non-spacing characters in Supplementary Multilingual Plane */
if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
return 0;
# endif
/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
if (LAST_SUPPORTED_WCHAR >= 0xE0001
&& ( ucs == 0xE0001
|| (ucs >= 0xE0020 && ucs <= 0xE007F)
|| (ucs >= 0xE0100 && ucs <= 0xE01EF)
)
) {
return 0;
if (ucs >= 0x10000) {
/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
static const struct interval combining0x10000[] = {
{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
{ 0xD242, 0xD244 }
};
/* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
return 0;
/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
if (LAST_SUPPORTED_WCHAR >= 0xE0001
&& ( ucs == 0xE0001
|| (ucs >= 0xE0020 && ucs <= 0xE007F)
|| (ucs >= 0xE0100 && ucs <= 0xE01EF)
)
) {
return 0;
}
}
# endif
/* if we arrive here, ucs is not a combining or C0/C1 control character */
/* If we arrive here, ucs is not a combining or C0/C1 control character.
* Check whether it's 1 char or 2-char wide.
*/
return 1 +
( (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */