libbb: better unicode width support. Hopefully fixes bug 839.

Also opens up the possibility of making other Unicode code smaller
and more correct later, but at a cost in size:

function                                             old     new   delta
static.combining                                       -     516    +516
bb_wcwidth                                             -     328    +328
unicode_cut_nchars                                     -     141    +141
mbstowc_internal                                       -      93     +93
in_table                                               -      78     +78
cal_main                                             899     961     +62
static.combining0x10000                                -      40     +40
unicode_strlen                                         -      31     +31
bb_mbstrlen                                           31       -     -31
bb_mbstowcs                                          173     102     -71
------------------------------------------------------------------------------
(add/remove: 7/1 grow/shrink: 1/1 up/down: 1289/-102)        Total: 1187 bytes

Uses code by Markus Kuhn, which is in the public domain:
http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
"Permission to use, copy, modify, and distribute this software
 for any purpose and without fee is hereby granted. The author
 disclaims all warranties with regard to this software."

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko
2010-01-24 07:44:03 +01:00
parent 5da9f96ad8
commit 9f93d62192
10 changed files with 410 additions and 98 deletions

View File

@@ -16,8 +16,8 @@
*
* Major size reduction... over 50% (>1.5k) on i386.
*/
#include "libbb.h"
#include "unicode.h"
/* We often use "unsigned" instead of "int", it's easier to div on most CPUs */
@@ -83,9 +83,16 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
time_t now;
unsigned month, year, flags, i;
char *month_names[12];
char day_headings[28]; /* 28 for julian, 21 for nonjulian */
/* normal heading: */
/* "Su Mo Tu We Th Fr Sa" */
/* -j heading: */
/* " Su Mo Tu We Th Fr Sa" */
char day_headings[ENABLE_FEATURE_ASSUME_UNICODE ? 28 * 6 : 28];
IF_FEATURE_ASSUME_UNICODE(char *hp = day_headings;)
char buf[40];
init_unicode();
flags = getopt32(argv, "jy");
/* This sets julian = flags & 1: */
option_mask32 &= 1;
@@ -122,15 +129,24 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
if (i < 7) {
zero_tm.tm_wday = i;
//FIXME: unicode
//Bug 839:
//testcase with doublewidth Japanese chars: "LANG=zh_TW.utf8 cal"
//perhaps use wc[s]width() to probe terminal width
/* abbreviated weekday name according to locale */
strftime(buf, sizeof(buf), "%a", &zero_tm);
#if ENABLE_FEATURE_ASSUME_UNICODE
if (julian)
*hp++ = ' ';
{
char *two_wchars = unicode_cut_nchars(2, buf);
strcpy(hp, two_wchars);
free(two_wchars);
}
hp += strlen(hp);
*hp++ = ' ';
#else
strncpy(day_headings + i * (3+julian) + julian, buf, 2);
#endif
}
} while (++i < 12);
IF_FEATURE_ASSUME_UNICODE(hp[-1] = '\0';)
if (month) {
unsigned row, len, days[MAXDAYS];

View File

@@ -178,7 +178,7 @@ int df_main(int argc UNUSED_PARAM, char **argv)
#endif
#if ENABLE_FEATURE_ASSUME_UNICODE
dev_len = bb_mbstrlen(device);
dev_len = unicode_strlen(device);
if (dev_len > 20) {
printf("%s\n%20s", device, "");
} else {

View File

@@ -49,7 +49,7 @@ static void expand(FILE *file, unsigned tab_size, unsigned opt)
unsigned len;
*ptr = '\0';
# if ENABLE_FEATURE_ASSUME_UNICODE
len = bb_mbstrlen(ptr_strbeg);
len = unicode_strlen(ptr_strbeg);
# else
len = ptr - ptr_strbeg;
# endif
@@ -105,7 +105,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
char c;
c = ptr[n];
ptr[n] = '\0';
len = bb_mbstrlen(ptr);
len = unicode_strlen(ptr);
ptr[n] = c;
}
# else

View File

@@ -550,7 +550,7 @@ static void showfiles(struct dnode **dn, unsigned nfiles)
} else {
/* find the longest file name, use that as the column width */
for (i = 0; dn[i]; i++) {
int len = bb_mbstrlen(dn[i]->name);
int len = unicode_strlen(dn[i]->name);
if (column_width < len)
column_width = len;
}
@@ -742,7 +742,7 @@ static int print_name(const char *name)
{
if (option_mask32 & OPT_Q) {
#if ENABLE_FEATURE_ASSUME_UNICODE
unsigned len = 2 + bb_mbstrlen(name);
unsigned len = 2 + unicode_strlen(name);
#else
unsigned len = 2;
#endif
@@ -762,7 +762,7 @@ static int print_name(const char *name)
/* No -Q: */
#if ENABLE_FEATURE_ASSUME_UNICODE
fputs(name, stdout);
return bb_mbstrlen(name);
return unicode_strlen(name);
#else
return printf("%s", name);
#endif