further work on unicodization
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
45
TODO_unicode
Normal file
45
TODO_unicode
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
Already fixed applets:
|
||||||
|
cal
|
||||||
|
lsmod
|
||||||
|
df
|
||||||
|
dumpleases
|
||||||
|
|
||||||
|
Applets which may need unicode handling (more extensive than sanitizing
|
||||||
|
of filenames in error messages):
|
||||||
|
|
||||||
|
ls - uses unicode_strlen, not scrlen
|
||||||
|
expand, unexpand - use unicode_strlen, not scrlen
|
||||||
|
ash, hush through lineedit - use unicode_strlen, not scrlen
|
||||||
|
top - need to sanitize process args
|
||||||
|
ps - need to sanitize process args
|
||||||
|
less
|
||||||
|
more
|
||||||
|
vi
|
||||||
|
ed
|
||||||
|
cut
|
||||||
|
awk
|
||||||
|
sed
|
||||||
|
tr
|
||||||
|
grep egrep fgrep
|
||||||
|
fold
|
||||||
|
sort
|
||||||
|
head, tail
|
||||||
|
catv - "display nonprinting chars" - what could this mean for unicode?
|
||||||
|
wc
|
||||||
|
chat
|
||||||
|
dumpkmap
|
||||||
|
last - just line up columns
|
||||||
|
man
|
||||||
|
microcom
|
||||||
|
strings
|
||||||
|
watch
|
||||||
|
|
||||||
|
Unsure, may need fixing:
|
||||||
|
|
||||||
|
hostname - do we really want to protect against bad chars in it?
|
||||||
|
patch
|
||||||
|
addgroup, adduser, delgroup, deluser
|
||||||
|
telnet
|
||||||
|
telnetd
|
||||||
|
od
|
||||||
|
printf
|
@ -135,7 +135,7 @@ int cal_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
if (julian)
|
if (julian)
|
||||||
*hp++ = ' ';
|
*hp++ = ' ';
|
||||||
{
|
{
|
||||||
char *two_wchars = unicode_cut_nchars(2, buf);
|
char *two_wchars = unicode_conv_to_printable_fixedwidth(NULL, buf, 2);
|
||||||
strcpy(hp, two_wchars);
|
strcpy(hp, two_wchars);
|
||||||
free(two_wchars);
|
free(two_wchars);
|
||||||
}
|
}
|
||||||
|
@ -114,9 +114,6 @@ int df_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
while (1) {
|
while (1) {
|
||||||
const char *device;
|
const char *device;
|
||||||
const char *mount_point;
|
const char *mount_point;
|
||||||
#if ENABLE_FEATURE_ASSUME_UNICODE
|
|
||||||
size_t dev_len;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (mount_table) {
|
if (mount_table) {
|
||||||
mount_entry = getmntent(mount_table);
|
mount_entry = getmntent(mount_table);
|
||||||
@ -178,11 +175,15 @@ int df_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ENABLE_FEATURE_ASSUME_UNICODE
|
#if ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
dev_len = unicode_strlen(device);
|
{
|
||||||
if (dev_len > 20) {
|
uni_stat_t uni_stat;
|
||||||
printf("%s\n%20s", device, "");
|
char *uni_dev = unicode_conv_to_printable(&uni_stat, device);
|
||||||
} else {
|
if (uni_stat.unicode_width > 20) {
|
||||||
printf("%s%*s", device, 20 - (int)dev_len, "");
|
printf("%s\n%20s", uni_dev, "");
|
||||||
|
} else {
|
||||||
|
printf("%s%*s", uni_dev, 20 - (int)uni_stat.unicode_width, "");
|
||||||
|
}
|
||||||
|
free(uni_dev);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (printf("\n%-20s" + 1, device) > 20)
|
if (printf("\n%-20s" + 1, device) > 20)
|
||||||
|
@ -14,15 +14,25 @@ enum {
|
|||||||
#if !ENABLE_FEATURE_ASSUME_UNICODE
|
#if !ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
|
|
||||||
# define unicode_strlen(string) strlen(string)
|
# define unicode_strlen(string) strlen(string)
|
||||||
# define unicode_scrlen(string) TODO
|
|
||||||
# define unicode_status UNICODE_OFF
|
# define unicode_status UNICODE_OFF
|
||||||
# define init_unicode() ((void)0)
|
# define init_unicode() ((void)0)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
size_t FAST_FUNC unicode_strlen(const char *string);
|
size_t FAST_FUNC unicode_strlen(const char *string);
|
||||||
char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src);
|
enum {
|
||||||
unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
|
UNI_FLAG_PAD = (1 << 0),
|
||||||
|
};
|
||||||
|
typedef struct uni_stat_t {
|
||||||
|
unsigned byte_count;
|
||||||
|
unsigned unicode_count;
|
||||||
|
unsigned unicode_width;
|
||||||
|
} uni_stat_t;
|
||||||
|
//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
|
||||||
|
//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth);
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width);
|
||||||
|
|
||||||
# if ENABLE_LOCALE_SUPPORT
|
# if ENABLE_LOCALE_SUPPORT
|
||||||
|
|
||||||
|
@ -246,29 +246,45 @@ size_t FAST_FUNC unicode_strlen(const char *string)
|
|||||||
return width;
|
return width;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
|
static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
|
||||||
{
|
{
|
||||||
char *dst;
|
char *dst;
|
||||||
unsigned dst_len;
|
unsigned dst_len;
|
||||||
|
unsigned uni_count;
|
||||||
|
unsigned uni_width;
|
||||||
|
|
||||||
if (unicode_status != UNICODE_ON) {
|
if (unicode_status != UNICODE_ON) {
|
||||||
char *d = dst = xmalloc(width + 1);
|
char *d;
|
||||||
while ((int)--width >= 0) {
|
if (flags & UNI_FLAG_PAD) {
|
||||||
unsigned char c = *src;
|
d = dst = xmalloc(width + 1);
|
||||||
if (c == '\0') {
|
while ((int)--width >= 0) {
|
||||||
do
|
unsigned char c = *src;
|
||||||
*d++ = ' ';
|
if (c == '\0') {
|
||||||
while ((int)--width >= 0);
|
do
|
||||||
break;
|
*d++ = ' ';
|
||||||
|
while ((int)--width >= 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*d++ = (c >= ' ' && c < 0x7f) ? c : '?';
|
||||||
|
src++;
|
||||||
|
}
|
||||||
|
*d = '\0';
|
||||||
|
} else {
|
||||||
|
d = dst = xstrndup(src, width);
|
||||||
|
while (*d) {
|
||||||
|
unsigned char c = *d;
|
||||||
|
if (c < ' ' || c >= 0x7f)
|
||||||
|
*d = '?';
|
||||||
|
d++;
|
||||||
}
|
}
|
||||||
*d++ = (c >= ' ' && c < 0x7f) ? c : '?';
|
|
||||||
src++;
|
|
||||||
}
|
}
|
||||||
*d = '\0';
|
if (stats)
|
||||||
|
stats->byte_count = stats->unicode_count = (d - dst);
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
dst = NULL;
|
dst = NULL;
|
||||||
|
uni_count = uni_width = 0;
|
||||||
dst_len = 0;
|
dst_len = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
int w;
|
int w;
|
||||||
@ -301,7 +317,7 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
|
|||||||
/* src = NULL: invalid sequence is seen,
|
/* src = NULL: invalid sequence is seen,
|
||||||
* else: wc is set, src is advanced to next mb char
|
* else: wc is set, src is advanced to next mb char
|
||||||
*/
|
*/
|
||||||
if (src1) {/* no error */
|
if (src1) { /* no error */
|
||||||
if (wc == 0) /* end-of-string */
|
if (wc == 0) /* end-of-string */
|
||||||
break;
|
break;
|
||||||
src = src1;
|
src = src1;
|
||||||
@ -315,8 +331,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
|
|||||||
goto subst;
|
goto subst;
|
||||||
w = wcwidth(wc);
|
w = wcwidth(wc);
|
||||||
if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
|
if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
|
||||||
|| (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0)
|
|| (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
|
||||||
|| (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1)
|
|| (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
|
||||||
) {
|
) {
|
||||||
subst:
|
subst:
|
||||||
wc = CONFIG_SUBST_WCHAR;
|
wc = CONFIG_SUBST_WCHAR;
|
||||||
@ -331,6 +347,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uni_count++;
|
||||||
|
uni_width += w;
|
||||||
dst = xrealloc(dst, dst_len + MB_CUR_MAX);
|
dst = xrealloc(dst, dst_len + MB_CUR_MAX);
|
||||||
#if ENABLE_LOCALE_SUPPORT
|
#if ENABLE_LOCALE_SUPPORT
|
||||||
{
|
{
|
||||||
@ -343,15 +361,37 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Pad to remaining width */
|
/* Pad to remaining width */
|
||||||
dst = xrealloc(dst, dst_len + width + 1);
|
if (flags & UNI_FLAG_PAD) {
|
||||||
while ((int)--width >= 0) {
|
dst = xrealloc(dst, dst_len + width + 1);
|
||||||
dst[dst_len++] = ' ';
|
uni_count += width;
|
||||||
|
uni_width += width;
|
||||||
|
while ((int)--width >= 0) {
|
||||||
|
dst[dst_len++] = ' ';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
dst[dst_len] = '\0';
|
dst[dst_len] = '\0';
|
||||||
|
if (stats) {
|
||||||
|
stats->byte_count = dst_len;
|
||||||
|
stats->unicode_count = uni_count;
|
||||||
|
stats->unicode_width = uni_width;
|
||||||
|
}
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
|
||||||
|
{
|
||||||
|
return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
|
||||||
|
}
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
|
||||||
|
{
|
||||||
|
return unicode_conv_to_printable2(stats, src, maxwidth, 0);
|
||||||
|
}
|
||||||
|
char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
|
||||||
|
{
|
||||||
|
return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef UNUSED
|
||||||
unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
|
unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
|
||||||
{
|
{
|
||||||
if (unicode_status != UNICODE_ON) {
|
if (unicode_status != UNICODE_ON) {
|
||||||
@ -382,3 +422,4 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
@ -46,9 +46,6 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
|
|||||||
#if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT
|
#if ENABLE_FEATURE_LSMOD_PRETTY_2_6_OUTPUT
|
||||||
char *token[4];
|
char *token[4];
|
||||||
parser_t *parser = config_open("/proc/modules");
|
parser_t *parser = config_open("/proc/modules");
|
||||||
# if ENABLE_FEATURE_ASSUME_UNICODE
|
|
||||||
size_t name_len;
|
|
||||||
# endif
|
|
||||||
init_unicode();
|
init_unicode();
|
||||||
|
|
||||||
printf("%-24sSize Used by", "Module");
|
printf("%-24sSize Used by", "Module");
|
||||||
@ -64,9 +61,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
|
|||||||
} else
|
} else
|
||||||
token[3] = (char *) "";
|
token[3] = (char *) "";
|
||||||
# if ENABLE_FEATURE_ASSUME_UNICODE
|
# if ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
name_len = unicode_strlen(token[0]);
|
{
|
||||||
name_len = (name_len > 19) ? 0 : 19 - name_len;
|
uni_stat_t uni_stat;
|
||||||
printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]);
|
char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
|
||||||
|
unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width;
|
||||||
|
printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]);
|
||||||
|
free(uni_name);
|
||||||
|
}
|
||||||
# else
|
# else
|
||||||
printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
|
printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
|
||||||
# endif
|
# endif
|
||||||
@ -78,9 +79,13 @@ int lsmod_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
|
|||||||
// so trimming the trailing char is just what we need!
|
// so trimming the trailing char is just what we need!
|
||||||
token[3][strlen(token[3])-1] = '\0';
|
token[3][strlen(token[3])-1] = '\0';
|
||||||
# if ENABLE_FEATURE_ASSUME_UNICODE
|
# if ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
name_len = unicode_strlen(token[0]);
|
{
|
||||||
name_len = (name_len > 19) ? 0 : 19 - name_len;
|
uni_stat_t uni_stat;
|
||||||
printf("%s%*s %8s %2s %s\n", token[0], name_len, "", token[1], token[2], token[3]);
|
char *uni_name = unicode_conv_to_printable(&uni_stat, token[0]);
|
||||||
|
unsigned pad_len = (uni_stat.unicode_width > 19) ? 0 : 19 - uni_stat.unicode_width;
|
||||||
|
printf("%s%*s %8s %2s %s\n", uni_name, pad_len, "", token[1], token[2], token[3]);
|
||||||
|
free(uni_name);
|
||||||
|
}
|
||||||
# else
|
# else
|
||||||
printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
|
printf("%-19s %8s %2s %s\n", token[0], token[1], token[2], token[3]);
|
||||||
# endif
|
# endif
|
||||||
|
@ -71,8 +71,11 @@ int dumpleases_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
/* actually, 15+1 and 19+1, +1 is a space between columns */
|
/* actually, 15+1 and 19+1, +1 is a space between columns */
|
||||||
/* lease.hostname is char[20] and is always NUL terminated */
|
/* lease.hostname is char[20] and is always NUL terminated */
|
||||||
#if ENABLE_FEATURE_ASSUME_UNICODE
|
#if ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
printf(" %-16s%s%*s", inet_ntoa(addr), lease.hostname,
|
{
|
||||||
20 - (int)unicode_strlen(lease.hostname), "");
|
char *uni_name = unicode_conv_to_printable_fixedwidth(NULL, lease.hostname, 20);
|
||||||
|
printf(" %-16s%s", inet_ntoa(addr), uni_name);
|
||||||
|
free(uni_name);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname);
|
printf(" %-16s%-20s", inet_ntoa(addr), lease.hostname);
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user