From 0d386260b4b007e65172d137924db6e4e4e994cd Mon Sep 17 00:00:00 2001 From: Jim Warner Date: Thu, 31 Dec 2020 00:00:00 -0600 Subject: [PATCH] library: let's make 'escaped_copy' multibyte aware too This patch is intended to bring the master branch into closer alignment with the newlib branch functionality. The commit shown below replaced that 'escape_str' with 'escaped_copy' so that the ps program wouldn't execute some code twice. However, it created a problem for the top program when the UTF-8 encoding was not available. Unlike ps who calls the escape functions directly, top outputs those library provided strings unaltered. That means there is no workaround (like LANG=) when such an encoding was unavailable. This, in turn, can lead to a corrupted display (especially with the linux console). Now, when a UTF-8 encoding is not available, multibyte chars are converted to '?', reducing display problems. [ there's still a potential problem concerning 'cmd' ] [ should program names contain multibyte characters. ] [ unlike that newlib branch, this string is not ever ] [ altered with the '?' char under the master branch. ] Reference(s); . Dec 2020, escaoed_copy repleced most escape_str commit eea5626bb144328eb60e9ea8a29bd5b1f100ecd9 Signed-off-by: Jim Warner --- proc/escape.c | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/proc/escape.c b/proc/escape.c index 880dfd95..2e8fb7dd 100644 --- a/proc/escape.c +++ b/proc/escape.c @@ -41,7 +41,20 @@ if ((cells) <= 0) return 0; \ } while (0) +static const unsigned char ESC_tab[] = { + "@..............................." + "||||||||||||||||||||||||||||||||" + "||||||||||||||||||||||||||||||||" + "|||||||||||||||||||||||||||||||." + "????????????????????????????????" + "????????????????????????????????" + "????????????????????????????????" + "????????????????????????????????" +}; + + #if (__GNU_LIBRARY__ >= 6) && (!defined(__UCLIBC__) || defined(__UCLIBC_HAS_WCHAR__)) + static int escape_str_utf8(char *restrict dst, const char *restrict src, int bufsize, int *maxcells){ int my_cells = 0; int my_bytes = 0; @@ -105,23 +118,14 @@ static int escape_str_utf8(char *restrict dst, const char *restrict src, int buf *maxcells -= my_cells; return my_bytes; // bytes of text, excluding the NUL } - #endif /* __GNU_LIBRARY__ */ + /* sanitize a string via one-way mangle */ int escape_str(char *restrict dst, const char *restrict src, int bufsize, int *maxcells){ unsigned char c; int my_cells = 0; int my_bytes = 0; - const char codes[] = - "Z..............................." - "||||||||||||||||||||||||||||||||" - "||||||||||||||||||||||||||||||||" - "|||||||||||||||||||||||||||||||." - "????????????????????????????????" - "????????????????????????????????" - "????????????????????????????????" - "????????????????????????????????"; #if (__GNU_LIBRARY__ >= 6) && (!defined(__UCLIBC__) || defined(__UCLIBC_HAS_WCHAR__)) static int utf_init=0; @@ -145,7 +149,7 @@ int escape_str(char *restrict dst, const char *restrict src, int bufsize, int *m break; c = (unsigned char) *(src++); if(!c) break; - if(codes[c]!='|') c=codes[c]; + if(ESC_tab[c]!='|') c=ESC_tab[c]; my_cells++; my_bytes++; *(dst++) = c; @@ -225,8 +229,14 @@ int escape_command(char *restrict const outbuf, const proc_t *restrict const pp, // copy a string, but 'escape' any control characters // using the traditional escape.h calling conventions int escaped_copy(char *restrict dst, const char *restrict src, int bufsize, int *maxroom){ + static int utf_sw = 0; int i, n; + char c; + if(utf_sw == 0){ + char *enc = nl_langinfo(CODESET); + utf_sw = enc && strcasecmp(enc, "UTF-8")==0 ? 1 : -1; + } SECURE_ESCAPE_ARGS(dst, bufsize, *maxroom); if (bufsize > *maxroom+1) bufsize = *maxroom+1; @@ -237,11 +247,17 @@ int escaped_copy(char *restrict dst, const char *restrict src, int bufsize, int } if (n >= bufsize) n = bufsize-1; - // control chars, especially tabs, create alignment problems for ps & top ... - for (i = 0; i < n; i++) - if ((unsigned char)dst[i] < 0x20 || dst[i] == 0x7f) - dst[i] = '?'; - + if (utf_sw < 0) { + // if bad locale, replace the non-printing stuff + for (i = 0; i < n; i++) + if ((c = ESC_tab[(unsigned char)dst[i]]) != '|') + dst[i] = c; + } else { + // or eliminate those non-printing control chars + for (i = 0; i < n; i++) + if ((unsigned char)dst[i] < 0x20 || dst[i] == 0x7f) + dst[i] = '?'; + } *maxroom -= n; return n; }