library: let's make 'escaped_copy' multibyte aware too

This patch is intended to bring the master branch into
closer alignment with the newlib branch functionality.

The commit shown below replaced that 'escape_str' with
'escaped_copy' so that the ps program wouldn't execute
some code twice. However, it created a problem for the
top program when the UTF-8 encoding was not available.

Unlike ps who calls the escape functions directly, top
outputs those library provided strings unaltered. That
means there is no workaround (like LANG=) when such an
encoding was unavailable. This, in turn, can lead to a
corrupted display (especially with the linux console).

Now, when a UTF-8 encoding is not available, multibyte
chars are converted to '?', reducing display problems.

[ there's still a potential problem concerning 'cmd' ]
[ should program names contain multibyte characters. ]
[ unlike that newlib branch, this string is not ever ]
[ altered with the '?' char under the master branch. ]

Reference(s);
. Dec 2020, escaoed_copy repleced most escape_str
commit eea5626bb1

Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
Jim Warner 2020-12-31 00:00:00 -06:00 committed by Craig Small
parent eea5626bb1
commit 0d386260b4

View File

@ -41,7 +41,20 @@
if ((cells) <= 0) return 0; \
} while (0)
static const unsigned char ESC_tab[] = {
"@..............................."
"||||||||||||||||||||||||||||||||"
"||||||||||||||||||||||||||||||||"
"|||||||||||||||||||||||||||||||."
"????????????????????????????????"
"????????????????????????????????"
"????????????????????????????????"
"????????????????????????????????"
};
#if (__GNU_LIBRARY__ >= 6) && (!defined(__UCLIBC__) || defined(__UCLIBC_HAS_WCHAR__))
static int escape_str_utf8(char *restrict dst, const char *restrict src, int bufsize, int *maxcells){
int my_cells = 0;
int my_bytes = 0;
@ -105,23 +118,14 @@ static int escape_str_utf8(char *restrict dst, const char *restrict src, int buf
*maxcells -= my_cells;
return my_bytes; // bytes of text, excluding the NUL
}
#endif /* __GNU_LIBRARY__ */
/* sanitize a string via one-way mangle */
int escape_str(char *restrict dst, const char *restrict src, int bufsize, int *maxcells){
unsigned char c;
int my_cells = 0;
int my_bytes = 0;
const char codes[] =
"Z..............................."
"||||||||||||||||||||||||||||||||"
"||||||||||||||||||||||||||||||||"
"|||||||||||||||||||||||||||||||."
"????????????????????????????????"
"????????????????????????????????"
"????????????????????????????????"
"????????????????????????????????";
#if (__GNU_LIBRARY__ >= 6) && (!defined(__UCLIBC__) || defined(__UCLIBC_HAS_WCHAR__))
static int utf_init=0;
@ -145,7 +149,7 @@ int escape_str(char *restrict dst, const char *restrict src, int bufsize, int *m
break;
c = (unsigned char) *(src++);
if(!c) break;
if(codes[c]!='|') c=codes[c];
if(ESC_tab[c]!='|') c=ESC_tab[c];
my_cells++;
my_bytes++;
*(dst++) = c;
@ -225,8 +229,14 @@ int escape_command(char *restrict const outbuf, const proc_t *restrict const pp,
// copy a string, but 'escape' any control characters
// using the traditional escape.h calling conventions
int escaped_copy(char *restrict dst, const char *restrict src, int bufsize, int *maxroom){
static int utf_sw = 0;
int i, n;
char c;
if(utf_sw == 0){
char *enc = nl_langinfo(CODESET);
utf_sw = enc && strcasecmp(enc, "UTF-8")==0 ? 1 : -1;
}
SECURE_ESCAPE_ARGS(dst, bufsize, *maxroom);
if (bufsize > *maxroom+1) bufsize = *maxroom+1;
@ -237,11 +247,17 @@ int escaped_copy(char *restrict dst, const char *restrict src, int bufsize, int
}
if (n >= bufsize) n = bufsize-1;
// control chars, especially tabs, create alignment problems for ps & top ...
for (i = 0; i < n; i++)
if ((unsigned char)dst[i] < 0x20 || dst[i] == 0x7f)
dst[i] = '?';
if (utf_sw < 0) {
// if bad locale, replace the non-printing stuff
for (i = 0; i < n; i++)
if ((c = ESC_tab[(unsigned char)dst[i]]) != '|')
dst[i] = c;
} else {
// or eliminate those non-printing control chars
for (i = 0; i < n; i++)
if ((unsigned char)dst[i] < 0x20 || dst[i] == 0x7f)
dst[i] = '?';
}
*maxroom -= n;
return n;
}