lineedit: invalid unicode characters are replaced with CONFIG_SUBST_WCHAR
function                                             old     new   delta
read_key_ungets                                        -      50     +50
lineedit_read_key                                    223     252     +29

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
f15620c377
commit
d2b04050c0
@ -1277,6 +1277,7 @@ enum {
|
|||||||
* on first call.
|
* on first call.
|
||||||
*/
|
*/
|
||||||
int64_t read_key(int fd, char *buffer) FAST_FUNC;
|
int64_t read_key(int fd, char *buffer) FAST_FUNC;
|
||||||
|
void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;
|
||||||
|
|
||||||
|
|
||||||
#if ENABLE_FEATURE_EDITING
|
#if ENABLE_FEATURE_EDITING
|
||||||
|
@ -1700,18 +1700,34 @@ static int lineedit_read_key(char *read_key_buffer)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ENABLE_FEATURE_ASSUME_UNICODE
|
#if ENABLE_FEATURE_ASSUME_UNICODE
|
||||||
{
|
if (unicode_status == UNICODE_ON) {
|
||||||
wchar_t wc;
|
wchar_t wc;
|
||||||
|
|
||||||
if ((int32_t)ic < 0) /* KEYCODE_xxx */
|
if ((int32_t)ic < 0) /* KEYCODE_xxx */
|
||||||
return ic;
|
return ic;
|
||||||
|
// TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff...
|
||||||
|
|
||||||
unicode_buf[unicode_idx++] = ic;
|
unicode_buf[unicode_idx++] = ic;
|
||||||
unicode_buf[unicode_idx] = '\0';
|
unicode_buf[unicode_idx] = '\0';
|
||||||
if (mbstowcs(&wc, unicode_buf, 1) != 1 && unicode_idx < MB_CUR_MAX) {
|
if (mbstowcs(&wc, unicode_buf, 1) != 1) {
|
||||||
delay = 50;
|
/* Not (yet?) a valid unicode char */
|
||||||
goto poll_again;
|
if (unicode_idx < MB_CUR_MAX) {
|
||||||
|
delay = 50;
|
||||||
|
goto poll_again;
|
||||||
|
}
|
||||||
|
/* Invalid sequence. Save all "bad bytes" except first */
|
||||||
|
read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1);
|
||||||
|
/*
|
||||||
|
* ic = unicode_buf[0] sounds even better, but currently
|
||||||
|
* this does not work: wchar_t[] -> char[] conversion
|
||||||
|
* when lineedit finishes mangles such "raw bytes"
|
||||||
|
* (by misinterpreting them as unicode chars):
|
||||||
|
*/
|
||||||
|
ic = CONFIG_SUBST_WCHAR;
|
||||||
|
} else {
|
||||||
|
/* Valid unicode char, return its code */
|
||||||
|
ic = wc;
|
||||||
}
|
}
|
||||||
ic = wc;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} while (errno == EAGAIN);
|
} while (errno == EAGAIN);
|
||||||
|
@ -246,3 +246,12 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
|
|||||||
buffer[-1] = 0;
|
buffer[-1] = 0;
|
||||||
goto start_over;
|
goto start_over;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Append bytes to the pending-byte store kept in a read_key() buffer.
 *
 * Layout used here: buffer[0] holds the number of bytes currently stored,
 * and the stored bytes follow starting at buffer[1].
 *
 * buffer: key buffer to append to.
 * str:    bytes to append (need not be NUL-terminated; len bytes are copied).
 * len:    number of bytes to append. If they do not all fit, the request is
 *         silently truncated so that at most KEYCODE_BUFFER_SIZE-1 bytes
 *         are stored in total.
 *
 * NOTE(review): assumes buffer[0] <= KEYCODE_BUFFER_SIZE-1 on entry;
 * otherwise the free-space computation below wraps around - confirm that
 * all writers of buffer[0] maintain this.
 * Presumably the appended bytes are consumed by a later read_key() call on
 * the same buffer - verify against read_key().
 */
void FAST_FUNC read_key_ungets(char *buffer, const char *str, unsigned len)
{
	unsigned cur_len = (unsigned char)buffer[0];

	/* Clamp to the free space remaining after the count byte */
	if (len > KEYCODE_BUFFER_SIZE-1 - cur_len)
		len = KEYCODE_BUFFER_SIZE-1 - cur_len;
	memcpy(buffer + 1 + cur_len, str, len);
	/*
	 * buffer[0] already equals cur_len, so only the newly appended bytes
	 * are added. (Adding cur_len + len would count the old bytes twice.)
	 */
	buffer[0] += len;
}
|
||||||
|
42
testsuite/ash.tests
Executable file
42
testsuite/ash.tests
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# These are not ash tests, we use ash as a way to test lineedit!
|
||||||
|
#
|
||||||
|
# Copyright 2010 by Denys Vlasenko
|
||||||
|
# Licensed under GPL v2, see file LICENSE for details.
|
||||||
|
|
||||||
|
. ./testing.sh
|
||||||
|
|
||||||
|
# testing "test name" "options" "expected result" "file input" "stdin"
|
||||||
|
|
||||||
|
testing "One byte which is not valid unicode char followed by valid input" \
|
||||||
|
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
|
||||||
|
"\
|
||||||
|
00000000 3f 2d 0a |?-.|
|
||||||
|
00000003
|
||||||
|
" \
|
||||||
|
"" \
|
||||||
|
"echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \
|
||||||
|
|
||||||
|
testing "30 bytes which are not valid unicode chars followed by valid input" \
|
||||||
|
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
|
||||||
|
"\
|
||||||
|
00000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????|
|
||||||
|
00000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.|
|
||||||
|
00000020
|
||||||
|
" \
|
||||||
|
"" \
|
||||||
|
"echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \
|
||||||
|
|
||||||
|
# Not sure this behavior is perfect: we lose all invalid input which precedes
|
||||||
|
# arrow keys and such. In this example, \xff\xff are lost
|
||||||
|
testing "2 bytes which are not valid unicode chars followed by left arrow key" \
|
||||||
|
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
|
||||||
|
"\
|
||||||
|
00000000 3d 2d 0a |=-.|
|
||||||
|
00000003
|
||||||
|
" \
|
||||||
|
"" \
|
||||||
|
"echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \
|
||||||
|
|
||||||
|
exit $FAILCOUNT
|
Loading…
Reference in New Issue
Block a user