lineedit: invalid unicode characters are replaced with CONFIG_SUBST_WCHAR

function                                             old     new   delta
read_key_ungets                                        -      50     +50
lineedit_read_key                                    223     252     +29

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2010-03-09 14:09:24 +01:00
parent f15620c377
commit d2b04050c0
4 changed files with 73 additions and 5 deletions
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1277,6 +1277,7 @@ enum {
 * on first call.
 */
 int64_t read_key(int fd, char *buffer) FAST_FUNC;
 /* Appends up to len bytes from str to the bytes already saved in buffer
  * (buffer[0] holds the saved byte count, the bytes themselves follow it)
  * and updates that count. Bytes which do not fit into
  * KEYCODE_BUFFER_SIZE-1 are silently dropped.
  */
 void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;
 #if ENABLE_FEATURE_EDITING
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1700,18 +1700,34 @@ static int lineedit_read_key(char *read_key_buffer)
 #endif
 #if ENABLE_FEATURE_ASSUME_UNICODE
-		{
+		if (unicode_status == UNICODE_ON) {
 			wchar_t wc;
 			if ((int32_t)ic < 0) /* KEYCODE_xxx */
 				return ic;
 			// TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff...
 			unicode_buf[unicode_idx++] = ic;
 			unicode_buf[unicode_idx] = '\0';
-			if (mbstowcs(&wc, unicode_buf, 1) != 1 && unicode_idx < MB_CUR_MAX) {
+			if (mbstowcs(&wc, unicode_buf, 1) != 1) {
-				delay = 50;
+				/* Not (yet?) a valid unicode char */
-				goto poll_again;
+				if (unicode_idx < MB_CUR_MAX) {
 					delay = 50;
 					goto poll_again;
 				}
 				/* Invalid sequence. Save all "bad bytes" except first */
 				read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1);
 				/*
 				 * ic = unicode_buf[0] sounds even better, but currently
 				 * this does not work: wchar_t[] -> char[] conversion
 				 * when lineedit finishes mangles such "raw bytes"
 				 * (by misinterpreting them as unicode chars):
 				 */
 				ic = CONFIG_SUBST_WCHAR;
 			} else {
 				/* Valid unicode char, return its code */
 				ic = wc;
 			}
-			ic = wc;
 		}
 #endif
 	} while (errno == EAGAIN);
--- a/libbb/read_key.c
+++ b/libbb/read_key.c
@@ -246,3 +246,12 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
 	buffer[-1] = 0;
 	goto start_over;
 }
 /*
  * Append up to len bytes from str to the bytes already saved in buffer.
  *
  * Layout used here: buffer[0] is the number of saved bytes, and the bytes
  * themselves start at buffer[1]. At most KEYCODE_BUFFER_SIZE-1 bytes are
  * kept in total; anything that does not fit is silently dropped.
  */
 void FAST_FUNC read_key_ungets(char *buffer, const char *str, unsigned len)
 {
 	unsigned cur_len = (unsigned char)buffer[0];
 	unsigned room;

 	/* Already full (or count is corrupted): the subtraction below
 	 * would wrap around and let memcpy run past the buffer */
 	if (cur_len >= KEYCODE_BUFFER_SIZE-1)
 		return;
 	room = KEYCODE_BUFFER_SIZE-1 - cur_len;
 	if (len > room)
 		len = room;
 	memcpy(buffer + 1 + cur_len, str, len);
 	/* buffer[0] already equals cur_len: the new count is cur_len + len,
 	 * not the old count plus (cur_len + len) */
 	buffer[0] = cur_len + len;
 }
--- a/testsuite/ash.tests
+++ b/testsuite/ash.tests
@@ -0,0 +1,42 @@
 #!/bin/sh
 #
 # These are not ash tests, we use ash as a way to test lineedit!
 #
 # Copyright 2010 by Denys Vlasenko
 # Licensed under GPL v2, see file LICENSE for details.
 #
 # Every test starts ash under script(1) and feeds it one typed command line
 # on stdin. That command line echoes what the line editor delivered through
 # "hexdump -C" into the file "output", which is then printed and removed.
 # In the expected dumps each invalid \xff byte shows up as 3f ('?').
 #
 # Note: the last argument of each testing call must NOT end in a backslash,
 # otherwise the following command is spliced into that call's arguments.
 . ./testing.sh
 # testing "test name" "options" "expected result" "file input" "stdin"
 testing "One byte which is not valid unicode char followed by valid input" \
 	"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
 	"\
 00000000  3f 2d 0a                                          |?-.|
 00000003
 " \
 	"" \
 	"echo \xff- | hexdump -C >output; exit; exit; exit; exit\n"
 testing "30 bytes which are not valid unicode chars followed by valid input" \
 	"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
 	"\
 00000000  3f 3f 3f 3f 3f 3f 3f 3f  3f 3f 3f 3f 3f 3f 3f 3f  |????????????????|
 00000010  3f 3f 3f 3f 3f 3f 3f 3f  3f 3f 3f 3f 3f 3f 2d 0a  |??????????????-.|
 00000020
 " \
 	"" \
 	"echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n"
 # Not sure this behavior is perfect: we lose all invalid input which precedes
 # arrow keys and such. In this example, \xff\xff are lost
 testing "2 bytes which are not valid unicode chars followed by left arrow key" \
 	"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \
 	"\
 00000000  3d 2d 0a                                          |=-.|
 00000003
 " \
 	"" \
 	"echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n"
 # Report the number of failed tests as the exit status
 exit $FAILCOUNT