ash: fix matching of unicode greek letter rho (cf 81) and similar cases

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2017-07-05 19:10:21 +02:00
parent 6798486141
commit fda9fafe27
5 changed files with 90 additions and 1 deletions
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag)
 	while (*p) {
 		if ((unsigned char)*p == CTLQUOTEMARK) {
 // Note: both inquotes and protect_against_glob only affect whether
 // CTLESC,<ch> gets converted to <ch> or to \<ch>
 			inquotes = ~inquotes;
 			p++;
 			protect_against_glob = globbing;
@@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag)
 				ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)");
 #endif
 			if (protect_against_glob) {
-				*q++ = '\\';
+				/*
 				 * We used to trust glob() and fnmatch() to eat
 				 * superfluous escapes (\z where z has no
 				 * special meaning anyway). But this causes
 				 * bugs such as string of one greek letter rho
 				 * (unicode-encoded as two bytes "cf,81")
 				 * getting encoded as "cf,CTLESC,81"
 				 * and here, converted to "cf,\,81" -
 				 * which does not go well with some flavors
 				 * of fnmatch() in unicode locales.
 				 *
 				 * Let's add "\" only to the chars which need it.
 				 */
 				if (*p == '*'
 				 || *p == '?'
 				 || *p == '['
 				/* || *p == ']' maybe also this? */
 				 || *p == '\\'
 				) {
 					*q++ = '\\';
 				}
 			}
 		} else if (*p == '\\' && !inquotes) {
 			/* naked back slash */
--- a/shell/ash_test/ash-quoting/unicode_8x_chars.right
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
 ok
 ok
 ok
 ok
 ok
 ok
--- a/shell/ash_test/ash-quoting/unicode_8x_chars.tests
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
 # Unicode: cf 80
 case π in
 ( "π" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 # Unicode: cf 81
 case ρ in
 ( "ρ" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 # Unicode: cf 82
 case ς in
 ( "ς" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 case "π" in
 ( π ) echo ok ;;
 ( * ) echo WRONG ;;
 esac
 case "ρ" in
 ( ρ ) echo ok ;;
 ( * ) echo WRONG ;;
 esac
 case "ς" in
 ( ς ) echo ok ;;
 ( * ) echo WRONG ;;
 esac
--- a/shell/hush_test/hush-quoting/unicode_8x_chars.right
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
 ok
 ok
 ok
 ok
 ok
 ok
--- a/shell/hush_test/hush-quoting/unicode_8x_chars.tests
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
 # Unicode: cf 80
 case π in
 ( "π" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 # Unicode: cf 81
 case ρ in
 ( "ρ" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 # Unicode: cf 82
 case ς in
 ( "ς" ) echo ok ;;
 ( * )   echo WRONG ;;
 esac
 case "π" in
 ( π ) echo ok ;;
 ( * ) echo WRONG ;;
 esac
 case "ρ" in
 ( ρ ) echo ok ;;
 ( * ) echo WRONG ;;
 esac
 case "ς" in
 ( ς ) echo ok ;;
 ( * ) echo WRONG ;;
 esac