[un]expand: unicode support
function                                             old     new   delta
expand_main                                          633     663     +30

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		
				
					committed by
					
						 Denys Vlasenko
						Denys Vlasenko
					
				
			
			
				
	
			
			
			
						parent
						
							4928f3b90b
						
					
				
				
					commit
					d2b1ba6fde
				
			| @@ -20,8 +20,8 @@ | |||||||
|  * |  * | ||||||
|  *  Caveat: this versions of expand and unexpand don't accept tab lists. |  *  Caveat: this versions of expand and unexpand don't accept tab lists. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #include "libbb.h" | #include "libbb.h" | ||||||
|  | #include "unicode.h" | ||||||
|  |  | ||||||
| enum { | enum { | ||||||
| 	OPT_INITIAL     = 1 << 0, | 	OPT_INITIAL     = 1 << 0, | ||||||
| @@ -30,35 +30,37 @@ enum { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| #if ENABLE_EXPAND | #if ENABLE_EXPAND | ||||||
| static void expand(FILE *file, int tab_size, unsigned opt) | static void expand(FILE *file, unsigned tab_size, unsigned opt) | ||||||
| { | { | ||||||
| 	char *line; | 	char *line; | ||||||
|  |  | ||||||
| 	tab_size = -tab_size; |  | ||||||
|  |  | ||||||
| 	while ((line = xmalloc_fgets(file)) != NULL) { | 	while ((line = xmalloc_fgets(file)) != NULL) { | ||||||
| 		int pos; |  | ||||||
| 		unsigned char c; | 		unsigned char c; | ||||||
| 		char *ptr = line; | 		char *ptr; | ||||||
|  | 		char *ptr_strbeg; | ||||||
|  |  | ||||||
| 		goto start; | 		ptr = ptr_strbeg = line; | ||||||
| 		while ((c = *ptr) != '\0') { | 		while ((c = *ptr) != '\0') { | ||||||
| 			if ((opt & OPT_INITIAL) && !isblank(c)) { | 			if ((opt & OPT_INITIAL) && !isblank(c)) { | ||||||
| 				fputs(ptr, stdout); | 				/* not space or tab */ | ||||||
| 				break; | 				break; | ||||||
| 			} | 			} | ||||||
| 			ptr++; |  | ||||||
| 			if (c == '\t') { | 			if (c == '\t') { | ||||||
| 				c = ' '; | 				unsigned len; | ||||||
| 				while (++pos < 0) | 				*ptr = '\0'; | ||||||
| 					bb_putchar(c); | # if ENABLE_FEATURE_ASSUME_UNICODE | ||||||
| 			} | 				len = bb_mbstrlen(ptr_strbeg); | ||||||
| 			bb_putchar(c); | # else | ||||||
| 			if (++pos >= 0) { | 				len = ptr - ptr_strbeg; | ||||||
|  start: | # endif | ||||||
| 				pos = tab_size; | 				len = tab_size - (len % tab_size); | ||||||
|  | 				/*while (ptr[1] == '\t') { ptr++; len += tab_size; } - can handle many tabs at once */ | ||||||
|  | 				printf("%s%*s", ptr_strbeg, len, ""); | ||||||
|  | 				ptr_strbeg = ptr + 1; | ||||||
| 			} | 			} | ||||||
|  | 			ptr++; | ||||||
| 		} | 		} | ||||||
|  | 		fputs(ptr_strbeg, stdout); | ||||||
| 		free(line); | 		free(line); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @@ -75,6 +77,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt) | |||||||
|  |  | ||||||
| 		while (*ptr) { | 		while (*ptr) { | ||||||
| 			unsigned n; | 			unsigned n; | ||||||
|  | 			unsigned len; | ||||||
|  |  | ||||||
| 			while (*ptr == ' ') { | 			while (*ptr == ' ') { | ||||||
| 				column++; | 				column++; | ||||||
| @@ -97,8 +100,19 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt) | |||||||
| 			} | 			} | ||||||
| 			n = strcspn(ptr, "\t "); | 			n = strcspn(ptr, "\t "); | ||||||
| 			printf("%*s%.*s", column, "", n, ptr); | 			printf("%*s%.*s", column, "", n, ptr); | ||||||
|  | # if ENABLE_FEATURE_ASSUME_UNICODE | ||||||
|  | 			{ | ||||||
|  | 				char c; | ||||||
|  | 				c = ptr[n]; | ||||||
|  | 				ptr[n] = '\0'; | ||||||
|  | 				len = bb_mbstrlen(ptr); | ||||||
|  | 				ptr[n] = c; | ||||||
|  | 			} | ||||||
|  | # else | ||||||
|  | 			len = n; | ||||||
|  | # endif | ||||||
| 			ptr += n; | 			ptr += n; | ||||||
| 			column = (column + n) % tab_size; | 			column = (column + len) % tab_size; | ||||||
| 		} | 		} | ||||||
| 		free(line); | 		free(line); | ||||||
| 	} | 	} | ||||||
| @@ -130,6 +144,7 @@ int expand_main(int argc UNUSED_PARAM, char **argv) | |||||||
| 		"all\0"              No_argument       "a" | 		"all\0"              No_argument       "a" | ||||||
| 	; | 	; | ||||||
| #endif | #endif | ||||||
|  | 	check_unicode_in_env(); | ||||||
|  |  | ||||||
| 	if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) { | 	if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) { | ||||||
| 		IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts); | 		IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts); | ||||||
|   | |||||||
| @@ -12,4 +12,10 @@ testing "expand" \ | |||||||
| 	"" \ | 	"" \ | ||||||
| 	"\t12345678\t12345678\n" \ | 	"\t12345678\t12345678\n" \ | ||||||
|  |  | ||||||
|  | testing "expand with unicode characher 0x394" \ | ||||||
|  | 	"expand" \ | ||||||
|  | 	"Δ       12345ΔΔΔ        12345678\n" \ | ||||||
|  | 	"" \ | ||||||
|  | 	"Δ\t12345ΔΔΔ\t12345678\n" \ | ||||||
|  |  | ||||||
| exit $FAILCOUNT | exit $FAILCOUNT | ||||||
|   | |||||||
| @@ -27,4 +27,7 @@ testing "unexpand case 6" "unexpand" \ | |||||||
| testing "unexpand case 7" "unexpand" \ | testing "unexpand case 7" "unexpand" \ | ||||||
| 	"123\t 45678\n" "" "123 \t 45678\n" \ | 	"123\t 45678\n" "" "123 \t 45678\n" \ | ||||||
|  |  | ||||||
|  | testing "unexpand with unicode characher 0x394" "unexpand" \ | ||||||
|  | 	"1ΔΔΔ5\t99999\n" "" "1ΔΔΔ5   99999\n" \ | ||||||
|  |  | ||||||
| exit $FAILCOUNT | exit $FAILCOUNT | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user