[un]expand: unicode support

function                                             old     new   delta
expand_main                                          633     663     +30

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Tomas Heinrich 2010-01-04 16:21:31 +01:00 committed by Denys Vlasenko
parent 4928f3b90b
commit d2b1ba6fde
3 changed files with 42 additions and 18 deletions

View File

@ -20,8 +20,8 @@
* *
* Caveat: these versions of expand and unexpand don't accept tab lists. * Caveat: these versions of expand and unexpand don't accept tab lists.
*/ */
#include "libbb.h" #include "libbb.h"
#include "unicode.h"
enum { enum {
OPT_INITIAL = 1 << 0, OPT_INITIAL = 1 << 0,
@ -30,35 +30,37 @@ enum {
}; };
#if ENABLE_EXPAND #if ENABLE_EXPAND
static void expand(FILE *file, int tab_size, unsigned opt) static void expand(FILE *file, unsigned tab_size, unsigned opt)
{ {
char *line; char *line;
tab_size = -tab_size;
while ((line = xmalloc_fgets(file)) != NULL) { while ((line = xmalloc_fgets(file)) != NULL) {
int pos;
unsigned char c; unsigned char c;
char *ptr = line; char *ptr;
char *ptr_strbeg;
goto start; ptr = ptr_strbeg = line;
while ((c = *ptr) != '\0') { while ((c = *ptr) != '\0') {
if ((opt & OPT_INITIAL) && !isblank(c)) { if ((opt & OPT_INITIAL) && !isblank(c)) {
fputs(ptr, stdout); /* not space or tab */
break; break;
} }
ptr++;
if (c == '\t') { if (c == '\t') {
c = ' '; unsigned len;
while (++pos < 0) *ptr = '\0';
bb_putchar(c); # if ENABLE_FEATURE_ASSUME_UNICODE
} len = bb_mbstrlen(ptr_strbeg);
bb_putchar(c); # else
if (++pos >= 0) { len = ptr - ptr_strbeg;
start: # endif
pos = tab_size; len = tab_size - (len % tab_size);
/*while (ptr[1] == '\t') { ptr++; len += tab_size; } - can handle many tabs at once */
printf("%s%*s", ptr_strbeg, len, "");
ptr_strbeg = ptr + 1;
} }
ptr++;
} }
fputs(ptr_strbeg, stdout);
free(line); free(line);
} }
} }
@ -75,6 +77,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
while (*ptr) { while (*ptr) {
unsigned n; unsigned n;
unsigned len;
while (*ptr == ' ') { while (*ptr == ' ') {
column++; column++;
@ -97,8 +100,19 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
} }
n = strcspn(ptr, "\t "); n = strcspn(ptr, "\t ");
printf("%*s%.*s", column, "", n, ptr); printf("%*s%.*s", column, "", n, ptr);
# if ENABLE_FEATURE_ASSUME_UNICODE
{
char c;
c = ptr[n];
ptr[n] = '\0';
len = bb_mbstrlen(ptr);
ptr[n] = c;
}
# else
len = n;
# endif
ptr += n; ptr += n;
column = (column + n) % tab_size; column = (column + len) % tab_size;
} }
free(line); free(line);
} }
@ -130,6 +144,7 @@ int expand_main(int argc UNUSED_PARAM, char **argv)
"all\0" No_argument "a" "all\0" No_argument "a"
; ;
#endif #endif
check_unicode_in_env();
if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) { if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) {
IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts); IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts);

View File

@ -12,4 +12,10 @@ testing "expand" \
"" \ "" \
"\t12345678\t12345678\n" \ "\t12345678\t12345678\n" \
testing "expand with unicode characher 0x394" \
"expand" \
"Δ 12345ΔΔΔ 12345678\n" \
"" \
"Δ\t12345ΔΔΔ\t12345678\n" \
exit $FAILCOUNT exit $FAILCOUNT

View File

@ -27,4 +27,7 @@ testing "unexpand case 6" "unexpand" \
testing "unexpand case 7" "unexpand" \ testing "unexpand case 7" "unexpand" \
"123\t 45678\n" "" "123 \t 45678\n" \ "123\t 45678\n" "" "123 \t 45678\n" \
testing "unexpand with unicode characher 0x394" "unexpand" \
"1ΔΔΔ5\t99999\n" "" "1ΔΔΔ5 99999\n" \
exit $FAILCOUNT exit $FAILCOUNT