busybox/coreutils/expand.c
Denys Vlasenko 9f93d62192 libbb: better unicode width support. Hopefully fixes bug 839.
Also opens up a possibility to make other unicode stuff smaller
and more correct later. but:

function                                             old     new   delta
static.combining                                       -     516    +516
bb_wcwidth                                             -     328    +328
unicode_cut_nchars                                     -     141    +141
mbstowc_internal                                       -      93     +93
in_table                                               -      78     +78
cal_main                                             899     961     +62
static.combining0x10000                                -      40     +40
unicode_strlen                                         -      31     +31
bb_mbstrlen                                           31       -     -31
bb_mbstowcs                                          173     102     -71
------------------------------------------------------------------------------
(add/remove: 7/1 grow/shrink: 1/1 up/down: 1289/-102)        Total: 1187 bytes

Uses code of Markus Kuhn, which is in public domain:
http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
"Permission to use, copy, modify, and distribute this software
 for any purpose and without fee is hereby granted. The author
 disclaims all warranties with regard to this software."

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2010-01-24 07:44:03 +01:00

196 lines
4.6 KiB
C

/* expand - convert tabs to spaces
* unexpand - convert spaces to tabs
*
* Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
*
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
*
* David MacKenzie <djm@gnu.ai.mit.edu>
*
* Options for expand:
* -t num --tabs=NUM Convert tabs to num spaces (default 8 spaces).
* -i --initial Only convert initial tabs on each line to spaces.
*
* Options for unexpand:
* -a --all Convert all blanks, instead of just initial blanks.
* -f --first-only Convert only leading sequences of blanks (default).
* -t num --tabs=NUM Have tabs num characters apart instead of 8.
*
* Busybox version (C) 2007 by Tito Ragusa <farmatito@tiscali.it>
*
* Caveat: these versions of expand and unexpand don't accept tab lists.
*/
#include "libbb.h"
#include "unicode.h"
/*
 * Flag bits tested in the option mask that expand_main() gets back from
 * getopt32(). The bit positions must stay in step with the order of the
 * letters in the option strings used there ("it:" and "ft:a").
 */
enum {
	OPT_INITIAL = 0x01, /* expand -i / unexpand -f: only leading blanks */
	OPT_TABS    = 0x02, /* -t NUM */
	OPT_ALL     = 0x04, /* unexpand -a: convert every blank run */
};
#if ENABLE_EXPAND
/*
 * Copy FILE to stdout one line at a time, replacing tabs with the number
 * of spaces needed to reach the next multiple of tab_size.
 *
 * file     - stream read with xmalloc_fgets() until it returns NULL
 * tab_size - distance between tab stops; used as a divisor, so non-zero
 * opt      - option mask; when OPT_INITIAL is set, scanning of a line
 *            stops at the first character that is not a blank and the
 *            rest of the line is written unchanged
 */
static void expand(FILE *file, unsigned tab_size, unsigned opt)
{
	for (;;) {
		char *buf = xmalloc_fgets(file);
		char *seg; /* start of the text not yet written out */
		char *cur;

		if (buf == NULL)
			break;

		seg = buf;
		for (cur = buf; *cur != '\0'; cur++) {
			unsigned char ch = *cur;
			unsigned width;

			if ((opt & OPT_INITIAL) && !isblank(ch)) {
				/* neither space nor tab: leave the rest alone */
				break;
			}
			if (ch != '\t')
				continue;

			/* Cut the line at the tab so that seg is a string of its own */
			*cur = '\0';
# if ENABLE_FEATURE_ASSUME_UNICODE
			width = unicode_strlen(seg);
# else
			width = cur - seg;
# endif
			/*
			 * seg begins right after the previous tab (or at the line
			 * start), so its width modulo tab_size is the offset from
			 * the last tab stop.
			 */
			printf("%s%*s", seg, tab_size - (width % tab_size), "");
			seg = cur + 1;
		}
		/* Whatever follows the last converted tab goes out verbatim */
		fputs(seg, stdout);
		free(buf);
	}
}
#endif
#if ENABLE_UNEXPAND
/*
 * Copy FILE to stdout one line at a time, replacing runs of blanks that
 * reach a tab stop with tab characters.
 *
 * file     - stream read with xmalloc_fgets() until it returns NULL
 * tab_size - distance between tab stops; used as a divisor, so non-zero
 * opt      - option mask; when OPT_INITIAL is set, the remainder of the
 *            line is written unchanged as soon as the scan reaches
 *            non-blank text that is not at the very start of the line
 */
static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
{
	char *line;

	while ((line = xmalloc_fgets(file)) != NULL) {
		char *ptr = line;
		/*
		 * Offset from the last tab stop already passed on output, plus
		 * the width of blanks that were read but not yet written.
		 */
		unsigned column = 0;
		unsigned pending;

		while (*ptr) {
			unsigned n;
			unsigned len = 0; /* spaces in the current run */

			while (*ptr == ' ') {
				ptr++;
				len++;
			}
			column += len;
			if (*ptr == '\t') {
				/* An input tab swallows the spaces in front of it */
				column += tab_size - (column % tab_size);
				ptr++;
				continue;
			}

			n = column / tab_size;
			if (n) {
				/*
				 * At least one tab stop was crossed: write the tabs and
				 * keep only the spaces that lie past the last stop.
				 */
				len = column = column % tab_size;
				while (n--)
					putchar('\t');
			}
			/*
			 * When no tab stop was crossed, only the spaces of this run
			 * (len) are written back. column also contains the width of
			 * the text before the run, so padding with column would turn
			 * "abc def" into "abc    def".
			 */

			if ((opt & OPT_INITIAL) && ptr != line) {
				printf("%*s%s", len, "", ptr);
				break;
			}
			n = strcspn(ptr, "\t ");
			printf("%*s%.*s", len, "", n, ptr);
# if ENABLE_FEATURE_ASSUME_UNICODE
			{
				/* Measure the word just written by cutting it out temporarily */
				char c;
				c = ptr[n];
				ptr[n] = '\0';
				len = unicode_strlen(ptr);
				ptr[n] = c;
			}
# else
			len = n;
# endif
			ptr += n;
			column = (column + len) % tab_size;
		}

		/*
		 * If the line ended right after an input tab (last line without
		 * a newline), the loop above left through "continue" and the tab
		 * has not been written yet. On every other exit path column is
		 * already below tab_size, so this writes nothing.
		 */
		for (pending = column / tab_size; pending != 0; pending--)
			putchar('\t');

		free(line);
	}
}
#endif
int expand_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
/*
 * Entry point shared by the expand and unexpand applets.
 *
 * The applet is chosen from the build configuration and, when both are
 * built in, from the first letter of applet_name ('e' selects expand).
 * Each FILE operand is processed in turn; with no operands standard input
 * is used. A file that cannot be opened or closed cleanly sets the exit
 * status to EXIT_FAILURE but does not stop the remaining files from being
 * processed. The function ends by calling fflush_stdout_and_exit().
 */
int expand_main(int argc UNUSED_PARAM, char **argv)
{
	/* Default 8 spaces for 1 tab */
	const char *opt_t = "8";
	FILE *file;
	unsigned tab_size;
	unsigned opt;
	int exit_status = EXIT_SUCCESS;
#if ENABLE_FEATURE_EXPAND_LONG_OPTIONS
	static const char expand_longopts[] ALIGN1 =
		/* name, has_arg, val */
		"initial\0"          No_argument       "i"
		"tabs\0"             Required_argument "t"
	;
#endif
#if ENABLE_FEATURE_UNEXPAND_LONG_OPTIONS
	static const char unexpand_longopts[] ALIGN1 =
		/* name, has_arg, val */
		/*
		 * --first-only must map to -f: 'i' is not part of unexpand's
		 * option string "ft:a", so it would not set OPT_INITIAL and
		 * "-t N --first-only" would differ from "-t N -f".
		 */
		"first-only\0"       No_argument       "f"
		"tabs\0"             Required_argument "t"
		"all\0"              No_argument       "a"
	;
#endif
	init_unicode();

	if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) {
		IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts);
		opt = getopt32(argv, "it:", &opt_t);
	} else {
		IF_FEATURE_UNEXPAND_LONG_OPTIONS(applet_long_options = unexpand_longopts);
		/* -t NUM sets also -a */
		opt_complementary = "ta";
		opt = getopt32(argv, "ft:a", &opt_t);
		/* -f --first-only is the default */
		if (!(opt & OPT_ALL)) opt |= OPT_INITIAL;
	}
	/* Lower bound of 1: tab_size is used as a divisor by expand/unexpand */
	tab_size = xatou_range(opt_t, 1, UINT_MAX);

	argv += optind;

	if (!*argv) {
		/* No FILE operands: put the stdin marker in the slot before them */
		*--argv = (char*)bb_msg_standard_input;
	}
	do {
		file = fopen_or_warn_stdin(*argv);
		if (!file) {
			exit_status = EXIT_FAILURE;
			continue;
		}

		if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e'))
			IF_EXPAND(expand(file, tab_size, opt));
		else
			IF_UNEXPAND(unexpand(file, tab_size, opt));

		/* Check and close the file */
		if (fclose_if_not_stdin(file)) {
			bb_simple_perror_msg(*argv);
			exit_status = EXIT_FAILURE;
		}
		/* If stdin also clear EOF */
		if (file == stdin)
			clearerr(file);
	} while (*++argv);

	/* Now close stdin also */
	/* (if we didn't read from it, it's a no-op) */
	if (fclose(stdin))
		bb_perror_msg_and_die(bb_msg_standard_input);

	fflush_stdout_and_exit(exit_status);
}