split: let me join the bikeshed-painting frenzy :)

This version should be able to handle huge files.
   text    data     bss     dec     hex filename
    664       4       0     668     29c busybox.t0/coreutils/split.o
    628       0       0     628     274 busybox.t1/coreutils/split.o
split_main                                           488     525     +37
suffix_len                                             4       -      -4
next_file                                             77       -     -77
(add/remove: 0/2 grow/shrink: 1/0 up/down: 37/-81)            Total: -44 bytes
This commit is contained in:
Denis Vlasenko
2007-03-26 20:15:40 +00:00
parent 576de34d1c
commit 91dd275f05

View File

@@ -10,44 +10,43 @@
* http://www.opengroup.org/onlinepubs/009695399/utilities/split.html * http://www.opengroup.org/onlinepubs/009695399/utilities/split.html
*/ */
#include "busybox.h" #include "busybox.h"
static unsigned suffix_len = 2;
static const struct suffix_mult split_suffices[] = { static const struct suffix_mult split_suffices[] = {
#if ENABLE_FEATURE_SPLIT_FANCY
{ "b", 512 }, { "b", 512 },
#endif
{ "k", 1024 }, { "k", 1024 },
{ "m", 1024*1024 }, { "m", 1024*1024 },
#if ENABLE_FEATURE_SPLIT_FANCY
{ "g", 1024*1024*1024 }, { "g", 1024*1024*1024 },
#endif
{ NULL, 0 } { NULL, 0 }
}; };
/* Increment the suffix part of the filename. /* Increment the suffix part of the filename.
* Returns 0 on success and 1 on error (if we are out of files) * Returns NULL if we are out of filenames.
*/ */
static bool next_file(char **old) static char *next_file(char *old, unsigned suffix_len)
{ {
size_t end = strlen(*old); size_t end = strlen(old);
unsigned i = 1; unsigned i = 1;
char *curr; char *curr;
do { do {
curr = *old + end - i; curr = old + end - i;
if (*curr < 'z') { if (*curr < 'z') {
*curr += 1; *curr += 1;
break; break;
} }
i++; i++;
if (i > suffix_len) { if (i > suffix_len) {
bb_error_msg("Suffices exhausted"); return NULL;
return 1;
} }
*curr = 'a'; *curr = 'a';
} while (1); } while (1);
return 0;
return old;
} }
#define read_buffer bb_common_bufsiz1
enum { READ_BUFFER_SIZE = sizeof(bb_common_bufsiz1) - 1 };
#define SPLIT_OPT_l (1<<0) #define SPLIT_OPT_l (1<<0)
#define SPLIT_OPT_b (1<<1) #define SPLIT_OPT_b (1<<1)
#define SPLIT_OPT_a (1<<2) #define SPLIT_OPT_a (1<<2)
@@ -55,79 +54,83 @@ static bool next_file(char **old)
int split_main(int argc, char **argv); int split_main(int argc, char **argv);
int split_main(int argc, char **argv) int split_main(int argc, char **argv)
{ {
char *pfx, *buf, *input_file; unsigned suffix_len = 2;
unsigned cnt = 1000, opt; char *pfx;
bool ret = EXIT_SUCCESS; char *count_p;
FILE *fp; const char *sfx;
char *count_p, *sfx; unsigned long cnt = 1000;
//XXX: FIXME opt_complementary = "+2"; /* at most 2 non-option arguments */ unsigned long remaining = 0;
unsigned opt;
int bytes_read, to_write;
char *src;
opt_complementary = "?2";
opt = getopt32(argc, argv, "l:b:a:", &count_p, &count_p, &sfx); opt = getopt32(argc, argv, "l:b:a:", &count_p, &count_p, &sfx);
if (opt & SPLIT_OPT_l) if (opt & SPLIT_OPT_l)
cnt = xatoi(count_p); cnt = xatoul(count_p);
if (opt & SPLIT_OPT_b) if (opt & SPLIT_OPT_b)
cnt = xatoul_sfx(count_p, split_suffices); cnt = xatoul_sfx(count_p, split_suffices);
if (opt & SPLIT_OPT_a) if (opt & SPLIT_OPT_a)
suffix_len = xatoi(sfx); suffix_len = xatou(sfx);
argv += optind; argv += optind;
if (!*argv) sfx = "x";
*--argv = (char*) "-"; if (argv[0]) {
input_file = *argv; if (argv[1])
sfx = *++argv; sfx = argv[1];
xmove_fd(xopen(argv[0], O_RDONLY), 0);
} else {
argv[0] = (char *) bb_msg_standard_input;
}
if (sfx && (NAME_MAX < strlen(sfx) + suffix_len)) if (NAME_MAX < strlen(sfx) + suffix_len)
bb_error_msg_and_die("Suffix too long"); bb_error_msg_and_die("suffix too long");
{ {
char *char_p = xzalloc(suffix_len); char *char_p = xzalloc(suffix_len + 1);
memset(char_p, 'a', suffix_len); memset(char_p, 'a', suffix_len);
pfx = xasprintf("%s%s", sfx ? sfx : "x", char_p); pfx = xasprintf("%s%s", sfx, char_p);
if (ENABLE_FEATURE_CLEAN_UP) if (ENABLE_FEATURE_CLEAN_UP)
free(char_p); free(char_p);
} }
fp = fopen_or_warn_stdin(input_file);
//XXX:FIXME: unify those two file-handling schemata below (FILE vs fd) !
if (opt & SPLIT_OPT_b) {
ssize_t i;
ssize_t bytes = 0;
int inp = fileno(fp);
while (1) {
bytes_read = safe_read(0, read_buffer, READ_BUFFER_SIZE);
if (!bytes_read)
break;
if (bytes_read < 0)
bb_perror_msg_and_die("%s", argv[0]);
src = read_buffer;
do { do {
int out = xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC); if (!remaining) {
lseek(inp, bytes, SEEK_SET); if (!pfx)
buf = xzalloc(cnt); bb_error_msg_and_die("suffices exhausted");
bytes += i = full_read(inp, buf, cnt); xmove_fd(xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC), 1);
xwrite(out, buf, i); pfx = next_file(pfx, suffix_len);
free(buf); remaining = cnt;
close(out);
if (next_file(&pfx)) {
ret++;
goto bail;
} }
} while (i == cnt); /* if we read less than cnt, then nothing is left */
} else { /* -l */ if (opt & SPLIT_OPT_b) {
do { /* split by bytes */
unsigned i = cnt; to_write = (bytes_read < remaining) ? bytes_read : remaining;
int out = xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC); remaining -= to_write;
buf = NULL; } else {
while (i--) { /* split by lines */
buf = xmalloc_fgets(fp); /* can be sped up by using _memrchr_
if (buf == NULL) * and writing many lines at once... */
break; char *end = memchr(src, '\n', bytes_read);
xwrite(out, buf, strlen(buf)); if (end) {
free(buf); --remaining;
}; to_write = end - src + 1;
close(out); } else {
if (next_file(&pfx)) { to_write = bytes_read;
ret++; }
goto bail;
} }
} while (buf);
xwrite(1, src, to_write);
bytes_read -= to_write;
src += to_write;
} while (bytes_read);
} }
bail: return 0;
if (ENABLE_FEATURE_CLEAN_UP) {
free(pfx);
fclose_if_not_stdin(fp);
}
return ret;
} }