split: let me join the bikeshed-painting frenzy :)
this one should be able to handle huge files.

   text    data     bss     dec     hex filename
    664       4       0     668     29c busybox.t0/coreutils/split.o
    628       0       0     628     274 busybox.t1/coreutils/split.o

split_main                                           488     525     +37
suffix_len                                             4       -      -4
next_file                                             77       -     -77
(add/remove: 0/2 grow/shrink: 1/0 up/down: 37/-81)             Total: -44 bytes
This commit is contained in:
@@ -10,44 +10,43 @@
|
|||||||
* http://www.opengroup.org/onlinepubs/009695399/utilities/split.html
|
* http://www.opengroup.org/onlinepubs/009695399/utilities/split.html
|
||||||
*/
|
*/
|
||||||
#include "busybox.h"
|
#include "busybox.h"
|
||||||
static unsigned suffix_len = 2;
|
|
||||||
static const struct suffix_mult split_suffices[] = {
|
static const struct suffix_mult split_suffices[] = {
|
||||||
#if ENABLE_FEATURE_SPLIT_FANCY
|
|
||||||
{ "b", 512 },
|
{ "b", 512 },
|
||||||
#endif
|
|
||||||
{ "k", 1024 },
|
{ "k", 1024 },
|
||||||
{ "m", 1024*1024 },
|
{ "m", 1024*1024 },
|
||||||
#if ENABLE_FEATURE_SPLIT_FANCY
|
|
||||||
{ "g", 1024*1024*1024 },
|
{ "g", 1024*1024*1024 },
|
||||||
#endif
|
|
||||||
{ NULL, 0 }
|
{ NULL, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Increment the suffix part of the filename.
|
/* Increment the suffix part of the filename.
|
||||||
* Returns 0 on success and 1 on error (if we are out of files)
|
* Returns NULL if we are out of filenames.
|
||||||
*/
|
*/
|
||||||
static bool next_file(char **old)
|
static char *next_file(char *old, unsigned suffix_len)
|
||||||
{
|
{
|
||||||
size_t end = strlen(*old);
|
size_t end = strlen(old);
|
||||||
unsigned i = 1;
|
unsigned i = 1;
|
||||||
char *curr;
|
char *curr;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
curr = *old + end - i;
|
curr = old + end - i;
|
||||||
if (*curr < 'z') {
|
if (*curr < 'z') {
|
||||||
*curr += 1;
|
*curr += 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
if (i > suffix_len) {
|
if (i > suffix_len) {
|
||||||
bb_error_msg("Suffices exhausted");
|
return NULL;
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
*curr = 'a';
|
*curr = 'a';
|
||||||
} while (1);
|
} while (1);
|
||||||
return 0;
|
|
||||||
|
return old;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define read_buffer bb_common_bufsiz1
|
||||||
|
enum { READ_BUFFER_SIZE = sizeof(bb_common_bufsiz1) - 1 };
|
||||||
|
|
||||||
#define SPLIT_OPT_l (1<<0)
|
#define SPLIT_OPT_l (1<<0)
|
||||||
#define SPLIT_OPT_b (1<<1)
|
#define SPLIT_OPT_b (1<<1)
|
||||||
#define SPLIT_OPT_a (1<<2)
|
#define SPLIT_OPT_a (1<<2)
|
||||||
@@ -55,79 +54,83 @@ static bool next_file(char **old)
|
|||||||
int split_main(int argc, char **argv);
|
int split_main(int argc, char **argv);
|
||||||
int split_main(int argc, char **argv)
|
int split_main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
char *pfx, *buf, *input_file;
|
unsigned suffix_len = 2;
|
||||||
unsigned cnt = 1000, opt;
|
char *pfx;
|
||||||
bool ret = EXIT_SUCCESS;
|
char *count_p;
|
||||||
FILE *fp;
|
const char *sfx;
|
||||||
char *count_p, *sfx;
|
unsigned long cnt = 1000;
|
||||||
//XXX: FIXME opt_complementary = "+2"; /* at most 2 non-option arguments */
|
unsigned long remaining = 0;
|
||||||
|
unsigned opt;
|
||||||
|
int bytes_read, to_write;
|
||||||
|
char *src;
|
||||||
|
|
||||||
|
opt_complementary = "?2";
|
||||||
opt = getopt32(argc, argv, "l:b:a:", &count_p, &count_p, &sfx);
|
opt = getopt32(argc, argv, "l:b:a:", &count_p, &count_p, &sfx);
|
||||||
|
|
||||||
if (opt & SPLIT_OPT_l)
|
if (opt & SPLIT_OPT_l)
|
||||||
cnt = xatoi(count_p);
|
cnt = xatoul(count_p);
|
||||||
if (opt & SPLIT_OPT_b)
|
if (opt & SPLIT_OPT_b)
|
||||||
cnt = xatoul_sfx(count_p, split_suffices);
|
cnt = xatoul_sfx(count_p, split_suffices);
|
||||||
if (opt & SPLIT_OPT_a)
|
if (opt & SPLIT_OPT_a)
|
||||||
suffix_len = xatoi(sfx);
|
suffix_len = xatou(sfx);
|
||||||
argv += optind;
|
argv += optind;
|
||||||
if (!*argv)
|
sfx = "x";
|
||||||
*--argv = (char*) "-";
|
if (argv[0]) {
|
||||||
input_file = *argv;
|
if (argv[1])
|
||||||
sfx = *++argv;
|
sfx = argv[1];
|
||||||
|
xmove_fd(xopen(argv[0], O_RDONLY), 0);
|
||||||
|
} else {
|
||||||
|
argv[0] = (char *) bb_msg_standard_input;
|
||||||
|
}
|
||||||
|
|
||||||
if (sfx && (NAME_MAX < strlen(sfx) + suffix_len))
|
if (NAME_MAX < strlen(sfx) + suffix_len)
|
||||||
bb_error_msg_and_die("Suffix too long");
|
bb_error_msg_and_die("suffix too long");
|
||||||
|
|
||||||
{
|
{
|
||||||
char *char_p = xzalloc(suffix_len);
|
char *char_p = xzalloc(suffix_len + 1);
|
||||||
memset(char_p, 'a', suffix_len);
|
memset(char_p, 'a', suffix_len);
|
||||||
pfx = xasprintf("%s%s", sfx ? sfx : "x", char_p);
|
pfx = xasprintf("%s%s", sfx, char_p);
|
||||||
if (ENABLE_FEATURE_CLEAN_UP)
|
if (ENABLE_FEATURE_CLEAN_UP)
|
||||||
free(char_p);
|
free(char_p);
|
||||||
}
|
}
|
||||||
fp = fopen_or_warn_stdin(input_file);
|
|
||||||
//XXX:FIXME: unify those two file-handling schemata below (FILE vs fd) !
|
|
||||||
if (opt & SPLIT_OPT_b) {
|
|
||||||
ssize_t i;
|
|
||||||
ssize_t bytes = 0;
|
|
||||||
int inp = fileno(fp);
|
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
bytes_read = safe_read(0, read_buffer, READ_BUFFER_SIZE);
|
||||||
|
if (!bytes_read)
|
||||||
|
break;
|
||||||
|
if (bytes_read < 0)
|
||||||
|
bb_perror_msg_and_die("%s", argv[0]);
|
||||||
|
src = read_buffer;
|
||||||
do {
|
do {
|
||||||
int out = xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC);
|
if (!remaining) {
|
||||||
lseek(inp, bytes, SEEK_SET);
|
if (!pfx)
|
||||||
buf = xzalloc(cnt);
|
bb_error_msg_and_die("suffices exhausted");
|
||||||
bytes += i = full_read(inp, buf, cnt);
|
xmove_fd(xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC), 1);
|
||||||
xwrite(out, buf, i);
|
pfx = next_file(pfx, suffix_len);
|
||||||
free(buf);
|
remaining = cnt;
|
||||||
close(out);
|
|
||||||
if (next_file(&pfx)) {
|
|
||||||
ret++;
|
|
||||||
goto bail;
|
|
||||||
}
|
}
|
||||||
} while (i == cnt); /* if we read less than cnt, then nothing is left */
|
|
||||||
} else { /* -l */
|
if (opt & SPLIT_OPT_b) {
|
||||||
do {
|
/* split by bytes */
|
||||||
unsigned i = cnt;
|
to_write = (bytes_read < remaining) ? bytes_read : remaining;
|
||||||
int out = xopen(pfx, O_WRONLY | O_CREAT | O_TRUNC);
|
remaining -= to_write;
|
||||||
buf = NULL;
|
} else {
|
||||||
while (i--) {
|
/* split by lines */
|
||||||
buf = xmalloc_fgets(fp);
|
/* can be sped up by using _memrchr_
|
||||||
if (buf == NULL)
|
* and writing many lines at once... */
|
||||||
break;
|
char *end = memchr(src, '\n', bytes_read);
|
||||||
xwrite(out, buf, strlen(buf));
|
if (end) {
|
||||||
free(buf);
|
--remaining;
|
||||||
};
|
to_write = end - src + 1;
|
||||||
close(out);
|
} else {
|
||||||
if (next_file(&pfx)) {
|
to_write = bytes_read;
|
||||||
ret++;
|
}
|
||||||
goto bail;
|
|
||||||
}
|
}
|
||||||
} while (buf);
|
|
||||||
|
xwrite(1, src, to_write);
|
||||||
|
bytes_read -= to_write;
|
||||||
|
src += to_write;
|
||||||
|
} while (bytes_read);
|
||||||
}
|
}
|
||||||
bail:
|
return 0;
|
||||||
if (ENABLE_FEATURE_CLEAN_UP) {
|
|
||||||
free(pfx);
|
|
||||||
fclose_if_not_stdin(fp);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user