shuf: speed-up when limited output is requested
A user noted that the following command was slower than they expected:

    busybox shuf -i "1500000000-$(date +%s)" -n 5

At time of writing the range contains 128 million values. On my system this takes 7.7s whereas 'shuf' from coreutils takes a handful of milliseconds.

Optimise BusyBox 'shuf' for cases where -n is specified by stopping shuffling once the required number of lines has been processed. On my system the time for the example is reduced to 0.4s.

function                                             old     new   delta
shuf_main                                            520     540     +20
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/0 up/down: 20/0)               Total: 20 bytes

v2: Code shrink. Since outlines <= numlines:
- the loop in shuffle_lines() only needs to test the value of outlines;
- shuffle_lines() can be called unconditionally.
Update timing to allow for the 13 million seconds elapsed since v1.

Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
74c4f356ae
commit
8817e285b7
@ -24,7 +24,7 @@
|
|||||||
//usage: "\n -i L-H Treat numbers L-H as lines"
|
//usage: "\n -i L-H Treat numbers L-H as lines"
|
||||||
//usage: "\n -n NUM Output at most NUM lines"
|
//usage: "\n -n NUM Output at most NUM lines"
|
||||||
//usage: "\n -o FILE Write to FILE, not standard output"
|
//usage: "\n -o FILE Write to FILE, not standard output"
|
||||||
//usage: "\n -z End lines with zero byte, not newline"
|
//usage: "\n -z NUL terminated output"
|
||||||
|
|
||||||
#include "libbb.h"
|
#include "libbb.h"
|
||||||
|
|
||||||
@ -39,8 +39,10 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Use the Fisher-Yates shuffle algorithm on an array of lines.
|
* Use the Fisher-Yates shuffle algorithm on an array of lines.
|
||||||
|
* If the required number of output lines is less than the total
|
||||||
|
* we can stop shuffling early.
|
||||||
*/
|
*/
|
||||||
static void shuffle_lines(char **lines, unsigned numlines)
|
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
unsigned r;
|
unsigned r;
|
||||||
@ -48,7 +50,7 @@ static void shuffle_lines(char **lines, unsigned numlines)
|
|||||||
|
|
||||||
srand(monotonic_us());
|
srand(monotonic_us());
|
||||||
|
|
||||||
for (i = numlines-1; i > 0; i--) {
|
for (i = numlines-1; outlines > 0; i--, outlines--) {
|
||||||
r = rand();
|
r = rand();
|
||||||
/* RAND_MAX can be as small as 32767 */
|
/* RAND_MAX can be as small as 32767 */
|
||||||
if (i > RAND_MAX)
|
if (i > RAND_MAX)
|
||||||
@ -67,7 +69,7 @@ int shuf_main(int argc, char **argv)
|
|||||||
char *opt_i_str, *opt_n_str, *opt_o_str;
|
char *opt_i_str, *opt_n_str, *opt_o_str;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
char **lines;
|
char **lines;
|
||||||
unsigned numlines;
|
unsigned numlines, outlines;
|
||||||
char eol;
|
char eol;
|
||||||
|
|
||||||
opts = getopt32(argv, "^"
|
opts = getopt32(argv, "^"
|
||||||
@ -128,24 +130,23 @@ int shuf_main(int argc, char **argv)
|
|||||||
fclose_if_not_stdin(fp);
|
fclose_if_not_stdin(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numlines != 0)
|
outlines = numlines;
|
||||||
shuffle_lines(lines, numlines);
|
if (opts & OPT_n) {
|
||||||
|
outlines = xatou(opt_n_str);
|
||||||
|
if (outlines > numlines)
|
||||||
|
outlines = numlines;
|
||||||
|
}
|
||||||
|
|
||||||
|
shuffle_lines(lines, numlines, outlines);
|
||||||
|
|
||||||
if (opts & OPT_o)
|
if (opts & OPT_o)
|
||||||
xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
|
xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
|
||||||
|
|
||||||
if (opts & OPT_n) {
|
|
||||||
unsigned maxlines;
|
|
||||||
maxlines = xatou(opt_n_str);
|
|
||||||
if (numlines > maxlines)
|
|
||||||
numlines = maxlines;
|
|
||||||
}
|
|
||||||
|
|
||||||
eol = '\n';
|
eol = '\n';
|
||||||
if (opts & OPT_z)
|
if (opts & OPT_z)
|
||||||
eol = '\0';
|
eol = '\0';
|
||||||
|
|
||||||
for (i = 0; i < numlines; i++) {
|
for (i = numlines - outlines; i < numlines; i++) {
|
||||||
if (opts & OPT_i)
|
if (opts & OPT_i)
|
||||||
printf("%u%c", (unsigned)(uintptr_t)lines[i], eol);
|
printf("%u%c", (unsigned)(uintptr_t)lines[i], eol);
|
||||||
else
|
else
|
||||||
|
Loading…
Reference in New Issue
Block a user