shuf: speed up when limited output is requested
A user noted that the following command was slower than they
expected:
   busybox shuf -i "1500000000-$(date +%s)" -n 5
At time of writing the range contains 128 million values.  On my
system this takes 7.7s whereas 'shuf' from coreutils takes a
handful of milliseconds.
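Optimise BusyBox 'shuf' for cases where -n is specified by stopping
shuffling once the required number of lines has been processed.
The shuffle loop works downwards from the last element of the array,
so the requested lines are then printed from the tail of the array.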
On my system the time for the example is reduced to 0.4s.
function                                             old     new   delta
shuf_main                                            520     540     +20
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/0 up/down: 20/0)               Total: 20 bytes
v2: Code shrink.  Since outlines <= numlines:
    - the loop in shuffle_lines() only needs to test the value of
      outlines;
    - shuffle_lines() can be called unconditionally.
    Update timing to allow for the 13 million seconds elapsed since v1.
Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
			
			
This commit is contained in:
		
				
					committed by
					
						 Denys Vlasenko
						Denys Vlasenko
					
				
			
			
				
	
			
			
			
						parent
						
							74c4f356ae
						
					
				
				
					commit
					8817e285b7
				
			| @@ -24,7 +24,7 @@ | |||||||
| //usage:     "\n	-i L-H	Treat numbers L-H as lines" | //usage:     "\n	-i L-H	Treat numbers L-H as lines" | ||||||
| //usage:     "\n	-n NUM	Output at most NUM lines" | //usage:     "\n	-n NUM	Output at most NUM lines" | ||||||
| //usage:     "\n	-o FILE	Write to FILE, not standard output" | //usage:     "\n	-o FILE	Write to FILE, not standard output" | ||||||
| //usage:     "\n	-z	End lines with zero byte, not newline" | //usage:     "\n	-z	NUL terminated output" | ||||||
|  |  | ||||||
| #include "libbb.h" | #include "libbb.h" | ||||||
|  |  | ||||||
| @@ -39,8 +39,10 @@ | |||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Use the Fisher-Yates shuffle algorithm on an array of lines. |  * Use the Fisher-Yates shuffle algorithm on an array of lines. | ||||||
|  |  * If the required number of output lines is less than the total | ||||||
|  |  * we can stop shuffling early. | ||||||
|  */ |  */ | ||||||
| static void shuffle_lines(char **lines, unsigned numlines) | static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines) | ||||||
| { | { | ||||||
| 	unsigned i; | 	unsigned i; | ||||||
| 	unsigned r; | 	unsigned r; | ||||||
| @@ -48,7 +50,7 @@ static void shuffle_lines(char **lines, unsigned numlines) | |||||||
|  |  | ||||||
| 	srand(monotonic_us()); | 	srand(monotonic_us()); | ||||||
|  |  | ||||||
| 	for (i = numlines-1; i > 0; i--) { | 	for (i = numlines-1; outlines > 0; i--, outlines--) { | ||||||
| 		r = rand(); | 		r = rand(); | ||||||
| 		/* RAND_MAX can be as small as 32767 */ | 		/* RAND_MAX can be as small as 32767 */ | ||||||
| 		if (i > RAND_MAX) | 		if (i > RAND_MAX) | ||||||
| @@ -67,7 +69,7 @@ int shuf_main(int argc, char **argv) | |||||||
| 	char *opt_i_str, *opt_n_str, *opt_o_str; | 	char *opt_i_str, *opt_n_str, *opt_o_str; | ||||||
| 	unsigned i; | 	unsigned i; | ||||||
| 	char **lines; | 	char **lines; | ||||||
| 	unsigned numlines; | 	unsigned numlines, outlines; | ||||||
| 	char eol; | 	char eol; | ||||||
|  |  | ||||||
| 	opts = getopt32(argv, "^" | 	opts = getopt32(argv, "^" | ||||||
| @@ -128,24 +130,23 @@ int shuf_main(int argc, char **argv) | |||||||
| 		fclose_if_not_stdin(fp); | 		fclose_if_not_stdin(fp); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (numlines != 0) | 	outlines = numlines; | ||||||
| 		shuffle_lines(lines, numlines); | 	if (opts & OPT_n) { | ||||||
|  | 		outlines = xatou(opt_n_str); | ||||||
|  | 		if (outlines > numlines) | ||||||
|  | 			outlines = numlines; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	shuffle_lines(lines, numlines, outlines); | ||||||
|  |  | ||||||
| 	if (opts & OPT_o) | 	if (opts & OPT_o) | ||||||
| 		xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); | 		xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); | ||||||
|  |  | ||||||
| 	if (opts & OPT_n) { |  | ||||||
| 		unsigned maxlines; |  | ||||||
| 		maxlines = xatou(opt_n_str); |  | ||||||
| 		if (numlines > maxlines) |  | ||||||
| 			numlines = maxlines; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	eol = '\n'; | 	eol = '\n'; | ||||||
| 	if (opts & OPT_z) | 	if (opts & OPT_z) | ||||||
| 		eol = '\0'; | 		eol = '\0'; | ||||||
|  |  | ||||||
| 	for (i = 0; i < numlines; i++) { | 	for (i = numlines - outlines; i < numlines; i++) { | ||||||
| 		if (opts & OPT_i) | 		if (opts & OPT_i) | ||||||
| 			printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); | 			printf("%u%c", (unsigned)(uintptr_t)lines[i], eol); | ||||||
| 		else | 		else | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user