busybox/coreutils/shuf.c

176 lines
4.2 KiB
C
Raw Normal View History

/* vi: set sw=4 ts=4: */
/*
* shuf: Write a random permutation of the input lines to standard output.
*
* Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com>
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
//config:config SHUF
//config: bool "shuf (5.4 kb)"
//config: default y
//config: help
//config: Generate random permutations
//applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf))
//kbuild:lib-$(CONFIG_SHUF) += shuf.o
//usage:#define shuf_trivial_usage
//usage: "[-n NUM] [-o FILE] [-z] [FILE | -e [ARG...] | -i L-H]"
//usage:#define shuf_full_usage "\n\n"
//usage: "Randomly permute lines\n"
//usage: "\n -n NUM Output at most NUM lines"
//usage: "\n -o FILE Write to FILE, not standard output"
//usage: "\n -z NUL terminated output"
//usage: "\n -e Treat ARGs as lines"
//usage: "\n -i L-H Treat numbers L-H as lines"
#include "libbb.h"
/* This is a NOEXEC applet. Be very careful! */
#define OPT_e (1 << 0)
#define OPT_i (1 << 1)
#define OPT_n (1 << 2)
#define OPT_o (1 << 3)
#define OPT_z (1 << 4)
#define OPT_STR "ei:n:o:z"
/*
 * Use the Fisher-Yates shuffle algorithm on an array of lines.
 * If the required number of output lines is less than the total
 * we can stop shuffling early.
 *
 * lines:    array of numlines entries, permuted in place
 * numlines: number of entries in lines[]
 * outlines: number of shuffled entries the caller needs; on return they
 *           occupy lines[numlines - outlines] .. lines[numlines - 1]
 *           (values larger than numlines are treated as numlines)
 */
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
{
	/*
	 * Never draw more entries than exist: once numlines reached 0,
	 * "r %= numlines" below would be a division by zero.
	 */
	if (outlines > numlines)
		outlines = numlines;

	srand(monotonic_us());

	while (outlines != 0) {
		char *tmp;
		unsigned r = rand();

		/* RAND_MAX can be as small as 32767 */
		if (numlines > RAND_MAX) {
			/*
			 * Shift as unsigned: rand() returns int, and shifting
			 * a large int left by 15 overflows (undefined behavior).
			 */
			r ^= (unsigned)rand() << 15;
		}
		r %= numlines;
		/*
		 * TODO: the above method is seriously non-uniform when numlines
		 * is very large. For example, with numlines of 0xf0000000,
		 * values of (r % numlines) in [0, 0x0fffffff] range
		 * are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
		 * whereas only one value, r=0xefffffff, maps to 0xefffffff.
		 */

		/* Move the chosen entry into the last not-yet-fixed slot */
		numlines--;
		tmp = lines[numlines];
		lines[numlines] = lines[r];
		lines[r] = tmp;
		outlines--;
	}
}
int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
/*
 * shuf applet entry point.
 *
 * Builds an array of "lines" from one of three sources, shuffles it with
 * shuffle_lines(), and prints the selected entries:
 *   -e      the remaining command-line arguments are the lines
 *   -i L-H  the integers L..H are the lines
 *   default lines are read from the named FILE (fname defaults to "-")
 * -n NUM limits the number of printed lines, -o FILE redirects the output,
 * -z terminates each output line with NUL instead of newline.
 *
 * Does not return normally: the last statement flushes stdout and exits.
 */
int shuf_main(int argc, char **argv)
{
	unsigned opts;
	char *opt_i_str, *opt_n_str, *opt_o_str;
	char **lines;
	/* Used only with -i; zero-initialized so it is never read while indeterminate */
	unsigned long long lo = 0;
	unsigned numlines, outlines;
	unsigned i;
	char eol;

	/* String arguments are stored in OPT_STR order: -i, -n, -o */
	opts = getopt32(argv, "^"
			OPT_STR
			"\0" "e--i:i--e"/* mutually exclusive */,
			&opt_i_str, &opt_n_str, &opt_o_str
	);

	argc -= optind;
	argv += optind;

	/* Prepare lines for shuffling - either: */
	if (opts & OPT_e) {
		/* make lines from command-line arguments */
		numlines = argc;
		lines = argv;
	} else
	if (opts & OPT_i) {
		/* create a range of numbers */
		unsigned long long hi;
		char *dash;

		/* -i takes no positional arguments */
		if (argv[0])
			bb_show_usage();

		dash = strchr(opt_i_str, '-');
		if (!dash) {
			bb_error_msg_and_die("bad range '%s'", opt_i_str);
		}
		/* Split "L-H" temporarily so each half can be parsed on its own */
		*dash = '\0';
		lo = xatoull(opt_i_str);
		hi = xatoull(dash + 1);
		/* Restore the original text for use in error messages */
		*dash = '-';
		if (hi < lo)
			bb_error_msg_and_die("bad range '%s'", opt_i_str);

		/* From here on, hi is the zero-based index of the last element */
		hi -= lo;
		if (sizeof(size_t) > sizeof(numlines)) {
			/* Only the element count (hi + 1) has to fit in unsigned */
			if (hi >= UINT_MAX)
				bb_error_msg_and_die("bad range '%s'", opt_i_str);
		} else {
			/* The allocation size in bytes has to fit as well */
			if (hi >= UINT_MAX / sizeof(lines[0]))
				bb_error_msg_and_die("bad range '%s'", opt_i_str);
		}

		numlines = hi + 1;
		lines = xmalloc((size_t)numlines * sizeof(lines[0]));
		/*
		 * No strings are created: each slot holds its offset from lo,
		 * stored in the pointer value itself. The output loop adds lo back.
		 */
		for (i = 0; i < numlines; i++) {
			lines[i] = (char*)(uintptr_t)i;
		}
	} else {
		/* default - read lines from stdin or the input file */
		FILE *fp;
		const char *fname = "-";

		if (argv[0]) {
			/* At most one input file is accepted */
			if (argv[1])
				bb_show_usage();
			fname = argv[0];
		}

		fp = xfopen_stdin(fname);
		lines = NULL;
		numlines = 0;
		/* Collect lines until xmalloc_fgetline() returns NULL */
		for (;;) {
			char *line = xmalloc_fgetline(fp);
			if (!line)
				break;
			lines = xrealloc_vector(lines, 6, numlines);
			lines[numlines++] = line;
		}
		fclose_if_not_stdin(fp);
	}

	/* By default print everything; -n may lower (never raise) the count */
	outlines = numlines;
	if (opts & OPT_n) {
		outlines = xatou(opt_n_str);
		if (outlines > numlines)
			outlines = numlines;
	}

	shuffle_lines(lines, numlines, outlines);

	/* The output file is opened only after all input has been consumed */
	if (opts & OPT_o)
		xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);

	eol = '\n';
	if (opts & OPT_z)
		eol = '\0';

	/* shuffle_lines() leaves the selected entries at the tail of the array */
	for (i = numlines - outlines; i < numlines; i++) {
		if (opts & OPT_i)
			printf("%llu%c", lo + (uintptr_t)lines[i], eol);
		else
			printf("%s%c", lines[i], eol);
	}

	fflush_stdout_and_exit_SUCCESS();
}