From d1d6d9c5d8022bcd8f2e7fbd470d293f73adae58 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 6 Apr 2018 16:44:50 +0200 Subject: [PATCH] sort: smaller and more aggressive FEATURE_SORT_OPTIMIZE_MEMORY function old new delta sort_main 1098 1037 -61 Signed-off-by: Denys Vlasenko --- coreutils/sort.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/coreutils/sort.c b/coreutils/sort.c index 9909a44a8..4d741e76d 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -18,11 +18,11 @@ //config: sort is used to sort lines of text in specified files. //config: //config:config FEATURE_SORT_BIG -//config: bool "Full SuSv3 compliant sort (support -ktcbdfiozgM)" +//config: bool "Full SuSv3 compliant sort (support -ktcbdfiogM)" //config: default y //config: depends on SORT //config: help -//config: Without this, sort only supports -r, -u, -s, and an integer version +//config: Without this, sort only supports -rusz, and an integer version //config: of -n. Selecting this adds sort keys, floating point support, and //config: more. This adds a little over 3k to a nonstatic build on x86. //config: @@ -66,12 +66,10 @@ //usage: "\n -r Reverse sort order" //usage: "\n -s Stable (don't sort ties alphabetically)" //usage: "\n -u Suppress duplicate lines" -//usage: IF_FEATURE_SORT_BIG( //usage: "\n -z Lines are terminated by NUL, not newline" ///////: "\n -m Ignored for GNU compatibility" ///////: "\n -S BUFSZ Ignored for GNU compatibility" ///////: "\n -T TMPDIR Ignored for GNU compatibility" -//usage: ) //usage: //usage:#define sort_example_usage //usage: "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" @@ -413,6 +411,7 @@ int sort_main(int argc UNUSED_PARAM, char **argv) #if ENABLE_FEATURE_SORT_OPTIMIZE_MEMORY bool can_drop_dups; size_t prev_len = 0; + char *prev_line = (char*) ""; /* Postpone optimizing if the input is small, < 16k lines: * even just free()ing duplicate lines takes time. 
*/ @@ -533,32 +532,33 @@ int sort_main(int argc UNUSED_PARAM, char **argv) if (count_to_optimize_dups == 0) { size_t len; char *new_line; - char first = *line; /* On kernel/linux/arch/ *.[ch] files, * this reduces memory usage by 6%. * yes | head -99999999 | sort * goes down from 1900Mb to 380 Mb. */ - if (first == '\0' || first == '\n') { - len = !(first == '\0'); - new_line = (char*)"\n" + 1 - len; - goto replace; - } len = strlen(line); if (len <= prev_len) { - new_line = lines[linecount-1] + (prev_len - len); + new_line = prev_line + (prev_len - len); if (strcmp(line, new_line) == 0) { + /* it's a tail of the prev line */ if (can_drop_dups && prev_len == len) { + /* it's identical to prev line */ free(line); continue; } - replace: free(line); line = new_line; + /* continue using longer prev_line + * for future tail tests. + */ + goto skip; } } prev_len = len; + prev_line = line; + skip: ; } #else //TODO: lighter version which only drops total dups if can_drop_dups == true