From 4cae044b436a842e0575c3b6787729fb436fd04c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 7 Feb 2018 01:33:25 +0100 Subject: [PATCH] bzip2: expose tuning knob for faster/smaller code Signed-off-by: Denys Vlasenko --- archival/bzip2.c | 25 +++++++++++++++++++++++-- archival/libarchive/bz/blocksort.c | 8 ++++---- archival/libarchive/bz/bzlib_private.h | 2 +- archival/libarchive/bz/compress.c | 24 ++++++++++++------------ archival/libarchive/bz/huffman.c | 2 +- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/archival/bzip2.c b/archival/bzip2.c index fa906150f..357891ca3 100644 --- a/archival/bzip2.c +++ b/archival/bzip2.c @@ -19,6 +19,23 @@ //config: Unless you have a specific application which requires bzip2, you //config: should probably say N here. //config: +//config:config BZIP2_SMALL +//config: int "Trade bytes for speed (0:fast, 9:small)" +//config: default 8 # all "fast or small" options default to small +//config: range 0 9 +//config: depends on BZIP2 +//config: help +//config: Trade code size versus speed. +//config: Approximate values with gcc-6.3.0 "bzip2 -9" compressing +//config: linux-4.15.tar were: +//config: value time (sec) code size (386) +//config: 9 (smallest) 70.11 7687 +//config: 8 67.93 8091 +//config: 7 67.88 8405 +//config: 6 67.78 8624 +//config: 5 67.05 9427 +//config: 4-0 (fastest) 64.14 12083 +//config: //config:config FEATURE_BZIP2_DECOMPRESS //config: bool "Enable decompression" //config: default y @@ -48,7 +65,11 @@ #include "libbb.h" #include "bb_archive.h" -#define CONFIG_BZIP2_FAST 1 +#if CONFIG_BZIP2_SMALL >= 4 +#define BZIP2_SPEED (9 - CONFIG_BZIP2_SMALL) +#else +#define BZIP2_SPEED 5 +#endif /* Speed test: * Compiled with gcc 4.2.1, run on Athlon 64 1800 MHz (512K L2 cache). @@ -56,7 +77,7 @@ * (time to compress gcc-4.2.1.tar is 126.4% compared to bbox). * At SPEED 5 difference is 32.7%. 
* - * Test run of all CONFIG_BZIP2_FAST values on a 11Mb text file: + * Test run of all BZIP2_SPEED values on a 11Mb text file: * Size Time (3 runs) * 0: 10828 4.145 4.146 4.148 * 1: 11097 3.845 3.860 3.861 diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c index 0a9a597d0..92d6d8251 100644 --- a/archival/libarchive/bz/blocksort.c +++ b/archival/libarchive/bz/blocksort.c @@ -392,7 +392,7 @@ int mainGtU(EState* state, * but speeds up compression 10% overall */ -#if CONFIG_BZIP2_FAST >= 1 +#if BZIP2_SPEED >= 1 #define TIMES_8(code) \ code; code; code; code; \ @@ -506,7 +506,7 @@ void mainSimpleSort(EState* state, i++; /* 1.5% overall speedup, +290 bytes */ -#if CONFIG_BZIP2_FAST >= 3 +#if BZIP2_SPEED >= 3 /*-- copy 2 --*/ if (i > hi) break; v = ptr[i]; @@ -755,7 +755,7 @@ void mainSort(EState* state) j = block[0] << 8; i = nblock - 1; /* 3%, +300 bytes */ -#if CONFIG_BZIP2_FAST >= 2 +#if BZIP2_SPEED >= 2 for (; i >= 3; i -= 4) { quadrant[i] = 0; j = (j >> 8) | (((unsigned)block[i]) << 8); @@ -794,7 +794,7 @@ void mainSort(EState* state) unsigned s; s = block[0] << 8; i = nblock - 1; -#if CONFIG_BZIP2_FAST >= 2 +#if BZIP2_SPEED >= 2 for (; i >= 3; i -= 4) { s = (s >> 8) | (block[i] << 8); j = ftab[s] - 1; diff --git a/archival/libarchive/bz/bzlib_private.h b/archival/libarchive/bz/bzlib_private.h index ef75ef08a..ea0f29b7c 100644 --- a/archival/libarchive/bz/bzlib_private.h +++ b/archival/libarchive/bz/bzlib_private.h @@ -188,7 +188,7 @@ typedef struct EState { /* stack-saving measures: these can be local, but they are too big */ int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; -#if CONFIG_BZIP2_FAST >= 5 +#if BZIP2_SPEED >= 5 /* second dimension: only 3 needed; 4 makes index calculations faster */ uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; #endif diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c index 
462740b6c..539ab927e 100644 --- a/archival/libarchive/bz/compress.c +++ b/archival/libarchive/bz/compress.c @@ -32,6 +32,12 @@ in the file LICENSE. /* #include "bzlib_private.h" */ +#if BZIP2_SPEED >= 5 +# define ALWAYS_INLINE_5 ALWAYS_INLINE +#else +# define ALWAYS_INLINE_5 /*nothing*/ +#endif + /*---------------------------------------------------*/ /*--- Bit stream I/O ---*/ /*---------------------------------------------------*/ @@ -60,9 +66,7 @@ void bsFinishWrite(EState* s) /*---------------------------------------------------*/ static /* Helps only on level 5, on other levels hurts. ? */ -#if CONFIG_BZIP2_FAST >= 5 -ALWAYS_INLINE -#endif +ALWAYS_INLINE_5 void bsW(EState* s, int32_t n, uint32_t v) { while (s->bsLive >= 8) { @@ -75,9 +79,7 @@ void bsW(EState* s, int32_t n, uint32_t v) } /* Same with n == 16: */ static -#if CONFIG_BZIP2_FAST >= 5 -ALWAYS_INLINE -#endif +ALWAYS_INLINE_5 void bsW16(EState* s, uint32_t v) { while (s->bsLive >= 8) { @@ -103,9 +105,7 @@ void bsW1_1(EState* s) s->bsLive += 1; } static -#if CONFIG_BZIP2_FAST >= 5 -ALWAYS_INLINE -#endif +ALWAYS_INLINE_5 void bsW1_0(EState* s) { /* need space for only 1 bit, no need for loop freeing > 8 bits */ @@ -394,7 +394,7 @@ void sendMTFValues(EState* s) s->rfreq[t][v] = 0; } -#if CONFIG_BZIP2_FAST >= 5 +#if BZIP2_SPEED >= 5 /* * Set up an auxiliary length table which is used to fast-track * the common case (nGroups == 6). @@ -427,7 +427,7 @@ void sendMTFValues(EState* s) */ for (t = 0; t < nGroups; t++) cost[t] = 0; -#if CONFIG_BZIP2_FAST >= 5 +#if BZIP2_SPEED >= 5 if (nGroups == 6 && 50 == ge-gs+1) { /*--- fast track the common case ---*/ register uint32_t cost01, cost23, cost45; @@ -483,7 +483,7 @@ void sendMTFValues(EState* s) * Increment the symbol frequencies for the selected table. */ /* 1% faster compress. 
+800 bytes */ -#if CONFIG_BZIP2_FAST >= 4 +#if BZIP2_SPEED >= 4 if (nGroups == 6 && 50 == ge-gs+1) { /*--- fast track the common case ---*/ #define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ diff --git a/archival/libarchive/bz/huffman.c b/archival/libarchive/bz/huffman.c index be5930e00..dc851cd3f 100644 --- a/archival/libarchive/bz/huffman.c +++ b/archival/libarchive/bz/huffman.c @@ -48,7 +48,7 @@ in the file LICENSE. /* 90 bytes, 0.3% of overall compress speed */ -#if CONFIG_BZIP2_FAST >= 1 +#if BZIP2_SPEED >= 1 /* macro works better than inline (gcc 4.2.1) */ #define DOWNHEAP1(heap, weight, Heap) \