From 522041ee7b10ac544b90c6a8d1d4fbf8a5d39c6d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 10 Sep 2011 13:25:57 +0200 Subject: [PATCH] regularize options which control size/speed trade Signed-off-by: Denys Vlasenko --- archival/Config.src | 27 +++++++++++----------- archival/bzip2.c | 4 ++-- archival/gzip.c | 8 +++++-- archival/libarchive/bz/blocksort.c | 8 +++---- archival/libarchive/bz/bzlib_private.h | 2 +- archival/libarchive/bz/compress.c | 8 +++---- archival/libarchive/bz/huffman.c | 2 +- configs/TEST_nommu_defconfig | 2 +- configs/TEST_noprintf_defconfig | 2 +- configs/TEST_rh9_defconfig | 2 +- configs/android2_defconfig | 2 +- configs/android_defconfig | 2 +- configs/cygwin_defconfig | 2 +- configs/freebsd_defconfig | 2 +- libbb/Config.src | 4 ++-- libbb/hash_md5_sha.c | 32 +++++++++++++------------- libbb/hash_md5prime.c | 26 ++++++++++----------- 17 files changed, 69 insertions(+), 66 deletions(-) diff --git a/archival/Config.src b/archival/Config.src index 9f4908178..885cb5bcc 100644 --- a/archival/Config.src +++ b/archival/Config.src @@ -187,15 +187,17 @@ config FEATURE_GZIP_LONG_OPTIONS help Enable use of long options, increases size by about 106 Bytes -config GZIP_BIG_MEM - bool "Trade memory for gzip speed" - default n - depends on GZIP - help - Enable big memory options for gzip, including larger I/O - buffers and bigger hash tables. Faster, but uses at least - twice as much memory. Select if speed is more important than - memory use. +config GZIP_FAST + int "Trade memory for gzip speed (0:small,slow - 2:fast,big)" + default 0 + range 0 2 + depends on GZIP + help + Select the memory model used by gzip: + 0: small buffers, small hash-tables + 1: larger buffers, larger hash-tables + 2: larger buffers, largest hash-tables + Larger models may give slightly better compression. config LZOP bool "lzop" @@ -340,15 +342,12 @@ config UNLZMA is generally considerably better than that achieved by the bzip2 compressors. 
- The BusyBox unlzma applet is limited to de-compression only. + The BusyBox unlzma applet is limited to decompression only. On an x86 system, this applet adds about 4K. - Unless you have a specific application which requires unlzma, you - should probably say N here. - config FEATURE_LZMA_FAST bool "Optimize unlzma for speed" - default y + default n depends on UNLZMA help This option reduces decompression time by about 25% at the cost of diff --git a/archival/bzip2.c b/archival/bzip2.c index e39d7f704..3dde970f1 100644 --- a/archival/bzip2.c +++ b/archival/bzip2.c @@ -19,7 +19,7 @@ #include "libbb.h" #include "archive.h" -#define CONFIG_BZIP2_FEATURE_SPEED 1 +#define CONFIG_BZIP2_FAST 1 /* Speed test: * Compiled with gcc 4.2.1, run on Athlon 64 1800 MHz (512K L2 cache). @@ -27,7 +27,7 @@ * (time to compress gcc-4.2.1.tar is 126.4% compared to bbox). * At SPEED 5 difference is 32.7%. * - * Test run of all CONFIG_BZIP2_FEATURE_SPEED values on a 11Mb text file: + * Test run of all CONFIG_BZIP2_FAST values on a 11Mb text file: * Size Time (3 runs) * 0: 10828 4.145 4.146 4.148 * 1: 11097 3.845 3.860 3.861 diff --git a/archival/gzip.c b/archival/gzip.c index 0e0b68142..3af930b7e 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -81,10 +81,14 @@ aa: 85.1% -- replaced with aa.gz /* =========================================================================== */ -#if ENABLE_GZIP_BIG_MEM +#if CONFIG_GZIP_FAST == 0 +# define SMALL_MEM +#elif CONFIG_GZIP_FAST == 1 +# define MEDIUM_MEM +#elif CONFIG_GZIP_FAST == 2 # define BIG_MEM #else -# define SMALL_MEM +# error "Invalid CONFIG_GZIP_FAST value" #endif #ifndef INBUFSIZ diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c index f70c3701d..e600cb7a7 100644 --- a/archival/libarchive/bz/blocksort.c +++ b/archival/libarchive/bz/blocksort.c @@ -385,7 +385,7 @@ int mainGtU( * but speeds up compression 10% overall */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 1 +#if CONFIG_BZIP2_FAST >= 1 #define 
TIMES_8(code) \ code; code; code; code; \ @@ -496,7 +496,7 @@ void mainSimpleSort(uint32_t* ptr, i++; /* 1.5% overall speedup, +290 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 3 +#if CONFIG_BZIP2_FAST >= 3 /*-- copy 2 --*/ if (i > hi) break; v = ptr[i]; @@ -750,7 +750,7 @@ void mainSort(EState* state, j = block[0] << 8; i = nblock - 1; /* 3%, +300 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 +#if CONFIG_BZIP2_FAST >= 2 for (; i >= 3; i -= 4) { quadrant[i] = 0; j = (j >> 8) | (((uint16_t)block[i]) << 8); @@ -787,7 +787,7 @@ void mainSort(EState* state, s = block[0] << 8; i = nblock - 1; -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 +#if CONFIG_BZIP2_FAST >= 2 for (; i >= 3; i -= 4) { s = (s >> 8) | (block[i] << 8); j = ftab[s] - 1; diff --git a/archival/libarchive/bz/bzlib_private.h b/archival/libarchive/bz/bzlib_private.h index 6430ce407..43e674bec 100644 --- a/archival/libarchive/bz/bzlib_private.h +++ b/archival/libarchive/bz/bzlib_private.h @@ -183,7 +183,7 @@ typedef struct EState { /* stack-saving measures: these can be local, but they are too big */ int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +#if CONFIG_BZIP2_FAST >= 5 /* second dimension: only 3 needed; 4 makes index calculations faster */ uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; #endif diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c index f93671742..e9f1afdaf 100644 --- a/archival/libarchive/bz/compress.c +++ b/archival/libarchive/bz/compress.c @@ -61,7 +61,7 @@ void bsFinishWrite(EState* s) /*---------------------------------------------------*/ static /* Helps only on level 5, on other levels hurts. ? 
*/ -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +#if CONFIG_BZIP2_FAST >= 5 ALWAYS_INLINE #endif void bsW(EState* s, int32_t n, uint32_t v) @@ -331,7 +331,7 @@ void sendMTFValues(EState* s) for (v = 0; v < alphaSize; v++) s->rfreq[t][v] = 0; -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +#if CONFIG_BZIP2_FAST >= 5 /* * Set up an auxiliary length table which is used to fast-track * the common case (nGroups == 6). @@ -361,7 +361,7 @@ void sendMTFValues(EState* s) */ for (t = 0; t < nGroups; t++) cost[t] = 0; -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +#if CONFIG_BZIP2_FAST >= 5 if (nGroups == 6 && 50 == ge-gs+1) { /*--- fast track the common case ---*/ register uint32_t cost01, cost23, cost45; @@ -420,7 +420,7 @@ void sendMTFValues(EState* s) * Increment the symbol frequencies for the selected table. */ /* 1% faster compress. +800 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 4 +#if CONFIG_BZIP2_FAST >= 4 if (nGroups == 6 && 50 == ge-gs+1) { /*--- fast track the common case ---*/ #define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ diff --git a/archival/libarchive/bz/huffman.c b/archival/libarchive/bz/huffman.c index 676b1af66..bbec11adb 100644 --- a/archival/libarchive/bz/huffman.c +++ b/archival/libarchive/bz/huffman.c @@ -48,7 +48,7 @@ in the file LICENSE. 
/* 90 bytes, 0.3% of overall compress speed */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 1 +#if CONFIG_BZIP2_FAST >= 1 /* macro works better than inline (gcc 4.2.1) */ #define DOWNHEAP1(heap, weight, Heap) \ diff --git a/configs/TEST_nommu_defconfig b/configs/TEST_nommu_defconfig index 905f65296..15e12b1d2 100644 --- a/configs/TEST_nommu_defconfig +++ b/configs/TEST_nommu_defconfig @@ -79,7 +79,7 @@ CONFIG_PREFIX="./_install" # Busybox Library Tuning # CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 CONFIG_FEATURE_FAST_TOP=y CONFIG_FEATURE_ETC_NETWORKS=y CONFIG_FEATURE_EDITING=y diff --git a/configs/TEST_noprintf_defconfig b/configs/TEST_noprintf_defconfig index b72e12856..f4338df71 100644 --- a/configs/TEST_noprintf_defconfig +++ b/configs/TEST_noprintf_defconfig @@ -89,7 +89,7 @@ CONFIG_PREFIX="./_install" # Busybox Library Tuning # CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 CONFIG_FEATURE_FAST_TOP=y # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_EDITING=y diff --git a/configs/TEST_rh9_defconfig b/configs/TEST_rh9_defconfig index 23094e391..193d8f615 100644 --- a/configs/TEST_rh9_defconfig +++ b/configs/TEST_rh9_defconfig @@ -88,7 +88,7 @@ CONFIG_PREFIX="./_install" # Busybox Library Tuning # CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 CONFIG_FEATURE_FAST_TOP=y # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_EDITING=y diff --git a/configs/android2_defconfig b/configs/android2_defconfig index 338c7b13d..b5166e0fc 100644 --- a/configs/android2_defconfig +++ b/configs/android2_defconfig @@ -92,7 +92,7 @@ CONFIG_PREFIX="./_install" # CONFIG_FEATURE_SYSTEMD is not set # CONFIG_FEATURE_RTMINMAX is not set CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 # CONFIG_FEATURE_FAST_TOP is not set # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_USE_TERMIOS=y diff --git a/configs/android_defconfig b/configs/android_defconfig index 7e5232a84..76068b040 
100644 --- a/configs/android_defconfig +++ b/configs/android_defconfig @@ -109,7 +109,7 @@ CONFIG_PREFIX="./_install" # CONFIG_FEATURE_SYSTEMD is not set # CONFIG_FEATURE_RTMINMAX is not set CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 # CONFIG_FEATURE_FAST_TOP is not set # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_USE_TERMIOS=y diff --git a/configs/cygwin_defconfig b/configs/cygwin_defconfig index cc2d643e1..bdd0d66d0 100644 --- a/configs/cygwin_defconfig +++ b/configs/cygwin_defconfig @@ -92,7 +92,7 @@ CONFIG_PREFIX="./_install" # CONFIG_FEATURE_SYSTEMD is not set CONFIG_FEATURE_RTMINMAX=y CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 CONFIG_FEATURE_FAST_TOP=y # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_USE_TERMIOS=y diff --git a/configs/freebsd_defconfig b/configs/freebsd_defconfig index 5f2985be1..dcb5d953c 100644 --- a/configs/freebsd_defconfig +++ b/configs/freebsd_defconfig @@ -90,7 +90,7 @@ CONFIG_PREFIX="./_install" # Busybox Library Tuning # CONFIG_PASSWORD_MINLEN=6 -CONFIG_MD5_SIZE_VS_SPEED=2 +CONFIG_MD5_SMALL=1 CONFIG_FEATURE_FAST_TOP=y # CONFIG_FEATURE_ETC_NETWORKS is not set CONFIG_FEATURE_USE_TERMIOS=y diff --git a/libbb/Config.src b/libbb/Config.src index f6f88b9ce..f45e43b04 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -14,9 +14,9 @@ config PASSWORD_MINLEN help Minimum allowable password length. -config MD5_SIZE_VS_SPEED +config MD5_SMALL int "MD5: Trade bytes for speed (0:fast, 3:slow)" - default 2 + default 1 range 0 3 help Trade binary size versus speed for the md5sum algorithm. 
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index b87d1dde8..a313c2a65 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -104,12 +104,12 @@ static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed) */ /* 0: fastest, 3: smallest */ -#if CONFIG_MD5_SIZE_VS_SPEED < 0 -# define MD5_SIZE_VS_SPEED 0 -#elif CONFIG_MD5_SIZE_VS_SPEED > 3 -# define MD5_SIZE_VS_SPEED 3 +#if CONFIG_MD5_SMALL < 0 +# define MD5_SMALL 0 +#elif CONFIG_MD5_SMALL > 3 +# define MD5_SMALL 3 #else -# define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED +# define MD5_SMALL CONFIG_MD5_SMALL #endif /* These are the four functions used in the four steps of the MD5 algorithm @@ -129,7 +129,7 @@ static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed) /* Hash a single block, 64 bytes long and 4-byte aligned */ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) { -#if MD5_SIZE_VS_SPEED > 0 +#if MD5_SMALL > 0 /* Before we start, one word to the strange constants. They are defined in RFC 1321 as T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64 @@ -157,7 +157,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 }; static const char P_array[] ALIGN1 = { -# if MD5_SIZE_VS_SPEED > 1 +# if MD5_SMALL > 1 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */ # endif 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */ @@ -171,7 +171,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) uint32_t C = ctx->hash[2]; uint32_t D = ctx->hash[3]; -#if MD5_SIZE_VS_SPEED >= 2 /* 2 or 3 */ +#if MD5_SMALL >= 2 /* 2 or 3 */ static const char S_array[] ALIGN1 = { 7, 12, 17, 22, @@ -190,7 +190,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) words[i] = SWAP_LE32(words[i]); # endif -# if MD5_SIZE_VS_SPEED == 3 +# if MD5_SMALL == 3 pc = C_array; pp = P_array; ps = S_array - 4; @@ -220,7 +220,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) C = B; B = temp; } -# else /* 
MD5_SIZE_VS_SPEED == 2 */ +# else /* MD5_SMALL == 2 */ pc = C_array; pp = P_array; ps = S_array; @@ -271,13 +271,13 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) ctx->hash[2] += C; ctx->hash[3] += D; -#else /* MD5_SIZE_VS_SPEED == 0 or 1 */ +#else /* MD5_SMALL == 0 or 1 */ uint32_t A_save = A; uint32_t B_save = B; uint32_t C_save = C; uint32_t D_save = D; -# if MD5_SIZE_VS_SPEED == 1 +# if MD5_SMALL == 1 const uint32_t *pc; const char *pp; int i; @@ -299,7 +299,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) } while (0) /* Round 1 */ -# if MD5_SIZE_VS_SPEED == 1 +# if MD5_SMALL == 1 pc = C_array; for (i = 0; i < 4; i++) { OP(A, B, C, D, 7, *pc++); @@ -339,7 +339,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) } while (0) /* Round 2 */ -# if MD5_SIZE_VS_SPEED == 1 +# if MD5_SMALL == 1 pp = P_array; for (i = 0; i < 4; i++) { OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++); @@ -367,7 +367,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) # endif /* Round 3 */ -# if MD5_SIZE_VS_SPEED == 1 +# if MD5_SMALL == 1 for (i = 0; i < 4; i++) { OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++); OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++); @@ -394,7 +394,7 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) # endif /* Round 4 */ -# if MD5_SIZE_VS_SPEED == 1 +# if MD5_SMALL == 1 for (i = 0; i < 4; i++) { OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++); OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++); diff --git a/libbb/hash_md5prime.c b/libbb/hash_md5prime.c index 7986f4d29..e089a15f5 100644 --- a/libbb/hash_md5prime.c +++ b/libbb/hash_md5prime.c @@ -59,7 +59,7 @@ * Completely removed static PADDING array. * * Reintroduced the loop unrolling in md5_transform and added the - * MD5_SIZE_VS_SPEED option for configurability. Define below as: + * MD5_SMALL option for configurability. 
Define below as: * 0 fully unrolled loops * 1 partially unrolled (4 ops per loop) * 2 no unrolling -- introduces the need to swap 4 variables (slow) @@ -75,12 +75,12 @@ #include "libbb.h" /* 1: fastest, 3: smallest */ -#if CONFIG_MD5_SIZE_VS_SPEED < 1 -# define MD5_SIZE_VS_SPEED 1 -#elif CONFIG_MD5_SIZE_VS_SPEED > 3 -# define MD5_SIZE_VS_SPEED 3 +#if CONFIG_MD5_SMALL < 1 +# define MD5_SMALL 1 +#elif CONFIG_MD5_SMALL > 3 +# define MD5_SMALL 3 #else -# define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED +# define MD5_SMALL CONFIG_MD5_SMALL #endif #if BB_LITTLE_ENDIAN @@ -152,7 +152,7 @@ memcpy32_le2cpu(uint32_t *output, const unsigned char *input, unsigned len) static void md5_transform(uint32_t state[4], const unsigned char block[64]) { uint32_t a, b, c, d, x[16]; -#if MD5_SIZE_VS_SPEED > 1 +#if MD5_SMALL > 1 uint32_t temp; const unsigned char *ps; @@ -162,9 +162,9 @@ static void md5_transform(uint32_t state[4], const unsigned char block[64]) 4, 11, 16, 23, 6, 10, 15, 21 }; -#endif /* MD5_SIZE_VS_SPEED > 1 */ +#endif /* MD5_SMALL > 1 */ -#if MD5_SIZE_VS_SPEED > 0 +#if MD5_SMALL > 0 const uint32_t *pc; const unsigned char *pp; int i; @@ -198,7 +198,7 @@ static void md5_transform(uint32_t state[4], const unsigned char block[64]) 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */ }; -#endif /* MD5_SIZE_VS_SPEED > 0 */ +#endif /* MD5_SMALL > 0 */ memcpy32_le2cpu(x, block, 64); @@ -207,7 +207,7 @@ static void md5_transform(uint32_t state[4], const unsigned char block[64]) c = state[2]; d = state[3]; -#if MD5_SIZE_VS_SPEED > 2 +#if MD5_SMALL > 2 pc = C; pp = P; ps = S - 4; @@ -233,7 +233,7 @@ static void md5_transform(uint32_t state[4], const unsigned char block[64]) temp += b; a = d; d = c; c = b; b = temp; } -#elif MD5_SIZE_VS_SPEED > 1 +#elif MD5_SMALL > 1 pc = C; pp = P; ps = S; @@ -260,7 +260,7 @@ static void md5_transform(uint32_t state[4], const unsigned char block[64]) II(a, b, c, d, x[*pp], ps[i & 0x3], *pc); pp++; pc++; temp = d; d = c; c = b; b = a; a = 
temp; } -#elif MD5_SIZE_VS_SPEED > 0 +#elif MD5_SMALL > 0 pc = C; pp = P; /* Round 1 */