bzip2: ~1% speedup by special-casing "store 1 bit" function

function                                             old     new   delta
bsW1                                                   -      52     +52
BZ2_compressBlock                                    230     225      -5
BZ2_blockSort                                        125     118      -7
sendMTFValues                                       2070    2051     -19
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 52/-31)             Total: 21 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2018-02-03 04:43:46 +01:00
parent 982c44d030
commit feafb3423e
3 changed files with 23 additions and 5 deletions

View File

@ -1056,7 +1056,9 @@ void BZ2_blockSort(EState* s)
} }
} }
#if BZ_LIGHT_DEBUG
s->origPtr = -1; s->origPtr = -1;
#endif
for (i = 0; i < s->nblock; i++) for (i = 0; i < s->nblock; i++)
if (ptr[i] == 0) { if (ptr[i] == 0) {
s->origPtr = i; s->origPtr = i;

View File

@ -55,7 +55,7 @@ void prepare_new_block(EState* s)
{ {
int i; int i;
s->nblock = 0; s->nblock = 0;
//indexes inot s->zbits[], initialzation moved to init of s->zbits //indexes into s->zbits[], initialzation moved to init of s->zbits
//s->posZ = s->zbits; // was: s->numZ = 0; //s->posZ = s->zbits; // was: s->numZ = 0;
//s->state_out_pos = s->zbits; //s->state_out_pos = s->zbits;
BZ_INITIALISE_CRC(s->blockCRC); BZ_INITIALISE_CRC(s->blockCRC);

View File

@ -88,6 +88,22 @@ void bsW16(EState* s, uint32_t v)
s->bsBuff |= (v << (16 - s->bsLive)); s->bsBuff |= (v << (16 - s->bsLive));
s->bsLive += 16; s->bsLive += 16;
} }
/*
 * Single-bit flavour of the bit writer (the n == 1 case of bsW):
 * appends one bit to the pending output bits kept in s->bsBuff.
 *
 * s - compressor state; uses s->bsBuff (pending bits, most significant
 *     bit first), s->bsLive (how many bits are pending) and s->posZ
 *     (output cursor, advanced when a byte is flushed).
 * v - the bit to store; it is shifted in unmasked, and the visible
 *     callers pass only 0 or 1.
 */
static
#if CONFIG_BZIP2_FAST >= 5
ALWAYS_INLINE
#endif
void bsW1(EState* s, uint32_t v)
{
	/*
	 * Room for just one more bit is required, so emitting at most one
	 * byte is sufficient - no loop draining everything above 8 bits.
	 */
	if (s->bsLive >= 8) {
		/* The oldest pending byte sits in the top 8 bits of the buffer */
		*s->posZ = (uint8_t)(s->bsBuff >> 24);
		s->posZ++;
		s->bsBuff <<= 8;
		s->bsLive -= 8;
	}
	/* Place the new bit immediately below the bits already pending */
	s->bsBuff |= v << (31 - s->bsLive);
	s->bsLive++;
}
/*---------------------------------------------------*/ /*---------------------------------------------------*/
@ -557,8 +573,8 @@ void sendMTFValues(EState* s)
for (i = 0; i < nSelectors; i++) { for (i = 0; i < nSelectors; i++) {
unsigned j; unsigned j;
for (j = 0; j < s->selectorMtf[i]; j++) for (j = 0; j < s->selectorMtf[i]; j++)
bsW(s, 1, 1); bsW1(s, 1);
bsW(s, 1, 0); bsW1(s, 0);
} }
/*--- Now the coding tables. ---*/ /*--- Now the coding tables. ---*/
@ -568,7 +584,7 @@ void sendMTFValues(EState* s)
for (i = 0; i < alphaSize; i++) { for (i = 0; i < alphaSize; i++) {
while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ };
while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ };
bsW(s, 1, 0); bsW1(s, 0);
} }
} }
@ -682,7 +698,7 @@ void BZ2_compressBlock(EState* s, int is_last_block)
* so as to maintain backwards compatibility with * so as to maintain backwards compatibility with
* older versions of bzip2. * older versions of bzip2.
*/ */
bsW(s, 1, 0); bsW1(s, 0);
bsW(s, 24, s->origPtr); bsW(s, 24, s->origPtr);
generateMTFValues(s); generateMTFValues(s);