bzip2: eliminate some divisions
This commit is contained in:
parent
3f5fdc7572
commit
6a9154b6f6
@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap,
|
||||
for (i = 0; i < 257; i++) ftab[i] = 0;
|
||||
for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
|
||||
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
|
||||
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
|
||||
|
||||
j = ftab[0]; /* bbox: optimized */
|
||||
for (i = 1; i < 257; i++) {
|
||||
j += ftab[i];
|
||||
ftab[i] = j;
|
||||
}
|
||||
|
||||
for (i = 0; i < nblock; i++) {
|
||||
j = eclass8[i];
|
||||
@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap,
|
||||
fmap[k] = i;
|
||||
}
|
||||
|
||||
nBhtab = 2 + (nblock / 32);
|
||||
nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
|
||||
for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
|
||||
for (i = 0; i < 256; i++) SET_BH(ftab[i]);
|
||||
|
||||
@ -737,27 +742,27 @@ void mainSort(uint32_t* ptr,
|
||||
memset(ftab, 0, 65537 * sizeof(ftab[0]));
|
||||
|
||||
j = block[0] << 8;
|
||||
i = nblock-1;
|
||||
i = nblock - 1;
|
||||
/* 3%, +300 bytes */
|
||||
#if CONFIG_BZIP2_FEATURE_SPEED >= 2
|
||||
for (; i >= 3; i -= 4) {
|
||||
quadrant[i] = 0;
|
||||
j = (j >> 8) |(((uint16_t)block[i]) << 8);
|
||||
j = (j >> 8) | (((uint16_t)block[i]) << 8);
|
||||
ftab[j]++;
|
||||
quadrant[i-1] = 0;
|
||||
j = (j >> 8) |(((uint16_t)block[i-1]) << 8);
|
||||
j = (j >> 8) | (((uint16_t)block[i-1]) << 8);
|
||||
ftab[j]++;
|
||||
quadrant[i-2] = 0;
|
||||
j = (j >> 8) |(((uint16_t)block[i-2]) << 8);
|
||||
j = (j >> 8) | (((uint16_t)block[i-2]) << 8);
|
||||
ftab[j]++;
|
||||
quadrant[i-3] = 0;
|
||||
j = (j >> 8) |(((uint16_t)block[i-3]) << 8);
|
||||
j = (j >> 8) | (((uint16_t)block[i-3]) << 8);
|
||||
ftab[j]++;
|
||||
}
|
||||
#endif
|
||||
for (; i >= 0; i--) {
|
||||
quadrant[i] = 0;
|
||||
j = (j >> 8) |(((uint16_t)block[i]) << 8);
|
||||
j = (j >> 8) | (((uint16_t)block[i]) << 8);
|
||||
ftab[j]++;
|
||||
}
|
||||
|
||||
@ -768,34 +773,37 @@ void mainSort(uint32_t* ptr,
|
||||
}
|
||||
|
||||
/*-- Complete the initial radix sort --*/
|
||||
for (i = 1; i <= 65536; i++)
|
||||
ftab[i] += ftab[i-1];
|
||||
j = ftab[0]; /* bbox: optimized */
|
||||
for (i = 1; i <= 65536; i++) {
|
||||
j += ftab[i];
|
||||
ftab[i] = j;
|
||||
}
|
||||
|
||||
s = block[0] << 8;
|
||||
i = nblock-1;
|
||||
i = nblock - 1;
|
||||
#if CONFIG_BZIP2_FEATURE_SPEED >= 2
|
||||
for (; i >= 3; i -= 4) {
|
||||
s = (s >> 8) | (block[i] << 8);
|
||||
j = ftab[s] -1;
|
||||
j = ftab[s] - 1;
|
||||
ftab[s] = j;
|
||||
ptr[j] = i;
|
||||
s = (s >> 8) | (block[i-1] << 8);
|
||||
j = ftab[s] -1;
|
||||
j = ftab[s] - 1;
|
||||
ftab[s] = j;
|
||||
ptr[j] = i-1;
|
||||
s = (s >> 8) | (block[i-2] << 8);
|
||||
j = ftab[s] -1;
|
||||
j = ftab[s] - 1;
|
||||
ftab[s] = j;
|
||||
ptr[j] = i-2;
|
||||
s = (s >> 8) | (block[i-3] << 8);
|
||||
j = ftab[s] -1;
|
||||
j = ftab[s] - 1;
|
||||
ftab[s] = j;
|
||||
ptr[j] = i-3;
|
||||
}
|
||||
#endif
|
||||
for (; i >= 0; i--) {
|
||||
s = (s >> 8) | (block[i] << 8);
|
||||
j = ftab[s] -1;
|
||||
j = ftab[s] - 1;
|
||||
ftab[s] = j;
|
||||
ptr[j] = i;
|
||||
}
|
||||
@ -812,21 +820,23 @@ void mainSort(uint32_t* ptr,
|
||||
|
||||
{
|
||||
int32_t vv;
|
||||
/* was: int32_t h = 1; */
|
||||
/* bbox: was: int32_t h = 1; */
|
||||
/* do h = 3 * h + 1; while (h <= 256); */
|
||||
int32_t h = 364;
|
||||
uint32_t h = 364;
|
||||
|
||||
do {
|
||||
h = h / 3;
|
||||
/*h = h / 3;*/
|
||||
h = (h * 171) >> 9; /* bbox: fast h/3 */
|
||||
for (i = h; i <= 255; i++) {
|
||||
vv = runningOrder[i];
|
||||
j = i;
|
||||
while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
|
||||
runningOrder[j] = runningOrder[j-h];
|
||||
j = j - h;
|
||||
if (j <= (h - 1)) goto zero;
|
||||
if (j <= (h - 1))
|
||||
goto zero;
|
||||
}
|
||||
zero:
|
||||
zero:
|
||||
runningOrder[j] = vv;
|
||||
}
|
||||
} while (h != 1);
|
||||
@ -860,10 +870,10 @@ void mainSort(uint32_t* ptr,
|
||||
if (j != ss) {
|
||||
sb = (ss << 8) + j;
|
||||
if (!(ftab[sb] & SETMASK)) {
|
||||
int32_t lo = ftab[sb] & CLEARMASK;
|
||||
int32_t lo = ftab[sb] & CLEARMASK;
|
||||
int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
|
||||
if (hi > lo) {
|
||||
mainQSort3 (
|
||||
mainQSort3(
|
||||
ptr, block, quadrant, nblock,
|
||||
lo, hi, BZ_N_RADIX, budget
|
||||
);
|
||||
@ -966,15 +976,14 @@ void mainSort(uint32_t* ptr,
|
||||
while ((bbSize >> shifts) > 65534) shifts++;
|
||||
|
||||
for (j = bbSize-1; j >= 0; j--) {
|
||||
int32_t a2update = ptr[bbStart + j];
|
||||
uint16_t qVal = (uint16_t)(j >> shifts);
|
||||
int32_t a2update = ptr[bbStart + j];
|
||||
uint16_t qVal = (uint16_t)(j >> shifts);
|
||||
quadrant[a2update] = qVal;
|
||||
if (a2update < BZ_N_OVERSHOOT)
|
||||
quadrant[a2update + nblock] = qVal;
|
||||
}
|
||||
AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s)
|
||||
s->origPtr = -1;
|
||||
for (i = 0; i < s->nblock; i++)
|
||||
if (ptr[i] == 0) {
|
||||
s->origPtr = i; break;
|
||||
s->origPtr = i;
|
||||
break;
|
||||
};
|
||||
|
||||
AssertH(s->origPtr != -1, 1003);
|
||||
|
@ -186,7 +186,8 @@ void generateMTFValues(EState* s)
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2) break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
zPend = (uint32_t)(zPend - 2) / 2;
|
||||
/* bbox: unsigned div is easier */
|
||||
};
|
||||
zPend = 0;
|
||||
}
|
||||
@ -219,15 +220,18 @@ void generateMTFValues(EState* s)
|
||||
zPend--;
|
||||
while (1) {
|
||||
if (zPend & 1) {
|
||||
mtfv[wr] = BZ_RUNB; wr++;
|
||||
mtfv[wr] = BZ_RUNB;
|
||||
wr++;
|
||||
s->mtfFreq[BZ_RUNB]++;
|
||||
} else {
|
||||
mtfv[wr] = BZ_RUNA; wr++;
|
||||
mtfv[wr] = BZ_RUNA;
|
||||
wr++;
|
||||
s->mtfFreq[BZ_RUNA]++;
|
||||
}
|
||||
if (zPend < 2)
|
||||
break;
|
||||
zPend = (zPend - 2) / 2;
|
||||
zPend = (uint32_t)(zPend - 2) / 2;
|
||||
/* bbox: unsigned div is easier */
|
||||
};
|
||||
zPend = 0;
|
||||
}
|
||||
@ -288,7 +292,7 @@ void sendMTFValues(EState* s)
|
||||
gs = 0;
|
||||
while (nPart > 0) {
|
||||
tFreq = remF / nPart;
|
||||
ge = gs-1;
|
||||
ge = gs - 1;
|
||||
aFreq = 0;
|
||||
while (aFreq < tFreq && ge < alphaSize-1) {
|
||||
ge++;
|
||||
@ -297,7 +301,7 @@ void sendMTFValues(EState* s)
|
||||
|
||||
if (ge > gs
|
||||
&& nPart != nGroups && nPart != 1
|
||||
&& ((nGroups - nPart) % 2 == 1)
|
||||
&& ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
|
||||
) {
|
||||
aFreq -= s->mtfFreq[ge];
|
||||
ge--;
|
||||
@ -310,7 +314,7 @@ void sendMTFValues(EState* s)
|
||||
s->len[nPart-1][v] = BZ_GREATER_ICOST;
|
||||
|
||||
nPart--;
|
||||
gs = ge+1;
|
||||
gs = ge + 1;
|
||||
remF -= aFreq;
|
||||
}
|
||||
}
|
||||
@ -414,7 +418,7 @@ void sendMTFValues(EState* s)
|
||||
/*
|
||||
* Increment the symbol frequencies for the selected table.
|
||||
*/
|
||||
/* 1% faster compress. +800 bytes */
|
||||
/* 1% faster compress. +800 bytes */
|
||||
#if CONFIG_BZIP2_FEATURE_SPEED >= 4
|
||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||
/*--- fast track the common case ---*/
|
||||
|
@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
|
||||
|
||||
for (i = 1; i <= alphaSize; i++) {
|
||||
j = weight[i] >> 8;
|
||||
/* bbox: yes, it is a signed division.
|
||||
* don't replace with shift! */
|
||||
j = 1 + (j / 2);
|
||||
weight[i] = j << 8;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user