decompress_bunzip2: code shrink ~10 bytes
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
@@ -151,8 +151,8 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
|
|||||||
static int get_next_block(bunzip_data *bd)
|
static int get_next_block(bunzip_data *bd)
|
||||||
{
|
{
|
||||||
struct group_data *hufGroup;
|
struct group_data *hufGroup;
|
||||||
int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
|
int dbufCount, dbufSize, groupCount, *base, *limit, selector,
|
||||||
i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
|
i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
|
||||||
uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
|
uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
|
||||||
uint32_t *dbuf;
|
uint32_t *dbuf;
|
||||||
unsigned origPtr;
|
unsigned origPtr;
|
||||||
@@ -161,9 +161,12 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
dbufSize = bd->dbufSize;
|
dbufSize = bd->dbufSize;
|
||||||
selectors = bd->selectors;
|
selectors = bd->selectors;
|
||||||
|
|
||||||
|
/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
|
||||||
|
#if 0
|
||||||
/* Reset longjmp I/O error handling */
|
/* Reset longjmp I/O error handling */
|
||||||
i = setjmp(bd->jmpbuf);
|
i = setjmp(bd->jmpbuf);
|
||||||
if (i) return i;
|
if (i) return i;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Read in header signature and CRC, then validate signature.
|
/* Read in header signature and CRC, then validate signature.
|
||||||
(last block signature means CRC is for whole file, return now) */
|
(last block signature means CRC is for whole file, return now) */
|
||||||
@@ -185,16 +188,23 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
symbols to deal with, and writes a sparse bitfield indicating which
|
symbols to deal with, and writes a sparse bitfield indicating which
|
||||||
values were present. We make a translation table to convert the symbols
|
values were present. We make a translation table to convert the symbols
|
||||||
back to the corresponding bytes. */
|
back to the corresponding bytes. */
|
||||||
t = get_bits(bd, 16);
|
|
||||||
symTotal = 0;
|
symTotal = 0;
|
||||||
for (i = 0; i < 16; i++) {
|
i = 0;
|
||||||
if (t & (1 << (15-i))) {
|
t = get_bits(bd, 16);
|
||||||
k = get_bits(bd, 16);
|
do {
|
||||||
for (j = 0; j < 16; j++)
|
if (t & (1 << 15)) {
|
||||||
if (k & (1 << (15-j)))
|
unsigned inner_map = get_bits(bd, 16);
|
||||||
symToByte[symTotal++] = (16*i) + j;
|
do {
|
||||||
|
if (inner_map & (1 << 15))
|
||||||
|
symToByte[symTotal++] = i;
|
||||||
|
inner_map <<= 1;
|
||||||
|
i++;
|
||||||
|
} while (i & 15);
|
||||||
|
i -= 16;
|
||||||
}
|
}
|
||||||
}
|
t <<= 1;
|
||||||
|
i += 16;
|
||||||
|
} while (i < 256);
|
||||||
|
|
||||||
/* How many different Huffman coding groups does this block use? */
|
/* How many different Huffman coding groups does this block use? */
|
||||||
groupCount = get_bits(bd, 3);
|
groupCount = get_bits(bd, 3);
|
||||||
@@ -205,20 +215,24 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
group. Read in the group selector list, which is stored as MTF encoded
|
group. Read in the group selector list, which is stored as MTF encoded
|
||||||
bit runs. (MTF=Move To Front, as each value is used it's moved to the
|
bit runs. (MTF=Move To Front, as each value is used it's moved to the
|
||||||
start of the list.) */
|
start of the list.) */
|
||||||
|
for (i = 0; i < groupCount; i++)
|
||||||
|
mtfSymbol[i] = i;
|
||||||
nSelectors = get_bits(bd, 15);
|
nSelectors = get_bits(bd, 15);
|
||||||
if (!nSelectors) return RETVAL_DATA_ERROR;
|
if (!nSelectors)
|
||||||
for (i = 0; i < groupCount; i++) mtfSymbol[i] = i;
|
return RETVAL_DATA_ERROR;
|
||||||
for (i = 0; i < nSelectors; i++) {
|
for (i = 0; i < nSelectors; i++) {
|
||||||
|
uint8_t tmp_byte;
|
||||||
/* Get next value */
|
/* Get next value */
|
||||||
for (j = 0; get_bits(bd, 1); j++)
|
int n = 0;
|
||||||
if (j >= groupCount) return RETVAL_DATA_ERROR;
|
while (get_bits(bd, 1)) {
|
||||||
|
if (n >= groupCount) return RETVAL_DATA_ERROR;
|
||||||
|
n++;
|
||||||
|
}
|
||||||
/* Decode MTF to get the next selector */
|
/* Decode MTF to get the next selector */
|
||||||
uc = mtfSymbol[j];
|
tmp_byte = mtfSymbol[n];
|
||||||
for (; j; j--)
|
while (--n >= 0)
|
||||||
mtfSymbol[j] = mtfSymbol[j-1];
|
mtfSymbol[n + 1] = mtfSymbol[n];
|
||||||
mtfSymbol[0] = selectors[i] = uc;
|
mtfSymbol[0] = selectors[i] = tmp_byte;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read the Huffman coding tables for each group, which code for symTotal
|
/* Read the Huffman coding tables for each group, which code for symTotal
|
||||||
@@ -239,20 +253,21 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
t = get_bits(bd, 5) - 1;
|
t = get_bits(bd, 5) - 1;
|
||||||
for (i = 0; i < symCount; i++) {
|
for (i = 0; i < symCount; i++) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
int two_bits;
|
||||||
if ((unsigned)t > (MAX_HUFCODE_BITS-1))
|
if ((unsigned)t > (MAX_HUFCODE_BITS-1))
|
||||||
return RETVAL_DATA_ERROR;
|
return RETVAL_DATA_ERROR;
|
||||||
|
|
||||||
/* If first bit is 0, stop. Else second bit indicates whether
|
/* If first bit is 0, stop. Else second bit indicates whether
|
||||||
to increment or decrement the value. Optimization: grab 2
|
to increment or decrement the value. Optimization: grab 2
|
||||||
bits and unget the second if the first was 0. */
|
bits and unget the second if the first was 0. */
|
||||||
k = get_bits(bd, 2);
|
two_bits = get_bits(bd, 2);
|
||||||
if (k < 2) {
|
if (two_bits < 2) {
|
||||||
bd->inbufBitCount++;
|
bd->inbufBitCount++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add one if second bit 1, else subtract 1. Avoids if/else */
|
/* Add one if second bit 1, else subtract 1. Avoids if/else */
|
||||||
t += (((k+1) & 2) - 1);
|
t += (((two_bits+1) & 2) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Correct for the initial -1, to get the final symbol length */
|
/* Correct for the initial -1, to get the final symbol length */
|
||||||
@@ -282,17 +297,18 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
|
|
||||||
/* Note that minLen can't be smaller than 1, so we adjust the base
|
/* Note that minLen can't be smaller than 1, so we adjust the base
|
||||||
and limit array pointers so we're not always wasting the first
|
and limit array pointers so we're not always wasting the first
|
||||||
entry. We do this again when using them (during symbol decoding).*/
|
entry. We do this again when using them (during symbol decoding). */
|
||||||
base = hufGroup->base - 1;
|
base = hufGroup->base - 1;
|
||||||
limit = hufGroup->limit - 1;
|
limit = hufGroup->limit - 1;
|
||||||
|
|
||||||
/* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */
|
/* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */
|
||||||
pp = 0;
|
pp = 0;
|
||||||
for (i = minLen; i <= maxLen; i++) {
|
for (i = minLen; i <= maxLen; i++) {
|
||||||
|
int k;
|
||||||
temp[i] = limit[i] = 0;
|
temp[i] = limit[i] = 0;
|
||||||
for (t = 0; t < symCount; t++)
|
for (k = 0; k < symCount; k++)
|
||||||
if (length[t] == i)
|
if (length[k] == i)
|
||||||
hufGroup->permute[pp++] = t;
|
hufGroup->permute[pp++] = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Count symbols coded for at each bit length */
|
/* Count symbols coded for at each bit length */
|
||||||
@@ -305,8 +321,10 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
* base[] (number of symbols to ignore at each bit length, which is
|
* base[] (number of symbols to ignore at each bit length, which is
|
||||||
* limit minus the cumulative count of symbols coded for already). */
|
* limit minus the cumulative count of symbols coded for already). */
|
||||||
pp = t = 0;
|
pp = t = 0;
|
||||||
for (i = minLen; i < maxLen; i++) {
|
for (i = minLen; i < maxLen;) {
|
||||||
pp += temp[i];
|
unsigned temp_i = temp[i];
|
||||||
|
|
||||||
|
pp += temp_i;
|
||||||
|
|
||||||
/* We read the largest possible symbol size and then unget bits
|
/* We read the largest possible symbol size and then unget bits
|
||||||
after determining how many we need, and those extra bits could
|
after determining how many we need, and those extra bits could
|
||||||
@@ -316,8 +334,8 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
don't affect the value>limit[length] comparison. */
|
don't affect the value>limit[length] comparison. */
|
||||||
limit[i] = (pp << (maxLen - i)) - 1;
|
limit[i] = (pp << (maxLen - i)) - 1;
|
||||||
pp <<= 1;
|
pp <<= 1;
|
||||||
t += temp[i];
|
t += temp_i;
|
||||||
base[i+1] = pp - t;
|
base[++i] = pp - t;
|
||||||
}
|
}
|
||||||
limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
|
limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
|
||||||
limit[maxLen] = pp + temp[maxLen] - 1;
|
limit[maxLen] = pp + temp[maxLen] - 1;
|
||||||
@@ -329,9 +347,9 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
and run length encoding, saving the result into dbuf[dbufCount++] = uc */
|
and run length encoding, saving the result into dbuf[dbufCount++] = uc */
|
||||||
|
|
||||||
/* Initialize symbol occurrence counters and symbol Move To Front table */
|
/* Initialize symbol occurrence counters and symbol Move To Front table */
|
||||||
memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
|
/*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
|
||||||
for (i = 0; i < 256; i++) {
|
for (i = 0; i < 256; i++) {
|
||||||
//byteCount[i] = 0;
|
byteCount[i] = 0;
|
||||||
mtfSymbol[i] = (uint8_t)i;
|
mtfSymbol[i] = (uint8_t)i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -339,6 +357,7 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
|
|
||||||
runPos = dbufCount = selector = 0;
|
runPos = dbufCount = selector = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
int nextSym;
|
||||||
|
|
||||||
/* Fetch next Huffman coding group from list. */
|
/* Fetch next Huffman coding group from list. */
|
||||||
symCount = GROUP_SIZE - 1;
|
symCount = GROUP_SIZE - 1;
|
||||||
@@ -346,44 +365,49 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
hufGroup = bd->groups + selectors[selector++];
|
hufGroup = bd->groups + selectors[selector++];
|
||||||
base = hufGroup->base - 1;
|
base = hufGroup->base - 1;
|
||||||
limit = hufGroup->limit - 1;
|
limit = hufGroup->limit - 1;
|
||||||
continue_this_group:
|
|
||||||
|
|
||||||
|
continue_this_group:
|
||||||
/* Read next Huffman-coded symbol. */
|
/* Read next Huffman-coded symbol. */
|
||||||
|
|
||||||
/* Note: It is far cheaper to read maxLen bits and back up than it is
|
/* Note: It is far cheaper to read maxLen bits and back up than it is
|
||||||
to read minLen bits and then an additional bit at a time, testing
|
to read minLen bits and then an additional bit at a time, testing
|
||||||
as we go. Because there is a trailing last block (with file CRC),
|
as we go. Because there is a trailing last block (with file CRC),
|
||||||
there is no danger of the overread causing an unexpected EOF for a
|
there is no danger of the overread causing an unexpected EOF for a
|
||||||
valid compressed file. As a further optimization, we do the read
|
valid compressed file.
|
||||||
inline (falling back to a call to get_bits if the buffer runs
|
|
||||||
dry). The following (up to got_huff_bits:) is equivalent to
|
|
||||||
j = get_bits(bd, hufGroup->maxLen);
|
|
||||||
*/
|
*/
|
||||||
while ((int)(bd->inbufBitCount) < hufGroup->maxLen) {
|
if (1) {
|
||||||
if (bd->inbufPos == bd->inbufCount) {
|
/* As a further optimization, we do the read inline
|
||||||
j = get_bits(bd, hufGroup->maxLen);
|
(falling back to a call to get_bits if the buffer runs dry).
|
||||||
goto got_huff_bits;
|
*/
|
||||||
}
|
int new_cnt;
|
||||||
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
|
while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
|
||||||
bd->inbufBitCount += 8;
|
/* bd->inbufBitCount < hufGroup->maxLen */
|
||||||
};
|
if (bd->inbufPos == bd->inbufCount) {
|
||||||
bd->inbufBitCount -= hufGroup->maxLen;
|
nextSym = get_bits(bd, hufGroup->maxLen);
|
||||||
j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1);
|
goto got_huff_bits;
|
||||||
|
}
|
||||||
got_huff_bits:
|
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
|
||||||
|
bd->inbufBitCount += 8;
|
||||||
/* Figure how how many bits are in next symbol and unget extras */
|
};
|
||||||
|
bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
|
||||||
|
nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
|
||||||
|
got_huff_bits: ;
|
||||||
|
} else { /* unoptimized equivalent */
|
||||||
|
nextSym = get_bits(bd, hufGroup->maxLen);
|
||||||
|
}
|
||||||
|
/* Figure how many bits are in next symbol and unget extras */
|
||||||
i = hufGroup->minLen;
|
i = hufGroup->minLen;
|
||||||
while (j > limit[i]) ++i;
|
while (nextSym > limit[i]) ++i;
|
||||||
bd->inbufBitCount += (hufGroup->maxLen - i);
|
j = hufGroup->maxLen - i;
|
||||||
|
if (j < 0)
|
||||||
|
return RETVAL_DATA_ERROR;
|
||||||
|
bd->inbufBitCount += j;
|
||||||
|
|
||||||
/* Huffman decode value to get nextSym (with bounds checking) */
|
/* Huffman decode value to get nextSym (with bounds checking) */
|
||||||
if (i > hufGroup->maxLen)
|
nextSym = (nextSym >> j) - base[i];
|
||||||
|
if ((unsigned)nextSym >= MAX_SYMBOLS)
|
||||||
return RETVAL_DATA_ERROR;
|
return RETVAL_DATA_ERROR;
|
||||||
j = (j >> (hufGroup->maxLen - i)) - base[i];
|
nextSym = hufGroup->permute[nextSym];
|
||||||
if ((unsigned)j >= MAX_SYMBOLS)
|
|
||||||
return RETVAL_DATA_ERROR;
|
|
||||||
nextSym = hufGroup->permute[j];
|
|
||||||
|
|
||||||
/* We have now decoded the symbol, which indicates either a new literal
|
/* We have now decoded the symbol, which indicates either a new literal
|
||||||
byte, or a repeated run of the most recent literal byte. First,
|
byte, or a repeated run of the most recent literal byte. First,
|
||||||
@@ -392,7 +416,7 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
|
if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
|
||||||
|
|
||||||
/* If this is the start of a new run, zero out counter */
|
/* If this is the start of a new run, zero out counter */
|
||||||
if (!runPos) {
|
if (runPos == 0) {
|
||||||
runPos = 1;
|
runPos = 1;
|
||||||
t = 0;
|
t = 0;
|
||||||
}
|
}
|
||||||
@@ -413,13 +437,13 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
how many times to repeat the last literal, so append that many
|
how many times to repeat the last literal, so append that many
|
||||||
copies to our buffer of decoded symbols (dbuf) now. (The last
|
copies to our buffer of decoded symbols (dbuf) now. (The last
|
||||||
literal used is the one at the head of the mtfSymbol array.) */
|
literal used is the one at the head of the mtfSymbol array.) */
|
||||||
if (runPos) {
|
if (runPos != 0) {
|
||||||
runPos = 0;
|
uint8_t tmp_byte;
|
||||||
if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
|
if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
|
||||||
|
tmp_byte = symToByte[mtfSymbol[0]];
|
||||||
uc = symToByte[mtfSymbol[0]];
|
byteCount[tmp_byte] += t;
|
||||||
byteCount[uc] += t;
|
while (--t >= 0) dbuf[dbufCount++] = tmp_byte;
|
||||||
while (t--) dbuf[dbufCount++] = uc;
|
runPos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Is this the terminating symbol? */
|
/* Is this the terminating symbol? */
|
||||||
@@ -448,12 +472,12 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
|
|
||||||
/* We have our literal byte. Save it into dbuf. */
|
/* We have our literal byte. Save it into dbuf. */
|
||||||
byteCount[uc]++;
|
byteCount[uc]++;
|
||||||
dbuf[dbufCount++] = (unsigned)uc;
|
dbuf[dbufCount++] = (uint32_t)uc;
|
||||||
|
|
||||||
/* Skip group initialization if we're not done with this group. Done
|
/* Skip group initialization if we're not done with this group. Done
|
||||||
* this way to avoid compiler warning. */
|
* this way to avoid compiler warning. */
|
||||||
end_of_huffman_loop:
|
end_of_huffman_loop:
|
||||||
if (symCount--) goto continue_this_group;
|
if (--symCount >= 0) goto continue_this_group;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* At this point, we've read all the Huffman-coded symbols (and repeated
|
/* At this point, we've read all the Huffman-coded symbols (and repeated
|
||||||
@@ -466,16 +490,17 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
|
/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
|
||||||
j = 0;
|
j = 0;
|
||||||
for (i = 0; i < 256; i++) {
|
for (i = 0; i < 256; i++) {
|
||||||
k = j + byteCount[i];
|
int tmp_count = j + byteCount[i];
|
||||||
byteCount[i] = j;
|
byteCount[i] = j;
|
||||||
j = k;
|
j = tmp_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Figure out what order dbuf would be in if we sorted it. */
|
/* Figure out what order dbuf would be in if we sorted it. */
|
||||||
for (i = 0; i < dbufCount; i++) {
|
for (i = 0; i < dbufCount; i++) {
|
||||||
uc = (uint8_t)dbuf[i];
|
uint8_t tmp_byte = (uint8_t)dbuf[i];
|
||||||
dbuf[byteCount[uc]] |= (i << 8);
|
int tmp_count = byteCount[tmp_byte];
|
||||||
byteCount[uc]++;
|
dbuf[tmp_count] |= (i << 8);
|
||||||
|
byteCount[tmp_byte] = tmp_count + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Decode first byte by hand to initialize "previous" byte. Note that it
|
/* Decode first byte by hand to initialize "previous" byte. Note that it
|
||||||
|
Reference in New Issue
Block a user