busybox/archival/libarchive/decompress_uncompress.c

313 lines
7.9 KiB
C
Raw Normal View History

/* vi: set sw=4 ts=4: */
/*
* uncompress for busybox -- (c) 2002 Robert Griebl
*
* based on the original compress42.c source
* (see disclaimer below)
*/
/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
*
* Authors:
* Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
* Dave Mack (csu@alembic.acs.com)
* Peter Jannesen, Network Communication Systems
* (peter@ncs.nl)
*
* marc@suse.de : a small security fix for a buffer overflow
*
* [... History snipped ...]
*/
#include "libbb.h"
#include "bb_archive.h"
/* Default input buffer size */
#define IBUFSIZ 2048
2002-11-28 09:09:47 +00:00
/* Default output buffer size */
#define OBUFSIZ 2048
2002-11-28 09:09:47 +00:00
/* Defines for third byte of header */
#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */
/* Masks 0x20 and 0x40 are free. */
/* I think 0x20 should mean that there is */
/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80 /* Block compression if table is full and */
/* compression rate is dropping flush tables */
/* the next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
2002-11-28 09:09:47 +00:00
#define INIT_BITS 9 /* initial number of bits/code */
/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define HBITS 17 /* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1) /* unused */
#define HPRIME 9941 /* unused */
#define BITS 16
#define BITS_STR "16"
#undef MAXSEG_64K /* unused */
#define MAXCODE(n) (1L << (n))
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[i]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
#define clear_tab_prefixof() memset(codetab, 0, 256)
/*
* Decompress stdin to stdout. This routine adapts to the codes in the
* file building the "string" table on-the-fly; requiring no table to
* be stored in the compressed file.
*/
IF_DESKTOP(long long) int FAST_FUNC
unpack_Z_stream(transformer_state_t *xstate)
{
IF_DESKTOP(long long total_written = 0;)
IF_DESKTOP(long long) int retval = -1;
2002-11-28 09:09:47 +00:00
unsigned char *stackp;
int finchar;
2007-04-13 23:22:00 +00:00
long oldcode;
long incode;
2002-11-28 09:09:47 +00:00
int inbits;
int posbits;
int outpos;
int insize;
int bitmask;
2007-04-13 23:22:00 +00:00
long free_ent;
long maxcode;
long maxmaxcode;
2002-11-28 09:09:47 +00:00
int n_bits;
int rsize = 0;
unsigned char *inbuf; /* were eating insane amounts of stack - */
unsigned char *outbuf; /* bad for some embedded targets */
unsigned char *htab;
unsigned short *codetab;
/* Hmm, these were statics - why?! */
/* user settable max # bits/code */
int maxbits; /* = BITS; */
/* block compress mode -C compatible with 2.0 */
int block_mode; /* = BLOCK_MODE; */
if (check_signature16(xstate, COMPRESS_MAGIC))
return -1;
inbuf = xzalloc(IBUFSIZ + 64);
outbuf = xzalloc(OBUFSIZ + 2048);
htab = xzalloc(HSIZE); /* wasn't zeroed out before, maybe can xmalloc? */
codetab = xzalloc(HSIZE * sizeof(codetab[0]));
insize = 0;
/* xread isn't good here, we have to return - caller may want
* to do some cleanup (e.g. delete incomplete unpacked file etc) */
if (full_read(xstate->src_fd, inbuf, 1) != 1) {
libbb: reduce the overhead of single parameter bb_error_msg() calls Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower overhead call to bb_perror_msg() when only a string was being printed with no parameters. This saves space for some CPU architectures because it avoids the overhead of a call to a variadic function. However there has never been a simple version of bb_error_msg(), and since 2007 many new calls to bb_perror_msg() have been added that only take a single parameter and so could have been using bb_simple_perror_message(). This changeset introduces 'simple' versions of bb_info_msg(), bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and bb_herror_msg_and_die(), and replaces all calls that only take a single parameter, or use something like ("%s", arg), with calls to the corresponding 'simple' version. Since it is likely that single parameter calls to the variadic functions may be accidentally reintroduced in the future a new debugging config option WARN_SIMPLE_MSG has been introduced. This uses some macro magic which will cause any such calls to generate a warning, but this is turned off by default to avoid use of the unpleasant macros in normal circumstances. This is a large changeset due to the number of calls that have been replaced. The only files that contain changes other than simple substitution of function calls are libbb.h, libbb/herror_msg.c, libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c, networking/udhcp/common.h and util-linux/mdev.c additonal macros have been added for logging so that single parameter and multiple parameter logging variants exist. The amount of space saved varies considerably by architecture, and was found to be as follows (for 'defconfig' using GCC 7.4): Arm: -92 bytes MIPS: -52 bytes PPC: -1836 bytes x86_64: -938 bytes Note that for the MIPS architecture only an exception had to be made disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h) because it made these files larger on MIPS. Signed-off-by: James Byrne <james.byrne@origamienergy.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
bb_simple_error_msg("short read");
goto err;
}
maxbits = inbuf[0] & BIT_MASK;
block_mode = inbuf[0] & BLOCK_MODE;
maxmaxcode = MAXCODE(maxbits);
2002-11-28 09:09:47 +00:00
if (maxbits > BITS) {
bb_error_msg("compressed with %d bits, can only handle "
BITS_STR" bits", maxbits);
goto err;
}
n_bits = INIT_BITS;
maxcode = MAXCODE(INIT_BITS) - 1;
bitmask = (1 << INIT_BITS) - 1;
oldcode = -1;
finchar = 0;
outpos = 0;
2002-11-28 09:09:47 +00:00
posbits = 0 << 3;
2002-11-28 09:09:47 +00:00
free_ent = ((block_mode) ? FIRST : 256);
2002-11-28 09:09:47 +00:00
/* As above, initialize the first 256 entries in the table. */
/*clear_tab_prefixof(); - done by xzalloc */
{
int i;
for (i = 255; i >= 0; --i)
tab_suffixof(i) = (unsigned char) i;
2002-11-28 09:09:47 +00:00
}
2002-11-28 09:09:47 +00:00
do {
resetbuf:
{
2002-11-28 09:09:47 +00:00
int i;
int e;
int o;
o = posbits >> 3;
e = insize - o;
2002-11-28 09:09:47 +00:00
for (i = 0; i < e; ++i)
inbuf[i] = inbuf[i + o];
insize = e;
posbits = 0;
}
if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
rsize = safe_read(xstate->src_fd, inbuf + insize, IBUFSIZ);
if (rsize < 0)
libbb: reduce the overhead of single parameter bb_error_msg() calls Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower overhead call to bb_perror_msg() when only a string was being printed with no parameters. This saves space for some CPU architectures because it avoids the overhead of a call to a variadic function. However there has never been a simple version of bb_error_msg(), and since 2007 many new calls to bb_perror_msg() have been added that only take a single parameter and so could have been using bb_simple_perror_message(). This changeset introduces 'simple' versions of bb_info_msg(), bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and bb_herror_msg_and_die(), and replaces all calls that only take a single parameter, or use something like ("%s", arg), with calls to the corresponding 'simple' version. Since it is likely that single parameter calls to the variadic functions may be accidentally reintroduced in the future a new debugging config option WARN_SIMPLE_MSG has been introduced. This uses some macro magic which will cause any such calls to generate a warning, but this is turned off by default to avoid use of the unpleasant macros in normal circumstances. This is a large changeset due to the number of calls that have been replaced. The only files that contain changes other than simple substitution of function calls are libbb.h, libbb/herror_msg.c, libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c, networking/udhcp/common.h and util-linux/mdev.c additonal macros have been added for logging so that single parameter and multiple parameter logging variants exist. The amount of space saved varies considerably by architecture, and was found to be as follows (for 'defconfig' using GCC 7.4): Arm: -92 bytes MIPS: -52 bytes PPC: -1836 bytes x86_64: -938 bytes Note that for the MIPS architecture only an exception had to be made disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h) because it made these files larger on MIPS. Signed-off-by: James Byrne <james.byrne@origamienergy.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
bb_simple_error_msg_and_die(bb_msg_read_error);
insize += rsize;
}
2002-11-28 09:09:47 +00:00
inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
(insize << 3) - (n_bits - 1));
2002-11-28 09:09:47 +00:00
while (inbits > posbits) {
long code;
2002-11-28 09:09:47 +00:00
if (free_ent > maxcode) {
posbits =
((posbits - 1) +
((n_bits << 3) -
(posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
++n_bits;
2002-11-28 09:09:47 +00:00
if (n_bits == maxbits) {
maxcode = maxmaxcode;
2002-11-28 09:09:47 +00:00
} else {
maxcode = MAXCODE(n_bits) - 1;
}
bitmask = (1 << n_bits) - 1;
goto resetbuf;
}
2002-11-28 09:09:47 +00:00
{
unsigned char *p = &inbuf[posbits >> 3];
code = ((p[0]
| ((long) (p[1]) << 8)
| ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
}
posbits += n_bits;
2002-11-28 09:09:47 +00:00
if (oldcode == -1) {
if (code >= 256)
libbb: reduce the overhead of single parameter bb_error_msg() calls Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower overhead call to bb_perror_msg() when only a string was being printed with no parameters. This saves space for some CPU architectures because it avoids the overhead of a call to a variadic function. However there has never been a simple version of bb_error_msg(), and since 2007 many new calls to bb_perror_msg() have been added that only take a single parameter and so could have been using bb_simple_perror_message(). This changeset introduces 'simple' versions of bb_info_msg(), bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and bb_herror_msg_and_die(), and replaces all calls that only take a single parameter, or use something like ("%s", arg), with calls to the corresponding 'simple' version. Since it is likely that single parameter calls to the variadic functions may be accidentally reintroduced in the future a new debugging config option WARN_SIMPLE_MSG has been introduced. This uses some macro magic which will cause any such calls to generate a warning, but this is turned off by default to avoid use of the unpleasant macros in normal circumstances. This is a large changeset due to the number of calls that have been replaced. The only files that contain changes other than simple substitution of function calls are libbb.h, libbb/herror_msg.c, libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c, networking/udhcp/common.h and util-linux/mdev.c additonal macros have been added for logging so that single parameter and multiple parameter logging variants exist. The amount of space saved varies considerably by architecture, and was found to be as follows (for 'defconfig' using GCC 7.4): Arm: -92 bytes MIPS: -52 bytes PPC: -1836 bytes x86_64: -938 bytes Note that for the MIPS architecture only an exception had to be made disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h) because it made these files larger on MIPS. Signed-off-by: James Byrne <james.byrne@origamienergy.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
bb_simple_error_msg_and_die("corrupted data"); /* %ld", code); */
oldcode = code;
finchar = (int) oldcode;
outbuf[outpos++] = (unsigned char) finchar;
continue;
}
2002-11-28 09:09:47 +00:00
if (code == CLEAR && block_mode) {
clear_tab_prefixof();
2002-11-28 09:09:47 +00:00
free_ent = FIRST - 1;
posbits =
((posbits - 1) +
((n_bits << 3) -
(posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
n_bits = INIT_BITS;
maxcode = MAXCODE(INIT_BITS) - 1;
bitmask = (1 << INIT_BITS) - 1;
goto resetbuf;
}
incode = code;
2002-11-28 09:09:47 +00:00
stackp = de_stack;
2002-11-28 09:09:47 +00:00
/* Special case for KwKwK string. */
if (code >= free_ent) {
if (code > free_ent) {
/*
2002-11-28 09:09:47 +00:00
unsigned char *p;
posbits -= n_bits;
2002-11-28 09:09:47 +00:00
p = &inbuf[posbits >> 3];
2003-03-19 09:13:01 +00:00
bb_error_msg
2002-11-28 09:09:47 +00:00
("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
insize, posbits, p[-1], p[0], p[1], p[2], p[3],
(posbits & 07));
*/
libbb: reduce the overhead of single parameter bb_error_msg() calls Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower overhead call to bb_perror_msg() when only a string was being printed with no parameters. This saves space for some CPU architectures because it avoids the overhead of a call to a variadic function. However there has never been a simple version of bb_error_msg(), and since 2007 many new calls to bb_perror_msg() have been added that only take a single parameter and so could have been using bb_simple_perror_message(). This changeset introduces 'simple' versions of bb_info_msg(), bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and bb_herror_msg_and_die(), and replaces all calls that only take a single parameter, or use something like ("%s", arg), with calls to the corresponding 'simple' version. Since it is likely that single parameter calls to the variadic functions may be accidentally reintroduced in the future a new debugging config option WARN_SIMPLE_MSG has been introduced. This uses some macro magic which will cause any such calls to generate a warning, but this is turned off by default to avoid use of the unpleasant macros in normal circumstances. This is a large changeset due to the number of calls that have been replaced. The only files that contain changes other than simple substitution of function calls are libbb.h, libbb/herror_msg.c, libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c, networking/udhcp/common.h and util-linux/mdev.c additonal macros have been added for logging so that single parameter and multiple parameter logging variants exist. The amount of space saved varies considerably by architecture, and was found to be as follows (for 'defconfig' using GCC 7.4): Arm: -92 bytes MIPS: -52 bytes PPC: -1836 bytes x86_64: -938 bytes Note that for the MIPS architecture only an exception had to be made disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h) because it made these files larger on MIPS. Signed-off-by: James Byrne <james.byrne@origamienergy.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
bb_simple_error_msg("corrupted data");
goto err;
}
2002-11-28 09:09:47 +00:00
*--stackp = (unsigned char) finchar;
code = oldcode;
}
2002-11-28 09:09:47 +00:00
/* Generate output characters in reverse order */
while (code >= 256) {
if (stackp <= &htabof(0))
libbb: reduce the overhead of single parameter bb_error_msg() calls Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower overhead call to bb_perror_msg() when only a string was being printed with no parameters. This saves space for some CPU architectures because it avoids the overhead of a call to a variadic function. However there has never been a simple version of bb_error_msg(), and since 2007 many new calls to bb_perror_msg() have been added that only take a single parameter and so could have been using bb_simple_perror_message(). This changeset introduces 'simple' versions of bb_info_msg(), bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and bb_herror_msg_and_die(), and replaces all calls that only take a single parameter, or use something like ("%s", arg), with calls to the corresponding 'simple' version. Since it is likely that single parameter calls to the variadic functions may be accidentally reintroduced in the future a new debugging config option WARN_SIMPLE_MSG has been introduced. This uses some macro magic which will cause any such calls to generate a warning, but this is turned off by default to avoid use of the unpleasant macros in normal circumstances. This is a large changeset due to the number of calls that have been replaced. The only files that contain changes other than simple substitution of function calls are libbb.h, libbb/herror_msg.c, libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c, networking/udhcp/common.h and util-linux/mdev.c additonal macros have been added for logging so that single parameter and multiple parameter logging variants exist. The amount of space saved varies considerably by architecture, and was found to be as follows (for 'defconfig' using GCC 7.4): Arm: -92 bytes MIPS: -52 bytes PPC: -1836 bytes x86_64: -938 bytes Note that for the MIPS architecture only an exception had to be made disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h) because it made these files larger on MIPS. Signed-off-by: James Byrne <james.byrne@origamienergy.com> Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
bb_simple_error_msg_and_die("corrupted data");
2002-11-28 09:09:47 +00:00
*--stackp = tab_suffixof(code);
code = tab_prefixof(code);
}
finchar = tab_suffixof(code);
*--stackp = (unsigned char) finchar;
2002-11-28 09:09:47 +00:00
/* And put them out in forward order */
{
2002-11-28 09:09:47 +00:00
int i;
i = de_stack - stackp;
if (outpos + i >= OBUFSIZ) {
2002-11-28 09:09:47 +00:00
do {
if (i > OBUFSIZ - outpos) {
i = OBUFSIZ - outpos;
}
2002-11-28 09:09:47 +00:00
if (i > 0) {
memcpy(outbuf + outpos, stackp, i);
outpos += i;
}
2002-11-28 09:09:47 +00:00
if (outpos >= OBUFSIZ) {
xtransformer_write(xstate, outbuf, outpos);
IF_DESKTOP(total_written += outpos;)
outpos = 0;
}
2002-11-28 09:09:47 +00:00
stackp += i;
i = de_stack - stackp;
} while (i > 0);
2002-11-28 09:09:47 +00:00
} else {
memcpy(outbuf + outpos, stackp, i);
outpos += i;
}
}
2002-11-28 09:09:47 +00:00
/* Generate the new entry. */
if (free_ent < maxmaxcode) {
tab_prefixof(free_ent) = (unsigned short) oldcode;
tab_suffixof(free_ent) = (unsigned char) finchar;
free_ent++;
2002-11-28 09:09:47 +00:00
}
2002-11-28 09:09:47 +00:00
/* Remember previous code. */
oldcode = incode;
}
2002-11-28 09:09:47 +00:00
} while (rsize > 0);
if (outpos > 0) {
xtransformer_write(xstate, outbuf, outpos);
IF_DESKTOP(total_written += outpos;)
}
retval = IF_DESKTOP(total_written) + 0;
err:
free(inbuf);
free(outbuf);
free(htab);
free(codetab);
return retval;
}