bzip2: fix two crashes on corrupted archives

As it turns out, longjmp'ing into freed stack is not healthy... function old new delta unpack_usage_messages - 97 +97 unpack_bz2_stream 369 409 +40 get_next_block 1667 1677 +10 get_bits 156 155 -1 start_bunzip 212 183 -29 bb_show_usage 181 120 -61 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 2/3 up/down: 147/-91) Total: 56 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2018-04-08 20:02:01 +02:00 · 2018-04-08 20:02:01 +02:00 · 38ccd6af8a
commit 38ccd6af8a
parent 8e2174e9bd
10 changed files with 99 additions and 36 deletions
--- a/archival/libarchive/decompress_bunzip2.c
+++ b/archival/libarchive/decompress_bunzip2.c
@ -100,7 +100,7 @@ struct bunzip_data {
 	unsigned dbufSize;
 	/* For I/O error handling */
-	jmp_buf jmpbuf;
+	jmp_buf *jmpbuf;
 	/* Big things go last (register-relative addressing can be larger for big offsets) */
 	uint32_t crc32Table[256];
@ -127,7 +127,7 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 			/* if "no input fd" case: in_fd == -1, read fails, we jump */
 			bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
 			if (bd->inbufCount <= 0)
-				longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
+				longjmp(*bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
 			bd->inbufPos = 0;
 		}
@ -151,12 +151,12 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 	return bits;
 }
 //#define get_bits(bd, n) (dbg("%d:get_bits()", __LINE__), get_bits(bd, n))
 /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
 static int get_next_block(bunzip_data *bd)
 {
-	struct group_data *hufGroup;
+	int groupCount, selector,
 	int groupCount, *base, *limit, selector,
 		i, j, symCount, symTotal, nSelectors, byteCount[256];
 	uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
 	uint32_t *dbuf;
@ -179,15 +179,19 @@ static int get_next_block(bunzip_data *bd)
 	i = get_bits(bd, 24);
 	j = get_bits(bd, 24);
 	bd->headerCRC = get_bits(bd, 32);
-	if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
+	if ((i == 0x177245) && (j == 0x385090))
-	if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
+		return RETVAL_LAST_BLOCK;
 	if ((i != 0x314159) || (j != 0x265359))
 		return RETVAL_NOT_BZIP_DATA;
 	/* We can add support for blockRandomised if anybody complains.  There was
 	   some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
 	   it didn't actually work. */
-	if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
+	if (get_bits(bd, 1))
 		return RETVAL_OBSOLETE_INPUT;
 	origPtr = get_bits(bd, 24);
-	if (origPtr > bd->dbufSize) return RETVAL_DATA_ERROR;
+	if (origPtr > bd->dbufSize)
 		return RETVAL_DATA_ERROR;
 	/* mapping table: if some byte values are never used (encoding things
 	   like ascii text), the compression code removes the gaps to have fewer
@ -231,13 +235,21 @@ static int get_next_block(bunzip_data *bd)
 		/* Get next value */
 		int n = 0;
 		while (get_bits(bd, 1)) {
-			if (n >= groupCount) return RETVAL_DATA_ERROR;
+			if (n >= groupCount)
 				return RETVAL_DATA_ERROR;
 			n++;
 		}
 		/* Decode MTF to get the next selector */
 		tmp_byte = mtfSymbol[n];
 		while (--n >= 0)
 			mtfSymbol[n + 1] = mtfSymbol[n];
 //We catch it later, in the second loop where we use selectors[i].
 //Maybe this is a better place, though?
 //		if (tmp_byte >= groupCount) {
 //			dbg("%d: selectors[%d]:%d groupCount:%d",
 //					__LINE__, i, tmp_byte, groupCount);
 //			return RETVAL_DATA_ERROR;
 //		}
 		mtfSymbol[0] = selectors[i] = tmp_byte;
 	}
@ -248,6 +260,8 @@ static int get_next_block(bunzip_data *bd)
 		uint8_t length[MAX_SYMBOLS];
 		/* 8 bits is ALMOST enough for temp[], see below */
 		unsigned temp[MAX_HUFCODE_BITS+1];
 		struct group_data *hufGroup;
 		int *base, *limit;
 		int minLen, maxLen, pp, len_m1;
 		/* Read Huffman code lengths for each symbol.  They're stored in
@ -283,8 +297,10 @@ static int get_next_block(bunzip_data *bd)
 		/* Find largest and smallest lengths in this group */
 		minLen = maxLen = length[0];
 		for (i = 1; i < symCount; i++) {
-			if (length[i] > maxLen) maxLen = length[i];
+			if (length[i] > maxLen)
-			else if (length[i] < minLen) minLen = length[i];
+				maxLen = length[i];
 			else if (length[i] < minLen)
 				minLen = length[i];
 		}
 		/* Calculate permute[], base[], and limit[] tables from length[].
@ -320,7 +336,8 @@ static int get_next_block(bunzip_data *bd)
 		/* Count symbols coded for at each bit length */
 		/* NB: in pathological cases, temp[8] can end ip being 256.
 		 * That's why uint8_t is too small for temp[]. */
-		for (i = 0; i < symCount; i++) temp[length[i]]++;
+		for (i = 0; i < symCount; i++)
 			temp[length[i]]++;
 		/* Calculate limit[] (the largest symbol-coding value at each bit
 		 * length, which is (previous limit<<1)+symbols at this level), and
@ -363,12 +380,22 @@ static int get_next_block(bunzip_data *bd)
 	runPos = dbufCount = selector = 0;
 	for (;;) {
 		struct group_data *hufGroup;
 		int *base, *limit;
 		int nextSym;
 		uint8_t ngrp;
 		/* Fetch next Huffman coding group from list. */
 		symCount = GROUP_SIZE - 1;
-		if (selector >= nSelectors) return RETVAL_DATA_ERROR;
+		if (selector >= nSelectors)
-		hufGroup = bd->groups + selectors[selector++];
+			return RETVAL_DATA_ERROR;
 		ngrp = selectors[selector++];
 		if (ngrp >= groupCount) {
 			dbg("%d selectors[%d]:%d groupCount:%d",
 				__LINE__, selector-1, ngrp, groupCount);
 			return RETVAL_DATA_ERROR;
 		}
 		hufGroup = bd->groups + ngrp;
 		base = hufGroup->base - 1;
 		limit = hufGroup->limit - 1;
@ -403,7 +430,8 @@ static int get_next_block(bunzip_data *bd)
 		}
 		/* Figure how many bits are in next symbol and unget extras */
 		i = hufGroup->minLen;
-		while (nextSym > limit[i]) ++i;
+		while (nextSym > limit[i])
 			++i;
 		j = hufGroup->maxLen - i;
 		if (j < 0)
 			return RETVAL_DATA_ERROR;
@ -671,7 +699,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
   should work for NOFORK applets too, we must be extremely careful to not leak
   any allocations! */
-int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
+int FAST_FUNC start_bunzip(
 		void *jmpbuf,
 		bunzip_data **bdp,
 		int in_fd,
 		const void *inbuf, int len)
 {
 	bunzip_data *bd;
@ -683,11 +714,14 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 	/* Figure out how much data to allocate */
 	i = sizeof(bunzip_data);
-	if (in_fd != -1) i += IOBUF_SIZE;
+	if (in_fd != -1)
 		i += IOBUF_SIZE;
 	/* Allocate bunzip_data.  Most fields initialize to zero. */
 	bd = *bdp = xzalloc(i);
 	bd->jmpbuf = jmpbuf;
 	/* Setup input buffer */
 	bd->in_fd = in_fd;
 	if (-1 == in_fd) {
@ -702,10 +736,6 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 	/* Init the CRC32 table (big endian) */
 	crc32_filltable(bd->crc32Table, 1);
 	/* Setup for I/O error handling via longjmp */
 	i = setjmp(bd->jmpbuf);
 	if (i) return i;
 	/* Ensure that file starts with "BZh['1'-'9']." */
 	/* Update: now caller verifies 1st two bytes, makes .gz/.bz2
 	 * integration easier */
@ -752,8 +782,12 @@ unpack_bz2_stream(transformer_state_t *xstate)
 	outbuf = xmalloc(IOBUF_SIZE);
 	len = 0;
 	while (1) { /* "Process one BZ... stream" loop */
 		jmp_buf jmpbuf;
-		i = start_bunzip(&bd, xstate->src_fd, outbuf + 2, len);
+		/* Setup for I/O error handling via longjmp */
 		i = setjmp(jmpbuf);
 		if (i == 0)
 			i = start_bunzip(&jmpbuf, &bd, xstate->src_fd, outbuf + 2, len);
 		if (i == 0) {
 			while (1) { /* "Produce some output bytes" loop */
--- a/archival/libarchive/decompress_gunzip.c
+++ b/archival/libarchive/decompress_gunzip.c
@ -32,7 +32,6 @@
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
 #include <setjmp.h>
 #include "libbb.h"
 #include "bb_archive.h"
--- a/coreutils/test.c
+++ b/coreutils/test.c
@ -76,7 +76,6 @@
 //usage:       "1\n"
 #include "libbb.h"
 #include <setjmp.h>
 /* This is a NOFORK applet. Be very careful! */
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@ -210,7 +210,7 @@ const llist_t *find_list_entry2(const llist_t *list, const char *filename) FAST_
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
+int start_bunzip(void *, bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
 /* NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
 * in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0: */
 int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@ -102,14 +102,21 @@ static const char *unpack_usage_messages(void)
 	char *outbuf = NULL;
 	bunzip_data *bd;
 	int i;
 	jmp_buf jmpbuf;
-	i = start_bunzip(&bd,
+	/* Setup for I/O error handling via longjmp */
 	i = setjmp(jmpbuf);
 	if (i == 0) {
 		i = start_bunzip(&jmpbuf,
 			&bd,
 			/* src_fd: */ -1,
 			/* inbuf:  */ packed_usage,
-			/* len:    */ sizeof(packed_usage));
+			/* len:    */ sizeof(packed_usage)
-	/* read_bunzip can longjmp to start_bunzip, and ultimately
+		);
-	 * end up here with i != 0 on read data errors! Not trivial */
+	}
-	if (!i) {
+	/* read_bunzip can longjmp and end up here with i != 0
 	 * on read data errors! Not trivial */
 	if (i == 0) {
 		/* Cannot use xmalloc: will leak bd in NOFORK case! */
 		outbuf = malloc_or_warn(sizeof(UNPACKED_USAGE));
 		if (outbuf)
--- a/miscutils/bbconfig.c
+++ b/miscutils/bbconfig.c
@ -44,13 +44,22 @@ int bbconfig_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
 {
 #if ENABLE_FEATURE_COMPRESS_BBCONFIG
 	bunzip_data *bd;
-	int i = start_bunzip(&bd,
+	int i;
 	jmp_buf jmpbuf;
 	/* Setup for I/O error handling via longjmp */
 	i = setjmp(jmpbuf);
 	if (i == 0) {
 		i = start_bunzip(&jmpbuf,
 			&bd,
 			/* src_fd: */ -1,
 			/* inbuf:  */ bbconfig_config_bz2,
-			/* len:    */ sizeof(bbconfig_config_bz2));
+			/* len:    */ sizeof(bbconfig_config_bz2)
-	/* read_bunzip can longjmp to start_bunzip, and ultimately
+		);
-	 * end up here with i != 0 on read data errors! Not trivial */
+	}
-	if (!i) {
+	/* read_bunzip can longjmp and end up here with i != 0
 	 * on read data errors! Not trivial */
 	if (i == 0) {
 		/* Cannot use xmalloc: will leak bd in NOFORK case! */
 		char *outbuf = malloc_or_warn(sizeof(bbconfig_config));
 		if (outbuf) {
--- a/shell/ash.c
+++ b/shell/ash.c
@ -177,7 +177,6 @@
 #define JOBS ENABLE_ASH_JOB_CONTROL
 #include <setjmp.h>
 #include <fnmatch.h>
 #include <sys/times.h>
 #include <sys/utsname.h> /* for setting $HOSTNAME */
--- a/testsuite/bunzip2.tests
+++ b/testsuite/bunzip2.tests
@ -552,6 +552,22 @@ if test "${0##*/}" = "bunzip2.tests"; then
 	echo "FAIL: $unpack: pbzip_4m_zeros file"
 	FAILCOUNT=$((FAILCOUNT + 1))
    fi
    errout="`${bb}bunzip2 <bz2_issue_11.bz2 2>&1 >/dev/null`"
    if test x"$errout:$?" = x"bunzip2: bunzip error -5:1"; then
 	echo "PASS: $unpack: bz2_issue_11.bz2 corrupted example"
    else
 	echo "FAIL: $unpack: bz2_issue_11.bz2 corrupted example"
 	FAILCOUNT=$((FAILCOUNT + 1))
    fi
    errout="`${bb}bunzip2 <bz2_issue_12.bz2 2>&1 >/dev/null`"
    if test x"$errout:$?" = x"bunzip2: bunzip error -3:1"; then
 	echo "PASS: $unpack: bz2_issue_12.bz2 corrupted example"
    else
 	echo "FAIL: $unpack: bz2_issue_12.bz2 corrupted example"
 	FAILCOUNT=$((FAILCOUNT + 1))
    fi
 fi
 exit $((FAILCOUNT <= 255 ? FAILCOUNT : 255))
--- a/testsuite/bz2_issue_11.bz2
+++ b/testsuite/bz2_issue_11.bz2
--- a/testsuite/bz2_issue_12.bz2
+++ b/testsuite/bz2_issue_12.bz2