b4c11c1397
The performance and number of processes for a "depmod -a" with gzipped modules was abysmal. This patch adds a code path without fork, benefiting all users of xmalloc_open_zipped_read_close. "modinfo radeon.ko.gz", a single-file reader, got 30% faster. "depmod -a", which used to fork over 800 times, got 20% faster. Heavily based on a patch by Lauri Kasanen <curaga@operamail.com> function old new delta setup_transformer_on_fd - 159 +159 transformer_write - 122 +122 fork_transformer - 112 +112 xmalloc_open_zipped_read_close 63 118 +55 read_bunzip 1866 1896 +30 xtransformer_write - 19 +19 unzip_main 2449 2462 +13 bbunpack 755 766 +11 unpack_lzma_stream 2717 2723 +6 unpack_xz_stream 2393 2397 +4 unpack_Z_stream 1173 1175 +2 inflate_unzip 111 105 -6 check_signature16 70 63 -7 unpack_bz2_stream 359 349 -10 unpack_unxz 12 - -12 unpack_unlzma 12 - -12 unpack_uncompress 12 - -12 unpack_gunzip 12 - -12 unpack_bunzip2 12 - -12 open_transformer 106 92 -14 inflate_unzip_internal 1945 1916 -29 unpack_gz_stream 693 655 -38 open_zipped 89 47 -42 setup_unzip_on_fd 142 53 -89 ------------------------------------------------------------------------------ (add/remove: 4/5 grow/shrink: 7/8 up/down: 533/-295) Total: 238 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
132 lines
3.8 KiB
C
132 lines
3.8 KiB
C
/*
|
|
* This file uses XZ Embedded library code which is written
|
|
* by Lasse Collin <lasse.collin@tukaani.org>
|
|
* and Igor Pavlov <http://7-zip.org/>
|
|
*
|
|
* See README file in unxz/ directory for more information.
|
|
*
|
|
* This file is:
|
|
* Copyright (C) 2010 Denys Vlasenko <vda.linux@googlemail.com>
|
|
* Licensed under GPLv2, see file LICENSE in this source tree.
|
|
*/
|
|
#include "libbb.h"
|
|
#include "bb_archive.h"
|
|
|
|
/* Function attribute and linkage applied to the XZ code included below */
#define XZ_FUNC   FAST_FUNC
#define XZ_EXTERN static

/* Presumably selects the decoder mode requested via XZ_DYNALLOC below */
#define XZ_DEC_DYNALLOC

/* Unsupported hash functions: skip the check instead of failing */
#define XZ_DEC_ANY_CHECK  1

/* The XZ code's own crc32 is not used; xz_crc32() below is supplied instead */
#define XZ_INTERNAL_CRC32 0
|
|
static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
|
|
{
|
|
return ~crc32_block_endian0(~crc, buf, size, global_crc32_table);
|
|
}
|
|
|
|
/*
 * 32-bit unaligned loads/stores for the XZ code, built on the
 * arch-optimized move_{from,to}_unaligned32() and SWAP_{LE,BE}32() helpers.
 * The get_* forms are statement expressions whose value is the swapped word.
 */
#define get_unaligned_le32(buf) ({ \
	uint32_t le32_tmp; \
	move_from_unaligned32(le32_tmp, buf); \
	SWAP_LE32(le32_tmp); \
})
#define get_unaligned_be32(buf) ({ \
	uint32_t be32_tmp; \
	move_from_unaligned32(be32_tmp, buf); \
	SWAP_BE32(be32_tmp); \
})
#define put_unaligned_le32(val, buf) \
	move_to_unaligned32(buf, SWAP_LE32(val))
#define put_unaligned_be32(val, buf) \
	move_to_unaligned32(buf, SWAP_BE32(val))
|
|
|
|
#include "unxz/xz_dec_bcj.c"
|
|
#include "unxz/xz_dec_lzma2.c"
|
|
#include "unxz/xz_dec_stream.c"
|
|
|
|
IF_DESKTOP(long long) int FAST_FUNC
|
|
unpack_xz_stream(transformer_state_t *xstate)
|
|
{
|
|
enum xz_ret xz_result;
|
|
struct xz_buf iobuf;
|
|
struct xz_dec *state;
|
|
unsigned char *membuf;
|
|
IF_DESKTOP(long long) int total = 0;
|
|
|
|
if (!global_crc32_table)
|
|
global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0);
|
|
|
|
memset(&iobuf, 0, sizeof(iobuf));
|
|
membuf = xmalloc(2 * BUFSIZ);
|
|
iobuf.in = membuf;
|
|
iobuf.out = membuf + BUFSIZ;
|
|
iobuf.out_size = BUFSIZ;
|
|
|
|
if (!xstate || xstate->check_signature == 0) {
|
|
/* Preload XZ file signature */
|
|
strcpy((char*)membuf, HEADER_MAGIC);
|
|
iobuf.in_size = HEADER_MAGIC_SIZE;
|
|
} /* else: let xz code read & check it */
|
|
|
|
/* Limit memory usage to about 64 MiB. */
|
|
state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024);
|
|
|
|
xz_result = X_OK;
|
|
while (1) {
|
|
if (iobuf.in_pos == iobuf.in_size) {
|
|
int rd = safe_read(xstate->src_fd, membuf, BUFSIZ);
|
|
if (rd < 0) {
|
|
bb_error_msg(bb_msg_read_error);
|
|
total = -1;
|
|
break;
|
|
}
|
|
if (rd == 0 && xz_result == XZ_STREAM_END)
|
|
break;
|
|
iobuf.in_size = rd;
|
|
iobuf.in_pos = 0;
|
|
}
|
|
if (xz_result == XZ_STREAM_END) {
|
|
/*
|
|
* Try to start decoding next concatenated stream.
|
|
* Stream padding must always be a multiple of four
|
|
* bytes to preserve four-byte alignment. To keep the
|
|
* code slightly smaller, we aren't as strict here as
|
|
* the .xz spec requires. We just skip all zero-bytes
|
|
* without checking the alignment and thus can accept
|
|
* files that aren't valid, e.g. the XZ utils test
|
|
* files bad-0pad-empty.xz and bad-0catpad-empty.xz.
|
|
*/
|
|
do {
|
|
if (membuf[iobuf.in_pos] != 0) {
|
|
xz_dec_reset(state);
|
|
goto do_run;
|
|
}
|
|
iobuf.in_pos++;
|
|
} while (iobuf.in_pos < iobuf.in_size);
|
|
}
|
|
do_run:
|
|
// bb_error_msg(">in pos:%d size:%d out pos:%d size:%d",
|
|
// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size);
|
|
xz_result = xz_dec_run(state, &iobuf);
|
|
// bb_error_msg("<in pos:%d size:%d out pos:%d size:%d r:%d",
|
|
// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size, xz_result);
|
|
if (iobuf.out_pos) {
|
|
xtransformer_write(xstate, iobuf.out, iobuf.out_pos);
|
|
IF_DESKTOP(total += iobuf.out_pos;)
|
|
iobuf.out_pos = 0;
|
|
}
|
|
if (xz_result == XZ_STREAM_END) {
|
|
/*
|
|
* Can just "break;" here, if not for concatenated
|
|
* .xz streams.
|
|
* Checking for padding may require buffer
|
|
* replenishment. Can't do it here.
|
|
*/
|
|
continue;
|
|
}
|
|
if (xz_result != XZ_OK && xz_result != XZ_UNSUPPORTED_CHECK) {
|
|
bb_error_msg("corrupted data");
|
|
total = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
xz_dec_end(state);
|
|
free(membuf);
|
|
|
|
return total;
|
|
}
|