rename archival/libunarchive -> archival/libarchive; move bz/ into it

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko
2010-11-03 02:38:31 +01:00
parent 5e9934028a
commit 833d4e7f84
74 changed files with 57 additions and 57 deletions

View File

@@ -0,0 +1,64 @@
# Makefile for busybox
#
# Copyright (C) 1999-2004 by Erik Andersen <andersen@codepoet.org>
#
# Licensed under GPLv2 or later, see file LICENSE in this source tree.
# Start with an empty object list; every "lib-$(CONFIG_...)" line below
# appends to lib-y only when the named option expands to "y".
lib-y:=
# Objects that are appended to lib-y (see the ifneq at the end) as soon as
# at least one of the conditional entries below contributed an object.
COMMON_FILES:= \
\
data_skip.o \
data_extract_all.o \
data_extract_to_stdout.o \
\
filter_accept_all.o \
filter_accept_list.o \
filter_accept_reject_list.o \
\
header_skip.o \
header_list.o \
header_verbose_list.o \
\
seek_by_read.o \
seek_by_jump.o \
\
data_align.o \
find_list_entry.o \
init_handle.o
# Objects shared by the CONFIG_DPKG and CONFIG_DPKG_DEB entries below.
DPKG_FILES:= \
get_header_ar.o \
unpack_ar_archive.o \
get_header_tar.o \
filter_accept_list_reassign.o
# NOTE(review): "INSERT" looks like a placeholder that a build-file generator
# replaces with per-applet rules - confirm before touching this line.
INSERT
# Per-option objects. The same object may be listed for several options.
lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o
lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o
lib-$(CONFIG_UNLZMA) += decompress_unlzma.o
lib-$(CONFIG_UNXZ) += decompress_unxz.o
lib-$(CONFIG_CPIO) += get_header_cpio.o
lib-$(CONFIG_DPKG) += $(DPKG_FILES)
lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES)
lib-$(CONFIG_GUNZIP) += decompress_unzip.o
lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o
lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o
lib-$(CONFIG_TAR) += get_header_tar.o
lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o
lib-$(CONFIG_UNZIP) += decompress_unzip.o
lib-$(CONFIG_LZOP) += lzo1x_1.o lzo1x_1o.o lzo1x_d.o
lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o
lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o
lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o
lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o
lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o
lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o
lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o
lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += decompress_bunzip2.o
lib-$(CONFIG_FEATURE_TAR_TO_COMMAND) += data_extract_to_command.o
# Pull in the common objects only if something above was actually selected,
# so that a configuration with no archival options adds no objects here.
ifneq ($(lib-y),)
lib-y += $(COMMON_FILES)
endif

View File

@@ -0,0 +1,44 @@
bzip2 applet in busybox is based on lightly-modified source
of bzip2 version 1.0.4. bzip2 source is distributed
under the following conditions (copied verbatim from LICENSE file)
===========================================================
This program, "bzip2", the associated library "libbzip2", and all
documentation, are copyright (C) 1996-2006 Julian R Seward. All
rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
3. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
4. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Julian Seward, Cambridge, UK.
jseward@bzip.org
bzip2/libbzip2 version 1.0.4 of 20 December 2006

View File

@@ -0,0 +1,90 @@
This file is an abridged version of README from bzip2 1.0.4
Build instructions (which are not relevant to busyboxed bzip2)
are removed.
===========================================================
This is the README for bzip2/libbzip2.
This version is fully compatible with the previous public releases.
------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in this file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------
Please read and be aware of the following:
WARNING:
This program and library (attempts to) compress data by
performing several non-trivial transformations on it.
Unless you are 100% familiar with *all* the algorithms
contained herein, and with the consequences of modifying them,
you should NOT meddle with the compression or decompression
machinery. Incorrect changes can and very likely *will*
lead to disastrous loss of data.
DISCLAIMER:
I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.
Every compression of a file implies an assumption that the
compressed file can be decompressed to reproduce the original.
Great efforts in design, coding and testing have been made to
ensure that this program works correctly. However, the complexity
of the algorithms, and, in particular, the presence of various
special cases in the code which occur with very low but non-zero
probability make it impossible to rule out the possibility of bugs
remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS
PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER
SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
That is not to say this program is inherently unreliable.
Indeed, I very much hope the opposite is true. bzip2/libbzip2
has been carefully constructed and extensively tested.
PATENTS:
To the best of my knowledge, bzip2/libbzip2 does not use any
patented algorithms. However, I do not have the resources
to carry out a patent search. Therefore I cannot give any
guarantee of the above statement.
I hope you find bzip2 useful. Feel free to contact me at
jseward@bzip.org
if you have any suggestions or queries. Many people mailed me with
comments, suggestions and patches after the releases of bzip-0.15,
bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1,
1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this
feedback. I thank you for your comments.
bzip2's "home" is http://www.bzip.org/
Julian Seward
jseward@bzip.org
Cambridge, UK.
18 July 1996 (version 0.15)
25 August 1996 (version 0.21)
7 August 1997 (bzip2, version 0.1)
29 August 1997 (bzip2, version 0.1pl2)
23 August 1998 (bzip2, version 0.9.0)
8 June 1999 (bzip2, version 0.9.5)
4 Sept 1999 (bzip2, version 0.9.5d)
5 May 2000 (bzip2, version 1.0pre8)
30 December 2001 (bzip2, version 1.0.2pre1)
15 February 2005 (bzip2, version 1.0.3)
20 December 2006 (bzip2, version 1.0.4)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,431 @@
/*
* bzip2 is written by Julian Seward <jseward@bzip.org>.
* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
* See README and LICENSE files in this directory for more information.
*/
/*-------------------------------------------------------------*/
/*--- Library top-level functions. ---*/
/*--- bzlib.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
/* CHANGES
* 0.9.0 -- original version.
* 0.9.0a/b -- no changes in this file.
* 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
* fixed bzWrite/bzRead to ignore zero-length requests.
* fixed bzread to correctly handle read requests after EOF.
* wrong parameter order in call to bzDecompressInit in
* bzBuffToBuffDecompress. Fixed.
*/
/* #include "bzlib_private.h" */
/*---------------------------------------------------*/
/*--- Compression stuff ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
#if BZ_LIGHT_DEBUG
static void bz_assert_fail(int errcode)
{
	/*
	 * Called when an internal consistency check fails: print the numeric
	 * code of the failed check and terminate.
	 * Disabled hint kept for reference:
	 *   if (errcode == 1007) bb_error_msg_and_die("probably bad RAM");
	 */
	bb_error_msg_and_die("internal error %d", errcode);
}
#endif
/*---------------------------------------------------*/
static void prepare_new_block(EState *s)
{
	int byte_val;

	/* Nothing collected, nothing compressed, nothing sent out yet */
	s->nblock = 0;
	s->numZ = 0;
	s->state_out_pos = 0;

	/* Per-block CRC starts from its initial value */
	BZ_INITIALISE_CRC(s->blockCRC);

	/* Forget which byte values occurred in the previous block
	 * (an inlined memset would be nice to have here) */
	for (byte_val = 0; byte_val != 256; byte_val++) {
		s->inUse[byte_val] = 0;
	}

	/* Count the block we are about to fill */
	s->blockNo++;
}
/*---------------------------------------------------*/
static ALWAYS_INLINE void init_RL(EState *s)
{
	/* Reset the input run-length state: zero-length run, and a "byte"
	 * value of 256, which no real input byte can have */
	s->state_in_len = 0;
	s->state_in_ch = 256;
}
static int isempty_RL(EState *s)
{
	/* A run is pending only if it has a real byte value and a positive
	 * length; in every other case report "empty" (1) */
	if (s->state_in_ch < 256 && s->state_in_len > 0) {
		return 0;
	}
	return 1;
}
/*---------------------------------------------------*/
static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k)
{
	/*
	 * Allocate and initialise the compressor state for strm.
	 * blockSize100k selects the block size in units of 100000 bytes.
	 * Allocation goes through xzalloc/xmalloc, so there is no error return.
	 */
	EState *es;
	int32_t block_bytes;

	block_bytes = 100000 * blockSize100k;

	es = xzalloc(sizeof(*es));
	es->strm = strm;

	/* arr1 is viewed both as 32-bit "ptr" and as 16-bit "mtfv" */
	es->arr1 = xmalloc(block_bytes * sizeof(uint32_t));
	es->mtfv = (uint16_t*)es->arr1;
	es->ptr = (uint32_t*)es->arr1;

	/* arr2 is viewed as the byte-wide "block" */
	es->arr2 = xmalloc((block_bytes + BZ_N_OVERSHOOT) * sizeof(uint32_t));
	es->block = (uint8_t*)es->arr2;

	es->ftab = xmalloc(65537 * sizeof(uint32_t));
	es->crc32table = crc32_filltable(NULL, 1);

	/* Start out accepting input in normal running mode */
	es->state = BZ_S_INPUT;
	es->mode = BZ_M_RUNNING;
	es->blockSize100k = blockSize100k;
	es->nblockMAX = block_bytes - 19;

	strm->state = es;
	/*strm->total_in = 0;*/
	strm->total_out = 0;

	init_RL(es);
	prepare_new_block(es);
}
/*---------------------------------------------------*/
static void add_pair_to_block(EState *s)
{
	/*
	 * Append the pending run (byte state_in_ch repeated state_in_len times)
	 * to the block. Runs of 1..3 are stored literally; any other length is
	 * stored as four copies of the byte followed by a count byte (len - 4).
	 * The block CRC is updated once per input byte of the run.
	 */
	const uint8_t run_byte = (uint8_t)(s->state_in_ch);
	int32_t k;
	int32_t literal_copies;

	for (k = 0; k < s->state_in_len; k++) {
		BZ_UPDATE_CRC(s, s->blockCRC, run_byte);
	}
	s->inUse[s->state_in_ch] = 1;

	if (s->state_in_len >= 1 && s->state_in_len <= 3) {
		literal_copies = s->state_in_len;
	} else {
		/* The count byte is data too, so mark its value as used */
		s->inUse[s->state_in_len - 4] = 1;
		literal_copies = 4;
	}

	for (k = 0; k < literal_copies; k++) {
		s->block[s->nblock] = run_byte;
		s->nblock++;
	}

	if (literal_copies == 4) {
		s->block[s->nblock] = (uint8_t)(s->state_in_len - 4);
		s->nblock++;
	}
}
/*---------------------------------------------------*/
static void flush_RL(EState *s)
{
	/* Store the pending run, if a real byte value is pending... */
	if (s->state_in_ch < 256) {
		add_pair_to_block(s);
	}
	/* ...then reset the run-length state (same as init_RL) */
	s->state_in_ch = 256;
	s->state_in_len = 0;
}
/*---------------------------------------------------*/
/*
 * Feed one input byte (zchh0) into the run-length state of zs.
 *
 * - Different byte while a run of length 1 is pending: the pending byte is
 *   stored directly (CRC, inUse and block updated) and the new byte becomes
 *   the pending one; state_in_len stays 1.
 * - Different byte otherwise, or the run has reached 255: the pending run
 *   (if any) is stored via add_pair_to_block() and a new run of 1 starts.
 * - Same byte: the pending run just grows by one.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement (safe in
 * unbraced if/else); the zs argument is parenthesized so any pointer
 * expression may be passed. zs is evaluated more than once.
 */
#define ADD_CHAR_TO_BLOCK(zs, zchh0) \
do { \
	uint32_t zchh = (uint32_t)(zchh0); \
	/*-- fast track the common case --*/ \
	if (zchh != (zs)->state_in_ch && (zs)->state_in_len == 1) { \
		uint8_t ch = (uint8_t)((zs)->state_in_ch); \
		BZ_UPDATE_CRC((zs), (zs)->blockCRC, ch); \
		(zs)->inUse[(zs)->state_in_ch] = 1; \
		(zs)->block[(zs)->nblock] = (uint8_t)ch; \
		(zs)->nblock++; \
		(zs)->state_in_ch = zchh; \
	} \
	/*-- general, uncommon cases --*/ \
	else if (zchh != (zs)->state_in_ch || (zs)->state_in_len == 255) { \
		if ((zs)->state_in_ch < 256) \
			add_pair_to_block(zs); \
		(zs)->state_in_ch = zchh; \
		(zs)->state_in_len = 1; \
	} else { \
		(zs)->state_in_len++; \
	} \
} while (0)
/*---------------------------------------------------*/
static
void /*Bool*/ copy_input_until_stop(EState* s)
{
	/*
	 * Move bytes from the stream's input buffer into the run-length
	 * encoder until either the input is exhausted or the block is full.
	 *
	 * Upstream keeps a separate "fast track" loop for BZ_M_RUNNING; here it
	 * was identical to the general loop (and compiled out), so a single
	 * loop serves every mode. The avail_in_expect bookkeeping is not needed
	 * because strm->avail_in always holds the same value.
	 */
	bz_stream *in = s->strm;

	while (in->avail_in != 0 && s->nblock < s->nblockMAX) {
		ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(in->next_in));
		in->next_in++;
		in->avail_in--;
		/*in->total_in++;*/
	}
}
/*---------------------------------------------------*/
static
void /*Bool*/ copy_output_until_stop(EState* s)
{
	/*
	 * Copy already-compressed bytes zbits[state_out_pos .. numZ-1] into the
	 * stream's output buffer, stopping when the output buffer is full or
	 * everything produced for this block has been delivered.
	 */
	bz_stream *out = s->strm;

	for (; out->avail_out != 0 && s->state_out_pos < s->numZ; s->state_out_pos++) {
		*(out->next_out) = s->zbits[s->state_out_pos];
		out->next_out++;
		out->avail_out--;
		out->total_out++;
	}
}
/*---------------------------------------------------*/
/*
 * Drive the compressor for one call: alternate between draining compressed
 * output (BZ_S_OUTPUT) and collecting input (BZ_S_INPUT) until no further
 * progress is possible with the buffers currently attached to strm.
 */
static
void /*Bool*/ handle_compress(bz_stream *strm)
{
/*Bool progress_in = False;*/
/*Bool progress_out = False;*/
EState* s = strm->state;
while (1) {
if (s->state == BZ_S_OUTPUT) {
/*progress_out |=*/ copy_output_until_stop(s);
/* Output buffer filled up before the block was fully delivered */
if (s->state_out_pos < s->numZ) break;
/* Finishing, no input left and no pending run: nothing more to do */
if (s->mode == BZ_M_FINISHING
//# && s->avail_in_expect == 0
&& s->strm->avail_in == 0
&& isempty_RL(s))
break;
/* Block fully delivered: start collecting the next one */
prepare_new_block(s);
s->state = BZ_S_INPUT;
#ifdef FLUSH_IS_UNUSED
if (s->mode == BZ_M_FLUSHING
&& s->avail_in_expect == 0
&& isempty_RL(s))
break;
#endif
}
if (s->state == BZ_S_INPUT) {
/*progress_in |=*/ copy_input_until_stop(s);
//#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
/* Not in running mode and input consumed: store the pending run and
 * compress what we have; the second argument is nonzero only when
 * finishing */
if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) {
flush_RL(s);
BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING));
s->state = BZ_S_OUTPUT;
} else
/* Block is full: compress it and switch to delivering output */
if (s->nblock >= s->nblockMAX) {
BZ2_compressBlock(s, 0);
s->state = BZ_S_OUTPUT;
} else
/* Running mode, block not full, input exhausted: wait for more input */
if (s->strm->avail_in == 0) {
break;
}
}
}
/*return progress_in || progress_out;*/
}
/*---------------------------------------------------*/
/*
 * Main compression entry point.
 *
 * In running mode, action BZ_RUN compresses what it can and returns
 * BZ_RUN_OK. Any other action (BZ_FLUSH handling is compiled in only with
 * FLUSH_IS_UNUSED defined) is treated as BZ_FINISH: the stream switches to
 * finishing mode and stays there on later calls.
 *
 * In finishing mode the return value is BZ_FINISH_OK while input, a pending
 * run, or undelivered output remains, and BZ_STREAM_END once all are gone.
 */
static
int BZ2_bzCompress(bz_stream *strm, int action)
{
/*Bool progress;*/
EState* s;
s = strm->state;
switch (s->mode) {
case BZ_M_RUNNING:
if (action == BZ_RUN) {
/*progress =*/ handle_compress(strm);
/*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/
return BZ_RUN_OK;
}
#ifdef FLUSH_IS_UNUSED
else
if (action == BZ_FLUSH) {
//#s->avail_in_expect = strm->avail_in;
s->mode = BZ_M_FLUSHING;
goto case_BZ_M_FLUSHING;
}
#endif
else
/*if (action == BZ_FINISH)*/ {
//#s->avail_in_expect = strm->avail_in;
s->mode = BZ_M_FINISHING;
goto case_BZ_M_FINISHING;
}
#ifdef FLUSH_IS_UNUSED
case_BZ_M_FLUSHING:
case BZ_M_FLUSHING:
/*if (s->avail_in_expect != s->strm->avail_in)
return BZ_SEQUENCE_ERROR;*/
/*progress =*/ handle_compress(strm);
if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
return BZ_FLUSH_OK;
s->mode = BZ_M_RUNNING;
return BZ_RUN_OK;
#endif
/* Every mode other than the cases above ends up here, i.e. is handled
 * as finishing mode */
case_BZ_M_FINISHING:
/*case BZ_M_FINISHING:*/
default:
/*if (s->avail_in_expect != s->strm->avail_in)
return BZ_SEQUENCE_ERROR;*/
/*progress =*/ handle_compress(strm);
/*if (!progress) return BZ_SEQUENCE_ERROR;*/
//#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
//# return BZ_FINISH_OK;
/* Still something to consume, store or deliver: caller must call again */
if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ)
return BZ_FINISH_OK;
/*s->mode = BZ_M_IDLE;*/
return BZ_STREAM_END;
}
/* return BZ_OK; --not reached--*/
}
/*---------------------------------------------------*/
#if ENABLE_FEATURE_CLEAN_UP
static void BZ2_bzCompressEnd(bz_stream *strm)
{
	/* Release every buffer owned by the compressor state, then the state
	 * itself. strm->state is left pointing at freed memory. */
	EState *es = strm->state;

	free(es->arr1);
	free(es->arr2);
	free(es->ftab);
	free(es->crc32table);
	free(es);
}
#endif
/*---------------------------------------------------*/
/*--- Misc convenience stuff ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION
static
int BZ2_bzBuffToBuffCompress(char* dest,
		unsigned int* destLen,
		char* source,
		unsigned int sourceLen,
		int blockSize100k)
{
	/*
	 * One-shot memory-to-memory compression.
	 * On entry *destLen is the capacity of dest; on success (BZ_OK) it is
	 * reduced to the number of bytes produced.
	 * Returns BZ_PARAM_ERROR for NULL pointers or a block size outside
	 * 1..9, BZ_OUTBUFF_FULL when dest is too small, or any other code
	 * reported by BZ2_bzCompress().
	 */
	bz_stream strm;
	int ret;

	if (dest == NULL || destLen == NULL || source == NULL)
		return BZ_PARAM_ERROR;
	if (blockSize100k < 1 || blockSize100k > 9)
		return BZ_PARAM_ERROR;

	BZ2_bzCompressInit(&strm, blockSize100k);
	strm.next_in = source;
	strm.next_out = dest;
	strm.avail_in = sourceLen;
	strm.avail_out = *destLen;

	ret = BZ2_bzCompress(&strm, BZ_FINISH);
	switch (ret) {
	case BZ_STREAM_END:
		/* normal termination */
		*destLen -= strm.avail_out;
		ret = BZ_OK;
		break;
	case BZ_FINISH_OK:
		/* compressor wanted to be called again: output did not fit */
		ret = BZ_OUTBUFF_FULL;
		break;
	default:
		/* pass any other code through unchanged */
		break;
	}

	BZ2_bzCompressEnd(&strm);
	return ret;
}
#endif
/*-------------------------------------------------------------*/
/*--- end bzlib.c ---*/
/*-------------------------------------------------------------*/

View File

@@ -0,0 +1,65 @@
/*
* bzip2 is written by Julian Seward <jseward@bzip.org>.
* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
* See README and LICENSE files in this directory for more information.
*/
/*-------------------------------------------------------------*/
/*--- Public header file for the library. ---*/
/*--- bzlib.h ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
/* Actions passed as the second argument of BZ2_bzCompress() */
#define BZ_RUN 0
#define BZ_FLUSH 1
#define BZ_FINISH 2
/* Non-negative status codes */
#define BZ_OK 0
#define BZ_RUN_OK 1
#define BZ_FLUSH_OK 2
#define BZ_FINISH_OK 3
#define BZ_STREAM_END 4
/* Negative codes signal errors */
#define BZ_SEQUENCE_ERROR (-1)
#define BZ_PARAM_ERROR (-2)
#define BZ_MEM_ERROR (-3)
#define BZ_DATA_ERROR (-4)
#define BZ_DATA_ERROR_MAGIC (-5)
#define BZ_IO_ERROR (-6)
#define BZ_UNEXPECTED_EOF (-7)
#define BZ_OUTBUFF_FULL (-8)
#define BZ_CONFIG_ERROR (-9)
/*
 * Caller-visible stream descriptor: the caller supplies input and output
 * buffers through next_xxx/avail_xxx, the compressor advances them as it
 * consumes input and produces output.
 */
typedef struct bz_stream {
/* compressor's private state; set by BZ2_bzCompressInit() */
void *state;
/* next input byte to read / next output byte to write */
char *next_in;
char *next_out;
/* bytes still available at next_in / free space left at next_out */
unsigned avail_in;
unsigned avail_out;
/*unsigned long long total_in;*/
/* running count of bytes written to the output buffer */
unsigned long long total_out;
} bz_stream;
/*-- Core (low-level) library functions --*/
/* NOTE(review): these are declared static, presumably because the bz sources
 * are #included into a single translation unit - confirm against the includer. */
/* Allocate and initialise compressor state; block size is blockSize100k * 100000 bytes */
static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k);
/* Compress using the buffers in strm; action is BZ_RUN or BZ_FINISH; returns a BZ_xxx code */
static int BZ2_bzCompress(bz_stream *strm, int action);
#if ENABLE_FEATURE_CLEAN_UP
/* Free everything allocated by BZ2_bzCompressInit() */
static void BZ2_bzCompressEnd(bz_stream *strm);
#endif
/*-------------------------------------------------------------*/
/*--- end bzlib.h ---*/
/*-------------------------------------------------------------*/

View File

@@ -0,0 +1,219 @@
/*
* bzip2 is written by Julian Seward <jseward@bzip.org>.
* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
* See README and LICENSE files in this directory for more information.
*/
/*-------------------------------------------------------------*/
/*--- Private header file for the library. ---*/
/*--- bzlib_private.h ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
/* #include "bzlib.h" */
/*-- General stuff. --*/
/* One-byte boolean type and its two values */
typedef unsigned char Bool;
#define True ((Bool)1)
#define False ((Bool)0)
/*
 * AssertH(cond, errcode): with BZ_LIGHT_DEBUG enabled, calls
 * bz_assert_fail(errcode) when cond is false. Otherwise it expands to an
 * empty statement and cond is not evaluated at all.
 */
#if BZ_LIGHT_DEBUG
static void bz_assert_fail(int errcode) NORETURN;
#define AssertH(cond, errcode) \
do { \
if (!(cond)) \
bz_assert_fail(errcode); \
} while (0)
#else
#define AssertH(cond, msg) do { } while (0)
#endif
/*
 * AssertD(cond, msg): with BZ_DEBUG enabled, dies with the given message
 * string when cond is false. Otherwise it expands to an empty statement and
 * cond is not evaluated at all.
 */
#if BZ_DEBUG
#define AssertD(cond, msg) \
do { \
if (!(cond)) \
bb_error_msg_and_die("(debug build): internal error %s", msg); \
} while (0)
#else
#define AssertD(cond, msg) do { } while (0)
#endif
/*-- Header bytes. --*/
#define BZ_HDR_B 0x42 /* 'B' */
#define BZ_HDR_Z 0x5a /* 'Z' */
#define BZ_HDR_h 0x68 /* 'h' */
#define BZ_HDR_0 0x30 /* '0' */
/* The four header bytes above combined into one 32-bit value, 'B' first */
#define BZ_HDR_BZh0 0x425a6830
/*-- Constants for the back end. --*/
/* Largest alphabet: up to 256 byte values in use, plus 2 (alphaSize = nInUse + 2) */
#define BZ_MAX_ALPHA_SIZE 258
#define BZ_MAX_CODE_LEN 23
/* The two symbols used to encode runs in generateMTFValues() */
#define BZ_RUNA 0
#define BZ_RUNB 1
/* Maximum number of coding tables sendMTFValues() will use */
#define BZ_N_GROUPS 6
/* Number of MTF symbols covered by one selector (one group) */
#define BZ_G_SIZE 50
/* Number of refinement passes over the coding tables */
#define BZ_N_ITERS 4
/* Enough selectors for 900000 symbols in groups of BZ_G_SIZE, plus 2 */
#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
/*-- Stuff for doing CRCs. --*/
/*
 * CRC helpers. crcVar must be a modifiable 32-bit unsigned lvalue and is
 * evaluated more than once. All parameters are parenthesized so that
 * arbitrary expressions can be passed safely.
 * NOTE(review): these expand to a bare { } block (kept for compatibility
 * with existing call sites), so do not use them as the unbraced body of an
 * if that has an else.
 */
/* Set crcVar to the CRC start value (all bits set). */
#define BZ_INITIALISE_CRC(crcVar) \
{ \
	(crcVar) = 0xffffffffL; \
}
/* Finish the CRC by inverting all bits of crcVar. */
#define BZ_FINALISE_CRC(crcVar) \
{ \
	(crcVar) = ~(crcVar); \
}
/*
 * Fold one byte (cha, truncated to 8 bits) into crcVar, using the lookup
 * table at (s)->crc32table indexed by the top byte of crcVar XOR the byte.
 */
#define BZ_UPDATE_CRC(s, crcVar, cha) \
{ \
	(crcVar) = ((crcVar) << 8) ^ (s)->crc32table[((crcVar) >> 24) ^ ((uint8_t)(cha))]; \
}
/*-- States and modes for compression. --*/
/* Values of EState.mode */
#define BZ_M_IDLE 1
#define BZ_M_RUNNING 2
#define BZ_M_FLUSHING 3
#define BZ_M_FINISHING 4
/* Values of EState.state: delivering compressed output / collecting input */
#define BZ_S_OUTPUT 1
#define BZ_S_INPUT 2
/* NOTE(review): the three values below are presumably block-sort tuning
 * parameters; their users are not visible here - confirm. */
#define BZ_N_RADIX 2
#define BZ_N_QSORT 12
#define BZ_N_SHELL 18
/* Extra elements allocated past the end of arr2 (2 + 12 + 18 + 2 = 34) */
#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
/*-- Structure holding all the compression-side stuff. --*/
/*
 * Complete state of one compression stream. Allocated zero-filled by
 * BZ2_bzCompressInit() and reachable from bz_stream.state.
 */
typedef struct EState {
/* pointer back to the struct bz_stream */
bz_stream *strm;
/* mode this stream is in, and whether inputting */
/* or outputting data */
/* mode holds a BZ_M_xxx value, state a BZ_S_xxx value */
int32_t mode;
int32_t state;
/* remembers avail_in when flush/finish requested */
/* bbox: not needed, strm->avail_in always has the same value */
/* commented out with '//#' throughout the code */
/* uint32_t avail_in_expect; */
/* for doing the block sorting */
int32_t origPtr;
uint32_t *arr1;
uint32_t *arr2;
uint32_t *ftab;
/* aliases for arr1 and arr2 */
/* ptr and mtfv both point at arr1, block points at arr2 */
uint32_t *ptr;
uint8_t *block;
uint16_t *mtfv;
/* compressed output bytes; numZ counts how many are valid */
uint8_t *zbits;
/* guess what */
uint32_t *crc32table;
/* run-length-encoding of the input */
/* state_in_ch is the pending byte, or 256 when no run is pending */
uint32_t state_in_ch;
int32_t state_in_len;
/* input and output limits and current posns */
int32_t nblock;
int32_t nblockMAX;
int32_t numZ;
int32_t state_out_pos;
/* the buffer for bit stream creation */
/* bsBuff collects bits from the top down; bsLive is the number of valid bits */
uint32_t bsBuff;
int32_t bsLive;
/* block and combined CRCs */
uint32_t blockCRC;
uint32_t combinedCRC;
/* misc administratium */
int32_t blockNo;
int32_t blockSize100k;
/* stuff for coding the MTF values */
int32_t nMTF;
/* map of bytes used in block */
/* unseqToSeq maps each used byte value to its rank among the used values */
int32_t nInUse;
Bool inUse[256] ALIGNED(sizeof(long));
uint8_t unseqToSeq[256];
/* stuff for coding the MTF values */
int32_t mtfFreq [BZ_MAX_ALPHA_SIZE];
uint8_t selector [BZ_MAX_SELECTORS];
uint8_t selectorMtf[BZ_MAX_SELECTORS];
uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
/* stack-saving measures: these can be local, but they are too big */
/* the prefix before "__" names the function that uses the array */
int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
/* second dimension: only 3 needed; 4 makes index calculations faster */
uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
#endif
int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2];
int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2];
int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2];
int32_t mainSort__runningOrder[256];
int32_t mainSort__copyStart[256];
int32_t mainSort__copyEnd[256];
} EState;
/*-- compression. --*/
/* Forward declarations of the compression back end; the definitions live in
 * other bz source files. */
static void
BZ2_blockSort(EState*);
/* Called from handle_compress(); the int is nonzero for the final block */
static void
BZ2_compressBlock(EState*, int);
/* Resets the bit-stream writer (bsBuff and bsLive) */
static void
BZ2_bsInitWrite(EState*);
static void
BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t);
static void
BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t);
/*-------------------------------------------------------------*/
/*--- end bzlib_private.h ---*/
/*-------------------------------------------------------------*/

View File

@@ -0,0 +1,685 @@
/*
* bzip2 is written by Julian Seward <jseward@bzip.org>.
* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
* See README and LICENSE files in this directory for more information.
*/
/*-------------------------------------------------------------*/
/*--- Compression machinery (not incl block sorting) ---*/
/*--- compress.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
/* CHANGES
* 0.9.0 -- original version.
* 0.9.0a/b -- no changes in this file.
* 0.9.0c -- changed setting of nGroups in sendMTFValues()
* so as to do a bit better on small files
*/
/* #include "bzlib_private.h" */
/*---------------------------------------------------*/
/*--- Bit stream I/O ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
static void BZ2_bsInitWrite(EState *s)
{
	/* Empty bit buffer: no bits stored, none pending */
	s->bsBuff = 0;
	s->bsLive = 0;
}
/*---------------------------------------------------*/
static NOINLINE void bsFinishWrite(EState *s)
{
	/* Drain the bit buffer completely, one byte at a time from the top.
	 * A final partial byte is written out as a whole byte. */
	for (; s->bsLive > 0; s->bsLive -= 8) {
		s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
		s->numZ++;
		s->bsBuff <<= 8;
	}
}
/*---------------------------------------------------*/
static
/* Helps only on level 5, on other levels hurts. ? */
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
ALWAYS_INLINE
#endif
void bsW(EState* s, int32_t n, uint32_t v)
{
	/* First move every complete byte out of the bit buffer... */
	for (; s->bsLive >= 8; s->bsLive -= 8) {
		s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24);
		s->numZ++;
		s->bsBuff <<= 8;
	}
	/* ...then place the n bits of v directly below the bits still held */
	s->bsBuff |= v << (32 - s->bsLive - n);
	s->bsLive += n;
}
/*---------------------------------------------------*/
static void bsPutU32(EState *s, unsigned u)
{
	/* Write the low 32 bits of u as four bytes, most significant first */
	int shift;

	for (shift = 24; shift >= 0; shift -= 8) {
		bsW(s, 8, (u >> shift) & 0xff);
	}
}
/*---------------------------------------------------*/
static void bsPutU16(EState *s, unsigned u)
{
	/* Write the low 16 bits of u as two bytes, most significant first */
	int shift;

	for (shift = 8; shift >= 0; shift -= 8) {
		bsW(s, 8, (u >> shift) & 0xff);
	}
}
/*---------------------------------------------------*/
/*--- The back end proper ---*/
/*---------------------------------------------------*/
/*---------------------------------------------------*/
static void makeMaps_e(EState *s)
{
	/*
	 * Number the byte values that occur in the block consecutively from 0,
	 * in increasing byte order. unseqToSeq[] receives the number of each
	 * used byte; entries of unused bytes are left untouched. nInUse ends up
	 * as the count of used byte values.
	 */
	int byte_val;
	int32_t next_seq = 0;

	for (byte_val = 0; byte_val < 256; byte_val++) {
		if (!s->inUse[byte_val])
			continue;
		s->unseqToSeq[byte_val] = next_seq;
		next_seq++;
	}
	s->nInUse = next_seq;
}
/*---------------------------------------------------*/
/*
 * Convert the sorted block into a stream of move-to-front symbols.
 *
 * Reads the sorted order from s->ptr and the block bytes from s->block,
 * writes the symbols to s->mtfv (which shares storage with s->ptr), counts
 * each symbol in s->mtfFreq[], and stores the symbol count in s->nMTF.
 * Runs of "front of list" hits are emitted as RUNA/RUNB sequences, and an
 * end-of-block symbol (nInUse + 1) terminates the stream.
 */
static NOINLINE
void generateMTFValues(EState* s)
{
uint8_t yy[256];
int32_t i, j;
int32_t zPend;
int32_t wr;
int32_t EOB;
/*
* After sorting (eg, here),
* s->arr1[0 .. s->nblock-1] holds sorted order,
* and
* ((uint8_t*)s->arr2)[0 .. s->nblock-1]
* holds the original block data.
*
* The first thing to do is generate the MTF values,
* and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1].
*
* Because there are strictly fewer or equal MTF values
* than block values, ptr values in this area are overwritten
* with MTF values only when they are no longer needed.
*
* The final compressed bitstream is generated into the
* area starting at &((uint8_t*)s->arr2)[s->nblock]
*
* These storage aliases are set up in bzCompressInit(),
* except for the last one, which is arranged in
* compressBlock().
*/
uint32_t* ptr = s->ptr;
uint8_t* block = s->block;
uint16_t* mtfv = s->mtfv;
makeMaps_e(s);
EOB = s->nInUse+1;
for (i = 0; i <= EOB; i++)
s->mtfFreq[i] = 0;
/* wr: next write position in mtfv; zPend: length of the pending run */
wr = 0;
zPend = 0;
/* yy[] is the move-to-front list, initially in increasing order */
for (i = 0; i < s->nInUse; i++)
yy[i] = (uint8_t) i;
for (i = 0; i < s->nblock; i++) {
uint8_t ll_i;
AssertD(wr <= i, "generateMTFValues(1)");
/* Take the byte just before position ptr[i], wrapping around at 0 */
j = ptr[i] - 1;
if (j < 0)
j += s->nblock;
ll_i = s->unseqToSeq[block[j]];
AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");
if (yy[0] == ll_i) {
/* Already at the front of the list: just extend the run */
zPend++;
} else {
if (zPend > 0) {
/* Emit the pending run length as a sequence of RUNA/RUNB symbols */
zPend--;
while (1) {
if (zPend & 1) {
mtfv[wr] = BZ_RUNB; wr++;
s->mtfFreq[BZ_RUNB]++;
} else {
mtfv[wr] = BZ_RUNA; wr++;
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2) break;
zPend = (uint32_t)(zPend - 2) / 2;
/* bbox: unsigned div is easier */
};
zPend = 0;
}
{
/* Find ll_i in yy[], shifting each entry passed over one slot towards
 * the back, then put ll_i at the front */
register uint8_t rtmp;
register uint8_t* ryy_j;
register uint8_t rll_i;
rtmp = yy[1];
yy[1] = yy[0];
ryy_j = &(yy[1]);
rll_i = ll_i;
while (rll_i != rtmp) {
register uint8_t rtmp2;
ryy_j++;
rtmp2 = rtmp;
rtmp = *ryy_j;
*ryy_j = rtmp2;
};
yy[0] = rtmp;
/* j is the list position where ll_i was found; the symbol is j+1 */
j = ryy_j - &(yy[0]);
mtfv[wr] = j+1;
wr++;
s->mtfFreq[j+1]++;
}
}
}
/* Emit the run still pending at the end of the block, same encoding as above */
if (zPend > 0) {
zPend--;
while (1) {
if (zPend & 1) {
mtfv[wr] = BZ_RUNB;
wr++;
s->mtfFreq[BZ_RUNB]++;
} else {
mtfv[wr] = BZ_RUNA;
wr++;
s->mtfFreq[BZ_RUNA]++;
}
if (zPend < 2)
break;
zPend = (uint32_t)(zPend - 2) / 2;
/* bbox: unsigned div is easier */
};
zPend = 0;
}
/* Terminate the symbol stream with the end-of-block symbol */
mtfv[wr] = EOB;
wr++;
s->mtfFreq[EOB]++;
s->nMTF = wr;
}
/*---------------------------------------------------*/
/* Initial code lengths used by sendMTFValues() when seeding the coding
 * tables: symbols inside a table's assigned range get the lesser cost,
 * all other symbols the greater one. */
#define BZ_LESSER_ICOST 0
#define BZ_GREATER_ICOST 15
static NOINLINE
void sendMTFValues(EState* s)
{
int32_t v, t, i, j, gs, ge, totc, bt, bc, iter;
int32_t nSelectors, alphaSize, minLen, maxLen, selCtr;
int32_t nGroups, nBytes;
/*
* uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
* is a global since the decoder also needs it.
*
* int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
* int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
* are also globals only used in this proc.
* Made global to keep stack frame size small.
*/
#define code sendMTFValues__code
#define rfreq sendMTFValues__rfreq
#define len_pack sendMTFValues__len_pack
uint16_t cost[BZ_N_GROUPS];
int32_t fave[BZ_N_GROUPS];
uint16_t* mtfv = s->mtfv;
alphaSize = s->nInUse + 2;
for (t = 0; t < BZ_N_GROUPS; t++)
for (v = 0; v < alphaSize; v++)
s->len[t][v] = BZ_GREATER_ICOST;
/*--- Decide how many coding tables to use ---*/
AssertH(s->nMTF > 0, 3001);
if (s->nMTF < 200) nGroups = 2; else
if (s->nMTF < 600) nGroups = 3; else
if (s->nMTF < 1200) nGroups = 4; else
if (s->nMTF < 2400) nGroups = 5; else
nGroups = 6;
/*--- Generate an initial set of coding tables ---*/
{
int32_t nPart, remF, tFreq, aFreq;
nPart = nGroups;
remF = s->nMTF;
gs = 0;
while (nPart > 0) {
tFreq = remF / nPart;
ge = gs - 1;
aFreq = 0;
while (aFreq < tFreq && ge < alphaSize-1) {
ge++;
aFreq += s->mtfFreq[ge];
}
if (ge > gs
&& nPart != nGroups && nPart != 1
&& ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */
) {
aFreq -= s->mtfFreq[ge];
ge--;
}
for (v = 0; v < alphaSize; v++)
if (v >= gs && v <= ge)
s->len[nPart-1][v] = BZ_LESSER_ICOST;
else
s->len[nPart-1][v] = BZ_GREATER_ICOST;
nPart--;
gs = ge + 1;
remF -= aFreq;
}
}
/*
* Iterate up to BZ_N_ITERS times to improve the tables.
*/
for (iter = 0; iter < BZ_N_ITERS; iter++) {
for (t = 0; t < nGroups; t++)
fave[t] = 0;
for (t = 0; t < nGroups; t++)
for (v = 0; v < alphaSize; v++)
s->rfreq[t][v] = 0;
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
/*
* Set up an auxiliary length table which is used to fast-track
* the common case (nGroups == 6).
*/
if (nGroups == 6) {
for (v = 0; v < alphaSize; v++) {
s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
}
}
#endif
nSelectors = 0;
totc = 0;
gs = 0;
while (1) {
/*--- Set group start & end marks. --*/
if (gs >= s->nMTF)
break;
ge = gs + BZ_G_SIZE - 1;
if (ge >= s->nMTF)
ge = s->nMTF-1;
/*
* Calculate the cost of this group as coded
* by each of the coding tables.
*/
for (t = 0; t < nGroups; t++)
cost[t] = 0;
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
register uint32_t cost01, cost23, cost45;
register uint16_t icv;
cost01 = cost23 = cost45 = 0;
#define BZ_ITER(nn) \
icv = mtfv[gs+(nn)]; \
cost01 += s->len_pack[icv][0]; \
cost23 += s->len_pack[icv][1]; \
cost45 += s->len_pack[icv][2];
BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
#undef BZ_ITER
cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
} else
#endif
{
/*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) {
uint16_t icv = mtfv[i];
for (t = 0; t < nGroups; t++)
cost[t] += s->len[t][icv];
}
}
/*
* Find the coding table which is best for this group,
* and record its identity in the selector table.
*/
/*bc = 999999999;*/
/*bt = -1;*/
bc = cost[0];
bt = 0;
for (t = 1 /*0*/; t < nGroups; t++) {
if (cost[t] < bc) {
bc = cost[t];
bt = t;
}
}
totc += bc;
fave[bt]++;
s->selector[nSelectors] = bt;
nSelectors++;
/*
* Increment the symbol frequencies for the selected table.
*/
/* 1% faster compress. +800 bytes */
#if CONFIG_BZIP2_FEATURE_SPEED >= 4
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
#undef BZ_ITUR
gs = ge + 1;
} else
#endif
{
/*--- slow version which correctly handles all situations ---*/
while (gs <= ge) {
s->rfreq[bt][mtfv[gs]]++;
gs++;
}
/* already is: gs = ge + 1; */
}
}
/*
* Recompute the tables based on the accumulated frequencies.
*/
/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
* comment in huffman.c for details. */
for (t = 0; t < nGroups; t++)
BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
}
AssertH(nGroups < 8, 3002);
AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003);
/*--- Compute MTF values for the selectors. ---*/
{
uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
for (i = 0; i < nGroups; i++)
pos[i] = i;
for (i = 0; i < nSelectors; i++) {
ll_i = s->selector[i];
j = 0;
tmp = pos[j];
while (ll_i != tmp) {
j++;
tmp2 = tmp;
tmp = pos[j];
pos[j] = tmp2;
};
pos[0] = tmp;
s->selectorMtf[i] = j;
}
};
/*--- Assign actual codes for the tables. --*/
for (t = 0; t < nGroups; t++) {
minLen = 32;
maxLen = 0;
for (i = 0; i < alphaSize; i++) {
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
if (s->len[t][i] < minLen) minLen = s->len[t][i];
}
AssertH(!(maxLen > 17 /*20*/), 3004);
AssertH(!(minLen < 1), 3005);
BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize);
}
/*--- Transmit the mapping table. ---*/
{
/* bbox: optimized a bit more than in bzip2 */
int inUse16 = 0;
for (i = 0; i < 16; i++) {
if (sizeof(long) <= 4) {
inUse16 = inUse16*2 +
((*(uint32_t*)&(s->inUse[i * 16 + 0])
| *(uint32_t*)&(s->inUse[i * 16 + 4])
| *(uint32_t*)&(s->inUse[i * 16 + 8])
| *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0);
} else { /* Our CPU can do better */
inUse16 = inUse16*2 +
((*(uint64_t*)&(s->inUse[i * 16 + 0])
| *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0);
}
}
nBytes = s->numZ;
bsW(s, 16, inUse16);
inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */
for (i = 0; i < 16; i++) {
if (inUse16 < 0) {
unsigned v16 = 0;
for (j = 0; j < 16; j++)
v16 = v16*2 + s->inUse[i * 16 + j];
bsW(s, 16, v16);
}
inUse16 <<= 1;
}
}
/*--- Now the selectors. ---*/
nBytes = s->numZ;
bsW(s, 3, nGroups);
bsW(s, 15, nSelectors);
for (i = 0; i < nSelectors; i++) {
for (j = 0; j < s->selectorMtf[i]; j++)
bsW(s, 1, 1);
bsW(s, 1, 0);
}
/*--- Now the coding tables. ---*/
nBytes = s->numZ;
for (t = 0; t < nGroups; t++) {
int32_t curr = s->len[t][0];
bsW(s, 5, curr);
for (i = 0; i < alphaSize; i++) {
while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ };
while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ };
bsW(s, 1, 0);
}
}
/*--- And finally, the block data proper ---*/
nBytes = s->numZ;
selCtr = 0;
gs = 0;
while (1) {
if (gs >= s->nMTF)
break;
ge = gs + BZ_G_SIZE - 1;
if (ge >= s->nMTF)
ge = s->nMTF-1;
AssertH(s->selector[selCtr] < nGroups, 3006);
/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */
#if 0
if (nGroups == 6 && 50 == ge-gs+1) {
/*--- fast track the common case ---*/
uint16_t mtfv_i;
uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]);
int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
#define BZ_ITAH(nn) \
mtfv_i = mtfv[gs+(nn)]; \
bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i])
BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
#undef BZ_ITAH
gs = ge+1;
} else
#endif
{
/*--- slow version which correctly handles all situations ---*/
/* code is bit bigger, but moves multiply out of the loop */
uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]);
int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]);
while (gs <= ge) {
bsW(s,
s_len_sel_selCtr[mtfv[gs]],
s_code_sel_selCtr[mtfv[gs]]
);
gs++;
}
/* already is: gs = ge+1; */
}
selCtr++;
}
AssertH(selCtr == nSelectors, 3007);
#undef code
#undef rfreq
#undef len_pack
}
/*---------------------------------------------------*/
static
void BZ2_compressBlock(EState* s, int is_last_block)
{
	/*
	 * Emit one compressed block into the output bit stream.
	 *
	 * s             - compressor state; s->nblock is the number of input
	 *                 bytes collected for this block (may be 0).
	 * is_last_block - nonzero if the stream trailer must follow this block.
	 */
	if (s->nblock > 0) {
		/* Finish this block's CRC and fold it into the whole-stream CRC:
		 * the running value has its top bit moved to the bottom
		 * (a rotate-left-by-one for a 32-bit value) and is then XORed
		 * with the block CRC. */
		BZ_FINALISE_CRC(s->blockCRC);
		s->combinedCRC = (s->combinedCRC >> 31) | (s->combinedCRC << 1);
		s->combinedCRC ^= s->blockCRC;
		/* Every block after the first starts with an empty output count */
		if (s->blockNo > 1) {
			s->numZ = 0;
		}
		BZ2_blockSort(s);
	}

	/* Output bytes are placed in arr2, right after the nblock input bytes */
	s->zbits = (uint8_t*)s->arr2 + s->nblock;

	/*-- First block only: stream header, one 32-bit write instead of
	 *   four byte writes (BZ_HDR_B, BZ_HDR_Z, BZ_HDR_h,
	 *   BZ_HDR_0 + blockSize100k). --*/
	if (s->blockNo == 1) {
		BZ2_bsInitWrite(s);
		bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k);
	}

	if (s->nblock > 0) {
		/* Block header magic: bytes 31 41 59 26 53 59 */
		bsPutU32(s, 0x31415926);
		bsPutU16(s, 0x5359);

		/*-- The block's CRC comes next, so it is in a known place. --*/
		bsPutU32(s, s->blockCRC);

		/*
		 * One bit telling whether the block is randomised.
		 * Since version 0.9.5 the sorting algorithm makes
		 * randomisation unnecessary, so the bit is always 'no'.
		 * Decoders must still cope with randomised blocks
		 * produced by older versions of bzip2.
		 */
		bsW(s, 1, 0);

		bsW(s, 24, s->origPtr);
		generateMTFValues(s);
		sendMTFValues(s);
	}

	/*-- Last block only: stream trailer. --*/
	if (is_last_block) {
		/* Trailer magic: bytes 17 72 45 38 50 90 */
		bsPutU32(s, 0x17724538);
		bsPutU16(s, 0x5090);
		bsPutU32(s, s->combinedCRC);
		bsFinishWrite(s);
	}
}
/*-------------------------------------------------------------*/
/*--- end compress.c ---*/
/*-------------------------------------------------------------*/

View File

@@ -0,0 +1,229 @@
/*
* bzip2 is written by Julian Seward <jseward@bzip.org>.
* Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
* See README and LICENSE files in this directory for more information.
*/
/*-------------------------------------------------------------*/
/*--- Huffman coding low-level stuff ---*/
/*--- huffman.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
This file is part of bzip2/libbzip2, a program and library for
lossless, block-sorting data compression.
bzip2/libbzip2 version 1.0.4 of 20 December 2006
Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
Please read the WARNING, DISCLAIMER and PATENTS sections in the
README file.
This program is released under the terms of the license contained
in the file LICENSE.
------------------------------------------------------------------ */
/* #include "bzlib_private.h" */
/*---------------------------------------------------*/
/*
 * A node's weight word packs two fields:
 *   bits 8..31 - the accumulated symbol frequency (frequencies are stored
 *                shifted left by 8 when the weights are initialised),
 *   bits 0..7  - the depth of the subtree below the node.
 */
#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
/*
 * Weight word of a node whose children are zw1 and zw2: frequencies are
 * added, depth becomes one more than the deeper child.
 * The whole expansion is parenthesized so that the macro can be used
 * inside a larger expression (e.g. next to '&' or '==') without the
 * trailing '|' operand binding to the neighbouring operator.
 */
#define ADDWEIGHTS(zw1,zw2) \
	((WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
	(1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))))
/*
 * Sift the entry at heap index z up towards the root until its parent's
 * weight is not larger than its own.
 *
 * Uses the identifiers `heap` and `weight` from the expansion site.
 * The loop has no explicit "reached the root" test: it stops at index 1
 * only because weight[heap[0]] compares not-greater than any entry, so the
 * caller must keep heap[0]/weight[0] set up as a sentinel.
 *
 * Wrapped in do { } while (0) so that `UPHEAP(x);` is a single statement
 * and is safe in an unbraced if/else; the argument is parenthesized.
 */
#define UPHEAP(z) \
do { \
	int32_t zz, tmp; \
	zz = (z); \
	tmp = heap[zz]; \
	while (weight[tmp] < weight[heap[zz >> 1]]) { \
		heap[zz] = heap[zz >> 1]; \
		zz >>= 1; \
	} \
	heap[zz] = tmp; \
} while (0)
/* 90 bytes, 0.3% of overall compress speed */
/*
 * DOWNHEAP1(heap, weight, nHeap): the entry at the root (index 1) of a
 * 1-based binary heap has just been replaced; sift it down until neither
 * child has a smaller weight.
 *
 *   heap   - heap[1..nHeap] holds node numbers
 *   weight - weight[node] is the ordering key
 *   nHeap  - number of entries currently in the heap
 *
 * The macro and the function variants take the same three arguments.
 * The macro's third parameter is now really named nHeap (it used to be
 * called `Heap` while the body referred to `nHeap`, so it only worked
 * when the caller's variable happened to have exactly that name).
 */
#if CONFIG_BZIP2_FEATURE_SPEED >= 1
/* macro works better than inline (gcc 4.2.1) */
#define DOWNHEAP1(heap, weight, nHeap) \
do { \
	int32_t zz, yy, tmp; \
	zz = 1; \
	tmp = (heap)[zz]; \
	while (1) { \
		yy = zz << 1; \
		if (yy > (nHeap)) \
			break; \
		if (yy < (nHeap) \
		 && (weight)[(heap)[yy+1]] < (weight)[(heap)[yy]]) \
			yy++; \
		if ((weight)[tmp] < (weight)[(heap)[yy]]) \
			break; \
		(heap)[zz] = (heap)[yy]; \
		zz = yy; \
	} \
	(heap)[zz] = tmp; \
} while (0)
#else
static
void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
{
	int32_t zz, yy, tmp;
	zz = 1;
	tmp = heap[zz];
	while (1) {
		/* left child of zz; stop when zz has no children */
		yy = zz << 1;
		if (yy > nHeap)
			break;
		/* pick the lighter of the two children, if there are two */
		if (yy < nHeap
		 && weight[heap[yy + 1]] < weight[heap[yy]])
			yy++;
		/* heap order restored? */
		if (weight[tmp] < weight[heap[yy]])
			break;
		heap[zz] = heap[yy];
		zz = yy;
	}
	heap[zz] = tmp;
}
#endif
/*---------------------------------------------------*/
/*
 * Compute Huffman code lengths for one coding table.
 *
 * s         - compressor state; only its scratch arrays
 *             (BZ2_hbMakeCodeLengths__heap/weight/parent) are used here
 * len       - out: len[0..alphaSize-1] receives the code length of each symbol
 * freq      - in: freq[0..alphaSize-1], symbol frequencies
 *             (a frequency of 0 is treated as 1)
 * alphaSize - number of symbols
 * maxLen    - longest acceptable code length
 *
 * A Huffman tree is built with a min-heap keyed on weight. If any resulting
 * length exceeds maxLen, all leaf frequencies are roughly halved and the
 * tree is rebuilt; this repeats until every length fits.
 */
static
void BZ2_hbMakeCodeLengths(EState *s,
uint8_t *len,
int32_t *freq,
int32_t alphaSize,
int32_t maxLen)
{
/*
* Nodes and heap entries run from 1. Entry 0
* for both the heap and nodes is a sentinel.
*/
int32_t nNodes, nHeap, n1, n2, i, j, k;
Bool tooLong;
/* bbox: moved to EState to save stack
int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
*/
#define heap (s->BZ2_hbMakeCodeLengths__heap)
#define weight (s->BZ2_hbMakeCodeLengths__weight)
#define parent (s->BZ2_hbMakeCodeLengths__parent)
/* Leaf weights: frequency in bits 8 and up, low 8 bits (depth) zero.
 * Leaf i+1 corresponds to symbol i. */
for (i = 0; i < alphaSize; i++)
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
while (1) {
nNodes = alphaSize;
nHeap = 0;
/* Sentinel entry 0: weight 0 stops UPHEAP at the root */
heap[0] = 0;
weight[0] = 0;
parent[0] = -2;
/* Insert all leaves into the heap; parent -1 means "no parent yet" */
for (i = 1; i <= alphaSize; i++) {
parent[i] = -1;
nHeap++;
heap[nHeap] = i;
UPHEAP(nHeap);
}
AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
/* Repeatedly remove the two lightest nodes and join them under
 * a new internal node, until a single root is left */
while (nHeap > 1) {
n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
nNodes++;
parent[n1] = parent[n2] = nNodes;
weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
parent[nNodes] = -1;
nHeap++;
heap[nHeap] = nNodes;
UPHEAP(nHeap);
}
AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
/* A symbol's code length is the number of parent links
 * from its leaf up to the root */
tooLong = False;
for (i = 1; i <= alphaSize; i++) {
j = 0;
k = i;
while (parent[k] >= 0) {
k = parent[k];
j++;
}
len[i-1] = j;
if (j > maxLen)
tooLong = True;
}
if (!tooLong)
break;
/* 17 Oct 04: keep-going condition for the following loop used
to be 'i < alphaSize', which missed the last element,
theoretically leading to the possibility of the compressor
looping. However, this count-scaling step is only needed if
one of the generated Huffman code words is longer than
maxLen, which up to and including version 1.0.2 was 20 bits,
which is extremely unlikely. In version 1.0.3 maxLen was
changed to 17 bits, which has minimal effect on compression
ratio, but does mean this scaling step is used from time to
time, enough to verify that it works.
This means that bzip2-1.0.3 and later will only produce
Huffman codes with a maximum length of 17 bits. However, in
order to preserve backwards compatibility with bitstreams
produced by versions pre-1.0.3, the decompressor must still
handle lengths of up to 20. */
/* Some code was too long: scale every leaf frequency to 1 + freq/2
 * (which flattens the distribution) and rebuild the tree */
for (i = 1; i <= alphaSize; i++) {
j = weight[i] >> 8;
/* bbox: yes, it is a signed division.
* don't replace with shift! */
j = 1 + (j / 2);
weight[i] = j << 8;
}
}
#undef heap
#undef weight
#undef parent
}
/*---------------------------------------------------*/
static
void BZ2_hbAssignCodes(int32_t *code,
		uint8_t *length,
		int32_t minLen,
		int32_t maxLen,
		int32_t alphaSize)
{
	/*
	 * Hand out code values from code lengths.
	 *
	 * Lengths are visited from minLen to maxLen; within one length the
	 * symbols get consecutive values in increasing symbol order, and the
	 * running value is doubled whenever the length grows by one bit.
	 * Symbols whose length lies outside [minLen, maxLen] are left untouched.
	 *
	 * code      - out: code[0..alphaSize-1]
	 * length    - in: code length of each symbol
	 * alphaSize - number of symbols
	 */
	int32_t next_code = 0;
	int32_t bits;

	for (bits = minLen; bits <= maxLen; bits++) {
		int32_t sym = 0;

		while (sym < alphaSize) {
			if (length[sym] == bits)
				code[sym] = next_code++;
			sym++;
		}
		next_code <<= 1;
	}
}
/*-------------------------------------------------------------*/
/*--- end huffman.c ---*/
/*-------------------------------------------------------------*/

View File

@@ -0,0 +1,15 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary)
{
	/*
	 * Advance the archive position to the next multiple of `boundary`.
	 * Nothing is skipped if the offset is already aligned.
	 * `boundary` is used as a modulus, so it must not be 0.
	 */
	unsigned padding = (boundary - (archive_handle->offset % boundary)) % boundary;

	/* Skip the padding through the handle's seek hook and account for it */
	archive_handle->seek(archive_handle->src_fd, padding);
	archive_handle->offset += padding;
}

View File

@@ -0,0 +1,200 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Create on disk the filesystem entry described by
 * archive_handle->file_header, reading file contents from
 * archive_handle->src_fd where needed.
 *
 * Behaviour is controlled by archive_handle->ah_flags:
 *   ARCHIVE_CREATE_LEADING_DIRS - create missing parent directories
 *   ARCHIVE_UNLINK_OLD          - remove an existing non-directory entry first
 *   ARCHIVE_EXTRACT_NEWER       - keep an existing entry that is newer or of
 *                                 the same age (the archive data is skipped)
 *   ARCHIVE_O_TRUNC             - open regular files with O_TRUNC instead of O_EXCL
 *   ARCHIVE_EXTRACT_QUIET       - suppress non-fatal error messages
 *   ARCHIVE_DONT_RESTORE_OWNER / ARCHIVE_DONT_RESTORE_PERM /
 *   ARCHIVE_NUMERIC_OWNER / ARCHIVE_RESTORE_DATE - metadata restoration
 *
 * Failure to remove an old file, to stat it, or an unknown file type is fatal;
 * failures to create links, directories or nodes are only reported.
 */
void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;
	int dst_fd;
	int res;

#if ENABLE_FEATURE_TAR_SELINUX
	/* A per-file security context takes precedence over the global one */
	char *sctx = archive_handle->tar__next_file_sctx;
	if (!sctx)
		sctx = archive_handle->tar__global_sctx;
	if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */
		setfscreatecon(sctx);
		free(archive_handle->tar__next_file_sctx);
		archive_handle->tar__next_file_sctx = NULL;
	}
#endif

	if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
		char *slash = strrchr(file_header->name, '/');
		if (slash) {
			/* Temporarily cut the name at the last '/' to get the parent dir */
			*slash = '\0';
			bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
			*slash = '/';
		}
	}

	if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) {
		/* Remove the entry if it exists */
		if (!S_ISDIR(file_header->mode)) {
			/* Is it hardlink?
			 * We encode hard links as regular files of size 0 with a symlink */
			if (S_ISREG(file_header->mode)
			 && file_header->link_target
			 && file_header->size == 0
			) {
				/* Ugly special case:
				 * tar cf t.tar hardlink1 hardlink2 hardlink1
				 * results in this tarball structure:
				 * hardlink1
				 * hardlink2 -> hardlink1
				 * hardlink1 -> hardlink1 <== !!!
				 */
				if (strcmp(file_header->link_target, file_header->name) == 0)
					goto ret;
			}
			/* Proceed with deleting */
			if (unlink(file_header->name) == -1
			 && errno != ENOENT
			) {
				bb_perror_msg_and_die("can't remove old file %s",
						file_header->name);
			}
		}
	}
	else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
		/* Remove the existing entry if it's older than the extracted entry */
		struct stat existing_sb;
		if (lstat(file_header->name, &existing_sb) == -1) {
			if (errno != ENOENT) {
				bb_perror_msg_and_die("can't stat old file");
			}
		}
		else if (existing_sb.st_mtime >= file_header->mtime) {
			if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
			 && !S_ISDIR(file_header->mode)
			) {
				bb_error_msg("%s not created: newer or "
					"same age file exists", file_header->name);
			}
			/* The entry's data still has to be consumed from the archive */
			data_skip(archive_handle);
			goto ret;
		}
		else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
			bb_perror_msg_and_die("can't remove old file %s",
					file_header->name);
		}
	}

	/* Handle hard links separately
	 * We encode hard links as regular files of size 0 with a symlink */
	if (S_ISREG(file_header->mode)
	 && file_header->link_target
	 && file_header->size == 0
	) {
		/* hard link */
		res = link(file_header->link_target, file_header->name);
		if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
			bb_perror_msg("can't create %slink "
					"from %s to %s", "hard",
					file_header->name,
					file_header->link_target);
		}
		/* Hardlinks have no separate mode/ownership, skip chown/chmod */
		goto ret;
	}

	/* Create the filesystem entry */
	switch (file_header->mode & S_IFMT) {
	case S_IFREG: {
		/* Regular file */
		int flags = O_WRONLY | O_CREAT | O_EXCL;
		if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
			flags = O_WRONLY | O_CREAT | O_TRUNC;
		dst_fd = xopen3(file_header->name,
			flags,
			file_header->mode
			);
		bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size);
		close(dst_fd);
		break;
	}
	case S_IFDIR:
		res = mkdir(file_header->name, file_header->mode);
		if ((res == -1)
		 && (errno != EISDIR) /* btw, Linux doesn't return this */
		 && (errno != EEXIST)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't make dir %s", file_header->name);
		}
		break;
	case S_IFLNK:
		/* Symlink */
		if (!file_header->link_target) {
			/* A symlink entry without a target (e.g. a corrupted
			 * archive): symlink(NULL, ...) and printing NULL through
			 * "%s" are both invalid, so report it and move on. */
			if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
				bb_error_msg("can't create %slink %s: no link target",
						"sym", file_header->name);
			}
			break;
		}
		res = symlink(file_header->link_target, file_header->name);
		if ((res == -1)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't create %slink "
					"from %s to %s", "sym",
					file_header->name,
					file_header->link_target);
		}
		break;
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		res = mknod(file_header->name, file_header->mode, file_header->device);
		if ((res == -1)
		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
		) {
			bb_perror_msg("can't create node %s", file_header->name);
		}
		break;
	default:
		bb_error_msg_and_die("unrecognized file type");
	}

	/* Owner, mode and timestamps are restored for everything but symlinks.
	 * chown/chmod/utimes results are deliberately not checked (best effort). */
	if (!S_ISLNK(file_header->mode)) {
		if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_OWNER)) {
			uid_t uid = file_header->uid;
			gid_t gid = file_header->gid;
#if ENABLE_FEATURE_TAR_UNAME_GNAME
			/* Names from the archive, when known on this system,
			 * override the numeric ids */
			if (!(archive_handle->ah_flags & ARCHIVE_NUMERIC_OWNER)) {
				if (file_header->tar__uname) {
//TODO: cache last name/id pair?
					struct passwd *pwd = getpwnam(file_header->tar__uname);
					if (pwd) uid = pwd->pw_uid;
				}
				if (file_header->tar__gname) {
					struct group *grp = getgrnam(file_header->tar__gname);
					if (grp) gid = grp->gr_gid;
				}
			}
#endif
			/* GNU tar 1.15.1 uses chown, not lchown */
			chown(file_header->name, uid, gid);
		}
		/* uclibc has no lchmod, glibc is even stranger -
		 * it has lchmod which seems to do nothing!
		 * so we use chmod... */
		if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
			chmod(file_header->name, file_header->mode);
		}
		if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
			/* Both access and modification time are set to the archived mtime */
			struct timeval t[2];
			t[1].tv_sec = t[0].tv_sec = file_header->mtime;
			t[1].tv_usec = t[0].tv_usec = 0;
			utimes(file_header->name, t);
		}
	}

 ret: ;
#if ENABLE_FEATURE_TAR_SELINUX
	if (sctx) {
		/* reset the context after creating an entry */
		setfscreatecon(NULL);
	}
#endif
}

View File

@@ -0,0 +1,134 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Indices of the TAR_* environment variables exported to the command.
 * Each value is used both as an index into tar_var[] (the variable name
 * suffix) and into the per-call tar_env[] array, so the order here must
 * stay identical to the order of tar_var[], including the
 * ENABLE_FEATURE_TAR_UNAME_GNAME conditional part.
 * TAR_MAX is the number of variables.
 */
enum {
//TAR_FILETYPE,
TAR_MODE,
TAR_FILENAME,
TAR_REALNAME,
#if ENABLE_FEATURE_TAR_UNAME_GNAME
TAR_UNAME,
TAR_GNAME,
#endif
TAR_SIZE,
TAR_UID,
TAR_GID,
TAR_MAX,
};
/*
 * Name suffixes of the exported environment variables ("TAR_" is
 * prepended when the variable is formatted). Indexed by the TAR_xxx
 * enum constants, so entries must be kept in the same order as the enum.
 */
static const char *const tar_var[] = {
// "FILETYPE",
"MODE",
"FILENAME",
"REALNAME",
#if ENABLE_FEATURE_TAR_UNAME_GNAME
"UNAME",
"GNAME",
#endif
"SIZE",
"UID",
"GID",
};
/* putenv() wrapper: terminates the program if the variable can't be set */
static void xputenv(char *str)
{
	if (putenv(str) == 0)
		return;
	bb_error_msg_and_die(bb_msg_memory_exhausted);
}
/*
 * Export TAR_<tar_var[idx]>=<str> and remember the allocated string in
 * env[idx] (the caller may need it to undo the export later).
 *
 * str may be NULL (e.g. an entry that carries no user or group name);
 * the variable is then exported with an empty value, since handing NULL
 * to a "%s" conversion is undefined.
 */
static void str2env(char *env[], int idx, const char *str)
{
	env[idx] = xasprintf("TAR_%s=%s", tar_var[idx], str ? str : "");
	xputenv(env[idx]);
}
/*
 * Export TAR_<tar_var[idx]>=<val>, val printed in decimal.
 * The allocated "NAME=value" string is stored in env[idx].
 */
static void dec2env(char *env[], int idx, unsigned long long val)
{
	char *assignment = xasprintf("TAR_%s=%llu", tar_var[idx], val);

	env[idx] = assignment;
	xputenv(assignment);
}
/*
 * Export TAR_<tar_var[idx]>=<val>, val printed in octal.
 * The allocated "NAME=value" string is stored in env[idx].
 */
static void oct2env(char *env[], int idx, unsigned long val)
{
	char *assignment = xasprintf("TAR_%s=%lo", tar_var[idx], val);

	env[idx] = assignment;
	xputenv(assignment);
}
/*
 * Feed the contents of the current archive entry to a shell command.
 *
 * Only regular files are handled; for any other file type the function
 * does nothing. For a regular file a child process is started which runs
 * archive_handle->tar__to_command through DEFAULT_SHELL with the entry's
 * data on its stdin and TAR_MODE, TAR_FILENAME, TAR_REALNAME, TAR_SIZE,
 * TAR_UID, TAR_GID (and TAR_UNAME/TAR_GNAME if configured) in its
 * environment. The parent copies the data into the pipe, waits for the
 * child, and dies if the command exits with nonzero status or is killed
 * by a signal.
 */
void FAST_FUNC data_extract_to_command(archive_handle_t *archive_handle)
{
file_header_t *file_header = archive_handle->file_header;
#if 0 /* do we need this? ENABLE_FEATURE_TAR_SELINUX */
char *sctx = archive_handle->tar__next_file_sctx;
if (!sctx)
sctx = archive_handle->tar__global_sctx;
if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */
setfscreatecon(sctx);
free(archive_handle->tar__next_file_sctx);
archive_handle->tar__next_file_sctx = NULL;
}
#endif
if ((file_header->mode & S_IFMT) == S_IFREG) {
pid_t pid;
int p[2], status;
char *tar_env[TAR_MAX];
/* All slots start as NULL so that the cleanup loop below
 * can tell which variables were actually set */
memset(tar_env, 0, sizeof(tar_env));
/* p[0] is the read end (child's stdin), p[1] the write end (parent) */
xpipe(p);
pid = BB_MMU ? xfork() : xvfork();
if (pid == 0) {
/* Child */
/* str2env(tar_env, TAR_FILETYPE, "f"); - parent should do it once */
oct2env(tar_env, TAR_MODE, file_header->mode);
str2env(tar_env, TAR_FILENAME, file_header->name);
str2env(tar_env, TAR_REALNAME, file_header->name);
#if ENABLE_FEATURE_TAR_UNAME_GNAME
str2env(tar_env, TAR_UNAME, file_header->tar__uname);
str2env(tar_env, TAR_GNAME, file_header->tar__gname);
#endif
dec2env(tar_env, TAR_SIZE, file_header->size);
dec2env(tar_env, TAR_UID, file_header->uid);
dec2env(tar_env, TAR_GID, file_header->gid);
/* The child only reads: drop the write end, make the read end stdin */
close(p[1]);
xdup2(p[0], STDIN_FILENO);
/* Restore default SIGPIPE handling for the command */
signal(SIGPIPE, SIG_DFL);
execl(DEFAULT_SHELL, DEFAULT_SHELL_SHORT_NAME, "-c", archive_handle->tar__to_command, NULL);
/* Reached only if execl failed */
bb_perror_msg_and_die("can't execute '%s'", DEFAULT_SHELL);
}
/* Parent: only writes into the pipe */
close(p[0]);
/* Our caller is expected to do signal(SIGPIPE, SIG_IGN)
* so that we don't die if the child doesn't read all the input: */
/* NOTE(review): the size is passed negated; presumably this tells
 * bb_copyfd_exact_size not to treat a write error as fatal - confirm
 * against its implementation */
bb_copyfd_exact_size(archive_handle->src_fd, p[1], -file_header->size);
/* Closing the write end lets the child see EOF on its stdin */
close(p[1]);
if (safe_waitpid(pid, &status, 0) == -1)
bb_perror_msg_and_die("waitpid");
if (WIFEXITED(status) && WEXITSTATUS(status))
bb_error_msg_and_die("'%s' returned status %d",
archive_handle->tar__to_command, WEXITSTATUS(status));
if (WIFSIGNALED(status))
bb_error_msg_and_die("'%s' terminated on signal %d",
archive_handle->tar__to_command, WTERMSIG(status));
/* Without an MMU the child was created with xvfork(); presumably it
 * shared our memory, so the variables it exported are visible here
 * and have to be removed and freed - TODO confirm */
if (!BB_MMU) {
int i;
for (i = 0; i < TAR_MAX; i++) {
if (tar_env[i])
bb_unsetenv_and_free(tar_env[i]);
}
}
}
#if 0 /* ENABLE_FEATURE_TAR_SELINUX */
if (sctx)
/* reset the context after creating an entry */
setfscreatecon(NULL);
#endif
}

View File

@@ -0,0 +1,14 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Copy the current entry's data (file_header->size bytes) from the
 * archive to standard output.
 */
void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle)
{
	bb_copyfd_exact_size(
		archive_handle->src_fd,
		STDOUT_FILENO,
		archive_handle->file_header->size
	);
}

View File

@@ -0,0 +1,12 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Skip over the current entry's data (file_header->size bytes) using the
 * handle's seek hook, without extracting anything.
 */
void FAST_FUNC data_skip(archive_handle_t *archive_handle)
{
	archive_handle->seek(
		archive_handle->src_fd,
		archive_handle->file_header->size
	);
}

View File

@@ -0,0 +1,822 @@
/* vi: set sw=4 ts=4: */
/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
which also acknowledges contributions by Mike Burrows, David Wheeler,
Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
Robert Sedgewick, and Jon L. Bentley.
Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
/*
Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
More efficient reading of Huffman codes, a streamlined read_bunzip()
function, and various other tweaks. In (limited) tests, approximately
20% faster than bzcat on x86 and about 10% faster on arm.
Note that about 2/3 of the time is spent in read_bunzip() reversing
the Burrows-Wheeler transformation. Much of that time is delay
resulting from cache misses.
(2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null"
on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip:
%time seconds calls function
71.01 12.69 444 get_next_block
28.65 5.12 93065 read_bunzip
00.22 0.04 7736490 get_bits
00.11 0.02 47 dealloc_bunzip
00.00 0.00 93018 full_write
...)
I would ask that anyone benefiting from this work, especially those
using it in commercial products, consider making a donation to my local
non-profit hospice organization (www.hospiceacadiana.com) in the name of
the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
Manuel
*/
#include "libbb.h"
#include "archive.h"
/* Constants for Huffman coding */
#define MAX_GROUPS 6
#define GROUP_SIZE 50 /* 64 would have been more efficient */
#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
#define SYMBOL_RUNA 0
#define SYMBOL_RUNB 1
/* Status return values */
#define RETVAL_OK 0
#define RETVAL_LAST_BLOCK (-1)
#define RETVAL_NOT_BZIP_DATA (-2)
#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
#define RETVAL_SHORT_WRITE (-4)
#define RETVAL_DATA_ERROR (-5)
#define RETVAL_OUT_OF_MEMORY (-6)
#define RETVAL_OBSOLETE_INPUT (-7)
/* Other housekeeping constants */
#define IOBUF_SIZE 4096
/* This is what we know about each Huffman coding group */
/*
 * Decoding tables of one Huffman coding group:
 *   limit[]   - largest code value a symbol of a given bit length can have
 *   base[]    - amount subtracted from a code value of a given length
 *               before indexing permute[]
 *   permute[] - lookup table from Huffman-coded symbols to decoded symbols
 *   minLen, maxLen - shortest and longest code length used by the group
 */
struct group_data {
/* We have an extra slot at the end of limit[] for a sentinel value. */
int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
int minLen, maxLen;
};
/* Structure holding all the housekeeping data, including IO buffers and
* memory that persists between calls to bunzip
* Found the most used member:
* cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \
* | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
* and moved it (inbufBitCount) to offset 0.
*/
struct bunzip_data {
/* I/O tracking data (file handles, buffers, positions, etc.) */
/* inbufBits is the bit accumulator; its low inbufBitCount bits
 * have been read from inbuf but not yet consumed */
unsigned inbufBitCount, inbufBits;
/* inbuf holds inbufCount bytes read from in_fd; inbufPos is the index
 * of the next byte to move into the bit accumulator */
int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
uint8_t *inbuf /*,*outbuf*/;
/* State for interrupting output loop */
int writeCopies, writePos, writeRunCountdown, writeCount;
int writeCurrent; /* actually a uint8_t */
/* The CRC values stored in the block header and calculated from the data */
uint32_t headerCRC, totalCRC, writeCRC;
/* Intermediate buffer and its size (in bytes) */
uint32_t *dbuf;
unsigned dbufSize;
/* For I/O error handling */
/* (the bit reader longjmp()s here when the input ends unexpectedly) */
jmp_buf jmpbuf;
/* Big things go last (register-relative addressing can be larger for big offsets) */
uint32_t crc32Table[256];
uint8_t selectors[32768]; /* nSelectors=15 bits */
struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
};
/* typedef struct bunzip_data bunzip_data; -- done in .h file */
/* Return the next nnn bits of input. All reads from the compressed input
are done through this function. All reads are big endian */
/*
 * bd          - decompressor state (input buffer and bit accumulator)
 * bits_wanted - number of bits to fetch; callers in this file ask for
 *               1 to 32 bits
 *
 * Returns the bits as an unsigned value, first-read bit most significant.
 * Does not return if the input runs out: it longjmp()s to bd->jmpbuf with
 * RETVAL_UNEXPECTED_INPUT_EOF instead.
 */
static unsigned get_bits(bunzip_data *bd, int bits_wanted)
{
	unsigned bits = 0;
	/* Cache bd->inbufBitCount in a CPU register (hopefully): */
	int bit_count = bd->inbufBitCount;

	/* If we need to get more data from the byte buffer, do so. (Loop getting
	   one byte at a time to enforce endianness and avoid unaligned access.) */
	while (bit_count < bits_wanted) {

		/* If we need to read more data from file into byte buffer, do so */
		if (bd->inbufPos == bd->inbufCount) {
			/* if "no input fd" case: in_fd == -1, read fails, we jump */
			bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
			if (bd->inbufCount <= 0)
				longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
			bd->inbufPos = 0;
		}

		/* Avoid 32-bit overflow (dump bit buffer to top of output) */
		if (bit_count >= 24) {
			/* bit_count can be as large as 31 here, so the mask must be
			 * built from an unsigned 1: (1 << 31) overflows a signed int */
			bits = bd->inbufBits & ((1U << bit_count) - 1);
			bits_wanted -= bit_count;
			bits <<= bits_wanted;
			bit_count = 0;
		}

		/* Grab next 8 bits of input from buffer. */
		bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
		bit_count += 8;
	}

	/* Calculate result */
	bit_count -= bits_wanted;
	bd->inbufBitCount = bit_count;
	bits |= (bd->inbufBits >> bit_count) & ((1U << bits_wanted) - 1);

	return bits;
}
/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
static int get_next_block(bunzip_data *bd)
{
struct group_data *hufGroup;
int dbufCount, dbufSize, groupCount, *base, *limit, selector,
i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
int runCnt = runCnt; /* for compiler */
uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
uint32_t *dbuf;
unsigned origPtr;
dbuf = bd->dbuf;
dbufSize = bd->dbufSize;
selectors = bd->selectors;
/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
#if 0
/* Reset longjmp I/O error handling */
i = setjmp(bd->jmpbuf);
if (i) return i;
#endif
/* Read in header signature and CRC, then validate signature.
(last block signature means CRC is for whole file, return now) */
i = get_bits(bd, 24);
j = get_bits(bd, 24);
bd->headerCRC = get_bits(bd, 32);
if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
/* We can add support for blockRandomised if anybody complains. There was
some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
it didn't actually work. */
if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
origPtr = get_bits(bd, 24);
if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR;
/* mapping table: if some byte values are never used (encoding things
like ascii text), the compression code removes the gaps to have fewer
symbols to deal with, and writes a sparse bitfield indicating which
values were present. We make a translation table to convert the symbols
back to the corresponding bytes. */
symTotal = 0;
i = 0;
t = get_bits(bd, 16);
do {
if (t & (1 << 15)) {
unsigned inner_map = get_bits(bd, 16);
do {
if (inner_map & (1 << 15))
symToByte[symTotal++] = i;
inner_map <<= 1;
i++;
} while (i & 15);
i -= 16;
}
t <<= 1;
i += 16;
} while (i < 256);
/* How many different Huffman coding groups does this block use? */
groupCount = get_bits(bd, 3);
if (groupCount < 2 || groupCount > MAX_GROUPS)
return RETVAL_DATA_ERROR;
/* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
group. Read in the group selector list, which is stored as MTF encoded
bit runs. (MTF=Move To Front, as each value is used it's moved to the
start of the list.) */
for (i = 0; i < groupCount; i++)
mtfSymbol[i] = i;
nSelectors = get_bits(bd, 15);
if (!nSelectors)
return RETVAL_DATA_ERROR;
for (i = 0; i < nSelectors; i++) {
uint8_t tmp_byte;
/* Get next value */
int n = 0;
while (get_bits(bd, 1)) {
if (n >= groupCount) return RETVAL_DATA_ERROR;
n++;
}
/* Decode MTF to get the next selector */
tmp_byte = mtfSymbol[n];
while (--n >= 0)
mtfSymbol[n + 1] = mtfSymbol[n];
mtfSymbol[0] = selectors[i] = tmp_byte;
}
/* Read the Huffman coding tables for each group, which code for symTotal
literal symbols, plus two run symbols (RUNA, RUNB) */
symCount = symTotal + 2;
for (j = 0; j < groupCount; j++) {
uint8_t length[MAX_SYMBOLS];
/* 8 bits is ALMOST enough for temp[], see below */
unsigned temp[MAX_HUFCODE_BITS+1];
int minLen, maxLen, pp, len_m1;
/* Read Huffman code lengths for each symbol. They're stored in
a way similar to mtf; record a starting value for the first symbol,
and an offset from the previous value for every symbol after that.
(Subtracting 1 before the loop and then adding it back at the end is
an optimization that makes the test inside the loop simpler: symbol
length 0 becomes negative, so an unsigned inequality catches it.) */
len_m1 = get_bits(bd, 5) - 1;
for (i = 0; i < symCount; i++) {
for (;;) {
int two_bits;
if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1))
return RETVAL_DATA_ERROR;
/* If first bit is 0, stop. Else second bit indicates whether
to increment or decrement the value. Optimization: grab 2
bits and unget the second if the first was 0. */
two_bits = get_bits(bd, 2);
if (two_bits < 2) {
bd->inbufBitCount++;
break;
}
/* Add one if second bit 1, else subtract 1. Avoids if/else */
len_m1 += (((two_bits+1) & 2) - 1);
}
/* Correct for the initial -1, to get the final symbol length */
length[i] = len_m1 + 1;
}
/* Find largest and smallest lengths in this group */
minLen = maxLen = length[0];
for (i = 1; i < symCount; i++) {
if (length[i] > maxLen) maxLen = length[i];
else if (length[i] < minLen) minLen = length[i];
}
/* Calculate permute[], base[], and limit[] tables from length[].
*
* permute[] is the lookup table for converting Huffman coded symbols
* into decoded symbols. base[] is the amount to subtract from the
* value of a Huffman symbol of a given length when using permute[].
*
* limit[] indicates the largest numerical value a symbol with a given
* number of bits can have. This is how the Huffman codes can vary in
* length: each code with a value>limit[length] needs another bit.
*/
hufGroup = bd->groups + j;
hufGroup->minLen = minLen;
hufGroup->maxLen = maxLen;
/* Note that minLen can't be smaller than 1, so we adjust the base
and limit array pointers so we're not always wasting the first
entry. We do this again when using them (during symbol decoding). */
base = hufGroup->base - 1;
limit = hufGroup->limit - 1;
/* Calculate permute[]. Concurrently, initialize temp[] and limit[]. */
pp = 0;
for (i = minLen; i <= maxLen; i++) {
int k;
temp[i] = limit[i] = 0;
for (k = 0; k < symCount; k++)
if (length[k] == i)
hufGroup->permute[pp++] = k;
}
/* Count symbols coded for at each bit length */
/* NB: in pathological cases, temp[8] can end up being 256.
* That's why uint8_t is too small for temp[]. */
for (i = 0; i < symCount; i++) temp[length[i]]++;
/* Calculate limit[] (the largest symbol-coding value at each bit
* length, which is (previous limit<<1)+symbols at this level), and
* base[] (number of symbols to ignore at each bit length, which is
* limit minus the cumulative count of symbols coded for already). */
pp = t = 0;
for (i = minLen; i < maxLen;) {
unsigned temp_i = temp[i];
pp += temp_i;
/* We read the largest possible symbol size and then unget bits
after determining how many we need, and those extra bits could
be set to anything. (They're noise from future symbols.) At
each level we're really only interested in the first few bits,
so here we set all the trailing to-be-ignored bits to 1 so they
don't affect the value>limit[length] comparison. */
limit[i] = (pp << (maxLen - i)) - 1;
pp <<= 1;
t += temp_i;
base[++i] = pp - t;
}
limit[maxLen] = pp + temp[maxLen] - 1;
limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
base[minLen] = 0;
}
/* We've finished reading and digesting the block header. Now read this
block's Huffman coded symbols from the file and undo the Huffman coding
and run length encoding, saving the result into dbuf[dbufCount++] = uc */
/* Initialize symbol occurrence counters and symbol Move To Front table */
/*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
for (i = 0; i < 256; i++) {
byteCount[i] = 0;
mtfSymbol[i] = (uint8_t)i;
}
/* Loop through compressed symbols. */
runPos = dbufCount = selector = 0;
for (;;) {
int nextSym;
/* Fetch next Huffman coding group from list. */
symCount = GROUP_SIZE - 1;
if (selector >= nSelectors) return RETVAL_DATA_ERROR;
hufGroup = bd->groups + selectors[selector++];
base = hufGroup->base - 1;
limit = hufGroup->limit - 1;
continue_this_group:
/* Read next Huffman-coded symbol. */
/* Note: It is far cheaper to read maxLen bits and back up than it is
to read minLen bits and then add additional bit at a time, testing
as we go. Because there is a trailing last block (with file CRC),
there is no danger of the overread causing an unexpected EOF for a
valid compressed file.
*/
if (1) {
/* As a further optimization, we do the read inline
(falling back to a call to get_bits if the buffer runs dry).
*/
int new_cnt;
while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
/* bd->inbufBitCount < hufGroup->maxLen */
if (bd->inbufPos == bd->inbufCount) {
nextSym = get_bits(bd, hufGroup->maxLen);
goto got_huff_bits;
}
bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
bd->inbufBitCount += 8;
};
bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
got_huff_bits: ;
} else { /* unoptimized equivalent */
nextSym = get_bits(bd, hufGroup->maxLen);
}
/* Figure how many bits are in next symbol and unget extras */
i = hufGroup->minLen;
while (nextSym > limit[i]) ++i;
j = hufGroup->maxLen - i;
if (j < 0)
return RETVAL_DATA_ERROR;
bd->inbufBitCount += j;
/* Huffman decode value to get nextSym (with bounds checking) */
nextSym = (nextSym >> j) - base[i];
if ((unsigned)nextSym >= MAX_SYMBOLS)
return RETVAL_DATA_ERROR;
nextSym = hufGroup->permute[nextSym];
/* We have now decoded the symbol, which indicates either a new literal
byte, or a repeated run of the most recent literal byte. First,
check if nextSym indicates a repeated run, and if so loop collecting
how many times to repeat the last literal. */
if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
/* If this is the start of a new run, zero out counter */
if (runPos == 0) {
runPos = 1;
runCnt = 0;
}
/* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
each bit position, add 1 or 2 instead. For example,
1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
You can make any bit pattern that way using 1 less symbol than
the basic or 0/1 method (except all bits 0, which would use no
symbols, but a run of length 0 doesn't mean anything in this
context). Thus space is saved. */
runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
if (runPos < dbufSize) runPos <<= 1;
goto end_of_huffman_loop;
}
/* When we hit the first non-run symbol after a run, we now know
how many times to repeat the last literal, so append that many
copies to our buffer of decoded symbols (dbuf) now. (The last
literal used is the one at the head of the mtfSymbol array.) */
if (runPos != 0) {
uint8_t tmp_byte;
if (dbufCount + runCnt >= dbufSize) return RETVAL_DATA_ERROR;
tmp_byte = symToByte[mtfSymbol[0]];
byteCount[tmp_byte] += runCnt;
while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte;
runPos = 0;
}
/* Is this the terminating symbol? */
if (nextSym > symTotal) break;
/* At this point, nextSym indicates a new literal character. Subtract
one to get the position in the MTF array at which this literal is
currently to be found. (Note that the result can't be -1 or 0,
because 0 and 1 are RUNA and RUNB. But another instance of the
first symbol in the mtf array, position 0, would have been handled
as part of a run above. Therefore 1 unused mtf position minus
2 non-literal nextSym values equals -1.) */
if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR;
i = nextSym - 1;
uc = mtfSymbol[i];
/* Adjust the MTF array. Since we typically expect to move only a
* small number of symbols, and are bound by 256 in any case, using
* memmove here would typically be bigger and slower due to function
* call overhead and other assorted setup costs. */
do {
mtfSymbol[i] = mtfSymbol[i-1];
} while (--i);
mtfSymbol[0] = uc;
uc = symToByte[uc];
/* We have our literal byte. Save it into dbuf. */
byteCount[uc]++;
dbuf[dbufCount++] = (uint32_t)uc;
/* Skip group initialization if we're not done with this group. Done
* this way to avoid compiler warning. */
end_of_huffman_loop:
if (--symCount >= 0) goto continue_this_group;
}
/* At this point, we've read all the Huffman-coded symbols (and repeated
runs) for this block from the input stream, and decoded them into the
intermediate buffer. There are dbufCount many decoded bytes in dbuf[].
Now undo the Burrows-Wheeler transform on dbuf.
See http://dogma.net/markn/articles/bwt/bwt.htm
*/
/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
j = 0;
for (i = 0; i < 256; i++) {
int tmp_count = j + byteCount[i];
byteCount[i] = j;
j = tmp_count;
}
/* Figure out what order dbuf would be in if we sorted it. */
for (i = 0; i < dbufCount; i++) {
uint8_t tmp_byte = (uint8_t)dbuf[i];
int tmp_count = byteCount[tmp_byte];
dbuf[tmp_count] |= (i << 8);
byteCount[tmp_byte] = tmp_count + 1;
}
/* Decode first byte by hand to initialize "previous" byte. Note that it
doesn't get output, and if the first three characters are identical
it doesn't qualify as a run (hence writeRunCountdown=5). */
if (dbufCount) {
uint32_t tmp;
if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
tmp = dbuf[origPtr];
bd->writeCurrent = (uint8_t)tmp;
bd->writePos = (tmp >> 8);
bd->writeRunCountdown = 5;
}
bd->writeCount = dbufCount;
return RETVAL_OK;
}
/* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
Decodes up to len bytes into outbuf, calling get_next_block() to
Huffman-decode another input block whenever the current one is used up.

Return value:
- negative: error/end indicator. Once bd->writeCount has gone negative,
  every later call returns that stored value immediately.
- 0: outbuf was filled completely; the decoder state has been saved in bd
  so that the next call resumes where this one stopped.
- otherwise: get_next_block() reported RETVAL_LAST_BLOCK, and the value is
  the number of *unfilled* bytes left in outbuf (which may itself be 0).
  IOW: at end of stream with nothing decoded this returns len
  ("all bytes are not filled"), not 0.
  (Why? This allows to get rid of one local variable)
NB: a block whose computed CRC differs from bd->headerCRC makes this
function return RETVAL_LAST_BLOCK directly, after forcing bd->totalCRC
to differ from bd->headerCRC.
*/
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
{
const uint32_t *dbuf;
int pos, current, previous;
uint32_t CRC;
/* If we already have error/end indicator, return it */
if (bd->writeCount < 0)
return bd->writeCount;
dbuf = bd->dbuf;
/* Register-cached state (hopefully): */
pos = bd->writePos;
current = bd->writeCurrent;
CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
/* We will always have pending decoded data to write into the output
buffer unless this is the very first call (in which case we haven't
Huffman-decoded a block into the intermediate buffer yet).
NB: the output loop below lives entirely inside this if(); the refill
code after it re-enters the loop with "goto decode_next_byte". */
if (bd->writeCopies) {
dec_writeCopies:
/* Inside the loop, writeCopies means extra copies (beyond 1) */
--bd->writeCopies;
/* Loop outputting bytes */
for (;;) {
/* If the output buffer is full, save cached state and return */
if (--len < 0) {
/* Unlikely branch.
* Use of "goto" instead of keeping code here
* helps compiler to realize this. */
goto outbuf_full;
}
/* Write next byte into output buffer, updating CRC */
*outbuf++ = current;
CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
/* Loop now if we're outputting multiple copies of this byte */
if (bd->writeCopies) {
/* Unlikely branch */
/*--bd->writeCopies;*/
/*continue;*/
/* Same, but (ab)using other existing --writeCopies operation
* (and this if() compiles into just test+branch pair): */
goto dec_writeCopies;
}
decode_next_byte:
if (--bd->writeCount < 0)
break; /* input block is fully consumed, need next one */
/* Follow sequence vector to undo Burrows-Wheeler transform.
* Each dbuf[] entry carries the byte in its low 8 bits and the
* index of the next entry in the bits above them. */
previous = current;
pos = dbuf[pos];
current = (uint8_t)pos;
pos >>= 8;
/* After 3 consecutive copies of the same byte, the 4th
* is a repeat count. We count down from 4 instead
* of counting up because testing for non-zero is faster */
if (--bd->writeRunCountdown != 0) {
if (current != previous)
bd->writeRunCountdown = 4;
} else {
/* Unlikely branch */
/* We have a repeated run, this byte indicates the count */
bd->writeCopies = current;
current = previous;
bd->writeRunCountdown = 5;
/* Sometimes there are just 3 bytes (run length 0) */
if (!bd->writeCopies) goto decode_next_byte;
/* Subtract the 1 copy we'd output anyway to get extras */
--bd->writeCopies;
}
} /* for(;;) */
/* The whole block has been output: finalize its CRC and fold it
* into the running whole-stream CRC */
bd->writeCRC = CRC = ~CRC;
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
/* If this block had a CRC error, force file level CRC error */
if (CRC != bd->headerCRC) {
bd->totalCRC = bd->headerCRC + 1;
return RETVAL_LAST_BLOCK;
}
}
/* Refill the intermediate buffer by Huffman-decoding next block of input */
{
int r = get_next_block(bd);
if (r) { /* error/end */
/* Latch the indicator so that later calls return it at once */
bd->writeCount = r;
return (r != RETVAL_LAST_BLOCK) ? r : len;
}
}
/* Fresh block: restart the per-block CRC and reload the cached state
* that get_next_block() left in bd, then jump into the output loop */
CRC = ~0;
pos = bd->writePos;
current = bd->writeCurrent;
goto decode_next_byte;
outbuf_full:
/* Output buffer is full, save cached state and return */
bd->writePos = pos;
bd->writeCurrent = current;
bd->writeCRC = CRC;
/* Count the byte we could not store as pending again; this also makes
* the "if (bd->writeCopies)" test at the top true on the next call */
bd->writeCopies++;
return 0;
}
/* Allocate a decoder state, store it in *bdp and parse the "h<digit>" part of
 * the stream header (the caller is expected to have verified the first two
 * magic bytes itself).
 *
 * in_fd == -1: inbuf must hold the complete compressed data (len bytes); it
 *              is referenced, not copied, and is only ever read.
 * in_fd != -1: an IOBUF_SIZE input buffer is placed in the same allocation,
 *              right behind the struct, and primed with the len bytes found
 *              at inbuf.
 *
 * Returns RETVAL_OK, RETVAL_NOT_BZIP_DATA for a bad header, or whatever value
 * arrives through the setjmp() error path. On those returns *bdp stays
 * allocated. If the block buffer cannot be allocated, bd is freed and
 * xfunc_die() is called.
 *
 * Because bunzip2 is used for help text unpacking, and because bb_show_usage()
 * should work for NOFORK applets too, we must be extremely careful to not leak
 * any allocations! */
int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
		const void *inbuf, int len)
{
	enum {
		h0 = ('h' << 8) + '0',
	};
	bunzip_data *bd;
	unsigned alloc_size;
	unsigned header;
	unsigned level;
	int jmp_status;

	/* The struct alone suffices for in-memory input; file input
	 * additionally needs room for the I/O buffer */
	alloc_size = sizeof(bunzip_data);
	if (in_fd != -1)
		alloc_size += IOBUF_SIZE;

	/* Zero-filled allocation: most fields start out as 0 */
	bd = xzalloc(alloc_size);
	*bdp = bd;

	/* Input side */
	bd->in_fd = in_fd;
	bd->inbufCount = len;
	if (in_fd != -1) {
		bd->inbuf = (uint8_t*)(bd + 1);
		memcpy(bd->inbuf, inbuf, len);
	} else {
		/* read-only from here on; the cast merely drops const */
		bd->inbuf = (void*)inbuf;
	}

	/* Big endian CRC32 table */
	crc32_filltable(bd->crc32Table, 1);

	/* I/O errors are reported via longjmp to this point */
	jmp_status = setjmp(bd->jmpbuf);
	if (jmp_status != 0)
		return jmp_status;

	/* Next two bytes must be 'h' followed by a digit '1'..'9' */
	header = get_bits(bd, 16);
	level = header - h0;
	if (level - 1 >= 9)
		return RETVAL_NOT_BZIP_DATA;

	/* The digit gives the block size in units of 100k of uncompressed
	 * data; size the intermediate buffer accordingly */
	bd->dbufSize = 100000 * level;

	/* Cannot use xmalloc - may leak bd in NOFORK case! */
	bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
	if (!bd->dbuf) {
		free(bd);
		xfunc_die();
	}
	return RETVAL_OK;
}
/* Release a decoder state: first the intermediate block buffer hanging off
 * it, then the structure itself. bd must not be NULL. */
void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
{
	void *block_buf = bd->dbuf;

	free(block_buf);
	free(bd);
}
/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file.
 *
 * The leading "BZ" magic is not read here (start_bunzip() begins at the 'h'
 * byte). After each complete stream the input is probed for another "BZ"
 * and, if found, decoding continues with that stream.
 *
 * Returns a non-zero RETVAL_xxx code on failure (RETVAL_SHORT_WRITE when the
 * output could not be written, RETVAL_LAST_BLOCK on a stream CRC mismatch,
 * otherwise the decoder's error code). On success returns 0, or the number
 * of bytes written where IF_DESKTOP() keeps its argument.
 */
IF_DESKTOP(long long) int FAST_FUNC
unpack_bz2_stream(int src_fd, int dst_fd)
{
	IF_DESKTOP(long long total_written = 0;)
	bunzip_data *bd;
	char *outbuf;
	int i;
	unsigned len;

	outbuf = xmalloc(IOBUF_SIZE);
	len = 0;
	while (1) { /* "Process one BZ... stream" loop */
		/* outbuf + 2: leftover input was stashed at outbuf[0..], and its
		 * first two bytes ("BZ") have already been checked below */
		i = start_bunzip(&bd, src_fd, outbuf + 2, len);

		if (i == 0) {
			while (1) { /* "Produce some output bytes" loop */
				i = read_bunzip(bd, outbuf, IOBUF_SIZE);
				if (i < 0) /* error? */
					break;
				i = IOBUF_SIZE - i; /* number of bytes produced */
				if (i == 0) /* EOF? */
					break;
				if (i != full_write(dst_fd, outbuf, i)) {
					bb_error_msg("short write");
					i = RETVAL_SHORT_WRITE;
					goto release_mem;
				}
				IF_DESKTOP(total_written += i;)
			}
		}

		/* i == RETVAL_OK here means read_bunzip() hit the end of the
		 * stream without producing a single byte in its final call
		 * (e.g. a compressed zero-length file). That is a normal end of
		 * stream, not an error. */
		if (i != RETVAL_LAST_BLOCK && i != RETVAL_OK) {
			bb_error_msg("bunzip error %d", i);
			break;
		}
		if (bd->headerCRC != bd->totalCRC) {
			bb_error_msg("CRC error");
			/* Make sure a CRC failure is never reported as success,
			 * whichever of the two end-of-stream paths led here */
			i = RETVAL_LAST_BLOCK;
			break;
		}

		/* Successfully unpacked one BZ stream */
		i = RETVAL_OK;

		/* Do we have "BZ..." after last processed byte?
		 * pbzip2 (parallelized bzip2) produces such files.
		 */
		len = bd->inbufCount - bd->inbufPos;
		memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
		if (len < 2) {
			/* Not enough buffered input to see the magic: read the rest */
			if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len)
				break;
			len = 2;
		}
		if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */
			break;
		dealloc_bunzip(bd);
		len -= 2;
	}

 release_mem:
	dealloc_bunzip(bd);
	free(outbuf);

	return i ? i : IF_DESKTOP(total_written) + 0;
}
/* Like unpack_bz2_stream(), but for callers that have not consumed the
 * two-byte stream magic yet: read it from src_fd, die with "invalid magic"
 * if it is not BZIP2_MAGIC, then decode the rest of the stream. */
IF_DESKTOP(long long) int FAST_FUNC
unpack_bz2_stream_prime(int src_fd, int dst_fd)
{
	uint16_t signature;

	xread(src_fd, &signature, 2);
	if (signature != BZIP2_MAGIC)
		bb_error_msg_and_die("invalid magic");

	return unpack_bz2_stream(src_fd, dst_fd);
}
#ifdef TESTING
static char *const bunzip_errors[] = {
NULL, "Bad file checksum", "Not bzip data",
"Unexpected input EOF", "Unexpected output EOF", "Data error",
"Out of memory", "Obsolete (pre 0.9.5) bzip format not supported"
};
/* Dumb little test thing, decompress stdin to stdout */
int main(int argc, char **argv)
{
int i;
char c;
int i = unpack_bz2_stream_prime(0, 1);
if (i < 0)
fprintf(stderr, "%s\n", bunzip_errors[-i]);
else if (read(STDIN_FILENO, &c, 1))
fprintf(stderr, "Trailing garbage ignored\n");
return -i;
}
#endif

View File

@@ -0,0 +1,307 @@
/* vi: set sw=4 ts=4: */
/* uncompress for busybox -- (c) 2002 Robert Griebl
*
* based on the original compress42.c source
* (see disclaimer below)
*/
/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
*
* Authors:
* Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
* Dave Mack (csu@alembic.acs.com)
* Peter Jannesen, Network Communication Systems
* (peter@ncs.nl)
*
* marc@suse.de : a small security fix for a buffer overflow
*
* [... History snipped ...]
*
*/
#include "libbb.h"
#include "archive.h"
/* Default input buffer size */
#define IBUFSIZ 2048
/* Default output buffer size */
#define OBUFSIZ 2048
/* Defines for third byte of header */
#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */
/* Masks 0x20 and 0x40 are free. */
/* I think 0x20 should mean that there is */
/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80 /* Block compression if table is full and */
/* compression rate is dropping flush tables */
/* the next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
#define INIT_BITS 9 /* initial number of bits/code */
/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define HBITS 17 /* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1) /* unused */
#define HPRIME 9941 /* unused */
#define BITS 16
#define BITS_STR "16"
#undef MAXSEG_64K /* unused */
#define MAXCODE(n) (1L << (n))
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[i]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
#define clear_tab_prefixof() memset(codetab, 0, 256)
/*
 * Decompress fd_in to fd_out. This routine adapts to the codes in the
 * file building the "string" table on-the-fly; requiring no table to
 * be stored in the compressed file.
 *
 * The first byte read is interpreted as the flags byte of the header
 * (maximum code width in the low 5 bits, BLOCK_MODE in the top bit);
 * presumably the caller has already consumed the magic bytes before it.
 *
 * Returns -1 on any failure (short header, unsupported code width, corrupt
 * data, read or write error). On success returns 0, or the number of bytes
 * written where IF_DESKTOP() keeps its argument. All buffers are released
 * on every path so that the caller can clean up after a failure.
 */
IF_DESKTOP(long long) int FAST_FUNC
unpack_Z_stream(int fd_in, int fd_out)
{
	IF_DESKTOP(long long total_written = 0;)
	IF_DESKTOP(long long) int retval = -1;
	unsigned char *stackp;
	long code;
	int finchar;
	long oldcode;
	long incode;
	int inbits;
	int posbits;
	int outpos;
	int insize;
	int bitmask;
	long free_ent;
	long maxcode;
	long maxmaxcode;
	int n_bits;
	int rsize = 0;
	unsigned char *inbuf; /* were eating insane amounts of stack - */
	unsigned char *outbuf; /* bad for some embedded targets */
	unsigned char *htab;
	unsigned short *codetab;

	/* Hmm, these were statics - why?! */
	/* user settable max # bits/code */
	int maxbits; /* = BITS; */
	/* block compress mode -C compatible with 2.0 */
	int block_mode; /* = BLOCK_MODE; */

	inbuf = xzalloc(IBUFSIZ + 64);
	outbuf = xzalloc(OBUFSIZ + 2048);
	htab = xzalloc(HSIZE);  /* wasn't zeroed out before, maybe can xmalloc? */
	codetab = xzalloc(HSIZE * sizeof(codetab[0]));

	insize = 0;

	/* xread isn't good here, we have to return - caller may want
	 * to do some cleanup (e.g. delete incomplete unpacked file etc) */
	if (full_read(fd_in, inbuf, 1) != 1) {
		bb_error_msg("short read");
		goto err;
	}

	maxbits = inbuf[0] & BIT_MASK;
	block_mode = inbuf[0] & BLOCK_MODE;

	if (maxbits > BITS) {
		bb_error_msg("compressed with %d bits, can only handle "
				BITS_STR" bits", maxbits);
		goto err;
	}
	/* Only shift once maxbits is known to be in range */
	maxmaxcode = MAXCODE(maxbits);

	n_bits = INIT_BITS;
	maxcode = MAXCODE(INIT_BITS) - 1;
	bitmask = (1 << INIT_BITS) - 1;
	oldcode = -1;
	finchar = 0;
	outpos = 0;
	posbits = 0 << 3;

	free_ent = ((block_mode) ? FIRST : 256);

	/* As above, initialize the first 256 entries in the table. */
	/*clear_tab_prefixof(); - done by xzalloc */

	for (code = 255; code >= 0; --code) {
		tab_suffixof(code) = (unsigned char) code;
	}

	do {
 resetbuf:
		{
			int i;
			int e;
			int o;

			/* Move the not yet consumed input to the buffer start */
			o = posbits >> 3;
			e = insize - o;

			for (i = 0; i < e; ++i)
				inbuf[i] = inbuf[i + o];

			insize = e;
			posbits = 0;
		}

		if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
			rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
			if (rsize < 0) {
				/* Do not let a -1 leak into the byte count */
				bb_error_msg("read error");
				goto err;
			}
			insize += rsize;
		}

		inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
				  (insize << 3) - (n_bits - 1));

		while (inbits > posbits) {
			if (free_ent > maxcode) {
				/* Code width grows: skip to the next group boundary */
				posbits =
					((posbits - 1) +
					 ((n_bits << 3) -
					  (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
				++n_bits;
				if (n_bits == maxbits) {
					maxcode = maxmaxcode;
				} else {
					maxcode = MAXCODE(n_bits) - 1;
				}
				bitmask = (1 << n_bits) - 1;
				goto resetbuf;
			}
			{
				unsigned char *p = &inbuf[posbits >> 3];

				code = ((((long) (p[0])) | ((long) (p[1]) << 8) |
				         ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
			}
			posbits += n_bits;

			if (oldcode == -1) {
				/* The very first code must be a literal byte. Accepting
				 * a table code here would let later entries use a not
				 * yet defined code as their prefix, so that prefix
				 * chains may loop and overrun the decode stack. */
				if (code >= 256) {
					bb_error_msg("corrupted data");
					goto err;
				}
				oldcode = code;
				finchar = (int) oldcode;
				outbuf[outpos++] = (unsigned char) finchar;
				continue;
			}

			if (code == CLEAR && block_mode) {
				clear_tab_prefixof();
				free_ent = FIRST - 1;
				posbits =
					((posbits - 1) +
					 ((n_bits << 3) -
					  (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
				n_bits = INIT_BITS;
				maxcode = MAXCODE(INIT_BITS) - 1;
				bitmask = (1 << INIT_BITS) - 1;
				goto resetbuf;
			}

			incode = code;
			stackp = de_stack;

			/* Special case for KwKwK string. */
			if (code >= free_ent) {
				if (code > free_ent) {
					/* (The former debug dump of the input bytes read
					 * one byte before inbuf when posbits < 8.) */
					bb_error_msg("corrupted data");
					goto err;
				}

				*--stackp = (unsigned char) finchar;
				code = oldcode;
			}

			/* Generate output characters in reverse order */
			while ((long) code >= (long) 256) {
				*--stackp = tab_suffixof(code);
				code = tab_prefixof(code);
			}

			finchar = tab_suffixof(code);
			*--stackp = (unsigned char) finchar;

			/* And put them out in forward order */
			{
				int i;

				i = de_stack - stackp;
				if (outpos + i >= OBUFSIZ) {
					do {
						if (i > OBUFSIZ - outpos) {
							i = OBUFSIZ - outpos;
						}

						if (i > 0) {
							memcpy(outbuf + outpos, stackp, i);
							outpos += i;
						}

						if (outpos >= OBUFSIZ) {
							if (full_write(fd_out, outbuf, outpos) != outpos) {
								bb_error_msg("short write");
								goto err;
							}
							IF_DESKTOP(total_written += outpos;)
							outpos = 0;
						}
						stackp += i;
						i = de_stack - stackp;
					} while (i > 0);
				} else {
					memcpy(outbuf + outpos, stackp, i);
					outpos += i;
				}
			}

			/* Generate the new entry. */
			code = free_ent;
			if (code < maxmaxcode) {
				tab_prefixof(code) = (unsigned short) oldcode;
				tab_suffixof(code) = (unsigned char) finchar;
				free_ent = code + 1;
			}

			/* Remember previous code. */
			oldcode = incode;
		}

	} while (rsize > 0);

	/* Flush whatever is still sitting in the output buffer */
	if (outpos > 0) {
		if (full_write(fd_out, outbuf, outpos) != outpos) {
			bb_error_msg("short write");
			goto err;
		}
		IF_DESKTOP(total_written += outpos;)
	}

	retval = IF_DESKTOP(total_written) + 0;
 err:
	free(inbuf);
	free(outbuf);
	free(htab);
	free(codetab);
	return retval;
}

View File

@@ -0,0 +1,465 @@
/* vi: set sw=4 ts=4: */
/*
* Small lzma deflate implementation.
* Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
*
* Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
* Copyright (C) 1999-2005 Igor Pavlov
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
#if ENABLE_FEATURE_LZMA_FAST
# define speed_inline ALWAYS_INLINE
# define size_inline
#else
# define speed_inline
# define size_inline ALWAYS_INLINE
#endif
/* Range decoder state. The input buffer is not a separate allocation:
 * it sits directly behind this struct (see RC_BUFFER below). */
typedef struct {
/* descriptor the compressed input is read from */
int fd;
/* next unread input byte */
uint8_t *ptr;
/* Was keeping rc on stack in unlzma and separately allocating buffer,
* but with "buffer 'attached to' allocated rc" code is smaller: */
/* uint8_t *buffer; */
/* NB: expands to an expression using a variable named "rc" */
#define RC_BUFFER ((uint8_t*)(rc+1))
/* one past the last valid input byte */
uint8_t *buffer_end;
/* Had provisions for variable buffer, but we don't need it here */
/* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000
/* range coder registers: current code value, current range width, and
* the split point computed for the bit being decoded */
uint32_t code;
uint32_t range;
uint32_t bound;
} rc_t;
#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11
/* Refill the input buffer attached to rc from rc->fd and point rc->ptr at
 * its start. Dies with "unexpected EOF" if nothing could be read.
 * Called twice: once at startup (LZMA_FAST only) and once in
 * rc_do_normalize() */
static size_inline void rc_read(rc_t *rc)
{
	uint8_t *const buf_start = RC_BUFFER;
	int nread;

	nread = safe_read(rc->fd, buf_start, RC_BUFFER_SIZE);
	/* TODO: return -1 instead. This will make unlzma delete broken
	 * unpacked file on unpack errors */
	if (nread <= 0)
		bb_error_msg_and_die("unexpected EOF");

	rc->buffer_end = buf_start + nread;
	rc->ptr = buf_start;
}
/* Shift one more input byte into rc->code and widen rc->range by 8 bits,
 * refilling the input buffer first if it is exhausted.
 * Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */
static void rc_do_normalize(rc_t *rc)
{
	uint32_t next_byte;

	if (rc->ptr >= rc->buffer_end)
		rc_read(rc);

	next_byte = *rc->ptr++;
	rc->code = (rc->code << 8) | next_byte;
	rc->range <<= 8;
}
/* Allocate a zeroed decoder state with its input buffer attached, bind it to
 * fd, and prime rc->code with the first 5 input bytes. The range starts out
 * at its maximum width.
 * Called once */
static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */
{
	rc_t *rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE);
	int bytes_left;

	rc->fd = fd;
	/* ptr and buffer_end are both zero here, so the very first
	 * byte fetch triggers a buffer fill */
	for (bytes_left = 5; bytes_left != 0; bytes_left--) {
#if ENABLE_FEATURE_LZMA_FAST
		if (rc->ptr >= rc->buffer_end)
			rc_read(rc);
		rc->code = (rc->code << 8) | *rc->ptr++;
#else
		rc_do_normalize(rc);
#endif
	}
	rc->range = 0xFFFFFFFF;
	return rc;
}
/* Release a decoder state. The input buffer is part of the same allocation
 * (see rc_init), so a single free() covers both.
 * Called once */
static ALWAYS_INLINE void rc_free(rc_t *rc)
{
free(rc);
}
/* Pull in another input byte whenever the range has shrunk below
 * 2^RC_TOP_BITS; otherwise do nothing. */
static ALWAYS_INLINE void rc_normalize(rc_t *rc)
{
	if (rc->range >= (1 << RC_TOP_BITS))
		return;
	rc_do_normalize(rc);
}
/* Decode one bit using the adaptive probability *p and update *p:
 * it is raised after a 0 bit and lowered after a 1 bit, each time by
 * 1/2^RC_MOVE_BITS of the remaining distance. Returns the decoded bit.
 * rc_is_bit_1 is called 9 times */
static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p)
{
	uint32_t prob;

	rc_normalize(rc);

	/* Split the current range in proportion to the probability */
	prob = *p;
	rc->bound = prob * (rc->range >> RC_MODEL_TOTAL_BITS);

	if (rc->code >= rc->bound) {
		/* upper part of the range: bit is 1 */
		rc->range -= rc->bound;
		rc->code -= rc->bound;
		*p -= *p >> RC_MOVE_BITS;
		return 1;
	}

	/* lower part of the range: bit is 0 */
	rc->range = rc->bound;
	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
	return 0;
}
/* Decode one bit with probability *p, append it to *symbol as the new
 * lowest bit, and return it.
 * Called 4 times in unlzma loop */
static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol)
{
	int bit = rc_is_bit_1(rc, p);
	int doubled = *symbol * 2;

	*symbol = doubled + bit;
	return bit;
}
/* Decode one bit without a probability model: halve the range and report
 * which half the code falls into.
 * Called once */
static ALWAYS_INLINE int rc_direct_bit(rc_t *rc)
{
	uint32_t half;

	rc_normalize(rc);

	half = rc->range >> 1;
	rc->range = half;
	if (rc->code < half)
		return 0;

	rc->code -= half;
	return 1;
}
/* Decode a num_levels-bit value. Starting from 1, each decoded bit is
 * appended to *symbol, and the running value selects the probability used
 * for the next bit (p + *symbol). The leading 1 is subtracted at the end.
 * Called twice */
static speed_inline void
rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol)
{
	int levels_left;

	*symbol = 1;
	for (levels_left = num_levels; levels_left != 0; levels_left--)
		rc_get_bit(rc, p + *symbol, symbol);

	*symbol -= 1 << num_levels;
}
/* Stream header, read from the input in one piece (hence PACKED). */
typedef struct {
/* properties byte: unpack_lzma_stream() splits it as
* lc = pos % 9, lp = (pos / 9) % 5, pb = (pos / 9) / 5
* and rejects values >= 9 * 5 * 5 */
uint8_t pos;
/* dictionary size; converted with SWAP_LE32() after reading */
uint32_t dict_size;
/* uncompressed size; converted with SWAP_LE64() after reading */
uint64_t dst_size;
} PACKED lzma_header_t;
/* #defines will force compiler to compute/optimize each one with each usage.
* Have heart and use enum instead. */
/* Sizes, bit widths and offsets describing the layout of the probability
* array used by unpack_lzma_stream(). */
enum {
/* used there to size the probability array:
* LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)) entries */
LZMA_BASE_SIZE = 1846,
LZMA_LIT_SIZE = 768,
LZMA_NUM_POS_BITS_MAX = 4,
/* bit widths of the three length ranges (low / mid / high) */
LZMA_LEN_NUM_LOW_BITS = 3,
LZMA_LEN_NUM_MID_BITS = 3,
LZMA_LEN_NUM_HIGH_BITS = 8,
/* offsets inside one length coder; added to a length coder's base
* (LZMA_LEN_CODER or LZMA_REP_LEN_CODER) */
LZMA_LEN_CHOICE = 0,
LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1),
LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1),
LZMA_LEN_MID = (LZMA_LEN_LOW \
+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
LZMA_LEN_HIGH = (LZMA_LEN_MID \
+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
/* total number of entries in one length coder */
LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),
LZMA_NUM_STATES = 12,
/* states below this value are compared against with "state < ..." to
* pick the next state; states at or above it make literal decoding
* consult the byte at distance rep0 */
LZMA_NUM_LIT_STATES = 7,
LZMA_START_POS_MODEL_INDEX = 4,
LZMA_END_POS_MODEL_INDEX = 14,
LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),
LZMA_NUM_POS_SLOT_BITS = 6,
LZMA_NUM_LEN_TO_POS_STATES = 4,
LZMA_NUM_ALIGN_BITS = 4,
/* added to every decoded match length */
LZMA_MATCH_MIN_LEN = 2,
/* offsets of the individual sub-tables inside the probability array,
* each one starting where the previous one ends */
LZMA_IS_MATCH = 0,
LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES),
LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \
+ (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
LZMA_SPEC_POS = (LZMA_POS_SLOT \
+ (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
LZMA_ALIGN = (LZMA_SPEC_POS \
+ LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
/* start of the literal coder tables */
LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};
IF_DESKTOP(long long) int FAST_FUNC
unpack_lzma_stream(int src_fd, int dst_fd)
{
IF_DESKTOP(long long total_written = 0;)
lzma_header_t header;
int lc, pb, lp;
uint32_t pos_state_mask;
uint32_t literal_pos_mask;
uint16_t *p;
int num_bits;
int num_probs;
rc_t *rc;
int i;
uint8_t *buffer;
uint8_t previous_byte = 0;
size_t buffer_pos = 0, global_pos = 0;
int len = 0;
int state = 0;
uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
if (full_read(src_fd, &header, sizeof(header)) != sizeof(header)
|| header.pos >= (9 * 5 * 5)
) {
bb_error_msg("bad lzma header");
return -1;
}
i = header.pos / 9;
lc = header.pos % 9;
pb = i / 5;
lp = i % 5;
pos_state_mask = (1 << pb) - 1;
literal_pos_mask = (1 << lp) - 1;
header.dict_size = SWAP_LE32(header.dict_size);
header.dst_size = SWAP_LE64(header.dst_size);
if (header.dict_size == 0)
header.dict_size++;
buffer = xmalloc(MIN(header.dst_size, header.dict_size));
num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
p = xmalloc(num_probs * sizeof(*p));
num_probs += LZMA_LITERAL - LZMA_BASE_SIZE;
for (i = 0; i < num_probs; i++)
p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */
while (global_pos + buffer_pos < header.dst_size) {
int pos_state = (buffer_pos + global_pos) & pos_state_mask;
uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
if (!rc_is_bit_1(rc, prob)) {
static const char next_state[LZMA_NUM_STATES] =
{ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
int mi = 1;
prob = (p + LZMA_LITERAL
+ (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
+ (previous_byte >> (8 - lc))
)
)
);
if (state >= LZMA_NUM_LIT_STATES) {
int match_byte;
uint32_t pos = buffer_pos - rep0;
while (pos >= header.dict_size)
pos += header.dict_size;
match_byte = buffer[pos];
do {
int bit;
match_byte <<= 1;
bit = match_byte & 0x100;
bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */
if (bit)
break;
} while (mi < 0x100);
}
while (mi < 0x100) {
rc_get_bit(rc, prob + mi, &mi);
}
state = next_state[state];
previous_byte = (uint8_t) mi;
#if ENABLE_FEATURE_LZMA_FAST
one_byte1:
buffer[buffer_pos++] = previous_byte;
if (buffer_pos == header.dict_size) {
buffer_pos = 0;
global_pos += header.dict_size;
if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
goto bad;
IF_DESKTOP(total_written += header.dict_size;)
}
#else
len = 1;
goto one_byte2;
#endif
} else {
int offset;
uint16_t *prob2;
#define prob_len prob2
prob2 = p + LZMA_IS_REP + state;
if (!rc_is_bit_1(rc, prob2)) {
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
prob2 = p + LZMA_LEN_CODER;
} else {
prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP;
if (!rc_is_bit_1(rc, prob2)) {
prob2 = (p + LZMA_IS_REP_0_LONG
+ (state << LZMA_NUM_POS_BITS_MAX)
+ pos_state
);
if (!rc_is_bit_1(rc, prob2)) {
#if ENABLE_FEATURE_LZMA_FAST
uint32_t pos = buffer_pos - rep0;
state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
while (pos >= header.dict_size)
pos += header.dict_size;
previous_byte = buffer[pos];
goto one_byte1;
#else
state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
len = 1;
goto string;
#endif
}
} else {
uint32_t distance;
prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0;
distance = rep1;
if (rc_is_bit_1(rc, prob2)) {
prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1;
distance = rep2;
if (rc_is_bit_1(rc, prob2)) {
distance = rep3;
rep3 = rep2;
}
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
prob2 = p + LZMA_REP_LEN_CODER;
}
prob_len = prob2 + LZMA_LEN_CHOICE;
num_bits = LZMA_LEN_NUM_LOW_BITS;
if (!rc_is_bit_1(rc, prob_len)) {
prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE
+ (pos_state << LZMA_LEN_NUM_LOW_BITS);
offset = 0;
} else {
prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE;
if (!rc_is_bit_1(rc, prob_len)) {
prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2
+ (pos_state << LZMA_LEN_NUM_MID_BITS);
offset = 1 << LZMA_LEN_NUM_LOW_BITS;
num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS;
} else {
prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2;
offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
+ (1 << LZMA_LEN_NUM_MID_BITS));
num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS;
}
}
rc_bit_tree_decode(rc, prob_len, num_bits, &len);
len += offset;
if (state < 4) {
int pos_slot;
uint16_t *prob3;
state += LZMA_NUM_LIT_STATES;
prob3 = p + LZMA_POS_SLOT +
((len < LZMA_NUM_LEN_TO_POS_STATES ? len :
LZMA_NUM_LEN_TO_POS_STATES - 1)
<< LZMA_NUM_POS_SLOT_BITS);
rc_bit_tree_decode(rc, prob3,
LZMA_NUM_POS_SLOT_BITS, &pos_slot);
rep0 = pos_slot;
if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
int i2, mi2, num_bits2 = (pos_slot >> 1) - 1;
rep0 = 2 | (pos_slot & 1);
if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
rep0 <<= num_bits2;
prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
} else {
for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--)
rep0 = (rep0 << 1) | rc_direct_bit(rc);
rep0 <<= LZMA_NUM_ALIGN_BITS;
prob3 = p + LZMA_ALIGN;
}
i2 = 1;
mi2 = 1;
while (num_bits2--) {
if (rc_get_bit(rc, prob3 + mi2, &mi2))
rep0 |= i2;
i2 <<= 1;
}
}
if (++rep0 == 0)
break;
}
len += LZMA_MATCH_MIN_LEN;
IF_NOT_FEATURE_LZMA_FAST(string:)
do {
uint32_t pos = buffer_pos - rep0;
while (pos >= header.dict_size)
pos += header.dict_size;
previous_byte = buffer[pos];
IF_NOT_FEATURE_LZMA_FAST(one_byte2:)
buffer[buffer_pos++] = previous_byte;
if (buffer_pos == header.dict_size) {
buffer_pos = 0;
global_pos += header.dict_size;
if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
goto bad;
IF_DESKTOP(total_written += header.dict_size;)
}
len--;
} while (len != 0 && buffer_pos < header.dst_size);
}
}
{
IF_NOT_DESKTOP(int total_written = 0; /* success */)
IF_DESKTOP(total_written += buffer_pos;)
if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) {
bad:
total_written = -1; /* failure */
}
rc_free(rc);
free(p);
free(buffer);
return total_written;
}
}

View File

@@ -0,0 +1,98 @@
/*
* This file uses XZ Embedded library code which is written
* by Lasse Collin <lasse.collin@tukaani.org>
* and Igor Pavlov <http://7-zip.org/>
*
* See README file in unxz/ directory for more information.
*
* This file is:
* Copyright (C) 2010 Denys Vlasenko <vda.linux@googlemail.com>
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
#define XZ_FUNC FAST_FUNC
#define XZ_EXTERN static
#define XZ_DEC_DYNALLOC
/* Skip check (rather than fail) of unsupported hash functions */
#define XZ_DEC_ANY_CHECK 1
/* We use our own crc32 function */
#define XZ_INTERNAL_CRC32 0
/*
 * CRC32 hook handed to the XZ Embedded sources included below.
 * The incoming and outgoing CRC values are bit-inverted around the call to
 * crc32_block_endian0(), which works on global_crc32_table.
 */
static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
	uint32_t running;

	running = crc32_block_endian0(~crc, buf, size, global_crc32_table);
	return ~running;
}
/*
 * Unaligned 32-bit accessors expected by the XZ Embedded sources.
 * They are built on the arch-optimized move_{from,to}_unaligned32() helpers
 * plus the SWAP_LE32/SWAP_BE32 byte-order conversions.
 *
 * The put_* variants must store all four bytes: a 16-bit store would leave
 * the upper half of the value unwritten.
 * NOTE(review): move_to_unaligned32() is assumed to be provided by the same
 * platform header that provides move_to_unaligned16() - confirm.
 */
#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); })
#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); })
#define put_unaligned_le32(val, buf) move_to_unaligned32(buf, SWAP_LE32(val))
#define put_unaligned_be32(val, buf) move_to_unaligned32(buf, SWAP_BE32(val))
#include "unxz/xz_dec_bcj.c"
#include "unxz/xz_dec_lzma2.c"
#include "unxz/xz_dec_stream.c"
/*
 * Decompress an XZ stream read from src_fd and write the result to dst_fd.
 *
 * Returns -1 after a read error or when the decoder reports anything other
 * than XZ_OK / XZ_UNSUPPORTED_CHECK / XZ_STREAM_END.  Otherwise returns the
 * running total, which is only accumulated in IF_DESKTOP builds (it stays 0
 * in other builds).  Output goes through xwrite().
 */
IF_DESKTOP(long long) int FAST_FUNC
unpack_xz_stream(int src_fd, int dst_fd)
{
	struct xz_buf xb;
	struct xz_dec *dec;
	unsigned char *arena;
	IF_DESKTOP(long long) int result = 0;

	if (!global_crc32_table)
		global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0);

	/* One allocation, two halves: input at [0, BUFSIZ), output after it */
	arena = xmalloc(2 * BUFSIZ);
	/* Preload XZ file signature so the decoder sees it as its first input.
	 * NOTE(review): presumably the caller has already consumed the magic
	 * from src_fd - confirm against callers.
	 */
	strcpy((char *)arena, HEADER_MAGIC);

	memset(&xb, 0, sizeof(xb));
	xb.in = arena;
	xb.in_size = HEADER_MAGIC_SIZE;
	xb.out = arena + BUFSIZ;
	xb.out_size = BUFSIZ;

	/* Limit memory usage to about 64 MiB. */
	dec = xz_dec_init(XZ_DYNALLOC, 64*1024*1024);

	for (;;) {
		enum xz_ret rc;

		/* Refill the input half once the decoder has consumed it all */
		if (xb.in_pos == xb.in_size) {
			int nread = safe_read(src_fd, arena, BUFSIZ);
			if (nread < 0) {
				bb_error_msg(bb_msg_read_error);
				result = -1;
				break;
			}
			xb.in_size = nread;
			xb.in_pos = 0;
		}

		rc = xz_dec_run(dec, &xb);

		/* Flush whatever was produced, even if rc signals an error */
		if (xb.out_pos != 0) {
			xwrite(dst_fd, xb.out, xb.out_pos);
			IF_DESKTOP(result += xb.out_pos;)
			xb.out_pos = 0;
		}

		if (rc == XZ_STREAM_END)
			break;
		if (rc == XZ_OK || rc == XZ_UNSUPPORTED_CHECK)
			continue;

		bb_error_msg("corrupted data");
		result = -1;
		break;
	}

	xz_dec_end(dec);
	free(arena);
	return result;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
/* vi: set sw=4 ts=4: */
/*
* Copyright (C) 2002 by Glenn McGrath
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Degenerate filter: every entry whose header carries a non-NULL name is
 * accepted (EXIT_SUCCESS); an entry without a name is rejected (EXIT_FAILURE).
 */
char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle)
{
	const char *entry_name = archive_handle->file_header->name;

	return entry_name ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@@ -0,0 +1,19 @@
/* vi: set sw=4 ts=4: */
/*
* Copyright (C) 2002 by Glenn McGrath
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Accept an entry (EXIT_SUCCESS) only when its name matches a pattern in the
 * handle's accept list.  The reject list is not consulted.
 */
char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle)
{
	const llist_t *hit;

	hit = find_list_entry(archive_handle->accept, archive_handle->file_header->name);
	return (hit != NULL) ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@@ -0,0 +1,51 @@
/* vi: set sw=4 ts=4: */
/*
* Copyright (C) 2002 by Glenn McGrath
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */

/*
 * Accept an entry that is on the accept list and, based on the text after
 * the last '.' in its name, select the subarchive header parser:
 *   "gz"   -> get_header_tar_gz    (if FEATURE_SEAMLESS_GZ)
 *   "bz2"  -> get_header_tar_bz2   (if FEATURE_SEAMLESS_BZ2)
 *   "lzma" -> get_header_tar_lzma  (if FEATURE_SEAMLESS_LZMA)
 * The choice is stored in archive_handle->dpkg__action_data_subarchive.
 *
 * Returns EXIT_SUCCESS only when the entry is listed AND its extension is
 * one of the enabled ones above; EXIT_FAILURE otherwise.
 */
char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle)
{
	const char *ext;

	/* Not on the accept list: nothing to do */
	if (!find_list_entry(archive_handle->accept, archive_handle->file_header->name))
		return EXIT_FAILURE;

	/* Locate the extension (text after the final dot) */
	ext = strrchr(archive_handle->file_header->name, '.');
	if (ext == NULL)
		return EXIT_FAILURE;
	ext++;

	/* The ENABLE_* constants come first so that disabled branches
	 * (and their get_header_tar_* references) can be compiled out.
	 */
	if (ENABLE_FEATURE_SEAMLESS_GZ && strcmp(ext, "gz") == 0) {
		archive_handle->dpkg__action_data_subarchive = get_header_tar_gz;
		return EXIT_SUCCESS;
	}
	if (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(ext, "bz2") == 0) {
		archive_handle->dpkg__action_data_subarchive = get_header_tar_bz2;
		return EXIT_SUCCESS;
	}
	if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(ext, "lzma") == 0) {
		archive_handle->dpkg__action_data_subarchive = get_header_tar_lzma;
		return EXIT_SUCCESS;
	}

	return EXIT_FAILURE;
}

View File

@@ -0,0 +1,36 @@
/* vi: set sw=4 ts=4: */
/*
* Copyright (C) 2002 by Glenn McGrath
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Accept an entry unless it matches the reject list, or an accept list
 * exists and the entry does not match it.  Matching is done with
 * find_list_entry2().
 */
char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle)
{
	const char *name = archive_handle->file_header->name;
	const llist_t *hit;

	/* A match in the reject list always loses */
	hit = find_list_entry2(archive_handle->reject, name);
	if (hit != NULL)
		return EXIT_FAILURE;

	/* An empty accept list means "accept everything not rejected" */
	hit = find_list_entry2(archive_handle->accept, name);
	if (archive_handle->accept && hit == NULL)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,54 @@
/* vi: set sw=4 ts=4: */
/*
* Copyright (C) 2002 by Glenn McGrath
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include <fnmatch.h>
#include "libbb.h"
#include "archive.h"
/*
 * Walk a list of shell patterns and return the first node whose pattern
 * matches filename according to fnmatch(pattern, filename, 0).
 * Returns NULL when the list is empty or nothing matches.
 */
const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename)
{
	const llist_t *node;

	for (node = list; node != NULL; node = node->link) {
		if (fnmatch(node->data, filename, 0) == 0)
			return node;
	}
	return NULL;
}
/*
 * Like find_list_entry(), but each pattern is matched only against as many
 * leading path components of filename as the pattern itself contains:
 * filename is cut at the first '/' beyond the pattern's '/' count, so extra
 * trailing components are treated as matching.
 * Returns the first matching node, or NULL.
 */
const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename)
{
	char head[PATH_MAX];
	const llist_t *node;

	for (node = list; node != NULL; node = node->link) {
		const char *pat;
		int slashes_left = 0;
		size_t n;

		/* How many '/' does this pattern contain? */
		for (pat = node->data; *pat != '\0'; pat++) {
			if (*pat == '/')
				slashes_left++;
		}

		/* Copy filename up to (not including) the first surplus '/'.
		 * The copy is also capped to fit the buffer.
		 */
		for (n = 0; filename[n] != '\0' && n != sizeof(head) - 1; n++) {
			if (filename[n] == '/' && --slashes_left < 0)
				break;
			head[n] = filename[n];
		}
		head[n] = '\0';

		if (fnmatch(node->data, head, 0) == 0)
			return node;
	}
	return NULL;
}

View File

@@ -0,0 +1,133 @@
/* vi: set sw=4 ts=4: */
/* Copyright 2001 Glenn McGrath.
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
#include "ar.h"
/*
 * Parse a numeric ar header field in the given base.
 * Dies with "invalid ar header" when bb_strtou() yields the all-ones value,
 * which is what it returns for misformatted numbers.
 */
static unsigned read_num(const char *str, int base)
{
	unsigned value = bb_strtou(str, NULL, base);

	/* all-ones is the error marker */
	if (value == (unsigned)-1)
		bb_error_msg_and_die("invalid ar header");
	return value;
}
/*
 * Read and process one member of an ar archive.
 *
 * Reads the 60-byte member header from archive_handle->src_fd (re-syncing
 * past a single '\n' padding byte), validates the "`\n" trailer magic and
 * decodes the size.  The symbol index ("/ ") is skipped and the long
 * filename table ("//") is cached; in both cases the next header is
 * processed instead.  For a regular member the remaining fields are decoded,
 * the name is resolved (possibly through the long filename table), and the
 * handle's filter/action_header/action_data callbacks are run.
 *
 * Returns EXIT_FAILURE when a full header cannot be read (treated as end of
 * file), EXIT_SUCCESS after a member has been handled.  Malformed headers
 * terminate the program via bb_error_msg_and_die().
 */
char FAST_FUNC get_header_ar(archive_handle_t *archive_handle)
{
	file_header_t *typed = archive_handle->file_header;
	unsigned size;
	union {
		char raw[60];
		struct ar_header formatted;
	} ar;
#if ENABLE_FEATURE_AR_LONG_FILENAMES
	/* Long filename table of the archive, kept across calls */
	static char *ar_long_names;
	static unsigned ar_long_name_size;
#endif

	/* don't use xread as we want to handle the error ourselves */
	if (read(archive_handle->src_fd, ar.raw, 60) != 60) {
		/* End Of File */
		return EXIT_FAILURE;
	}

	/* ar header starts on an even byte (2 byte aligned)
	 * '\n' is used for padding
	 */
	if (ar.raw[0] == '\n') {
		/* fix up the header, we started reading 1 byte too early */
		memmove(ar.raw, &ar.raw[1], 59);
		ar.raw[59] = xread_char(archive_handle->src_fd);
		archive_handle->offset++;
	}
	archive_handle->offset += 60;

	if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n')
		bb_error_msg_and_die("invalid ar header");

	/* FIXME: more thorough routine would be in order here
	 * (we have something like that in tar)
	 * but for now we are lax. */
	ar.formatted.magic[0] = '\0'; /* else 4G-2 file will have size="4294967294`\n..." */
	typed->size = size = read_num(ar.formatted.size, 10);

	/* special filenames have '/' as the first character */
	if (ar.formatted.name[0] == '/') {
		if (ar.formatted.name[1] == ' ') {
			/* This is the index of symbols in the file for compilers */
			data_skip(archive_handle);
			archive_handle->offset += size;
			return get_header_ar(archive_handle); /* Return next header */
		}
#if ENABLE_FEATURE_AR_LONG_FILENAMES
		if (ar.formatted.name[1] == '/') {
			/* If the second char is a '/' then this entry's data section
			 * stores long filenames for multiple entries; they are kept
			 * in the static ar_long_names for use by later entries.
			 */
			ar_long_name_size = size;
			free(ar_long_names);
			/* Allocate one extra zeroed byte: the table read from the
			 * archive carries no NUL terminator of its own, and names
			 * are later copied out of it with xstrdup().
			 * (size + 1 cannot wrap: read_num() rejects all-ones.)
			 */
			ar_long_names = xzalloc(size + 1);
			xread(archive_handle->src_fd, ar_long_names, size);
			archive_handle->offset += size;
			/* Return next header */
			return get_header_ar(archive_handle);
		}
#else
		bb_error_msg_and_die("long filenames not supported");
#endif
	}

	/* Only size is always present, the rest may be missing in
	 * long filename pseudo file. Thus we decode the rest
	 * after dealing with long filename pseudo file.
	 */
	typed->mode = read_num(ar.formatted.mode, 8);
	typed->mtime = read_num(ar.formatted.date, 10);
	typed->uid = read_num(ar.formatted.uid, 10);
	typed->gid = read_num(ar.formatted.gid, 10);

#if ENABLE_FEATURE_AR_LONG_FILENAMES
	if (ar.formatted.name[0] == '/') {
		unsigned long_offset;

		/* The number after the '/' indicates the offset in the ar data section
		 * (saved in ar_long_names) that contains the real filename */
		long_offset = read_num(&ar.formatted.name[1], 10);
		if (long_offset >= ar_long_name_size) {
			bb_error_msg_and_die("can't resolve long filename");
		}
		typed->name = xstrdup(ar_long_names + long_offset);
	} else
#endif
	{
		/* short filenames */
		typed->name = xstrndup(ar.formatted.name, 16);
	}

	/* Names end at the first space or '/' */
	typed->name[strcspn(typed->name, " /")] = '\0';

	if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
		archive_handle->action_header(typed);
#if ENABLE_DPKG || ENABLE_DPKG_DEB
		if (archive_handle->dpkg__sub_archive) {
			while (archive_handle->dpkg__action_data_subarchive(archive_handle->dpkg__sub_archive) == EXIT_SUCCESS)
				continue;
		} else
#endif
			archive_handle->action_data(archive_handle);
	} else {
		data_skip(archive_handle);
	}

	archive_handle->offset += typed->size;
	/* Set the file pointer to the correct spot, we may have been reading a compressed file */
	lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET);

	return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,186 @@
/* vi: set sw=4 ts=4: */
/* Copyright 2002 Laurence Anderson
*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Bookkeeping record for a hardlinked regular file met by get_header_cpio().
 * Records are kept on two singly linked lists in the archive handle
 * (cpio__hardlinks_to_create and cpio__created_hardlinks).
 * The name is stored inline: records are allocated with
 * sizeof(hardlinks_t) + namesize bytes, so name[] extends past its
 * declared length of 1.
 */
typedef struct hardlinks_t {
struct hardlinks_t *next;
int inode; /* TODO: must match maj/min too! */
int mode ;
int mtime; /* These three are useful only in corner case */
int uid ; /* of hardlinks with zero size body */
int gid ;
char name[1];
} hardlinks_t;
/*
 * Read and process one entry of a cpio archive in "newc" (070701) or
 * "crc" (070702) format.
 *
 * For an ordinary entry: decodes the 110-byte ASCII-hex header, reads the
 * name (leading '/' characters are stripped) and, for symlinks, the link
 * target; then runs the handle's filter and action_data/action_header
 * callbacks, or skips the data.  Regular files with nlink > 1 are recorded:
 * those with a zero-size body are queued for later hardlink creation and
 * are not passed to the callbacks now.
 *
 * At end of input (zero-byte read) or at the "TRAILER!!!" entry, the queued
 * hardlinks are created and both bookkeeping lists are emptied.
 *
 * Returns EXIT_SUCCESS after an entry has been handled or queued,
 * EXIT_FAILURE when there are no more files to process.  Malformed input
 * terminates the program via bb_error_msg_and_die().
 */
char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;
	/* 110 header bytes + NUL: sscanf() below requires a terminated string */
	char cpio_header[110 + 1];
	int namesize;
	int major, minor, nlink, mode, inode;
	unsigned size, uid, gid, mtime;

	/* There can be padding before archive header */
	data_align(archive_handle, 4);

	size = full_read(archive_handle->src_fd, cpio_header, 110);
	if (size == 0) {
		goto create_hardlinks;
	}
	if (size != 110) {
		bb_error_msg_and_die("short read");
	}
	archive_handle->offset += 110;
	cpio_header[110] = '\0';

	if (strncmp(&cpio_header[0], "07070", 5) != 0
	 || (cpio_header[5] != '1' && cpio_header[5] != '2')
	) {
		bb_error_msg_and_die("unsupported cpio format, use newc or crc");
	}

	if (sscanf(cpio_header + 6,
			"%8x" "%8x" "%8x" "%8x"
			"%8x" "%8x" "%8x" /*maj,min:*/ "%*16c"
			/*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/,
			&inode, &mode, &uid, &gid,
			&nlink, &mtime, &size,
			&major, &minor, &namesize) != 10)
		bb_error_msg_and_die("damaged cpio file");
	file_header->mode = mode;
	file_header->uid = uid;
	file_header->gid = gid;
	file_header->mtime = mtime;
	file_header->size = size;

	namesize &= 0x1fff; /* paranoia: limit names to 8k chars */
	file_header->name = xzalloc(namesize + 1);
	/* Read in filename */
	xread(archive_handle->src_fd, file_header->name, namesize);
	if (file_header->name[0] == '/') {
		/* Testcase: echo /etc/hosts | cpio -pvd /tmp
		 * Without this code, it tries to unpack /etc/hosts
		 * into "/etc/hosts", not "etc/hosts".
		 */
		char *p = file_header->name;
		do p++; while (*p == '/');
		overlapping_strcpy(file_header->name, p);
	}
	archive_handle->offset += namesize;

	/* Update offset amount and skip padding before file contents */
	data_align(archive_handle, 4);

	if (strcmp(file_header->name, "TRAILER!!!") == 0) {
		/* Always round up. ">> 9" divides by 512 */
		archive_handle->cpio__blocks = (uoff_t)(archive_handle->offset + 511) >> 9;
		goto create_hardlinks;
	}

	file_header->link_target = NULL;
	if (S_ISLNK(file_header->mode)) {
		file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */
		file_header->link_target = xzalloc(file_header->size + 1);
		xread(archive_handle->src_fd, file_header->link_target, file_header->size);
		archive_handle->offset += file_header->size;
		file_header->size = 0; /* Stop possible seeks in future */
	}

// TODO: data_extract_all can't deal with hardlinks to non-files...
// when fixed, change S_ISREG to !S_ISDIR here

	if (nlink > 1 && S_ISREG(file_header->mode)) {
		hardlinks_t *new = xmalloc(sizeof(*new) + namesize);
		new->inode = inode;
		new->mode  = mode ;
		new->mtime = mtime;
		new->uid   = uid  ;
		new->gid   = gid  ;
		strcpy(new->name, file_header->name);
		/* Put file on a linked list for later */
		if (size == 0) {
			new->next = archive_handle->cpio__hardlinks_to_create;
			archive_handle->cpio__hardlinks_to_create = new;
			/* The record holds its own copy of the name; release ours
			 * so that the next call does not overwrite (and leak) it.
			 */
			free(file_header->name);
			file_header->name = NULL;
			return EXIT_SUCCESS; /* Skip this one */
			/* TODO: this breaks cpio -t (it does not show hardlinks) */
		}
		new->next = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = new;
	}
	file_header->device = makedev(major, minor);

	if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
		archive_handle->action_data(archive_handle);
//TODO: run "echo /etc/hosts | cpio -pv /tmp" twice. On 2nd run:
//cpio: etc/hosts not created: newer or same age file exists
//etc/hosts  <-- should NOT show it
//2 blocks <-- should say "0 blocks"
		archive_handle->action_header(file_header);
	} else {
		data_skip(archive_handle);
	}

	archive_handle->offset += file_header->size;

	free(file_header->link_target);
	free(file_header->name);
	file_header->link_target = NULL;
	file_header->name = NULL;

	return EXIT_SUCCESS;

 create_hardlinks:
	free(file_header->link_target);
	free(file_header->name);

	while (archive_handle->cpio__hardlinks_to_create) {
		hardlinks_t *cur;
		hardlinks_t *make_me = archive_handle->cpio__hardlinks_to_create;

		archive_handle->cpio__hardlinks_to_create = make_me->next;

		memset(file_header, 0, sizeof(*file_header));
		file_header->mtime = make_me->mtime;
		file_header->name = make_me->name;
		file_header->mode = make_me->mode;
		file_header->uid = make_me->uid;
		file_header->gid = make_me->gid;
		/*file_header->size = 0;*/
		/*file_header->link_target = NULL;*/

		/* Try to find a file we are hardlinked to */
		cur = archive_handle->cpio__created_hardlinks;
		while (cur) {
			/* TODO: must match maj/min too! */
			if (cur->inode == make_me->inode) {
				file_header->link_target = cur->name;
				 /* link_target != NULL, size = 0: "I am a hardlink" */
				if (archive_handle->filter(archive_handle) == EXIT_SUCCESS)
					archive_handle->action_data(archive_handle);
				free(make_me);
				goto next_link;
			}
			cur = cur->next;
		}
		/* Oops... no file with such inode was created... do it now
		 * (happens when hardlinked files are empty (zero length)) */
		if (archive_handle->filter(archive_handle) == EXIT_SUCCESS)
			archive_handle->action_data(archive_handle);
		/* Move to the list of created hardlinked files */
		make_me->next = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = make_me;
 next_link: ;
	}

	while (archive_handle->cpio__created_hardlinks) {
		hardlinks_t *p = archive_handle->cpio__created_hardlinks;
		archive_handle->cpio__created_hardlinks = p->next;
		free(p);
	}

	return EXIT_FAILURE; /* "No more files to process" */
}

View File

@@ -0,0 +1,461 @@
/* vi: set sw=4 ts=4: */
/* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*
* FIXME:
* In privileged mode if uname and gname map to a uid and gid then use the
* mapped value instead of the uid/gid values in tar header
*
* References:
* GNU tar and star man pages,
* Opengroup's ustar interchange format,
* http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
*/
#include "libbb.h"
#include "archive.h"
/*
 * Integer typedefs tagged with FIX_ALIASING.  aliased_uint32_t is used in
 * get_header_tar() to store through a pointer cast onto the tar header
 * struct (*(aliased_uint32_t*)&tar = 0).
 * NOTE(review): FIX_ALIASING presumably expands to a may_alias-style
 * attribute - confirm in the platform headers.
 */
typedef uint32_t aliased_uint32_t FIX_ALIASING;
typedef off_t aliased_off_t FIX_ALIASING;
/*
 * Decode a numeric tar header field of len bytes starting at str.
 *
 * The field is normally octal ASCII, terminated by NUL or space (leading
 * spaces are allowed).  If it does not parse as such and its first byte has
 * the high bit set, it is decoded as GNU "base-256": a big-endian binary
 * number whose first byte carries a marker bit (0x80) and a sign bit (0x40).
 * Anything else terminates the program with
 * "corrupted octal value in tar header".
 *
 * NB: _DESTROYS_ str[len] character!  (It is overwritten with NUL, i.e. the
 * first byte of the following header field is trashed.)
 */
static unsigned long long getOctal(char *str, int len)
{
	unsigned long long v;
	char *end;

	/* NB: leading spaces are allowed. Using strtoull to handle that.
	 * The downside is that we accept e.g. "-123" too :(
	 */
	str[len] = '\0';
	v = strtoull(str, &end, 8);
	/* std: "Each numeric field is terminated by one or more
	 * <space> or NUL characters". We must support ' '! */
	if (*end != '\0' && *end != ' ') {
		int8_t first = str[0];
		if (!(first & 0x80))
			bb_error_msg_and_die("corrupted octal value in tar header");
		/*
		 * GNU tar uses "base-256 encoding" for very large numbers.
		 * Encoding is binary, with highest bit always set as a marker
		 * and sign in next-highest bit:
		 * 80 00 .. 00 - zero
		 * bf ff .. ff - largest positive number
		 * ff ff .. ff - minus 1
		 * c0 00 .. 00 - smallest negative number
		 *
		 * Example of tar file with 8914993153 (0x213600001) byte file.
		 * Field starts at offset 7c:
		 * 00070  30 30 30 00 30 30 30 30  30 30 30 00 80 00 00 00  |000.0000000.....|
		 * 00080  00 00 00 02 13 60 00 01  31 31 31 32 30 33 33 36  |.....`..11120336|
		 *
		 * NB: tarballs with NEGATIVE unix times encoded that way were seen!
		 */
		v = first;
		/* Sign-extend using 6th bit: */
		v <<= sizeof(unsigned long long)*8 - 7;
		v = (long long)v >> (sizeof(unsigned long long)*8 - 7);
		/* The first byte is already folded into v above, so continue
		 * with bytes 1..len-1.  Pre-increment is essential: starting
		 * again at byte 0 would count it twice and drop the last,
		 * least significant byte.
		 */
		while (--len != 0)
			v = (v << 8) + (unsigned char) *++str;
	}
	return v;
}
/* Decode a fixed-size header field; trashes the byte right after it */
#define GET_OCTAL(a) getOctal((a), sizeof(a))
#if ENABLE_FEATURE_TAR_SELINUX
/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword.
 * This is what Red Hat's patched version of tar uses.
 *
 * Reads sz bytes of extended-header payload from archive_handle->src_fd and
 * advances archive_handle->offset by sz.  The payload is walked record by
 * record ("LEN NAME=VALUE\n"); the value of the first record whose name is
 * the keyword is returned as a freshly xstrdup()ed string.  Returns NULL if
 * no such record is found or a malformed record stops the scan.
 * get_header_tar() free()s the previously stored result before saving a new one.
 */
# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux"
static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz)
{
char *buf, *p;
char *result;
p = buf = xmalloc(sz + 1);
/* prevent bb_strtou from running off the buffer */
buf[sz] = '\0';
xread(archive_handle->src_fd, buf, sz);
archive_handle->offset += sz;
result = NULL;
while (sz != 0) {
char *end, *value;
unsigned len;
/* Every record has this format: "LEN NAME=VALUE\n" */
len = bb_strtou(p, &end, 10);
/* expect errno to be EINVAL, because the character
 * following the digits should be a space
 */
/* p and sz are moved to the next record BEFORE validation;
 * on a bad record the loop is left without dereferencing p.
 */
p += len;
sz -= len;
if ((int)sz < 0
|| len == 0
|| errno != EINVAL
|| *end != ' '
) {
bb_error_msg("malformed extended header, skipped");
// More verbose version:
//bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped",
// archive_handle->offset - (sz + len));
break;
}
/* overwrite the terminating newline with NUL
 * (we do not bother to check that it *was* a newline)
 */
p[-1] = '\0';
/* Is it selinux security context? */
/* end points at the space after LEN, so NAME starts right after it */
value = end + 1;
if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) {
value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1;
result = xstrdup(value);
break;
}
}
free(buf);
return result;
}
#endif
/*
 * Read and process one header (plus its data) of a tar archive.
 *
 * Steps, in order:
 *  - align to a 512-byte boundary and read one 512-byte header block;
 *  - an empty header sets archive_handle->tar__end; a second consecutive
 *    one drains the input and ends the archive;
 *  - validate magic (optionally autodetecting gz/bz2 and handing the whole
 *    stream over to get_header_tar_gz/bz2) and the header checksum;
 *  - decode numeric fields (in reverse order, since getOctal() trashes the
 *    byte following each field), names and the type flag;
 *  - GNU 'L'/'K' long name/link records and pax 'g'/'x' headers are
 *    consumed and the function loops to the next header;
 *  - names starting with "../" or containing "/../" are refused;
 *  - finally the handle's filter, action_header and action_data callbacks
 *    are run, or the data is skipped.
 *
 * Returns EXIT_SUCCESS after one entry (or the first empty header) has been
 * handled, EXIT_FAILURE at end of archive or after a compressed stream has
 * been fully processed by the autodetected handler.  Malformed input
 * terminates the program via bb_error_msg_and_die().
 */
char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
{
	file_header_t *file_header = archive_handle->file_header;
	struct tar_header_t tar;
	char *cp;
	int i, sum_u, sum;
#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
	int sum_s;
#endif
	int parse_names;

	/* Our "private data" */
#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
# define p_longname (archive_handle->tar__longname)
# define p_linkname (archive_handle->tar__linkname)
#else
# define p_longname 0
# define p_linkname 0
#endif

#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX
 again:
#endif
	/* Align header */
	data_align(archive_handle, 512);

 again_after_align:

#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT
	/* to prevent misdetection of bz2 sig */
	*(aliased_uint32_t*)&tar = 0;
	i = full_read(archive_handle->src_fd, &tar, 512);
	/* If GNU tar sees EOF in above read, it says:
	 * "tar: A lone zero block at N", where N = kilobyte
	 * where EOF was met (not EOF block, actual EOF!),
	 * and exits with EXIT_SUCCESS.
	 * We will mimic exit(EXIT_SUCCESS), although we will not mimic
	 * the message and we don't check whether we indeed
	 * saw zero block directly before this. */
	if (i == 0) {
		xfunc_error_retval = 0;
 short_read:
		bb_error_msg_and_die("short read");
	}
	if (i != 512) {
		IF_FEATURE_TAR_AUTODETECT(goto autodetect;)
		goto short_read;
	}

#else
	i = 512;
	xread(archive_handle->src_fd, &tar, i);
#endif
	archive_handle->offset += i;

	/* If there is no filename its an empty header */
	if (tar.name[0] == 0 && tar.prefix[0] == 0) {
		if (archive_handle->tar__end) {
			/* Second consecutive empty header - end of archive.
			 * Read until the end to empty the pipe from gz or bz2
			 */
			while (full_read(archive_handle->src_fd, &tar, 512) == 512)
				continue;
			return EXIT_FAILURE;
		}
		archive_handle->tar__end = 1;
		return EXIT_SUCCESS;
	}
	archive_handle->tar__end = 0;

	/* Check header has valid magic, "ustar" is for the proper tar,
	 * five NULs are for the old tar format  */
	if (strncmp(tar.magic, "ustar", 5) != 0
	 && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
	     || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
	) {
#if ENABLE_FEATURE_TAR_AUTODETECT
		char FAST_FUNC (*get_header_ptr)(archive_handle_t *);
		uint16_t magic2;

 autodetect:
		magic2 = *(uint16_t*)tar.name;
		/* tar gz/bz autodetect: check for gz/bz2 magic.
		 * If we see the magic, and it is the very first block,
		 * we can switch to get_header_tar_gz/bz2/lzma().
		 * Needs seekable fd. I wish recv(MSG_PEEK) works
		 * on any fd... */
# if ENABLE_FEATURE_SEAMLESS_GZ
		if (magic2 == GZIP_MAGIC) {
			get_header_ptr = get_header_tar_gz;
		} else
# endif
# if ENABLE_FEATURE_SEAMLESS_BZ2
		if (magic2 == BZIP2_MAGIC
		 && tar.name[2] == 'h' && isdigit(tar.name[3])
		) { /* bzip2 */
			get_header_ptr = get_header_tar_bz2;
		} else
# endif
# if ENABLE_FEATURE_SEAMLESS_XZ
		//TODO: if (magic2 == XZ_MAGIC1)...
		//else
# endif
			goto err;
		/* Two different causes for lseek() != 0:
		 * unseekable fd (would like to support that too, but...),
		 * or not first block (false positive, it's not .gz/.bz2!) */
		if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0)
			goto err;
		while (get_header_ptr(archive_handle) == EXIT_SUCCESS)
			continue;
		return EXIT_FAILURE;
 err:
#endif /* FEATURE_TAR_AUTODETECT */
		bb_error_msg_and_die("invalid tar magic");
	}

	/* Do checksum on headers.
	 * POSIX says that checksum is done on unsigned bytes, but
	 * Sun and HP-UX gets it wrong... more details in
	 * GNU tar source. */
#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
	sum_s = ' ' * sizeof(tar.chksum);
#endif
	sum_u = ' ' * sizeof(tar.chksum);
	for (i = 0; i < 148; i++) {
		sum_u += ((unsigned char*)&tar)[i];
#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
		sum_s += ((signed char*)&tar)[i];
#endif
	}
	for (i = 156; i < 512; i++) {
		sum_u += ((unsigned char*)&tar)[i];
#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY
		sum_s += ((signed char*)&tar)[i];
#endif
	}
	/* This field does not need special treatment (getOctal).
	 * Plain strtoul is used (not xstrtoul) because tar.chksum may have
	 * leading spaces.  This is the only checksum test needed: it covers
	 * both the field's termination and its value.
	 */
	{
		char *endp; /* gcc likes temp var for &endp */
		sum = strtoul(tar.chksum, &endp, 8);
		if ((*endp != '\0' && *endp != ' ')
		 || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum))
		) {
			bb_error_msg_and_die("invalid tar header checksum");
		}
	}

	/* 0 is reserved for high perf file, treat as normal file */
	if (!tar.typeflag) tar.typeflag = '0';
	parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7');

	/* getOctal trashes subsequent field, therefore we call it
	 * on fields in reverse order */
	if (tar.devmajor[0]) {
		char t = tar.prefix[0];
		/* we trash prefix[0] here, but we DO need it later! */
		unsigned minor = GET_OCTAL(tar.devminor);
		unsigned major = GET_OCTAL(tar.devmajor);
		file_header->device = makedev(major, minor);
		tar.prefix[0] = t;
	}
	file_header->link_target = NULL;
	if (!p_linkname && parse_names && tar.linkname[0]) {
		file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname));
		/* FIXME: what if we have non-link object with link_target? */
		/* Will link_target be free()ed? */
	}
#if ENABLE_FEATURE_TAR_UNAME_GNAME
	file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL;
	file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL;
#endif
	file_header->mtime = GET_OCTAL(tar.mtime);
	file_header->size = GET_OCTAL(tar.size);
	file_header->gid = GET_OCTAL(tar.gid);
	file_header->uid = GET_OCTAL(tar.uid);
	/* Set bits 0-11 of the files mode */
	file_header->mode = 07777 & GET_OCTAL(tar.mode);

	file_header->name = NULL;
	if (!p_longname && parse_names) {
		/* we trash mode[0] here, it's ok */
		//tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain
		tar.mode[0] = '\0';
		if (tar.prefix[0]) {
			/* and padding[0] */
			//tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain
			tar.padding[0] = '\0';
			file_header->name = concat_path_file(tar.prefix, tar.name);
		} else
			file_header->name = xstrdup(tar.name);
	}

	/* Set bits 12-15 of the files mode */
	/* (typeflag was not trashed because chksum does not use getOctal) */
	switch (tar.typeflag) {
	/* busybox identifies hard links as being regular files with 0 size and a link name */
	case '1':
		file_header->mode |= S_IFREG;
		break;
	case '7':
	/* case 0: */
	case '0':
#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY
		if (last_char_is(file_header->name, '/')) {
			goto set_dir;
		}
#endif
		file_header->mode |= S_IFREG;
		break;
	case '2':
		file_header->mode |= S_IFLNK;
		/* have seen tarballs with size field containing
		 * the size of the link target's name */
 size0:
		file_header->size = 0;
		break;
	case '3':
		file_header->mode |= S_IFCHR;
		goto size0; /* paranoia */
	case '4':
		file_header->mode |= S_IFBLK;
		goto size0;
	case '5':
 IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:)
		file_header->mode |= S_IFDIR;
		goto size0;
	case '6':
		file_header->mode |= S_IFIFO;
		goto size0;
#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
	case 'L':
		/* free: paranoia: tar with several consecutive longnames */
		free(p_longname);
		/* For paranoia reasons we allocate extra NUL char */
		p_longname = xzalloc(file_header->size + 1);
		/* We read ASCIZ string, including NUL */
		xread(archive_handle->src_fd, p_longname, file_header->size);
		archive_handle->offset += file_header->size;
		/* return get_header_tar(archive_handle); */
		/* gcc 4.1.1 didn't optimize it into jump */
		/* so we will do it ourself, this also saves stack */
		goto again;
	case 'K':
		free(p_linkname);
		p_linkname = xzalloc(file_header->size + 1);
		xread(archive_handle->src_fd, p_linkname, file_header->size);
		archive_handle->offset += file_header->size;
		/* return get_header_tar(archive_handle); */
		goto again;
	case 'D':	/* GNU dump dir */
	case 'M':	/* Continuation of multi volume archive */
	case 'N':	/* Old GNU for names > 100 characters */
	case 'S':	/* Sparse file */
	case 'V':	/* Volume header */
#endif
#if !ENABLE_FEATURE_TAR_SELINUX
	case 'g':	/* pax global header */
	case 'x':	/* pax extended header */
#else
 skip_ext_hdr:
#endif
	{
		off_t sz;
		bb_error_msg("warning: skipping header '%c'", tar.typeflag);
		sz = (file_header->size + 511) & ~(off_t)511;
		archive_handle->offset += sz;
		sz >>= 9; /* sz /= 512 but w/o contortions for signed div */
		while (sz--)
			xread(archive_handle->src_fd, &tar, 512);
		/* return get_header_tar(archive_handle); */
		goto again_after_align;
	}
#if ENABLE_FEATURE_TAR_SELINUX
	case 'g':	/* pax global header */
	case 'x': {	/* pax extended header */
		char **pp;
		if ((uoff_t)file_header->size > 0xfffff) /* paranoia */
			goto skip_ext_hdr;
		pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx;
		free(*pp);
		*pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size);
		goto again;
	}
#endif
	default:
		bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag);
	}

#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS
	if (p_longname) {
		file_header->name = p_longname;
		p_longname = NULL;
	}
	if (p_linkname) {
		file_header->link_target = p_linkname;
		p_linkname = NULL;
	}
#endif
	/* Refuse names that begin with "../" or contain "/../" */
	if (strncmp(file_header->name, "/../"+1, 3) == 0
	 || strstr(file_header->name, "/../")
	) {
		bb_error_msg_and_die("name with '..' encountered: '%s'",
				file_header->name);
	}

	/* Strip trailing '/' in directories */
	/* Must be done after mode is set as '/' is used to check if it's a directory */
	cp = last_char_is(file_header->name, '/');

	if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) {
		archive_handle->action_header(/*archive_handle->*/ file_header);
		/* Note that we kill the '/' only after action_header() */
		/* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */
		if (cp)
			*cp = '\0';
		archive_handle->action_data(archive_handle);
		if (archive_handle->accept || archive_handle->reject)
			llist_add_to(&archive_handle->passed, file_header->name);
		else /* Caller isn't interested in list of unpacked files */
			free(file_header->name);
	} else {
		data_skip(archive_handle);
		free(file_header->name);
	}
	archive_handle->offset += file_header->size;

	free(file_header->link_target);
	/* Do not free(file_header->name)!
	 * It might be inserted in archive_handle->passed - see above */
#if ENABLE_FEATURE_TAR_UNAME_GNAME
	free(file_header->tar__uname);
	free(file_header->tar__gname);
#endif
	return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,21 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Process a bzip2-compressed tar stream: src_fd is handed to
 * open_transformer() with the bunzip2 unpacker, then tar headers are
 * handled one after another until get_header_tar() stops reporting
 * EXIT_SUCCESS.  Always returns EXIT_FAILURE.
 */
char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle)
{
	/* lseek() is not possible on a pipe, so skip data by reading it */
	archive_handle->seek = seek_by_read;

	open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2");
	archive_handle->offset = 0;

	/* Walk through every entry of the unpacked stream */
	for (;;) {
		if (get_header_tar(archive_handle) != EXIT_SUCCESS)
			break;
	}

	/* Can only do one file at a time */
	return EXIT_FAILURE;
}

View File

@@ -0,0 +1,36 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Process a gzip-compressed tar stream: src_fd is handed to
 * open_transformer() with the gunzip unpacker, then tar headers are
 * handled one after another until get_header_tar() stops reporting
 * EXIT_SUCCESS.  Always returns EXIT_FAILURE.
 */
char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle)
{
	/* lseek() is not possible on a pipe, so skip data by reading it */
	archive_handle->seek = seek_by_read;

#if BB_MMU
	/* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case).
	 * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will
	 * need the header. */
	{
		unsigned char magic[2];

		xread(archive_handle->src_fd, magic, 2);
		/* Can skip this check, but error message will be less clear */
		if (!(magic[0] == 0x1f && magic[1] == 0x8b))
			bb_error_msg_and_die("invalid gzip magic");
	}
#endif

	open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip");
	archive_handle->offset = 0;

	/* Walk through every entry of the unpacked stream */
	for (;;) {
		if (get_header_tar(archive_handle) != EXIT_SUCCESS)
			break;
	}

	/* Can only do one file at a time */
	return EXIT_FAILURE;
}

View File

@@ -0,0 +1,24 @@
/* vi: set sw=4 ts=4: */
/*
* Small lzma deflate implementation.
* Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
*
* Licensed under GPLv2, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Process an lzma-compressed tar stream: src_fd is handed to
 * open_transformer() with the unlzma unpacker, then tar headers are
 * handled one after another until get_header_tar() stops reporting
 * EXIT_SUCCESS.  Always returns EXIT_FAILURE.
 */
char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle)
{
	/* lseek() is not possible on a pipe, so skip data by reading it */
	archive_handle->seek = seek_by_read;

	open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma");
	archive_handle->offset = 0;

	/* Walk through every entry of the unpacked stream */
	for (;;) {
		if (get_header_tar(archive_handle) != EXIT_SUCCESS)
			break;
	}

	/* Can only do one file at a time */
	return EXIT_FAILURE;
}

View File

@@ -0,0 +1,12 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/* Header action: print the entry's name followed by a newline on stdout. */
void FAST_FUNC header_list(const file_header_t *file_header)
{
	const char *entry_name = file_header->name;

	/* TODO: cpio -vp DIR should output "DIR/NAME", not just "NAME" */
	puts(entry_name);
}

View File

@@ -0,0 +1,10 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * No-op header action: the header is deliberately ignored (the parameter
 * is marked UNUSED_PARAM).  init_handle() installs this as the default
 * action_header callback.
 */
void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM)
{
}

View File

@@ -0,0 +1,69 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Header action: print one verbose listing line for an entry:
 * mode string, owner/group, size, modification time (local time) and name,
 * followed by " -> TARGET" when the entry has a link target.
 *
 * With FEATURE_TAR_UNAME_GNAME the owner and group are printed by name when
 * the header carries one, otherwise (and always without the feature)
 * as numeric ids.
 */
void FAST_FUNC header_verbose_list(const file_header_t *file_header)
{
	struct tm tm_time;
	struct tm *ptm = &tm_time;
#if ENABLE_FEATURE_TAR_UNAME_GNAME
	char uid[sizeof(int)*3 + 2];
	char *user = file_header->tar__uname;
	char *group = file_header->tar__gname;
#endif

	/* Broken-down local time of the entry's mtime, needed by both variants below */
	localtime_r(&file_header->mtime, ptm);

#if ENABLE_FEATURE_TAR_UNAME_GNAME
	/* No name stored in the header: fall back to the numeric id */
	if (!user) {
		sprintf(uid, "%u", (unsigned)file_header->uid);
		user = uid;
	}
	if (!group)
		group = utoa(file_header->gid);

	printf("%s %s/%s %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s",
		bb_mode_string(file_header->mode),
		user,
		group,
		file_header->size,
		1900 + ptm->tm_year,
		1 + ptm->tm_mon,
		ptm->tm_mday,
		ptm->tm_hour,
		ptm->tm_min,
		ptm->tm_sec,
		file_header->name);
#else /* !FEATURE_TAR_UNAME_GNAME */
	printf("%s %u/%u %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s",
		bb_mode_string(file_header->mode),
		(unsigned)file_header->uid,
		(unsigned)file_header->gid,
		file_header->size,
		1900 + ptm->tm_year,
		1 + ptm->tm_mon,
		ptm->tm_mday,
		ptm->tm_hour,
		ptm->tm_min,
		ptm->tm_sec,
		file_header->name);
#endif /* FEATURE_TAR_UNAME_GNAME */

	/* NB: GNU tar shows "->" for symlinks and "link to" for hardlinks */
	if (file_header->link_target != NULL)
		printf(" -> %s", file_header->link_target);
	bb_putchar('\n');
}

View File

@@ -0,0 +1,22 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Allocate a zero-filled archive handle together with its file header and
 * install the default callbacks: skip headers, skip data, accept every
 * entry, seek with seek_by_jump.  Returns the new handle.
 */
archive_handle_t* FAST_FUNC init_handle(void)
{
	archive_handle_t *handle = xzalloc(sizeof(*handle));

	handle->file_header = xzalloc(sizeof(file_header_t));

	/* Defaults; callers override the ones they care about */
	handle->seek = seek_by_jump;
	handle->filter = filter_accept_all;
	handle->action_data = data_skip;
	handle->action_header = header_skip;

	return handle;
}

View File

@@ -0,0 +1,93 @@
/*
This file is part of the LZO real-time data compression library.
Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
Markus F.X.J. Oberhumer <markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "liblzo_interface.h"
/* lzo-2.03/src/config1x.h */
/* Length and offset limits of the M1..M4 match encodings */
#define M2_MIN_LEN 3
#define M2_MAX_LEN 8
#define M3_MAX_LEN 33
#define M4_MAX_LEN 9
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
/* Marker values OR-ed into the first byte of an encoded match */
#define M1_MARKER 0
#define M3_MARKER 32
#define M4_MARKER 16
#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET)
#define MIN_LOOKAHEAD (M2_MAX_LEN + 1)
/* Defined (empty): code testing defined(LZO_EOF_CODE) is compiled in */
#define LZO_EOF_CODE
/* lzo-2.03/src/lzo_dict.h */
/* Dictionary lookup: only m_pos, dict and dindex are used; m_off and in are ignored */
#define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex]
/* Shift-and-xor mix of the three bytes p[0..2] */
#define DX2(p,s1,s2) \
(((((unsigned)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
//#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
//#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
/* Shift-and-xor mix of the four bytes p[0..3] */
#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])
/* Dictionary geometry; D_BITS must be defined by the including .c file */
#define D_SIZE (1U << D_BITS)
#define D_MASK ((1U << D_BITS) - 1)
#define D_HIGH ((D_MASK >> 1) + 1)
/*
 * Evaluates to non-zero when the dictionary candidate m_pos cannot be used:
 * it lies before 'in', or its distance from ip is 0 or exceeds max_offset.
 * Side effects: rewrites m_pos and stores the distance in m_off.
 */
#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
( \
m_pos = ip - (unsigned)(ip - m_pos), \
((uintptr_t)m_pos < (uintptr_t)in \
|| (m_off = (unsigned)(ip - m_pos)) <= 0 \
|| m_off > max_offset) \
)
/* A dictionary entry is the pointer itself; 'in' (and 'drun' below) are unused */
#define DENTRY(p,in) (p)
#define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in)
#define DMS(v,s) ((unsigned) (((v) & (D_MASK >> (s))) << (s)))
/* Reduce a hash value to a valid dictionary index */
#define DM(v) ((unsigned) ((v) & D_MASK))
#define DMUL(a,b) ((unsigned) ((a) * (b)))
/* lzo-2.03/src/lzo_ptr.h */
/* Pointer difference as unsigned */
#define pd(a,b) ((unsigned)((a)-(b)))
/*
 * Bounds checks; they rely on ip/ip_end, op/op_end, out and on the labels
 * input_overrun / output_overrun / lookbehind_overrun existing in the
 * function that uses them.
 */
# define TEST_IP (ip < ip_end)
# define NEED_IP(x) \
if ((unsigned)(ip_end - ip) < (unsigned)(x)) goto input_overrun
# undef TEST_OP /* don't need both of the tests here */
# define TEST_OP 1
# define NEED_OP(x) \
if ((unsigned)(op_end - op) < (unsigned)(x)) goto output_overrun
#define HAVE_ANY_OP 1
//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND)
# define TEST_LB(m_pos) if (m_pos < out || m_pos >= op) goto lookbehind_overrun
//# define TEST_LBO(m_pos,o) if (m_pos < out || m_pos >= op - (o)) goto lookbehind_overrun
//#else
//# define TEST_LB(m_pos) ((void) 0)
//# define TEST_LBO(m_pos,o) ((void) 0)
//#endif

View File

@@ -0,0 +1,35 @@
/* LZO1X-1 compression
This file is part of the LZO real-time data compression library.
Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
Markus F.X.J. Oberhumer <markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "libbb.h"
#include "liblzo.h"
/* Configuration consumed by liblzo.h (D_SIZE/D_MASK are derived from D_BITS) */
#define D_BITS 14
/* Primary dictionary index: multiplicative hash of DX3(p,...), reduced with DM() */
#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5)
/* Secondary dictionary index, derived from the primary one */
#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
/* NOTE(review): presumably the name lzo1x_c.c (included next) gives its entry point - confirm there */
#define DO_COMPRESS lzo1x_1_compress
#include "lzo1x_c.c"

View File

@@ -0,0 +1,35 @@
/* LZO1X-1(15) compression
This file is part of the LZO real-time data compression library.
Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
Markus F.X.J. Oberhumer <markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "libbb.h"
#include "liblzo.h"
/* Configuration consumed by liblzo.h (D_SIZE/D_MASK are derived from D_BITS) */
#define D_BITS 15
/* Primary dictionary index: multiplicative hash of DX3(p,...), reduced with DM() */
#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5)
/* Secondary dictionary index, derived from the primary one */
#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
/* NOTE(review): presumably the name lzo1x_c.c (included next) gives its entry point - confirm there */
#define DO_COMPRESS lzo1x_1_15_compress
#include "lzo1x_c.c"

View File

@@ -0,0 +1,921 @@
/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm
This file is part of the LZO real-time data compression library.
Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Markus F.X.J. Oberhumer
<markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
*/
#include "libbb.h"
/* The following is probably only safe on Intel-compatible processors ... */
/* LZO_UNALIGNED_OK_2 selects the HEAD2 variant that loads a uint16_t straight from the byte buffer */
#define LZO_UNALIGNED_OK_2
#define LZO_UNALIGNED_OK_4
#include "liblzo.h"
/* Min/max helpers; arguments are evaluated more than once, so pass side-effect-free expressions */
#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b))
#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b))
#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
/***********************************************************************
//
************************************************************************/
#define SWD_N M4_MAX_OFFSET /* size of ring buffer */
#define SWD_F 2048 /* upper limit for match length */
/* One more than the largest of the M2/M3/M4 maximum lengths; sizes best_off[] and best_pos[] */
#define SWD_BEST_OFF (LZO_MAX3(M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN) + 1)
/* State of one LZO1X-999 compression run */
typedef struct {
/* set to 1 by init_match() */
int init;
unsigned look; /* bytes in lookahead buffer */
/* length and offset of the match reported by the last find_match() */
unsigned m_len;
unsigned m_off;
/* find_match() sets bp = ip - look */
const uint8_t *bp;
/* next input byte to be fetched by getbyte() */
const uint8_t *ip;
/* start and end (one past the last byte) of the input */
const uint8_t *in;
const uint8_t *in_end;
/* start of the output buffer */
uint8_t *out;
/* length of the literal run passed to the most recent code_run() */
unsigned r1_lit;
} lzo1x_999_t;
/* Fetch the next input byte and advance ip, or yield -1 once ip has reached in_end */
#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1))
/* lzo_swd.c -- sliding window dictionary */
/***********************************************************************
//
************************************************************************/
#define SWD_UINT_MAX USHRT_MAX
/* Number of slots in the 3-byte hash tables (head3[], llen3[]) */
#ifndef SWD_HSIZE
# define SWD_HSIZE 16384
#endif
#ifndef SWD_MAX_CHAIN
# define SWD_MAX_CHAIN 2048
#endif
/* Hash of the three bytes b[p..p+2], reduced to the range 0..SWD_HSIZE-1 */
#define HEAD3(b, p) \
( ((0x9f5f * ((((b[p]<<5)^b[p+1])<<5) ^ b[p+2])) >> 5) & (SWD_HSIZE-1) )
/*
 * 16-bit key built from the two bytes b[p], b[p+1].
 * The first variant loads them with one (possibly unaligned) uint16_t read,
 * the second combines them explicitly.
 */
#if defined(LZO_UNALIGNED_OK_2)
# define HEAD2(b,p) (* (uint16_t *) &(b[p]))
#else
# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8))
#endif
/* Value of an empty head2[] slot (swd_init() fills head2[] with 0xff bytes) */
#define NIL2 SWD_UINT_MAX
/* Sliding window dictionary used by the LZO1X-999 match finder */
typedef struct lzo_swd {
/* public - "built-in" */
/* public - configuration */
/* upper bound on the number of chain nodes swd_findbest() lets swd_search() visit */
unsigned max_chain;
/* non-zero: keep best_pos[]/best_off[] up to date */
int use_best_off;
/* public - output */
/* length and offset of the best match found by swd_findbest() */
unsigned m_len;
unsigned m_off;
/* number of bytes available in the lookahead */
unsigned look;
/* byte at bp, or -1 when look is 0 (set by swd_findbest()) */
int b_char;
#if defined(SWD_BEST_OFF)
/* best_off[i]: offset computed from best_pos[i], or 0 when no match of length i was seen */
unsigned best_off[SWD_BEST_OFF];
#endif
/* semi public */
/* compressor state that supplies the input bytes */
lzo1x_999_t *c;
/* buffer position of the best match */
unsigned m_pos;
#if defined(SWD_BEST_OFF)
/* best_pos[i]: 1 + position of the first match of length i found, 0 = none */
unsigned best_pos[SWD_BEST_OFF];
#endif
/* private */
unsigned ip; /* input pointer (lookahead) */
unsigned bp; /* buffer pointer */
unsigned rp; /* remove pointer */
/* counts down from SWD_N; swd_remove_node() only touches the tables once it is 0 */
unsigned node_count;
unsigned first_rp;
/* ring buffer; swd_getbyte() mirrors its first SWD_F bytes into b_wrap */
uint8_t b[SWD_N + SWD_F];
uint8_t b_wrap[SWD_F]; /* must follow b */
/* head3[key]: most recently inserted position for a 3-byte hash key */
uint16_t head3[SWD_HSIZE];
/* succ3[pos]: next older position in the same hash chain */
uint16_t succ3[SWD_N + SWD_F];
/* best3[pos]: match length recorded for pos (SWD_F + 1 when none was searched) */
uint16_t best3[SWD_N + SWD_F];
/* llen3[key]: number of positions currently chained under key */
uint16_t llen3[SWD_HSIZE];
#ifdef HEAD2
/* head2[key]: most recent position for a 2-byte key, or NIL2 */
uint16_t head2[65536L];
#endif
} lzo_swd_t, *lzo_swd_p;
/* Size in bytes of the dictionary state (lzo_swd_t) */
#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t))
/* Access macro for head3.
* head3[key] may be uninitialized, but then its value will never be used.
*/
#define s_get_head3(s,key) s->head3[key]
/***********************************************************************
//
************************************************************************/
/* Number of positions in the ring buffer b[]; ip, bp and rp wrap to 0 when they reach it */
#define B_SIZE (SWD_N + SWD_F)
/*
 * Reset the dictionary and preload the lookahead with up to SWD_F input
 * bytes taken from s->c.  s->c must already be set.  Returns LZO_E_OK.
 */
static int swd_init(lzo_swd_p s)
{
	lzo1x_999_t *src = s->c;
	unsigned avail;

	/* defaults */
	s->node_count = SWD_N;

	/* Empty hash chains; every head2 slot becomes NIL2 */
	memset(s->llen3, 0, sizeof(s->llen3));
#ifdef HEAD2
	memset(s->head2, 0xff, sizeof(s->head2));
	assert(s->head2[0] == NIL2);
#endif

	s->ip = 0;
	s->bp = 0;
	s->first_rp = 0;
	assert(s->ip + SWD_F <= B_SIZE);

	/* Fill the lookahead, limited to SWD_F bytes */
	avail = (unsigned) (src->in_end - src->ip);
	if (avail > SWD_F)
		avail = SWD_F;
	s->look = avail;
	if (avail > 0) {
		memcpy(s->b + s->ip, src->ip, avail);
		src->ip += avail;
		s->ip += avail;
	}
	if (s->ip == B_SIZE)
		s->ip = 0;

	/* Place the remove pointer node_count positions behind first_rp (mod B_SIZE) */
	if (s->first_rp >= s->node_count)
		s->rp = s->first_rp - s->node_count;
	else
		s->rp = s->first_rp + (B_SIZE - s->node_count);

	return LZO_E_OK;
}
/*
 * Distance from buffer position pos back to the current position s->bp,
 * taking the wrap at B_SIZE into account; yields B_SIZE when pos == s->bp.
 */
#define swd_pos2off(s,pos) \
(s->bp > (pos) ? s->bp - (pos) : B_SIZE - ((pos) - s->bp))
/***********************************************************************
//
************************************************************************/
/*
 * Advance the window by one position: pull one input byte into the ring
 * buffer (or shrink the lookahead when the input is exhausted) and step
 * ip, bp and rp, each wrapping at B_SIZE.
 */
static void swd_getbyte(lzo_swd_p s)
{
	int ch = getbyte(*(s->c));

	if (ch >= 0) {
		s->b[s->ip] = ch;
		/* Keep the copy of the first SWD_F bytes in b_wrap in sync */
		if (s->ip < SWD_F)
			s->b_wrap[s->ip] = ch;
	} else if (s->look > 0) {
		/* No more input: one byte fewer to look ahead at */
		s->look--;
	}

	s->ip++;
	if (s->ip == B_SIZE)
		s->ip = 0;
	s->bp++;
	if (s->bp == B_SIZE)
		s->bp = 0;
	s->rp++;
	if (s->rp == B_SIZE)
		s->rp = 0;
}
/***********************************************************************
// remove node from lists
************************************************************************/
/*
 * Retire the buffer position 'node'.  While node_count is non-zero this
 * only counts it down; afterwards the position is dropped from the
 * 3-byte chain length table and, if it is the current head, from head2[].
 */
static void swd_remove_node(lzo_swd_p s, unsigned node)
{
	unsigned key;

	if (s->node_count != 0) {
		--s->node_count;
		return;
	}

	key = HEAD3(s->b, node);
	assert(s->llen3[key] > 0);
	s->llen3[key]--;
#ifdef HEAD2
	key = HEAD2(s->b, node);
	assert(s->head2[key] != NIL2);
	if ((unsigned) s->head2[key] == node)
		s->head2[key] = NIL2;
#endif
}
/***********************************************************************
//
************************************************************************/
/*
 * Insert the next n positions into the dictionary without searching for
 * matches, advancing the window by one byte for each of them.
 */
static void swd_accept(lzo_swd_p s, unsigned n)
{
	assert(n <= s->look);

	for (; n > 0; n--) {
		unsigned key;
		unsigned pos;

		swd_remove_node(s, s->rp);

		/* add bp into HEAD3 */
		pos = s->bp;
		key = HEAD3(s->b, pos);
		s->succ3[pos] = s_get_head3(s, key);
		s->head3[key] = pos;
		s->best3[pos] = SWD_F + 1;
		s->llen3[key]++;
		assert(s->llen3[key] <= SWD_N);

#ifdef HEAD2
		/* add bp into HEAD2 */
		key = HEAD2(s->b, pos);
		s->head2[key] = pos;
#endif

		swd_getbyte(s);
	}
}
/***********************************************************************
//
************************************************************************/
/*
 * Walk up to cnt nodes of a 3-byte hash chain, starting at 'node', looking
 * for a match with the data at s->bp that is longer than s->m_len.
 * Updates s->m_len / s->m_pos for every improvement and, when SWD_BEST_OFF
 * is defined, records in s->best_pos[] the first node seen for each match
 * length below SWD_BEST_OFF.
 */
static void swd_search(lzo_swd_p s, unsigned node, unsigned cnt)
{
const uint8_t *p1;
const uint8_t *p2;
const uint8_t *px;
unsigned m_len = s->m_len;
const uint8_t *b = s->b;
const uint8_t *bp = s->b + s->bp;
/* end of the lookahead: comparison never runs past it */
const uint8_t *bx = s->b + s->bp + s->look;
/* last byte of the current best match; compared first as a cheap reject */
unsigned char scan_end1;
assert(s->m_len > 0);
scan_end1 = bp[m_len - 1];
for ( ; cnt-- > 0; node = s->succ3[node]) {
p1 = bp;
p2 = b + node;
px = bx;
assert(m_len < s->look);
/* A candidate can only beat m_len if bytes m_len-1 and m_len match too */
if (p2[m_len - 1] == scan_end1
&& p2[m_len] == p1[m_len]
&& p2[0] == p1[0]
&& p2[1] == p1[1]
) {
unsigned i;
assert(lzo_memcmp(bp, &b[node], 3) == 0);
p1 += 2; p2 += 2;
/* extend the match byte by byte */
do {} while (++p1 < px && *p1 == *++p2);
/* i = length of this match */
i = p1-bp;
assert(lzo_memcmp(bp, &b[node], i) == 0);
#if defined(SWD_BEST_OFF)
if (i < SWD_BEST_OFF) {
if (s->best_pos[i] == 0)
s->best_pos[i] = node + 1;
}
#endif
if (i > m_len) {
s->m_len = m_len = i;
s->m_pos = node;
/* stop early: whole lookahead matched, length limit reached, */
/* or the new length exceeds best3[] recorded for this node */
if (m_len == s->look)
return;
if (m_len >= SWD_F)
return;
if (m_len > (unsigned) s->best3[node])
return;
scan_end1 = bp[m_len - 1];
}
}
}
}
/***********************************************************************
//
************************************************************************/
#ifdef HEAD2
/*
 * Look up the two bytes at s->bp in head2[].
 * Returns 0 when no earlier position is recorded for them.  Otherwise
 * returns 1 after noting the position in best_pos[2] (if still unset) and
 * raising the current match to length 2 when it was shorter.
 */
static int swd_search2(lzo_swd_p s)
{
	unsigned pos;

	assert(s->look >= 2);
	assert(s->m_len > 0);

	pos = s->head2[HEAD2(s->b, s->bp)];
	if (pos != NIL2) {
		assert(lzo_memcmp(&s->b[s->bp], &s->b[pos], 2) == 0);
#if defined(SWD_BEST_OFF)
		if (s->best_pos[2] == 0)
			s->best_pos[2] = pos + 1;
#endif
		if (s->m_len < 2) {
			s->m_len = 2;
			s->m_pos = pos;
		}
		return 1;
	}
	return 0;
}
#endif
/***********************************************************************
//
************************************************************************/
/*
 * Insert the position s->bp into the hash tables and search for the best
 * match at that position.  Results: s->m_len, s->m_off, s->b_char and,
 * when use_best_off is set, s->best_off[].
 */
static void swd_findbest(lzo_swd_p s)
{
unsigned key;
unsigned cnt, node;
unsigned len;
assert(s->m_len > 0);
/* get current head, add bp into HEAD3 */
key = HEAD3(s->b,s->bp);
node = s->succ3[s->bp] = s_get_head3(s, key);
/* cnt = chain length before this insertion, capped at max_chain below */
cnt = s->llen3[key]++;
assert(s->llen3[key] <= SWD_N + SWD_F);
if (cnt > s->max_chain)
cnt = s->max_chain;
s->head3[key] = s->bp;
s->b_char = s->b[s->bp];
/* remember the starting length to detect whether the search improved on it */
len = s->m_len;
if (s->m_len >= s->look) {
/* nothing to search for; b_char == -1 signals that the lookahead is empty */
if (s->look == 0)
s->b_char = -1;
s->m_off = 0;
s->best3[s->bp] = SWD_F + 1;
} else {
/* with HEAD2 the two ifs chain: the 3-byte search runs only if the 2-byte lookup hit */
#ifdef HEAD2
if (swd_search2(s))
#endif
if (s->look >= 3)
swd_search(s, node, cnt);
if (s->m_len > len)
s->m_off = swd_pos2off(s,s->m_pos);
s->best3[s->bp] = s->m_len;
#if defined(SWD_BEST_OFF)
if (s->use_best_off) {
int i;
/* best_pos[] holds position + 1; convert to offsets, 0 = no match of that length */
for (i = 2; i < SWD_BEST_OFF; i++) {
if (s->best_pos[i] > 0)
s->best_off[i] = swd_pos2off(s, s->best_pos[i]-1);
else
s->best_off[i] = 0;
}
}
#endif
}
swd_remove_node(s,s->rp);
#ifdef HEAD2
/* add bp into HEAD2 */
key = HEAD2(s->b, s->bp);
s->head2[key] = s->bp;
#endif
}
#undef HEAD3
#undef HEAD2
#undef s_get_head3
/***********************************************************************
//
************************************************************************/
/*
 * Bind the dictionary s to the compressor state c and initialize it.
 * use_best_off is stored in the dictionary only when swd_init() succeeded.
 * Returns the result of swd_init().
 */
static int init_match(lzo1x_999_t *c, lzo_swd_p s, uint32_t use_best_off)
{
	int rc;

	assert(!c->init);
	c->init = 1;

	s->c = c;
	rc = swd_init(s);
	if (rc == 0)
		s->use_best_off = use_best_off;

	return rc;
}
/***********************************************************************
//
************************************************************************/
/*
 * Advance the dictionary and find the best match at the new position.
 *
 * When skip > 0, this_len - skip positions are first inserted into the
 * dictionary without searching (swd_accept()).  Then the best match at the
 * current position is looked up, the window moves on by one byte, and the
 * results are published in c: m_len, m_off, look and bp.  c->look == 0
 * (with c->m_len == 0) signals the end of the input.
 *
 * Always returns LZO_E_OK.
 */
static int find_match(lzo1x_999_t *c, lzo_swd_p s,
		unsigned this_len, unsigned skip)
{
	assert(c->init);

	if (skip > 0) {
		assert(this_len >= skip);
		swd_accept(s, this_len - skip);
	} else {
		assert(this_len <= 1);
	}

	/* Starting length for the search (this store used to be duplicated) */
	s->m_len = 1;
#ifdef SWD_BEST_OFF
	if (s->use_best_off)
		memset(s->best_pos, 0, sizeof(s->best_pos));
#endif
	swd_findbest(s);
	c->m_len = s->m_len;
	c->m_off = s->m_off;

	swd_getbyte(s);

	if (s->b_char < 0) {
		/* swd_findbest() saw an empty lookahead */
		c->look = 0;
		c->m_len = 0;
	} else {
		c->look = s->look + 1;
	}
	c->bp = c->ip - c->look;

	return LZO_E_OK;
}
/* Forward declaration; the function is static (file-local) and is defined further down in this file */
static int lzo1x_999_compress_internal(const uint8_t *in , unsigned in_len,
uint8_t *out, unsigned *out_len,
void *wrkmem,
unsigned good_length,
unsigned max_lazy,
unsigned max_chain,
uint32_t use_best_off);
/***********************************************************************
//
************************************************************************/
/*
 * Write the encoding of a match of length m_len at offset m_off to op and
 * return the advanced output pointer.  The encoding is selected from
 * m_len, m_off and the length of the preceding literal run (c->r1_lit).
 */
static uint8_t *code_match(lzo1x_999_t *c,
uint8_t *op, unsigned m_len, unsigned m_off)
{
assert(op > c->out);
if (m_len == 2) {
/* 2-byte encoding with M1_MARKER */
assert(m_off <= M1_MAX_OFFSET);
assert(c->r1_lit > 0);
assert(c->r1_lit < 4);
m_off -= 1;
*op++ = M1_MARKER | ((m_off & 3) << 2);
*op++ = m_off >> 2;
} else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
/* 2-byte encoding: length in the top bits of the first byte */
assert(m_len >= 3);
m_off -= 1;
*op++ = ((m_len - 1) << 5) | ((m_off & 7) << 2);
*op++ = m_off >> 3;
/* NOTE(review): M2_MARKER is not among the liblzo.h macros visible here; */
/* presumably this only builds with assertions disabled - confirm */
assert(op[-2] >= M2_MARKER);
} else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) {
/* 2-byte encoding with M1_MARKER for a length-3 match beyond M2_MAX_OFFSET */
assert(m_len == 3);
assert(m_off > M2_MAX_OFFSET);
m_off -= 1 + M2_MAX_OFFSET;
*op++ = M1_MARKER | ((m_off & 3) << 2);
*op++ = m_off >> 2;
} else if (m_off <= M3_MAX_OFFSET) {
/* M3_MARKER encoding: marker byte, optional length extension, 2 offset bytes */
assert(m_len >= 3);
m_off -= 1;
if (m_len <= M3_MAX_LEN)
*op++ = M3_MARKER | (m_len - 2);
else {
/* long match: zero bytes each stand for 255, the final byte holds the rest */
m_len -= M3_MAX_LEN;
*op++ = M3_MARKER | 0;
while (m_len > 255) {
m_len -= 255;
*op++ = 0;
}
assert(m_len > 0);
*op++ = m_len;
}
*op++ = m_off << 2;
*op++ = m_off >> 6;
} else {
/* M4_MARKER encoding: as above, plus one offset bit (k) in the marker byte */
unsigned k;
assert(m_len >= 3);
assert(m_off > 0x4000);
assert(m_off <= 0xbfff);
m_off -= 0x4000;
k = (m_off & 0x4000) >> 11;
if (m_len <= M4_MAX_LEN)
*op++ = M4_MARKER | k | (m_len - 2);
else {
m_len -= M4_MAX_LEN;
*op++ = M4_MARKER | k | 0;
while (m_len > 255) {
m_len -= 255;
*op++ = 0;
}
assert(m_len > 0);
*op++ = m_len;
}
*op++ = m_off << 2;
*op++ = m_off >> 6;
}
return op;
}
/*
 * Write a run of t literal bytes, taken from ii, to op: first the run
 * length in one of four forms, then the bytes themselves.  Returns the
 * advanced output pointer.  t must be at least 1.
 */
static uint8_t *STORE_RUN(lzo1x_999_t *c, uint8_t *op,
		const uint8_t *ii, unsigned t)
{
	const uint8_t *src = ii;
	unsigned left = t;

	if (op == c->out && t <= 238) {
		/* run at the very start of the output */
		*op++ = 17 + t;
	} else if (t <= 3) {
		/* short run: length goes into the low bits of an already written byte */
		op[-2] |= t;
	} else if (t <= 18) {
		*op++ = t - 3;
	} else {
		/* long run: zero bytes each stand for 255, the final byte holds the rest */
		unsigned rest;

		*op++ = 0;
		for (rest = t - 18; rest > 255; rest -= 255)
			*op++ = 0;
		assert(rest > 0);
		*op++ = rest;
	}

	/* copy the literals */
	do {
		*op++ = *src++;
		left--;
	} while (left > 0);

	return op;
}
/*
 * Flush the pending literal run (lit bytes starting at ii, possibly none)
 * to op and remember its length in c->r1_lit.
 * Returns the advanced output pointer.
 */
static uint8_t *code_run(lzo1x_999_t *c, uint8_t *op, const uint8_t *ii,
		unsigned lit)
{
	/*
	 * This function used to assert on m_len, which is neither a
	 * parameter nor a local here, so it could only be compiled with
	 * assertions disabled.  The conditions those asserts stated:
	 *   lit > 0:  m_len >= 2
	 *   lit == 0: m_len >= 3
	 */
	if (lit > 0)
		op = STORE_RUN(c, op, ii, lit);

	c->r1_lit = lit;
	return op;
}
/***********************************************************************
//
************************************************************************/
/*
 * Number of output bytes needed to encode a match of length m_len at
 * offset m_off that follows a literal run of length lit.
 * Returns -1 when such a match cannot be encoded.
 */
static int len_of_coded_match(unsigned m_len, unsigned m_off, unsigned lit)
{
	unsigned direct_max;	/* longest length that fits into the marker byte */
	int coded;

	if (m_len < 2)
		return -1;

	if (m_len == 2) {
		/* only possible right after a run of 1..3 literals */
		if (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4)
			return 2;
		return -1;
	}

	/* the two-byte encodings */
	if ((m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET)
	 || (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4)
	) {
		return 2;
	}

	if (m_off <= M3_MAX_OFFSET)
		direct_max = M3_MAX_LEN;
	else if (m_off <= M4_MAX_OFFSET)
		direct_max = M4_MAX_LEN;
	else
		return -1;

	if (m_len <= direct_max)
		return 3;

	/* marker + final length byte + 2 offset bytes, plus one byte per full 255 */
	coded = 4;
	for (m_len -= direct_max; m_len > 255; m_len -= 255)
		coded++;
	return coded;
}
/*
 * Minimum number of bytes by which a lazy match, found 'ahead' positions
 * later, must be longer than the current match to be preferred.
 *
 * lit1, lit2: literal run length without / with the lazy match
 * l1, l2:     encoded size of the current / the lazy match
 * l3:         encoded size of the shortened current match, <= 0 if none
 *
 * The result is never negative.
 */
static int min_gain(unsigned ahead, unsigned lit1,
		unsigned lit2, int l1, int l2, int l3)
{
	int gain;

	assert(ahead >= 1);
	gain = ahead;

	/* penalty when the longer literal run needs a bigger length encoding */
	if (lit1 <= 3) {
		if (lit2 > 3)
			gain += 2;
	} else if (lit1 <= 18) {
		if (lit2 > 18)
			gain += 1;
	}

	gain += (l2 - l1) * 2;
	if (l3 > 0)
		gain -= (ahead - l3) * 2;

	return (gain < 0) ? 0 : gain;
}
/***********************************************************************
//
************************************************************************/
#if defined(SWD_BEST_OFF)
/*
 * Try to trade one or two bytes of match length for a closer offset that
 * allows a shorter encoding.  Uses swd->best_off[], which holds an offset
 * per match length (0 = none).  *m_len and *m_off are updated in place
 * when a replacement is chosen.
 */
static void better_match(const lzo_swd_p swd,
		unsigned *m_len, unsigned *m_off)
{
	unsigned len = *m_len;
	unsigned off = *m_off;
	unsigned cand;

	/* Nothing to gain for minimal matches or offsets already in M2 range */
	if (len <= M2_MIN_LEN || off <= M2_MAX_OFFSET)
		return;

	/* M3/M4 -> M2 */
	if (len <= M2_MAX_LEN + 1) {
		cand = swd->best_off[len - 1];
		if (cand != 0 && cand <= M2_MAX_OFFSET) {
			*m_len = len - 1;
			*m_off = cand;
			return;
		}
	}

	/* Both remaining cases need an M4 offset and a length above M4_MAX_LEN */
	if (off <= M3_MAX_OFFSET || len < M4_MAX_LEN + 1)
		return;

	/* M4 -> M2 */
	if (len <= M2_MAX_LEN + 2) {
		cand = swd->best_off[len - 2];
		if (cand != 0 && cand <= M2_MAX_OFFSET) {
			*m_len = len - 2;
			*m_off = cand;
			return;
		}
	}

	/* M4 -> M3 */
	if (len <= M3_MAX_LEN + 1) {
		cand = swd->best_off[len - 1];
		if (cand != 0 && cand <= M3_MAX_OFFSET) {
			*m_len = len - 1;
			*m_off = cand;
		}
	}
}
#endif
/***********************************************************************
//
************************************************************************/
/*
 * Compress in[0..in_len) into out and store the number of bytes written
 * in *out_len.
 *
 * wrkmem       is used as the lzo_swd_t dictionary state
 * good_length  once the current match is at least this long, the lazy
 *              search runs with max_chain / 4
 * max_lazy     matches at least this long are coded without trying a
 *              lazy match
 * max_chain    limit on the hash chain search (stored in swd->max_chain)
 * use_best_off non-zero enables better_match()
 *
 * Returns LZO_E_OK, or the error reported by init_match() / the first
 * find_match().
 * NOTE(review): no check against the capacity of out is visible here;
 * presumably the caller provides a worst-case sized buffer - confirm.
 */
static int lzo1x_999_compress_internal(const uint8_t *in, unsigned in_len,
uint8_t *out, unsigned *out_len,
void *wrkmem,
unsigned good_length,
unsigned max_lazy,
unsigned max_chain,
uint32_t use_best_off)
{
uint8_t *op;
const uint8_t *ii;
/* number of literals collected since the last coded match */
unsigned lit;
unsigned m_len, m_off;
lzo1x_999_t cc;
lzo1x_999_t *const c = &cc;
const lzo_swd_p swd = (lzo_swd_p) wrkmem;
int r;
c->init = 0;
c->ip = c->in = in;
c->in_end = in + in_len;
c->out = out;
op = out;
ii = c->ip; /* point to start of literal run */
lit = 0;
c->r1_lit = 0;
r = init_match(c, swd, use_best_off);
if (r != 0)
return r;
swd->max_chain = max_chain;
r = find_match(c, swd, 0, 0);
if (r != 0)
return r;
/* one iteration per literal or per coded match, until the lookahead is empty */
while (c->look > 0) {
unsigned ahead;
unsigned max_ahead;
int l1, l2, l3;
m_len = c->m_len;
m_off = c->m_off;
assert(c->bp == c->ip - c->look);
assert(c->bp >= in);
if (lit == 0)
ii = c->bp;
assert(ii + lit == c->bp);
assert(swd->b_char == *(c->bp));
if (m_len < 2
|| (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4))
/* Do not accept this match for compressed-data compatibility
* with LZO v1.01 and before
* [ might be a problem for decompress() and optimize() ]
*/
|| (m_len == 2 && op == out)
|| (op == out && lit == 0)
) {
/* a literal */
m_len = 0;
}
else if (m_len == M2_MIN_LEN) {
/* compression ratio improves if we code a literal in some cases */
if (m_off > MX_MAX_OFFSET && lit >= 4)
m_len = 0;
}
if (m_len == 0) {
/* a literal */
lit++;
swd->max_chain = max_chain;
r = find_match(c, swd, 1, 0);
assert(r == 0);
continue;
}
/* a match */
#if defined(SWD_BEST_OFF)
if (swd->use_best_off)
better_match(swd, &m_len, &m_off);
#endif
/* shall we try a lazy match ? */
ahead = 0;
if (m_len >= max_lazy) {
/* no */
l1 = 0;
max_ahead = 0;
} else {
/* yes, try a lazy match */
l1 = len_of_coded_match(m_len, m_off, lit);
assert(l1 > 0);
/* look at most 2 positions ahead, fewer for cheaply coded matches */
max_ahead = LZO_MIN(2, (unsigned)l1 - 1);
}
while (ahead < max_ahead && c->look > m_len) {
int lazy_match_min_gain;
if (m_len >= good_length)
swd->max_chain = max_chain >> 2;
else
swd->max_chain = max_chain;
r = find_match(c, swd, 1, 0);
ahead++;
assert(r == 0);
assert(c->look > 0);
assert(ii + lit + ahead == c->bp);
/* the later match must be longer, or equally long with a smaller offset */
if (c->m_len < m_len)
continue;
if (c->m_len == m_len && c->m_off >= m_off)
continue;
#if defined(SWD_BEST_OFF)
if (swd->use_best_off)
better_match(swd, &c->m_len, &c->m_off);
#endif
l2 = len_of_coded_match(c->m_len, c->m_off, lit+ahead);
if (l2 < 0)
continue;
/* compressed-data compatibility [see above] */
l3 = (op == out) ? -1 : len_of_coded_match(ahead, m_off, lit);
lazy_match_min_gain = min_gain(ahead, lit, lit+ahead, l1, l2, l3);
if (c->m_len >= m_len + lazy_match_min_gain) {
/* the lazy match wins: the skipped bytes become a shortened match or literals */
if (l3 > 0) {
/* code previous run */
op = code_run(c, op, ii, lit);
lit = 0;
/* code shortened match */
op = code_match(c, op, ahead, m_off);
} else {
lit += ahead;
assert(ii + lit == c->bp);
}
goto lazy_match_done;
}
}
assert(ii + lit + ahead == c->bp);
/* 1 - code run */
op = code_run(c, op, ii, lit);
lit = 0;
/* 2 - code match */
op = code_match(c, op, m_len, m_off);
swd->max_chain = max_chain;
/* 1+ahead positions of this match were already consumed by find_match() calls */
r = find_match(c, swd, m_len, 1+ahead);
assert(r == 0);
lazy_match_done: ;
}
/* store final run */
if (lit > 0)
op = STORE_RUN(c, op, ii, lit);
#if defined(LZO_EOF_CODE)
/* three-byte end-of-stream marker */
*op++ = M4_MARKER | 1;
*op++ = 0;
*op++ = 0;
#endif
*out_len = op - out;
return LZO_E_OK;
}
/***********************************************************************
//
************************************************************************/
/*
 * LZO1X-999 compression with a selectable effort level.
 *
 * compression_level must be 7, 8 or 9 and selects one row of tuning
 * parameters for lzo1x_999_compress_internal(); any other value yields
 * LZO_E_ERROR.  Otherwise the result of the internal compressor is
 * returned.
 */
int lzo1x_999_compress_level(const uint8_t *in, unsigned in_len,
		uint8_t *out, unsigned *out_len,
		void *wrkmem,
		int compression_level)
{
	struct level_params {
		uint16_t good_length;
		uint16_t max_lazy;
		uint16_t max_chain;
		uint16_t use_best_off;
	};
	static const struct level_params levels[] = {
		{ 8, 32, 256, 0 },		/* level 7 */
		{ 32, 128, 2048, 1 },		/* level 8 */
		{ SWD_F, SWD_F, 4096, 1 }	/* level 9: max. compression */
	};
	const struct level_params *p;

	if (compression_level < 7 || compression_level > 9)
		return LZO_E_ERROR;

	p = &levels[compression_level - 7];
	return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem,
			p->good_length,
			p->max_lazy,
			p->max_chain,
			p->use_best_off);
}

View File

@@ -0,0 +1,296 @@
/* implementation of the LZO1[XY]-1 compression algorithm
This file is part of the LZO real-time data compression library.
Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
Markus F.X.J. Oberhumer <markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/***********************************************************************
// compress a block of data.
************************************************************************/
/*
 * Core LZO1X-1 / LZO1Y-1 compression loop (the emitted format is chosen
 * at compile time through LZO1X / LZO1Y).
 *
 * Scans the input and writes literal runs and match codes to out.
 * wrkmem is used as the match dictionary: a table of pointers that is
 * indexed by the values produced by D_INDEX1/D_INDEX2.
 *
 * On return *out_len holds pd(op, out), i.e. the amount written to out,
 * and the return value is pd(in_end, ii): the trailing input bytes that
 * have not been emitted yet. The caller (DO_COMPRESS) stores them as the
 * final literal run.
 *
 * NOTE(review): ip_end is computed as in + in_len - M2_MAX_LEN - 5 without
 * any check here, so this presumably relies on in_len > M2_MAX_LEN + 5;
 * DO_COMPRESS only calls it in that case - confirm before adding callers.
 */
static NOINLINE unsigned
do_compress(const uint8_t* in, unsigned in_len,
uint8_t* out, unsigned* out_len,
void* wrkmem)
{
register const uint8_t* ip;
uint8_t* op;
const uint8_t* const in_end = in + in_len;
/* the search loop stops once ip reaches this point */
const uint8_t* const ip_end = in + in_len - M2_MAX_LEN - 5;
/* ii marks the start of the pending (not yet emitted) literal run */
const uint8_t* ii;
const void* *const dict = (const void**) wrkmem;
op = out;
ip = in;
ii = ip;
/* the first 4 input bytes are never tried as a match start */
ip += 4;
for (;;) {
register const uint8_t* m_pos;
unsigned m_off;
unsigned m_len;
unsigned dindex;
/* primary dictionary probe for the bytes at ip */
D_INDEX1(dindex,ip);
GINDEX(m_pos,m_off,dict,dindex,in);
if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
goto literal;
#if 1
/* accept the candidate if it is near or its 4th byte agrees */
if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
goto try_match;
/* otherwise probe the secondary dictionary slot */
D_INDEX2(dindex,ip);
#endif
GINDEX(m_pos,m_off,dict,dindex,in);
if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET))
goto literal;
if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
goto try_match;
goto literal;
try_match:
/* a usable match needs the first three bytes to be equal */
#if 1 && defined(LZO_UNALIGNED_OK_2)
if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip)
#else
if (m_pos[0] != ip[0] || m_pos[1] != ip[1])
#endif
{
} else {
if (m_pos[2] == ip[2]) {
#if 0
if (m_off <= M2_MAX_OFFSET)
goto match;
if (lit <= 3)
goto match;
if (lit == 3) { /* better compression, but slower */
assert(op - 2 > out); op[-2] |= (uint8_t)(3);
*op++ = *ii++; *op++ = *ii++; *op++ = *ii++;
goto code_match;
}
if (m_pos[3] == ip[3])
#endif
goto match;
}
else {
/* still need a better way for finding M1 matches */
#if 0
/* a M1 match */
#if 0
if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3)
#else
if (m_off <= M1_MAX_OFFSET && lit == 3)
#endif
{
register unsigned t;
t = lit;
assert(op - 2 > out); op[-2] |= (uint8_t)(t);
do *op++ = *ii++; while (--t > 0);
assert(ii == ip);
m_off -= 1;
*op++ = (uint8_t)(M1_MARKER | ((m_off & 3) << 2));
*op++ = (uint8_t)(m_off >> 2);
ip += 2;
goto match_done;
}
#endif
}
}
/* a literal */
literal:
/* record the current position in the dictionary and advance one byte */
UPDATE_I(dict, 0, dindex, ip, in);
++ip;
if (ip >= ip_end)
break;
continue;
/* a match */
match:
UPDATE_I(dict, 0, dindex, ip, in);
/* store current literal run */
if (pd(ip, ii) > 0) {
register unsigned t = pd(ip, ii);
if (t <= 3) {
/* 1..3 literals: count goes into the low 2 bits of a byte already written */
assert(op - 2 > out);
op[-2] |= (uint8_t)(t);
}
else if (t <= 18)
/* 4..18 literals: a single length byte holding t - 3 */
*op++ = (uint8_t)(t - 3);
else {
/* longer runs: a 0 byte, one more 0 per further 255, then the remainder */
register unsigned tt = t - 18;
*op++ = 0;
while (tt > 255) {
tt -= 255;
*op++ = 0;
}
assert(tt > 0);
*op++ = (uint8_t)(tt);
}
do *op++ = *ii++; while (--t > 0);
}
/* code the match */
assert(ii == ip);
ip += 3;
/* extend the match byte by byte; the first mismatch ends the condition */
if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++
|| m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++
#ifdef LZO1Y
|| m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++
|| m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++
#endif
) {
/* short match: undo the increment done on the mismatching byte */
--ip;
m_len = pd(ip, ii);
assert(m_len >= 3);
assert(m_len <= M2_MAX_LEN);
if (m_off <= M2_MAX_OFFSET) {
/* M2: length and low offset bits share the first byte */
m_off -= 1;
#if defined(LZO1X)
*op++ = (uint8_t)(((m_len - 1) << 5) | ((m_off & 7) << 2));
*op++ = (uint8_t)(m_off >> 3);
#elif defined(LZO1Y)
*op++ = (uint8_t)(((m_len + 1) << 4) | ((m_off & 3) << 2));
*op++ = (uint8_t)(m_off >> 2);
#endif
}
else if (m_off <= M3_MAX_OFFSET) {
m_off -= 1;
*op++ = (uint8_t)(M3_MARKER | (m_len - 2));
goto m3_m4_offset;
} else {
#if defined(LZO1X)
/* M4: offset is stored relative to 0x4000, its bit 14 goes into the marker byte */
m_off -= 0x4000;
assert(m_off > 0);
assert(m_off <= 0x7fff);
*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2));
goto m3_m4_offset;
#elif defined(LZO1Y)
goto m4_match;
#endif
}
}
else {
{
/* long match: keep comparing until a mismatch or the end of the input */
const uint8_t* end = in_end;
const uint8_t* m = m_pos + M2_MAX_LEN + 1;
while (ip < end && *m == *ip)
m++, ip++;
m_len = pd(ip, ii);
}
assert(m_len > M2_MAX_LEN);
if (m_off <= M3_MAX_OFFSET) {
m_off -= 1;
if (m_len <= 33)
*op++ = (uint8_t)(M3_MARKER | (m_len - 2));
else {
/* length does not fit in the marker byte: emit it in extension bytes */
m_len -= 33;
*op++ = M3_MARKER | 0;
goto m3_m4_len;
}
} else {
#if defined(LZO1Y)
m4_match:
#endif
m_off -= 0x4000;
assert(m_off > 0);
assert(m_off <= 0x7fff);
if (m_len <= M4_MAX_LEN)
*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2));
else {
m_len -= M4_MAX_LEN;
*op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11));
m3_m4_len:
/* extended length: one 0 byte per 255, then the remainder (1..255) */
while (m_len > 255) {
m_len -= 255;
*op++ = 0;
}
assert(m_len > 0);
*op++ = (uint8_t)(m_len);
}
}
m3_m4_offset:
/* two offset bytes; the low 2 bits of the first stay free for a literal count */
*op++ = (uint8_t)((m_off & 63) << 2);
*op++ = (uint8_t)(m_off >> 6);
}
#if 0
match_done:
#endif
/* everything up to ip has been emitted; start a new literal run here */
ii = ip;
if (ip >= ip_end)
break;
}
*out_len = pd(op, out);
return pd(in_end, ii);
}
/***********************************************************************
// public entry point
************************************************************************/
/*
 * Compress in[0..in_len) into out and store the compressed size in
 * *out_len. Always returns 0 (LZO_E_OK).
 *
 * Inputs of at most M2_MAX_LEN + 5 bytes are not searched for matches at
 * all; otherwise do_compress() does the matching and reports how many
 * trailing input bytes it left unemitted. Those bytes are written here
 * as a final literal run, followed by the 3-byte end-of-stream marker.
 */
int DO_COMPRESS(const uint8_t* in, unsigned in_len,
		uint8_t* out, unsigned* out_len,
		void* wrkmem)
{
	uint8_t *dst = out;
	unsigned tail;

	if (in_len > M2_MAX_LEN + 5) {
		tail = do_compress(in, in_len, dst, out_len, wrkmem);
		dst += *out_len;
	} else {
		/* too short to compress: everything becomes literals */
		tail = in_len;
	}

	if (tail > 0) {
		const uint8_t *src = in + in_len - tail;

		if (dst == out && tail <= 238) {
			/* nothing emitted yet: special first-byte encoding */
			*dst++ = (uint8_t)(17 + tail);
		} else if (tail <= 3) {
			/* count fits into the low bits of the previous code */
			dst[-2] |= (uint8_t)(tail);
		} else if (tail <= 18) {
			*dst++ = (uint8_t)(tail - 3);
		} else {
			/* long run: 0, one extra 0 per 255, then the remainder */
			unsigned rest = tail - 18;

			*dst++ = 0;
			for (; rest > 255; rest -= 255)
				*dst++ = 0;
			assert(rest > 0);
			*dst++ = (uint8_t)(rest);
		}

		/* the literal bytes themselves */
		while (tail-- > 0)
			*dst++ = *src++;
	}

	/* end-of-stream marker */
	dst[0] = M4_MARKER | 1;
	dst[1] = 0;
	dst[2] = 0;
	dst += 3;

	*out_len = pd(dst, out);
	return 0; /*LZO_E_OK*/
}

View File

@@ -0,0 +1,420 @@
/* implementation of the LZO1X decompression algorithm
This file is part of the LZO real-time data compression library.
Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer
All Rights Reserved.
Markus F.X.J. Oberhumer <markus@oberhumer.com>
http://www.oberhumer.com/opensource/lzo/
The LZO library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
The LZO library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the LZO library; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "libbb.h"
#include "liblzo.h"
/***********************************************************************
// decompress a block of data.
************************************************************************/
/*
 * Safe decompression with overrun testing.
 *
 * Decodes the compressed block in[0..in_len) into out. The LZO1X format
 * is the plain case; LZO1Y / LZO1Z / COPY_DICT variants are selected by
 * the corresponding compile-time macros.
 *
 * out_len: on entry the capacity of out (only read when HAVE_ANY_OP is
 *          defined); it is reset to 0 first and set to pd(op, out), the
 *          amount of output produced, on every return path.
 * wrkmem:  unused.
 *
 * Returns:
 *   LZO_E_OK                  EOF marker found exactly at the end of input
 *   LZO_E_INPUT_NOT_CONSUMED  EOF marker found, input bytes remain after it
 *   LZO_E_INPUT_OVERRUN       EOF marker found past the end of input, or
 *                             the input_overrun label was reached
 *   LZO_E_EOF_NOT_FOUND       decoding loop ended without an EOF marker
 *   LZO_E_OUTPUT_OVERRUN      the output_overrun label was reached
 *   LZO_E_LOOKBEHIND_OVERRUN  the lookbehind_overrun label was reached
 *
 * NEED_IP / NEED_OP / TEST_LB are defined elsewhere; presumably they are
 * the bounds checks that jump to the *_overrun labels - TODO confirm.
 */
int lzo1x_decompress_safe(const uint8_t* in, unsigned in_len,
uint8_t* out, unsigned* out_len,
void* wrkmem UNUSED_PARAM)
{
register uint8_t* op;
register const uint8_t* ip;
register unsigned t;
#if defined(COPY_DICT)
unsigned m_off;
const uint8_t* dict_end;
#else
register const uint8_t* m_pos = NULL; /* possibly not needed */
#endif
const uint8_t* const ip_end = in + in_len;
#if defined(HAVE_ANY_OP)
uint8_t* const op_end = out + *out_len;
#endif
#if defined(LZO1Z)
unsigned last_m_off = 0;
#endif
// LZO_UNUSED(wrkmem);
#if defined(COPY_DICT)
if (dict) {
if (dict_len > M4_MAX_OFFSET) {
dict += dict_len - M4_MAX_OFFSET;
dict_len = M4_MAX_OFFSET;
}
dict_end = dict + dict_len;
} else {
dict_len = 0;
dict_end = NULL;
}
#endif /* COPY_DICT */
*out_len = 0;
op = out;
ip = in;
/*
 * A first byte above 17 encodes an initial literal run of (byte - 17).
 * NOTE(review): *ip is read here before any input-length check is
 * visible in this function - confirm callers never pass in_len == 0.
 */
if (*ip > 17) {
t = *ip++ - 17;
if (t < 4)
goto match_next;
assert(t > 0); NEED_OP(t); NEED_IP(t+1);
do *op++ = *ip++; while (--t > 0);
goto first_literal_run;
}
while (TEST_IP && TEST_OP) {
t = *ip++;
/* instruction bytes >= 16 are match codes */
if (t >= 16)
goto match;
/* a literal run */
/*
 * t == 0 means an extended length: every following zero byte adds 255,
 * the first non-zero byte is added together with 15.
 * NOTE(review): nothing visible here bounds t, so a long run of zero
 * bytes could wrap the unsigned counter - confirm the NEED_* checks
 * cope with that (cf. the LZO advisory CVE-2014-4607).
 */
if (t == 0) {
NEED_IP(1);
while (*ip == 0) {
t += 255;
ip++;
NEED_IP(1);
}
t += 15 + *ip++;
}
/* copy literals */
/* the run is t + 3 bytes long */
assert(t > 0);
NEED_OP(t+3);
NEED_IP(t+4);
#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
# if !defined(LZO_UNALIGNED_OK_4)
if (PTR_ALIGNED2_4(op, ip))
# endif
{
/* word-wise copy: 4 bytes at a time, then the 0..3 leftover bytes */
COPY4(op, ip);
op += 4;
ip += 4;
if (--t > 0) {
if (t >= 4) {
do {
COPY4(op, ip);
op += 4;
ip += 4;
t -= 4;
} while (t >= 4);
if (t > 0)
do *op++ = *ip++; while (--t > 0);
} else {
do *op++ = *ip++; while (--t > 0);
}
}
}
# if !defined(LZO_UNALIGNED_OK_4)
else
# endif
#endif
#if !defined(LZO_UNALIGNED_OK_4)
{
/* byte-wise copy: 3 bytes, then t more */
*op++ = *ip++;
*op++ = *ip++;
*op++ = *ip++;
do *op++ = *ip++; while (--t > 0);
}
#endif
first_literal_run:
t = *ip++;
if (t >= 16)
goto match;
/*
 * An instruction byte below 16 directly after a literal run is a 3-byte
 * match located beyond the M2 offset range.
 */
#if defined(COPY_DICT)
#if defined(LZO1Z)
m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
last_m_off = m_off;
#else
m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
#endif
NEED_OP(3);
t = 3; COPY_DICT(t,m_off)
#else /* !COPY_DICT */
#if defined(LZO1Z)
t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
m_pos = op - t;
last_m_off = t;
#else
m_pos = op - (1 + M2_MAX_OFFSET);
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
#endif
TEST_LB(m_pos); NEED_OP(3);
*op++ = *m_pos++;
*op++ = *m_pos++;
*op++ = *m_pos;
#endif /* COPY_DICT */
goto match_done;
/* handle matches */
/* inner loop: stays here as long as each match is followed by 1..3 literals */
do {
match:
if (t >= 64) { /* a M2 match */
#if defined(COPY_DICT)
#if defined(LZO1X)
m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
t = (t >> 5) - 1;
#elif defined(LZO1Y)
m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
t = (t >> 4) - 3;
#elif defined(LZO1Z)
m_off = t & 0x1f;
if (m_off >= 0x1c)
m_off = last_m_off;
else {
m_off = 1 + (m_off << 6) + (*ip++ >> 2);
last_m_off = m_off;
}
t = (t >> 5) - 1;
#endif
#else /* !COPY_DICT */
#if defined(LZO1X)
/* offset: 3 bits from the instruction byte plus one more byte; length from the top bits */
m_pos = op - 1;
m_pos -= (t >> 2) & 7;
m_pos -= *ip++ << 3;
t = (t >> 5) - 1;
#elif defined(LZO1Y)
m_pos = op - 1;
m_pos -= (t >> 2) & 3;
m_pos -= *ip++ << 2;
t = (t >> 4) - 3;
#elif defined(LZO1Z)
{
unsigned off = t & 0x1f;
m_pos = op;
if (off >= 0x1c) {
/* reuse the offset of the previous match */
assert(last_m_off > 0);
m_pos -= last_m_off;
} else {
off = 1 + (off << 6) + (*ip++ >> 2);
m_pos -= off;
last_m_off = off;
}
}
t = (t >> 5) - 1;
#endif
TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1);
goto copy_match;
#endif /* COPY_DICT */
}
else if (t >= 32) { /* a M3 match */
t &= 31;
/* length 0 in the instruction byte: extended length, 255 per zero byte */
if (t == 0) {
NEED_IP(1);
while (*ip == 0) {
t += 255;
ip++;
NEED_IP(1);
}
t += 31 + *ip++;
}
#if defined(COPY_DICT)
#if defined(LZO1Z)
m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
last_m_off = m_off;
#else
m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
#endif
#else /* !COPY_DICT */
#if defined(LZO1Z)
{
unsigned off = 1 + (ip[0] << 6) + (ip[1] >> 2);
m_pos = op - off;
last_m_off = off;
}
#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN)
m_pos = op - 1;
m_pos -= (* (const lzo_ushortp) ip) >> 2;
#else
/* 14-bit offset from two bytes; the low 2 bits of ip[0] are not part of it */
m_pos = op - 1;
m_pos -= (ip[0] >> 2) + (ip[1] << 6);
#endif
#endif /* COPY_DICT */
ip += 2;
}
else if (t >= 16) { /* a M4 match */
/* bit 3 of the instruction byte supplies bit 14 of the offset */
#if defined(COPY_DICT)
m_off = (t & 8) << 11;
#else /* !COPY_DICT */
m_pos = op;
m_pos -= (t & 8) << 11;
#endif /* COPY_DICT */
t &= 7;
if (t == 0) {
NEED_IP(1);
while (*ip == 0) {
t += 255;
ip++;
NEED_IP(1);
}
t += 7 + *ip++;
}
#if defined(COPY_DICT)
#if defined(LZO1Z)
m_off += (ip[0] << 6) + (ip[1] >> 2);
#else
m_off += (ip[0] >> 2) + (ip[1] << 6);
#endif
ip += 2;
if (m_off == 0)
goto eof_found;
m_off += 0x4000;
#if defined(LZO1Z)
last_m_off = m_off;
#endif
#else /* !COPY_DICT */
#if defined(LZO1Z)
m_pos -= (ip[0] << 6) + (ip[1] >> 2);
#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN)
m_pos -= (* (const lzo_ushortp) ip) >> 2;
#else
m_pos -= (ip[0] >> 2) + (ip[1] << 6);
#endif
ip += 2;
/* an M4 code with a zero offset is the end-of-stream marker */
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
#if defined(LZO1Z)
last_m_off = pd((const uint8_t*)op, m_pos);
#endif
#endif /* COPY_DICT */
}
else { /* a M1 match */
/* fixed 2-byte match */
#if defined(COPY_DICT)
#if defined(LZO1Z)
m_off = 1 + (t << 6) + (*ip++ >> 2);
last_m_off = m_off;
#else
m_off = 1 + (t >> 2) + (*ip++ << 2);
#endif
NEED_OP(2);
t = 2; COPY_DICT(t,m_off)
#else /* !COPY_DICT */
#if defined(LZO1Z)
t = 1 + (t << 6) + (*ip++ >> 2);
m_pos = op - t;
last_m_off = t;
#else
m_pos = op - 1;
m_pos -= t >> 2;
m_pos -= *ip++ << 2;
#endif
TEST_LB(m_pos); NEED_OP(2);
*op++ = *m_pos++;
*op++ = *m_pos;
#endif /* COPY_DICT */
goto match_done;
}
/* copy match */
/* the match is t + 2 bytes long */
#if defined(COPY_DICT)
NEED_OP(t+3-1);
t += 3-1; COPY_DICT(t,m_off)
#else /* !COPY_DICT */
TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1);
#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
# if !defined(LZO_UNALIGNED_OK_4)
if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) {
assert((op - m_pos) >= 4); /* both pointers are aligned */
# else
/* word-wise copy is only taken when source and destination are at least 4 apart */
if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
# endif
COPY4(op,m_pos);
op += 4; m_pos += 4; t -= 4 - (3 - 1);
do {
COPY4(op,m_pos);
op += 4; m_pos += 4; t -= 4;
} while (t >= 4);
if (t > 0)
do *op++ = *m_pos++; while (--t > 0);
}
else
#endif
{
copy_match:
/* byte-wise copy: 2 bytes, then t more */
*op++ = *m_pos++; *op++ = *m_pos++;
do *op++ = *m_pos++; while (--t > 0);
}
#endif /* COPY_DICT */
match_done:
/* the low 2 bits of an already consumed code byte give the number of literals that follow */
#if defined(LZO1Z)
t = ip[-1] & 3;
#else
t = ip[-2] & 3;
#endif
/* no trailing literals: leave the match loop, back to the outer loop */
if (t == 0)
break;
/* copy literals */
match_next:
assert(t > 0);
assert(t < 4);
NEED_OP(t);
NEED_IP(t+1);
#if 0
do *op++ = *ip++; while (--t > 0);
#else
*op++ = *ip++;
if (t > 1) {
*op++ = *ip++;
if (t > 2)
*op++ = *ip++;
}
#endif
/* the byte after these literals is the next match instruction */
t = *ip++;
} while (TEST_IP && TEST_OP);
}
//#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
/* no EOF code was found */
*out_len = pd(op, out);
return LZO_E_EOF_NOT_FOUND;
//#endif
eof_found:
assert(t == 1);
*out_len = pd(op, out);
/* classify by where the input pointer ended up relative to the input end */
return (ip == ip_end ? LZO_E_OK :
(ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
//#if defined(HAVE_NEED_IP)
input_overrun:
*out_len = pd(op, out);
return LZO_E_INPUT_OVERRUN;
//#endif
//#if defined(HAVE_NEED_OP)
output_overrun:
*out_len = pd(op, out);
return LZO_E_OUTPUT_OVERRUN;
//#endif
//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND)
lookbehind_overrun:
*out_len = pd(op, out);
return LZO_E_LOOKBEHIND_OVERRUN;
//#endif
}

View File

@@ -0,0 +1,54 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/* transformer(), more than meets the eye */
/*
* On MMU machine, the transform_prog is removed by macro magic
* in include/archive.h. On NOMMU, transformer is removed.
*/
void FAST_FUNC open_transformer(int fd,
	IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd),
	const char *transform_prog)
{
	struct fd_pair channel;
	int child;

	xpiped_pair(channel);
	child = BB_MMU ? xfork() : xvfork();

	if (child != 0) {
		/* Parent: drop the write end and make the pipe's read end
		 * take the place of fd, so the caller keeps reading from fd */
		close(channel.wr);
		xmove_fd(channel.rd, fd);
		return;
	}

	/* Child: it only ever writes into the pipe */
	close(channel.rd);
// FIXME: error check?
#if BB_MMU
	/* Run the transformer in-process, reading fd, writing the pipe */
	transformer(fd, channel.wr);
	if (ENABLE_FEATURE_CLEAN_UP) {
		close(channel.wr); /* send EOF */
		close(fd);
	}
	/* must be _exit! bug was actually seen here */
	_exit(EXIT_SUCCESS);
#else
	{
		/* Exec "<transform_prog> -cf -" with fd as its stdin
		 * and the pipe as its stdout */
		char *args[4];

		args[0] = (char*)transform_prog;
		args[1] = (char*)"-cf";
		args[2] = (char*)"-";
		args[3] = NULL;
		xmove_fd(fd, 0);
		xmove_fd(channel.wr, 1);
		BB_EXECVP(transform_prog, args);
		bb_perror_msg_and_die("can't execute '%s'", transform_prog);
	}
#endif
	/* notreached */
}

View File

@@ -0,0 +1,19 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/*
 * Skip `amount` bytes on fd with lseek(). If fd turns out not to be
 * seekable (ESPIPE), fall back to seek_by_read(); any other lseek
 * failure is fatal. A zero amount is a no-op.
 */
void FAST_FUNC seek_by_jump(int fd, off_t amount)
{
	if (amount == 0)
		return;

	if (lseek(fd, amount, SEEK_CUR) != (off_t) -1)
		return;

	if (errno == ESPIPE) {
		seek_by_read(fd, amount);
		return;
	}
	bb_perror_msg_and_die("seek failure");
}

View File

@@ -0,0 +1,16 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
/* A pipe or stdin cannot be lseek'ed, so the only way to skip over
 * data is to read it and throw it away. A zero amount does nothing.
 */
void FAST_FUNC seek_by_read(int fd, off_t amount)
{
	if (amount == 0)
		return;

	bb_copyfd_exact_size(fd, -1, amount);
}

View File

@@ -0,0 +1,22 @@
/* vi: set sw=4 ts=4: */
/*
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
#include "libbb.h"
#include "archive.h"
#include "ar.h"
/*
 * Unpack an ar archive: read and verify the global magic at the current
 * position of ar_archive->src_fd (dies with "invalid ar magic" on a
 * mismatch), account for it in ar_archive->offset, then process member
 * headers until get_header_ar() stops returning EXIT_SUCCESS.
 */
void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive)
{
	char signature[7];

	xread(ar_archive->src_fd, signature, AR_MAGIC_LEN);
	if (strncmp(signature, AR_MAGIC, AR_MAGIC_LEN))
		bb_error_msg_and_die("invalid ar magic");
	ar_archive->offset += AR_MAGIC_LEN;

	for (;;) {
		if (get_header_ar(ar_archive) != EXIT_SUCCESS)
			break;
	}
}

View File

@@ -0,0 +1,135 @@
XZ Embedded
===========
XZ Embedded is a relatively small, limited implementation of the .xz
file format. Currently only decoding is implemented.
XZ Embedded was written for use in the Linux kernel, but the code can
be easily used in other environments too, including regular userspace
applications.
This README contains information that is useful only when the copy
of XZ Embedded isn't part of the Linux kernel tree. You should also
read linux/Documentation/xz.txt even if you aren't using XZ Embedded
as part of Linux; information in that file is not repeated in this
README.
Compiling the Linux kernel module
The xz_dec module depends on crc32 module, so make sure that you have
it enabled (CONFIG_CRC32).
Building the xz_dec and xz_dec_test modules without support for BCJ
filters:
cd linux/lib/xz
make -C /path/to/kernel/source \
KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m
Building the xz_dec and xz_dec_test modules with support for BCJ
filters:
cd linux/lib/xz
make -C /path/to/kernel/source \
KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \
CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \
CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \
CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \
CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y
If you want only one or a few of the BCJ filters, omit the appropriate
variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support
code shared between all BCJ filters.
Most people don't need the xz_dec_test module. You can skip building
it by omitting CONFIG_XZ_DEC_TEST=m from the make command line.
Compiler requirements
XZ Embedded should compile as either GNU-C89 (used in the Linux
kernel) or with any C99 compiler. Getting the code to compile with a
non-GNU C89 compiler or a C++ compiler should be quite easy as
long as there is a data type for unsigned 64-bit integer (or the
code is modified not to support large files, which needs some more
care than just using 32-bit integer instead of 64-bit).
If you use GCC, try to use a recent version. For example, on x86,
xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when
compiled with GCC 4.3.3.
Embedding into userspace applications
To embed the XZ decoder, copy the following files into a single
directory in your source code tree:
linux/include/linux/xz.h
linux/lib/xz/xz_crc32.c
linux/lib/xz/xz_dec_lzma2.c
linux/lib/xz/xz_dec_stream.c
linux/lib/xz/xz_lzma2.h
linux/lib/xz/xz_private.h
linux/lib/xz/xz_stream.h
userspace/xz_config.h
Alternatively, xz.h may be placed into a different directory but then
that directory must be in the compiler include path when compiling
the .c files.
Your code should use only the functions declared in xz.h. The rest of
the .h files are meant only for internal use in XZ Embedded.
You may want to modify xz_config.h to be more suitable for your build
environment. Probably you should at least skim through it even if the
default file works as is.
BCJ filter support
If you want support for one or more BCJ filters, you also need to copy
linux/lib/xz/xz_dec_bcj.c into your application, and use the appropriate
#defines in xz_config.h or in compiler flags. You don't need these
#defines in the code that just uses XZ Embedded via xz.h, but having
them always #defined doesn't hurt either.
#define Instruction set BCJ filter endianness
XZ_DEC_X86 x86 or x86-64 Little endian only
XZ_DEC_POWERPC PowerPC Big endian only
XZ_DEC_IA64 Itanium (IA-64) Big or little endian
XZ_DEC_ARM ARM Little endian only
XZ_DEC_ARMTHUMB ARM-Thumb Little endian only
XZ_DEC_SPARC SPARC Big or little endian
While some architectures are (partially) bi-endian, the endianness
setting doesn't change the endianness of the instructions on all
architectures. That's why Itanium and SPARC filters work for both big
and little endian executables (Itanium has little endian instructions
and SPARC has big endian instructions).
There currently is no filter for little endian PowerPC or big endian
ARM or ARM-Thumb. Implementing filters for them can be considered if
there is a need for such filters in real-world applications.
Notes about shared libraries
If you are including XZ Embedded into a shared library, you very
probably should rename the xz_* functions to prevent symbol
conflicts in case your library is linked against some other library
or application that also has XZ Embedded in it (which may even be
a different version of XZ Embedded). TODO: Provide an easy way
to do this.
Please don't create a shared library of XZ Embedded itself unless
it is fine to rebuild everything depending on that shared library
every time you upgrade to a newer version of XZ Embedded. There are
no API or ABI stability guarantees between different versions of
XZ Embedded.
Specifying the calling convention
XZ_FUNC macro was included to support declaring functions with __init
in Linux. Outside Linux, it can be used to specify the calling
convention on systems that support multiple calling conventions.
For example, on Windows, you may make all functions use the stdcall
calling convention by defining XZ_FUNC=__stdcall when building and
using the functions from XZ Embedded.

View File

@@ -0,0 +1,271 @@
/*
* XZ decompressor
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_H
#define XZ_H
#ifdef __KERNEL__
# include <linux/stddef.h>
# include <linux/types.h>
#else
# include <stddef.h>
# include <stdint.h>
#endif
/* In Linux, this is used to make extern functions static when needed. */
#ifndef XZ_EXTERN
# define XZ_EXTERN extern
#endif
/* In Linux, this is used to mark the functions with __init when needed. */
#ifndef XZ_FUNC
# define XZ_FUNC
#endif
/**
* enum xz_mode - Operation mode
*
* @XZ_SINGLE: Single-call mode. This uses less RAM than
* multi-call modes, because the LZMA2
* dictionary doesn't need to be allocated as
* part of the decoder state. All required data
* structures are allocated at initialization,
* so xz_dec_run() cannot return XZ_MEM_ERROR.
* @XZ_PREALLOC: Multi-call mode with preallocated LZMA2
* dictionary buffer. All data structures are
* allocated at initialization, so xz_dec_run()
* cannot return XZ_MEM_ERROR.
* @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is
* allocated once the required size has been
* parsed from the stream headers. If the
* allocation fails, xz_dec_run() will return
* XZ_MEM_ERROR.
*
* It is possible to enable support only for a subset of the above
* modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
* or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
* with support for all operation modes, but the preboot code may
* be built with fewer features to minimize code size.
*/
enum xz_mode {
XZ_SINGLE,   /* single-call: the whole stream is decoded by one xz_dec_run() */
XZ_PREALLOC, /* multi-call, LZMA2 dictionary allocated at initialization */
XZ_DYNALLOC  /* multi-call, LZMA2 dictionary allocated once its size is known */
};
/**
* enum xz_ret - Return codes
* @XZ_OK: Everything is OK so far. More input or more
* output space is required to continue. This
* return code is possible only in multi-call mode
* (XZ_PREALLOC or XZ_DYNALLOC).
* @XZ_STREAM_END: Operation finished successfully.
* @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding
* is still possible in multi-call mode by simply
* calling xz_dec_run() again.
* NOTE: This return value is used only if
* XZ_DEC_ANY_CHECK was defined at build time,
* which is not used in the kernel. Unsupported
* check types return XZ_OPTIONS_ERROR if
* XZ_DEC_ANY_CHECK was not defined at build time.
* @XZ_MEM_ERROR: Allocating memory failed. This return code is
* possible only if the decoder was initialized
* with XZ_DYNALLOC. The amount of memory that was
* tried to be allocated was no more than the
* dict_max argument given to xz_dec_init().
* @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than
* allowed by the dict_max argument given to
* xz_dec_init(). This return value is possible
* only in multi-call mode (XZ_PREALLOC or
* XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
* ignores the dict_max argument.
* @XZ_FORMAT_ERROR: File format was not recognized (wrong magic
* bytes).
* @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
* compression options. In the decoder this means
* that the header CRC32 matches, but the header
* itself specifies something that we don't support.
* @XZ_DATA_ERROR: Compressed data is corrupt.
* @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
* different between multi-call and single-call
* mode; more information below.
*
* In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
* to XZ code cannot consume any input and cannot produce any new output.
* This happens when there is no new input available, or the output buffer
* is full while at least one output byte is still pending. Assuming your
* code is not buggy, you can get this error only when decoding a compressed
* stream that is truncated or otherwise corrupt.
*
* In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
* is too small, or the compressed input is corrupt in a way that makes the
* decoder produce more output than the caller expected. When it is
* (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
* is used instead of XZ_BUF_ERROR.
*/
enum xz_ret {
XZ_OK,                /* so far so good; more input or output space needed (multi-call only) */
XZ_STREAM_END,        /* finished successfully */
XZ_UNSUPPORTED_CHECK, /* unknown integrity check type (only with XZ_DEC_ANY_CHECK) */
XZ_MEM_ERROR,         /* memory allocation failed (XZ_DYNALLOC only) */
XZ_MEMLIMIT_ERROR,    /* stream needs a dictionary bigger than dict_max (multi-call only) */
XZ_FORMAT_ERROR,      /* wrong magic bytes */
XZ_OPTIONS_ERROR,     /* valid header, but it asks for something unsupported */
XZ_DATA_ERROR,        /* compressed data is corrupt */
XZ_BUF_ERROR          /* cannot make any progress */
};
/**
* struct xz_buf - Passing input and output buffers to XZ code
* @in: Beginning of the input buffer. This may be NULL if and only
* if in_pos is equal to in_size.
* @in_pos: Current position in the input buffer. This must not exceed
* in_size.
* @in_size: Size of the input buffer
* @out: Beginning of the output buffer. This may be NULL if and only
* if out_pos is equal to out_size.
* @out_pos: Current position in the output buffer. This must not exceed
* out_size.
* @out_size: Size of the output buffer
*
* Only the contents of the output buffer from out[out_pos] onward, and
* the variables in_pos and out_pos are modified by the XZ code.
*/
struct xz_buf {
const uint8_t *in; /* start of the input buffer */
size_t in_pos;     /* current read position, must not exceed in_size */
size_t in_size;    /* size of the input buffer */
uint8_t *out;      /* start of the output buffer */
size_t out_pos;    /* current write position, must not exceed out_size */
size_t out_size;   /* size of the output buffer */
};
/**
* struct xz_dec - Opaque type to hold the XZ decoder state
*/
struct xz_dec;
/**
* xz_dec_init() - Allocate and initialize a XZ decoder state
* @mode: Operation mode
* @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for
* multi-call decoding. This is ignored in single-call mode
* (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
* or 2^n + 2^(n-1) bytes (the latter sizes are less common
* in practice), so other values for dict_max don't make sense.
* In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
* 512 KiB, and 1 MiB are probably the only reasonable values,
* except for kernel and initramfs images where a bigger
* dictionary can be fine and useful.
*
* Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
* once. The caller must provide enough output space or the decoding will
* fail. The output space is used as the dictionary buffer, which is why
* there is no need to allocate the dictionary as part of the decoder's
* internal state.
*
* Because the output buffer is used as the workspace, streams encoded using
* a big dictionary are not a problem in single-call mode. It is enough that
* the output buffer is big enough to hold the actual uncompressed data; it
* can be smaller than the dictionary size stored in the stream headers.
*
* Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
* of memory is preallocated for the LZMA2 dictionary. This way there is no
* risk that xz_dec_run() could run out of memory, since xz_dec_run() will
* never allocate any memory. Instead, if the preallocated dictionary is too
* small for decoding the given input stream, xz_dec_run() will return
* XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
* decoded to avoid allocating excessive amount of memory for the dictionary.
*
* Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
* dict_max specifies the maximum allowed dictionary size that xz_dec_run()
* may allocate once it has parsed the dictionary size from the stream
* headers. This way excessive allocations can be avoided while still
* limiting the maximum memory usage to a sane value to prevent running the
* system out of memory when decompressing streams from untrusted sources.
*
* On success, xz_dec_init() returns a pointer to struct xz_dec, which is
* ready to be used with xz_dec_run(). If memory allocation fails,
* xz_dec_init() returns NULL.
*/
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
enum xz_mode mode, uint32_t dict_max);
/**
* xz_dec_run() - Run the XZ decoder
* @s: Decoder state allocated using xz_dec_init()
* @b: Input and output buffers
*
* The possible return values depend on build options and operation mode.
* See enum xz_ret for details.
*
* NOTE: If an error occurs in single-call mode (return value is not
* XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the
* contents of the output buffer from b->out[b->out_pos] onward are
* undefined. This is true even after XZ_BUF_ERROR, because with some filter
* chains, there may be a second pass over the output buffer, and this pass
* cannot be properly done if the output buffer is truncated. Thus, you
* cannot give the single-call decoder a too small buffer and then expect to
* get that amount valid data from the beginning of the stream. You must use
* the multi-call decoder if you don't want to uncompress the whole stream.
*/
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b);
/**
* xz_dec_reset() - Reset an already allocated decoder state
* @s: Decoder state allocated using xz_dec_init()
*
* This function can be used to reset the multi-call decoder state without
* freeing and reallocating memory with xz_dec_end() and xz_dec_init().
*
* In single-call mode, xz_dec_reset() is always called in the beginning of
* xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
* multi-call mode.
*/
XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s);
/**
* xz_dec_end() - Free the memory allocated for the decoder state
* @s: Decoder state allocated using xz_dec_init(). If s is NULL,
* this function does nothing.
*/
XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);
/*
* Standalone build (userspace build or in-kernel build for boot time use)
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
* CRC32 module is used instead, and users of this module don't need to
* care about the functions below.
*/
#ifndef XZ_INTERNAL_CRC32
# ifdef __KERNEL__
# define XZ_INTERNAL_CRC32 0
# else
# define XZ_INTERNAL_CRC32 1
# endif
#endif
#if XZ_INTERNAL_CRC32
/*
* This must be called before any other xz_* function to initialize
* the CRC32 lookup table.
*/
XZ_EXTERN void XZ_FUNC xz_crc32_init(void);
/*
* Update CRC32 value using the polynomial from IEEE-802.3. To start a new
* calculation, the third argument must be zero. To continue the calculation,
* the previously returned value is passed as the third argument.
*/
XZ_EXTERN uint32_t XZ_FUNC xz_crc32(
const uint8_t *buf, size_t size, uint32_t crc);
#endif
#endif

View File

@@ -0,0 +1,123 @@
/*
* Private includes and definitions for userspace use of XZ Embedded
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_CONFIG_H
#define XZ_CONFIG_H
/* Uncomment as needed to enable BCJ filter decoders. */
/* #define XZ_DEC_X86 */
/* #define XZ_DEC_POWERPC */
/* #define XZ_DEC_IA64 */
/* #define XZ_DEC_ARM */
/* #define XZ_DEC_ARMTHUMB */
/* #define XZ_DEC_SPARC */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "xz.h"
/*
 * Map the kernel-style allocation helpers used by the decoder onto the
 * C library allocator. The flags argument of kmalloc() is discarded.
 */
#define kmalloc(size, flags) malloc(size)
#define kfree(ptr) free(ptr)
#define vmalloc(size) malloc(size)
#define vfree(ptr) free(ptr)
/* memeq() is true when the two buffers hold the same size bytes. */
#define memeq(a, b, size) (memcmp(a, b, size) == 0)
#define memzero(buf, size) memset(buf, 0, size)
/*
 * Replace any earlier min/min_t definitions with local ones.
 *
 * NOTE: min() evaluates its arguments more than once, so they must not
 * have side effects. min_t() ignores its type argument here and simply
 * forwards to min(); no cast is applied.
 */
#undef min
#undef min_t
#define min(x, y) ((x) < (y) ? (x) : (y))
#define min_t(type, x, y) min(x, y)
/*
 * Some functions have been marked with __always_inline to keep the
 * performance reasonable even when the compiler is optimizing for
 * small code size. You may be able to save a few bytes by #defining
 * __always_inline to plain inline, but don't complain if the code
 * becomes slow.
 *
 * NOTE: System headers on GNU/Linux may #define this macro already,
 * so if you want to change it, you need to #undef it first.
 *
 * When the macro is not defined yet, GCC-compatible compilers get the
 * always_inline attribute and all other compilers get plain inline.
 */
#ifndef __always_inline
# ifdef __GNUC__
# define __always_inline \
inline __attribute__((__always_inline__))
# else
# define __always_inline inline
# endif
#endif
/*
 * Some functions are marked to never be inlined to reduce stack usage.
 * If you don't care about stack usage, you may want to modify this so
 * that noinline_for_stack is #defined to be empty even when using GCC.
 * Doing so may save a few bytes in binary size.
 *
 * When the macro is not defined yet, it expands to the noinline attribute
 * on GCC-compatible compilers and to nothing elsewhere.
 */
#ifndef noinline_for_stack
# ifdef __GNUC__
# define noinline_for_stack __attribute__((__noinline__))
# else
# define noinline_for_stack
# endif
#endif
/* Inline functions to access unaligned unsigned 32-bit integers */
#ifndef get_unaligned_le32
/*
 * Read a 32-bit little endian value from buf[0..3]. The bytes are combined
 * one at a time, so buf does not need any particular alignment.
 */
static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf)
{
	uint32_t value = buf[3];

	value = (value << 8) | buf[2];
	value = (value << 8) | buf[1];
	value = (value << 8) | buf[0];

	return value;
}
#endif
#ifndef get_unaligned_be32
/*
 * Read a 32-bit big endian value from buf[0..3]. The bytes are combined
 * one at a time, so buf does not need any particular alignment.
 *
 * Every byte is widened to uint32_t before it is shifted. Shifting the
 * promoted int value of buf[0] left by 24 would be undefined behavior
 * whenever buf[0] >= 0x80, because the result does not fit in int.
 */
static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf)
{
	return ((uint32_t)buf[0] << 24)
			| ((uint32_t)buf[1] << 16)
			| ((uint32_t)buf[2] << 8)
			| (uint32_t)buf[3];
}
#endif
#ifndef put_unaligned_le32
/*
 * Store val into buf[0..3] in little endian byte order (least significant
 * byte first). buf does not need any particular alignment.
 */
static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf)
{
	unsigned int idx;

	for (idx = 0; idx < 4; ++idx) {
		buf[idx] = (uint8_t)(val & 0xFF);
		val >>= 8;
	}
}
#endif
#ifndef put_unaligned_be32
/*
 * Store val into buf[0..3] in big endian byte order (most significant
 * byte first). buf does not need any particular alignment.
 */
static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
{
	int idx;

	/* Fill from the last byte backwards, peeling off the low byte. */
	for (idx = 3; idx >= 0; --idx) {
		buf[idx] = (uint8_t)(val & 0xFF);
		val >>= 8;
	}
}
#endif
/*
 * Use get_unaligned_le32() also for aligned access for simplicity. On
 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
 * could save a few bytes in code size.
 *
 * A definition of get_le32 made before this point takes precedence.
 */
#ifndef get_le32
# define get_le32 get_unaligned_le32
#endif
#endif

View File

@@ -0,0 +1,564 @@
/*
* Branch/Call/Jump (BCJ) filter decoders
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include "xz_private.h"
/*
* The rest of the file is inside this ifdef. It makes things a little more
* convenient when building without support for any BCJ filters.
*/
#ifdef XZ_DEC_BCJ
/*
 * State of a BCJ filter decoder. It is allocated by xz_dec_bcj_create(),
 * which sets only single_call; the remaining fields that carry state
 * between calls are initialized by xz_dec_bcj_reset().
 */
struct xz_dec_bcj {
/*
 * Type of the BCJ filter being used. The numeric values are the Filter
 * IDs that xz_dec_bcj_reset() accepts and stores here.
 */
enum {
BCJ_X86 = 4, /* x86 or x86-64 */
BCJ_POWERPC = 5, /* Big endian only */
BCJ_IA64 = 6, /* Big or little endian */
BCJ_ARM = 7, /* Little endian only */
BCJ_ARMTHUMB = 8, /* Little endian only */
BCJ_SPARC = 9 /* Big or little endian */
} type;
/*
* Return value of the next filter in the chain. We need to preserve
* this information across calls, because we must not call the next
* filter anymore once it has returned XZ_STREAM_END.
*/
enum xz_ret ret;
/* True if we are operating in single-call mode. */
bool single_call;
/*
* Absolute position relative to the beginning of the uncompressed
* data (in a single .xz Block). We care only about the lowest 32
* bits so this doesn't need to be uint64_t even with big files.
*/
uint32_t pos;
/* x86 filter state */
uint32_t x86_prev_mask;
/* Temporary space to hold the variables from struct xz_buf */
uint8_t *out;
size_t out_pos;
size_t out_size;
/* Small staging buffer for data that could not be filtered in place yet */
struct {
/* Amount of already filtered data in the beginning of buf */
size_t filtered;
/* Total amount of data currently stored in buf */
size_t size;
/*
* Buffer to hold a mix of filtered and unfiltered data. This
* needs to be big enough to hold Alignment + 2 * Look-ahead:
*
* Type Alignment Look-ahead
* x86 1 4
* PowerPC 4 0
* IA-64 16 0
* ARM 4 0
* ARM-Thumb 2 2
* SPARC 4 0
*/
uint8_t buf[16];
} temp;
};
#ifdef XZ_DEC_X86
/*
 * This macro is used to test the most significant byte of a memory address
 * in an x86 instruction. It is true when the byte is 0x00 or 0xFF.
 * NOTE: the argument is evaluated twice.
 */
#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
/*
 * Apply the x86 BCJ decoder to buf[0..size).
 *
 * The buffer is scanned for bytes 0xE8 and 0xE9 (presumably the x86
 * CALL/JMP opcodes with a 32-bit operand). When the most significant
 * byte of the little endian 32-bit operand that follows passes
 * bcj_x86_test_msbyte(), the operand is rewritten by subtracting the
 * position of the byte after the operand (s->pos + offset + 5).
 *
 * Returns the number of bytes from the beginning of buf that have been
 * processed; zero is returned if size <= 4. s->x86_prev_mask is read at
 * entry and updated at exit so that the state carries over to the next
 * call.
 */
static noinline_for_stack size_t XZ_FUNC bcj_x86(
struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
static const bool mask_to_allowed_status[8]
= { true, true, true, false, true, false, false, false };
static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
size_t i;
size_t prev_pos = (size_t)-1;
uint32_t prev_mask = s->x86_prev_mask;
uint32_t src;
uint32_t dest;
uint32_t j;
uint8_t b;
if (size <= 4)
return 0;
/* Keep four bytes of look-ahead so that buf[i + 4] is always valid. */
size -= 4;
for (i = 0; i < size; ++i) {
/* Masking the lowest bit matches both 0xE8 and 0xE9. */
if ((buf[i] & 0xFE) != 0xE8)
continue;
/* Distance from the previously seen candidate byte */
prev_pos = i - prev_pos;
if (prev_pos > 3) {
prev_mask = 0;
} else {
prev_mask = (prev_mask << (prev_pos - 1)) & 7;
if (prev_mask != 0) {
b = buf[i + 4 - mask_to_bit_num[prev_mask]];
if (!mask_to_allowed_status[prev_mask]
|| bcj_x86_test_msbyte(b)) {
prev_pos = i;
prev_mask = (prev_mask << 1) | 1;
continue;
}
}
}
prev_pos = i;
if (bcj_x86_test_msbyte(buf[i + 4])) {
src = get_unaligned_le32(buf + i + 1);
while (true) {
dest = src - (s->pos + (uint32_t)i + 5);
if (prev_mask == 0)
break;
j = mask_to_bit_num[prev_mask] * 8;
b = (uint8_t)(dest >> (24 - j));
if (!bcj_x86_test_msbyte(b))
break;
src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
}
/* Keep the low 25 bits and sign-extend bit 24 into the top byte. */
dest &= 0x01FFFFFF;
dest |= (uint32_t)0 - (dest & 0x01000000);
put_unaligned_le32(dest, buf + i + 1);
/* Skip the operand that was just rewritten. */
i += 4;
} else {
prev_mask = (prev_mask << 1) | 1;
}
}
/* Save the mask adjusted to the position where processing stopped. */
prev_pos = i - prev_pos;
s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
return i;
}
#endif
#ifdef XZ_DEC_POWERPC
/*
 * Apply the PowerPC BCJ decoder to buf[0..size).
 *
 * The buffer is walked in 4-byte big endian words. In every word matching
 * (word & 0xFC000003) == 0x48000001, the field selected by 0x03FFFFFC has
 * the position of the word (s->pos + offset) subtracted from it.
 *
 * Returns the number of bytes processed, which is a multiple of four.
 */
static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;

	for (off = 0; off + 4 <= size; off += 4) {
		uint8_t *const word = buf + off;
		const uint32_t insn = get_unaligned_be32(word);
		uint32_t target;

		if ((insn & 0xFC000003) != 0x48000001)
			continue;

		target = (insn & 0x03FFFFFC) - (s->pos + (uint32_t)off);
		put_unaligned_be32((target & 0x03FFFFFC) | 0x48000001, word);
	}

	return off;
}
#endif
#ifdef XZ_DEC_IA64
/*
 * Apply the IA-64 BCJ decoder to buf[0..size).
 *
 * The buffer is processed in 16-byte units. The lowest five bits of the
 * first byte of a unit select, via branch_table, a three-bit mask that
 * tells which of the three 41-bit instruction slots are examined. In a
 * matching instruction the 21-bit address field has the position of the
 * unit (s->pos + offset) subtracted from it; the address is handled in
 * units of 16 bytes (shifted by four bits).
 *
 * Returns the number of bytes processed, which is a multiple of 16.
 */
static noinline_for_stack size_t XZ_FUNC bcj_ia64(
struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
/* Slot mask for each possible value of the lowest five bits */
static const uint8_t branch_table[32] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
4, 4, 6, 6, 0, 0, 7, 7,
4, 4, 0, 0, 4, 4, 0, 0
};
/*
* The local variables take a little bit stack space, but it's less
* than what LZMA2 decoder takes, so it doesn't make sense to reduce
* stack usage here without doing that for the LZMA2 decoder too.
*/
/* Loop counters */
size_t i;
size_t j;
/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
uint32_t slot;
/* Bitwise offset of the instruction indicated by slot */
uint32_t bit_pos;
/* bit_pos split into byte and bit parts */
uint32_t byte_pos;
uint32_t bit_res;
/* Address part of an instruction */
uint32_t addr;
/* Mask used to detect which instructions to convert */
uint32_t mask;
/* 41-bit instruction stored somewhere in the lowest 48 bits */
uint64_t instr;
/* Instruction normalized with bit_res for easier manipulation */
uint64_t norm;
for (i = 0; i + 16 <= size; i += 16) {
mask = branch_table[buf[i] & 0x1F];
/* The first slot starts at bit 5; each slot is 41 bits wide. */
for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
if (((mask >> slot) & 1) == 0)
continue;
byte_pos = bit_pos >> 3;
bit_res = bit_pos & 7;
/* Load the six bytes that contain the slot, little endian. */
instr = 0;
for (j = 0; j < 6; ++j)
instr |= (uint64_t)(buf[i + j + byte_pos])
<< (8 * j);
norm = instr >> bit_res;
if (((norm >> 37) & 0x0F) == 0x05
&& ((norm >> 9) & 0x07) == 0) {
/* Gather the 20 low address bits and the separate bit 36. */
addr = (norm >> 13) & 0x0FFFFF;
addr |= ((uint32_t)(norm >> 36) & 1) << 20;
addr <<= 4;
addr -= s->pos + (uint32_t)i;
addr >>= 4;
/* Put the converted address back into the same bit fields. */
norm &= ~((uint64_t)0x8FFFFF << 13);
norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
norm |= (uint64_t)(addr & 0x100000)
<< (36 - 20);
/* Keep the bits below the slot and merge the new instruction. */
instr &= (1 << bit_res) - 1;
instr |= norm << bit_res;
for (j = 0; j < 6; j++)
buf[i + j + byte_pos]
= (uint8_t)(instr >> (8 * j));
}
}
}
return i;
}
#endif
#ifdef XZ_DEC_ARM
/*
 * Apply the ARM BCJ decoder to buf[0..size).
 *
 * The buffer is walked in 4-byte units. In every unit whose last byte is
 * 0xEB, the 24-bit little endian value in the first three bytes is
 * treated as a word address: it is scaled to bytes, the position
 * (s->pos + offset + 8) is subtracted, and the result is scaled back and
 * stored in the same three bytes.
 *
 * Returns the number of bytes processed, which is a multiple of four.
 */
static noinline_for_stack size_t XZ_FUNC bcj_arm(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;

	for (off = 0; off + 4 <= size; off += 4) {
		uint8_t *const insn = buf + off;
		uint32_t target;

		if (insn[3] != 0xEB)
			continue;

		target = ((uint32_t)insn[2] << 16)
				| ((uint32_t)insn[1] << 8)
				| (uint32_t)insn[0];
		target = ((target << 2) - (s->pos + (uint32_t)off + 8)) >> 2;

		insn[0] = (uint8_t)target;
		insn[1] = (uint8_t)(target >> 8);
		insn[2] = (uint8_t)(target >> 16);
	}

	return off;
}
#endif
#ifdef XZ_DEC_ARMTHUMB
/*
 * Apply the ARM-Thumb BCJ decoder to buf[0..size).
 *
 * The buffer is walked in 2-byte steps while at least four bytes remain.
 * A 4-byte sequence matches when (buf[1] & 0xF8) == 0xF0 and
 * (buf[3] & 0xF8) == 0xF8. The 22-bit value spread over the four bytes
 * is then scaled by two, has the position (s->pos + offset + 4)
 * subtracted, is scaled back and stored in the same bit fields. A matched
 * sequence is skipped as a whole.
 *
 * Returns the offset at which processing stopped.
 */
static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off = 0;

	while (off + 4 <= size) {
		uint8_t *const insn = buf + off;
		uint32_t target;

		if ((insn[1] & 0xF8) != 0xF0 || (insn[3] & 0xF8) != 0xF8) {
			off += 2;
			continue;
		}

		target = (uint32_t)insn[2]
				| (((uint32_t)insn[3] & 0x07) << 8)
				| ((uint32_t)insn[0] << 11)
				| (((uint32_t)insn[1] & 0x07) << 19);
		target = ((target << 1) - (s->pos + (uint32_t)off + 4)) >> 1;

		insn[0] = (uint8_t)(target >> 11);
		insn[1] = (uint8_t)(0xF0 | ((target >> 19) & 0x07));
		insn[2] = (uint8_t)target;
		insn[3] = (uint8_t)(0xF8 | ((target >> 8) & 0x07));

		/* Step over both halves of the converted sequence. */
		off += 4;
	}

	return off;
}
#endif
#ifdef XZ_DEC_SPARC
/*
 * Apply the SPARC BCJ decoder to buf[0..size).
 *
 * The buffer is walked in 4-byte big endian words. A word is converted
 * when its ten highest bits are 0x100 or 0x1FF: the word is scaled by
 * four, the position (s->pos + offset) is subtracted, the result is
 * scaled back, and the top bits are rebuilt from bit 22 of that result.
 *
 * Returns the number of bytes processed, which is a multiple of four.
 */
static noinline_for_stack size_t XZ_FUNC bcj_sparc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t off;

	for (off = 0; off + 4 <= size; off += 4) {
		uint8_t *const word = buf + off;
		uint32_t insn = get_unaligned_be32(word);
		const uint32_t top = insn >> 22;

		if (top != 0x100 && top != 0x1FF)
			continue;

		insn = ((insn << 2) - (s->pos + (uint32_t)off)) >> 2;
		insn = ((uint32_t)0x40000000 - (insn & 0x400000))
				| 0x40000000 | (insn & 0x3FFFFF);
		put_unaligned_be32(insn, word);
	}

	return off;
}
#endif
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * The filter is run on buf[*pos..size), that is, *pos is the offset of the
 * first unfiltered byte and size is the end offset of the data in buf.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
uint8_t *buf, size_t *pos, size_t size)
{
size_t filtered;
/* Make buf and size describe only the unfiltered part. */
buf += *pos;
size -= *pos;
switch (s->type) {
#ifdef XZ_DEC_X86
case BCJ_X86:
filtered = bcj_x86(s, buf, size);
break;
#endif
#ifdef XZ_DEC_POWERPC
case BCJ_POWERPC:
filtered = bcj_powerpc(s, buf, size);
break;
#endif
#ifdef XZ_DEC_IA64
case BCJ_IA64:
filtered = bcj_ia64(s, buf, size);
break;
#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
filtered = bcj_arm(s, buf, size);
break;
#endif
#ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB:
filtered = bcj_armthumb(s, buf, size);
break;
#endif
#ifdef XZ_DEC_SPARC
case BCJ_SPARC:
filtered = bcj_sparc(s, buf, size);
break;
#endif
default:
/* Never reached but silence compiler warnings. */
filtered = 0;
break;
}
*pos += filtered;
s->pos += filtered;
}
/*
 * Copy as much already filtered data from s->temp as fits into the free
 * space of the output buffer. Whatever is left in s->temp (filtered data
 * that did not fit and any unfiltered data after it) is moved to the
 * beginning of s->temp.buf.
 */
static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
	const size_t room = b->out_size - b->out_pos;
	size_t n = s->temp.filtered;

	if (room < n)
		n = room;

	memcpy(b->out + b->out_pos, s->temp.buf, n);
	b->out_pos += n;

	s->temp.size -= n;
	s->temp.filtered -= n;

	/* The regions may overlap, thus memmove() instead of memcpy(). */
	memmove(s->temp.buf, s->temp.buf + n, s->temp.size);
}
/*
 * The BCJ filter functions are primitive in the sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 *
 * The data is decoded with xz_dec_lzma2_run() and then filtered, either
 * directly in the output buffer of b or via s->temp. The return value is
 * XZ_OK while filtered data is still pending in s->temp; otherwise it is
 * the value returned by the LZMA2 decoder.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
{
size_t out_start;
/*
* Flush pending already filtered data to the output buffer. Return
* immediately if we couldn't flush everything, or if the next
* filter in the chain had already returned XZ_STREAM_END.
*/
if (s->temp.filtered > 0) {
bcj_flush(s, b);
if (s->temp.filtered > 0)
return XZ_OK;
if (s->ret == XZ_STREAM_END)
return XZ_STREAM_END;
}
/*
* If we have more output space than what is currently pending in
* temp, copy the unfiltered data from temp to the output buffer
* and try to fill the output buffer by decoding more data from the
* next filter in the chain. Apply the BCJ filter on the new data
* in the output buffer. If everything cannot be filtered, copy it
* to temp and rewind the output buffer position accordingly.
*/
if (s->temp.size < b->out_size - b->out_pos) {
out_start = b->out_pos;
memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
b->out_pos += s->temp.size;
s->ret = xz_dec_lzma2_run(lzma2, b);
/*
 * Return errors as is. In single-call mode XZ_OK is returned as is
 * too; only XZ_STREAM_END continues to the filtering step then.
 */
if (s->ret != XZ_STREAM_END
&& (s->ret != XZ_OK || s->single_call))
return s->ret;
bcj_apply(s, b->out, &out_start, b->out_pos);
/*
* As an exception, if the next filter returned XZ_STREAM_END,
* we can do that too, since the last few bytes that remain
* unfiltered are meant to remain unfiltered.
*/
if (s->ret == XZ_STREAM_END)
return XZ_STREAM_END;
/* Move the unfiltered tail from the output buffer to temp. */
s->temp.size = b->out_pos - out_start;
b->out_pos -= s->temp.size;
memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
}
/*
* If we have unfiltered data in temp, try to fill by decoding more
* data from the next filter. Apply the BCJ filter on temp. Then we
* hopefully can fill the actual output buffer by copying filtered
* data from temp. A mix of filtered and unfiltered data may be left
* in temp; it will be taken care on the next call to this function.
*/
if (s->temp.size > 0) {
/* Make b->out{,_pos,_size} temporarily point to s->temp. */
s->out = b->out;
s->out_pos = b->out_pos;
s->out_size = b->out_size;
b->out = s->temp.buf;
b->out_pos = s->temp.size;
b->out_size = sizeof(s->temp.buf);
s->ret = xz_dec_lzma2_run(lzma2, b);
s->temp.size = b->out_pos;
/* Restore the output buffer of the caller. */
b->out = s->out;
b->out_pos = s->out_pos;
b->out_size = s->out_size;
if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
return s->ret;
bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
/*
* If the next filter returned XZ_STREAM_END, we mark that
* everything is filtered, since the last unfiltered bytes
* of the stream are meant to be left as is.
*/
if (s->ret == XZ_STREAM_END)
s->temp.filtered = s->temp.size;
bcj_flush(s, b);
if (s->temp.filtered > 0)
return XZ_OK;
}
return s->ret;
}
/*
 * Allocate a BCJ decoder state and record whether it is used in
 * single-call mode. Nothing else is initialized here; xz_dec_bcj_reset()
 * sets up the rest of the state. Returns NULL if the allocation fails.
 */
XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
{
	struct xz_dec_bcj *dec = kmalloc(sizeof(*dec), GFP_KERNEL);

	if (dec == NULL)
		return NULL;

	dec->single_call = single_call;
	return dec;
}
/*
 * Select the BCJ filter indicated by the Filter ID id and reset the
 * filter state for a new Block.
 *
 * Returns XZ_OK on success. If id is not the ID of a BCJ filter whose
 * support has been compiled in, XZ_OPTIONS_ERROR is returned and s is
 * left unmodified. s->single_call is never touched here.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
struct xz_dec_bcj *s, uint8_t id)
{
switch (id) {
#ifdef XZ_DEC_X86
case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
case BCJ_SPARC:
#endif
/* All supported IDs share this break. */
break;
default:
/* Unsupported Filter ID */
return XZ_OPTIONS_ERROR;
}
s->type = id;
s->ret = XZ_OK;
s->pos = 0;
s->x86_prev_mask = 0;
s->temp.filtered = 0;
s->temp.size = 0;
return XZ_OK;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,822 @@
/*
* .xz Stream decoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#include "xz_private.h"
#include "xz_stream.h"
/*
 * Hash used to validate the Index field.
 *
 * unpadded and uncompressed are running sums of the sizes, and crc32 is
 * updated by running xz_crc32() over the whole structure after each
 * addition. Two instances are compared with memeq() in dec_main().
 */
struct xz_dec_hash {
vli_type unpadded;
vli_type uncompressed;
uint32_t crc32;
};
/*
 * State of the .xz Stream decoder. It is allocated by xz_dec_init(),
 * reset by xz_dec_reset(), and freed by xz_dec_end().
 */
struct xz_dec {
/* Position in dec_main() */
enum {
SEQ_STREAM_HEADER,
SEQ_BLOCK_START,
SEQ_BLOCK_HEADER,
SEQ_BLOCK_UNCOMPRESS,
SEQ_BLOCK_PADDING,
SEQ_BLOCK_CHECK,
SEQ_INDEX,
SEQ_INDEX_PADDING,
SEQ_INDEX_CRC32,
SEQ_STREAM_FOOTER
} sequence;
/* Position in variable-length integers and Check fields */
uint32_t pos;
/* Variable-length integer decoded by dec_vli() */
vli_type vli;
/* Saved in_pos and out_pos */
size_t in_start;
size_t out_start;
/* CRC32 value in Block or Index */
uint32_t crc32;
/* Type of the integrity check calculated from uncompressed data */
enum xz_check check_type;
/* Operation mode */
enum xz_mode mode;
/*
* True if the next call to xz_dec_run() is allowed to return
* XZ_BUF_ERROR.
*/
bool allow_buf_error;
/* Information stored in Block Header */
struct {
/*
* Value stored in the Compressed Size field, or
* VLI_UNKNOWN if Compressed Size is not present.
*/
vli_type compressed;
/*
* Value stored in the Uncompressed Size field, or
* VLI_UNKNOWN if Uncompressed Size is not present.
*/
vli_type uncompressed;
/* Size of the Block Header field */
uint32_t size;
} block_header;
/* Information collected when decoding Blocks */
struct {
/* Observed compressed size of the current Block */
vli_type compressed;
/* Observed uncompressed size of the current Block */
vli_type uncompressed;
/* Number of Blocks decoded so far */
vli_type count;
/*
* Hash calculated from the Block sizes. This is used to
* validate the Index field.
*/
struct xz_dec_hash hash;
} block;
/* Variables needed when verifying the Index field */
struct {
/* Position in dec_index() */
enum {
SEQ_INDEX_COUNT,
SEQ_INDEX_UNPADDED,
SEQ_INDEX_UNCOMPRESSED
} sequence;
/* Size of the Index in bytes */
vli_type size;
/* Number of Records (matches block.count in valid files) */
vli_type count;
/*
* Hash calculated from the Records (matches block.hash in
* valid files).
*/
struct xz_dec_hash hash;
} index;
/*
* Temporary buffer needed to hold Stream Header, Block Header,
* and Stream Footer. The Block Header is the biggest (1 KiB)
* so we reserve space according to that. buf[] has to be aligned
* to a multiple of four bytes; the size_t variables before it
* should guarantee this.
*/
struct {
/* Number of bytes collected into buf so far; see fill_temp() */
size_t pos;
/* Number of bytes that fill_temp() is expected to collect */
size_t size;
uint8_t buf[1024];
} temp;
/* LZMA2 decoder state created in xz_dec_init() */
struct xz_dec_lzma2 *lzma2;
#ifdef XZ_DEC_BCJ
/* BCJ decoder state created in xz_dec_init() */
struct xz_dec_bcj *bcj;
/* True if the current Block Header indicated two filters */
bool bcj_active;
#endif
};
#ifdef XZ_DEC_ANY_CHECK
/*
 * Sizes of the Check field with different Check IDs. The table is indexed
 * with s->check_type and the values are in bytes.
 */
static const uint8_t check_sizes[16] = {
0,
4, 4, 4,
8, 8, 8,
16, 16, 16,
32, 32, 32,
64, 64, 64
};
#endif
/*
 * Collect input into s->temp.buf. s->temp.size tells how many bytes are
 * wanted in total and s->temp.pos how many of them have been collected
 * already; the caller must have set both. As much as possible is copied
 * from b->in[b->in_pos] and both positions are advanced.
 *
 * Returns true once s->temp.pos has reached s->temp.size, in which case
 * s->temp.pos is reset to zero. Returns false if more input is needed.
 */
static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b)
{
	const size_t avail = b->in_size - b->in_pos;
	const size_t wanted = s->temp.size - s->temp.pos;
	const size_t n = avail < wanted ? avail : wanted;

	memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, n);
	s->temp.pos += n;
	b->in_pos += n;

	if (s->temp.pos != s->temp.size)
		return false;

	s->temp.pos = 0;
	return true;
}
/*
 * Decode a variable-length integer (little-endian base-128 encoding) into
 * s->vli, reading from in[*in_pos..in_size) and advancing *in_pos.
 *
 * Decoding can be resumed: s->pos holds the bit position of the next
 * seven-bit group, and s->vli is cleared only when s->pos is zero.
 *
 * Returns XZ_STREAM_END when the integer is complete (s->pos is then
 * zero again), XZ_OK when the input ran out in the middle of the integer,
 * or XZ_DATA_ERROR if the encoding is not minimal or is too long.
 */
static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s,
		const uint8_t *in, size_t *in_pos, size_t in_size)
{
	if (s->pos == 0)
		s->vli = 0;

	while (*in_pos < in_size) {
		const uint8_t cur = in[(*in_pos)++];

		s->vli |= (vli_type)(cur & 0x7F) << s->pos;

		if ((cur & 0x80) != 0) {
			/* More groups follow; refuse overlong integers. */
			s->pos += 7;
			if (s->pos == 7 * VLI_BYTES_MAX)
				return XZ_DATA_ERROR;

			continue;
		}

		/* Don't allow non-minimal encodings. */
		if (cur == 0 && s->pos != 0)
			return XZ_DATA_ERROR;

		s->pos = 0;
		return XZ_STREAM_END;
	}

	return XZ_OK;
}
/*
 * Decode the Compressed Data field from a Block. Update and validate
 * the observed compressed and uncompressed sizes of the Block so that
 * they don't exceed the values possibly stored in the Block Header
 * (validation assumes that no integer overflow occurs, since vli_type
 * is normally uint64_t). Update the CRC32 if presence of the CRC32
 * field was indicated in Stream Header.
 *
 * Once the decoding is finished, validate that the observed sizes match
 * the sizes possibly stored in the Block Header. Update the hash and
 * Block count, which are later used to validate the Index field.
 *
 * Returns the value returned by the filter chain, or XZ_DATA_ERROR if
 * the size validation fails.
 */
static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
{
enum xz_ret ret;
/* Remember the positions to see how much this call consumed/produced. */
s->in_start = b->in_pos;
s->out_start = b->out_pos;
#ifdef XZ_DEC_BCJ
if (s->bcj_active)
ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
else
#endif
ret = xz_dec_lzma2_run(s->lzma2, b);
s->block.compressed += b->in_pos - s->in_start;
s->block.uncompressed += b->out_pos - s->out_start;
/*
* There is no need to separately check for VLI_UNKNOWN, since
* the observed sizes are always smaller than VLI_UNKNOWN.
*/
if (s->block.compressed > s->block_header.compressed
|| s->block.uncompressed
> s->block_header.uncompressed)
return XZ_DATA_ERROR;
/* Only the output produced by this call is added to the CRC32. */
if (s->check_type == XZ_CHECK_CRC32)
s->crc32 = xz_crc32(b->out + s->out_start,
b->out_pos - s->out_start, s->crc32);
if (ret == XZ_STREAM_END) {
if (s->block_header.compressed != VLI_UNKNOWN
&& s->block_header.compressed
!= s->block.compressed)
return XZ_DATA_ERROR;
if (s->block_header.uncompressed != VLI_UNKNOWN
&& s->block_header.uncompressed
!= s->block.uncompressed)
return XZ_DATA_ERROR;
/* Unpadded size = Block Header + Compressed Data + Check */
s->block.hash.unpadded += s->block_header.size
+ s->block.compressed;
#ifdef XZ_DEC_ANY_CHECK
s->block.hash.unpadded += check_sizes[s->check_type];
#else
if (s->check_type == XZ_CHECK_CRC32)
s->block.hash.unpadded += 4;
#endif
s->block.hash.uncompressed += s->block.uncompressed;
/* The CRC32 is calculated over the hash structure itself. */
s->block.hash.crc32 = xz_crc32(
(const uint8_t *)&s->block.hash,
sizeof(s->block.hash), s->block.hash.crc32);
++s->block.count;
}
return ret;
}
/*
 * Account for the Index bytes consumed since s->in_start: feed them to
 * the running CRC32 in s->crc32 and add their count to s->index.size.
 */
static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
{
	const size_t consumed = b->in_pos - s->in_start;

	s->crc32 = xz_crc32(b->in + s->in_start, consumed, s->crc32);
	s->index.size += consumed;
}
/*
 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
 * fields from the Index field. That is, Index Padding and CRC32 are not
 * decoded by this function.
 *
 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
 *
 * The function can be resumed: s->index.sequence tells which field is
 * decoded next, and s->index.count is the number of Records left.
 */
static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
{
enum xz_ret ret;
do {
ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
if (ret != XZ_STREAM_END) {
/* Account for the consumed input before returning. */
index_update(s, b);
return ret;
}
switch (s->index.sequence) {
case SEQ_INDEX_COUNT:
s->index.count = s->vli;
/*
* Validate that the Number of Records field
* indicates the same number of Records as
* there were Blocks in the Stream.
*/
if (s->index.count != s->block.count)
return XZ_DATA_ERROR;
s->index.sequence = SEQ_INDEX_UNPADDED;
break;
case SEQ_INDEX_UNPADDED:
s->index.hash.unpadded += s->vli;
s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
break;
case SEQ_INDEX_UNCOMPRESSED:
/* A Record is complete; update the hash like dec_block() does. */
s->index.hash.uncompressed += s->vli;
s->index.hash.crc32 = xz_crc32(
(const uint8_t *)&s->index.hash,
sizeof(s->index.hash),
s->index.hash.crc32);
--s->index.count;
s->index.sequence = SEQ_INDEX_UNPADDED;
break;
}
} while (s->index.count > 0);
return XZ_STREAM_END;
}
/*
 * Compare the next four input bytes against s->crc32, least significant
 * byte first. s->pos is the bit position of the byte to compare next and
 * must be zero when starting to validate the first byte.
 *
 * Returns XZ_OK if the input ran out (call again with more input),
 * XZ_DATA_ERROR on a mismatch, or XZ_STREAM_END once all four bytes have
 * matched, in which case s->crc32 and s->pos are reset to zero.
 */
static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
{
	for (;;) {
		uint8_t expected;

		if (b->in_pos == b->in_size)
			return XZ_OK;

		expected = (uint8_t)(s->crc32 >> s->pos);
		if (expected != b->in[b->in_pos++])
			return XZ_DATA_ERROR;

		s->pos += 8;
		if (s->pos >= 32)
			break;
	}

	s->crc32 = 0;
	s->pos = 0;

	return XZ_STREAM_END;
}
#ifdef XZ_DEC_ANY_CHECK
/*
 * Skip over the Check field when the Check ID is not supported. s->pos
 * counts how many bytes of the field have been skipped so far.
 *
 * Returns true once the whole Check field has been skipped over (s->pos
 * is then reset to zero), or false if more input is needed.
 */
static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b)
{
	const uint32_t total = check_sizes[s->check_type];

	for (; s->pos < total; ++s->pos, ++b->in_pos)
		if (b->in_pos == b->in_size)
			return false;

	s->pos = 0;
	return true;
}
#endif
/*
 * Decode the Stream Header field (the first 12 bytes of the .xz Stream)
 * from s->temp.buf and store the type of the integrity check in
 * s->check_type.
 *
 * Returns XZ_OK on success, XZ_FORMAT_ERROR if the magic bytes don't
 * match, XZ_DATA_ERROR if the CRC32 of the Stream Flags doesn't match,
 * or XZ_OPTIONS_ERROR if the Stream Flags are not supported. If
 * XZ_DEC_ANY_CHECK is defined, XZ_UNSUPPORTED_CHECK is returned for
 * check types that are valid but cannot be verified.
 */
static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
{
	/* The two Stream Flags bytes follow the magic bytes. */
	const uint8_t *flags = s->temp.buf + HEADER_MAGIC_SIZE;

	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
		return XZ_FORMAT_ERROR;

	if (get_le32(flags + 2) != xz_crc32(flags, 2, 0))
		return XZ_DATA_ERROR;

	if (flags[0] != 0)
		return XZ_OPTIONS_ERROR;

	/*
	 * Of integrity checks, we support only none (Check ID = 0) and
	 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
	 * we will accept other check types too, but then the check won't
	 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
	 */
	s->check_type = flags[1];

#ifdef XZ_DEC_ANY_CHECK
	if (s->check_type > XZ_CHECK_MAX)
		return XZ_OPTIONS_ERROR;

	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_UNSUPPORTED_CHECK;
#else
	if (s->check_type > XZ_CHECK_CRC32)
		return XZ_OPTIONS_ERROR;
#endif

	return XZ_OK;
}
/*
 * Decode the Stream Footer field (the last 12 bytes of the .xz Stream)
 * from s->temp.buf.
 *
 * Returns XZ_STREAM_END if the footer is valid and agrees with the Index
 * size and the check type seen earlier. Any mismatch gives XZ_DATA_ERROR.
 */
static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
{
	const uint8_t *const footer = s->temp.buf;

	if (!memeq(footer + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
		return XZ_DATA_ERROR;

	if (get_le32(footer) != xz_crc32(footer + 4, 6, 0))
		return XZ_DATA_ERROR;

	/*
	 * Validate Backward Size. Note that we never added the size of the
	 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
	 * instead of s->index.size / 4 - 1.
	 */
	if (get_le32(footer + 4) != (s->index.size >> 2))
		return XZ_DATA_ERROR;

	/* The Stream Flags must be the same as in the Stream Header. */
	if (!(footer[8] == 0 && footer[9] == s->check_type))
		return XZ_DATA_ERROR;

	/*
	 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
	 * for the caller.
	 */
	return XZ_STREAM_END;
}
/*
 * Decode the Block Header and initialize the filter chain.
 *
 * The header is read from s->temp.buf, whose length is s->temp.size on
 * entry (including the four-byte CRC32 at the end).
 *
 * Returns XZ_OK on success, XZ_DATA_ERROR if the header is corrupt, or
 * XZ_OPTIONS_ERROR if the header uses unsupported features. Errors from
 * xz_dec_bcj_reset() and xz_dec_lzma2_reset() are returned as is.
 */
static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
{
enum xz_ret ret;
/*
* Validate the CRC32. We know that the temp buffer is at least
* eight bytes so this is safe.
*/
s->temp.size -= 4;
if (xz_crc32(s->temp.buf, s->temp.size, 0)
!= get_le32(s->temp.buf + s->temp.size))
return XZ_DATA_ERROR;
/* Skip the Block Header Size and Block Flags bytes. */
s->temp.pos = 2;
/*
* Catch unsupported Block Flags. We support only one or two filters
* in the chain, so we catch that with the same test.
*/
#ifdef XZ_DEC_BCJ
if (s->temp.buf[1] & 0x3E)
#else
if (s->temp.buf[1] & 0x3F)
#endif
return XZ_OPTIONS_ERROR;
/* Compressed Size */
if (s->temp.buf[1] & 0x40) {
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
!= XZ_STREAM_END)
return XZ_DATA_ERROR;
s->block_header.compressed = s->vli;
} else {
s->block_header.compressed = VLI_UNKNOWN;
}
/* Uncompressed Size */
if (s->temp.buf[1] & 0x80) {
if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
!= XZ_STREAM_END)
return XZ_DATA_ERROR;
s->block_header.uncompressed = s->vli;
} else {
s->block_header.uncompressed = VLI_UNKNOWN;
}
#ifdef XZ_DEC_BCJ
/* If there are two filters, the first one must be a BCJ filter. */
s->bcj_active = s->temp.buf[1] & 0x01;
if (s->bcj_active) {
/* Filter ID and Size of Properties need two bytes. */
if (s->temp.size - s->temp.pos < 2)
return XZ_OPTIONS_ERROR;
ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
if (ret != XZ_OK)
return ret;
/*
* We don't support custom start offset,
* so Size of Properties must be zero.
*/
if (s->temp.buf[s->temp.pos++] != 0x00)
return XZ_OPTIONS_ERROR;
}
#endif
/* Valid Filter Flags always take at least two bytes. */
if (s->temp.size - s->temp.pos < 2)
return XZ_DATA_ERROR;
/* Filter ID = LZMA2 */
if (s->temp.buf[s->temp.pos++] != 0x21)
return XZ_OPTIONS_ERROR;
/* Size of Properties = 1-byte Filter Properties */
if (s->temp.buf[s->temp.pos++] != 0x01)
return XZ_OPTIONS_ERROR;
/* Filter Properties contains LZMA2 dictionary size. */
if (s->temp.size - s->temp.pos < 1)
return XZ_DATA_ERROR;
ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
if (ret != XZ_OK)
return ret;
/* The rest must be Header Padding. */
while (s->temp.pos < s->temp.size)
if (s->temp.buf[s->temp.pos++] != 0x00)
return XZ_OPTIONS_ERROR;
/* Prepare for decoding the Compressed Data of this Block. */
s->temp.pos = 0;
s->block.compressed = 0;
s->block.uncompressed = 0;
return XZ_OK;
}
/*
 * The main decoder loop. s->sequence tells which part of the .xz Stream
 * is being decoded, so that decoding can continue from the right place
 * when this function is called again with more input or output space.
 *
 * Most cases fall through to the next one on purpose when a part has
 * been decoded completely. Returns XZ_OK when more input or more output
 * space is needed, XZ_STREAM_END after a valid Stream Footer, or an
 * error code from the function that detected the problem.
 */
static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
{
enum xz_ret ret;
/*
* Store the start position for the case when we are in the middle
* of the Index field.
*/
s->in_start = b->in_pos;
while (true) {
switch (s->sequence) {
case SEQ_STREAM_HEADER:
/*
* Stream Header is copied to s->temp, and then
* decoded from there. This way if the caller
* gives us only little input at a time, we can
* still keep the Stream Header decoding code
* simple. Similar approach is used in many places
* in this file.
*/
if (!fill_temp(s, b))
return XZ_OK;
/*
* If dec_stream_header() returns
* XZ_UNSUPPORTED_CHECK, it is still possible
* to continue decoding if working in multi-call
* mode. Thus, update s->sequence before calling
* dec_stream_header().
*/
s->sequence = SEQ_BLOCK_START;
ret = dec_stream_header(s);
if (ret != XZ_OK)
return ret;
/* Fall through */
case SEQ_BLOCK_START:
/* We need one byte of input to continue. */
if (b->in_pos == b->in_size)
return XZ_OK;
/* See if this is the beginning of the Index field. */
if (b->in[b->in_pos] == 0) {
s->in_start = b->in_pos++;
s->sequence = SEQ_INDEX;
break;
}
/*
* Calculate the size of the Block Header and
* prepare to decode it.
*/
s->block_header.size
= ((uint32_t)b->in[b->in_pos] + 1) * 4;
s->temp.size = s->block_header.size;
s->temp.pos = 0;
s->sequence = SEQ_BLOCK_HEADER;
/* Fall through */
case SEQ_BLOCK_HEADER:
if (!fill_temp(s, b))
return XZ_OK;
ret = dec_block_header(s);
if (ret != XZ_OK)
return ret;
s->sequence = SEQ_BLOCK_UNCOMPRESS;
/* Fall through */
case SEQ_BLOCK_UNCOMPRESS:
ret = dec_block(s, b);
if (ret != XZ_STREAM_END)
return ret;
s->sequence = SEQ_BLOCK_PADDING;
/* Fall through */
case SEQ_BLOCK_PADDING:
/*
* Size of Compressed Data + Block Padding
* must be a multiple of four. We don't need
* s->block.compressed for anything else
* anymore, so we use it here to test the size
* of the Block Padding field.
*/
while (s->block.compressed & 3) {
if (b->in_pos == b->in_size)
return XZ_OK;
if (b->in[b->in_pos++] != 0)
return XZ_DATA_ERROR;
++s->block.compressed;
}
s->sequence = SEQ_BLOCK_CHECK;
/* Fall through */
case SEQ_BLOCK_CHECK:
if (s->check_type == XZ_CHECK_CRC32) {
ret = crc32_validate(s, b);
if (ret != XZ_STREAM_END)
return ret;
}
#ifdef XZ_DEC_ANY_CHECK
else if (!check_skip(s, b)) {
return XZ_OK;
}
#endif
/* The Block is finished; see what comes next. */
s->sequence = SEQ_BLOCK_START;
break;
case SEQ_INDEX:
ret = dec_index(s, b);
if (ret != XZ_STREAM_END)
return ret;
s->sequence = SEQ_INDEX_PADDING;
/* Fall through */
case SEQ_INDEX_PADDING:
/*
 * The input consumed since s->in_start has not been added
 * to s->index.size yet, so it is included here.
 */
while ((s->index.size + (b->in_pos - s->in_start))
& 3) {
if (b->in_pos == b->in_size) {
index_update(s, b);
return XZ_OK;
}
if (b->in[b->in_pos++] != 0)
return XZ_DATA_ERROR;
}
/* Finish the CRC32 value and Index size. */
index_update(s, b);
/* Compare the hashes to validate the Index field. */
if (!memeq(&s->block.hash, &s->index.hash,
sizeof(s->block.hash)))
return XZ_DATA_ERROR;
s->sequence = SEQ_INDEX_CRC32;
/* Fall through */
case SEQ_INDEX_CRC32:
ret = crc32_validate(s, b);
if (ret != XZ_STREAM_END)
return ret;
s->temp.size = STREAM_HEADER_SIZE;
s->sequence = SEQ_STREAM_FOOTER;
/* Fall through */
case SEQ_STREAM_FOOTER:
if (!fill_temp(s, b))
return XZ_OK;
return dec_stream_footer(s);
}
}
/* Never reached */
}
/*
* xz_dec_run() is a wrapper for dec_main() to handle some special cases in
* multi-call and single-call decoding.
*
* In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
* are not going to make any progress anymore. This is to prevent the caller
* from calling us infinitely when the input file is truncated or otherwise
* corrupt. Since zlib-style API allows that the caller fills the input buffer
* only when the decoder doesn't produce any new output, we have to be careful
* to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
* after the second consecutive call to xz_dec_run() that makes no progress.
*
* In single-call mode, if we couldn't decode everything and no error
* occurred, either the input is truncated or the output buffer is too small.
* Since we know that the last input byte never produces any output, we know
* that if all the input was consumed and decoding wasn't finished, the file
* must be corrupt. Otherwise the output buffer has to be too small or the
* file is corrupt in a way that decoding it produces too big output.
*
* If single-call decoding fails, we reset b->in_pos and b->out_pos back to
* their original values. This is because with some filter chains there won't
* be any valid uncompressed data in the output buffer unless the decoding
* actually succeeds (that's the price to pay of using the output buffer as
* the workspace).
*/
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
size_t in_start;
size_t out_start;
enum xz_ret ret;
/* Single-call mode always starts decoding from a clean state. */
if (DEC_IS_SINGLE(s->mode))
xz_dec_reset(s);
/* Remember the positions to detect progress and to allow a rewind. */
in_start = b->in_pos;
out_start = b->out_pos;
ret = dec_main(s, b);
if (DEC_IS_SINGLE(s->mode)) {
/*
 * XZ_OK means that decoding didn't finish. If all input was
 * consumed, the input is considered corrupt; otherwise the
 * output buffer is reported to be too small.
 */
if (ret == XZ_OK)
ret = b->in_pos == b->in_size
? XZ_DATA_ERROR : XZ_BUF_ERROR;
/* On failure, rewind both positions to their original values. */
if (ret != XZ_STREAM_END) {
b->in_pos = in_start;
b->out_pos = out_start;
}
} else if (ret == XZ_OK && in_start == b->in_pos
&& out_start == b->out_pos) {
/*
 * No progress in multi-call mode: the first such call only sets
 * the flag, the next consecutive one returns XZ_BUF_ERROR.
 */
if (s->allow_buf_error)
ret = XZ_BUF_ERROR;
s->allow_buf_error = true;
} else {
s->allow_buf_error = false;
}
return ret;
}
XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(
		enum xz_mode mode, uint32_t dict_max)
{
	struct xz_dec *dec;

	dec = kmalloc(sizeof(*dec), GFP_KERNEL);
	if (!dec)
		return NULL;

	dec->mode = mode;

#ifdef XZ_DEC_BCJ
	/* The BCJ decoder is told whether single-call mode is in use. */
	dec->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
	if (!dec->bcj)
		goto fail_free_dec;
#endif

	dec->lzma2 = xz_dec_lzma2_create(mode, dict_max);
	if (!dec->lzma2)
		goto fail_free_bcj;

	/* Put the new decoder into its initial (Stream Header) state. */
	xz_dec_reset(dec);
	return dec;

	/* Unwind in reverse order of allocation. */
fail_free_bcj:
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(dec->bcj);
fail_free_dec:
#endif
	kfree(dec);
	return NULL;
}
XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s)
{
	/*
	 * Clear the Block and Index bookkeeping, including the hashes that
	 * are compared when the Index field is validated.
	 */
	memzero(&s->index, sizeof(s->index));
	memzero(&s->block, sizeof(s->block));

	/* The temp buffer starts empty and is sized for a Stream Header. */
	s->temp.size = STREAM_HEADER_SIZE;
	s->temp.pos = 0;

	/* Decoding restarts at the Stream Header with cleared counters. */
	s->sequence = SEQ_STREAM_HEADER;
	s->crc32 = 0;
	s->pos = 0;
	s->allow_buf_error = false;
}
XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s)
{
	/* A NULL decoder is accepted and ignored. */
	if (s == NULL)
		return;

	/* Release the sub-decoders first, then the decoder itself. */
	xz_dec_lzma2_end(s->lzma2);
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(s->bcj);
#endif
	kfree(s);
}

View File

@@ -0,0 +1,204 @@
/*
* LZMA2 definitions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_LZMA2_H
#define XZ_LZMA2_H
/*
 * Range coder constants.
 *
 * Derived values: RC_TOP_VALUE is 1 << 24 (0x1000000) and
 * RC_BIT_MODEL_TOTAL is 1 << 11 (2048).
 */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5
/*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 *
 * The value is 1 << 4, i.e. 16 position states.
 */
#define POS_STATES_MAX (1 << 4)
/*
* This enum is used to track which LZMA symbols have occurred most recently
* and in which order. This information is used to predict the next symbol.
*
* Symbols:
* - Literal: One 8-bit byte
* - Match: Repeat a chunk of data at some distance
* - Long repeat: Multi-byte match at a recently seen distance
* - Short repeat: One-byte repeat at a recently seen distance
*
* The symbol names are in the form STATE_oldest_older_previous. REP means
* either short or long repeated match, and NONLIT means any non-literal.
*/
enum lzma_state {
	/*
	 * The numeric order is significant: the lzma_state_*() helpers in
	 * this header compare states with < and <= and subtract fixed
	 * offsets from them.
	 */
	STATE_LIT_LIT          = 0,
	STATE_MATCH_LIT_LIT    = 1,
	STATE_REP_LIT_LIT      = 2,
	STATE_SHORTREP_LIT_LIT = 3,
	STATE_MATCH_LIT        = 4,
	STATE_REP_LIT          = 5,
	STATE_SHORTREP_LIT     = 6,
	STATE_LIT_MATCH        = 7,
	STATE_LIT_LONGREP      = 8,
	STATE_LIT_SHORTREP     = 9,
	STATE_NONLIT_MATCH     = 10,
	STATE_NONLIT_REP       = 11
};
/* Total number of states; equals the number of enumerators in enum lzma_state. */
#define STATES 12
/*
 * The lowest 7 states (STATE_LIT_LIT through STATE_SHORTREP_LIT) indicate
 * that the previous symbol was a literal.
 */
#define LIT_STATES 7
/*
 * Record that the latest symbol was a literal.
 *
 * States up to STATE_SHORTREP_LIT_LIT collapse to STATE_LIT_LIT, states up
 * to STATE_LIT_SHORTREP move down by three, and the remaining (NONLIT)
 * states move down by six.
 */
static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state)
{
	const enum lzma_state prev = *state;

	if (prev > STATE_LIT_SHORTREP)
		*state = prev - 6;
	else if (prev > STATE_SHORTREP_LIT_LIT)
		*state = prev - 3;
	else
		*state = STATE_LIT_LIT;
}
/*
 * Record that the latest symbol was a match. The new state depends only on
 * whether the symbol before it was a literal.
 */
static inline void XZ_FUNC lzma_state_match(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_MATCH;
	else
		*state = STATE_NONLIT_MATCH;
}
/*
 * Record that the latest symbol was a long repeated match. The new state
 * depends only on whether the symbol before it was a literal.
 */
static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_LONGREP;
	else
		*state = STATE_NONLIT_REP;
}
/*
 * Record that the latest symbol was a short repeated match. The new state
 * depends only on whether the symbol before it was a literal.
 */
static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state)
{
	if (*state < LIT_STATES)
		*state = STATE_LIT_SHORTREP;
	else
		*state = STATE_NONLIT_REP;
}
/*
 * Return true if the previous symbol was a literal, i.e. the state is one
 * of the lowest LIT_STATES states.
 */
static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state)
{
	return LIT_STATES > state;
}
/*
 * Each literal coder is divided in three sections:
 *   - 0x001-0x0FF: Without match byte
 *   - 0x101-0x1FF: With match byte; match bit is 0
 *   - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something else than
 * a literal (that is, it was some kind of match).
 *
 * LITERAL_CODER_SIZE (0x300) spans all three sections.
 */
#define LITERAL_CODER_SIZE 0x300
/* Maximum number of literal coders (1 << 4 = 16) */
#define LITERAL_CODERS_MAX (1 << 4)
/* Minimum length of a match is two bytes. */
#define MATCH_LEN_MIN 2
/*
 * Match length is encoded with 4, 5, or 10 bits.
 *
 * Length   Bits
 *  2-9      4 = Choice=0 + 3 bits
 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
 */
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
/* 8 + 8 + 256 = 272 distinct length symbols */
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
/*
 * Maximum length of a match is 273 which is a result of the encoding
 * described above (MATCH_LEN_MIN + LEN_SYMBOLS - 1 = 2 + 272 - 1).
 */
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
/*
 * Different sets of probabilities are used for match distances that have
 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
 * set of probabilities for each length. The matches with longer length
 * use a shared set of probabilities.
 *
 * lzma_get_dist_state() maps a match length to the index of its set:
 * lengths 2, 3, and 4 give 0, 1, and 2; all longer lengths give 3.
 */
#define DIST_STATES 4
/*
 * Get the index of the appropriate probability array for decoding
 * the distance slot.
 *
 * Lengths below DIST_STATES + MATCH_LEN_MIN get their own index
 * (len - MATCH_LEN_MIN); every longer length shares the last index.
 */
static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len)
{
	if (len >= DIST_STATES + MATCH_LEN_MIN)
		return DIST_STATES - 1;

	return len - MATCH_LEN_MIN;
}
/*
 * The highest two bits of a 32-bit match distance are encoded using six bits.
 * This six-bit value is called a distance slot. This way encoding a 32-bit
 * value takes 6-36 bits, larger values taking more bits.
 *
 * DIST_SLOTS is 1 << 6 = 64.
 */
#define DIST_SLOT_BITS 6
#define DIST_SLOTS (1 << DIST_SLOT_BITS)
/*
 * Match distances up to 127 are fully encoded using probabilities. Since
 * the highest two bits (distance slot) are always encoded using six bits,
 * the distances 0-3 don't need any additional bits to encode, since the
 * distance slot itself is the same as the actual distance. DIST_MODEL_START
 * indicates the first distance slot where at least one additional bit is
 * needed.
 */
#define DIST_MODEL_START 4
/*
 * Match distances greater than 127 are encoded in three pieces:
 *   - distance slot: the highest two bits
 *   - direct bits: 2-26 bits below the highest two bits
 *   - alignment bits: four lowest bits
 *
 * Direct bits don't use any probabilities.
 *
 * The distance slot value of 14 is for distances 128-191.
 */
#define DIST_MODEL_END 14
/*
 * Distance slots that indicate a distance <= 127.
 * FULL_DISTANCES_BITS is 14 / 2 = 7, so FULL_DISTANCES is 1 << 7 = 128.
 */
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
/*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) are encoded using probabilities.
 *
 * ALIGN_SIZE is 1 << 4 = 16 and ALIGN_MASK is 0xF.
 */
#define ALIGN_BITS 4
#define ALIGN_SIZE (1 << ALIGN_BITS)
#define ALIGN_MASK (ALIGN_SIZE - 1)
/*
 * Total number of all probability variables:
 * 1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE = 1846 + 16 * 0x300 = 14134.
 *
 * NOTE(review): 1846 presumably counts every probability other than the
 * literal coders; that breakdown is not derivable from this header alone.
 */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
/*
 * LZMA remembers the four most recent match distances. Reusing these
 * distances tends to take less space than re-encoding the actual
 * distance value.
 */
#define REPS 4
#endif

View File

@@ -0,0 +1,159 @@
/*
* Private includes and definitions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_PRIVATE_H
#define XZ_PRIVATE_H
/*
 * Environment setup. Kernel builds include the Linux headers directly and
 * translate CONFIG_XZ_DEC_* options into XZ_DEC_* macros; all other builds
 * take their definitions from xz_config.h.
 */
#ifdef __KERNEL__
/* XZ_PREBOOT may be defined only via decompress_unxz.c. */
# ifndef XZ_PREBOOT
# include <linux/slab.h>
# include <linux/vmalloc.h>
# include <linux/string.h>
/* memeq() is true when both buffers hold the same size bytes. */
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
/* memzero() fills the buffer with zero bytes. */
# define memzero(buf, size) memset(buf, 0, size)
# endif
# include <asm/byteorder.h>
# include <asm/unaligned.h>
/*
 * Read a little-endian 32-bit value through a uint32_t pointer.
 * NOTE(review): the cast presumably requires p to be suitably aligned -
 * confirm against the callers.
 */
# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
/* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */
# ifndef XZ_IGNORE_KCONFIG
/* Map each enabled CONFIG_XZ_DEC_<arch> option to XZ_DEC_<arch>. */
# ifdef CONFIG_XZ_DEC_X86
# define XZ_DEC_X86
# endif
# ifdef CONFIG_XZ_DEC_POWERPC
# define XZ_DEC_POWERPC
# endif
# ifdef CONFIG_XZ_DEC_IA64
# define XZ_DEC_IA64
# endif
# ifdef CONFIG_XZ_DEC_ARM
# define XZ_DEC_ARM
# endif
# ifdef CONFIG_XZ_DEC_ARMTHUMB
# define XZ_DEC_ARMTHUMB
# endif
# ifdef CONFIG_XZ_DEC_SPARC
# define XZ_DEC_SPARC
# endif
# endif
# include <linux/xz.h>
#else
/*
 * For userspace builds, use a separate header to define the required
 * macros and functions. This makes it easier to adapt the code into
 * different environments and avoids clutter in the Linux kernel tree.
 */
# include "xz_config.h"
#endif
/*
 * Decoding-mode selection.
 *
 * A build may request specific modes by defining any of XZ_DEC_SINGLE,
 * XZ_DEC_PREALLOC, or XZ_DEC_DYNALLOC beforehand. When none of them is
 * defined, support for all three modes is enabled.
 */
#if !(defined(XZ_DEC_SINGLE) || defined(XZ_DEC_PREALLOC) \
		|| defined(XZ_DEC_DYNALLOC))
#	define XZ_DEC_SINGLE
#	define XZ_DEC_PREALLOC
#	define XZ_DEC_DYNALLOC
#endif

/*
 * The DEC_IS_foo(mode) macros are meant for "if" conditions. When a mode
 * is not compiled in, its macro is a constant (false), which lets the
 * compiler drop the code that only that mode needs.
 */
#if defined(XZ_DEC_SINGLE)
#	define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
#else
#	define DEC_IS_SINGLE(mode) (false)
#endif

#if defined(XZ_DEC_PREALLOC)
#	define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
#else
#	define DEC_IS_PREALLOC(mode) (false)
#endif

#if defined(XZ_DEC_DYNALLOC)
#	define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
#else
#	define DEC_IS_DYNALLOC(mode) (false)
#endif

/*
 * DEC_IS_MULTI(mode): constant true without single-call support, constant
 * false with only single-call support, otherwise a runtime comparison.
 */
#ifndef XZ_DEC_SINGLE
#	define DEC_IS_MULTI(mode) (true)
#elif !defined(XZ_DEC_PREALLOC) && !defined(XZ_DEC_DYNALLOC)
#	define DEC_IS_MULTI(mode) (false)
#else
#	define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
#endif
/*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 *
 * If XZ_DEC_BCJ is already defined when this point is reached, it is left
 * untouched. Each architecture macro is tested exactly once.
 */
#ifndef XZ_DEC_BCJ
#	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
			|| defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
			|| defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
#		define XZ_DEC_BCJ
#	endif
#endif
/*
 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
 * before calling xz_dec_lzma2_run().
 *
 * xz_dec_init() forwards its own mode and dict_max arguments here and
 * treats a NULL return value as failure.
 */
XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
enum xz_mode mode, uint32_t dict_max);
/*
 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
 * decoder doesn't support.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
struct xz_dec_lzma2 *s, uint8_t props);
/* Decode raw LZMA2 stream from b->in to b->out. */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
struct xz_dec_lzma2 *s, struct xz_buf *b);
/*
 * Free the memory allocated for the LZMA2 decoder. xz_dec_end() calls this
 * on the decoder's lzma2 member before freeing the decoder itself.
 */
XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
/* BCJ decoder interface; only declared when BCJ support is compiled in. */
#ifdef XZ_DEC_BCJ
/*
 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
 * calling xz_dec_bcj_run().
 *
 * xz_dec_init() passes DEC_IS_SINGLE(mode) as single_call and treats a
 * NULL return value as failure.
 */
XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call);
/*
 * Decode the Filter ID of a BCJ filter. This implementation doesn't
 * support custom start offsets, so no decoding of Filter Properties
 * is needed. Returns XZ_OK if the given Filter ID is supported.
 * Otherwise XZ_OPTIONS_ERROR is returned.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
struct xz_dec_bcj *s, uint8_t id);
/*
 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
 * must be called directly.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
/*
 * Free the memory allocated for the BCJ filters. This is a plain kfree()
 * of the decoder structure.
 */
#define xz_dec_bcj_end(s) kfree(s)
#endif
#endif

View File

@@ -0,0 +1,57 @@
/*
* Definitions for handling the .xz file format
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef XZ_STREAM_H
#define XZ_STREAM_H
/*
 * In kernel builds where XZ_INTERNAL_CRC32 is zero or undefined, xz_crc32()
 * is implemented with crc32_le(). The CRC value is bit-inverted both before
 * it is passed to crc32_le() and after crc32_le() returns.
 *
 * NOTE(review): the "#undef crc32" presumably avoids a clash between a
 * crc32 macro from <linux/crc32.h> and identifiers named crc32 in the
 * decoder (e.g. the crc32 member cleared in xz_dec_reset()) - confirm.
 */
#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
# include <linux/crc32.h>
# undef crc32
# define xz_crc32(buf, size, crc) \
(~crc32_le(~(uint32_t)(crc), buf, size))
#endif
/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */
/*
 * Size of the Stream Header in bytes. The decoder sizes its temp buffer
 * with this value both in xz_dec_reset() and before reading the
 * Stream Footer.
 */
#define STREAM_HEADER_SIZE 12
/*
 * Stream Header magic: the six bytes 0xFD '7' 'z' 'X' 'Z' 0x00. The octal
 * escape \375 ends after three digits, so the '7' that follows is a
 * separate character; the explicit \0 is the sixth byte.
 */
#define HEADER_MAGIC "\3757zXZ\0"
#define HEADER_MAGIC_SIZE 6
/* Stream Footer magic: the two bytes 'Y' 'Z'. */
#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2
/*
 * Variable-length integer can hold a 63-bit unsigned integer, or a special
 * value to indicate that the value is unknown.
 */
typedef uint64_t vli_type;
/* Largest valid VLI: all 64 bits set, divided by two, i.e. 2^63 - 1. */
#define VLI_MAX ((vli_type)-1 / 2)
/* All 64 bits set; lies above VLI_MAX, so it cannot be a valid VLI. */
#define VLI_UNKNOWN ((vli_type)-1)
/* Maximum encoded size of a VLI: 8 * 8 / 7 = 9 bytes */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
/*
 * Integrity Check types. The enumerator values are the Check IDs; only
 * IDs 0, 1, 4, and 10 have names here.
 */
enum xz_check {
XZ_CHECK_NONE = 0,
XZ_CHECK_CRC32 = 1,
XZ_CHECK_CRC64 = 4,
XZ_CHECK_SHA256 = 10
};
/*
 * Maximum possible Check ID. It is larger than any named enumerator above,
 * so IDs without a name in enum xz_check fall inside the valid range.
 */
#define XZ_CHECK_MAX 15
#endif