gzip: use common bbunzip infrastructure - ~700 bytes code less

This commit is contained in:
Denis Vlasenko 2007-03-14 00:07:51 +00:00
parent 07766bb0e7
commit 75605788ff
4 changed files with 185 additions and 99 deletions

View File

@ -14,7 +14,7 @@ lib-$(CONFIG_CPIO) += cpio.o
lib-$(CONFIG_DPKG) += dpkg.o
lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o
lib-$(CONFIG_GUNZIP) += bbunzip.o
lib-$(CONFIG_GZIP) += gzip.o
lib-$(CONFIG_GZIP) += gzip.o bbunzip.o
lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o
lib-$(CONFIG_RPM) += rpm.o
lib-$(CONFIG_TAR) += tar.o

View File

@ -9,12 +9,12 @@
#include "unarchive.h"
enum {
OPT_STDOUT = 1,
OPT_FORCE = 2,
OPT_STDOUT = 0x1,
OPT_FORCE = 0x2,
/* gunzip only: */
OPT_TEST = 4,
OPT_DECOMPRESS = 8,
OPT_VERBOSE = 0x10,
OPT_VERBOSE = 0x4,
OPT_DECOMPRESS = 0x8,
OPT_TEST = 0x10,
};
static
@ -33,8 +33,7 @@ int open_to_or_warn(int to_fd, const char *filename, int flags, int mode)
return 0;
}
static
int unpack(char **argv,
int bbunpack(char **argv,
char* (*make_new_name)(char *filename),
USE_DESKTOP(long long) int (*unpacker)(void)
)
@ -173,7 +172,7 @@ int bunzip2_main(int argc, char **argv)
if (applet_name[2] == 'c')
option_mask32 |= OPT_STDOUT;
return unpack(argv, make_new_name_bunzip2, unpack_bunzip2);
return bbunpack(argv, make_new_name_bunzip2, unpack_bunzip2);
}
#endif
@ -267,13 +266,13 @@ USE_DESKTOP(long long) int unpack_gunzip(void)
int gunzip_main(int argc, char **argv);
int gunzip_main(int argc, char **argv)
{
getopt32(argc, argv, "cftdv");
getopt32(argc, argv, "cfvdt");
argv += optind;
/* if called as zcat */
if (applet_name[1] == 'c')
option_mask32 |= OPT_STDOUT;
return unpack(argv, make_new_name_gunzip, unpack_gunzip);
return bbunpack(argv, make_new_name_gunzip, unpack_gunzip);
}
#endif
@ -311,7 +310,7 @@ int unlzma_main(int argc, char **argv)
if (applet_name[4] == 'c')
option_mask32 |= OPT_STDOUT;
return unpack(argv, make_new_name_unlzma, unpack_unlzma);
return bbunpack(argv, make_new_name_unlzma, unpack_unlzma);
}
#endif
@ -350,7 +349,7 @@ int uncompress_main(int argc, char **argv)
getopt32(argc, argv, "cf");
argv += optind;
return unpack(argv, make_new_name_uncompress, unpack_uncompress);
return bbunpack(argv, make_new_name_uncompress, unpack_uncompress);
}
#endif

View File

@ -2074,6 +2074,7 @@ static void zip(int in, int out)
/* ======================================================================== */
#if 0
static void abort_gzip(int ATTRIBUTE_UNUSED ignored)
{
exit(1);
@ -2082,92 +2083,6 @@ static void abort_gzip(int ATTRIBUTE_UNUSED ignored)
int gzip_main(int argc, char **argv);
int gzip_main(int argc, char **argv)
{
enum {
OPT_tostdout = 0x1,
OPT_force = 0x2,
};
unsigned opt;
int inFileNum;
int outFileNum;
int i;
struct stat statBuf;
opt = getopt32(argc, argv, "cf123456789qv" USE_GUNZIP("d"));
//if (opt & 0x1) // -c
//if (opt & 0x2) // -f
/* Ignore 1-9 (compression level) options */
//if (opt & 0x4) // -1
//if (opt & 0x8) // -2
//if (opt & 0x10) // -3
//if (opt & 0x20) // -4
//if (opt & 0x40) // -5
//if (opt & 0x80) // -6
//if (opt & 0x100) // -7
//if (opt & 0x200) // -8
//if (opt & 0x400) // -9
//if (opt & 0x800) // -q
//if (opt & 0x1000) // -v
#if ENABLE_GUNZIP /* gunzip_main may not be visible... */
if (opt & 0x2000) { // -d
/* FIXME: getopt32 should not depend on optind */
optind = 1;
return gunzip_main(argc, argv);
}
#endif
/* Comment?? */
if (signal(SIGINT, SIG_IGN) != SIG_IGN) {
signal(SIGINT, abort_gzip);
}
#ifdef SIGTERM
if (signal(SIGTERM, SIG_IGN) != SIG_IGN) {
signal(SIGTERM, abort_gzip);
}
#endif
#ifdef SIGHUP
if (signal(SIGHUP, SIG_IGN) != SIG_IGN) {
signal(SIGHUP, abort_gzip);
}
#endif
ptr_to_globals = xzalloc(sizeof(struct global1) + sizeof(struct global2));
ptr_to_globals++;
G2.l_desc.dyn_tree = G2.dyn_ltree;
G2.l_desc.static_tree = G2.static_ltree;
G2.l_desc.extra_bits = extra_lbits;
G2.l_desc.extra_base = LITERALS + 1;
G2.l_desc.elems = L_CODES;
G2.l_desc.max_length = MAX_BITS;
//G2.l_desc.max_code = 0;
G2.d_desc.dyn_tree = G2.dyn_dtree;
G2.d_desc.static_tree = G2.static_dtree;
G2.d_desc.extra_bits = extra_dbits;
//G2.d_desc.extra_base = 0;
G2.d_desc.elems = D_CODES;
G2.d_desc.max_length = MAX_BITS;
//G2.d_desc.max_code = 0;
G2.bl_desc.dyn_tree = G2.bl_tree;
//G2.bl_desc.static_tree = NULL;
G2.bl_desc.extra_bits = extra_blbits,
//G2.bl_desc.extra_base = 0;
G2.bl_desc.elems = BL_CODES;
G2.bl_desc.max_length = MAX_BL_BITS;
//G2.bl_desc.max_code = 0;
/* Allocate all global buffers (for DYN_ALLOC option) */
ALLOC(uch, G1.l_buf, INBUFSIZ);
ALLOC(uch, G1.outbuf, OUTBUFSIZ);
ALLOC(ush, G1.d_buf, DIST_BUFSIZE);
ALLOC(uch, G1.window, 2L * WSIZE);
ALLOC(ush, G1.prev, 1L << BITS);
/* Initialise the CRC32 table */
G1.crc_32_tab = crc32_filltable(0);
clear_bufs();
if (optind == argc) {
G1.time_stamp = 0;
@ -2240,3 +2155,87 @@ int gzip_main(int argc, char **argv)
return 0; //##G1.exit_code;
}
#endif
/* Local declaration of the shared pack/unpack driver that gzip_main() below
 * calls with make_new_name_gzip and pack_gzip as its two callbacks.
 * NOTE(review): presumably defined in bbunzip.c; no header declaring it is
 * visible from here, so this prototype must be kept in sync by hand - confirm.
 *   argv          - list of file name arguments to process
 *   make_new_name - callback returning the output name for a given input name
 *   unpacker      - callback doing the actual stream conversion
 */
int bbunpack(char **argv,
char* (*make_new_name)(char *filename),
USE_DESKTOP(long long) int (*unpacker)(void)
);
/* Name callback handed to bbunpack(): produce the compressed file's name by
 * appending ".gz" to the input name. The result comes from xasprintf()
 * (presumably heap-allocated and owned by the caller - confirm). */
static
char* make_new_name_gzip(char *filename)
{
	char *gz_name;

	gz_name = xasprintf("%s.gz", filename);
	return gz_name;
}
/* Compression callback handed to bbunpack(): compress stdin to stdout.
 *
 * Records a time stamp for the input in G1.time_stamp before calling zip()
 * (presumably zip() emits it as the gzip header's MTIME field - confirm).
 * RFC 1952 defines MTIME as the modification time of the original file, so
 * st_mtime is used rather than st_ctime (inode change time, which is also
 * bumped by chmod/chown/rename). A value of 0 means "no time stamp" and is
 * what remains if fstat() fails.
 *
 * Always returns 0.
 */
static
USE_DESKTOP(long long) int pack_gzip(void)
{
	struct stat st;

	G1.time_stamp = 0;
	if (fstat(STDIN_FILENO, &st) == 0)
		G1.time_stamp = st.st_mtime;
	zip(STDIN_FILENO, STDOUT_FILENO);
	return 0;
}
int gzip_main(int argc, char **argv);
int gzip_main(int argc, char **argv)
{
unsigned opt;
/* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */
opt = getopt32(argc, argv, "cfv" USE_GUNZIP("d") "q123456789" );
option_mask32 &= 0x7; /* Clear -d, ignore -q, -0..9 */
//if (opt & 0x1) // -c
//if (opt & 0x2) // -f
//if (opt & 0x4) // -v
#if ENABLE_GUNZIP /* gunzip_main may not be visible... */
if (opt & 0x8) { // -d
/* FIXME: getopt32 should not depend on optind */
optind = 1;
return gunzip_main(argc, argv);
}
#endif
ptr_to_globals = xzalloc(sizeof(struct global1) + sizeof(struct global2));
ptr_to_globals++;
G2.l_desc.dyn_tree = G2.dyn_ltree;
G2.l_desc.static_tree = G2.static_ltree;
G2.l_desc.extra_bits = extra_lbits;
G2.l_desc.extra_base = LITERALS + 1;
G2.l_desc.elems = L_CODES;
G2.l_desc.max_length = MAX_BITS;
//G2.l_desc.max_code = 0;
G2.d_desc.dyn_tree = G2.dyn_dtree;
G2.d_desc.static_tree = G2.static_dtree;
G2.d_desc.extra_bits = extra_dbits;
//G2.d_desc.extra_base = 0;
G2.d_desc.elems = D_CODES;
G2.d_desc.max_length = MAX_BITS;
//G2.d_desc.max_code = 0;
G2.bl_desc.dyn_tree = G2.bl_tree;
//G2.bl_desc.static_tree = NULL;
G2.bl_desc.extra_bits = extra_blbits,
//G2.bl_desc.extra_base = 0;
G2.bl_desc.elems = BL_CODES;
G2.bl_desc.max_length = MAX_BL_BITS;
//G2.bl_desc.max_code = 0;
/* Allocate all global buffers (for DYN_ALLOC option) */
ALLOC(uch, G1.l_buf, INBUFSIZ);
ALLOC(uch, G1.outbuf, OUTBUFSIZ);
ALLOC(ush, G1.d_buf, DIST_BUFSIZE);
ALLOC(uch, G1.window, 2L * WSIZE);
ALLOC(ush, G1.prev, 1L << BITS);
/* Initialise the CRC32 table */
G1.crc_32_tab = crc32_filltable(0);
clear_bufs();
return bbunpack(argv, make_new_name_gzip, pack_gzip);
}

88
docs/keep_data_small.txt Normal file
View File

@ -0,0 +1,88 @@
Keeping data small
When many applets are compiled into busybox, all rw data and
bss for each applet are concatenated. Including those from libc,
if static bbox is built. When bbox is started, _all_ this data
is allocated, not just that one part for selected applet.
What "allocated" exactly means, depends on arch.
On nommu it probably bites the most, actually using real
RAM for rwdata and bss. On i386, bss is lazily allocated
by COWed zero pages. Not sure about rwdata - also COW?
Small experiment measures "parasitic" bbox memory consumption.
Here we start 1000 "busybox sleep 10" in parallel.
bbox binary is practically allyesconfig static one,
built against uclibc:
bash-3.2# nmeter '%t %c %b %m %p %[pn]'
23:17:28 .......... 0 0 168M 0 147
23:17:29 .......... 0 0 168M 0 147
23:17:30 U......... 0 0 168M 1 147
23:17:31 SU........ 0 188k 181M 244 391
23:17:32 SSSSUUU... 0 0 223M 757 1147
23:17:33 UUU....... 0 0 223M 0 1147
23:17:34 U......... 0 0 223M 1 1147
23:17:35 .......... 0 0 223M 0 1147
23:17:36 .......... 0 0 223M 0 1147
23:17:37 S......... 0 0 223M 0 1147
23:17:38 .......... 0 0 223M 1 1147
23:17:39 .......... 0 0 223M 0 1147
23:17:40 .......... 0 0 223M 0 1147
23:17:41 .......... 0 0 210M 0 906
23:17:42 .......... 0 0 168M 1 147
23:17:43 .......... 0 0 168M 0 147
This requires 55M of memory. Thus 1 trivial busybox applet
takes 55k of userspace memory (nmeter doesn't account for kernel-side
allocations). Definitely can be improved.
Thus we should avoid large global data in our applets,
and should minimize usage of libc functions which implicitly use
such structures in libc.
Example 1
One example of how to reduce global data usage is in
archival/libunarchive/decompress_unzip.c:
/* This is somewhat complex-looking arrangement, but it allows
* to place decompressor state either in bss or in
* malloc'ed space simply by changing #defines below.
* Sizes on i386:
* text data bss dec hex
* 5256 0 108 5364 14f4 - bss
* 4915 0 0 4915 1333 - malloc
*/
#define STATE_IN_BSS 0
#define STATE_IN_MALLOC 1
This example completely eliminates globals in that module.
Required memory is allocated in inflate_gunzip() [its main module]
and then passed down to all subroutines which need to access globals
as a parameter.
Example 2
In case you don't want to pass this additional parameter everywhere,
take a look at archival/gzip.c. Here all global data is replaced by
a single global pointer (ptr_to_globals) to allocated storage.
In order to not duplicate ptr_to_globals in every applet, you can
reuse a single common one. It is defined in libbb/messages.c
as void *ptr_to_globals, but is NOT declared in libbb.h.
You first define a struct:
struct my_globals { int a; char buf[1000]; };
and then declare that ptr_to_globals is a pointer to it:
extern struct my_globals *ptr_to_globals;
#define G (*ptr_to_globals)
Linker magic ensures that these two merge into a single pointer object.
Now initialize it in <applet>_main():
ptr_to_globals = xzalloc(sizeof(G));
and you can reference "globals" by G.a, G.buf and so on, in any function.