attack the biggest stack users:

-mkfs_minix_main [busybox_unstripped]:                  4288
-mkfs_minix_main [busybox_unstripped]:                  4276
-grave [busybox_unstripped]:                            4260
(the big stack users in bzip2 are attacked too - they are not listed here)

price we pay in code size increase (columns: function, old size, new size, delta; in bytes):
mainSort                                            2458    2515     +57
grave                                               1005    1058     +53
sendMTFValues                                       2177    2195     +18
BZ2_blockSort                                        122     125      +3
mkfs_minix_main                                     3070    3022     -48
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/1 up/down: 131/-48)            Total: 83 bytes
This commit is contained in:
Denis Vlasenko
2007-12-02 08:35:37 +00:00
parent 8003e266ed
commit ab801874f8
8 changed files with 85 additions and 20 deletions

View File

@@ -721,7 +721,8 @@ void mainQSort3(uint32_t* ptr,
#define CLEARMASK (~(SETMASK))
static NOINLINE
void mainSort(uint32_t* ptr,
void mainSort(EState* state,
uint32_t* ptr,
uint8_t* block,
uint16_t* quadrant,
uint32_t* ftab,
@@ -729,13 +730,18 @@ void mainSort(uint32_t* ptr,
int32_t* budget)
{
int32_t i, j, k, ss, sb;
int32_t runningOrder[256];
Bool bigDone[256];
int32_t copyStart[256];
int32_t copyEnd [256];
uint8_t c1;
int32_t numQSorted;
uint16_t s;
Bool bigDone[256];
/* bbox: moved to EState to save stack
int32_t runningOrder[256];
int32_t copyStart[256];
int32_t copyEnd [256];
*/
#define runningOrder (state->mainSort__runningOrder)
#define copyStart (state->mainSort__copyStart)
#define copyEnd (state->mainSort__copyEnd)
/*-- set up the 2-byte frequency table --*/
/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
@@ -985,6 +991,9 @@ void mainSort(uint32_t* ptr,
AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
}
}
#undef runningOrder
#undef copyStart
#undef copyEnd
}
#undef BIGFREQ
@@ -1041,7 +1050,7 @@ void BZ2_blockSort(EState* s)
*/
budget = nblock * ((wfact-1) / 3);
mainSort(ptr, block, quadrant, ftab, nblock, &budget);
mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
if (budget < 0) {
fallbackSort(s->arr1, s->arr2, ftab, nblock);
}

View File

@@ -178,13 +178,22 @@ typedef struct EState {
uint8_t selector [BZ_MAX_SELECTORS];
uint8_t selectorMtf[BZ_MAX_SELECTORS];
uint8_t len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
int32_t code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
/* stack-saving measures: these can be local, but they are too big */
int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
/* second dimension: only 3 needed; 4 makes index calculations faster */
uint32_t len_pack[BZ_MAX_ALPHA_SIZE][4];
uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
#endif
int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2];
int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2];
int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2];
int32_t mainSort__runningOrder[256];
int32_t mainSort__copyStart[256];
int32_t mainSort__copyEnd[256];
} EState;
@@ -203,7 +212,7 @@ static void
BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t);
static void
BZ2_hbMakeCodeLengths(uint8_t*, int32_t*, int32_t, int32_t);
BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t);
/*-------------------------------------------------------------*/
/*--- end bzlib_private.h ---*/

View File

@@ -264,13 +264,16 @@ void sendMTFValues(EState* s)
* are also globals only used in this proc.
* Made global to keep stack frame size small.
*/
#define code sendMTFValues__code
#define rfreq sendMTFValues__rfreq
#define len_pack sendMTFValues__len_pack
uint16_t cost[BZ_N_GROUPS];
int32_t fave[BZ_N_GROUPS];
uint16_t* mtfv = s->mtfv;
alphaSize = s->nInUse+2;
alphaSize = s->nInUse + 2;
for (t = 0; t < BZ_N_GROUPS; t++)
for (v = 0; v < alphaSize; v++)
s->len[t][v] = BZ_GREATER_ICOST;
@@ -453,7 +456,7 @@ void sendMTFValues(EState* s)
/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
* comment in huffman.c for details. */
for (t = 0; t < nGroups; t++)
BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
}
AssertH(nGroups < 8, 3002);
@@ -602,6 +605,9 @@ void sendMTFValues(EState* s)
selCtr++;
}
AssertH(selCtr == nSelectors, 3007);
#undef code
#undef rfreq
#undef len_pack
}

View File

@@ -98,7 +98,8 @@ void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
/*---------------------------------------------------*/
static
void BZ2_hbMakeCodeLengths(uint8_t *len,
void BZ2_hbMakeCodeLengths(EState *s,
uint8_t *len,
int32_t *freq,
int32_t alphaSize,
int32_t maxLen)
@@ -110,9 +111,14 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
int32_t nNodes, nHeap, n1, n2, i, j, k;
Bool tooLong;
/* bbox: moved to EState to save stack
int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
*/
#define heap (s->BZ2_hbMakeCodeLengths__heap)
#define weight (s->BZ2_hbMakeCodeLengths__weight)
#define parent (s->BZ2_hbMakeCodeLengths__parent)
for (i = 0; i < alphaSize; i++)
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
@@ -189,6 +195,9 @@ void BZ2_hbMakeCodeLengths(uint8_t *len,
weight[i] = j << 8;
}
}
#undef heap
#undef weight
#undef parent
}