bzip2: pass sorting params through EState* pointer
function old new delta mainGtU 499 515 +16 sendMTFValues 2085 2094 +9 mainSort 1116 1119 +3 generateMTFValues 357 356 -1 fallbackSort 1719 1705 -14 mainQSort3 1163 1141 -22 BZ2_blockSort 118 85 -33 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/4 up/down: 28/-70) Total: -42 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		| @@ -227,17 +227,19 @@ void fallbackQSort3(uint32_t* fmap, | ||||
| #define UNALIGNED_BH(zz)  ((zz) & 0x01f) | ||||
|  | ||||
| static | ||||
| void fallbackSort(uint32_t* fmap, | ||||
| 		uint32_t* eclass, | ||||
| 		uint32_t* bhtab, | ||||
| 		int32_t   nblock) | ||||
| void fallbackSort(EState* state) | ||||
| { | ||||
| 	int32_t ftab[257]; | ||||
| 	int32_t ftabCopy[256]; | ||||
| 	int32_t H, i, j, k, l, r, cc, cc1; | ||||
| 	int32_t nNotDone; | ||||
| 	int32_t nBhtab; | ||||
| 	uint8_t* eclass8 = (uint8_t*)eclass; | ||||
| 	/* params */ | ||||
| 	uint32_t *const fmap    = state->arr1; | ||||
| 	uint32_t *const eclass  = state->arr2; | ||||
| #define eclass8 ((uint8_t*)eclass) | ||||
| 	uint32_t *const bhtab   = state->ftab; | ||||
| 	const int32_t   nblock  = state->nblock; | ||||
|  | ||||
| 	/* | ||||
| 	 * Initial 1-char radix sort to generate | ||||
| @@ -349,6 +351,7 @@ void fallbackSort(uint32_t* fmap, | ||||
| 		eclass8[fmap[i]] = (uint8_t)j; | ||||
| 	} | ||||
| 	AssertH(j < 256, 1005); | ||||
| #undef eclass8 | ||||
| } | ||||
|  | ||||
| #undef       SET_BH | ||||
| @@ -367,18 +370,18 @@ void fallbackSort(uint32_t* fmap, | ||||
| /*---------------------------------------------*/ | ||||
| static | ||||
| NOINLINE | ||||
| int mainGtU( | ||||
| int mainGtU(EState* state, | ||||
| 		uint32_t  i1, | ||||
| 		uint32_t  i2, | ||||
| 		uint8_t*  block, | ||||
| 		uint16_t* quadrant, | ||||
| 		uint32_t  nblock, | ||||
| 		int32_t*  budget) | ||||
| 		uint32_t  i2) | ||||
| { | ||||
| 	int32_t  k; | ||||
| 	uint8_t  c1, c2; | ||||
| 	uint16_t s1, s2; | ||||
|  | ||||
| 	uint8_t  *const block    = state->block; | ||||
| 	uint16_t *const quadrant = state->quadrant; | ||||
| 	const int32_t   nblock   = state->nblock; | ||||
|  | ||||
| /* Loop unrolling here is actually very useful | ||||
|  * (generated code is much simpler), | ||||
|  * code size increase is only 270 bytes (i386) | ||||
| @@ -435,7 +438,7 @@ int mainGtU( | ||||
| 		if (i1 >= nblock) i1 -= nblock; | ||||
| 		if (i2 >= nblock) i2 -= nblock; | ||||
|  | ||||
| 		(*budget)--; | ||||
| 		state->budget--; | ||||
| 		k -= 8; | ||||
| 	} while (k >= 0); | ||||
|  | ||||
| @@ -459,15 +462,13 @@ const uint32_t incs[14] = { | ||||
| }; | ||||
|  | ||||
| static | ||||
| void mainSimpleSort(uint32_t* ptr, | ||||
| 		uint8_t*  block, | ||||
| 		uint16_t* quadrant, | ||||
| 		int32_t   nblock, | ||||
| void mainSimpleSort(EState* state, | ||||
| 		int32_t   lo, | ||||
| 		int32_t   hi, | ||||
| 		int32_t   d, | ||||
| 		int32_t*  budget) | ||||
| 		int32_t   d) | ||||
| { | ||||
| 	uint32_t *const ptr = state->ptr; | ||||
|  | ||||
| 	/* At which increment to start? */ | ||||
| 	int hp = 0; | ||||
| 	{ | ||||
| @@ -492,7 +493,7 @@ void mainSimpleSort(uint32_t* ptr, | ||||
| 			if (i > hi) break; | ||||
| 			v = ptr[i]; | ||||
| 			j = i; | ||||
| 			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { | ||||
| 			while (mainGtU(state, ptr[j-h]+d, v+d)) { | ||||
| 				ptr[j] = ptr[j-h]; | ||||
| 				j = j - h; | ||||
| 				if (j <= (lo + h - 1)) break; | ||||
| @@ -506,7 +507,7 @@ void mainSimpleSort(uint32_t* ptr, | ||||
| 			if (i > hi) break; | ||||
| 			v = ptr[i]; | ||||
| 			j = i; | ||||
| 			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { | ||||
| 			while (mainGtU(state, ptr[j-h]+d, v+d)) { | ||||
| 				ptr[j] = ptr[j-h]; | ||||
| 				j = j - h; | ||||
| 				if (j <= (lo + h - 1)) break; | ||||
| @@ -517,7 +518,7 @@ void mainSimpleSort(uint32_t* ptr, | ||||
| 			if (i > hi) break; | ||||
| 			v = ptr[i]; | ||||
| 			j = i; | ||||
| 			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { | ||||
| 			while (mainGtU(state, ptr[j-h]+d, v+d)) { | ||||
| 				ptr[j] = ptr[j-h]; | ||||
| 				j = j - h; | ||||
| 				if (j <= (lo + h - 1)) break; | ||||
| @@ -525,7 +526,7 @@ void mainSimpleSort(uint32_t* ptr, | ||||
| 			ptr[j] = v; | ||||
| 			i++; | ||||
| #endif | ||||
| 			if (*budget < 0) return; | ||||
| 			if (state->budget < 0) return; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| @@ -590,14 +591,10 @@ uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) | ||||
| #define MAIN_QSORT_STACK_SIZE   100 | ||||
|  | ||||
| static NOINLINE | ||||
| void mainQSort3(uint32_t* ptr, | ||||
| 		uint8_t*  block, | ||||
| 		uint16_t* quadrant, | ||||
| 		int32_t   nblock, | ||||
| void mainQSort3(EState* state, | ||||
| 		int32_t   loSt, | ||||
| 		int32_t   hiSt, | ||||
| 		/*int32_t   dSt,*/ | ||||
| 		int32_t*  budget) | ||||
| 		int32_t   hiSt | ||||
| 		/*int32_t dSt*/) | ||||
| { | ||||
| 	enum { dSt = BZ_N_RADIX }; | ||||
| 	int32_t unLo, unHi, ltLo, gtHi, n, m, med; | ||||
| @@ -611,6 +608,9 @@ void mainQSort3(uint32_t* ptr, | ||||
| 	int32_t nextHi[3]; | ||||
| 	int32_t nextD [3]; | ||||
|  | ||||
| 	uint32_t *const ptr   = state->ptr; | ||||
| 	uint8_t  *const block = state->block; | ||||
|  | ||||
| 	sp = 0; | ||||
| 	mpush(loSt, hiSt, dSt); | ||||
|  | ||||
| @@ -621,8 +621,8 @@ void mainQSort3(uint32_t* ptr, | ||||
| 		if (hi - lo < MAIN_QSORT_SMALL_THRESH | ||||
| 		 || d > MAIN_QSORT_DEPTH_THRESH | ||||
| 		) { | ||||
| 			mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); | ||||
| 			if (*budget < 0) | ||||
| 			mainSimpleSort(state, lo, hi, d); | ||||
| 			if (state->budget < 0) | ||||
| 				return; | ||||
| 			continue; | ||||
| 		} | ||||
| @@ -726,13 +726,7 @@ void mainQSort3(uint32_t* ptr, | ||||
| #define CLEARMASK (~(SETMASK)) | ||||
|  | ||||
| static NOINLINE | ||||
| void mainSort(EState* state, | ||||
| 		uint32_t* ptr, | ||||
| 		uint8_t*  block, | ||||
| 		uint16_t* quadrant, | ||||
| 		uint32_t* ftab, | ||||
| 		int32_t   nblock, | ||||
| 		int32_t*  budget) | ||||
| void mainSort(EState* state) | ||||
| { | ||||
| 	int32_t  i, j; | ||||
| 	Bool     bigDone[256]; | ||||
| @@ -745,6 +739,12 @@ void mainSort(EState* state, | ||||
| #define copyStart    (state->mainSort__copyStart) | ||||
| #define copyEnd      (state->mainSort__copyEnd) | ||||
|  | ||||
| 	uint32_t *const ptr      = state->ptr; | ||||
| 	uint8_t  *const block    = state->block; | ||||
| 	uint32_t *const ftab     = state->ftab; | ||||
| 	const int32_t   nblock   = state->nblock; | ||||
| 	uint16_t *const quadrant = state->quadrant; | ||||
|  | ||||
| 	/*-- set up the 2-byte frequency table --*/ | ||||
| 	/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ | ||||
| 	memset(ftab, 0, 65537 * sizeof(ftab[0])); | ||||
| @@ -883,11 +883,8 @@ void mainSort(EState* state, | ||||
| 					int32_t lo =  ftab[sb] /*& CLEARMASK (redundant)*/; | ||||
| 					int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; | ||||
| 					if (hi > lo) { | ||||
| 						mainQSort3( | ||||
| 							ptr, block, quadrant, nblock, | ||||
| 							lo, hi, /*BZ_N_RADIX,*/ budget | ||||
| 						); | ||||
| 						if (*budget < 0) return; | ||||
| 						mainQSort3(state, lo, hi /*,BZ_N_RADIX*/); | ||||
| 						if (state->budget < 0) return; | ||||
| 					} | ||||
| 				} | ||||
| 				ftab[sb] |= SETMASK; | ||||
| @@ -1025,31 +1022,25 @@ void mainSort(EState* state, | ||||
|  *	arr1[0 .. nblock-1] holds sorted order | ||||
|  */ | ||||
| static NOINLINE | ||||
| void BZ2_blockSort(EState* s) | ||||
| void BZ2_blockSort(EState* state) | ||||
| { | ||||
| 	/* In original bzip2 1.0.4, it's a parameter, but 30 | ||||
| 	 * (which was the default) should work ok. */ | ||||
| 	enum { wfact = 30 }; | ||||
| 	unsigned i; | ||||
|  | ||||
| 	uint32_t* ptr    = s->ptr; | ||||
| 	uint8_t*  block  = s->block; | ||||
| 	uint32_t* ftab   = s->ftab; | ||||
| 	int32_t   nblock = s->nblock; | ||||
| 	uint16_t* quadrant; | ||||
| 	int32_t   budget; | ||||
| 	int32_t   i; | ||||
|  | ||||
| 	if (nblock < 10000) { | ||||
| 		fallbackSort(s->arr1, s->arr2, ftab, nblock); | ||||
| 	if (state->nblock < 10000) { | ||||
| 		fallbackSort(state); | ||||
| 	} else { | ||||
| 		/* Calculate the location for quadrant, remembering to get | ||||
| 		 * the alignment right.  Assumes that &(block[0]) is at least | ||||
| 		 * 2-byte aligned -- this should be ok since block is really | ||||
| 		 * the first section of arr2. | ||||
| 		 */ | ||||
| 		i = nblock + BZ_N_OVERSHOOT; | ||||
| 		if (i & 1) i++; | ||||
| 		quadrant = (uint16_t*)(&(block[i])); | ||||
| 		i = state->nblock + BZ_N_OVERSHOOT; | ||||
| 		if (i & 1) | ||||
| 			i++; | ||||
| 		state->quadrant = (uint16_t*) &(state->block[i]); | ||||
|  | ||||
| 		/* (wfact-1) / 3 puts the default-factor-30 | ||||
| 		 * transition point at very roughly the same place as | ||||
| @@ -1058,24 +1049,25 @@ void BZ2_blockSort(EState* s) | ||||
| 		 * resulting compressed stream is now the same regardless | ||||
| 		 * of whether or not we use the main sort or fallback sort. | ||||
| 		 */ | ||||
| 		budget = nblock * ((wfact-1) / 3); | ||||
| 		state->budget = state->nblock * ((wfact-1) / 3); | ||||
|  | ||||
| 		mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); | ||||
| 		if (budget < 0) { | ||||
| 			fallbackSort(s->arr1, s->arr2, ftab, nblock); | ||||
| 		mainSort(state); | ||||
| 		if (state->budget < 0) { | ||||
| 			fallbackSort(state); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| #if BZ_LIGHT_DEBUG | ||||
| 	s->origPtr = -1; | ||||
| 	state->origPtr = -1; | ||||
| #endif | ||||
| 	for (i = 0; i < s->nblock; i++) | ||||
| 		if (ptr[i] == 0) { | ||||
| 			s->origPtr = i; | ||||
| 	for (i = 0; i < state->nblock; i++) { | ||||
| 		if (state->ptr[i] == 0) { | ||||
| 			state->origPtr = i; | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	AssertH(s->origPtr != -1, 1003); | ||||
| 	AssertH(state->origPtr != -1, 1003); | ||||
| } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -87,7 +87,7 @@ int isempty_RL(EState* s) | ||||
| static | ||||
| void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) | ||||
| { | ||||
| 	int32_t n; | ||||
| 	unsigned n; | ||||
| 	EState* s; | ||||
|  | ||||
| 	s = xzalloc(sizeof(EState)); | ||||
|   | ||||
| @@ -121,6 +121,7 @@ typedef struct EState { | ||||
| 	/* mode this stream is in, and whether inputting */ | ||||
| 	/* or outputting data */ | ||||
| 	int32_t  mode; | ||||
| //both smallint? | ||||
| 	int32_t  state; | ||||
|  | ||||
| 	/* remembers avail_in when flush/finish requested */ | ||||
| @@ -134,6 +135,9 @@ typedef struct EState { | ||||
| 	uint32_t *arr2; | ||||
| 	uint32_t *ftab; | ||||
|  | ||||
| 	uint16_t* quadrant; | ||||
| 	int32_t  budget; | ||||
|  | ||||
| 	/* aliases for arr1 and arr2 */ | ||||
| 	uint32_t *ptr; | ||||
| 	uint8_t  *block; | ||||
| @@ -142,6 +146,7 @@ typedef struct EState { | ||||
|  | ||||
| 	/* guess what */ | ||||
| 	uint32_t *crc32table; | ||||
| //move down | ||||
|  | ||||
| 	/* run-length-encoding of the input */ | ||||
| 	uint32_t state_in_ch; | ||||
| @@ -165,6 +170,7 @@ typedef struct EState { | ||||
| 	/* misc administratium */ | ||||
| 	int32_t  blockNo; | ||||
| 	int32_t  blockSize100k; | ||||
| //smallint? | ||||
|  | ||||
| 	/* stuff for coding the MTF values */ | ||||
| 	int32_t  nMTF; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user