sha3: code shrink (and speedup for SHA3_SMALL=0)
function old new delta sha3_process_block72 1454 1359 -95 Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
6c6d37ee45
commit
4ff933c0e7
@ -1141,22 +1141,30 @@ static void sha3_process_block72(uint64_t *state)
|
|||||||
#undef RhoPi
|
#undef RhoPi
|
||||||
}
|
}
|
||||||
/* Chi */
|
/* Chi */
|
||||||
for (x = 0; x <= 20; x += 5) {
|
for (x = 0; x <= 40;) {
|
||||||
/*
|
uint32_t BC0, BC1, BC2, BC3, BC4;
|
||||||
* Can write this in terms of uint32 too,
|
BC0 = s32[x + 0*2];
|
||||||
* but why? compiler does it automatically.
|
BC1 = s32[x + 1*2];
|
||||||
*/
|
BC2 = s32[x + 2*2];
|
||||||
uint64_t BC0, BC1, BC2, BC3, BC4;
|
s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
|
||||||
BC0 = state[x + 0];
|
BC3 = s32[x + 3*2];
|
||||||
BC1 = state[x + 1];
|
s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
|
||||||
BC2 = state[x + 2];
|
BC4 = s32[x + 4*2];
|
||||||
state[x + 0] = BC0 ^ ((~BC1) & BC2);
|
s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
|
||||||
BC3 = state[x + 3];
|
s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
|
||||||
state[x + 1] = BC1 ^ ((~BC2) & BC3);
|
s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
|
||||||
BC4 = state[x + 4];
|
x++;
|
||||||
state[x + 2] = BC2 ^ ((~BC3) & BC4);
|
BC0 = s32[x + 0*2];
|
||||||
state[x + 3] = BC3 ^ ((~BC4) & BC0);
|
BC1 = s32[x + 1*2];
|
||||||
state[x + 4] = BC4 ^ ((~BC0) & BC1);
|
BC2 = s32[x + 2*2];
|
||||||
|
s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
|
||||||
|
BC3 = s32[x + 3*2];
|
||||||
|
s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
|
||||||
|
BC4 = s32[x + 4*2];
|
||||||
|
s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
|
||||||
|
s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
|
||||||
|
s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
|
||||||
|
x += 9;
|
||||||
}
|
}
|
||||||
/* Iota */
|
/* Iota */
|
||||||
s32[0] ^= IOTA_CONST_0bits & 1;
|
s32[0] ^= IOTA_CONST_0bits & 1;
|
||||||
@ -1275,6 +1283,7 @@ static void sha3_process_block72(uint64_t *state)
|
|||||||
#undef RhoPi_twice
|
#undef RhoPi_twice
|
||||||
}
|
}
|
||||||
/* Chi */
|
/* Chi */
|
||||||
|
#if LONG_MAX > 0x7fffffff
|
||||||
for (x = 0; x <= 20; x += 5) {
|
for (x = 0; x <= 20; x += 5) {
|
||||||
uint64_t BC0, BC1, BC2, BC3, BC4;
|
uint64_t BC0, BC1, BC2, BC3, BC4;
|
||||||
BC0 = state[x + 0];
|
BC0 = state[x + 0];
|
||||||
@ -1288,6 +1297,47 @@ static void sha3_process_block72(uint64_t *state)
|
|||||||
state[x + 3] = BC3 ^ ((~BC4) & BC0);
|
state[x + 3] = BC3 ^ ((~BC4) & BC0);
|
||||||
state[x + 4] = BC4 ^ ((~BC0) & BC1);
|
state[x + 4] = BC4 ^ ((~BC0) & BC1);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
/* Reduced register pressure version
|
||||||
|
* for register-starved 32-bit arches
|
||||||
|
* (i386: -95 bytes, and it is _faster_)
|
||||||
|
*/
|
||||||
|
for (x = 0; x <= 40;) {
|
||||||
|
uint32_t BC0, BC1, BC2, BC3, BC4;
|
||||||
|
uint32_t *const s32 = (uint32_t*)state;
|
||||||
|
# if SHA3_SMALL
|
||||||
|
do_half:
|
||||||
|
#endif
|
||||||
|
BC0 = s32[x + 0*2];
|
||||||
|
BC1 = s32[x + 1*2];
|
||||||
|
BC2 = s32[x + 2*2];
|
||||||
|
s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
|
||||||
|
BC3 = s32[x + 3*2];
|
||||||
|
s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
|
||||||
|
BC4 = s32[x + 4*2];
|
||||||
|
s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
|
||||||
|
s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
|
||||||
|
s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
|
||||||
|
x++;
|
||||||
|
# if SHA3_SMALL
|
||||||
|
if (x & 1)
|
||||||
|
goto do_half;
|
||||||
|
x += 8;
|
||||||
|
# else
|
||||||
|
BC0 = s32[x + 0*2];
|
||||||
|
BC1 = s32[x + 1*2];
|
||||||
|
BC2 = s32[x + 2*2];
|
||||||
|
s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
|
||||||
|
BC3 = s32[x + 3*2];
|
||||||
|
s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
|
||||||
|
BC4 = s32[x + 4*2];
|
||||||
|
s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
|
||||||
|
s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
|
||||||
|
s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
|
||||||
|
x += 9;
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
/* Iota */
|
/* Iota */
|
||||||
state[0] ^= IOTA_CONST[round]
|
state[0] ^= IOTA_CONST[round]
|
||||||
| (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
|
| (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
|
||||||
|
Loading…
Reference in New Issue
Block a user