Optimize internet checksum to use 32-bit fetches.
It's actually not very hard to do everything with 32-bit fetches and 32-bit registers, aside from the necessary 8-bit fetches for the tail bytes.
This commit is contained in:
parent
ff8b910222
commit
1e60b4b8e7
@ -24,20 +24,23 @@ static uint16_t net_checksum16(const void *buf, size_t size)
|
|||||||
{
|
{
|
||||||
const char *b = (const char *)buf;
|
const char *b = (const char *)buf;
|
||||||
const char *bend = b + size;
|
const char *bend = b + size;
|
||||||
uint32_t sum = 0, sumo = 0;
|
uint32_t sum = 0, t = 0;
|
||||||
if (size & 1) {
|
uint8_t z[4] = { 0 };
|
||||||
--bend;
|
switch (size & 3) {
|
||||||
uint8_t z[2] = { (uint8_t)*bend, 0 };
|
case 3: z[2] = (uint8_t)*--bend;
|
||||||
uint16_t t;
|
case 2: z[1] = (uint8_t)*--bend;
|
||||||
memcpy(&t, z, 2);
|
case 1: z[0] = (uint8_t)*--bend;
|
||||||
sumo = t;
|
default: break;
|
||||||
}
|
}
|
||||||
for (; b != bend; b += 2) {
|
memcpy(&t, z, 4);
|
||||||
uint16_t t;
|
sum += t & 0xffffu;
|
||||||
memcpy(&t, b, 2);
|
sum += (t >> 16);
|
||||||
sum += t;
|
for (; b < bend; b += 4) {
|
||||||
|
memcpy(&t, b, 4);
|
||||||
|
sum += t & 0xffffu;
|
||||||
|
sum += (t >> 16);
|
||||||
}
|
}
|
||||||
return ~net_checksum16_foldcarry(sum + sumo);
|
return ~net_checksum16_foldcarry(sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For two sequences of bytes A and B that return checksums CS(A) and CS(B),
|
// For two sequences of bytes A and B that return checksums CS(A) and CS(B),
|
||||||
|
Loading…
Reference in New Issue
Block a user