Optimize internet checksum to use 32-bit fetches.
It's actually not very hard to do everything with 32-bit fetches and 32-bit registers, aside from the necessary single-byte fetches for the one to three trailing bytes when the size is not a multiple of four.
This commit is contained in:
parent
ff8b910222
commit
1e60b4b8e7
@ -24,20 +24,23 @@ static uint16_t net_checksum16(const void *buf, size_t size)
|
||||
{
|
||||
const char *b = (const char *)buf;
|
||||
const char *bend = b + size;
|
||||
uint32_t sum = 0, sumo = 0;
|
||||
if (size & 1) {
|
||||
--bend;
|
||||
uint8_t z[2] = { (uint8_t)*bend, 0 };
|
||||
uint16_t t;
|
||||
memcpy(&t, z, 2);
|
||||
sumo = t;
|
||||
uint32_t sum = 0, t = 0;
|
||||
uint8_t z[4] = { 0 };
|
||||
switch (size & 3) {
|
||||
case 3: z[2] = (uint8_t)*--bend;
|
||||
case 2: z[1] = (uint8_t)*--bend;
|
||||
case 1: z[0] = (uint8_t)*--bend;
|
||||
default: break;
|
||||
}
|
||||
for (; b != bend; b += 2) {
|
||||
uint16_t t;
|
||||
memcpy(&t, b, 2);
|
||||
sum += t;
|
||||
memcpy(&t, z, 4);
|
||||
sum += t & 0xffffu;
|
||||
sum += (t >> 16);
|
||||
for (; b < bend; b += 4) {
|
||||
memcpy(&t, b, 4);
|
||||
sum += t & 0xffffu;
|
||||
sum += (t >> 16);
|
||||
}
|
||||
return ~net_checksum16_foldcarry(sum + sumo);
|
||||
return ~net_checksum16_foldcarry(sum);
|
||||
}
|
||||
|
||||
// For two sequences of bytes A and B that return checksums CS(A) and CS(B),
|
||||
|
Loading…
Reference in New Issue
Block a user