Optimize internet checksum to use 32-bit fetches.

It's actually not very hard to do everything with 32-bit fetches
and 32-bit registers, aside from the byte-sized fetches needed for
the tail (the last size & 3 bytes of the buffer).
This commit is contained in:
Nicholas J. Kain 2020-11-01 00:52:38 -04:00
parent ff8b910222
commit 1e60b4b8e7

View File

@@ -24,20 +24,23 @@ static uint16_t net_checksum16(const void *buf, size_t size)
{ {
const char *b = (const char *)buf; const char *b = (const char *)buf;
const char *bend = b + size; const char *bend = b + size;
uint32_t sum = 0, sumo = 0; uint32_t sum = 0, t = 0;
if (size & 1) { uint8_t z[4] = { 0 };
--bend; switch (size & 3) {
uint8_t z[2] = { (uint8_t)*bend, 0 }; case 3: z[2] = (uint8_t)*--bend;
uint16_t t; case 2: z[1] = (uint8_t)*--bend;
memcpy(&t, z, 2); case 1: z[0] = (uint8_t)*--bend;
sumo = t; default: break;
} }
for (; b != bend; b += 2) { memcpy(&t, z, 4);
uint16_t t; sum += t & 0xffffu;
memcpy(&t, b, 2); sum += (t >> 16);
sum += t; for (; b < bend; b += 4) {
memcpy(&t, b, 4);
sum += t & 0xffffu;
sum += (t >> 16);
} }
return ~net_checksum16_foldcarry(sum + sumo); return ~net_checksum16_foldcarry(sum);
} }
// For two sequences of bytes A and B that return checksums CS(A) and CS(B), // For two sequences of bytes A and B that return checksums CS(A) and CS(B),