Optimize internet checksum to use 16-bit fetches.

We could use 32-bit fetches with the same technique on 64-bit
architectures, or SIMD could be used to do very fast 128-bit
fetches, but this isn't a performance bottleneck and this
method is very simple and relatively fast.
This commit is contained in:
Nicholas J. Kain 2020-10-24 17:14:20 -04:00
parent 5fdf3bd83e
commit d07469a5fa

View File

@@ -1,6 +1,8 @@
#ifndef NCMLIB_NET_CHECKSUM_H #ifndef NCMLIB_NET_CHECKSUM_H
#define NCMLIB_NET_CHECKSUM_H #define NCMLIB_NET_CHECKSUM_H
// RFC 1071 is still a good reference.
#include <stdint.h> #include <stdint.h>
// When summing ones-complement 16-bit values using a 32-bit unsigned // When summing ones-complement 16-bit values using a 32-bit unsigned
@@ -14,30 +16,28 @@ static inline uint16_t net_checksum161c_foldcarry(uint32_t v)
return v; return v;
} }
// This function is not suitable for summing buffers that are greater than // Produces the correct result on little endian in the sense that
// 128k bytes in length: failure case will be incorrect checksums via // the binary value returned, when stored to memory, will match
// unsigned overflow, which is a defined operation and is safe. This limit // the result on big endian; if the numeric value returned
// should not be an issue for IPv4 or IPv6 packets, which are limited to // must match big endian results, then call ntohs() on the result.
// at most 64k bytes.
static uint16_t net_checksum161c(const void *buf, size_t size) static uint16_t net_checksum161c(const void *buf, size_t size)
{ {
uint32_t sum = 0; const char *b = (const char *)buf;
int odd = size & 0x01; const char *bend = b + size;
size_t i; uint32_t sum = 0, sumo = 0;
size &= ~((size_t)0x01); if (size & 1) {
size >>= 1; --bend;
const uint8_t *b = (const uint8_t *)buf; uint8_t z[2] = { (uint8_t)*bend, 0 };
for (i = 0; i < size; ++i) { uint16_t t;
uint16_t hi = b[i*2]; memcpy(&t, z, 2);
uint16_t lo = b[i*2+1]; sumo = t;
sum += ntohs((lo + (hi << 8)));
} }
if (odd) { for (; b != bend; b += 2) {
uint16_t hi = b[i*2]; uint16_t t;
uint16_t lo = 0; memcpy(&t, b, 2);
sum += ntohs((lo + (hi << 8))); sum += t;
} }
return ~net_checksum161c_foldcarry(sum); return ~net_checksum161c_foldcarry(sum + sumo);
} }
// For two sequences of bytes A and B that return checksums CS(A) and CS(B), // For two sequences of bytes A and B that return checksums CS(A) and CS(B),