From d07469a5fad47569984c70b22be5c89af8714f2f Mon Sep 17 00:00:00 2001 From: "Nicholas J. Kain" Date: Sat, 24 Oct 2020 17:14:20 -0400 Subject: [PATCH] Optimize internet checksum to use 16-bit fetches. We could use 32-bit fetches with the same technique on 64-bit architectures, or SIMD could be used to do very fast 128-bit fetches, but this isn't a performance bottleneck and this method is very simple and relatively fast. --- src/nk/net_checksum.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/nk/net_checksum.h b/src/nk/net_checksum.h index 46dc55f..e983ef6 100644 --- a/src/nk/net_checksum.h +++ b/src/nk/net_checksum.h @@ -1,6 +1,8 @@ #ifndef NCMLIB_NET_CHECKSUM_H #define NCMLIB_NET_CHECKSUM_H +// RFC 1071 is still a good reference. + #include <stdint.h> // When summing ones-complement 16-bit values using a 32-bit unsigned @@ -14,30 +16,28 @@ static inline uint16_t net_checksum161c_foldcarry(uint32_t v) return v; } -// This function is not suitable for summing buffers that are greater than -// 128k bytes in length: failure case will be incorrect checksums via -// unsigned overflow, which is a defined operation and is safe. This limit -// should not be an issue for IPv4 or IPv6 packet, which are limited to -// at most 64k bytes. +// Produces the correct result on little endian in the sense that +// the binary value returned, when stored to memory, will match +// the result on big endian; if the numeric value returned +// must match big endian results, then call ntohs() on the result. 
static uint16_t net_checksum161c(const void *buf, size_t size) { - uint32_t sum = 0; - int odd = size & 0x01; - size_t i; - size &= ~((size_t)0x01); - size >>= 1; - const uint8_t *b = (const uint8_t *)buf; - for (i = 0; i < size; ++i) { - uint16_t hi = b[i*2]; - uint16_t lo = b[i*2+1]; - sum += ntohs((lo + (hi << 8))); + const char *b = (const char *)buf; + const char *bend = b + size; + uint32_t sum = 0, sumo = 0; + if (size & 1) { + --bend; + uint8_t z[2] = { (uint8_t)*bend, 0 }; + uint16_t t; + memcpy(&t, z, 2); + sumo = t; } - if (odd) { - uint16_t hi = b[i*2]; - uint16_t lo = 0; - sum += ntohs((lo + (hi << 8))); + for (; b != bend; b += 2) { + uint16_t t; + memcpy(&t, b, 2); + sum += t; } - return ~net_checksum161c_foldcarry(sum); + return ~net_checksum161c_foldcarry(sum + sumo); } // For two sequences of bytes A and B that return checksums CS(A) and CS(B),