tls: speed up xor'ing of aligned 16-byte buffers
function                                             old     new   delta
xorbuf_aligned_AES_BLOCK_SIZE                          -      23     +23
xwrite_encrypted                                     585     580      -5
aesgcm_GHASH                                         233     228      -5
GMULT                                                192     187      -5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 23/-15)              Total: 8 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
941440cf16
commit
03569bc50f
@ -357,6 +357,20 @@ void FAST_FUNC xorbuf(void *dst, const void *src, unsigned count)
|
|||||||
xorbuf3(dst, dst, src, count);
|
xorbuf3(dst, dst, src, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * XOR one AES block from src into dst, one unsigned long at a time
 * (16 bytes in total when long is 32 or 64 bits wide).
 * Both pointers are dereferenced as unsigned long, so the caller must
 * pass buffers that are suitably aligned for that type.
 */
void FAST_FUNC xorbuf_aligned_AES_BLOCK_SIZE(void *dst, const void *src)
{
	unsigned long *out = dst;
	const unsigned long *mask = src;

	/* The first word is XORed regardless of the width of long */
	out[0] ^= mask[0];
#if ULONG_MAX == 0xffffffff
	/* 32-bit long: three more words complete the block */
	out[1] ^= mask[1];
	out[2] ^= mask[2];
	out[3] ^= mask[3];
#elif ULONG_MAX <= 0xffffffffffffffff
	/* long wider than 32 bits but no wider than 64: one more word */
	out[1] ^= mask[1];
#endif
	/* With an even wider long, only the first word is XORed */
}
|
||||||
|
|
||||||
/* Nondestructively see the current hash value */
|
/* Nondestructively see the current hash value */
|
||||||
static unsigned sha_peek(md5sha_ctx_t *ctx, void *buffer)
|
static unsigned sha_peek(md5sha_ctx_t *ctx, void *buffer)
|
||||||
{
|
{
|
||||||
@ -802,10 +816,10 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty
|
|||||||
{
|
{
|
||||||
#define COUNTER(v) (*(uint32_t*)(v + 12))
|
#define COUNTER(v) (*(uint32_t*)(v + 12))
|
||||||
|
|
||||||
uint8_t aad[13 + 3] ALIGNED(4); /* +3 creates [16] buffer, simplifying GHASH() */
|
uint8_t aad[13 + 3] ALIGNED_long; /* +3 creates [16] buffer, simplifying GHASH() */
|
||||||
uint8_t nonce[12 + 4] ALIGNED(4); /* +4 creates space for AES block counter */
|
uint8_t nonce[12 + 4] ALIGNED_long; /* +4 creates space for AES block counter */
|
||||||
uint8_t scratch[AES_BLOCK_SIZE] ALIGNED(4); //[16]
|
uint8_t scratch[AES_BLOCK_SIZE] ALIGNED_long; //[16]
|
||||||
uint8_t authtag[AES_BLOCK_SIZE] ALIGNED(4); //[16]
|
uint8_t authtag[AES_BLOCK_SIZE] ALIGNED_long; //[16]
|
||||||
uint8_t *buf;
|
uint8_t *buf;
|
||||||
struct record_hdr *xhdr;
|
struct record_hdr *xhdr;
|
||||||
unsigned remaining;
|
unsigned remaining;
|
||||||
@ -850,7 +864,7 @@ static void xwrite_encrypted_aesgcm(tls_state_t *tls, unsigned size, unsigned ty
|
|||||||
aesgcm_GHASH(tls->H, aad, /*sizeof(aad),*/ tls->outbuf + OUTBUF_PFX, size, authtag /*, sizeof(authtag)*/);
|
aesgcm_GHASH(tls->H, aad, /*sizeof(aad),*/ tls->outbuf + OUTBUF_PFX, size, authtag /*, sizeof(authtag)*/);
|
||||||
COUNTER(nonce) = htonl(1);
|
COUNTER(nonce) = htonl(1);
|
||||||
aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
|
aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
|
||||||
xorbuf(authtag, scratch, sizeof(authtag));
|
xorbuf_aligned_AES_BLOCK_SIZE(authtag, scratch);
|
||||||
|
|
||||||
memcpy(buf, authtag, sizeof(authtag));
|
memcpy(buf, authtag, sizeof(authtag));
|
||||||
#undef COUNTER
|
#undef COUNTER
|
||||||
@ -938,10 +952,10 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size)
|
|||||||
{
|
{
|
||||||
#define COUNTER(v) (*(uint32_t*)(v + 12))
|
#define COUNTER(v) (*(uint32_t*)(v + 12))
|
||||||
|
|
||||||
//uint8_t aad[13 + 3] ALIGNED(4); /* +3 creates [16] buffer, simplifying GHASH() */
|
//uint8_t aad[13 + 3] ALIGNED_long; /* +3 creates [16] buffer, simplifying GHASH() */
|
||||||
uint8_t nonce[12 + 4] ALIGNED(4); /* +4 creates space for AES block counter */
|
uint8_t nonce[12 + 4] ALIGNED_long; /* +4 creates space for AES block counter */
|
||||||
uint8_t scratch[AES_BLOCK_SIZE] ALIGNED(4); //[16]
|
uint8_t scratch[AES_BLOCK_SIZE] ALIGNED_long; //[16]
|
||||||
//uint8_t authtag[AES_BLOCK_SIZE] ALIGNED(4); //[16]
|
//uint8_t authtag[AES_BLOCK_SIZE] ALIGNED_long; //[16]
|
||||||
unsigned remaining;
|
unsigned remaining;
|
||||||
unsigned cnt;
|
unsigned cnt;
|
||||||
|
|
||||||
@ -973,7 +987,7 @@ static void tls_aesgcm_decrypt(tls_state_t *tls, uint8_t *buf, int size)
|
|||||||
//aesgcm_GHASH(tls->H, aad, tls->inbuf + RECHDR_LEN, size, authtag);
|
//aesgcm_GHASH(tls->H, aad, tls->inbuf + RECHDR_LEN, size, authtag);
|
||||||
//COUNTER(nonce) = htonl(1);
|
//COUNTER(nonce) = htonl(1);
|
||||||
//aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
|
//aes_encrypt_one_block(&tls->aes_encrypt, nonce, scratch);
|
||||||
//xorbuf(authtag, scratch, sizeof(authtag));
|
//xorbuf_aligned_AES_BLOCK_SIZE(authtag, scratch);
|
||||||
|
|
||||||
//memcmp(buf, authtag, sizeof(authtag)) || DIE("HASH DOES NOT MATCH!");
|
//memcmp(buf, authtag, sizeof(authtag)) || DIE("HASH DOES NOT MATCH!");
|
||||||
#undef COUNTER
|
#undef COUNTER
|
||||||
|
@ -81,8 +81,12 @@ typedef int16_t int16;
|
|||||||
#define AES_BLOCK_SIZE 16
|
#define AES_BLOCK_SIZE 16
|
||||||
|
|
||||||
void tls_get_random(void *buf, unsigned len) FAST_FUNC;
|
void tls_get_random(void *buf, unsigned len) FAST_FUNC;
|
||||||
|
|
||||||
void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC;
|
void xorbuf(void* buf, const void* mask, unsigned count) FAST_FUNC;
|
||||||
|
|
||||||
|
/* Alignment marker for buffers that are accessed as arrays of long:
 * passes sizeof(long) to ALIGNED() (defined elsewhere; presumably an
 * alignment attribute - confirm against its definition). */
#define ALIGNED_long ALIGNED(sizeof(long))
|
||||||
|
/* XOR the AES block at 'mask' into the block at 'buf' using unsigned long
 * sized accesses. Both pointers are dereferenced as unsigned long, so they
 * must be suitably aligned for that type; use plain xorbuf() for buffers
 * whose alignment is not guaranteed. */
void xorbuf_aligned_AES_BLOCK_SIZE(void* buf, const void* mask) FAST_FUNC;
|
||||||
|
|
||||||
#define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS)
|
#define matrixCryptoGetPrngData(buf, len, userPtr) (tls_get_random(buf, len), PS_SUCCESS)
|
||||||
|
|
||||||
#define psFree(p, pool) free(p)
|
#define psFree(p, pool) free(p)
|
||||||
|
@ -50,8 +50,8 @@ static void RIGHTSHIFTX(byte* x)
|
|||||||
|
|
||||||
static void GMULT(byte* X, byte* Y)
|
static void GMULT(byte* X, byte* Y)
|
||||||
{
|
{
|
||||||
byte Z[AES_BLOCK_SIZE];
|
byte Z[AES_BLOCK_SIZE] ALIGNED_long;
|
||||||
byte V[AES_BLOCK_SIZE];
|
byte V[AES_BLOCK_SIZE] ALIGNED_long;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
XMEMSET(Z, 0, AES_BLOCK_SIZE);
|
XMEMSET(Z, 0, AES_BLOCK_SIZE);
|
||||||
@ -62,7 +62,7 @@ static void GMULT(byte* X, byte* Y)
|
|||||||
for (j = 0; j < 8; j++)
|
for (j = 0; j < 8; j++)
|
||||||
{
|
{
|
||||||
if (y & 0x80) {
|
if (y & 0x80) {
|
||||||
xorbuf(Z, V, AES_BLOCK_SIZE);
|
xorbuf_aligned_AES_BLOCK_SIZE(Z, V);
|
||||||
}
|
}
|
||||||
|
|
||||||
RIGHTSHIFTX(V);
|
RIGHTSHIFTX(V);
|
||||||
@ -86,8 +86,8 @@ void FAST_FUNC aesgcm_GHASH(byte* h,
|
|||||||
byte* s //, unsigned sSz
|
byte* s //, unsigned sSz
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
byte x[AES_BLOCK_SIZE] ALIGNED(4);
|
byte x[AES_BLOCK_SIZE] ALIGNED_long;
|
||||||
byte scratch[AES_BLOCK_SIZE] ALIGNED(4);
|
byte scratch[AES_BLOCK_SIZE] ALIGNED_long;
|
||||||
word32 blocks, partial;
|
word32 blocks, partial;
|
||||||
//was: byte* h = aes->H;
|
//was: byte* h = aes->H;
|
||||||
|
|
||||||
@ -116,6 +116,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h,
|
|||||||
blocks = cSz / AES_BLOCK_SIZE;
|
blocks = cSz / AES_BLOCK_SIZE;
|
||||||
partial = cSz % AES_BLOCK_SIZE;
|
partial = cSz % AES_BLOCK_SIZE;
|
||||||
while (blocks--) {
|
while (blocks--) {
|
||||||
|
//xorbuf_aligned_AES_BLOCK_SIZE(x, c); - c is not guaranteed to be aligned
|
||||||
xorbuf(x, c, AES_BLOCK_SIZE);
|
xorbuf(x, c, AES_BLOCK_SIZE);
|
||||||
GMULT(x, h);
|
GMULT(x, h);
|
||||||
c += AES_BLOCK_SIZE;
|
c += AES_BLOCK_SIZE;
|
||||||
@ -124,7 +125,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h,
|
|||||||
//XMEMSET(scratch, 0, AES_BLOCK_SIZE);
|
//XMEMSET(scratch, 0, AES_BLOCK_SIZE);
|
||||||
//XMEMCPY(scratch, c, partial);
|
//XMEMCPY(scratch, c, partial);
|
||||||
//xorbuf(x, scratch, AES_BLOCK_SIZE);
|
//xorbuf(x, scratch, AES_BLOCK_SIZE);
|
||||||
xorbuf(x, c, partial);
|
xorbuf(x, c, partial);//same result as above
|
||||||
GMULT(x, h);
|
GMULT(x, h);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -132,7 +133,7 @@ void FAST_FUNC aesgcm_GHASH(byte* h,
|
|||||||
/* Hash in the lengths of A and C in bits */
|
/* Hash in the lengths of A and C in bits */
|
||||||
FlattenSzInBits(&scratch[0], aSz);
|
FlattenSzInBits(&scratch[0], aSz);
|
||||||
FlattenSzInBits(&scratch[8], cSz);
|
FlattenSzInBits(&scratch[8], cSz);
|
||||||
xorbuf(x, scratch, AES_BLOCK_SIZE);
|
xorbuf_aligned_AES_BLOCK_SIZE(x, scratch);
|
||||||
GMULT(x, h);
|
GMULT(x, h);
|
||||||
|
|
||||||
/* Copy the result into s. */
|
/* Copy the result into s. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user