From 7b969bb2ada4d7757229fd735135f7720ef8008c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 13 Jul 2022 16:11:17 +0200
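Subject: [PATCH] tls: P256: remove NOP macro sp_256_norm_8()

sp_256_norm_8() was defined as ((void)0), so removing the macro and all
of its call sites is not intended to change behavior.

While at it, drop the TODO comment above the generic 32-bit
sp_512to256_mont_reduce_8(), shorten the comment about p256_base, and
rewrap the p256_base_bin[] initializer to 16 bytes per line.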

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 networking/tls_sp_c32.c | 35 +++++------------------------------
 1 file changed, 5 insertions(+), 30 deletions(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 292dda24e..a593c5c40 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -68,9 +68,6 @@ static const sp_digit p256_mod[8] ALIGNED(8) = {
 
 #define p256_mp_mod ((sp_digit)0x000001)
 
-/* Normalize the values in each word to 32 bits - NOP */
-#define sp_256_norm_8(a) ((void)0)
-
 /* Write r as big endian to byte array.
  * Fixed length number of bytes written: 32
  *
@@ -83,8 +80,6 @@ static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a)
 	int i;
 	const uint64_t* r = (void*)rr;
 
-	sp_256_norm_8(rr);
-
 	r += 4;
 	for (i = 0; i < 4; i++) {
 		r--;
@@ -97,8 +92,6 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
 {
 	int i;
 
-	sp_256_norm_8(r);
-
 	r += 8;
 	for (i = 0; i < 8; i++) {
 		r--;
@@ -641,7 +634,6 @@ static void sp_256_div2_8(sp_digit* r /*, const sp_digit* m*/)
 	int carry = 0;
 	if (r[0] & 1)
 		carry = sp_256_add_8(r, r, m);
-	sp_256_norm_8(r);
 	sp_256_rshift1_8(r, carry);
 }
 
@@ -652,10 +644,8 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 //	const sp_digit* m = p256_mod;
 
 	int carry = sp_256_add_8(r, a, b);
-	sp_256_norm_8(r);
 	if (carry) {
 		sp_256_sub_8_p256_mod(r);
-		sp_256_norm_8(r);
 	}
 }
 
@@ -667,10 +657,8 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 
 	int borrow;
 	borrow = sp_256_sub_8(r, a, b);
-	sp_256_norm_8(r);
 	if (borrow) {
 		sp_256_add_8(r, r, m);
-		sp_256_norm_8(r);
 	}
 }
 
@@ -680,10 +668,8 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 //	const sp_digit* m = p256_mod;
 
 	int carry = sp_256_add_8(r, a, a);
-	sp_256_norm_8(r);
 	if (carry)
 		sp_256_sub_8_p256_mod(r);
-	sp_256_norm_8(r);
 }
 
 /* Triple a Montgomery form number (r = a + a + a % m) */
@@ -692,16 +678,12 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 //	const sp_digit* m = p256_mod;
 
 	int carry = sp_256_add_8(r, a, a);
-	sp_256_norm_8(r);
 	if (carry) {
 		sp_256_sub_8_p256_mod(r);
-		sp_256_norm_8(r);
 	}
 	carry = sp_256_add_8(r, r, a);
-	sp_256_norm_8(r);
 	if (carry) {
 		sp_256_sub_8_p256_mod(r);
-		sp_256_norm_8(r);
 	}
 }
 
@@ -844,7 +826,6 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* aa/*, const sp_digi
 	sp_512to256_mont_shift_8(r, aa);
 	if (carry != 0)
 		sp_256_sub_8_p256_mod(r);
-	sp_256_norm_8(r);
 }
 
 #else /* Generic 32-bit version */
@@ -1003,8 +984,6 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
  * [In our case, it is (p256_mp_mod * a[1]) << 32.]
  * And so on. Eventually T is divisible by R, and after division by R
  * the algorithm is in the same place as the usual Montgomery reduction.
- *
- * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic?
  */
 static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
 {
@@ -1032,7 +1011,6 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit
 		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(r);
-		sp_256_norm_8(r);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
 		sp_digit word16th = 0;
@@ -1052,7 +1030,6 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit
 		sp_512to256_mont_shift_8(r, a);
 		if (word16th != 0)
 			sp_256_sub_8_p256_mod(r);
-		sp_256_norm_8(r);
 	}
 }
 #endif
@@ -1208,14 +1185,12 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->x);
-	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
 	sp_256_mont_mul_and_reduce_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
 		sp_256_sub_8_p256_mod(r->y);
-	sp_256_norm_8(r->y);
 
 	memset(r->z, 0, sizeof(r->z));
 	r->z[0] = 1;
@@ -1300,7 +1275,6 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point*
 
 	/* Check double */
 	sp_256_sub_8(t1, p256_mod, q->y);
-	sp_256_norm_8(t1);
 	if (sp_256_cmp_equal_8(p->x, q->x)
 	 && sp_256_cmp_equal_8(p->z, q->z)
 	 && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1))
@@ -1422,14 +1396,15 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit*
 static void sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k /*, int map*/)
 {
 	/* Since this function is called only once, save space:
-	 * don't have "static const sp_point p256_base = {...}",
-	 * it would have more zeros than data.
+	 * don't have "static const sp_point p256_base = {...}".
 	 */
 	static const uint8_t p256_base_bin[] = {
 		/* x (big-endian) */
-		0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96,
+		0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,
+		0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96,
 		/* y */
-		0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5,
+		0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,
+		0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5,
 		/* z will be set to 1, infinity flag to "false" */
 	};
 	sp_point p256_base;