From edc97680d564f5456e8b448cd7099ce51cfcce24 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Fri, 19 May 2023 18:34:13 +0100 Subject: [PATCH 01/10] Fix output width of mbedtls_ecp_mod_p448() to 448 bits Signed-off-by: Paul Elliott --- library/ecp_curves.c | 33 ++++++++++++++++++++++++---- tests/suites/test_suite_ecp.function | 1 - 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 85c889f6a..782a66aca 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -28,6 +28,8 @@ #include "mbedtls/platform.h" +#include "constant_time_internal.h" + #include "bn_mul.h" #include "bignum_core.h" #include "ecp_invasive.h" @@ -5502,13 +5504,18 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) /* Extra limb for carry below. */ M_limbs++; - mbedtls_mpi_uint *M = mbedtls_calloc(M_limbs, ciL); + mbedtls_mpi_uint *M = NULL; + mbedtls_mpi_uint *Q = NULL; + const mbedtls_mpi_uint *P = (mbedtls_mpi_uint *) curve448_p; + const size_t P_limbs = CHARS_TO_LIMBS(sizeof(curve448_p)); + + M = mbedtls_calloc(M_limbs, ciL); if (M == NULL) { return MBEDTLS_ERR_ECP_ALLOC_FAILED; } - mbedtls_mpi_uint *Q = mbedtls_calloc(Q_limbs, ciL); + Q = mbedtls_calloc(Q_limbs, ciL); if (Q == NULL) { ret = MBEDTLS_ERR_ECP_ALLOC_FAILED; @@ -5527,9 +5534,15 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) X[i] = 0; } - /* X += A1 - Carry here dealt with by oversize M and X. */ + /* X += A1 - Carry here fits in oversize X. Oversize M means it will get + * added in, not returned as carry. */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs); + /* Deal with carry bit from add by subtracting P if necessary. */ + if (X[P448_WIDTH] != 0) { + mbedtls_mpi_core_sub(X, X, P, P_limbs); + } + /* Q = B1, X += B1 */ memcpy(Q, M, (Q_limbs * ciL)); @@ -5548,10 +5561,22 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) (void) mbedtls_mpi_core_add(M, M, Q, Q_limbs); - /* Shifted carry bit from the addition is dealt with by oversize M */ + /* Shifted carry bit from the addition fits in oversize M */ mbedtls_mpi_core_shift_l(M, M_limbs, 224); (void) mbedtls_mpi_core_add(X, X, M, M_limbs); + /* Deal with carry bit by subtracting P if necessary. */ + if (X[P448_WIDTH] != 0) { + mbedtls_mpi_core_sub(X, X, P, P_limbs); + } + + /* Returned result should be 0 < X < P. Although we have controlled bit + * width, we may still have a result which is greater than P. Subtract P + * if this is the case. */ + if (mbedtls_mpi_core_lt_ct(P, X, P_limbs)) { + mbedtls_mpi_core_sub(X, X, P, P_limbs); + } + ret = 0; cleanup: diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function index 53da2fc6a..0b9ce6b0c 100644 --- a/tests/suites/test_suite_ecp.function +++ b/tests/suites/test_suite_ecp.function @@ -1404,7 +1404,6 @@ void ecp_mod_p448(char *input_N, TEST_EQUAL(res.n, limbs); TEST_EQUAL(mbedtls_ecp_mod_p448(X.p, X.n), 0); - TEST_EQUAL(mbedtls_mpi_mod_mpi(&X, &X, &N), 0); TEST_LE_U(mbedtls_mpi_core_bitlen(X.p, X.n), 448); ASSERT_COMPARE(X.p, bytes, res.p, bytes); From ee86100963b4ed5b2914752c117ea75b7826a43b Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Wed, 31 May 2023 12:12:22 +0100 Subject: [PATCH 02/10] Add docs for mbedtls_ecp_mod_p448() Signed-off-by: Paul Elliott --- library/ecp_invasive.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/library/ecp_invasive.h b/library/ecp_invasive.h index b730d95ab..c391eb0d4 100644 --- a/library/ecp_invasive.h +++ b/library/ecp_invasive.h @@ -243,6 +243,25 @@ int mbedtls_ecp_mod_p256k1_raw(mbedtls_mpi_uint *X, size_t X_limbs); #if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED) +/** Fast quasi-reduction modulo p448 = 2^448 - 2^224 - 1 + * Write X as A0 + 2^448 A1 and A1 as B0 + 2^224 B1, and return A0 + A1 + B1 + + * (B0 + B1) * 2^224. + * + * \param[in,out] X The address of the MPI to be converted. + * Must have exact limb size that stores a 896-bit MPI + * (double the bitlength of the modulus). Upon return + * holds the reduced value which is in range `0 <= X < + * N` (where N is the modulus). The bitlength of the + * reduced value is the same as that of the modulus + * (448 bits). + * \param[in] X_limbs The length of \p X in limbs. + * + * \return \c 0 on Success. + * \return #MBEDTLS_ERR_ECP_BAD_INPUT_DATA if \p X does not have + * twice as many limbs as the modulus. + * \return #MBEDTLS_ERR_ECP_ALLOC_FAILED if memory allocation + * failed. + */ MBEDTLS_STATIC_TESTABLE int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs); From 9cce348a709201c4137555161412db554e377bd4 Mon Sep 17 00:00:00 2001 From: Xiaokang Qian Date: Thu, 8 Jun 2023 07:52:50 +0000 Subject: [PATCH 03/10] Add corner case for p448 (A0+A1= full 1 of 448 bits) Signed-off-by: Xiaokang Qian --- tests/suites/test_suite_ecp.data | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/suites/test_suite_ecp.data b/tests/suites/test_suite_ecp.data index d08ce0f99..83abd44d4 100644 --- a/tests/suites/test_suite_ecp.data +++ b/tests/suites/test_suite_ecp.data @@ -1520,3 +1520,7 @@ ecp_add_sub #48 MBEDTLS_ECP_MOD_SCALAR(MBEDTLS_ECP_DP_CURVE448) depends_on:MBEDTLS_ECP_DP_CURVE448_ENABLED ecp_mod_add_sub:"0000000000000003f7defb1691e8e3b705620733deaaddd33a760e17a4e9ba333445533fcd71d42a6d00e3468c946b0ff353728c6173d944afbfae4877c606f":"0000000000000003f96c1d081a3cfe300dc4c27fa2ebbc37396957d4bf81156d86b88de3a9312ca5be57d93fa3549b71895aa36bd5231f38146a2f0970425b":MBEDTLS_ECP_DP_CURVE448:MBEDTLS_ECP_MOD_SCALAR +#Add one test case that the sum of upper half and lower half of the X is equal to "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" +ecp_mod_p448 #1 - f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1a1fe3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccd0B519A16DF59C53E0D49B209200F878F362ACE518D5B8BFCF9CDC725E5E01C06295E8605AF06932B5006D9E556D3F190E8136BF9C643D332 mod fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff +depends_on:MBEDTLS_ECP_DP_CURVE448_ENABLED +ecp_mod_p448:"fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff":"f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1a1fe3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccd0B519A16DF59C53E0D49B209200F878F362ACE518D5B8BFCF9CDC725E5E01C06295E8605AF06932B5006D9E556D3F190E8136BF9C643D332":"0ece49e2f747b4bc43afbacb8fe99e8b7301401d8a9108093fee65a9f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238db" From 035e5fc8856e71438ec30d4fe36042b9b8503449 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 7 Jun 2023 14:02:31 +0100 Subject: [PATCH 04/10] Add comments to 448 optimised reduction Signed-off-by: Janos Follath --- library/ecp_curves.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 782a66aca..b1e6338fb 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5524,7 +5524,6 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) /* M = A1 */ memset(M, 0, (M_limbs * ciL)); - /* Do not copy into the overflow limb, as this would read past the end of * X. */ memcpy(M, X + P448_WIDTH, ((M_limbs - 1) * ciL)); @@ -5534,7 +5533,8 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) X[i] = 0; } - /* X += A1 - Carry here fits in oversize X. Oversize M means it will get + /* X = X + M = A0 + A1 */ + /* Carry here fits in oversize X. Oversize M means it will get * added in, not returned as carry. */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs); @@ -5543,15 +5543,15 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) mbedtls_mpi_core_sub(X, X, P, P_limbs); } - /* Q = B1, X += B1 */ + /* Q = B1 */ memcpy(Q, M, (Q_limbs * ciL)); - mbedtls_mpi_core_shift_r(Q, Q_limbs, 224); + /* X = X + Q = (A0 + A1) + B1 */ /* No carry here - only max 224 bits */ (void) mbedtls_mpi_core_add(X, X, Q, Q_limbs); - /* M = (B0 + B1) * 2^224, X += M */ + /* M = B0 */ if (sizeof(mbedtls_mpi_uint) > 4) { M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint)-1) >> (P224_UNUSED_BITS); } @@ -5559,10 +5559,15 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) M[i] = 0; } + /* M = M + Q = B0 + B1 */ (void) mbedtls_mpi_core_add(M, M, Q, Q_limbs); + /* M = (B0 + B1) * 2^224 */ /* Shifted carry bit from the addition fits in oversize M */ mbedtls_mpi_core_shift_l(M, M_limbs, 224); + + + /* X = X + M = (A0 + A1 + B1) + (B0 + B1) * 2^224 */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs); /* Deal with carry bit by subtracting P if necessary. */ From fcdd0477b38f2ba0e4d0bb26ec53cf92317c42e7 Mon Sep 17 00:00:00 2001 From: Xiaokang Qian Date: Thu, 8 Jun 2023 10:03:53 +0000 Subject: [PATCH 05/10] Replace loop zeroise with memset Signed-off-by: Xiaokang Qian --- library/ecp_curves.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index b1e6338fb..a4b89be89 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5529,9 +5529,7 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) memcpy(M, X + P448_WIDTH, ((M_limbs - 1) * ciL)); /* X = A0 */ - for (i = P448_WIDTH; i < X_limbs; i++) { - X[i] = 0; - } + memset(X + P448_WIDTH, 0, ((M_limbs - 1) * ciL)); /* X = X + M = A0 + A1 */ /* Carry here fits in oversize X. Oversize M means it will get @@ -5558,6 +5556,7 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) for (i = P224_WIDTH_MAX; i < M_limbs; ++i) { M[i] = 0; } + memset(M + P224_WIDTH_MAX, 0, ((M_limbs - P224_WIDTH_MAX) * ciL)); /* M = M + Q = B0 + B1 */ (void) mbedtls_mpi_core_add(M, M, Q, Q_limbs); From 436f2ad37c22372c4b807651328d5b7d7a24f219 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Sun, 11 Jun 2023 23:29:24 +0100 Subject: [PATCH 06/10] Three round solution Attempt to fix failing test by dealing with overflow with three rounds, instead of previous subtract modulus solution. Also optimise out shifts by using memcpy / memmove instead. Remove final sub to return canonical result, as this is not required here. Signed-off-by: Paul Elliott --- library/ecp_curves.c | 81 +++++++++++++++++----------- tests/suites/test_suite_ecp.function | 1 + 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index a4b89be89..2e377a090 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5452,8 +5452,9 @@ static int ecp_mod_p255(mbedtls_mpi *N) /* Number of limbs fully occupied by 2^224 (max), and limbs used by it (min) */ #define DIV_ROUND_UP(X, Y) (((X) + (Y) -1) / (Y)) -#define P224_WIDTH_MIN (28 / sizeof(mbedtls_mpi_uint)) -#define P224_WIDTH_MAX DIV_ROUND_UP(28, sizeof(mbedtls_mpi_uint)) +#define P224_SIZE (224 / 8) +#define P224_WIDTH_MIN (P224_SIZE / sizeof(mbedtls_mpi_uint)) +#define P224_WIDTH_MAX DIV_ROUND_UP(P224_SIZE, sizeof(mbedtls_mpi_uint)) #define P224_UNUSED_BITS ((P224_WIDTH_MAX * sizeof(mbedtls_mpi_uint) * 8) - 224) static int ecp_mod_p448(mbedtls_mpi *N) @@ -5486,7 +5487,7 @@ cleanup: MBEDTLS_STATIC_TESTABLE int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) { - size_t i; + size_t i, round; int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; if (X_limbs <= P448_WIDTH) { @@ -5494,20 +5495,18 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) } size_t M_limbs = X_limbs - (P448_WIDTH); - const size_t Q_limbs = M_limbs; if (M_limbs > P448_WIDTH) { /* Shouldn't be called with X larger than 2^896! */ return MBEDTLS_ERR_ECP_BAD_INPUT_DATA; } - /* Extra limb for carry below. */ + /* Both M and Q require an extra limb to catch carries. */ M_limbs++; + const size_t Q_limbs = M_limbs; mbedtls_mpi_uint *M = NULL; mbedtls_mpi_uint *Q = NULL; - const mbedtls_mpi_uint *P = (mbedtls_mpi_uint *) curve448_p; - const size_t P_limbs = CHARS_TO_LIMBS(sizeof(curve448_p)); M = mbedtls_calloc(M_limbs, ciL); @@ -5536,49 +5535,67 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) * added in, not returned as carry. */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs); - /* Deal with carry bit from add by subtracting P if necessary. */ - if (X[P448_WIDTH] != 0) { - mbedtls_mpi_core_sub(X, X, P, P_limbs); - } + /* Q = B1 = M >> 224 */ + memcpy(Q, (char *) M + P224_SIZE, P224_SIZE); + memset((char *) Q + P224_SIZE, 0, P224_SIZE); - /* Q = B1 */ - memcpy(Q, M, (Q_limbs * ciL)); - mbedtls_mpi_core_shift_r(Q, Q_limbs, 224); - - /* X = X + Q = (A0 + A1) + B1 */ - /* No carry here - only max 224 bits */ + /* X = X + Q = (A0 + A1) + B1 + * Oversize Q catches potential carry here when X is already max 448 bits. + */ (void) mbedtls_mpi_core_add(X, X, Q, Q_limbs); /* M = B0 */ if (sizeof(mbedtls_mpi_uint) > 4) { M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint)-1) >> (P224_UNUSED_BITS); } - for (i = P224_WIDTH_MAX; i < M_limbs; ++i) { - M[i] = 0; - } memset(M + P224_WIDTH_MAX, 0, ((M_limbs - P224_WIDTH_MAX) * ciL)); /* M = M + Q = B0 + B1 */ (void) mbedtls_mpi_core_add(M, M, Q, Q_limbs); /* M = (B0 + B1) * 2^224 */ - /* Shifted carry bit from the addition fits in oversize M */ - mbedtls_mpi_core_shift_l(M, M_limbs, 224); - + /* Shifted carry bit from the addition fits in oversize M. */ + memmove((char *) M + P224_SIZE, M, P224_SIZE + sizeof(mbedtls_mpi_uint)); + memset(M, 0, P224_SIZE); /* X = X + M = (A0 + A1 + B1) + (B0 + B1) * 2^224 */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs); - /* Deal with carry bit by subtracting P if necessary. */ - if (X[P448_WIDTH] != 0) { - mbedtls_mpi_core_sub(X, X, P, P_limbs); - } + /* In the second and third rounds A1 and B0 have at most 1 non-zero limb and + * B1=0. + * Using this we need to calculate: + * A0 + A1 + B1 + (B0 + B1) * 2^224 = A0 + A1 + B0 * 2^224. */ + for (round = 0; round < 2; ++round) { - /* Returned result should be 0 < X < P. Although we have controlled bit - * width, we may still have a result which is greater than P. Subtract P - * if this is the case. */ - if (mbedtls_mpi_core_lt_ct(P, X, P_limbs)) { - mbedtls_mpi_core_sub(X, X, P, P_limbs); + /* Q = A1 */ + memset(Q, 0, (Q_limbs * ciL)); + memcpy(Q, X + P448_WIDTH, ((Q_limbs - 1) * ciL)); + + /* X = A0 */ + memset(X + P448_WIDTH, 0, ((M_limbs - 1) * ciL)); + + /* M = B0 */ + memcpy(M, Q, (Q_limbs * ciL)); + M[M_limbs - 1] = 0; + + if (sizeof(mbedtls_mpi_uint) > 4) { + M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint) -1) >> (P224_UNUSED_BITS); + } + + /* M = B0 * 2^224 + * Oversize M once again takes any carry. */ + memmove((char *) M + P224_SIZE, M, P224_SIZE + + sizeof(mbedtls_mpi_uint)); memset(M, 0, P224_SIZE); + + /* M = A1 + B0 * 2^224 + * No need to have to call mbedtls_mpi_core_add() as as both bignums + * should be all zero except one non-colliding limb each. */ + for (i = 0; i < (M_limbs - 1); ++i) { + M[i] = M[i] + Q[i]; + } + + /* X = A0 + (A1 + B0 * 2^224) */ + (void) mbedtls_mpi_core_add(X, X, M, M_limbs); } ret = 0; diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function index 0b9ce6b0c..53da2fc6a 100644 --- a/tests/suites/test_suite_ecp.function +++ b/tests/suites/test_suite_ecp.function @@ -1404,6 +1404,7 @@ void ecp_mod_p448(char *input_N, TEST_EQUAL(res.n, limbs); TEST_EQUAL(mbedtls_ecp_mod_p448(X.p, X.n), 0); + TEST_EQUAL(mbedtls_mpi_mod_mpi(&X, &X, &N), 0); TEST_LE_U(mbedtls_mpi_core_bitlen(X.p, X.n), 448); ASSERT_COMPARE(X.p, bytes, res.p, bytes); From b72704250125ebed44aac345723308d282515efe Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Tue, 13 Jun 2023 17:42:01 +0100 Subject: [PATCH 07/10] Move corner test case into python framework Signed-off-by: Paul Elliott --- scripts/mbedtls_dev/ecp.py | 6 ++++++ tests/suites/test_suite_ecp.data | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/mbedtls_dev/ecp.py b/scripts/mbedtls_dev/ecp.py index c9fb5e55e..722e244fe 100644 --- a/scripts/mbedtls_dev/ecp.py +++ b/scripts/mbedtls_dev/ecp.py @@ -779,6 +779,12 @@ class EcpP448Raw(bignum_common.ModOperationCommon, "167b75dfb948f82a8317cba01c75f67e290535d868a24b7f627f2855" "09167d4126af8090013c3273c02c6b9586b4625b475b51096c4ad652"), + # Corner case which causes maximum overflow + ("f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1" + "a1fe3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccd0B" + "519A16DF59C53E0D49B209200F878F362ACE518D5B8BFCF9CDC725E5E" + "01C06295E8605AF06932B5006D9E556D3F190E8136BF9C643D332"), + # Next 2 number generated by random.getrandbits(448) ("8f54f8ceacaab39e83844b40ffa9b9f15c14bc4a829e07b0829a48d4" "22fe99a22c70501e533c91352d3d854e061b90303b08c6e33c729578"), diff --git a/tests/suites/test_suite_ecp.data b/tests/suites/test_suite_ecp.data index 83abd44d4..d08ce0f99 100644 --- a/tests/suites/test_suite_ecp.data +++ b/tests/suites/test_suite_ecp.data @@ -1520,7 +1520,3 @@ ecp_add_sub #48 MBEDTLS_ECP_MOD_SCALAR(MBEDTLS_ECP_DP_CURVE448) depends_on:MBEDTLS_ECP_DP_CURVE448_ENABLED ecp_mod_add_sub:"0000000000000003f7defb1691e8e3b705620733deaaddd33a760e17a4e9ba333445533fcd71d42a6d00e3468c946b0ff353728c6173d944afbfae4877c606f":"0000000000000003f96c1d081a3cfe300dc4c27fa2ebbc37396957d4bf81156d86b88de3a9312ca5be57d93fa3549b71895aa36bd5231f38146a2f0970425b":MBEDTLS_ECP_DP_CURVE448:MBEDTLS_ECP_MOD_SCALAR -#Add one test case that the sum of upper half and lower half of the X is equal to "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" -ecp_mod_p448 #1 - f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1a1fe3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccd0B519A16DF59C53E0D49B209200F878F362ACE518D5B8BFCF9CDC725E5E01C06295E8605AF06932B5006D9E556D3F190E8136BF9C643D332 mod fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff -depends_on:MBEDTLS_ECP_DP_CURVE448_ENABLED -ecp_mod_p448:"fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff":"f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238da1a1fe3f9d6a179fa50f96cd4aff9261aa92c0e6f17ec940639bc2ccd0B519A16DF59C53E0D49B209200F878F362ACE518D5B8BFCF9CDC725E5E01C06295E8605AF06932B5006D9E556D3F190E8136BF9C643D332":"0ece49e2f747b4bc43afbacb8fe99e8b7301401d8a9108093fee65a9f4ae65e920a63ac1f2b64df6dff07870c9d531ae72a47403063238db" From 3646dc78bc44ee9765e4fdf56a74651d7f4ad709 Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Wed, 14 Jun 2023 08:51:08 +0100 Subject: [PATCH 08/10] Fix coding style issue Signed-off-by: Paul Elliott --- library/ecp_curves.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 2e377a090..82881393d 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5585,7 +5585,7 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) /* M = B0 * 2^224 * Oversize M once again takes any carry. */ memmove((char *) M + P224_SIZE, M, P224_SIZE + - sizeof(mbedtls_mpi_uint)); memset(M, 0, P224_SIZE); + sizeof(mbedtls_mpi_uint)); memset(M, 0, P224_SIZE); /* M = A1 + B0 * 2^224 * No need to have to call mbedtls_mpi_core_add() as as both bignums From b8f7305b026fdf5c7faa056c0c06a15da8e6ae7e Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Wed, 14 Jun 2023 17:52:42 +0100 Subject: [PATCH 09/10] Replace sizeof(mbedtls_mpi_uint) with ciL define Signed-off-by: Paul Elliott --- library/ecp_curves.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 82881393d..1c797d8ce 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5545,7 +5545,7 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) (void) mbedtls_mpi_core_add(X, X, Q, Q_limbs); /* M = B0 */ - if (sizeof(mbedtls_mpi_uint) > 4) { + if (ciL > 4) { M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint)-1) >> (P224_UNUSED_BITS); } memset(M + P224_WIDTH_MAX, 0, ((M_limbs - P224_WIDTH_MAX) * ciL)); @@ -5555,7 +5555,7 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) /* M = (B0 + B1) * 2^224 */ /* Shifted carry bit from the addition fits in oversize M. */ - memmove((char *) M + P224_SIZE, M, P224_SIZE + sizeof(mbedtls_mpi_uint)); + memmove((char *) M + P224_SIZE, M, P224_SIZE + ciL); memset(M, 0, P224_SIZE); /* X = X + M = (A0 + A1 + B1) + (B0 + B1) * 2^224 */ @@ -5578,14 +5578,14 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) memcpy(M, Q, (Q_limbs * ciL)); M[M_limbs - 1] = 0; - if (sizeof(mbedtls_mpi_uint) > 4) { + if (ciL > 4) { M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint) -1) >> (P224_UNUSED_BITS); } /* M = B0 * 2^224 * Oversize M once again takes any carry. */ - memmove((char *) M + P224_SIZE, M, P224_SIZE + - sizeof(mbedtls_mpi_uint)); memset(M, 0, P224_SIZE); + memmove((char *) M + P224_SIZE, M, P224_SIZE + ciL); + memset(M, 0, P224_SIZE); /* M = A1 + B0 * 2^224 * No need to have to call mbedtls_mpi_core_add() as as both bignums From bed9ac7b2dd86cfe13c6ddec64dea093ba76274a Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Wed, 14 Jun 2023 19:20:33 +0100 Subject: [PATCH 10/10] Optimise final 2 rounds Final two rounds logic could be significantly simplified. Signed-off-by: Paul Elliott --- library/ecp_curves.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/library/ecp_curves.c b/library/ecp_curves.c index 1c797d8ce..6ee364190 100644 --- a/library/ecp_curves.c +++ b/library/ecp_curves.c @@ -5487,7 +5487,7 @@ cleanup: MBEDTLS_STATIC_TESTABLE int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) { - size_t i, round; + size_t round; int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; if (X_limbs <= P448_WIDTH) { @@ -5567,32 +5567,23 @@ int mbedtls_ecp_mod_p448(mbedtls_mpi_uint *X, size_t X_limbs) * A0 + A1 + B1 + (B0 + B1) * 2^224 = A0 + A1 + B0 * 2^224. */ for (round = 0; round < 2; ++round) { - /* Q = A1 */ - memset(Q, 0, (Q_limbs * ciL)); - memcpy(Q, X + P448_WIDTH, ((Q_limbs - 1) * ciL)); + /* M = A1 */ + memset(M, 0, (M_limbs * ciL)); + memcpy(M, X + P448_WIDTH, ((M_limbs - 1) * ciL)); /* X = A0 */ memset(X + P448_WIDTH, 0, ((M_limbs - 1) * ciL)); - /* M = B0 */ - memcpy(M, Q, (Q_limbs * ciL)); - M[M_limbs - 1] = 0; - - if (ciL > 4) { - M[P224_WIDTH_MIN] &= ((mbedtls_mpi_uint) -1) >> (P224_UNUSED_BITS); - } - - /* M = B0 * 2^224 - * Oversize M once again takes any carry. */ - memmove((char *) M + P224_SIZE, M, P224_SIZE + ciL); - memset(M, 0, P224_SIZE); - /* M = A1 + B0 * 2^224 - * No need to have to call mbedtls_mpi_core_add() as as both bignums - * should be all zero except one non-colliding limb each. */ - for (i = 0; i < (M_limbs - 1); ++i) { - M[i] = M[i] + Q[i]; - } + * We know that only one limb of A1 will be non-zero and that it will be + * limb 0. We also know that B0 is the bottom 224 bits of A1 (which is + * then shifted up 224 bits), so, given M is currently A1 this turns + * into: + * M = M + (M << 224) + * As the single non-zero limb in B0 will be A1 limb 0 shifted up by 224 + * bits, we can just move that into the right place, shifted up + * accordingly.*/ + M[P224_WIDTH_MIN] = M[0] << (224 & (biL - 1)); /* X = A0 + (A1 + B0 * 2^224) */ (void) mbedtls_mpi_core_add(X, X, M, M_limbs);