Make mod_p{224,256,384} a bit faster

Speedup is roughly 25%, giving a 6% speedup on ecp_mul() for these curves.
2013-10-23 20:17:00 +02:00 · 2013-10-23 20:17:00 +02:00 · 5779cbe582
commit 5779cbe582
parent c04c530a98
2 changed files with 19 additions and 7 deletions
--- a/include/polarssl/bignum.h
+++ b/include/polarssl/bignum.h
@ -128,6 +128,7 @@ typedef uint32_t t_udbl;
 #define POLARSSL_HAVE_UDBL
 #else
  #if ( defined(_MSC_VER) && defined(_M_AMD64) )
    #define POLARSSL_HAVE_INT64
    typedef  int64_t t_sint;
    typedef uint64_t t_uint;
  #else
@ -137,6 +138,7 @@ typedef uint32_t t_udbl;
          defined(__ia64__)  || defined(__alpha__)     || \
          (defined(__sparc__) && defined(__arch64__))  || \
          defined(__s390x__) ) )
       #define POLARSSL_HAVE_INT64
       typedef  int64_t t_sint;
       typedef uint64_t t_uint;
       typedef unsigned int t_udbl __attribute__((mode(TI)));
--- a/library/ecp.c
+++ b/library/ecp.c
@ -663,22 +663,32 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry )
    if( c < 0 ) fix_negative( N, c, bits );
 /*
- * If the result is negative, we get it in the form c * 2^192 + N,
+ * If the result is negative, we get it in the form
- * with c negative and N positive (the c >= 0 case is handled by LAST).
+ * c * 2^(bits + 32) + N, with c negative and N positive shorter than 'bits'
 */
 static inline int fix_negative( mpi *N, signed char c, size_t bits )
 {
    int ret;
    mpi C;
    t_uint Cp[ 384 / 8 / sizeof( t_uint) + 1 ];
-    mpi_init( &C );
+    /* C = - c * 2^(bits + 32) */
    C.s = 1;
    C.n = bits / 8 / sizeof( t_uint ) + 1;
    C.p = Cp;
    memset( Cp, 0, C.n * sizeof( t_uint ) );
 #if defined(POLARSSL_HAVE_INT64)
    if( bits == 224 )
        Cp[ C.n - 1 ] = ((t_uint) -c) << 32;
    else
 #endif
        Cp[ C.n - 1 ] = (t_uint) -c;
-    MPI_CHK( mpi_lset( &C, c ) );
+    /* N = - ( C - N ) */
-    MPI_CHK( mpi_shift_l( &C, bits ) );
+    MPI_CHK( mpi_sub_abs( N, &C, N ) );
-    MPI_CHK( mpi_add_mpi( N, N, &C ) );
+    N->s = -1;
 cleanup:
    mpi_free( &C );
    return( ret );
 }