diff --git a/ChangeLog b/ChangeLog index 5f5fdd27c..deadfc37d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,11 @@ Security trusted CA with a non DER-compliant certificate. Found by luocm on GitHub. Fixes #825. +Features + * Add option MBEDTLS_AES_FEWER_TABLES to dynamically compute 3/4 of the AES tables + during runtime, thereby reducing the RAM/ROM footprint by ~6kb. Suggested + and contributed by jkivilin in #394. + Bugfix * Fix spurious uninitialized variable warning in cmac.c. Fix independently contributed by Brian J Murray and David Brown. diff --git a/include/mbedtls/config.h b/include/mbedtls/config.h index b5905ef9d..48c32d4aa 100644 --- a/include/mbedtls/config.h +++ b/include/mbedtls/config.h @@ -440,12 +440,45 @@ /** * \def MBEDTLS_AES_ROM_TABLES * - * Store the AES tables in ROM. + * Use precomputed AES tables stored in ROM. + * + * Uncomment this macro to use precomputed AES tables stored in ROM. + * Comment this macro to generate AES tables in RAM at runtime. + * + * Tradeoff: Using precomputed ROM tables reduces RAM usage by ~8kb + * (or ~2kb if \c MBEDTLS_AES_FEWER_TABLES is used) and reduces the + * initialization time before the first AES operation can be performed. + * It comes at the cost of additional ~8kb ROM use (resp. ~2kb if \c + * MBEDTLS_AES_FEWER_TABLES below is used), and potentially degraded + * performance if ROM access is slower than RAM access. + * + * This option is independent of \c MBEDTLS_AES_FEWER_TABLES. * - * Uncomment this macro to store the AES tables in ROM. */ //#define MBEDTLS_AES_ROM_TABLES +/** + * \def MBEDTLS_AES_FEWER_TABLES + * + * Use less ROM/RAM for AES tables. + * + * Uncommenting this macro omits 75% of the AES tables from + * ROM / RAM (depending on the value of \c MBEDTLS_AES_ROM_TABLES) + * by computing their values on the fly during operations + * (the tables are entry-wise rotations of one another). + * + * Tradeoff: Uncommenting this reduces the RAM / ROM footprint + * by ~6kb but at the cost of more arithmetic operations during + * runtime. Specifically, one has to compare 4 accesses within + * different tables to 4 accesses with additional arithmetic + * operations within the same table. The performance gain/loss + * depends on the system and memory details. + * + * This option is independent of \c MBEDTLS_AES_ROM_TABLES. + * + */ +//#define MBEDTLS_AES_FEWER_TABLES + /** * \def MBEDTLS_CAMELLIA_SMALL_MEMORY * diff --git a/library/aes.c b/library/aes.c index 3d2eac82d..da94b1943 100644 --- a/library/aes.c +++ b/library/aes.c @@ -201,6 +201,8 @@ static const unsigned char FSb[256] = static const uint32_t FT0[256] = { FT }; #undef V +#if !defined(MBEDTLS_AES_FEWER_TABLES) + #define V(a,b,c,d) 0x##b##c##d##a static const uint32_t FT1[256] = { FT }; #undef V @@ -213,6 +215,8 @@ static const uint32_t FT2[256] = { FT }; static const uint32_t FT3[256] = { FT }; #undef V +#endif /* !MBEDTLS_AES_FEWER_TABLES */ + #undef FT /* @@ -328,6 +332,8 @@ static const unsigned char RSb[256] = static const uint32_t RT0[256] = { RT }; #undef V +#if !defined(MBEDTLS_AES_FEWER_TABLES) + #define V(a,b,c,d) 0x##b##c##d##a static const uint32_t RT1[256] = { RT }; #undef V @@ -340,6 +346,8 @@ static const uint32_t RT2[256] = { RT }; static const uint32_t RT3[256] = { RT }; #undef V +#endif /* !MBEDTLS_AES_FEWER_TABLES */ + #undef RT /* @@ -359,18 +367,22 @@ static const uint32_t RCON[10] = */ static unsigned char FSb[256]; static uint32_t FT0[256]; +#if !defined(MBEDTLS_AES_FEWER_TABLES) static uint32_t FT1[256]; static uint32_t FT2[256]; static uint32_t FT3[256]; +#endif /* !MBEDTLS_AES_FEWER_TABLES */ /* * Reverse S-box & tables */ static unsigned char RSb[256]; static uint32_t RT0[256]; +#if !defined(MBEDTLS_AES_FEWER_TABLES) static uint32_t RT1[256]; static uint32_t RT2[256]; static uint32_t RT3[256]; +#endif /* !MBEDTLS_AES_FEWER_TABLES */ /* * Round constants @@ -445,9 +457,11 @@ static void aes_gen_tables( void ) ( (uint32_t) x << 16 ) ^ ( (uint32_t) z << 24 ); +#if !defined(MBEDTLS_AES_FEWER_TABLES) FT1[i] = ROTL8( FT0[i] ); FT2[i] = ROTL8( FT1[i] ); FT3[i] = ROTL8( FT2[i] ); +#endif /* !MBEDTLS_AES_FEWER_TABLES */ x = RSb[i]; @@ -456,14 +470,48 @@ static void aes_gen_tables( void ) ( (uint32_t) MUL( 0x0D, x ) << 16 ) ^ ( (uint32_t) MUL( 0x0B, x ) << 24 ); +#if !defined(MBEDTLS_AES_FEWER_TABLES) RT1[i] = ROTL8( RT0[i] ); RT2[i] = ROTL8( RT1[i] ); RT3[i] = ROTL8( RT2[i] ); +#endif /* !MBEDTLS_AES_FEWER_TABLES */ } } +#undef ROTL8 + #endif /* MBEDTLS_AES_ROM_TABLES */ +#if defined(MBEDTLS_AES_FEWER_TABLES) + +#define ROTL8(x) ( (uint32_t)( ( x ) << 8 ) + (uint32_t)( ( x ) >> 24 ) ) +#define ROTL16(x) ( (uint32_t)( ( x ) << 16 ) + (uint32_t)( ( x ) >> 16 ) ) +#define ROTL24(x) ( (uint32_t)( ( x ) << 24 ) + (uint32_t)( ( x ) >> 8 ) ) + +#define AES_RT0(idx) RT0[idx] +#define AES_RT1(idx) ROTL8( RT0[idx] ) +#define AES_RT2(idx) ROTL16( RT0[idx] ) +#define AES_RT3(idx) ROTL24( RT0[idx] ) + +#define AES_FT0(idx) FT0[idx] +#define AES_FT1(idx) ROTL8( FT0[idx] ) +#define AES_FT2(idx) ROTL16( FT0[idx] ) +#define AES_FT3(idx) ROTL24( FT0[idx] ) + +#else /* MBEDTLS_AES_FEWER_TABLES */ + +#define AES_RT0(idx) RT0[idx] +#define AES_RT1(idx) RT1[idx] +#define AES_RT2(idx) RT2[idx] +#define AES_RT3(idx) RT3[idx] + +#define AES_FT0(idx) FT0[idx] +#define AES_FT1(idx) FT1[idx] +#define AES_FT2(idx) FT2[idx] +#define AES_FT3(idx) FT3[idx] + +#endif /* MBEDTLS_AES_FEWER_TABLES */ + void mbedtls_aes_init( mbedtls_aes_context *ctx ) { memset( ctx, 0, sizeof( mbedtls_aes_context ) ); @@ -641,10 +689,10 @@ int mbedtls_aes_setkey_dec( mbedtls_aes_context *ctx, const unsigned char *key, { for( j = 0; j < 4; j++, SK++ ) { - *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^ - RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^ - RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^ - RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ]; + *RK++ = AES_RT0( FSb[ ( *SK ) & 0xFF ] ) ^ + AES_RT1( FSb[ ( *SK >> 8 ) & 0xFF ] ) ^ + AES_RT2( FSb[ ( *SK >> 16 ) & 0xFF ] ) ^ + AES_RT3( FSb[ ( *SK >> 24 ) & 0xFF ] ); } } @@ -660,50 +708,50 @@ exit: } #endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */ -#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ -{ \ - X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \ - FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y3 >> 24 ) & 0xFF ]; \ - \ - X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \ - FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y0 >> 24 ) & 0xFF ]; \ - \ - X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \ - FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y1 >> 24 ) & 0xFF ]; \ - \ - X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \ - FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y2 >> 24 ) & 0xFF ]; \ +#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ +{ \ + X0 = *RK++ ^ AES_FT0( ( Y0 ) & 0xFF ) ^ \ + AES_FT1( ( Y1 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y2 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y3 >> 24 ) & 0xFF ); \ + \ + X1 = *RK++ ^ AES_FT0( ( Y1 ) & 0xFF ) ^ \ + AES_FT1( ( Y2 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y3 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y0 >> 24 ) & 0xFF ); \ + \ + X2 = *RK++ ^ AES_FT0( ( Y2 ) & 0xFF ) ^ \ + AES_FT1( ( Y3 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y0 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y1 >> 24 ) & 0xFF ); \ + \ + X3 = *RK++ ^ AES_FT0( ( Y3 ) & 0xFF ) ^ \ + AES_FT1( ( Y0 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y1 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y2 >> 24 ) & 0xFF ); \ } -#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ -{ \ - X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \ - RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y1 >> 24 ) & 0xFF ]; \ - \ - X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \ - RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y2 >> 24 ) & 0xFF ]; \ - \ - X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \ - RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y3 >> 24 ) & 0xFF ]; \ - \ - X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \ - RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y0 >> 24 ) & 0xFF ]; \ +#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ +{ \ + X0 = *RK++ ^ AES_RT0( ( Y0 ) & 0xFF ) ^ \ + AES_RT1( ( Y3 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y2 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y1 >> 24 ) & 0xFF ); \ + \ + X1 = *RK++ ^ AES_RT0( ( Y1 ) & 0xFF ) ^ \ + AES_RT1( ( Y0 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y3 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y2 >> 24 ) & 0xFF ); \ + \ + X2 = *RK++ ^ AES_RT0( ( Y2 ) & 0xFF ) ^ \ + AES_RT1( ( Y1 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y0 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y3 >> 24 ) & 0xFF ); \ + \ + X3 = *RK++ ^ AES_RT0( ( Y3 ) & 0xFF ) ^ \ + AES_RT1( ( Y2 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y1 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y0 >> 24 ) & 0xFF ); \ } /* diff --git a/library/version_features.c b/library/version_features.c index da47e3d75..1b06ff322 100644 --- a/library/version_features.c +++ b/library/version_features.c @@ -237,6 +237,9 @@ static const char *features[] = { #if defined(MBEDTLS_AES_ROM_TABLES) "MBEDTLS_AES_ROM_TABLES", #endif /* MBEDTLS_AES_ROM_TABLES */ +#if defined(MBEDTLS_AES_FEWER_TABLES) + "MBEDTLS_AES_FEWER_TABLES", +#endif /* MBEDTLS_AES_FEWER_TABLES */ #if defined(MBEDTLS_CAMELLIA_SMALL_MEMORY) "MBEDTLS_CAMELLIA_SMALL_MEMORY", #endif /* MBEDTLS_CAMELLIA_SMALL_MEMORY */ diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index 2dfd39e86..497a261c4 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -664,6 +664,34 @@ make msg "test: MBEDTLS_TEST_NULL_ENTROPY - main suites (inc. selftests) (ASan build)" make test +msg "build: default config with AES_FEWER_TABLES enabled" +cleanup +cp "$CONFIG_H" "$CONFIG_BAK" +scripts/config.pl set MBEDTLS_AES_FEWER_TABLES +make CC=gcc CFLAGS='-Werror -Wall -Wextra' + +msg "test: AES_FEWER_TABLES" +make test + +msg "build: default config with AES_ROM_TABLES enabled" +cleanup +cp "$CONFIG_H" "$CONFIG_BAK" +scripts/config.pl set MBEDTLS_AES_ROM_TABLES +make CC=gcc CFLAGS='-Werror -Wall -Wextra' + +msg "test: AES_ROM_TABLES" +make test + +msg "build: default config with AES_ROM_TABLES and AES_FEWER_TABLES enabled" +cleanup +cp "$CONFIG_H" "$CONFIG_BAK" +scripts/config.pl set MBEDTLS_AES_FEWER_TABLES +scripts/config.pl set MBEDTLS_AES_ROM_TABLES +make CC=gcc CFLAGS='-Werror -Wall -Wextra' + +msg "test: AES_FEWER_TABLES + AES_ROM_TABLES" +make test + if uname -a | grep -F Linux >/dev/null; then msg "build/test: make shared" # ~ 40s cleanup