diff --git a/library/aes.c b/library/aes.c
index 69d4eadfa..1f3d8ad91 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -962,6 +962,35 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
 }
 #endif /* !MBEDTLS_AES_DECRYPT_ALT */
 
+#if defined(MBEDTLS_AESNI_HAVE_CODE) ||                                  \
+    (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86))
+/* VIA Padlock and our intrinsics-based implementation of AESNI require
+ * the round keys to be aligned on a 16-byte boundary. We take care of this
+ * before creating them, but the AES context may have moved since then (this
+ * can happen if the library is called from a language with managed memory),
+ * leaving the keys misaligned. If so, move the (ctx->nr + 1) round keys to
+ * a 16-byte-aligned position inside ctx->buf and update ctx->rk_offset.
+ */
+static void aes_maybe_realign(mbedtls_aes_context *ctx)
+{
+    /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t
+     * and rk_offset is in units of uint32_t words = 4 bytes, so this is a
+     * 4-word alignment; current_alignment is the misalignment in words. */
+    uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset);
+    unsigned current_alignment = (current_address & 0x0000000f) / 4;
+    if (current_alignment != 0) {
+        unsigned new_offset = ctx->rk_offset + 4 - current_alignment;
+        if (new_offset >= 4) {  // 4 words (16 bytes) lower is aligned too
+            new_offset -= 4;
+        }
+        memmove(ctx->buf + new_offset,     // new address
+                ctx->buf + ctx->rk_offset, // current address (may overlap)
+                (ctx->nr + 1) * 16);       // number of round keys * bytes per rk
+        ctx->rk_offset = new_offset;
+    }
+}
+#endif
+
 /*
  * AES-ECB block encryption/decryption
  */
@@ -976,6 +1005,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        aes_maybe_realign(ctx);
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
 #endif
@@ -988,13 +1018,8 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace > 0) {
-        if (mbedtls_padlock_xcryptecb(ctx, mode, input, output) == 0) {
-            return 0;
-        }
-
-        // If padlock data misaligned, we just fall back to
-        // unaccelerated mode
-        //
+        aes_maybe_realign(ctx);
+        return mbedtls_padlock_xcryptecb(ctx, mode, input, output);
     }
 #endif
 