diff --git a/library/aes.c b/library/aes.c
index 69d4eadfa..1f3d8ad91 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -962,6 +962,35 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
 }
 #endif /* !MBEDTLS_AES_DECRYPT_ALT */
 
+#if defined(MBEDTLS_AESNI_HAVE_CODE) || \
+    (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86))
+/* Both VIA Padlock and the intrinsics-based AESNI implementation need the
+ * round keys to start on a 16-byte boundary. This is taken care of before
+ * the round keys are created, but the AES context may have been moved since
+ * (for instance when the library is called from a language with managed
+ * memory), which can leave the keys at a different position relative to a
+ * 16-byte boundary. If so, slide the round keys within buf to realign them.
+ */
+static void aes_maybe_realign(mbedtls_aes_context *ctx)
+{
+    /* buf is a pointer to uint32_t and rk_offset counts uint32_t words
+     * (4 bytes each), so a 16-byte boundary is a 4-word boundary. */
+    uintptr_t rk_address = (uintptr_t) (ctx->buf + ctx->rk_offset);
+    unsigned misaligned_words = (rk_address & 0x0000000f) / 4;
+    unsigned aligned_offset;
+
+    if (misaligned_words == 0) {
+        return;                            // already on a 16-byte boundary
+    }
+    aligned_offset = ctx->rk_offset + 4 - misaligned_words;
+    aligned_offset -= (aligned_offset >= 4) ? 4 : 0;
+    /* Regions may overlap: round keys are (nr + 1) blocks of 16 bytes. */
+    memmove(ctx->buf + aligned_offset, ctx->buf + ctx->rk_offset,
+            (ctx->nr + 1) * 16);
+    ctx->rk_offset = aligned_offset;
+}
+#endif
+
 /*
  * AES-ECB block encryption/decryption
  */
@@ -976,6 +1005,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        aes_maybe_realign(ctx);
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
 #endif
@@ -988,13 +1018,8 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace > 0) {
-        if (mbedtls_padlock_xcryptecb(ctx, mode, input, output) == 0) {
-            return 0;
-        }
-
-        // If padlock data misaligned, we just fall back to
-        // unaccelerated mode
-        //
+        aes_maybe_realign(ctx);
+        return mbedtls_padlock_xcryptecb(ctx, mode, input, output);
     }
 #endif