Improve readability of unrolled AESCE code

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
Dave Rodgman 2023-06-16 09:36:50 +01:00
parent 2dd15b3ab5
commit 48fd2ab5d5
2 changed files with 56 additions and 97 deletions

library/aesce.c

@@ -101,59 +101,36 @@ int mbedtls_aesce_has_support(void)
 #endif
 }
 
+/* Single round of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND                   \
+    block = vaeseq_u8(block, vld1q_u8(keys)); \
+    block = vaesmcq_u8(block);                \
+    keys += 16
+
+/* Two rounds of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND_X2 AESCE_ENCRYPT_ROUND; AESCE_ENCRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_encrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds.
+     * Skip 4 or 2 rounds, if doing 10 or 12 rounds */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_12:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
 rounds_10:
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
-    block = vaeseq_u8(block, vld1q_u8(keys));
-    block = vaesmcq_u8(block);
-    keys += 16;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND_X2;
+    AESCE_ENCRYPT_ROUND;
 
     /* AES AddRoundKey for the previous round.
      * SubBytes, ShiftRows for the final round. */
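
For reference, the unrolled sequence above (13 AESCE_ENCRYPT_ROUND invocations for a 14-round key, plus the final-round handling) is equivalent to the following loop form. This is an illustrative sketch, not the shipped code; it assumes <arm_neon.h> and a compiler targeting armv8-a with the crypto extension:

#include <arm_neon.h>

/* Loop-form equivalent of aesce_encrypt_block() (sketch only).
 * The shipped code unrolls this loop and uses goto to skip the
 * first 4 or 2 iterations for 10- or 12-round key schedules. */
static uint8x16_t aesce_encrypt_block_loop(uint8x16_t block,
                                           const unsigned char *keys,
                                           int rounds)
{
    for (int i = 0; i < rounds - 1; i++) {
        /* AddRoundKey + SubBytes + ShiftRows, then MixColumns. */
        block = vaeseq_u8(block, vld1q_u8(keys));
        block = vaesmcq_u8(block);
        keys += 16;
    }
    /* Final round: AddRoundKey + SubBytes + ShiftRows, no MixColumns. */
    block = vaeseq_u8(block, vld1q_u8(keys));
    keys += 16;
    /* Final AddRoundKey. */
    return veorq_u8(block, vld1q_u8(keys));
}

The unrolled version trades this loop's per-round counter and branch for straight-line code; the goto labels let one straight-line tail serve all three key sizes.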
@@ -168,74 +145,56 @@ rounds_10:
     return block;
 }
 
+/* Single round of AESCE decryption
+ *
+ * AES AddRoundKey, SubBytes, ShiftRows
+ *
+ *      block = vaesdq_u8(block, vld1q_u8(keys));
+ *
+ * AES inverse MixColumns for the next round.
+ *
+ * This means that we switch the order of the inverse AddRoundKey and
+ * inverse MixColumns operations. We have to do this as AddRoundKey is
+ * done in an atomic instruction together with the inverses of SubBytes
+ * and ShiftRows.
+ *
+ * It works because MixColumns is a linear operation over GF(2^8) and
+ * AddRoundKey is an exclusive or, which is equivalent to addition over
+ * GF(2^8). (The inverse of MixColumns needs to be applied to the
+ * affected round keys separately which has been done when the
+ * decryption round keys were calculated.)
+ *
+ *      block = vaesimcq_u8(block);
+ */
+#define AESCE_DECRYPT_ROUND                   \
+    block = vaesdq_u8(block, vld1q_u8(keys)); \
+    block = vaesimcq_u8(block);               \
+    keys += 16
+
+/* Two rounds of AESCE decryption */
+#define AESCE_DECRYPT_ROUND_X2 AESCE_DECRYPT_ROUND; AESCE_DECRYPT_ROUND
+
 MBEDTLS_OPTIMIZE_ALWAYS
 static uint8x16_t aesce_decrypt_block(uint8x16_t block,
                                       unsigned char *keys,
                                       int rounds)
 {
-    /* Assume either 10, 12 or 14 rounds */
+    /* Assume either 10, 12 or 14 rounds.
+     * Skip 4 or 2 rounds, if doing 10 or 12 rounds */
     if (rounds == 10) {
         goto rounds_10;
     }
     if (rounds == 12) {
         goto rounds_12;
     }
-    /* AES AddRoundKey, SubBytes, ShiftRows */
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    /* AES inverse MixColumns for the next round.
-     *
-     * This means that we switch the order of the inverse AddRoundKey and
-     * inverse MixColumns operations. We have to do this as AddRoundKey is
-     * done in an atomic instruction together with the inverses of SubBytes
-     * and ShiftRows.
-     *
-     * It works because MixColumns is a linear operation over GF(2^8) and
-     * AddRoundKey is an exclusive or, which is equivalent to addition over
-     * GF(2^8). (The inverse of MixColumns needs to be applied to the
-     * affected round keys separately which has been done when the
-     * decryption round keys were calculated.) */
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_12:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
 rounds_10:
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
-    block = vaesdq_u8(block, vld1q_u8(keys));
-    block = vaesimcq_u8(block);
-    keys += 16;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND_X2;
+    AESCE_DECRYPT_ROUND;
 
     /* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
      * last full round. */
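
Stated as a formula (a restatement of the comment above, with s the state and k a round key, both 16-byte vectors over GF(2^8)):

$$ \mathrm{InvMixColumns}(s \oplus k) = \mathrm{InvMixColumns}(s) \oplus \mathrm{InvMixColumns}(k) $$

Because vaesdq_u8 fuses AddRoundKey with the inverses of SubBytes and ShiftRows, InvMixColumns can only run after the key has been added; the identity shows this still gives the standard result, provided each affected round key was passed through InvMixColumns once when the decryption key schedule was built.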

library/aesce.h

@@ -52,8 +52,8 @@ int mbedtls_aesce_has_support(void);
 /**
  * \brief Internal AES-ECB block encryption and decryption
  *
- * Note: this assumes that the context specifies either 10, 12 or 14 rounds
- * and will behave incorrectly if this is not the case.
+ * \warning This assumes that the context specifies either 10, 12 or 14
+ *          rounds and will behave incorrectly if this is not the case.
  *
  * \param ctx AES context
  * \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
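
Since the \warning makes the 10/12/14-round precondition (AES-128, -192, -256 respectively) the caller's responsibility, a call site could guard on it explicitly. A hypothetical sketch, not from this commit; it assumes the mbedtls_aes_context round count in ctx->nr (set by mbedtls_aes_setkey_enc()/mbedtls_aes_setkey_dec()) and the existing MBEDTLS_ERR_AES_INVALID_KEY_LENGTH error code:

/* Sketch (not from this commit): reject key schedules the
 * AESCE path does not support. */
if (ctx->nr != 10 && ctx->nr != 12 && ctx->nr != 14) {
    return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}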