diff --git a/.travis.yml b/.travis.yml index 1062d9906..cdb79d1aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -80,6 +80,58 @@ jobs: - sleep 5 - scripts/windows_msbuild.bat v141 # Visual Studio 2017 + - name: full configuration on arm64 + os: linux + dist: focal + arch: arm64 + addons: + apt: + packages: + - gcc + script: + # Do a manual build+test sequence rather than using all.sh, because + # there's no all.sh component that does what we want. We should set + # CFLAGS for arm64 host CC. + - scripts/config.py full + - scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT + - scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY + - scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT + - scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY + - make generated_files + - make CFLAGS='-march=armv8-a+crypto -O3 -Werror -fsanitize=address,undefined -fno-sanitize-recover=all' LDFLAGS='-Werror -fsanitize=address,undefined -fno-sanitize-recover=all' + - make test + - programs/test/selftest + - tests/scripts/test_psa_constant_names.py + # Modern OpenSSL does not support fixed ECDH or null ciphers. + - tests/compat.sh -p OpenSSL -e 'NULL\|ECDH_' + - tests/scripts/travis-log-failure.sh + - tests/context-info.sh + + - name: full configuration(GnuTLS compat tests) on arm64 + os: linux + dist: focal + arch: arm64 + addons: + apt: + packages: + - clang + - gnutls-bin + script: + # Do a manual build+test sequence rather than using all.sh, because + # there's no all.sh component that does what we want. We should set + # CFLAGS for arm64 host CC. + - scripts/config.py full + - scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT + - scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY + - scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT + - scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY + - make generated_files + - make CC=clang CFLAGS='-march=armv8-a+crypto -O3 -Werror -fsanitize=address,undefined -fno-sanitize-recover=all' LDFLAGS='-Werror -fsanitize=address,undefined -fno-sanitize-recover=all' + # GnuTLS supports CAMELLIA but compat.sh doesn't properly enable it. + - tests/compat.sh -p GnuTLS -e 'CAMELLIA' + - tests/scripts/travis-log-failure.sh + - tests/context-info.sh + after_failure: - tests/scripts/travis-log-failure.sh diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h index 1efabdc1f..ac374d2a4 100644 --- a/include/mbedtls/check_config.h +++ b/include/mbedtls/check_config.h @@ -70,6 +70,10 @@ #error "MBEDTLS_AESNI_C defined, but not all prerequisites" #endif +#if defined(MBEDTLS_AESCE_C) && !defined(MBEDTLS_HAVE_ASM) +#error "MBEDTLS_AESCE_C defined, but not all prerequisites" +#endif + #if defined(MBEDTLS_CTR_DRBG_C) && !defined(MBEDTLS_AES_C) #error "MBEDTLS_CTR_DRBG_C defined, but not all prerequisites" #endif diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h index b874995f4..5aff9c5b6 100644 --- a/include/mbedtls/mbedtls_config.h +++ b/include/mbedtls/mbedtls_config.h @@ -2065,6 +2065,34 @@ */ #define MBEDTLS_AESNI_C +/** + * \def MBEDTLS_AESCE_C + * + * Enable AES crypto extension support on Arm64. + * + * Module: library/aesce.c + * Caller: library/aes.c + * + * Requires: MBEDTLS_HAVE_ASM, MBEDTLS_AES_C + * + * \note The code uses Neon intrinsics, so \c CFLAGS must be set to a minimum + * of \c -march=armv8-a+crypto . + * + * \warning If the target architecture is set to something that includes the + * SHA3 feature (e.g. `-march=armv8.2-a+sha3`), for example because + * `MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT` is desired, compilers + * generate code for `MBEDTLS_AESCE_C` that includes instructions + * only present with the (optional) SHA3 feature. This will lead to an + * undefined instruction exception if the code is run on a CPU without + * that feature. + * + * \warning Runtime detection only works on linux. For non-linux operation + * system, crypto extension MUST be supported by CPU. + * + * This module adds support for the AES crypto instructions on Arm64 + */ +#define MBEDTLS_AESCE_C + /** * \def MBEDTLS_AES_C * diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt index c9714bbfb..bef2e1c4b 100644 --- a/library/CMakeLists.txt +++ b/library/CMakeLists.txt @@ -13,6 +13,7 @@ endif() set(src_crypto aes.c aesni.c + aesce.c aria.c asn1parse.c asn1write.c diff --git a/library/Makefile b/library/Makefile index dd16d0615..ed5e1e172 100644 --- a/library/Makefile +++ b/library/Makefile @@ -78,6 +78,7 @@ endif OBJS_CRYPTO= \ aes.o \ aesni.o \ + aesce.o \ aria.o \ asn1parse.o \ asn1write.o \ diff --git a/library/aes.c b/library/aes.c index 566e74715..64392fc56 100644 --- a/library/aes.c +++ b/library/aes.c @@ -39,6 +39,9 @@ #if defined(MBEDTLS_AESNI_C) #include "aesni.h" #endif +#if defined(MBEDTLS_AESCE_C) +#include "aesce.h" +#endif #include "mbedtls/platform.h" @@ -544,6 +547,12 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, } #endif +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + return mbedtls_aesce_setkey_enc((unsigned char *) RK, key, keybits); + } +#endif + for (i = 0; i < (keybits >> 5); i++) { RK[i] = MBEDTLS_GET_UINT32_LE(key, i << 2); } @@ -652,6 +661,16 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, } #endif +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + mbedtls_aesce_inverse_key( + (unsigned char *) RK, + (const unsigned char *) (cty.buf + cty.rk_offset), + ctx->nr); + goto exit; + } +#endif + SK = cty.buf + cty.rk_offset + cty.nr * 4; *RK++ = *SK++; @@ -944,6 +963,12 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, } #endif +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + return mbedtls_aesce_crypt_ecb(ctx, mode, input, output); + } +#endif + #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) if (aes_padlock_ace > 0) { if (mbedtls_padlock_xcryptecb(ctx, mode, input, output) == 0) { diff --git a/library/aesce.c b/library/aesce.c new file mode 100644 index 000000000..ee0c8e12c --- /dev/null +++ b/library/aesce.c @@ -0,0 +1,257 @@ +/* + * Arm64 crypto extension support functions + * + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "common.h" + +#if defined(MBEDTLS_AESCE_C) + +#include "aesce.h" + +#if defined(MBEDTLS_HAVE_ARM64) + +#if defined(__clang__) +# if __clang_major__ < 4 +# error "A more recent Clang is required for MBEDTLS_AESCE_C" +# endif +#elif defined(__GNUC__) +# if __GNUC__ < 6 +# error "A more recent GCC is required for MBEDTLS_AESCE_C" +# endif +#else +# error "Only GCC and Clang supported for MBEDTLS_AESCE_C" +#endif + +#if !defined(__ARM_FEATURE_CRYPTO) +# error "`crypto` feature moddifier MUST be enabled for MBEDTLS_AESCE_C." +# error "Typical option for GCC and Clang is `-march=armv8-a+crypto`." +#endif /* !__ARM_FEATURE_CRYPTO */ + +#include + +#if defined(__linux__) +#include +#include +#endif + +/* + * AES instruction support detection routine + */ +int mbedtls_aesce_has_support(void) +{ +#if defined(__linux__) + unsigned long auxval = getauxval(AT_HWCAP); + return (auxval & (HWCAP_ASIMD | HWCAP_AES)) == + (HWCAP_ASIMD | HWCAP_AES); +#else + /* Assume AES instructions are supported. */ + return 1; +#endif +} + +static uint8x16_t aesce_encrypt_block(uint8x16_t block, + unsigned char *keys, + int rounds) +{ + for (int i = 0; i < rounds - 1; i++) { + /* AES AddRoundKey, SubBytes, ShiftRows (in this order). + * AddRoundKey adds the round key for the previous round. */ + block = vaeseq_u8(block, vld1q_u8(keys + i * 16)); + /* AES mix columns */ + block = vaesmcq_u8(block); + } + + /* AES AddRoundKey for the previous round. + * SubBytes, ShiftRows for the final round. */ + block = vaeseq_u8(block, vld1q_u8(keys + (rounds -1) * 16)); + + /* Final round: no MixColumns */ + + /* Final AddRoundKey */ + block = veorq_u8(block, vld1q_u8(keys + rounds * 16)); + + return block; +} + +static uint8x16_t aesce_decrypt_block(uint8x16_t block, + unsigned char *keys, + int rounds) +{ + + for (int i = 0; i < rounds - 1; i++) { + /* AES AddRoundKey, SubBytes, ShiftRows */ + block = vaesdq_u8(block, vld1q_u8(keys + i * 16)); + /* AES inverse MixColumns for the next round. + * + * This means that we switch the order of the inverse AddRoundKey and + * inverse MixColumns operations. We have to do this as AddRoundKey is + * done in an atomic instruction together with the inverses of SubBytes + * and ShiftRows. + * + * It works because MixColumns is a linear operation over GF(2^8) and + * AddRoundKey is an exclusive or, which is equivalent to addition over + * GF(2^8). (The inverse of MixColumns needs to be applied to the + * affected round keys separately which has been done when the + * decryption round keys were calculated.) */ + block = vaesimcq_u8(block); + } + + /* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the + * last full round. */ + block = vaesdq_u8(block, vld1q_u8(keys + (rounds - 1) * 16)); + + /* Inverse AddRoundKey for inverting the initial round key addition. */ + block = veorq_u8(block, vld1q_u8(keys + rounds * 16)); + + return block; +} + +/* + * AES-ECB block en(de)cryption + */ +int mbedtls_aesce_crypt_ecb(mbedtls_aes_context *ctx, + int mode, + const unsigned char input[16], + unsigned char output[16]) +{ + uint8x16_t block = vld1q_u8(&input[0]); + unsigned char *keys = (unsigned char *) (ctx->buf + ctx->rk_offset); + + if (mode == MBEDTLS_AES_ENCRYPT) { + block = aesce_encrypt_block(block, keys, ctx->nr); + } else { + block = aesce_decrypt_block(block, keys, ctx->nr); + } + vst1q_u8(&output[0], block); + + return 0; +} + +/* + * Compute decryption round keys from encryption round keys + */ +void mbedtls_aesce_inverse_key(unsigned char *invkey, + const unsigned char *fwdkey, + int nr) +{ + int i, j; + j = nr; + vst1q_u8(invkey, vld1q_u8(fwdkey + j * 16)); + for (i = 1, j--; j > 0; i++, j--) { + vst1q_u8(invkey + i * 16, + vaesimcq_u8(vld1q_u8(fwdkey + j * 16))); + } + vst1q_u8(invkey + i * 16, vld1q_u8(fwdkey + j * 16)); + +} + +static inline uint32_t aes_rot_word(uint32_t word) +{ + return (word << (32 - 8)) | (word >> 8); +} + +static inline uint32_t aes_sub_word(uint32_t in) +{ + uint8x16_t v = vreinterpretq_u8_u32(vdupq_n_u32(in)); + uint8x16_t zero = vdupq_n_u8(0); + + /* vaeseq_u8 does both SubBytes and ShiftRows. Taking the first row yields + * the correct result as ShiftRows doesn't change the first row. */ + v = vaeseq_u8(zero, v); + return vgetq_lane_u32(vreinterpretq_u32_u8(v), 0); +} + +/* + * Key expansion function + */ +static void aesce_setkey_enc(unsigned char *rk, + const unsigned char *key, + const size_t key_bit_length) +{ + static uint8_t const rcon[] = { 0x01, 0x02, 0x04, 0x08, 0x10, + 0x20, 0x40, 0x80, 0x1b, 0x36 }; + /* See https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197.pdf + * - Section 5, Nr = Nk + 6 + * - Section 5.2, the key expansion size is Nb*(Nr+1) + */ + const uint32_t key_len_in_words = key_bit_length / 32; /* Nk */ + const size_t round_key_len_in_words = 4; /* Nb */ + const size_t round_keys_needed = key_len_in_words + 6; /* Nr */ + const size_t key_expansion_size_in_words = + round_key_len_in_words * (round_keys_needed + 1); /* Nb*(Nr+1) */ + const uint32_t *rko_end = (uint32_t *) rk + key_expansion_size_in_words; + + memcpy(rk, key, key_len_in_words * 4); + + for (uint32_t *rki = (uint32_t *) rk; + rki + key_len_in_words < rko_end; + rki += key_len_in_words) { + + size_t iteration = (rki - (uint32_t *) rk) / key_len_in_words; + uint32_t *rko; + rko = rki + key_len_in_words; + rko[0] = aes_rot_word(aes_sub_word(rki[key_len_in_words - 1])); + rko[0] ^= rcon[iteration] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + if (rko + key_len_in_words > rko_end) { + /* Do not write overflow words.*/ + continue; + } + switch (key_bit_length) { + case 128: + break; + case 192: + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + break; + case 256: + rko[4] = aes_sub_word(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + break; + } + } +} + +/* + * Key expansion, wrapper + */ +int mbedtls_aesce_setkey_enc(unsigned char *rk, + const unsigned char *key, + size_t bits) +{ + switch (bits) { + case 128: + case 192: + case 256: + aesce_setkey_enc(rk, key, bits); + break; + default: + return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH; + } + + return 0; +} + +#endif /* MBEDTLS_HAVE_ARM64 */ + +#endif /* MBEDTLS_AESCE_C */ diff --git a/library/aesce.h b/library/aesce.h new file mode 100644 index 000000000..da4244699 --- /dev/null +++ b/library/aesce.h @@ -0,0 +1,98 @@ +/** + * \file aesce.h + * + * \brief AES-CE for hardware AES acceleration on ARMv8 processors with crypto + * extension. + * + * \warning These functions are only for internal use by other library + * functions; you must not call them directly. + */ +/* + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MBEDTLS_AESCE_H +#define MBEDTLS_AESCE_H + +#include "mbedtls/build_info.h" + +#include "mbedtls/aes.h" + + +#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \ + defined(__aarch64__) && !defined(MBEDTLS_HAVE_ARM64) +#define MBEDTLS_HAVE_ARM64 +#endif + +#if defined(MBEDTLS_HAVE_ARM64) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \brief Internal function to detect the crypto extension in CPUs. + * + * \return 1 if CPU has support for the feature, 0 otherwise + */ +int mbedtls_aesce_has_support(void); + +/** + * \brief Internal AES-ECB block encryption and decryption + * + * \param ctx AES context + * \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT + * \param input 16-byte input block + * \param output 16-byte output block + * + * \return 0 on success (cannot fail) + */ +int mbedtls_aesce_crypt_ecb(mbedtls_aes_context *ctx, + int mode, + const unsigned char input[16], + unsigned char output[16]); + +/** + * \brief Internal round key inversion. This function computes + * decryption round keys from the encryption round keys. + * + * \param invkey Round keys for the equivalent inverse cipher + * \param fwdkey Original round keys (for encryption) + * \param nr Number of rounds (that is, number of round keys minus one) + */ +void mbedtls_aesce_inverse_key(unsigned char *invkey, + const unsigned char *fwdkey, + int nr); + +/** + * \brief Internal key expansion for encryption + * + * \param rk Destination buffer where the round keys are written + * \param key Encryption key + * \param bits Key size in bits (must be 128, 192 or 256) + * + * \return 0 if successful, or MBEDTLS_ERR_AES_INVALID_KEY_LENGTH + */ +int mbedtls_aesce_setkey_enc(unsigned char *rk, + const unsigned char *key, + size_t bits); + +#ifdef __cplusplus +} +#endif + +#endif /* MBEDTLS_HAVE_ARM64 */ + +#endif /* MBEDTLS_AESCE_H */ diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh index 7d91fa27d..353ec6907 100755 --- a/tests/scripts/all.sh +++ b/tests/scripts/all.sh @@ -1972,6 +1972,7 @@ component_build_module_alt () { # aesni.c and padlock.c reference mbedtls_aes_context fields directly. scripts/config.py unset MBEDTLS_AESNI_C scripts/config.py unset MBEDTLS_PADLOCK_C + scripts/config.py unset MBEDTLS_AESCE_C # MBEDTLS_ECP_RESTARTABLE is documented as incompatible. scripts/config.py unset MBEDTLS_ECP_RESTARTABLE # You can only have one threading implementation: alt or pthread, not both. @@ -3341,6 +3342,7 @@ component_test_have_int32 () { scripts/config.py unset MBEDTLS_HAVE_ASM scripts/config.py unset MBEDTLS_AESNI_C scripts/config.py unset MBEDTLS_PADLOCK_C + scripts/config.py unset MBEDTLS_AESCE_C make CC=gcc CFLAGS='-Werror -Wall -Wextra -DMBEDTLS_HAVE_INT32' msg "test: gcc, force 32-bit bignum limbs" @@ -3352,6 +3354,7 @@ component_test_have_int64 () { scripts/config.py unset MBEDTLS_HAVE_ASM scripts/config.py unset MBEDTLS_AESNI_C scripts/config.py unset MBEDTLS_PADLOCK_C + scripts/config.py unset MBEDTLS_AESCE_C make CC=gcc CFLAGS='-Werror -Wall -Wextra -DMBEDTLS_HAVE_INT64' msg "test: gcc, force 64-bit bignum limbs"