Merge pull request #8326 from daverodgman/aesce-thumb2

Support hw-accelerated AES on Thumb and Arm
This commit is contained in:
Dave Rodgman 2023-11-27 09:58:58 +00:00 committed by GitHub
commit 9fbac381e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 219 additions and 32 deletions

View file

@ -0,0 +1,3 @@
Features
* Support use of Armv8-A Cryptographic Extensions for hardware acclerated
AES when compiling for Thumb (T32) or 32-bit Arm (A32).

View file

@ -2238,7 +2238,7 @@
/**
* \def MBEDTLS_AESCE_C
*
* Enable AES cryptographic extension support on 64-bit Arm.
* Enable AES cryptographic extension support on Armv8.
*
* Module: library/aesce.c
* Caller: library/aes.c
@ -2249,13 +2249,15 @@
* system, Armv8-A Cryptographic Extensions must be supported by
* the CPU when this option is enabled.
*
* \note Minimum compiler versions for this feature are Clang 4.0,
* armclang 6.6, GCC 6.0 or MSVC 2019 version 16.11.2.
* \note Minimum compiler versions for this feature when targeting aarch64
* are Clang 4.0; armclang 6.6; GCC 6.0; or MSVC 2019 version 16.11.2.
* Minimum compiler versions for this feature when targeting 32-bit
* Arm or Thumb are Clang 11.0; armclang 6.20; or GCC 6.0.
*
* \note \c CFLAGS must be set to a minimum of \c -march=armv8-a+crypto for
* armclang <= 6.9
*
* This module adds support for the AES Armv8-A Cryptographic Extensions on Aarch64 systems.
* This module adds support for the AES Armv8-A Cryptographic Extensions on Armv8 systems.
*/
#define MBEDTLS_AESCE_C

View file

@ -23,7 +23,7 @@
#include "mbedtls/error.h"
#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
#if !((defined(MBEDTLS_ARCH_IS_ARM64) && defined(MBEDTLS_AESCE_C)) || \
#if !((defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(MBEDTLS_AESCE_C)) || \
(defined(MBEDTLS_ARCH_IS_X64) && defined(MBEDTLS_AESNI_C)) || \
(defined(MBEDTLS_ARCH_IS_X86) && defined(MBEDTLS_AESNI_C)))
#error "MBEDTLS_AES_USE_HARDWARE_ONLY defined, but not all prerequisites"

View file

@ -5,8 +5,17 @@
* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
*/
#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && \
defined(__clang__) && __clang_major__ >= 4
#if defined(__clang__) && (__clang_major__ >= 4)
/* Ideally, we would simply use MBEDTLS_ARCH_IS_ARMV8_A in the following #if,
* but that is defined by build_info.h, and we need this block to happen first. */
#if defined(__ARM_ARCH)
#if __ARM_ARCH >= 8
#define MBEDTLS_AESCE_ARCH_IS_ARMV8_A
#endif
#endif
#if defined(MBEDTLS_AESCE_ARCH_IS_ARMV8_A) && !defined(__ARM_FEATURE_CRYPTO)
/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
*
* The intrinsic declaration are guarded by predefined ACLE macros in clang:
@ -27,6 +36,8 @@
#define MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG
#endif
#endif /* defined(__clang__) && (__clang_major__ >= 4) */
#include <string.h>
#include "common.h"
@ -34,12 +45,14 @@
#include "aesce.h"
#if defined(MBEDTLS_ARCH_IS_ARM64)
#if defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
/* Compiler version checks. */
#if defined(__clang__)
# if __clang_major__ < 4
# error "Minimum version of Clang for MBEDTLS_AESCE_C is 4.0."
# if defined(MBEDTLS_ARCH_IS_ARM32) && (__clang_major__ < 11)
# error "Minimum version of Clang for MBEDTLS_AESCE_C on 32-bit Arm or Thumb is 11.0."
# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__clang_major__ < 4)
# error "Minimum version of Clang for MBEDTLS_AESCE_C on aarch64 is 4.0."
# endif
#elif defined(__GNUC__)
# if __GNUC__ < 6
@ -52,6 +65,15 @@
# if _MSC_VER < 1929
# error "Minimum version of MSVC for MBEDTLS_AESCE_C is 2019 version 16.11.2."
# endif
#elif defined(__ARMCC_VERSION)
# if defined(MBEDTLS_ARCH_IS_ARM32) && (__ARMCC_VERSION < 6200002)
/* TODO: We haven't verified armclang for 32-bit Arm/Thumb prior to 6.20.
* If someone verified that, please update this and document of
* `MBEDTLS_AESCE_C` in `mbedtls_config.h`. */
# error "Minimum version of armclang for MBEDTLS_AESCE_C on 32-bit Arm is 6.20."
# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__ARMCC_VERSION < 6060000)
# error "Minimum version of armclang for MBEDTLS_AESCE_C on aarch64 is 6.6."
# endif
#endif
#ifdef __ARM_NEON
@ -84,8 +106,19 @@
#if defined(__linux__) && !defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#if !defined(HWCAP_NEON)
#define HWCAP_NEON (1 << 12)
#endif
#if !defined(HWCAP2_AES)
#define HWCAP2_AES (1 << 0)
#endif
#if !defined(HWCAP_AES)
#define HWCAP_AES (1 << 3)
#endif
#if !defined(HWCAP_ASIMD)
#define HWCAP_ASIMD (1 << 1)
#endif
signed char mbedtls_aesce_has_support_result = -1;
@ -102,6 +135,16 @@ int mbedtls_aesce_has_support_impl(void)
* once, but that is harmless.
*/
if (mbedtls_aesce_has_support_result == -1) {
#if defined(MBEDTLS_ARCH_IS_ARM32)
unsigned long auxval = getauxval(AT_HWCAP);
unsigned long auxval2 = getauxval(AT_HWCAP2);
if (((auxval & HWCAP_NEON) == HWCAP_NEON) &&
((auxval2 & HWCAP2_AES) == HWCAP2_AES)) {
mbedtls_aesce_has_support_result = 1;
} else {
mbedtls_aesce_has_support_result = 0;
}
#else
unsigned long auxval = getauxval(AT_HWCAP);
if ((auxval & (HWCAP_ASIMD | HWCAP_AES)) ==
(HWCAP_ASIMD | HWCAP_AES)) {
@ -109,6 +152,7 @@ int mbedtls_aesce_has_support_impl(void)
} else {
mbedtls_aesce_has_support_result = 0;
}
#endif
}
return mbedtls_aesce_has_support_result;
}
@ -362,24 +406,91 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk,
#if defined(MBEDTLS_GCM_C)
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 5
#if defined(MBEDTLS_ARCH_IS_ARM32)
#if defined(__clang__)
/* On clang for A32/T32, work around some missing intrinsics and types which are listed in
* [ACLE](https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#polynomial-1)
* These are only required for GCM.
*/
#define vreinterpretq_u64_p64(a) ((uint64x2_t) a)
typedef uint8x16_t poly128_t;
static inline poly128_t vmull_p64(poly64_t a, poly64_t b)
{
poly128_t r;
asm ("vmull.p64 %[r], %[a], %[b]" : [r] "=w" (r) : [a] "w" (a), [b] "w" (b) :);
return r;
}
/* This is set to cause some more missing intrinsics to be defined below */
#define COMMON_MISSING_INTRINSICS
static inline poly128_t vmull_high_p64(poly64x2_t a, poly64x2_t b)
{
return vmull_p64((poly64_t) (vget_high_u64((uint64x2_t) a)),
(poly64_t) (vget_high_u64((uint64x2_t) b)));
}
#endif /* defined(__clang__) */
static inline uint8x16_t vrbitq_u8(uint8x16_t x)
{
/* There is no vrbitq_u8 instruction in A32/T32, so provide
* an equivalent non-Neon implementation. Reverse bit order in each
* byte with 4x rbit, rev. */
asm ("ldm %[p], { r2-r5 } \n\t"
"rbit r2, r2 \n\t"
"rev r2, r2 \n\t"
"rbit r3, r3 \n\t"
"rev r3, r3 \n\t"
"rbit r4, r4 \n\t"
"rev r4, r4 \n\t"
"rbit r5, r5 \n\t"
"rev r5, r5 \n\t"
"stm %[p], { r2-r5 } \n\t"
:
/* Output: 16 bytes of memory pointed to by &x */
"+m" (*(uint8_t(*)[16]) &x)
:
[p] "r" (&x)
:
"r2", "r3", "r4", "r5"
);
return x;
}
#endif /* defined(MBEDTLS_ARCH_IS_ARM32) */
#if defined(MBEDTLS_COMPILER_IS_GCC) && __GNUC__ == 5
/* Some intrinsics are not available for GCC 5.X. */
#define COMMON_MISSING_INTRINSICS
#endif /* MBEDTLS_COMPILER_IS_GCC && __GNUC__ == 5 */
#if defined(COMMON_MISSING_INTRINSICS)
/* Missing intrinsics common to both GCC 5, and Clang on 32-bit */
#define vreinterpretq_p64_u8(a) ((poly64x2_t) a)
#define vreinterpretq_u8_p128(a) ((uint8x16_t) a)
static inline poly64_t vget_low_p64(poly64x2_t __a)
static inline poly64x1_t vget_low_p64(poly64x2_t a)
{
uint64x2_t tmp = (uint64x2_t) (__a);
uint64x1_t lo = vcreate_u64(vgetq_lane_u64(tmp, 0));
return (poly64_t) (lo);
uint64x1_t r = vget_low_u64(vreinterpretq_u64_p64(a));
return (poly64x1_t) r;
}
#endif /* !__clang__ && __GNUC__ && __GNUC__ == 5*/
#endif /* COMMON_MISSING_INTRINSICS */
/* vmull_p64/vmull_high_p64 wrappers.
*
* Older compilers miss some intrinsic functions for `poly*_t`. We use
* uint8x16_t and uint8x16x3_t as input/output parameters.
*/
#if defined(__GNUC__) && !defined(__clang__)
#if defined(MBEDTLS_COMPILER_IS_GCC)
/* GCC reports incompatible type error without cast. GCC think poly64_t and
* poly64x1_t are different, that is different with MSVC and Clang. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64((poly64_t) a, (poly64_t) b)
@ -388,7 +499,8 @@ static inline poly64_t vget_low_p64(poly64x2_t __a)
* error with/without cast. And I think poly64_t and poly64x1_t are same, no
* cast for clang also. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64(a, b)
#endif
#endif /* MBEDTLS_COMPILER_IS_GCC */
static inline uint8x16_t pmull_low(uint8x16_t a, uint8x16_t b)
{
@ -464,7 +576,7 @@ static inline uint8x16_t poly_mult_reduce(uint8x16x3_t input)
/* use 'asm' as an optimisation barrier to prevent loading MODULO from
* memory. It is for GNUC compatible compilers.
*/
asm ("" : "+w" (r));
asm volatile ("" : "+w" (r));
#endif
uint8x16_t const MODULO = vreinterpretq_u8_u64(vshrq_n_u64(r, 64 - 8));
uint8x16_t h, m, l; /* input high/middle/low 128b */
@ -507,6 +619,6 @@ void mbedtls_aesce_gcm_mult(unsigned char c[16],
#undef MBEDTLS_POP_TARGET_PRAGMA
#endif
#endif /* MBEDTLS_ARCH_IS_ARM64 */
#endif /* MBEDTLS_ARCH_IS_ARMV8_A */
#endif /* MBEDTLS_AESCE_C */

View file

@ -2,7 +2,7 @@
* \file aesce.h
*
* \brief Support hardware AES acceleration on Armv8-A processors with
* the Armv8-A Cryptographic Extension in AArch64 execution state.
* the Armv8-A Cryptographic Extension.
*
* \warning These functions are only for internal use by other library
* functions; you must not call them directly.
@ -19,7 +19,7 @@
#include "mbedtls/aes.h"
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARM64)
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
#define MBEDTLS_AESCE_HAVE_CODE
@ -118,6 +118,12 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk,
}
#endif
#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARM64 */
#else
#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY) && defined(MBEDTLS_ARCH_IS_ARMV8_A)
#error "AES hardware acceleration not supported on this platform"
#endif
#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARMV8_A && __ARM_NEON */
#endif /* MBEDTLS_AESCE_H */

View file

@ -4665,6 +4665,63 @@ component_build_aes_aesce_armcc () {
armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8-a+crypto"
}
support_build_aes_armce() {
# clang >= 4 is required to build with AES extensions
ver="$(clang --version|grep version|sed -E 's#.*version ([0-9]+).*#\1#')"
[ "${ver}" -ge 11 ]
}
component_build_aes_armce () {
# Test variations of AES with Armv8 crypto extensions
scripts/config.py set MBEDTLS_AESCE_C
scripts/config.py set MBEDTLS_AES_USE_HARDWARE_ONLY
msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, aarch64"
make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a+crypto"
msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, arm"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm"
msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, thumb"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb"
scripts/config.py unset MBEDTLS_AES_USE_HARDWARE_ONLY
msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, aarch64"
make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a+crypto"
msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, arm"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm"
msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, thumb"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb"
# test for presence of AES instructions
scripts/config.py set MBEDTLS_AES_USE_HARDWARE_ONLY
msg "clang, test A32 crypto instructions built"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm -S"
grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
msg "clang, test T32 crypto instructions built"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb -S"
grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
msg "clang, test aarch64 crypto instructions built"
make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a -S"
grep -E 'aes[a-z]+\s*[qv]' library/aesce.o
# test for absence of AES instructions
scripts/config.py unset MBEDTLS_AES_USE_HARDWARE_ONLY
scripts/config.py unset MBEDTLS_AESCE_C
msg "clang, test A32 crypto instructions not built"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm -S"
not grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
msg "clang, test T32 crypto instructions not built"
make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb -S"
not grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
msg "clang, test aarch64 crypto instructions not built"
make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a -S"
not grep -E 'aes[a-z]+\s*[qv]' library/aesce.o
}
support_build_sha_armce() {
if command -v clang > /dev/null ; then
# clang >= 4 is required to build with SHA extensions
@ -5438,6 +5495,9 @@ component_build_armcc () {
# armc[56] don't support SHA-512 intrinsics
scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
# older versions of armcc/armclang don't support AESCE_C on 32-bit Arm
scripts/config.py unset MBEDTLS_AESCE_C
# Stop armclang warning about feature detection for A64_CRYPTO.
# With this enabled, the library does build correctly under armclang,
# but in baremetal builds (as tested here), feature detection is
@ -5470,14 +5530,18 @@ component_build_armcc () {
# ARM Compiler 6 - Target ARMv8-M
armc6_build_test "-O1 --target=arm-arm-none-eabi -march=armv8-m.main"
# ARM Compiler 6 - Target ARMv8.2-A - AArch64
armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
# ARM Compiler 6 - Target Cortex-M0 - no optimisation
armc6_build_test "-O0 --target=arm-arm-none-eabi -mcpu=cortex-m0"
# ARM Compiler 6 - Target Cortex-M0
armc6_build_test "-Os --target=arm-arm-none-eabi -mcpu=cortex-m0"
# ARM Compiler 6 - Target ARMv8.2-A - AArch64
#
# Re-enable MBEDTLS_AESCE_C as this should be supported by the version of armclang
# that we have in our CI
scripts/config.py set MBEDTLS_AESCE_C
armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
}
support_build_armcc () {