From 00b530e3957061a06663e1785dc923ee0b7e7c95 Mon Sep 17 00:00:00 2001
From: Dave Rodgman
Date: Tue, 23 Jan 2024 09:36:34 +0000
Subject: [PATCH] Limit compiler hint to compilers that are known to benefit from it

Signed-off-by: Dave Rodgman
---
 library/common.h | 50 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/library/common.h b/library/common.h
index 2eb917037..937c80284 100644
--- a/library/common.h
+++ b/library/common.h
@@ -199,30 +199,40 @@ static inline void mbedtls_xor(unsigned char *r,
         uint8x16_t x = veorq_u8(v1, v2);
         vst1q_u8(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
     /* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
      * where n is a constant multiple of 16.
-     * It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
-     * constant, and very little difference if n is not a compile-time constant. */
-    if (n % 16 != 0)
+     * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+     * constant, and is a very small perf regression if n is not a compile-time constant. */
+    if (n % 16 == 0) {
+        return;
+    }
+#endif
 #elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
     /* This codepath probably only makes sense on architectures with 64-bit registers */
     for (; (i + 8) <= n; i += 8) {
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
-    if (n % 8 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 8 == 0) {
+        return;
+    }
+#endif
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
-    if (n % 4 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 4 == 0) {
+        return;
+    }
 #endif
 #endif
-    {
-        for (; i < n; i++) {
-            r[i] = a[i] ^ b[i];
-        }
+#endif
+    for (; i < n; i++) {
+        r[i] = a[i] ^ b[i];
     }
 }
 
@@ -268,23 +278,29 @@ static inline void mbedtls_xor_no_simd(unsigned char *r,
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
+#if defined(__IAR_SYSTEMS_ICC__)
     /* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
      * where n is a constant multiple of 16.
-     * It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
-     * constant, and very little difference if n is not a compile-time constant. */
-    if (n % 8 != 0)
+     * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+     * constant, and is a very small perf regression if n is not a compile-time constant. */
+    if (n % 8 == 0) {
+        return;
+    }
+#endif
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
-    if (n % 4 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+    if (n % 4 == 0) {
+        return;
+    }
 #endif
 #endif
-    {
-        for (; i < n; i++) {
-            r[i] = a[i] ^ b[i];
-        }
+#endif
+    for (; i < n; i++) {
+        r[i] = a[i] ^ b[i];
     }
 }
 