diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h index 11c3139bd..2a2c039d7 100644 --- a/include/mbedtls/mbedtls_config.h +++ b/include/mbedtls/mbedtls_config.h @@ -48,8 +48,11 @@ * Requires support for asm() in compiler. * * Used in: + * library/aesni.h * library/aria.c * library/bn_mul.h + * library/constant_time.c + * library/padlock.h * * Required by: * MBEDTLS_AESNI_C diff --git a/library/aesni.c b/library/aesni.c index d4abb4d6c..f6b304d15 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -37,12 +37,6 @@ #include -/* *INDENT-OFF* */ -#ifndef asm -#define asm __asm -#endif -/* *INDENT-ON* */ - #if defined(MBEDTLS_HAVE_X86_64) /* diff --git a/library/alignment.h b/library/alignment.h index bfc965eae..aa09ff856 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -29,6 +29,23 @@ #include "mbedtls/build_info.h" +/* + * Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS for architectures where unaligned memory + * accesses are known to be efficient. + * + * All functions defined here will behave correctly regardless, but might be less + * efficient when this is not defined. + */ +#if defined(__ARM_FEATURE_UNALIGNED) \ + || defined(__i386__) || defined(__amd64__) || defined(__x86_64__) +/* + * __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9 + * (and later versions) for Arm v7 and later; all x86 platforms should have + * efficient unaligned access. + */ +#define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS +#endif + /** * Read the unsigned 16 bits integer from the given address, which need not * be aligned. 
diff --git a/library/bn_mul.h b/library/bn_mul.h index 307c2418f..ab59fbd64 100644 --- a/library/bn_mul.h +++ b/library/bn_mul.h @@ -83,10 +83,6 @@ /* *INDENT-OFF* */ #if defined(MBEDTLS_HAVE_ASM) -#ifndef asm -#define asm __asm -#endif - /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */ #if defined(__GNUC__) && \ ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) diff --git a/library/common.h b/library/common.h index fd3ddba48..46af79f0d 100644 --- a/library/common.h +++ b/library/common.h @@ -122,11 +122,13 @@ static inline const unsigned char *mbedtls_buffer_offset_const( */ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n) { - size_t i; - for (i = 0; (i + 4) <= n; i += 4) { + size_t i = 0; +#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) + for (; (i + 4) <= n; i += 4) { uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i); mbedtls_put_unaligned_uint32(r + i, x); } +#endif for (; i < n; i++) { r[i] = a[i] ^ b[i]; } @@ -140,4 +142,11 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned #define /*no-check-names*/ __func__ __FUNCTION__ #endif +/* Define `asm` for compilers which don't define it. */ +/* *INDENT-OFF* */ +#ifndef asm +#define asm __asm__ +#endif +/* *INDENT-ON* */ + #endif /* MBEDTLS_LIBRARY_COMMON_H */ diff --git a/library/constant_time.c b/library/constant_time.c index 442eb0e7a..7f4d509bc 100644 --- a/library/constant_time.c +++ b/library/constant_time.c @@ -47,16 +47,63 @@ #include +/* + * Define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS where assembly is present to + * perform fast unaligned access to volatile data. + * + * This is needed because mbedtls_get_unaligned_uintXX etc don't support volatile + * memory accesses. + * + * Some of these definitions could be moved into alignment.h but for now they are + * only used here. 
+ */ +#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) && defined(MBEDTLS_HAVE_ASM) +#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(__aarch64__) +#define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS +#endif +#endif + +#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS) +static inline uint32_t mbedtls_get_unaligned_volatile_uint32(volatile const unsigned char *p) +{ + /* This is UB, even where it's safe: + * return *((volatile uint32_t*)p); + * so instead the same thing is expressed in assembly below. + */ + uint32_t r; +#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__) + asm ("ldr %0, [%1]" : "=r" (r) : "r" (p) :); +#elif defined(__aarch64__) + asm ("ldr %w0, [%1]" : "=r" (r) : "r" (p) :); +#endif + return r; +} +#endif /* MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS */ + int mbedtls_ct_memcmp(const void *a, const void *b, size_t n) { - size_t i; + size_t i = 0; + /* + * `A` and `B` are cast to volatile to ensure that the compiler + * generates code that always fully reads both buffers. + * Otherwise it could generate a test to exit early if `diff` has all + * bits set early in the loop. + */ volatile const unsigned char *A = (volatile const unsigned char *) a; volatile const unsigned char *B = (volatile const unsigned char *) b; - volatile unsigned char diff = 0; + uint32_t diff = 0; - for (i = 0; i < n; i++) { +#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS) + for (; (i + 4) <= n; i += 4) { + uint32_t x = mbedtls_get_unaligned_volatile_uint32(A + i); + uint32_t y = mbedtls_get_unaligned_volatile_uint32(B + i); + diff |= x ^ y; + } +#endif + + for (; i < n; i++) { /* Read volatile data in order before computing diff. * This avoids IAR compiler warning: * 'the order of volatile accesses is undefined ..' */ @@ -414,10 +461,22 @@ void mbedtls_ct_memcpy_if_eq(unsigned char *dest, { /* mask = c1 == c2 ? 
0xff : 0x00 */ const size_t equal = mbedtls_ct_size_bool_eq(c1, c2); - const unsigned char mask = (unsigned char) mbedtls_ct_size_mask(equal); /* dest[i] = c1 == c2 ? src[i] : dest[i] */ - for (size_t i = 0; i < len; i++) { + size_t i = 0; +#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) + const uint32_t mask32 = (uint32_t) mbedtls_ct_size_mask(equal); + const unsigned char mask = (unsigned char) mask32 & 0xff; + + for (; (i + 4) <= len; i += 4) { + uint32_t a = mbedtls_get_unaligned_uint32(src + i) & mask32; + uint32_t b = mbedtls_get_unaligned_uint32(dest + i) & ~mask32; + mbedtls_put_unaligned_uint32(dest + i, a | b); + } +#else + const unsigned char mask = (unsigned char) mbedtls_ct_size_mask(equal); +#endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */ + for (; i < len; i++) { dest[i] = (src[i] & mask) | (dest[i] & ~mask); } } diff --git a/library/padlock.c b/library/padlock.c index b6c6919cd..f42c40ff9 100644 --- a/library/padlock.c +++ b/library/padlock.c @@ -31,12 +31,6 @@ #include -/* *INDENT-OFF* */ -#ifndef asm -#define asm __asm -#endif -/* *INDENT-ON* */ - #if defined(MBEDTLS_HAVE_X86) /* diff --git a/library/sha256.c b/library/sha256.c index 16fd20d8c..cb09a71ec 100644 --- a/library/sha256.c +++ b/library/sha256.c @@ -89,12 +89,6 @@ static int mbedtls_a64_crypto_sha256_determine_support(void) #include #include -/* *INDENT-OFF* */ -#ifndef asm -#define asm __asm__ -#endif -/* *INDENT-ON* */ - static jmp_buf return_from_sigill; /* diff --git a/library/sha512.c b/library/sha512.c index 0ea64218b..efcbed413 100644 --- a/library/sha512.c +++ b/library/sha512.c @@ -104,12 +104,6 @@ static int mbedtls_a64_crypto_sha512_determine_support(void) #include #include -/* *INDENT-OFF* */ -#ifndef asm -#define asm __asm__ -#endif -/* *INDENT-ON* */ - static jmp_buf return_from_sigill; /* @@ -300,12 +294,6 @@ static const uint64_t K[80] = # define mbedtls_internal_sha512_process_a64_crypto mbedtls_internal_sha512_process #endif -/* *INDENT-OFF* */ -#ifndef asm -#define asm 
__asm__ -#endif -/* *INDENT-ON* */ - /* Accelerated SHA-512 implementation originally written by Simon Tatham for PuTTY, * under the MIT licence; dual-licensed as Apache 2 with his kind permission. */ diff --git a/tests/suites/test_suite_alignment.function b/tests/suites/test_suite_alignment.function index 6c98f233a..f6703318c 100644 --- a/tests/suites/test_suite_alignment.function +++ b/tests/suites/test_suite_alignment.function @@ -6,7 +6,6 @@ #if defined(__clang__) #pragma clang diagnostic ignored "-Wunreachable-code" #endif -#include /* * Convert a string of the form "abcd" (case-insensitive) to a uint64_t. diff --git a/tests/suites/test_suite_constant_time.data b/tests/suites/test_suite_constant_time.data index 4504aa4d6..91a25facc 100644 --- a/tests/suites/test_suite_constant_time.data +++ b/tests/suites/test_suite_constant_time.data @@ -9,3 +9,129 @@ ssl_cf_memcpy_offset:0:255:32 # we could get this with 255-bytes plaintext and untruncated SHA-384 Constant-flow memcpy from offset: large ssl_cf_memcpy_offset:100:339:48 + +mbedtls_ct_memcmp NULL +mbedtls_ct_memcmp_null + +mbedtls_ct_memcmp len 1 +mbedtls_ct_memcmp:-1:1:0 + +mbedtls_ct_memcmp len 3 +mbedtls_ct_memcmp:-1:3:0 + +mbedtls_ct_memcmp len 4 +mbedtls_ct_memcmp:-1:4:0 + +mbedtls_ct_memcmp len 5 +mbedtls_ct_memcmp:-1:5:0 + +mbedtls_ct_memcmp len 15 +mbedtls_ct_memcmp:-1:15:0 + +mbedtls_ct_memcmp len 16 +mbedtls_ct_memcmp:-1:16:0 + +mbedtls_ct_memcmp len 17 +mbedtls_ct_memcmp:-1:17:0 + +mbedtls_ct_memcmp len 1 different +mbedtls_ct_memcmp:0:1:0 + +mbedtls_ct_memcmp len 17 different +mbedtls_ct_memcmp:0:17:0 + +mbedtls_ct_memcmp len 17 different 1 +mbedtls_ct_memcmp:1:17:0 + +mbedtls_ct_memcmp len 17 different 4 +mbedtls_ct_memcmp:4:17:0 + +mbedtls_ct_memcmp len 17 different 10 +mbedtls_ct_memcmp:10:17:0 + +mbedtls_ct_memcmp len 17 different 16 +mbedtls_ct_memcmp:16:17:0 + +mbedtls_ct_memcmp len 1 offset 1 different +mbedtls_ct_memcmp:0:1:1 + +mbedtls_ct_memcmp len 17 offset 1 different 
+mbedtls_ct_memcmp:0:17:1
+
+mbedtls_ct_memcmp len 17 offset 1 different 1
+mbedtls_ct_memcmp:1:17:1
+
+mbedtls_ct_memcmp len 17 offset 1 different 5
+mbedtls_ct_memcmp:5:17:1
+
+mbedtls_ct_memcmp len 1 offset 1
+mbedtls_ct_memcmp:-1:1:1
+
+mbedtls_ct_memcmp len 1 offset 2
+mbedtls_ct_memcmp:-1:1:2
+
+mbedtls_ct_memcmp len 1 offset 3
+mbedtls_ct_memcmp:-1:1:3
+
+mbedtls_ct_memcmp len 5 offset 1
+mbedtls_ct_memcmp:-1:5:1
+
+mbedtls_ct_memcmp len 5 offset 2
+mbedtls_ct_memcmp:-1:5:2
+
+mbedtls_ct_memcmp len 5 offset 3
+mbedtls_ct_memcmp:-1:5:3
+
+mbedtls_ct_memcmp len 17 offset 1
+mbedtls_ct_memcmp:-1:17:1
+
+mbedtls_ct_memcmp len 17 offset 2
+mbedtls_ct_memcmp:-1:17:2
+
+mbedtls_ct_memcmp len 17 offset 3
+mbedtls_ct_memcmp:-1:17:3
+
+mbedtls_ct_memcpy_if_eq len 1 offset 0
+mbedtls_ct_memcpy_if_eq:1:1:0
+
+mbedtls_ct_memcpy_if_eq len 1 offset 1
+mbedtls_ct_memcpy_if_eq:1:1:1
+
+mbedtls_ct_memcpy_if_eq len 4 offset 0
+mbedtls_ct_memcpy_if_eq:1:4:0
+
+mbedtls_ct_memcpy_if_eq len 4 offset 1
+mbedtls_ct_memcpy_if_eq:1:4:1
+
+mbedtls_ct_memcpy_if_eq len 4 offset 2
+mbedtls_ct_memcpy_if_eq:1:4:2
+
+mbedtls_ct_memcpy_if_eq len 4 offset 3
+mbedtls_ct_memcpy_if_eq:1:4:3
+
+mbedtls_ct_memcpy_if_eq len 15 offset 0
+mbedtls_ct_memcpy_if_eq:1:15:0
+
+mbedtls_ct_memcpy_if_eq len 15 offset 1
+mbedtls_ct_memcpy_if_eq:1:15:1
+
+mbedtls_ct_memcpy_if_eq len 16 offset 0
+mbedtls_ct_memcpy_if_eq:1:16:0
+
+mbedtls_ct_memcpy_if_eq len 16 offset 1
+mbedtls_ct_memcpy_if_eq:1:16:1
+
+mbedtls_ct_memcpy_if_eq len 17 offset 0
+mbedtls_ct_memcpy_if_eq:1:17:0
+
+mbedtls_ct_memcpy_if_eq len 17 offset 1
+mbedtls_ct_memcpy_if_eq:1:17:1
+
+mbedtls_ct_memcpy_if_eq len 17 offset 0 not eq
+mbedtls_ct_memcpy_if_eq:0:17:0
+
+mbedtls_ct_memcpy_if_eq len 5 offset 1 not eq
+mbedtls_ct_memcpy_if_eq:0:5:1
+
+mbedtls_ct_memcpy_if_eq len 17 offset 3 not eq
+mbedtls_ct_memcpy_if_eq:0:17:3
diff --git a/tests/suites/test_suite_constant_time.function b/tests/suites/test_suite_constant_time.function index a40149ab4..14dc8ae5c
100644 --- a/tests/suites/test_suite_constant_time.function +++ b/tests/suites/test_suite_constant_time.function @@ -15,6 +15,108 @@ #include /* END_HEADER */ +/* BEGIN_CASE */ +void mbedtls_ct_memcmp_null() +{ + uint32_t x; + TEST_ASSERT(mbedtls_ct_memcmp(&x, NULL, 0) == 0); + TEST_ASSERT(mbedtls_ct_memcmp(NULL, &x, 0) == 0); + TEST_ASSERT(mbedtls_ct_memcmp(NULL, NULL, 0) == 0); +} +/* END_CASE */ + +/* BEGIN_CASE */ +void mbedtls_ct_memcmp(int same, int size, int offset) +{ + uint8_t *a = NULL, *b = NULL; + ASSERT_ALLOC(a, size + offset); + ASSERT_ALLOC(b, size + offset); + + TEST_CF_SECRET(a + offset, size); + TEST_CF_SECRET(b + offset, size); + + /* Construct data that matches, if same == -1, otherwise + * same gives the number of bytes (after the initial offset) + * that will match; after that it will differ. + */ + for (int i = 0; i < size + offset; i++) { + a[i] = i & 0xff; + if (same == -1 || (i - offset) < same) { + b[i] = a[i]; + } else { + b[i] = (i + 1) & 0xff; + } + } + + int reference = memcmp(a + offset, b + offset, size); + int actual = mbedtls_ct_memcmp(a + offset, b + offset, size); + TEST_CF_PUBLIC(a + offset, size); + TEST_CF_PUBLIC(b + offset, size); + + if (same == -1 || same >= size) { + TEST_ASSERT(reference == 0); + TEST_ASSERT(actual == 0); + } else { + TEST_ASSERT(reference != 0); + TEST_ASSERT(actual != 0); + } +exit: + mbedtls_free(a); + mbedtls_free(b); +} +/* END_CASE */ + +/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_MAC */ +void mbedtls_ct_memcpy_if_eq(int eq, int size, int offset) +{ + uint8_t *src = NULL, *result = NULL, *expected = NULL; + ASSERT_ALLOC(src, size + offset); + ASSERT_ALLOC(result, size + offset); + ASSERT_ALLOC(expected, size + offset); + + for (int i = 0; i < size + offset; i++) { + src[i] = 1; + result[i] = 0xff; + expected[i] = eq ? 
1 : 0xff; + } + + int one, secret_eq; + TEST_CF_SECRET(&one, sizeof(one)); + TEST_CF_SECRET(&secret_eq, sizeof(secret_eq)); + one = 1; + secret_eq = eq; + + mbedtls_ct_memcpy_if_eq(result + offset, src, size, secret_eq, one); + + TEST_CF_PUBLIC(&one, sizeof(one)); + TEST_CF_PUBLIC(&secret_eq, sizeof(secret_eq)); + + ASSERT_COMPARE(expected, size, result + offset, size); + + for (int i = 0; i < size + offset; i++) { + src[i] = 1; + result[i] = 0xff; + expected[i] = eq ? 1 : 0xff; + } + + TEST_CF_SECRET(&one, sizeof(one)); + TEST_CF_SECRET(&secret_eq, sizeof(secret_eq)); + one = 1; + secret_eq = eq; + + mbedtls_ct_memcpy_if_eq(result, src + offset, size, secret_eq, one); + + TEST_CF_PUBLIC(&one, sizeof(one)); + TEST_CF_PUBLIC(&secret_eq, sizeof(secret_eq)); + + ASSERT_COMPARE(expected, size, result, size); +exit: + mbedtls_free(src); + mbedtls_free(result); + mbedtls_free(expected); +} +/* END_CASE */ + /* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */ void ssl_cf_memcpy_offset(int offset_min, int offset_max, int len) {