Merge pull request #6832 from daverodgman/fast-unaligned-ct

Improve efficiency of some constant time functions
This commit is contained in:
Janos Follath 2023-01-23 10:55:35 +00:00 committed by GitHub
commit 3e0769b598
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 323 additions and 42 deletions

View file

@ -48,8 +48,11 @@
* Requires support for asm() in compiler.
*
* Used in:
* library/aesni.h
* library/aria.c
* library/bn_mul.h
* library/constant_time.c
* library/padlock.h
*
* Required by:
* MBEDTLS_AESNI_C

View file

@ -37,12 +37,6 @@
#include <string.h>
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm
#endif
/* *INDENT-ON* */
#if defined(MBEDTLS_HAVE_X86_64)
/*

View file

@ -29,6 +29,23 @@
#include "mbedtls/build_info.h"
/*
* Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS for architectures where unaligned memory
* accesses are known to be efficient.
*
* All functions defined here will behave correctly regardless, but might be less
* efficient when this is not defined.
*/
#if defined(__ARM_FEATURE_UNALIGNED) \
|| defined(__i386__) || defined(__amd64__) || defined(__x86_64__)
/*
* __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9
* (and later versions) for Arm v7 and later; all x86 platforms should have
* efficient unaligned access.
*/
#define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
#endif
/**
* Read an unsigned 16-bit integer from the given address, which need not
* be aligned.

View file

@ -83,10 +83,6 @@
/* *INDENT-OFF* */
#if defined(MBEDTLS_HAVE_ASM)
#ifndef asm
#define asm __asm
#endif
/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
#if defined(__GNUC__) && \
( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )

View file

@ -122,11 +122,13 @@ static inline const unsigned char *mbedtls_buffer_offset_const(
*/
inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned char *b, size_t n)
{
size_t i;
for (i = 0; (i + 4) <= n; i += 4) {
size_t i = 0;
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
for (; (i + 4) <= n; i += 4) {
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
mbedtls_put_unaligned_uint32(r + i, x);
}
#endif
for (; i < n; i++) {
r[i] = a[i] ^ b[i];
}
@ -140,4 +142,11 @@ inline void mbedtls_xor(unsigned char *r, const unsigned char *a, const unsigned
#define /*no-check-names*/ __func__ __FUNCTION__
#endif
/* Define `asm` for compilers which don't define it. */
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm__
#endif
/* *INDENT-ON* */
#endif /* MBEDTLS_LIBRARY_COMMON_H */

View file

@ -47,16 +47,63 @@
#include <string.h>
/*
* Define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS where assembly is present to
* perform fast unaligned access to volatile data.
*
* This is needed because mbedtls_get_unaligned_uintXX etc don't support volatile
* memory accesses.
*
* Some of these definitions could be moved into alignment.h but for now they are
* only used here.
*/
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) && defined(MBEDTLS_HAVE_ASM)
#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(__aarch64__)
#define MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS
#endif
#endif
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS)
/**
 * Read a 32-bit unsigned integer from a possibly-unaligned volatile address.
 *
 * \param p  Address to read from; need not be 4-byte aligned.
 * \return   The 32 bits at \p p.
 *
 * Only compiled when MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS is defined,
 * which (per the #if above) requires one of __arm__/__thumb__/__thumb2__/
 * __aarch64__, so exactly one of the asm branches below is always selected
 * and \c r is always written before being returned.
 */
static inline uint32_t mbedtls_get_unaligned_volatile_uint32(volatile const unsigned char *p)
{
    /* This is UB, even where it's safe:
     *    return *((volatile uint32_t*)p);
     * so instead the same thing is expressed in assembly below.
     */
    uint32_t r;
#if defined(__arm__) || defined(__thumb__) || defined(__thumb2__)
    /* 32-bit Arm/Thumb: plain LDR from the (possibly unaligned) address. */
    asm ("ldr %0, [%1]" : "=r" (r) : "r" (p) :);
#elif defined(__aarch64__)
    /* AArch64: load into the 32-bit view (%w0) of the output register. */
    asm ("ldr %w0, [%1]" : "=r" (r) : "r" (p) :);
#endif
    return r;
}
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS */
int mbedtls_ct_memcmp(const void *a,
const void *b,
size_t n)
{
size_t i;
size_t i = 0;
/*
* `A` and `B` are cast to volatile to ensure that the compiler
* generates code that always fully reads both buffers.
* Otherwise it could generate a test to exit early if `diff` has all
* bits set early in the loop.
*/
volatile const unsigned char *A = (volatile const unsigned char *) a;
volatile const unsigned char *B = (volatile const unsigned char *) b;
volatile unsigned char diff = 0;
uint32_t diff = 0;
for (i = 0; i < n; i++) {
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_VOLATILE_ACCESS)
for (; (i + 4) <= n; i += 4) {
uint32_t x = mbedtls_get_unaligned_volatile_uint32(A + i);
uint32_t y = mbedtls_get_unaligned_volatile_uint32(B + i);
diff |= x ^ y;
}
#endif
for (; i < n; i++) {
/* Read volatile data in order before computing diff.
* This avoids IAR compiler warning:
* 'the order of volatile accesses is undefined ..' */
@ -414,10 +461,22 @@ void mbedtls_ct_memcpy_if_eq(unsigned char *dest,
{
/* mask = c1 == c2 ? 0xff : 0x00 */
const size_t equal = mbedtls_ct_size_bool_eq(c1, c2);
const unsigned char mask = (unsigned char) mbedtls_ct_size_mask(equal);
/* dest[i] = c1 == c2 ? src[i] : dest[i] */
for (size_t i = 0; i < len; i++) {
size_t i = 0;
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
const uint32_t mask32 = (uint32_t) mbedtls_ct_size_mask(equal);
const unsigned char mask = (unsigned char) mask32 & 0xff;
for (; (i + 4) <= len; i += 4) {
uint32_t a = mbedtls_get_unaligned_uint32(src + i) & mask32;
uint32_t b = mbedtls_get_unaligned_uint32(dest + i) & ~mask32;
mbedtls_put_unaligned_uint32(dest + i, a | b);
}
#else
const unsigned char mask = (unsigned char) mbedtls_ct_size_mask(equal);
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */
for (; i < len; i++) {
dest[i] = (src[i] & mask) | (dest[i] & ~mask);
}
}

View file

@ -31,12 +31,6 @@
#include <string.h>
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm
#endif
/* *INDENT-ON* */
#if defined(MBEDTLS_HAVE_X86)
/*

View file

@ -89,12 +89,6 @@ static int mbedtls_a64_crypto_sha256_determine_support(void)
#include <signal.h>
#include <setjmp.h>
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm__
#endif
/* *INDENT-ON* */
static jmp_buf return_from_sigill;
/*

View file

@ -104,12 +104,6 @@ static int mbedtls_a64_crypto_sha512_determine_support(void)
#include <signal.h>
#include <setjmp.h>
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm__
#endif
/* *INDENT-ON* */
static jmp_buf return_from_sigill;
/*
@ -300,12 +294,6 @@ static const uint64_t K[80] =
# define mbedtls_internal_sha512_process_a64_crypto mbedtls_internal_sha512_process
#endif
/* *INDENT-OFF* */
#ifndef asm
#define asm __asm__
#endif
/* *INDENT-ON* */
/* Accelerated SHA-512 implementation originally written by Simon Tatham for PuTTY,
* under the MIT licence; dual-licensed as Apache 2 with his kind permission.
*/

View file

@ -6,7 +6,6 @@
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wunreachable-code"
#endif
#include <stdio.h>
/*
* Convert a string of the form "abcd" (case-insensitive) to a uint64_t.

View file

@ -9,3 +9,129 @@ ssl_cf_memcpy_offset:0:255:32
# we could get this with 255-bytes plaintext and untruncated SHA-384
Constant-flow memcpy from offset: large
ssl_cf_memcpy_offset:100:339:48
mbedtls_ct_memcmp NULL
mbedtls_ct_memcmp_null
mbedtls_ct_memcmp len 1
mbedtls_ct_memcmp:-1:1:0
mbedtls_ct_memcmp len 3
mbedtls_ct_memcmp:-1:3:0
mbedtls_ct_memcmp len 4
mbedtls_ct_memcmp:-1:4:0
mbedtls_ct_memcmp len 5
mbedtls_ct_memcmp:-1:5:0
mbedtls_ct_memcmp len 15
mbedtls_ct_memcmp:-1:15:0
mbedtls_ct_memcmp len 16
mbedtls_ct_memcmp:-1:16:0
mbedtls_ct_memcmp len 17
mbedtls_ct_memcmp:-1:17:0
mbedtls_ct_memcmp len 1 different
mbedtls_ct_memcmp:0:1:0
mbedtls_ct_memcmp len 17 different
mbedtls_ct_memcmp:0:17:0
mbedtls_ct_memcmp len 17 different 1
mbedtls_ct_memcmp:1:17:0
mbedtls_ct_memcmp len 17 different 4
mbedtls_ct_memcmp:4:17:0
mbedtls_ct_memcmp len 17 different 10
mbedtls_ct_memcmp:10:17:0
mbedtls_ct_memcmp len 17 different 16
mbedtls_ct_memcmp:16:17:0
mbedtls_ct_memcmp len 1 offset 1 different
mbedtls_ct_memcmp:0:1:1
mbedtls_ct_memcmp len 17 offset 1 different
mbedtls_ct_memcmp:0:17:1
mbedtls_ct_memcmp len 17 offset 1 different 1
mbedtls_ct_memcmp:1:17:1
mbedtls_ct_memcmp len 17 offset 1 different 5
mbedtls_ct_memcmp:5:17:1
mbedtls_ct_memcmp len 1 offset 1
mbedtls_ct_memcmp:-1:1:1
mbedtls_ct_memcmp len 1 offset 2
mbedtls_ct_memcmp:-1:1:2
mbedtls_ct_memcmp len 1 offset 3
mbedtls_ct_memcmp:-1:1:3
mbedtls_ct_memcmp len 5 offset 1
mbedtls_ct_memcmp:-1:5:1
mbedtls_ct_memcmp len 5 offset 2
mbedtls_ct_memcmp:-1:5:2
mbedtls_ct_memcmp len 5 offset 3
mbedtls_ct_memcmp:-1:5:3
mbedtls_ct_memcmp len 17 offset 1
mbedtls_ct_memcmp:-1:17:1
mbedtls_ct_memcmp len 17 offset 2
mbedtls_ct_memcmp:-1:17:2
mbedtls_ct_memcmp len 17 offset 3
mbedtls_ct_memcmp:-1:17:3
mbedtls_ct_memcpy_if_eq len 1 offset 0
mbedtls_ct_memcpy_if_eq:1:1:0
mbedtls_ct_memcpy_if_eq len 1 offset 1
mbedtls_ct_memcpy_if_eq:1:1:1
mbedtls_ct_memcpy_if_eq len 4 offset 0
mbedtls_ct_memcpy_if_eq:1:4:0
mbedtls_ct_memcpy_if_eq len 4 offset 1
mbedtls_ct_memcpy_if_eq:1:4:1
mbedtls_ct_memcpy_if_eq len 4 offset 2
mbedtls_ct_memcpy_if_eq:1:4:2
mbedtls_ct_memcpy_if_eq len 4 offset 3
mbedtls_ct_memcpy_if_eq:1:4:3
mbedtls_ct_memcpy_if_eq len 15 offset 0
mbedtls_ct_memcpy_if_eq:1:15:0
mbedtls_ct_memcpy_if_eq len 15 offset 1
mbedtls_ct_memcpy_if_eq:1:15:1
mbedtls_ct_memcpy_if_eq len 16 offset 0
mbedtls_ct_memcpy_if_eq:1:16:0
mbedtls_ct_memcpy_if_eq len 16 offset 1
mbedtls_ct_memcpy_if_eq:1:16:1
mbedtls_ct_memcpy_if_eq len 17 offset 0
mbedtls_ct_memcpy_if_eq:1:17:0
mbedtls_ct_memcpy_if_eq len 17 offset 1
mbedtls_ct_memcpy_if_eq:1:17:1
mbedtls_ct_memcpy_if_eq len 17 not eq
mbedtls_ct_memcpy_if_eq:0:17:0
mbedtls_ct_memcpy_if_eq len 5 offset 1 not eq
mbedtls_ct_memcpy_if_eq:0:5:1
mbedtls_ct_memcpy_if_eq len 17 offset 3 not eq
mbedtls_ct_memcpy_if_eq:0:17:3

View file

@ -15,6 +15,108 @@
#include <test/constant_flow.h>
/* END_HEADER */
/* BEGIN_CASE */
/* mbedtls_ct_memcmp() must tolerate NULL operands when the length is 0
 * and report the buffers as equal. */
void mbedtls_ct_memcmp_null()
{
    uint32_t dummy;
    TEST_ASSERT(mbedtls_ct_memcmp(&dummy, NULL, 0) == 0);
    TEST_ASSERT(mbedtls_ct_memcmp(NULL, &dummy, 0) == 0);
    TEST_ASSERT(mbedtls_ct_memcmp(NULL, NULL, 0) == 0);
}
/* END_CASE */
/* BEGIN_CASE */
/* Cross-check mbedtls_ct_memcmp() against memcmp().
 *
 * same == -1 means the buffers are identical; otherwise `same` is the
 * number of bytes (counted from `offset`) that agree before the first
 * difference. `offset` shifts the compared region so that unaligned
 * starting addresses are also exercised.
 */
void mbedtls_ct_memcmp(int same, int size, int offset)
{
    uint8_t *buf_a = NULL, *buf_b = NULL;
    ASSERT_ALLOC(buf_a, size + offset);
    ASSERT_ALLOC(buf_b, size + offset);

    TEST_CF_SECRET(buf_a + offset, size);
    TEST_CF_SECRET(buf_b + offset, size);

    /* Fill the buffers: bytes agree while `matching` holds, differ after. */
    for (int i = 0; i < size + offset; i++) {
        buf_a[i] = i & 0xff;
        int matching = (same == -1) || ((i - offset) < same);
        buf_b[i] = matching ? buf_a[i] : ((i + 1) & 0xff);
    }

    int reference = memcmp(buf_a + offset, buf_b + offset, size);
    int actual = mbedtls_ct_memcmp(buf_a + offset, buf_b + offset, size);

    TEST_CF_PUBLIC(buf_a + offset, size);
    TEST_CF_PUBLIC(buf_b + offset, size);

    int expect_equal = (same == -1) || (same >= size);
    if (expect_equal) {
        TEST_ASSERT(reference == 0);
        TEST_ASSERT(actual == 0);
    } else {
        TEST_ASSERT(reference != 0);
        TEST_ASSERT(actual != 0);
    }

exit:
    mbedtls_free(buf_a);
    mbedtls_free(buf_b);
}
/* END_CASE */
/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_MAC */
/* Check mbedtls_ct_memcpy_if_eq(): the copy must take place exactly when
 * the two compared values are equal. Pass 0 uses an unaligned destination,
 * pass 1 an unaligned source, so both unaligned paths get exercised. */
void mbedtls_ct_memcpy_if_eq(int eq, int size, int offset)
{
    uint8_t *src = NULL, *result = NULL, *expected = NULL;
    ASSERT_ALLOC(src, size + offset);
    ASSERT_ALLOC(result, size + offset);
    ASSERT_ALLOC(expected, size + offset);

    for (int pass = 0; pass < 2; pass++) {
        uint8_t *dst = (pass == 0) ? result + offset : result;
        const uint8_t *from = (pass == 0) ? src : src + offset;

        /* Reset the buffers: src is all 1s, dest is all 0xff, and the
         * expected outcome depends on whether the copy should happen. */
        for (int i = 0; i < size + offset; i++) {
            src[i] = 1;
            result[i] = 0xff;
            expected[i] = eq ? 1 : 0xff;
        }

        int one, secret_eq;
        TEST_CF_SECRET(&one, sizeof(one));
        TEST_CF_SECRET(&secret_eq, sizeof(secret_eq));
        one = 1;
        secret_eq = eq;
        mbedtls_ct_memcpy_if_eq(dst, from, size, secret_eq, one);
        TEST_CF_PUBLIC(&one, sizeof(one));
        TEST_CF_PUBLIC(&secret_eq, sizeof(secret_eq));

        ASSERT_COMPARE(expected, size, dst, size);
    }

exit:
    mbedtls_free(src);
    mbedtls_free(result);
    mbedtls_free(expected);
}
/* END_CASE */
/* BEGIN_CASE depends_on:MBEDTLS_SSL_SOME_SUITES_USE_TLS_CBC:MBEDTLS_TEST_HOOKS */
void ssl_cf_memcpy_offset(int offset_min, int offset_max, int len)
{