Perf improvement in memcpy_if

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
This commit is contained in:
Dave Rodgman 2023-05-19 10:33:21 +01:00 committed by Dave Rodgman
parent 246210e3c4
commit 42391b4378

View file

@ -152,8 +152,13 @@ void mbedtls_ct_memcpy_if(mbedtls_ct_condition_t condition,
const unsigned char *src2, const unsigned char *src2,
size_t len) size_t len)
{ {
#if defined(MBEDTLS_CT_SIZE_64)
const uint64_t mask = (uint64_t) condition;
const uint64_t not_mask = (uint64_t) ~mbedtls_ct_compiler_opaque(condition);
#else
const uint32_t mask = (uint32_t) condition; const uint32_t mask = (uint32_t) condition;
const uint32_t not_mask = (uint32_t) ~mbedtls_ct_compiler_opaque(condition); const uint32_t not_mask = (uint32_t) ~mbedtls_ct_compiler_opaque(condition);
#endif
/* If src2 is NULL, setup src2 so that we read from the destination address. /* If src2 is NULL, setup src2 so that we read from the destination address.
* *
@ -167,11 +172,19 @@ void mbedtls_ct_memcpy_if(mbedtls_ct_condition_t condition,
/* dest[i] = c1 == c2 ? src[i] : dest[i] */ /* dest[i] = c1 == c2 ? src[i] : dest[i] */
size_t i = 0; size_t i = 0;
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) #if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
#if defined(MBEDTLS_CT_SIZE_64)
for (; (i + 8) <= len; i += 8) {
uint64_t a = mbedtls_get_unaligned_uint64(src1 + i) & mask;
uint64_t b = mbedtls_get_unaligned_uint64(src2 + i) & not_mask;
mbedtls_put_unaligned_uint64(dest + i, a | b);
}
#else
for (; (i + 4) <= len; i += 4) { for (; (i + 4) <= len; i += 4) {
uint32_t a = mbedtls_get_unaligned_uint32(src1 + i) & mask; uint32_t a = mbedtls_get_unaligned_uint32(src1 + i) & mask;
uint32_t b = mbedtls_get_unaligned_uint32(src2 + i) & not_mask; uint32_t b = mbedtls_get_unaligned_uint32(src2 + i) & not_mask;
mbedtls_put_unaligned_uint32(dest + i, a | b); mbedtls_put_unaligned_uint32(dest + i, a | b);
} }
#endif /* defined(MBEDTLS_CT_SIZE_64) */
#endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */ #endif /* MBEDTLS_EFFICIENT_UNALIGNED_ACCESS */
for (; i < len; i++) { for (; i < len; i++) {
dest[i] = (src1[i] & mask) | (src2[i] & not_mask); dest[i] = (src1[i] & mask) | (src2[i] & not_mask);