From b327a1e706f4fbb113ed7f17edcdc23bfa8c5828 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 11:21:26 +0000 Subject: [PATCH 1/6] Change unaligned access method for old gcc gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662 shows that __attribute__ aligned may be ignored. Signed-off-by: Dave Rodgman --- library/alignment.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index 248f29bc7..8db550fa5 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -71,10 +71,10 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; * Tested with several versions of GCC from 4.5.0 up to 9.3.0 * We don't enable for older than 4.5.0 as this has not been tested. */ - #define UINT_UNALIGNED -typedef uint16_t __attribute__((__aligned__(1))) mbedtls_uint16_unaligned_t; -typedef uint32_t __attribute__((__aligned__(1))) mbedtls_uint32_unaligned_t; -typedef uint64_t __attribute__((__aligned__(1))) mbedtls_uint64_unaligned_t; + #define UINT_UNALIGNED_UNION +typedef union { uint16_t x; } __attribute__((packed)) mbedtls_uint16_unaligned_t; +typedef union { uint32_t x; } __attribute__((packed)) mbedtls_uint32_unaligned_t; +typedef union { uint64_t x; } __attribute__((packed)) mbedtls_uint64_unaligned_t; #endif /* @@ -101,6 +101,9 @@ static inline uint16_t mbedtls_get_unaligned_uint16(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; r = *p16; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; + r = p16->x; #else memcpy(&r, p, sizeof(r)); #endif @@ -124,6 +127,9 @@ static inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x) #if defined(UINT_UNALIGNED) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; *p16 = x; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; + p16->x = x; #else memcpy(p, &x, sizeof(x)); #endif @@ -147,6 +153,9 @@ static inline uint32_t mbedtls_get_unaligned_uint32(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; r = *p32; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; + r = p32->x; #else memcpy(&r, p, sizeof(r)); #endif @@ -170,6 +179,9 @@ static inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x) #if defined(UINT_UNALIGNED) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; *p32 = x; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; + p32->x = x; #else memcpy(p, &x, sizeof(x)); #endif @@ -193,6 +205,9 @@ static inline uint64_t mbedtls_get_unaligned_uint64(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; r = *p64; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; + r = p64->x; #else memcpy(&r, p, sizeof(r)); #endif @@ -216,6 +231,9 @@ static inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x) #if defined(UINT_UNALIGNED) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; *p64 = x; +#elif defined(UINT_UNALIGNED_UNION) + mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; + p64->x = x; #else memcpy(p, &x, sizeof(x)); #endif From ec9936d1223283d45d947ab1b6d7f1ea31594fe7 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 12:56:45 +0000 Subject: [PATCH 2/6] Improve gcc guards Signed-off-by: Dave Rodgman --- library/alignment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alignment.h b/library/alignment.h index 8db550fa5..65c5321da 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -53,7 +53,7 @@ typedef uint16_t __packed mbedtls_uint16_unaligned_t; typedef uint32_t __packed mbedtls_uint32_unaligned_t; typedef uint64_t __packed mbedtls_uint64_unaligned_t; #elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \ - ((MBEDTLS_GCC_VERSION < 90300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS))) + ((MBEDTLS_GCC_VERSION < 60300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS))) /* * Old versions of gcc, depending on how the target is specified, may generate a branch to memcpy * for calls like `memcpy(dest, src, 4)` rather than generating some LDR or LDRB instructions From f4e8234f932faa0052b8d97ad9055e7922204aee Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 12:57:03 +0000 Subject: [PATCH 3/6] Improve docs Signed-off-by: Dave Rodgman --- library/alignment.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index 65c5321da..14a86cf37 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -55,20 +55,25 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; #elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \ ((MBEDTLS_GCC_VERSION < 60300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS))) /* - * Old versions of gcc, depending on how the target is specified, may generate a branch to memcpy - * for calls like `memcpy(dest, src, 4)` rather than generating some LDR or LDRB instructions - * (similar for stores). - * Recent versions where unaligned access is not enabled also do this. + * gcc may generate a branch to memcpy for calls like `memcpy(dest, src, 4)` rather than + * generating some LDR or LDRB instructions (similar for stores). + * + * For versions of gcc < 5.4.0 this always happens. + * For gcc < 6.3.0, this happens at -O0 + * For all versions, this happens iff unaligned access is not supported. + * + * For gcc 4.x, this will generate byte-by-byte loads even if unaligned access is supported, which + * is correct but not optimal. * * For performance (and code size, in some cases), we want to avoid the branch and just generate * some inline load/store instructions since the access is small and constant-size. * * The manual states: - * "The aligned attribute specifies a minimum alignment for the variable or structure field, - * measured in bytes." - * https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html + * "The packed attribute specifies that a variable or structure field should have the smallest + * possible alignment—one byte for a variable" + * https://gcc.gnu.org/onlinedocs/gcc-4.5.4/gcc/Variable-Attributes.html * - * Tested with several versions of GCC from 4.5.0 up to 9.3.0 + * Tested with several versions of GCC from 4.5.0 up to 13.2.0 * We don't enable for older than 4.5.0 as this has not been tested. */ #define UINT_UNALIGNED_UNION From 22b934e6d2242c5d7c3b14f82ef99ecb62fb7de4 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 12:57:16 +0000 Subject: [PATCH 4/6] Use struct not union Signed-off-by: Dave Rodgman --- library/alignment.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index 14a86cf37..db835955b 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -76,10 +76,10 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; * Tested with several versions of GCC from 4.5.0 up to 13.2.0 * We don't enable for older than 4.5.0 as this has not been tested. */ - #define UINT_UNALIGNED_UNION -typedef union { uint16_t x; } __attribute__((packed)) mbedtls_uint16_unaligned_t; -typedef union { uint32_t x; } __attribute__((packed)) mbedtls_uint32_unaligned_t; -typedef union { uint64_t x; } __attribute__((packed)) mbedtls_uint64_unaligned_t; + #define UINT_UNALIGNED_STRUCT +typedef struct { uint16_t x; } __attribute__((packed)) mbedtls_uint16_unaligned_t; +typedef struct { uint32_t x; } __attribute__((packed)) mbedtls_uint32_unaligned_t; +typedef struct { uint64_t x; } __attribute__((packed)) mbedtls_uint64_unaligned_t; #endif /* @@ -106,7 +106,7 @@ static inline uint16_t mbedtls_get_unaligned_uint16(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; r = *p16; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; r = p16->x; #else @@ -132,7 +132,7 @@ static inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x) #if defined(UINT_UNALIGNED) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; *p16 = x; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p; p16->x = x; #else @@ -158,7 +158,7 @@ static inline uint32_t mbedtls_get_unaligned_uint32(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; r = *p32; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; r = p32->x; #else @@ -184,7 +184,7 @@ static inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x) #if defined(UINT_UNALIGNED) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; *p32 = x; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p; p32->x = x; #else @@ -210,7 +210,7 @@ static inline uint64_t mbedtls_get_unaligned_uint64(const void *p) #if defined(UINT_UNALIGNED) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; r = *p64; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; r = p64->x; #else @@ -236,7 +236,7 @@ static inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x) #if defined(UINT_UNALIGNED) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; *p64 = x; -#elif defined(UINT_UNALIGNED_UNION) +#elif defined(UINT_UNALIGNED_STRUCT) mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p; p64->x = x; #else From d09f96b829aedcddd5bb40681fca2c31ff20a591 Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 13:51:58 +0000 Subject: [PATCH 5/6] Improve docs Signed-off-by: Dave Rodgman --- library/alignment.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index db835955b..fece47dc8 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -58,12 +58,15 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; * gcc may generate a branch to memcpy for calls like `memcpy(dest, src, 4)` rather than * generating some LDR or LDRB instructions (similar for stores). * - * For versions of gcc < 5.4.0 this always happens. - * For gcc < 6.3.0, this happens at -O0 - * For all versions, this happens iff unaligned access is not supported. + * This is architecture dependent: x86-64 seems fine even with old gcc; 32-bit Arm + * is affected. To keep it simple, we enable for all architectures. * - * For gcc 4.x, this will generate byte-by-byte loads even if unaligned access is supported, which - * is correct but not optimal. + * For versions of gcc < 5.4.0 this issue always happens. + * For gcc < 6.3.0, this issue happens at -O0 + * For all versions, this issue happens iff unaligned access is not supported. + * + * For gcc 4.x, this implementation will generate byte-by-byte loads even if unaligned access is + * supported, which is correct but not optimal. * * For performance (and code size, in some cases), we want to avoid the branch and just generate * some inline load/store instructions since the access is small and constant-size. @@ -73,6 +76,9 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; * possible alignment—one byte for a variable" * https://gcc.gnu.org/onlinedocs/gcc-4.5.4/gcc/Variable-Attributes.html * + * Previous implementations used __attribute__((__aligned__(1)), but had issues with a gcc bug: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662 + * * Tested with several versions of GCC from 4.5.0 up to 13.2.0 * We don't enable for older than 4.5.0 as this has not been tested. */ From e093281a8b51a3d89e61c99b7ac28c7bae47fbfa Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Tue, 6 Feb 2024 15:00:58 +0000 Subject: [PATCH 6/6] Pacify check-names Signed-off-by: Dave Rodgman --- library/alignment.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/library/alignment.h b/library/alignment.h index fece47dc8..a17001dd9 100644 --- a/library/alignment.h +++ b/library/alignment.h @@ -83,9 +83,15 @@ typedef uint64_t __packed mbedtls_uint64_unaligned_t; * We don't enable for older than 4.5.0 as this has not been tested. */ #define UINT_UNALIGNED_STRUCT -typedef struct { uint16_t x; } __attribute__((packed)) mbedtls_uint16_unaligned_t; -typedef struct { uint32_t x; } __attribute__((packed)) mbedtls_uint32_unaligned_t; -typedef struct { uint64_t x; } __attribute__((packed)) mbedtls_uint64_unaligned_t; +typedef struct { + uint16_t x; +} __attribute__((packed)) mbedtls_uint16_unaligned_t; +typedef struct { + uint32_t x; +} __attribute__((packed)) mbedtls_uint32_unaligned_t; +typedef struct { + uint64_t x; +} __attribute__((packed)) mbedtls_uint64_unaligned_t; #endif /*