Merge pull request #8788 from daverodgman/old-gcc-alignment-bug
Change unaligned access method for old gcc
This commit is contained in:
commit
57a0957938
1 changed files with 48 additions and 13 deletions
|
@ -53,28 +53,45 @@ typedef uint16_t __packed mbedtls_uint16_unaligned_t;
|
||||||
typedef uint32_t __packed mbedtls_uint32_unaligned_t;
|
typedef uint32_t __packed mbedtls_uint32_unaligned_t;
|
||||||
typedef uint64_t __packed mbedtls_uint64_unaligned_t;
|
typedef uint64_t __packed mbedtls_uint64_unaligned_t;
|
||||||
#elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \
|
#elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \
|
||||||
((MBEDTLS_GCC_VERSION < 90300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)))
|
((MBEDTLS_GCC_VERSION < 60300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)))
|
||||||
/*
|
/*
|
||||||
* Old versions of gcc, depending on how the target is specified, may generate a branch to memcpy
|
* gcc may generate a branch to memcpy for calls like `memcpy(dest, src, 4)` rather than
|
||||||
* for calls like `memcpy(dest, src, 4)` rather than generating some LDR or LDRB instructions
|
* generating some LDR or LDRB instructions (similar for stores).
|
||||||
* (similar for stores).
|
*
|
||||||
* Recent versions where unaligned access is not enabled also do this.
|
* This is architecture dependent: x86-64 seems fine even with old gcc; 32-bit Arm
|
||||||
|
* is affected. To keep it simple, we enable for all architectures.
|
||||||
|
*
|
||||||
|
* For versions of gcc < 5.4.0 this issue always happens.
|
||||||
|
* For gcc < 6.3.0, this issue happens at -O0
|
||||||
|
* For all versions, this issue happens iff unaligned access is not supported.
|
||||||
|
*
|
||||||
|
* For gcc 4.x, this implementation will generate byte-by-byte loads even if unaligned access is
|
||||||
|
* supported, which is correct but not optimal.
|
||||||
*
|
*
|
||||||
* For performance (and code size, in some cases), we want to avoid the branch and just generate
|
* For performance (and code size, in some cases), we want to avoid the branch and just generate
|
||||||
* some inline load/store instructions since the access is small and constant-size.
|
* some inline load/store instructions since the access is small and constant-size.
|
||||||
*
|
*
|
||||||
* The manual states:
|
* The manual states:
|
||||||
* "The aligned attribute specifies a minimum alignment for the variable or structure field,
|
* "The packed attribute specifies that a variable or structure field should have the smallest
|
||||||
* measured in bytes."
|
* possible alignment—one byte for a variable"
|
||||||
* https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html
|
* https://gcc.gnu.org/onlinedocs/gcc-4.5.4/gcc/Variable-Attributes.html
|
||||||
*
|
*
|
||||||
* Tested with several versions of GCC from 4.5.0 up to 9.3.0
|
* Previous implementations used __attribute__((__aligned__(1)), but had issues with a gcc bug:
|
||||||
|
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662
|
||||||
|
*
|
||||||
|
* Tested with several versions of GCC from 4.5.0 up to 13.2.0
|
||||||
* We don't enable for older than 4.5.0 as this has not been tested.
|
* We don't enable for older than 4.5.0 as this has not been tested.
|
||||||
*/
|
*/
|
||||||
#define UINT_UNALIGNED
|
#define UINT_UNALIGNED_STRUCT
|
||||||
typedef uint16_t __attribute__((__aligned__(1))) mbedtls_uint16_unaligned_t;
|
typedef struct {
|
||||||
typedef uint32_t __attribute__((__aligned__(1))) mbedtls_uint32_unaligned_t;
|
uint16_t x;
|
||||||
typedef uint64_t __attribute__((__aligned__(1))) mbedtls_uint64_unaligned_t;
|
} __attribute__((packed)) mbedtls_uint16_unaligned_t;
|
||||||
|
typedef struct {
|
||||||
|
uint32_t x;
|
||||||
|
} __attribute__((packed)) mbedtls_uint32_unaligned_t;
|
||||||
|
typedef struct {
|
||||||
|
uint64_t x;
|
||||||
|
} __attribute__((packed)) mbedtls_uint64_unaligned_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -101,6 +118,9 @@ static inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
||||||
r = *p16;
|
r = *p16;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
||||||
|
r = p16->x;
|
||||||
#else
|
#else
|
||||||
memcpy(&r, p, sizeof(r));
|
memcpy(&r, p, sizeof(r));
|
||||||
#endif
|
#endif
|
||||||
|
@ -124,6 +144,9 @@ static inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
||||||
*p16 = x;
|
*p16 = x;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
|
||||||
|
p16->x = x;
|
||||||
#else
|
#else
|
||||||
memcpy(p, &x, sizeof(x));
|
memcpy(p, &x, sizeof(x));
|
||||||
#endif
|
#endif
|
||||||
|
@ -147,6 +170,9 @@ static inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
||||||
r = *p32;
|
r = *p32;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
||||||
|
r = p32->x;
|
||||||
#else
|
#else
|
||||||
memcpy(&r, p, sizeof(r));
|
memcpy(&r, p, sizeof(r));
|
||||||
#endif
|
#endif
|
||||||
|
@ -170,6 +196,9 @@ static inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
||||||
*p32 = x;
|
*p32 = x;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
|
||||||
|
p32->x = x;
|
||||||
#else
|
#else
|
||||||
memcpy(p, &x, sizeof(x));
|
memcpy(p, &x, sizeof(x));
|
||||||
#endif
|
#endif
|
||||||
|
@ -193,6 +222,9 @@ static inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
||||||
r = *p64;
|
r = *p64;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
||||||
|
r = p64->x;
|
||||||
#else
|
#else
|
||||||
memcpy(&r, p, sizeof(r));
|
memcpy(&r, p, sizeof(r));
|
||||||
#endif
|
#endif
|
||||||
|
@ -216,6 +248,9 @@ static inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
|
||||||
#if defined(UINT_UNALIGNED)
|
#if defined(UINT_UNALIGNED)
|
||||||
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
||||||
*p64 = x;
|
*p64 = x;
|
||||||
|
#elif defined(UINT_UNALIGNED_STRUCT)
|
||||||
|
mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
|
||||||
|
p64->x = x;
|
||||||
#else
|
#else
|
||||||
memcpy(p, &x, sizeof(x));
|
memcpy(p, &x, sizeof(x));
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue