aria: optimize byte perms on Arm
Use specific instructions for moving bytes around in a word. This speeds things up, and as a side-effect, slightly lowers code size.

ARIA_P3 and ARIA_P1 are now 1 single-cycle instruction each (those instructions are available in all architecture versions starting from v6-M). Note: ARIA_P3 was already translated to a single instruction by Clang 3.8 and armclang 6.5, but not by arm-gcc 5.4 or armcc 5.06.

ARIA_P2 is already efficiently translated to the minimal number of instructions (1 in ARM mode, 2 in Thumb mode) by all tested compilers.

Manually compiled and inspected the generated code with the following compilers: arm-gcc 5.4, clang 3.8, armcc 5.06 (with and without --gnu), armclang 6.5.

Size reduction (arm-none-eabi-gcc -march=armv6-m -mthumb -Os): 5288 -> 5044 B

Effect on execution time of self-tests on a few boards:
FRDM-K64F (Cortex-M4): 444 -> 385 us (-13%)
LPC1768 (Cortex-M3): 488 -> 432 us (-11%)
FRDM-KL64Z (Cortex-M0): 1429 -> 1134 us (-20%)

Measured using a config.h with no cipher mode and the following program, with aria.c and aria.h copy-pasted to the online compiler:

#include "mbed.h"
#include "aria.h"

int main() {
    Timer t;
    t.start();
    int ret = mbedtls_aria_self_test(0);
    t.stop();
    printf("ret = %d; time = %d us\n", ret, t.read_us());
}
This commit is contained in:
parent
fb0e4f0d1a
commit
377b2b624d
1 changed files with 50 additions and 6 deletions
|
@ -85,11 +85,33 @@ static void mbedtls_zeroize( void *v, size_t n ) {
|
|||
* Common compilers fail to translate this to the minimal number of instructions,
|
||||
* so let's provide asm versions for common platforms with C fallback.
|
||||
*/
|
||||
#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__)
|
||||
#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__)
|
||||
#if defined(MBEDTLS_HAVE_ASM)
|
||||
#if defined(__arm__)
|
||||
/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
|
||||
#if defined(__GNUC__) && \
|
||||
( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
|
||||
/*
 * ARIA_P1 for Arm with a GNU-compatible compiler: a single REV16
 * instruction, which swaps the two bytes inside each 16-bit half of x
 * (0xAABBCCDD -> 0xBBAADDCC).
 *
 * Spelled __asm__ rather than asm: GCC does not recognise the plain asm
 * keyword in strict ISO modes (-std=c99, -std=c11), whereas __asm__ is
 * accepted in every mode.
 *
 * The "l" constraint keeps both operands in low registers (r0-r7 in Thumb
 * state), which the 16-bit Thumb encoding of REV16 requires.
 */
static inline uint32_t aria_p1( uint32_t x )
{
    uint32_t r;
    __asm__( "rev16 %0, %1" : "=l" (r) : "l" (x) );
    return( r );
}
|
||||
#define ARIA_P1 aria_p1
|
||||
#elif defined(__ARMCC_VERSION) && __ARMCC_VERSION < 6000000
|
||||
/*
 * ARIA_P1 for armcc 5: a single REV16 instruction.
 * This compiler's inline assembler names C variables directly as operands
 * instead of using GNU-style constraint lists.
 */
static __inline uint32_t aria_p1( uint32_t x )
{
    uint32_t res;
    __asm( "rev16 res, x" );
    return( res );
}
|
||||
#define ARIA_P1 aria_p1
|
||||
#endif
|
||||
#endif /* arm */
|
||||
/*
 * x86 with a GNU-compatible compiler.
 * The architecture tests are parenthesised so that the __GNUC__ requirement
 * applies to all of them: && binds tighter than ||, so without the
 * parentheses it would only guard the __i386__ case.
 */
#if defined(__GNUC__) && \
    ( defined(__i386__) || defined(__amd64__) || defined( __x86_64__) )
/* I couldn't find an Intel equivalent of rev16, so two instructions */
#define ARIA_P1(x) ARIA_P2( ARIA_P3( x ) )
#endif /* x86 gnuc */
|
||||
#endif /* MBEDTLS_HAVE_ASM */
|
||||
#if !defined(ARIA_P1)
|
||||
/*
 * Portable ARIA_P1: exchange the two bytes inside each 16-bit half of x.
 * The even bytes move up by 8 bits and the odd bytes move down by 8 bits;
 * the two masked parts never share a bit, so they can simply be merged.
 */
#define ARIA_P1(x) ((((x) & 0x00FF00FF) << 8) | (((x) >> 8) & 0x00FF00FF))
|
||||
|
@ -112,15 +134,37 @@ static void mbedtls_zeroize( void *v, size_t n ) {
|
|||
* Some compilers fail to translate this to a single instruction,
|
||||
* so let's provide asm versions for common platforms with C fallback.
|
||||
*/
|
||||
#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__)
|
||||
#if defined(__i386__) || defined(__amd64__) || defined( __x86_64__)
|
||||
#if defined(MBEDTLS_HAVE_ASM)
|
||||
#if defined(__arm__)
|
||||
/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
|
||||
#if defined(__GNUC__) && \
|
||||
( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
|
||||
/*
 * ARIA_P3 for Arm with a GNU-compatible compiler: a single REV
 * instruction, which reverses the order of the four bytes of x
 * (0xAABBCCDD -> 0xDDCCBBAA).
 *
 * Spelled __asm__ rather than asm: GCC does not recognise the plain asm
 * keyword in strict ISO modes (-std=c99, -std=c11), whereas __asm__ is
 * accepted in every mode.
 *
 * The "l" constraint keeps both operands in low registers (r0-r7 in Thumb
 * state), which the 16-bit Thumb encoding of REV requires.
 */
static inline uint32_t aria_p3( uint32_t x )
{
    uint32_t r;
    __asm__( "rev %0, %1" : "=l" (r) : "l" (x) );
    return( r );
}
|
||||
#define ARIA_P3 aria_p3
|
||||
#elif defined(__ARMCC_VERSION) && __ARMCC_VERSION < 6000000
|
||||
/*
 * ARIA_P3 for armcc 5: a single REV instruction.
 * This compiler's inline assembler names C variables directly as operands
 * instead of using GNU-style constraint lists.
 */
static __inline uint32_t aria_p3( uint32_t x )
{
    uint32_t res;
    __asm( "rev res, x" );
    return( res );
}
|
||||
#define ARIA_P3 aria_p3
|
||||
#endif
|
||||
#endif /* arm */
|
||||
/*
 * x86 with a GNU-compatible compiler.
 * The architecture tests are parenthesised so that the __GNUC__ requirement
 * applies to all of them: && binds tighter than ||, so without the
 * parentheses a non-GNU compiler defining __amd64__ or __x86_64__ would be
 * handed GNU extended asm.
 */
#if defined(__GNUC__) && \
    ( defined(__i386__) || defined(__amd64__) || defined( __x86_64__) )
/*
 * ARIA_P3 as a single BSWAP instruction: reverses the order of the four
 * bytes of x (0xAABBCCDD -> 0xDDCCBBAA). The "0" input constraint ties the
 * input to the output register, since BSWAP works in place.
 *
 * Spelled __asm__ rather than asm so that it also builds in strict ISO
 * modes (-std=c99, -std=c11), where the plain asm keyword is unavailable.
 */
static inline uint32_t aria_p3( uint32_t x )
{
    __asm__( "bswap %0" : "=r" (x) : "0" (x) );
    return( x );
}
#define ARIA_P3 aria_p3
#endif /* x86 gnuc */
|
||||
#endif /* MBEDTLS_HAVE_ASM */
|
||||
#if !defined(ARIA_P3)
|
||||
/* Portable fallback, used when no platform-specific ARIA_P3 has been
 * defined: apply ARIA_P1 to x, then ARIA_P2 to the result. */
#define ARIA_P3(x) ARIA_P2( ARIA_P1 ( x ) )
|
||||
|
|
Loading…
Reference in a new issue