Merge more portable AES-NI
This commit is contained in:
commit
8eab8d368b
2 changed files with 65 additions and 38 deletions
|
@ -12,6 +12,7 @@ Changes
|
|||
checked and filled in the relevant module headers
|
||||
* Debug module only outputs full lines instead of parts
|
||||
* Better support for the different Attribute Types from IETF PKIX (RFC 5280)
|
||||
* AES-NI now compiles with "old" assemblers too
|
||||
|
||||
Bugfix
|
||||
* Only iterate over actual certificates in ssl_write_certificate_request()
|
||||
|
|
102
library/aesni.c
102
library/aesni.c
|
@ -58,6 +58,32 @@ int aesni_supports( unsigned int what )
|
|||
return( ( c & what ) != 0 );
|
||||
}
|
||||
|
||||
/*
|
||||
* Binutils needs to be at least 2.19 to support AES-NI instructions.
|
||||
* Unfortunately, a lot of users have a lower version now (2014-04).
|
||||
* Emit bytecode directly in order to support "old" version of gas.
|
||||
*
|
||||
* Opcodes from the Intel architecture reference manual, vol. 3.
|
||||
* We always use registers, so we don't need prefixes for memory operands.
|
||||
* Operand macros are in gas order (src, dst) as opposed to Intel order
|
||||
* (dst, src) in order to blend better into the surrounding assembly code.
|
||||
*/
|
||||
#define AESDEC ".byte 0x66,0x0F,0x38,0xDE,"
|
||||
#define AESDECLAST ".byte 0x66,0x0F,0x38,0xDF,"
|
||||
#define AESENC ".byte 0x66,0x0F,0x38,0xDC,"
|
||||
#define AESENCLAST ".byte 0x66,0x0F,0x38,0xDD,"
|
||||
#define AESIMC ".byte 0x66,0x0F,0x38,0xDB,"
|
||||
#define AESKEYGENA ".byte 0x66,0x0F,0x3A,0xDF,"
|
||||
#define PCLMULQDQ ".byte 0x66,0x0F,0x3A,0x44,"
|
||||
|
||||
#define xmm0_xmm0 "0xC0"
|
||||
#define xmm0_xmm1 "0xC8"
|
||||
#define xmm0_xmm2 "0xD0"
|
||||
#define xmm0_xmm3 "0xD8"
|
||||
#define xmm0_xmm4 "0xE0"
|
||||
#define xmm1_xmm0 "0xC1"
|
||||
#define xmm1_xmm2 "0xD1"
|
||||
|
||||
/*
|
||||
* AES-NI AES-ECB block en(de)cryption
|
||||
*/
|
||||
|
@ -76,22 +102,22 @@ int aesni_crypt_ecb( aes_context *ctx,
|
|||
|
||||
"1: \n" // encryption loop
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesenc %%xmm1, %%xmm0 \n" // do round
|
||||
AESENC xmm1_xmm0 "\n" // do round
|
||||
"addq $16, %1 \n" // point to next round key
|
||||
"subl $1, %0 \n" // loop
|
||||
"jnz 1b \n"
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesenclast %%xmm1, %%xmm0 \n" // last round
|
||||
AESENCLAST xmm1_xmm0 "\n" // last round
|
||||
"jmp 3f \n"
|
||||
|
||||
"2: \n" // decryption loop
|
||||
"movdqu (%1), %%xmm1 \n"
|
||||
"aesdec %%xmm1, %%xmm0 \n"
|
||||
AESDEC xmm1_xmm0 "\n" // do round
|
||||
"addq $16, %1 \n"
|
||||
"subl $1, %0 \n"
|
||||
"jnz 2b \n"
|
||||
"movdqu (%1), %%xmm1 \n" // load round key
|
||||
"aesdeclast %%xmm1, %%xmm0 \n" // last round
|
||||
AESDECLAST xmm1_xmm0 "\n" // last round
|
||||
|
||||
"3: \n"
|
||||
"movdqu %%xmm0, (%4) \n" // export output
|
||||
|
@ -131,10 +157,10 @@ void aesni_gcm_mult( unsigned char c[16],
|
|||
"movdqa %%xmm1, %%xmm2 \n" // copy of b1:b0
|
||||
"movdqa %%xmm1, %%xmm3 \n" // same
|
||||
"movdqa %%xmm1, %%xmm4 \n" // same
|
||||
"pclmulqdq $0x00, %%xmm0, %%xmm1 \n" // a0*b0 = c1:c0
|
||||
"pclmulqdq $0x11, %%xmm0, %%xmm2 \n" // a1*b1 = d1:d0
|
||||
"pclmulqdq $0x10, %%xmm0, %%xmm3 \n" // a0*b1 = e1:e0
|
||||
"pclmulqdq $0x01, %%xmm0, %%xmm4 \n" // a1*b0 = f1:f0
|
||||
PCLMULQDQ xmm0_xmm1 ",0x00 \n" // a0*b0 = c1:c0
|
||||
PCLMULQDQ xmm0_xmm2 ",0x11 \n" // a1*b1 = d1:d0
|
||||
PCLMULQDQ xmm0_xmm3 ",0x10 \n" // a0*b1 = e1:e0
|
||||
PCLMULQDQ xmm0_xmm4 ",0x01 \n" // a1*b0 = f1:f0
|
||||
"pxor %%xmm3, %%xmm4 \n" // e1+f1:e0+f0
|
||||
"movdqa %%xmm4, %%xmm3 \n" // same
|
||||
"psrldq $8, %%xmm4 \n" // 0:e1+f1
|
||||
|
@ -228,7 +254,7 @@ void aesni_inverse_key( unsigned char *invkey,
|
|||
|
||||
for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
|
||||
asm( "movdqu (%0), %%xmm0 \n"
|
||||
"aesimc %%xmm0, %%xmm0 \n"
|
||||
AESIMC xmm0_xmm0 "\n"
|
||||
"movdqu %%xmm0, (%1) \n"
|
||||
:
|
||||
: "r" (fk), "r" (ik)
|
||||
|
@ -271,17 +297,17 @@ static void aesni_setkey_enc_128( unsigned char *rk,
|
|||
"ret \n"
|
||||
|
||||
/* Main "loop" */
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x80, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x1B, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"aeskeygenassist $0x36, %%xmm0, %%xmm1 \ncall 1b \n"
|
||||
"2: \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x01 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x02 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x04 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x08 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x10 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x20 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x40 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x80 \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x1B \ncall 1b \n"
|
||||
AESKEYGENA xmm0_xmm1 ",0x36 \ncall 1b \n"
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
: "memory", "cc", "0" );
|
||||
|
@ -329,15 +355,15 @@ static void aesni_setkey_enc_192( unsigned char *rk,
|
|||
"add $8, %0 \n"
|
||||
"ret \n"
|
||||
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x80, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"2: \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x01 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x02 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x04 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x08 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x10 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x20 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x40 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x80 \ncall 1b \n"
|
||||
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
|
@ -380,7 +406,7 @@ static void aesni_setkey_enc_256( unsigned char *rk,
|
|||
|
||||
/* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
|
||||
* and proceed to generate next round key from there */
|
||||
"aeskeygenassist $0, %%xmm0, %%xmm2\n"
|
||||
AESKEYGENA xmm0_xmm2 ",0x00 \n"
|
||||
"pshufd $0xaa, %%xmm2, %%xmm2 \n"
|
||||
"pxor %%xmm1, %%xmm2 \n"
|
||||
"pslldq $4, %%xmm1 \n"
|
||||
|
@ -397,14 +423,14 @@ static void aesni_setkey_enc_256( unsigned char *rk,
|
|||
* Main "loop" - Generating one more key than necessary,
|
||||
* see definition of aes_context.buf
|
||||
*/
|
||||
"2: \n"
|
||||
"aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
|
||||
"2: \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x01 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x02 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x04 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x08 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x10 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x20 \ncall 1b \n"
|
||||
AESKEYGENA xmm1_xmm2 ",0x40 \ncall 1b \n"
|
||||
:
|
||||
: "r" (rk), "r" (key)
|
||||
: "memory", "cc", "0" );
|
||||
|
|
Loading…
Reference in a new issue