forked from pool/gnutls
ec623dec0c
- Update to version 3.0.0. Many fixes; see NEWS for details. This
  changelog only describes important package changes or features.
  * Main reason for the update is to support Intel AES-NI CPU extensions.
  * Bump sonames in the library package accordingly.
  * C++ apps must now buildrequire libgnutls++-devel.
  * Software using the OpenSSL emulation must buildrequire
    libgnutls-openssl-devel, or better, use OpenSSL directly.
  * Upstream no longer uses libgcrypt but libnettle.
  * Upstream now requires the use of p11-kit.
  * Add post-release upstream patches critical for improving AES-NI
    support.
  (forwarded request 79252 from elvigia)

OBS-URL: https://build.opensuse.org/request/show/79281
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/gnutls?expand=0&rev=31
From e8a255261eaf28e5dafd7be4cd6c01485a91c752 Mon Sep 17 00:00:00 2001
From: Nikos Mavrogiannopoulos <nmav@gnutls.org>
Date: Mon, 8 Aug 2011 14:54:08 +0200
Subject: [PATCH 1/6] Included appro's updates to AES-NI.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit


Signed-off-by: Cristian Rodríguez <crrodriguez@opensuse.org>
---
lib/accelerated/intel/asm/appro-aes-x86-64.s | 368 +++++++++++++-------------
lib/accelerated/intel/asm/appro-aes-x86.s | 295 +++++++++++----------
2 files changed, 338 insertions(+), 325 deletions(-)

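[Editor's note, not part of the upstream patch: the change below is almost entirely one mechanical substitution. Every movaps that touches the AES key schedule becomes movups: movaps is the aligned 128-bit SSE move and raises a general-protection fault if its memory operand is not 16-byte aligned, while movups tolerates any address. Presumably the expanded key a caller hands to these routines is not guaranteed to be 16-byte aligned. A minimal C sketch of the distinction, using the SSE intrinsics that compile to these two instructions; the buffer and offset are illustrative only:

#include <stdio.h>
#include <stdalign.h>
#include <xmmintrin.h>   /* _mm_load_ps -> movaps, _mm_loadu_ps -> movups */

int main(void)
{
    alignas(16) float buf[8] = {0};
    float *p = buf + 1;             /* 4 bytes past a 16-byte boundary */

    __m128 v = _mm_loadu_ps(p);     /* movups: legal at any address */
    /* __m128 w = _mm_load_ps(p);      movaps: #GP fault (seen as SIGSEGV)
                                       on this misaligned address */
    float out[4];
    _mm_storeu_ps(out, v);
    printf("%g\n", out[0]);
    return 0;
}

On AES-NI-era CPUs the unaligned form costs essentially nothing when the data happens to be aligned, so tolerating arbitrary key-schedule addresses is the safer default.]
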
diff --git a/lib/accelerated/intel/asm/appro-aes-x86-64.s b/lib/accelerated/intel/asm/appro-aes-x86-64.s
index 98204d1..f286fb1 100644
--- a/lib/accelerated/intel/asm/appro-aes-x86-64.s
+++ b/lib/accelerated/intel/asm/appro-aes-x86-64.s
@@ -42,14 +42,14 @@
aesni_encrypt:
movups (%rdi),%xmm2
movl 240(%rdx),%eax
- movaps (%rdx),%xmm0
- movaps 16(%rdx),%xmm1
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
leaq 32(%rdx),%rdx
xorps %xmm0,%xmm2
.Loop_enc1_1:
.byte 102,15,56,220,209
decl %eax
- movaps (%rdx),%xmm1
+ movups (%rdx),%xmm1
leaq 16(%rdx),%rdx
jnz .Loop_enc1_1
.byte 102,15,56,221,209
@@ -63,14 +63,14 @@ aesni_encrypt:
aesni_decrypt:
movups (%rdi),%xmm2
movl 240(%rdx),%eax
- movaps (%rdx),%xmm0
- movaps 16(%rdx),%xmm1
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
leaq 32(%rdx),%rdx
xorps %xmm0,%xmm2
.Loop_dec1_2:
.byte 102,15,56,222,209
decl %eax
- movaps (%rdx),%xmm1
+ movups (%rdx),%xmm1
leaq 16(%rdx),%rdx
jnz .Loop_dec1_2
.byte 102,15,56,223,209
@@ -80,26 +80,26 @@ aesni_decrypt:
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0

.Lenc_loop3:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %eax
.byte 102,15,56,220,225
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lenc_loop3

.byte 102,15,56,220,209
@@ -113,26 +113,26 @@ _aesni_encrypt3:
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0

.Ldec_loop3:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %eax
.byte 102,15,56,222,225
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Ldec_loop3

.byte 102,15,56,222,209
@@ -146,15 +146,15 @@ _aesni_decrypt3:
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0

.Lenc_loop4:
.byte 102,15,56,220,209
@@ -162,13 +162,13 @@ _aesni_encrypt4:
decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lenc_loop4

.byte 102,15,56,220,209
@@ -184,15 +184,15 @@ _aesni_encrypt4:
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0

.Ldec_loop4:
.byte 102,15,56,222,209
@@ -200,13 +200,13 @@ _aesni_decrypt4:
decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Ldec_loop4

.byte 102,15,56,222,209
@@ -222,9 +222,9 @@ _aesni_decrypt4:
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
@@ -238,7 +238,7 @@ _aesni_encrypt6:
pxor %xmm0,%xmm7
decl %eax
.byte 102,15,56,220,241
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
.byte 102,15,56,220,249
jmp .Lenc_loop6_enter
.align 16
@@ -251,7 +251,7 @@ _aesni_encrypt6:
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lenc_loop6_enter:
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
@@ -259,7 +259,7 @@ _aesni_encrypt6:
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lenc_loop6

.byte 102,15,56,220,209
@@ -279,9 +279,9 @@ _aesni_encrypt6:
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
@@ -295,7 +295,7 @@ _aesni_decrypt6:
pxor %xmm0,%xmm7
decl %eax
.byte 102,15,56,222,241
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
.byte 102,15,56,222,249
jmp .Ldec_loop6_enter
.align 16
@@ -308,7 +308,7 @@ _aesni_decrypt6:
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.Ldec_loop6_enter:
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
@@ -316,7 +316,7 @@ _aesni_decrypt6:
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Ldec_loop6

.byte 102,15,56,222,209
@@ -336,9 +336,9 @@ _aesni_decrypt6:
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
@@ -355,10 +355,10 @@ _aesni_encrypt8:
pxor %xmm0,%xmm8
.byte 102,15,56,220,249
pxor %xmm0,%xmm9
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
jmp .Lenc_loop8_enter
.align 16
.Lenc_loop8:
@@ -371,7 +371,7 @@ _aesni_encrypt8:
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.Lenc_loop8_enter:
.byte 102,15,56,220,208
.byte 102,15,56,220,216
@@ -382,7 +382,7 @@ _aesni_encrypt8:
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lenc_loop8

.byte 102,15,56,220,209
@@ -406,9 +406,9 @@ _aesni_encrypt8:
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
@@ -425,10 +425,10 @@ _aesni_decrypt8:
pxor %xmm0,%xmm8
.byte 102,15,56,222,249
pxor %xmm0,%xmm9
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
jmp .Ldec_loop8_enter
.align 16
.Ldec_loop8:
@@ -441,7 +441,7 @@ _aesni_decrypt8:
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.Ldec_loop8_enter:
.byte 102,15,56,222,208
.byte 102,15,56,222,216
@@ -452,7 +452,7 @@ _aesni_decrypt8:
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Ldec_loop8

.byte 102,15,56,222,209
@@ -481,7 +481,7 @@ aesni_ecb_encrypt:
jz .Lecb_ret

movl 240(%rcx),%eax
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
movq %rcx,%r11
movl %eax,%r10d
testl %r8d,%r8d
@@ -572,14 +572,14 @@ aesni_ecb_encrypt:
jmp .Lecb_ret
.align 16
.Lecb_enc_one:
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_3:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_3
.byte 102,15,56,221,209
@@ -670,7 +670,7 @@ aesni_ecb_encrypt:

call _aesni_decrypt8

- movaps (%r11),%xmm0
+ movups (%r11),%xmm0
subq $128,%rdx
jnc .Lecb_dec_loop8

@@ -705,7 +705,7 @@ aesni_ecb_encrypt:
movups 80(%rdi),%xmm7
je .Lecb_dec_six
movups 96(%rdi),%xmm8
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
call _aesni_decrypt8
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
@@ -717,14 +717,14 @@ aesni_ecb_encrypt:
jmp .Lecb_ret
.align 16
.Lecb_dec_one:
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_4:
.byte 102,15,56,222,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_4
.byte 102,15,56,223,209
@@ -779,54 +779,54 @@ aesni_ecb_encrypt:
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax
movdqu (%r8),%xmm9
- movdqu (%r9),%xmm3
- movdqa .Lincrement64(%rip),%xmm8
- movdqa .Lbswap_mask(%rip),%xmm9
-.byte 102,69,15,56,0,201
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7

- movl 240(%rcx),%eax
- movq %rcx,%r11
- movl %eax,%r10d
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
movdqa %xmm9,%xmm2
-
+ movl %eax,%r10d
+ jmp .Lccm64_enc_outer
+.align 16
.Lccm64_enc_outer:
- movups (%rdi),%xmm8
-.byte 102,65,15,56,0,209
- movq %r11,%rcx
+ movups (%r11),%xmm0
movl %r10d,%eax
+ movups (%rdi),%xmm8

- movaps (%rcx),%xmm0
- shrl $1,%eax
- movaps 16(%rcx),%xmm1
- xorps %xmm0,%xmm8
- leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
- xorps %xmm3,%xmm8
- movaps (%rcx),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0

.Lccm64_enc2_loop:
.byte 102,15,56,220,209
decl %eax
.byte 102,15,56,220,217
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
- movaps 0(%rcx),%xmm0
+ movups 0(%rcx),%xmm0
jnz .Lccm64_enc2_loop
+.byte 102,68,15,56,0,207
.byte 102,15,56,220,209
.byte 102,15,56,220,217
+ paddq %xmm6,%xmm9
.byte 102,15,56,221,208
.byte 102,15,56,221,216

- paddq %xmm8,%xmm9
decq %rdx
leaq 16(%rdi),%rdi
xorps %xmm2,%xmm8
movdqa %xmm9,%xmm2
movups %xmm8,(%rsi)
leaq 16(%rsi),%rsi
+.byte 102,68,15,56,0,207
jnz .Lccm64_enc_outer

movups %xmm3,(%r9)
@@ -836,35 +836,36 @@ aesni_ccm64_encrypt_blocks:
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
- movdqu (%r8),%xmm9
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
movdqu (%r9),%xmm3
- movdqa .Lincrement64(%rip),%xmm8
- movdqa .Lbswap_mask(%rip),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7

- movl 240(%rcx),%eax
- movdqa %xmm9,%xmm2
-.byte 102,69,15,56,0,201
+ movaps %xmm9,%xmm2
movl %eax,%r10d
movq %rcx,%r11
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_5:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_5
.byte 102,15,56,221,209
-.Lccm64_dec_outer:
- paddq %xmm8,%xmm9
movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+.byte 102,68,15,56,0,207
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
xorps %xmm2,%xmm8
movdqa %xmm9,%xmm2
- leaq 16(%rdi),%rdi
-.byte 102,65,15,56,0,209
- movq %r11,%rcx
movl %r10d,%eax
movups %xmm8,(%rsi)
leaq 16(%rsi),%rsi
@@ -872,41 +873,48 @@ aesni_ccm64_decrypt_blocks:
subq $1,%rdx
jz .Lccm64_dec_break

- movaps (%rcx),%xmm0
+ movups (%r11),%xmm0
shrl $1,%eax
- movaps 16(%rcx),%xmm1
+ movups 16(%r11),%xmm1
xorps %xmm0,%xmm8
- leaq 32(%rcx),%rcx
+ leaq 32(%r11),%rcx
xorps %xmm0,%xmm2
xorps %xmm8,%xmm3
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0

.Lccm64_dec2_loop:
.byte 102,15,56,220,209
decl %eax
.byte 102,15,56,220,217
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
- movaps 0(%rcx),%xmm0
+ movups 0(%rcx),%xmm0
jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
.byte 102,15,56,220,209
.byte 102,15,56,220,217
+.byte 102,68,15,56,0,207
+ leaq 16(%rdi),%rdi
.byte 102,15,56,221,208
+.byte 102,15,56,221,216
jmp .Lccm64_dec_outer

.align 16
.Lccm64_dec_break:
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
- leaq 32(%rcx),%rcx
- xorps %xmm0,%xmm3
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
.Loop_enc1_6:
.byte 102,15,56,220,217
decl %eax
- movaps (%rcx),%xmm1
- leaq 16(%rcx),%rcx
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
jnz .Loop_enc1_6
.byte 102,15,56,221,217
movups %xmm3,(%r9)
@@ -960,10 +968,10 @@ aesni_ctr32_encrypt_blocks:
.Lctr32_loop6:
pshufd $192,%xmm13,%xmm5
por %xmm14,%xmm2
- movaps (%r11),%xmm0
+ movups (%r11),%xmm0
pshufd $128,%xmm13,%xmm6
por %xmm14,%xmm3
- movaps 16(%r11),%xmm1
+ movups 16(%r11),%xmm1
pshufd $64,%xmm13,%xmm7
por %xmm14,%xmm4
por %xmm14,%xmm5
@@ -986,7 +994,7 @@ aesni_ctr32_encrypt_blocks:
pxor %xmm0,%xmm6
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
decl %eax
.byte 102,15,56,220,241
.byte 102,15,56,220,249
@@ -1001,7 +1009,7 @@ aesni_ctr32_encrypt_blocks:
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lctr32_enc_loop6_enter:
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
@@ -1009,7 +1017,7 @@ aesni_ctr32_encrypt_blocks:
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lctr32_enc_loop6

.byte 102,15,56,220,209
@@ -1109,14 +1117,14 @@ aesni_ctr32_encrypt_blocks:
movups (%rdi),%xmm8
movl 240(%rcx),%eax
.Lctr32_one:
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_7:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_7
.byte 102,15,56,221,209
@@ -1168,14 +1176,14 @@ aesni_xts_encrypt:
movups (%r9),%xmm15
movl 240(%r8),%eax
movl 240(%rcx),%r10d
- movaps (%r8),%xmm0
- movaps 16(%r8),%xmm1
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm15
.Loop_enc1_8:
.byte 102,68,15,56,220,249
decl %eax
- movaps (%r8),%xmm1
+ movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_8
.byte 102,68,15,56,221,249
@@ -1242,13 +1250,13 @@ aesni_xts_encrypt:
movdqu 80(%rdi),%xmm7
leaq 96(%rdi),%rdi
pxor %xmm13,%xmm5
- movaps (%r11),%xmm0
+ movups (%r11),%xmm0
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7


- movaps 16(%r11),%xmm1
+ movups 16(%r11),%xmm1
pxor %xmm0,%xmm2
pxor %xmm0,%xmm3
movdqa %xmm10,0(%rsp)
@@ -1264,7 +1272,7 @@ aesni_xts_encrypt:
movdqa %xmm13,48(%rsp)
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
decl %eax
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,241
@@ -1284,7 +1292,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.Lxts_enc_loop6_enter:
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
@@ -1292,7 +1300,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lxts_enc_loop6

pshufd $19,%xmm14,%xmm9
@@ -1307,7 +1315,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1

pshufd $19,%xmm14,%xmm9
pxor %xmm14,%xmm14
@@ -1322,7 +1330,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movaps 32(%rcx),%xmm0
+ movups 32(%rcx),%xmm0

pshufd $19,%xmm14,%xmm9
pxor %xmm14,%xmm14
@@ -1432,14 +1440,14 @@ aesni_xts_encrypt:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_9:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_9
.byte 102,15,56,221,209
@@ -1537,14 +1545,14 @@ aesni_xts_encrypt:

movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_10
.byte 102,15,56,221,209
@@ -1564,14 +1572,14 @@ aesni_xts_decrypt:
movups (%r9),%xmm15
movl 240(%r8),%eax
movl 240(%rcx),%r10d
- movaps (%r8),%xmm0
- movaps 16(%r8),%xmm1
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm15
.Loop_enc1_11:
.byte 102,68,15,56,220,249
decl %eax
- movaps (%r8),%xmm1
+ movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_11
.byte 102,68,15,56,221,249
@@ -1644,13 +1652,13 @@ aesni_xts_decrypt:
movdqu 80(%rdi),%xmm7
leaq 96(%rdi),%rdi
pxor %xmm13,%xmm5
- movaps (%r11),%xmm0
+ movups (%r11),%xmm0
pxor %xmm14,%xmm6
pxor %xmm15,%xmm7


- movaps 16(%r11),%xmm1
+ movups 16(%r11),%xmm1
pxor %xmm0,%xmm2
pxor %xmm0,%xmm3
movdqa %xmm10,0(%rsp)
@@ -1666,7 +1674,7 @@ aesni_xts_decrypt:
movdqa %xmm13,48(%rsp)
.byte 102,15,56,222,233
pxor %xmm0,%xmm7
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
decl %eax
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,241
@@ -1686,7 +1694,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.Lxts_dec_loop6_enter:
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
@@ -1694,7 +1702,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
jnz .Lxts_dec_loop6

pshufd $19,%xmm14,%xmm9
@@ -1709,7 +1717,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1

pshufd $19,%xmm14,%xmm9
pxor %xmm14,%xmm14
@@ -1724,7 +1732,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movaps 32(%rcx),%xmm0
+ movups 32(%rcx),%xmm0

pshufd $19,%xmm14,%xmm9
pxor %xmm14,%xmm14
@@ -1843,14 +1851,14 @@ aesni_xts_decrypt:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_12
.byte 102,15,56,223,209
@@ -1947,14 +1955,14 @@ aesni_xts_decrypt:

movups (%rdi),%xmm2
xorps %xmm11,%xmm2
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_13:
.byte 102,15,56,222,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_13
.byte 102,15,56,223,209
@@ -1977,14 +1985,14 @@ aesni_xts_decrypt:

movups (%rsi),%xmm2
xorps %xmm10,%xmm2
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_14:
.byte 102,15,56,222,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_14
.byte 102,15,56,223,209
@@ -2019,15 +2027,15 @@ aesni_cbc_encrypt:
movups (%rdi),%xmm3
leaq 16(%rdi),%rdi

- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
xorps %xmm0,%xmm3
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2
.Loop_enc1_15:
.byte 102,15,56,220,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_15
.byte 102,15,56,221,209
@@ -2074,10 +2082,10 @@ aesni_cbc_encrypt:
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1

leaq 32(%rcx),%rcx
movdqu 32(%rdi),%xmm4
@@ -2101,10 +2109,10 @@ aesni_cbc_encrypt:
pxor %xmm0,%xmm8
.byte 102,15,56,222,249
pxor %xmm0,%xmm9
- movaps (%rcx),%xmm0
+ movups (%rcx),%xmm0
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
- movaps 16(%rcx),%xmm1
+ movups 16(%rcx),%xmm1

call .Ldec_loop8_enter

@@ -2202,14 +2210,14 @@ aesni_cbc_encrypt:
jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_one:
- movaps (%rcx),%xmm0
- movaps 16(%rcx),%xmm1
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_16:
.byte 102,15,56,222,209
decl %eax
- movaps (%rcx),%xmm1
+ movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_16
.byte 102,15,56,223,209
@@ -2332,28 +2340,28 @@ aesni_set_decrypt_key:
jnz .Ldec_key_ret
leaq 16(%rdx,%rsi,1),%rdi

- movaps (%rdx),%xmm0
- movaps (%rdi),%xmm1
- movaps %xmm0,(%rdi)
- movaps %xmm1,(%rdx)
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi

.Ldec_key_inverse:
- movaps (%rdx),%xmm0
- movaps (%rdi),%xmm1
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
- movaps %xmm0,16(%rdi)
- movaps %xmm1,-16(%rdx)
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja .Ldec_key_inverse

- movaps (%rdx),%xmm0
+ movups (%rdx),%xmm0
.byte 102,15,56,219,192
- movaps %xmm0,(%rdi)
+ movups %xmm0,(%rdi)
.Ldec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
@@ -2383,7 +2391,7 @@ __aesni_set_encrypt_key:

.L10rounds:
movl $9,%esi
- movaps %xmm0,(%rdx)
+ movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1
call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
@@ -2404,7 +2412,7 @@ __aesni_set_encrypt_key:
call .Lkey_expansion_128
.byte 102,15,58,223,200,54
call .Lkey_expansion_128
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
movl %esi,80(%rax)
xorl %eax,%eax
jmp .Lenc_key_ret
@@ -2413,7 +2421,7 @@ __aesni_set_encrypt_key:
.L12rounds:
movq 16(%rdi),%xmm2
movl $11,%esi
- movaps %xmm0,(%rdx)
+ movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1
call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
@@ -2430,7 +2438,7 @@ __aesni_set_encrypt_key:
call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
call .Lkey_expansion_192b
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
movl %esi,48(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret
@@ -2440,8 +2448,8 @@ __aesni_set_encrypt_key:
movups 16(%rdi),%xmm2
movl $13,%esi
leaq 16(%rax),%rax
- movaps %xmm0,(%rdx)
- movaps %xmm2,16(%rdx)
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1
call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
@@ -2468,7 +2476,7 @@ __aesni_set_encrypt_key:
call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
call .Lkey_expansion_256a
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
movl %esi,16(%rax)
xorq %rax,%rax
jmp .Lenc_key_ret
@@ -2483,7 +2491,7 @@ __aesni_set_encrypt_key:

.align 16
.Lkey_expansion_128:
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
shufps $16,%xmm0,%xmm4
@@ -2496,7 +2504,7 @@ __aesni_set_encrypt_key:

.align 16
.Lkey_expansion_192a:
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
movaps %xmm2,%xmm5
@@ -2518,15 +2526,15 @@ __aesni_set_encrypt_key:
.Lkey_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
- movaps %xmm5,(%rax)
+ movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3
- movaps %xmm3,16(%rax)
+ movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp .Lkey_expansion_192b_warm

.align 16
.Lkey_expansion_256a:
- movaps %xmm2,(%rax)
+ movups %xmm2,(%rax)
leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
shufps $16,%xmm0,%xmm4
@@ -2539,7 +2547,7 @@ __aesni_set_encrypt_key:

.align 16
.Lkey_expansion_256b:
- movaps %xmm0,(%rax)
+ movups %xmm0,(%rax)
leaq 16(%rax),%rax

shufps $16,%xmm2,%xmm4
diff --git a/lib/accelerated/intel/asm/appro-aes-x86.s b/lib/accelerated/intel/asm/appro-aes-x86.s
index a8dc2ac..2084749 100644
--- a/lib/accelerated/intel/asm/appro-aes-x86.s
+++ b/lib/accelerated/intel/asm/appro-aes-x86.s
@@ -47,14 +47,14 @@ aesni_encrypt:
movups (%eax),%xmm2
movl 240(%edx),%ecx
movl 8(%esp),%eax
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L000enc1_loop_1:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L000enc1_loop_1
.byte 102,15,56,221,209
@@ -71,14 +71,14 @@ aesni_decrypt:
movups (%eax),%xmm2
movl 240(%edx),%ecx
movl 8(%esp),%eax
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L001dec1_loop_2:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L001dec1_loop_2
.byte 102,15,56,223,209
@@ -88,25 +88,25 @@ aesni_decrypt:
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
shrl $1,%ecx
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.L002enc3_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %ecx
.byte 102,15,56,220,225
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leal 32(%edx),%edx
.byte 102,15,56,220,224
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L002enc3_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
@@ -119,25 +119,25 @@ _aesni_encrypt3:
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
shrl $1,%ecx
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.L003dec3_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %ecx
.byte 102,15,56,222,225
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leal 32(%edx),%edx
.byte 102,15,56,222,224
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L003dec3_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
@@ -150,28 +150,28 @@ _aesni_decrypt3:
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
shrl $1,%ecx
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.L004enc4_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %ecx
.byte 102,15,56,220,225
.byte 102,15,56,220,233
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L004enc4_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
@@ -186,28 +186,28 @@ _aesni_encrypt4:
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
shrl $1,%ecx
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.L005dec4_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %ecx
.byte 102,15,56,222,225
.byte 102,15,56,222,233
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L005dec4_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
@@ -222,9 +222,9 @@ _aesni_decrypt4:
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
shrl $1,%ecx
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
@@ -238,7 +238,7 @@ _aesni_encrypt6:
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,220,249
jmp .L_aesni_encrypt6_enter
.align 16
@@ -252,7 +252,7 @@ _aesni_encrypt6:
.byte 102,15,56,220,249
.align 16
.L_aesni_encrypt6_enter:
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leal 32(%edx),%edx
@@ -260,7 +260,7 @@ _aesni_encrypt6:
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L006enc6_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
@@ -279,9 +279,9 @@ _aesni_encrypt6:
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
shrl $1,%ecx
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
@@ -295,7 +295,7 @@ _aesni_decrypt6:
.byte 102,15,56,222,233
pxor %xmm0,%xmm7
.byte 102,15,56,222,241
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,222,249
jmp .L_aesni_decrypt6_enter
.align 16
@@ -309,7 +309,7 @@ _aesni_decrypt6:
.byte 102,15,56,222,249
.align 16
.L_aesni_decrypt6_enter:
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leal 32(%edx),%edx
@@ -317,7 +317,7 @@ _aesni_decrypt6:
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L007dec6_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
@@ -418,14 +418,14 @@ aesni_ecb_encrypt:
jmp .L008ecb_ret
.align 16
.L013ecb_enc_one:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L017enc1_loop_3:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L017enc1_loop_3
.byte 102,15,56,221,209
@@ -521,14 +521,14 @@ aesni_ecb_encrypt:
jmp .L008ecb_ret
.align 16
.L021ecb_dec_one:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L025dec1_loop_4:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L025dec1_loop_4
.byte 102,15,56,223,209
@@ -583,56 +583,55 @@ aesni_ccm64_encrypt_blocks:
movl %ebp,48(%esp)
movdqu (%ebx),%xmm7
movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
movl $202182159,(%esp)
movl $134810123,4(%esp)
movl $67438087,8(%esp)
movl $66051,12(%esp)
- movl $1,%ecx
+ movl $1,%ebx
xorl %ebp,%ebp
- movl %ecx,16(%esp)
+ movl %ebx,16(%esp)
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
- movdqa (%esp),%xmm5
-.byte 102,15,56,0,253
- movl 240(%edx),%ecx
- movl %edx,%ebp
- movl %ecx,%ebx
+ shrl $1,%ecx
+ leal (%edx),%ebp
movdqa %xmm7,%xmm2
+ movl %ecx,%ebx
+ movdqa (%esp),%xmm5
.L026ccm64_enc_outer:
- movups (%esi),%xmm6
-.byte 102,15,56,0,213
- movl %ebp,%edx
+ movups (%ebp),%xmm0
movl %ebx,%ecx
- movaps (%edx),%xmm0
- shrl $1,%ecx
- movaps 16(%edx),%xmm1
- xorps %xmm0,%xmm6
- leal 32(%edx),%edx
+ movups (%esi),%xmm6
xorps %xmm0,%xmm2
- xorps %xmm6,%xmm3
- movaps (%edx),%xmm0
+ movups 16(%ebp),%xmm1
+ xorps %xmm6,%xmm0
+ leal 32(%ebp),%edx
+ xorps %xmm0,%xmm3
+ movups (%edx),%xmm0
.L027ccm64_enc2_loop:
.byte 102,15,56,220,209
decl %ecx
.byte 102,15,56,220,217
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,220,208
leal 32(%edx),%edx
.byte 102,15,56,220,216
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L027ccm64_enc2_loop
+.byte 102,15,56,0,253
.byte 102,15,56,220,209
.byte 102,15,56,220,217
+ paddq 16(%esp),%xmm7
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- paddq 16(%esp),%xmm7
decl %eax
leal 16(%esi),%esi
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
leal 16(%edi),%edi
+.byte 102,15,56,0,253
jnz .L026ccm64_enc_outer
movl 48(%esp),%esp
movl 40(%esp),%edi
@@ -664,80 +663,86 @@ aesni_ccm64_decrypt_blocks:
movl %ebp,48(%esp)
movdqu (%ebx),%xmm7
movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
movl $202182159,(%esp)
movl $134810123,4(%esp)
movl $67438087,8(%esp)
movl $66051,12(%esp)
- movl $1,%ecx
+ movl $1,%ebx
xorl %ebp,%ebp
- movl %ecx,16(%esp)
+ movl %ebx,16(%esp)
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
-.byte 102,15,56,0,253
- movl 240(%edx),%ecx
movl %edx,%ebp
movl %ecx,%ebx
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+.byte 102,15,56,0,253
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L028enc1_loop_5:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L028enc1_loop_5
.byte 102,15,56,221,209
-.L029ccm64_dec_outer:
- paddq 16(%esp),%xmm7
movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+.byte 102,15,56,0,253
+ leal 16(%esi),%esi
+ jmp .L029ccm64_dec_outer
+.align 16
+.L029ccm64_dec_outer:
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
- leal 16(%esi),%esi
-.byte 102,15,56,0,213
- movl %ebp,%edx
movl %ebx,%ecx
movups %xmm6,(%edi)
leal 16(%edi),%edi
subl $1,%eax
jz .L030ccm64_dec_break
- movaps (%edx),%xmm0
+ movups (%ebp),%xmm0
shrl $1,%ecx
- movaps 16(%edx),%xmm1
+ movups 16(%ebp),%xmm1
xorps %xmm0,%xmm6
- leal 32(%edx),%edx
+ leal 32(%ebp),%edx
xorps %xmm0,%xmm2
xorps %xmm6,%xmm3
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.L031ccm64_dec2_loop:
.byte 102,15,56,220,209
decl %ecx
.byte 102,15,56,220,217
- movaps 16(%edx),%xmm1
+ movups 16(%edx),%xmm1
.byte 102,15,56,220,208
leal 32(%edx),%edx
.byte 102,15,56,220,216
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
jnz .L031ccm64_dec2_loop
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
.byte 102,15,56,220,209
.byte 102,15,56,220,217
+.byte 102,15,56,0,253
+ leal 16(%esi),%esi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
jmp .L029ccm64_dec_outer
.align 16
.L030ccm64_dec_break:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movl %ebp,%edx
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
xorps %xmm0,%xmm6
leal 32(%edx),%edx
xorps %xmm6,%xmm3
.L032enc1_loop_6:
.byte 102,15,56,220,217
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L032enc1_loop_6
.byte 102,15,56,221,217
@@ -826,8 +831,8 @@ aesni_ctr32_encrypt_blocks:
por %xmm1,%xmm5
por %xmm1,%xmm6
por %xmm1,%xmm7
- movaps (%ebp),%xmm0
- movaps 16(%ebp),%xmm1
+ movups (%ebp),%xmm0
+ movups 16(%ebp),%xmm1
leal 32(%ebp),%edx
decl %ecx
pxor %xmm0,%xmm2
@@ -841,7 +846,7 @@ aesni_ctr32_encrypt_blocks:
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movups (%esi),%xmm1
@@ -920,14 +925,14 @@ aesni_ctr32_encrypt_blocks:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
.L037ctr32_one:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L041enc1_loop_7:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L041enc1_loop_7
.byte 102,15,56,221,209
@@ -994,14 +999,14 @@ aesni_xts_encrypt:
movl 40(%esp),%esi
movl 240(%edx),%ecx
movups (%esi),%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L042enc1_loop_8:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L042enc1_loop_8
.byte 102,15,56,221,209
@@ -1064,7 +1069,7 @@ aesni_xts_encrypt:
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)
paddq %xmm1,%xmm1
- movaps (%ebp),%xmm0
+ movups (%ebp),%xmm0
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
@@ -1082,7 +1087,7 @@ aesni_xts_encrypt:
pxor (%esp),%xmm2
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
- movaps 16(%ebp),%xmm1
+ movups 16(%ebp),%xmm1
leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
.byte 102,15,56,220,209
@@ -1095,7 +1100,7 @@ aesni_xts_encrypt:
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
.byte 102,15,56,220,241
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1196,14 +1201,14 @@ aesni_xts_encrypt:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L051enc1_loop_9:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L051enc1_loop_9
.byte 102,15,56,221,209
@@ -1307,14 +1312,14 @@ aesni_xts_encrypt:
movl %ebx,%ecx
movups -16(%edi),%xmm2
xorps %xmm5,%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L054enc1_loop_10:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L054enc1_loop_10
.byte 102,15,56,221,209
@@ -1341,14 +1346,14 @@ aesni_xts_decrypt:
movl 40(%esp),%esi
movl 240(%edx),%ecx
movups (%esi),%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L055enc1_loop_11:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L055enc1_loop_11
.byte 102,15,56,221,209
@@ -1416,7 +1421,7 @@ aesni_xts_decrypt:
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)
paddq %xmm1,%xmm1
- movaps (%ebp),%xmm0
+ movups (%ebp),%xmm0
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
@@ -1434,7 +1439,7 @@ aesni_xts_decrypt:
pxor (%esp),%xmm2
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
- movaps 16(%ebp),%xmm1
+ movups 16(%ebp),%xmm1
leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
.byte 102,15,56,222,209
@@ -1447,7 +1452,7 @@ aesni_xts_decrypt:
.byte 102,15,56,222,233
pxor %xmm0,%xmm7
.byte 102,15,56,222,241
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,222,249
call .L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1548,14 +1553,14 @@ aesni_xts_decrypt:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L064dec1_loop_12:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L064dec1_loop_12
.byte 102,15,56,223,209
@@ -1656,14 +1661,14 @@ aesni_xts_decrypt:
movl %ebx,%ecx
movups (%esi),%xmm2
xorps %xmm5,%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L067dec1_loop_13:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L067dec1_loop_13
.byte 102,15,56,223,209
@@ -1683,14 +1688,14 @@ aesni_xts_decrypt:
movl %ebx,%ecx
movups (%edi),%xmm2
xorps %xmm6,%xmm2
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L069dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L069dec1_loop_14
.byte 102,15,56,223,209
@@ -1740,15 +1745,15 @@ aesni_cbc_encrypt:
.L073cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
xorps %xmm0,%xmm7
leal 32(%edx),%edx
xorps %xmm7,%xmm2
.L074enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L074enc1_loop_15
.byte 102,15,56,221,209
@@ -1863,14 +1868,14 @@ aesni_cbc_encrypt:
jmp .L079cbc_dec_tail_collected
.align 16
.L080cbc_dec_one:
- movaps (%edx),%xmm0
- movaps 16(%edx),%xmm1
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L084dec1_loop_16:
.byte 102,15,56,222,209
decl %ecx
- movaps (%edx),%xmm1
+ movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L084dec1_loop_16
.byte 102,15,56,223,209
@@ -1961,7 +1966,7 @@ _aesni_set_encrypt_key:
.align 16
.L09010rounds:
movl $9,%ecx
- movaps %xmm0,-16(%edx)
+ movups %xmm0,-16(%edx)
.byte 102,15,58,223,200,1
call .L091key_128_cold
.byte 102,15,58,223,200,2
@@ -1982,13 +1987,13 @@ _aesni_set_encrypt_key:
call .L092key_128
.byte 102,15,58,223,200,54
call .L092key_128
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
movl %ecx,80(%edx)
xorl %eax,%eax
ret
.align 16
.L092key_128:
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
leal 16(%edx),%edx
.L091key_128_cold:
shufps $16,%xmm0,%xmm4
@@ -2002,7 +2007,7 @@ _aesni_set_encrypt_key:
.L08812rounds:
movq 16(%eax),%xmm2
movl $11,%ecx
- movaps %xmm0,-16(%edx)
+ movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
call .L093key_192a_cold
.byte 102,15,58,223,202,2
@@ -2019,13 +2024,13 @@ _aesni_set_encrypt_key:
call .L095key_192a
.byte 102,15,58,223,202,128
call .L094key_192b
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
movl %ecx,48(%edx)
xorl %eax,%eax
ret
.align 16
.L095key_192a:
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 16
.L093key_192a_cold:
@@ -2047,9 +2052,9 @@ _aesni_set_encrypt_key:
.L094key_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
- movaps %xmm5,(%edx)
+ movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
- movaps %xmm3,16(%edx)
+ movups %xmm3,16(%edx)
leal 32(%edx),%edx
jmp .L096key_192b_warm
.align 16
@@ -2057,8 +2062,8 @@ _aesni_set_encrypt_key:
movups 16(%eax),%xmm2
movl $13,%ecx
leal 16(%edx),%edx
- movaps %xmm0,-32(%edx)
- movaps %xmm2,-16(%edx)
+ movups %xmm0,-32(%edx)
+ movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
call .L097key_256a_cold
.byte 102,15,58,223,200,1
@@ -2085,13 +2090,13 @@ _aesni_set_encrypt_key:
call .L098key_256b
.byte 102,15,58,223,202,64
call .L099key_256a
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
movl %ecx,16(%edx)
xorl %eax,%eax
ret
.align 16
.L099key_256a:
- movaps %xmm2,(%edx)
+ movups %xmm2,(%edx)
leal 16(%edx),%edx
.L097key_256a_cold:
shufps $16,%xmm0,%xmm4
@@ -2103,7 +2108,7 @@ _aesni_set_encrypt_key:
ret
.align 16
.L098key_256b:
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
@@ -2146,26 +2151,26 @@ aesni_set_decrypt_key:
testl %eax,%eax
jnz .L100dec_key_ret
leal 16(%edx,%ecx,1),%eax
- movaps (%edx),%xmm0
- movaps (%eax),%xmm1
- movaps %xmm0,(%eax)
- movaps %xmm1,(%edx)
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+ movups %xmm0,(%eax)
+ movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
.L101dec_key_inverse:
- movaps (%edx),%xmm0
- movaps (%eax),%xmm1
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leal 16(%edx),%edx
leal -16(%eax),%eax
- movaps %xmm0,16(%eax)
- movaps %xmm1,-16(%edx)
+ movups %xmm0,16(%eax)
+ movups %xmm1,-16(%edx)
cmpl %edx,%eax
ja .L101dec_key_inverse
- movaps (%edx),%xmm0
+ movups (%edx),%xmm0
.byte 102,15,56,219,192
- movaps %xmm0,(%edx)
+ movups %xmm0,(%edx)
xorl %eax,%eax
.L100dec_key_ret:
ret
--
1.7.4.1