diff --git a/merge_from_0.9.8k.patch b/merge_from_0.9.8k.patch index 58c861e..12c168b 100644 --- a/merge_from_0.9.8k.patch +++ b/merge_from_0.9.8k.patch @@ -1,8 +1,6 @@ -Index: openssl-1.0.0/Configure -=================================================================== ---- openssl-1.0.0.orig/Configure -+++ openssl-1.0.0/Configure -@@ -866,7 +866,7 @@ PROCESS_ARGS: +--- openssl-1.0.1c.orig/Configure ++++ openssl-1.0.1c/Configure +@@ -931,7 +931,7 @@ PROCESS_ARGS: } else { @@ -11,7 +9,7 @@ Index: openssl-1.0.0/Configure $target=$_; } -@@ -1105,7 +1105,7 @@ else { $lflags=$prelflags; undef $pre +@@ -1204,7 +1204,7 @@ if ($target =~ /^mingw/ && `$cc --target my $no_shared_warn=0; my $no_user_cflags=0; @@ -20,13 +18,11 @@ Index: openssl-1.0.0/Configure else { $no_user_cflags=1; } # Kerberos settings. The flavor must be provided from outside, either through -Index: openssl-1.0.0/config -=================================================================== ---- openssl-1.0.0.orig/config -+++ openssl-1.0.0/config -@@ -560,7 +560,8 @@ case "$GUESSOS" in - fi - OUT="darwin-i386-cc" ;; +--- openssl-1.0.1c.orig/config ++++ openssl-1.0.1c/config +@@ -573,7 +573,8 @@ case "$GUESSOS" in + options="$options -arch%20${MACHINE}" + OUT="iphoneos-cross" ;; alpha-*-linux2) - ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo` + #ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo` @@ -34,7 +30,7 @@ Index: openssl-1.0.0/config case ${ISA:-generic} in *[678]) OUT="linux-alpha+bwx-$CC" ;; *) OUT="linux-alpha-$CC" ;; -@@ -580,7 +581,8 @@ case "$GUESSOS" in +@@ -593,7 +594,8 @@ case "$GUESSOS" in echo " You have about 5 seconds to press Ctrl-C to abort." (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 fi @@ -43,8 +39,8 @@ Index: openssl-1.0.0/config + OUT="linux-ppc64" ;; ppc-*-linux2) OUT="linux-ppc" ;; - ia64-*-linux?) OUT="linux-ia64" ;; -@@ -596,10 +598,10 @@ case "$GUESSOS" in + ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; +@@ -614,10 +616,10 @@ case "$GUESSOS" in sparc-*-linux2) KARCH=`awk '/^type/{print$3;exit(0);}' /proc/cpuinfo` case ${KARCH:-sun4} in @@ -59,7 +55,7 @@ Index: openssl-1.0.0/config esac ;; parisc*-*-linux2) # 64-bit builds under parisc64 linux are not supported and -@@ -618,7 +620,11 @@ case "$GUESSOS" in +@@ -636,7 +638,11 @@ case "$GUESSOS" in # PA8500 -> 8000 (2.0) # PA8600 -> 8000 (2.0) diff --git a/openssl-1.0.0b-aesni.patch b/openssl-1.0.0b-aesni.patch deleted file mode 100644 index b760aa2..0000000 --- a/openssl-1.0.0b-aesni.patch +++ /dev/null @@ -1,2377 +0,0 @@ ---- Configure.orig -+++ Configure -@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket"; - my $bits1="THIRTY_TWO_BIT "; - my $bits2="SIXTY_FOUR_BIT "; - --my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; -+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; - - my $x86_elf_asm="$x86_asm:elf"; - --my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; -+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; - my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void"; - my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void"; - my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void"; -@@ -491,7 +491,7 @@ my %table=( - # - # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64 - "VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32", --"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", -+"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", - "debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32", - "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", - # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement -@@ -1410,6 +1410,7 @@ if ($rmd160_obj =~ /\.o$/) - if ($aes_obj =~ /\.o$/) - { - $cflags.=" -DAES_ASM"; -+ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2); - } - else { - $aes_obj=$aes_enc; ---- /dev/null -+++ crypto/aes/asm/aesni-x86.pl -@@ -0,0 +1,765 @@ -+#!/usr/bin/env perl -+ -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# This module implements support for Intel AES-NI extension. In -+# OpenSSL context it's used with Intel engine, but can also be used as -+# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for -+# details]. -+ -+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script -+ # generates drop-in replacement for -+ # crypto/aes/asm/aes-586.pl:-) -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+push(@INC,"${dir}","${dir}../../perlasm"); -+require "x86asm.pl"; -+ -+&asm_init($ARGV[0],$0); -+ -+$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups"); -+ -+$len="eax"; -+$rounds="ecx"; -+$key="edx"; -+$inp="esi"; -+$out="edi"; -+$rounds_="ebx"; # backup copy for $rounds -+$key_="ebp"; # backup copy for $key -+ -+$inout0="xmm0"; -+$inout1="xmm1"; -+$inout2="xmm2"; -+$rndkey0="xmm3"; -+$rndkey1="xmm4"; -+$ivec="xmm5"; -+$in0="xmm6"; -+$in1="xmm7"; $inout3="xmm7"; -+ -+# Inline version of internal aesni_[en|de]crypt1 -+sub aesni_inline_generate1 -+{ my $p=shift; -+ -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &set_label("${p}1_loop"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &dec ($rounds); -+ &$movekey ($rndkey1,&QWP(0,$key)); -+ &lea ($key,&DWP(16,$key)); -+ &jnz (&label("${p}1_loop")); -+ eval"&aes${p}last ($inout0,$rndkey1)"; -+} -+ -+sub aesni_generate1 # fully unrolled loop -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt1"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(0x10,$key)); -+ &cmp ($rounds,11); -+ &pxor ($inout0,$rndkey0); -+ &$movekey ($rndkey0,&QWP(0x20,$key)); -+ &lea ($key,&DWP(0x30,$key)); -+ &jb (&label("${p}128")); -+ &lea ($key,&DWP(0x20,$key)); -+ &je (&label("${p}192")); -+ &lea ($key,&DWP(0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(-0x40,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(-0x30,$key)); -+ &set_label("${p}192"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(-0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(-0x10,$key)); -+ &set_label("${p}128"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x10,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x30,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x40,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x50,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x60,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x70,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt1"); -+} -+ -+# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); -+# &aesni_generate1("dec"); -+&function_begin_B("${PREFIX}_encrypt"); -+ &mov ("eax",&wparam(0)); -+ &mov ($key,&wparam(2)); -+ &movups ($inout0,&QWP(0,"eax")); -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ("eax",&wparam(1)); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); -+ &movups (&QWP(0,"eax"),$inout0); -+ &ret (); -+&function_end_B("${PREFIX}_encrypt"); -+ -+# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key); -+# &aesni_generate1("dec"); -+&function_begin_B("${PREFIX}_decrypt"); -+ &mov ("eax",&wparam(0)); -+ &mov ($key,&wparam(2)); -+ &movups ($inout0,&QWP(0,"eax")); -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ("eax",&wparam(1)); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1"); -+ &movups (&QWP(0,"eax"),$inout0); -+ &ret (); -+&function_end_B("${PREFIX}_decrypt"); -+ -+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave -+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] -+# latency is 6, it turned out that it can be scheduled only every -+# *second* cycle. Thus 3x interleave is the one providing optimal -+# utilization, i.e. when subroutine's throughput is virtually same as -+# of non-interleaved subroutine [for number of input blocks up to 3]. -+# This is why it makes no sense to implement 2x subroutine. As soon -+# as/if Intel improves throughput by making it possible to schedule -+# the instructions in question *every* cycles I would have to -+# implement 6x interleave and use it in loop... -+sub aesni_generate3 -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt3"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &shr ($rounds,1); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &pxor ($inout1,$rndkey0); -+ &pxor ($inout2,$rndkey0); -+ &jmp (&label("${p}3_loop")); -+ &set_label("${p}3_loop",16); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ &dec ($rounds); -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &lea ($key,&DWP(32,$key)); -+ eval"&aes${p} ($inout1,$rndkey0)"; -+ eval"&aes${p} ($inout2,$rndkey0)"; -+ &jnz (&label("${p}3_loop")); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ eval"&aes${p}last ($inout1,$rndkey0)"; -+ eval"&aes${p}last ($inout2,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt3"); -+} -+ -+# 4x interleave is implemented to improve small block performance, -+# most notably [and naturally] 4 block by ~30%. One can argue that one -+# should have implemented 5x as well, but improvement would be <20%, -+# so it's not worth it... -+sub aesni_generate4 -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt4"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &shr ($rounds,1); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &pxor ($inout1,$rndkey0); -+ &pxor ($inout2,$rndkey0); -+ &pxor ($inout3,$rndkey0); -+ &jmp (&label("${p}3_loop")); -+ &set_label("${p}3_loop",16); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ &dec ($rounds); -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p} ($inout3,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &lea ($key,&DWP(32,$key)); -+ eval"&aes${p} ($inout1,$rndkey0)"; -+ eval"&aes${p} ($inout2,$rndkey0)"; -+ eval"&aes${p} ($inout3,$rndkey0)"; -+ &jnz (&label("${p}3_loop")); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p} ($inout3,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ eval"&aes${p}last ($inout1,$rndkey0)"; -+ eval"&aes${p}last ($inout2,$rndkey0)"; -+ eval"&aes${p}last ($inout3,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt4"); -+} -+&aesni_generate3("enc") if ($PREFIX eq "aesni"); -+&aesni_generate3("dec"); -+&aesni_generate4("enc") if ($PREFIX eq "aesni"); -+&aesni_generate4("dec"); -+ -+if ($PREFIX eq "aesni") { -+# void aesni_ecb_encrypt (const void *in, void *out, -+# size_t length, const AES_KEY *key, -+# int enc); -+&function_begin("aesni_ecb_encrypt"); -+ &mov ($inp,&wparam(0)); -+ &mov ($out,&wparam(1)); -+ &mov ($len,&wparam(2)); -+ &mov ($key,&wparam(3)); -+ &mov ($rounds,&wparam(4)); -+ &cmp ($len,16); -+ &jb (&label("ecb_ret")); -+ &and ($len,-16); -+ &test ($rounds,$rounds) -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ($key_,$key); # backup $key -+ &mov ($rounds_,$rounds); # backup $rounds -+ &jz (&label("ecb_decrypt")); -+ -+ &sub ($len,0x40); -+ &jbe (&label("ecb_enc_tail")); -+ &jmp (&label("ecb_enc_loop3")); -+ -+&set_label("ecb_enc_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &call ("_aesni_encrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("ecb_enc_loop3")); -+ -+&set_label("ecb_enc_tail"); -+ &add ($len,0x40); -+ &jz (&label("ecb_ret")); -+ -+ &cmp ($len,0x10); -+ &movups ($inout0,&QWP(0,$inp)); -+ &je (&label("ecb_enc_one")); -+ &cmp ($len,0x20); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &je (&label("ecb_enc_two")); -+ &cmp ($len,0x30); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &je (&label("ecb_enc_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_encrypt4"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movups (&QWP(0x30,$out),$inout3); -+ jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_one",16); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_two",16); -+ &call ("_aesni_encrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_three",16); -+ &call ("_aesni_encrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_decrypt",16); -+ &sub ($len,0x40); -+ &jbe (&label("ecb_dec_tail")); -+ &jmp (&label("ecb_dec_loop3")); -+ -+&set_label("ecb_dec_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &call ("_aesni_decrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("ecb_dec_loop3")); -+ -+&set_label("ecb_dec_tail"); -+ &add ($len,0x40); -+ &jz (&label("ecb_ret")); -+ -+ &cmp ($len,0x10); -+ &movups ($inout0,&QWP(0,$inp)); -+ &je (&label("ecb_dec_one")); -+ &cmp ($len,0x20); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &je (&label("ecb_dec_two")); -+ &cmp ($len,0x30); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &je (&label("ecb_dec_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_decrypt4"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movups (&QWP(0x30,$out),$inout3); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_one",16); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_two",16); -+ &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_three",16); -+ &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ -+&set_label("ecb_ret"); -+&function_end("aesni_ecb_encrypt"); -+} -+ -+# void $PREFIX_cbc_encrypt (const void *inp, void *out, -+# size_t length, const AES_KEY *key, -+# unsigned char *ivp,const int enc); -+&function_begin("${PREFIX}_cbc_encrypt"); -+ &mov ($inp,&wparam(0)); -+ &mov ($out,&wparam(1)); -+ &mov ($len,&wparam(2)); -+ &mov ($key,&wparam(3)); -+ &test ($len,$len); -+ &mov ($key_,&wparam(4)); -+ &jz (&label("cbc_ret")); -+ -+ &cmp (&wparam(5),0); -+ &movups ($ivec,&QWP(0,$key_)); # load IV -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ($key_,$key); # backup $key -+ &mov ($rounds_,$rounds); # backup $rounds -+ &je (&label("cbc_decrypt")); -+ -+ &movaps ($inout0,$ivec); -+ &cmp ($len,16); -+ &jb (&label("cbc_enc_tail")); -+ &sub ($len,16); -+ &jmp (&label("cbc_enc_loop")); -+ -+&set_label("cbc_enc_loop",16); -+ &movups ($ivec,&QWP(0,$inp)); -+ &lea ($inp,&DWP(16,$inp)); -+ &pxor ($inout0,$ivec); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt3"); -+ &sub ($len,16); -+ &lea ($out,&DWP(16,$out)); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-16,$out),$inout0); -+ &jnc (&label("cbc_enc_loop")); -+ &add ($len,16); -+ &jnz (&label("cbc_enc_tail")); -+ &movaps ($ivec,$inout0); -+ &jmp (&label("cbc_ret")); -+ -+&set_label("cbc_enc_tail"); -+ &mov ("ecx",$len); # zaps $rounds -+ &data_word(0xA4F3F689); # rep movsb -+ &mov ("ecx",16); # zero tail -+ &sub ("ecx",$len); -+ &xor ("eax","eax"); # zaps $len -+ &data_word(0xAAF3F689); # rep stosb -+ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block -+ &mov ($rounds,$rounds_); # restore $rounds -+ &mov ($inp,$out); # $inp and $out are the same -+ &mov ($key,$key_); # restore $key -+ &jmp (&label("cbc_enc_loop")); -+ -+&set_label("cbc_decrypt",16); -+ &sub ($len,0x40); -+ &jbe (&label("cbc_dec_tail")); -+ &jmp (&label("cbc_dec_loop3")); -+ -+&set_label("cbc_dec_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &movaps ($in0,$inout0); -+ &movaps ($in1,$inout1); -+ &call ("_aesni_decrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups ($ivec,&QWP(-0x10,$inp)); -+ &pxor ($inout2,$in1); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($rounds,$rounds_) # restore $rounds -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("cbc_dec_loop3")); -+ -+&set_label("cbc_dec_tail"); -+ &add ($len,0x40); -+ &jz (&label("cbc_ret")); -+ -+ &movups ($inout0,&QWP(0,$inp)); -+ &cmp ($len,0x10); -+ &movaps ($in0,$inout0); -+ &jbe (&label("cbc_dec_one")); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &cmp ($len,0x20); -+ &movaps ($in1,$inout1); -+ &jbe (&label("cbc_dec_two")); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &cmp ($len,0x30); -+ &jbe (&label("cbc_dec_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_decrypt4"); -+ &movups ($rndkey0,&QWP(0x10,$inp)); -+ &movups ($rndkey1,&QWP(0x20,$inp)); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups ($ivec,&QWP(0x30,$inp)); -+ &movups (&QWP(0,$out),$inout0); -+ &pxor ($inout2,$rndkey0); -+ &pxor ($inout3,$rndkey1); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movaps ($inout0,$inout3); -+ &lea ($out,&DWP(0x30,$out)); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_one"); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &movaps ($ivec,$in0); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_two"); -+ &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups (&QWP(0,$out),$inout0); -+ &movaps ($inout0,$inout1); -+ &movaps ($ivec,$in1); -+ &lea ($out,&DWP(0x10,$out)); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_three"); -+ &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &pxor ($inout2,$in1); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movaps ($inout0,$inout2); -+ &movups ($ivec,&QWP(0x20,$inp)); -+ &lea ($out,&DWP(0x20,$out)); -+ -+&set_label("cbc_dec_tail_collected"); -+ &and ($len,15); -+ &jnz (&label("cbc_dec_tail_partial")); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("cbc_ret")); -+ -+&set_label("cbc_dec_tail_partial"); -+ &mov ($key_,"esp"); -+ &sub ("esp",16); -+ &and ("esp",-16); -+ &movaps (&QWP(0,"esp"),$inout0); -+ &mov ($inp,"esp"); -+ &mov ("ecx",$len); -+ &data_word(0xA4F3F689); # rep movsb -+ &mov ("esp",$key_); -+ -+&set_label("cbc_ret"); -+ &mov ($key_,&wparam(4)); -+ &movups (&QWP(0,$key_),$ivec); # output IV -+&function_end("${PREFIX}_cbc_encrypt"); -+ -+# Mechanical port from aesni-x86_64.pl. -+# -+# _aesni_set_encrypt_key is private interface, -+# input: -+# "eax" const unsigned char *userKey -+# $rounds int bits -+# $key AES_KEY *key -+# output: -+# "eax" return code -+# $round rounds -+ -+&function_begin_B("_aesni_set_encrypt_key"); -+ &test ("eax","eax"); -+ &jz (&label("bad_pointer")); -+ &test ($key,$key); -+ &jz (&label("bad_pointer")); -+ -+ &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey -+ &pxor ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 -+ &lea ($key,&DWP(16,$key)); -+ &cmp ($rounds,256); -+ &je (&label("14rounds")); -+ &cmp ($rounds,192); -+ &je (&label("12rounds")); -+ &cmp ($rounds,128); -+ &jne (&label("bad_keybits")); -+ -+&set_label("10rounds",16); -+ &mov ($rounds,9); -+ &$movekey (&QWP(-16,$key),"xmm0"); # round 0 -+ &aeskeygenassist("xmm1","xmm0",0x01); # round 1 -+ &call (&label("key_128_cold")); -+ &aeskeygenassist("xmm1","xmm0",0x2); # round 2 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x04); # round 3 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x08); # round 4 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x10); # round 5 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x20); # round 6 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x40); # round 7 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x80); # round 8 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x1b); # round 9 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x36); # round 10 -+ &call (&label("key_128")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(80,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_128",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_128_cold"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100,); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b11111111); # critical path -+ &pxor ("xmm0","xmm1"); -+ &ret(); -+ -+&set_label("12rounds",16); -+ &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey -+ &mov ($rounds,11); -+ &$movekey (&QWP(-16,$key),"xmm0") # round 0 -+ &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 -+ &call (&label("key_192a_cold")); -+ &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12 -+ &call (&label("key_192b")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(48,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_192a",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_192a_cold",16); -+ &movaps ("xmm5","xmm2"); -+&set_label("key_192b_warm"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &movaps ("xmm3","xmm2"); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100); -+ &pslldq ("xmm3",4); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b01010101); # critical path -+ &pxor ("xmm2","xmm3"); -+ &pxor ("xmm0","xmm1"); -+ &pshufd ("xmm3","xmm0",0b11111111); -+ &pxor ("xmm2","xmm3"); -+ &ret(); -+ -+&set_label("key_192b",16); -+ &movaps ("xmm3","xmm0"); -+ &shufps ("xmm5","xmm0",0b01000100); -+ &$movekey (&QWP(0,$key),"xmm5"); -+ &shufps ("xmm3","xmm2",0b01001110); -+ &$movekey (&QWP(16,$key),"xmm3"); -+ &lea ($key,&DWP(32,$key)); -+ &jmp (&label("key_192b_warm")); -+ -+&set_label("14rounds",16); -+ &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey -+ &mov ($rounds,13); -+ &lea ($key,&DWP(16,$key)); -+ &$movekey (&QWP(-32,$key),"xmm0"); # round 0 -+ &$movekey (&QWP(-16,$key),"xmm2"); # round 1 -+ &aeskeygenassist("xmm1","xmm2",0x01); # round 2 -+ &call (&label("key_256a_cold")); -+ &aeskeygenassist("xmm1","xmm0",0x01); # round 3 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x02); # round 4 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x02); # round 5 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x04); # round 6 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x04); # round 7 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x08); # round 8 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x08); # round 9 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x10); # round 10 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x10); # round 11 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x20); # round 12 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x20); # round 13 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x40); # round 14 -+ &call (&label("key_256a")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(16,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_256a",16); -+ &$movekey (&QWP(0,$key),"xmm2"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_256a_cold"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b11111111); # critical path -+ &pxor ("xmm0","xmm1"); -+ &ret(); -+ -+&set_label("key_256b",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+ -+ &shufps ("xmm4","xmm2",0b00010000); -+ &pxor ("xmm2","xmm4"); -+ &shufps ("xmm4","xmm2",0b10001100); -+ &pxor ("xmm2","xmm4"); -+ &pshufd ("xmm1","xmm1",0b10101010); # critical path -+ &pxor ("xmm2","xmm1"); -+ &ret(); -+ -+&set_label("bad_pointer",4); -+ &mov ("eax",-1); -+ &ret (); -+&set_label("bad_keybits",4); -+ &mov ("eax",-2); -+ &ret (); -+&function_end_B("_aesni_set_encrypt_key"); -+ -+# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits, -+# AES_KEY *key) -+&function_begin_B("${PREFIX}_set_encrypt_key"); -+ &mov ("eax",&wparam(0)); -+ &mov ($rounds,&wparam(1)); -+ &mov ($key,&wparam(2)); -+ &call ("_aesni_set_encrypt_key"); -+ &ret (); -+&function_end_B("${PREFIX}_set_encrypt_key"); -+ -+# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits, -+# AES_KEY *key) -+&function_begin_B("${PREFIX}_set_decrypt_key"); -+ &mov ("eax",&wparam(0)); -+ &mov ($rounds,&wparam(1)); -+ &mov ($key,&wparam(2)); -+ &call ("_aesni_set_encrypt_key"); -+ &mov ($key,&wparam(2)); -+ &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key -+ &test ("eax","eax"); -+ &jnz (&label("dec_key_ret")); -+ &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule -+ -+ &$movekey ("xmm0",&QWP(0,$key)); # just swap -+ &$movekey ("xmm1",&QWP(0,"eax")); -+ &$movekey (&QWP(0,"eax"),"xmm0"); -+ &$movekey (&QWP(0,$key),"xmm1"); -+ &lea ($key,&DWP(16,$key)); -+ &lea ("eax",&DWP(-16,"eax")); -+ -+&set_label("dec_key_inverse"); -+ &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse -+ &$movekey ("xmm1",&QWP(0,"eax")); -+ &aesimc ("xmm0","xmm0"); -+ &aesimc ("xmm1","xmm1"); -+ &lea ($key,&DWP(16,$key)); -+ &lea ("eax",&DWP(-16,"eax")); -+ &cmp ("eax",$key); -+ &$movekey (&QWP(16,"eax"),"xmm0"); -+ &$movekey (&QWP(-16,$key),"xmm1"); -+ &ja (&label("dec_key_inverse")); -+ -+ &$movekey ("xmm0",&QWP(0,$key)); # inverse middle -+ &aesimc ("xmm0","xmm0"); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ -+ &xor ("eax","eax"); # return success -+&set_label("dec_key_ret"); -+ &ret (); -+&function_end_B("${PREFIX}_set_decrypt_key"); -+&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); -+ -+&asm_finish(); ---- /dev/null -+++ crypto/aes/asm/aesni-x86_64.pl -@@ -0,0 +1,991 @@ -+#!/usr/bin/env perl -+# -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# This module implements support for Intel AES-NI extension. In -+# OpenSSL context it's used with Intel engine, but can also be used as -+# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for -+# details]. -+ -+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script -+ # generates drop-in replacement for -+ # crypto/aes/asm/aes-x86_64.pl:-) -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+open STDOUT,"| $^X $xlate $flavour $output"; -+ -+$movkey = $PREFIX eq "aesni" ? "movaps" : "movups"; -+@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order -+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order -+ -+$code=".text\n"; -+ -+$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! -+# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... -+$inp="%rdi"; -+$out="%rsi"; -+$len="%rdx"; -+$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! -+$ivp="%r8"; # cbc -+ -+$rnds_="%r10d"; # backup copy for $rounds -+$key_="%r11"; # backup copy for $key -+ -+# %xmm register layout -+$inout0="%xmm0"; $inout1="%xmm1"; -+$inout2="%xmm2"; $inout3="%xmm3"; -+$rndkey0="%xmm4"; $rndkey1="%xmm5"; -+ -+$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt -+$in1="%xmm8"; $in2="%xmm9"; -+ -+# Inline version of internal aesni_[en|de]crypt1. -+# -+# Why folded loop? Because aes[enc|dec] is slow enough to accommodate -+# cycles which take care of loop variables... -+{ my $sn; -+sub aesni_generate1 { -+my ($p,$key,$rounds)=@_; -+++$sn; -+$code.=<<___; -+ $movkey ($key),$rndkey0 -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+.Loop_${p}1_$sn: -+ aes${p} $rndkey1,$inout0 -+ dec $rounds -+ $movkey ($key),$rndkey1 -+ lea 16($key),$key -+ jnz .Loop_${p}1_$sn # loop body is 16 bytes -+ aes${p}last $rndkey1,$inout0 -+___ -+}} -+# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); -+# -+{ my ($inp,$out,$key) = @_4args; -+ -+$code.=<<___; -+.globl ${PREFIX}_encrypt -+.type ${PREFIX}_encrypt,\@abi-omnipotent -+.align 16 -+${PREFIX}_encrypt: -+ movups ($inp),$inout0 # load input -+ mov 240($key),$rounds # pull $rounds -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) # output -+ ret -+.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt -+ -+.globl ${PREFIX}_decrypt -+.type ${PREFIX}_decrypt,\@abi-omnipotent -+.align 16 -+${PREFIX}_decrypt: -+ movups ($inp),$inout0 # load input -+ mov 240($key),$rounds # pull $rounds -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) # output -+ ret -+.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt -+___ -+} -+ -+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave -+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] -+# latency is 6, it turned out that it can be scheduled only every -+# *second* cycle. Thus 3x interleave is the one providing optimal -+# utilization, i.e. when subroutine's throughput is virtually same as -+# of non-interleaved subroutine [for number of input blocks up to 3]. -+# This is why it makes no sense to implement 2x subroutine. As soon -+# as/if Intel improves throughput by making it possible to schedule -+# the instructions in question *every* cycles I would have to -+# implement 6x interleave and use it in loop... -+sub aesni_generate3 { -+my $dir=shift; -+# As already mentioned it takes in $key and $rounds, which are *not* -+# preserved. $inout[0-2] is cipher/clear text... -+$code.=<<___; -+.type _aesni_${dir}rypt3,\@abi-omnipotent -+.align 16 -+_aesni_${dir}rypt3: -+ $movkey ($key),$rndkey0 -+ shr \$1,$rounds -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+ pxor $rndkey0,$inout1 -+ pxor $rndkey0,$inout2 -+ -+.L${dir}_loop3: -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ dec $rounds -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey0,$inout0 -+ $movkey 16($key),$rndkey1 -+ aes${dir} $rndkey0,$inout1 -+ lea 32($key),$key -+ aes${dir} $rndkey0,$inout2 -+ jnz .L${dir}_loop3 -+ -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ aes${dir} $rndkey1,$inout2 -+ aes${dir}last $rndkey0,$inout0 -+ aes${dir}last $rndkey0,$inout1 -+ aes${dir}last $rndkey0,$inout2 -+ ret -+.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 -+___ -+} -+# 4x interleave is implemented to improve small block performance, -+# most notably [and naturally] 4 block by ~30%. One can argue that one -+# should have implemented 5x as well, but improvement would be <20%, -+# so it's not worth it... -+sub aesni_generate4 { -+my $dir=shift; -+# As already mentioned it takes in $key and $rounds, which are *not* -+# preserved. $inout[0-3] is cipher/clear text... -+$code.=<<___; -+.type _aesni_${dir}rypt4,\@abi-omnipotent -+.align 16 -+_aesni_${dir}rypt4: -+ $movkey ($key),$rndkey0 -+ shr \$1,$rounds -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+ pxor $rndkey0,$inout1 -+ pxor $rndkey0,$inout2 -+ pxor $rndkey0,$inout3 -+ -+.L${dir}_loop4: -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ dec $rounds -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey1,$inout3 -+ aes${dir} $rndkey0,$inout0 -+ $movkey 16($key),$rndkey1 -+ aes${dir} $rndkey0,$inout1 -+ lea 32($key),$key -+ aes${dir} $rndkey0,$inout2 -+ aes${dir} $rndkey0,$inout3 -+ jnz .L${dir}_loop4 -+ -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey1,$inout3 -+ aes${dir}last $rndkey0,$inout0 -+ aes${dir}last $rndkey0,$inout1 -+ aes${dir}last $rndkey0,$inout2 -+ aes${dir}last $rndkey0,$inout3 -+ ret -+.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 -+___ -+} -+&aesni_generate3("enc") if ($PREFIX eq "aesni"); -+&aesni_generate3("dec"); -+&aesni_generate4("enc") if ($PREFIX eq "aesni"); -+&aesni_generate4("dec"); -+ -+if ($PREFIX eq "aesni") { -+# void aesni_ecb_encrypt (const void *in, void *out, -+# size_t length, const AES_KEY *key, -+# int enc); -+$code.=<<___; -+.globl aesni_ecb_encrypt -+.type aesni_ecb_encrypt,\@function,5 -+.align 16 -+aesni_ecb_encrypt: -+ cmp \$16,$len # check length -+ jb .Lecb_ret -+ -+ mov 240($key),$rounds # pull $rounds -+ and \$-16,$len -+ mov $key,$key_ # backup $key -+ test %r8d,%r8d # 5th argument -+ mov $rounds,$rnds_ # backup $rounds -+ jz .Lecb_decrypt -+#--------------------------- ECB ENCRYPT ------------------------------# -+ sub \$0x40,$len -+ jbe .Lecb_enc_tail -+ jmp .Lecb_enc_loop3 -+.align 16 -+.Lecb_enc_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ call _aesni_encrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lecb_enc_loop3 -+ -+.Lecb_enc_tail: -+ add \$0x40,$len -+ jz .Lecb_ret -+ -+ cmp \$0x10,$len -+ movups ($inp),$inout0 -+ je .Lecb_enc_one -+ cmp \$0x20,$len -+ movups 0x10($inp),$inout1 -+ je .Lecb_enc_two -+ cmp \$0x30,$len -+ movups 0x20($inp),$inout2 -+ je .Lecb_enc_three -+ movups 0x30($inp),$inout3 -+ call _aesni_encrypt4 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ movups $inout3,0x30($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_one: -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_two: -+ call _aesni_encrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_three: -+ call _aesni_encrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ jmp .Lecb_ret -+ #--------------------------- ECB DECRYPT ------------------------------# -+.align 16 -+.Lecb_decrypt: -+ sub \$0x40,$len -+ jbe .Lecb_dec_tail -+ jmp .Lecb_dec_loop3 -+.align 16 -+.Lecb_dec_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ call _aesni_decrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lecb_dec_loop3 -+ -+.Lecb_dec_tail: -+ add \$0x40,$len -+ jz .Lecb_ret -+ -+ cmp \$0x10,$len -+ movups ($inp),$inout0 -+ je .Lecb_dec_one -+ cmp \$0x20,$len -+ movups 0x10($inp),$inout1 -+ je .Lecb_dec_two -+ cmp \$0x30,$len -+ movups 0x20($inp),$inout2 -+ je .Lecb_dec_three -+ movups 0x30($inp),$inout3 -+ call _aesni_decrypt4 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ movups $inout3,0x30($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_one: -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_two: -+ call _aesni_decrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_three: -+ call _aesni_decrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ -+.Lecb_ret: -+ ret -+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt -+___ -+} -+ -+# void $PREFIX_cbc_encrypt (const void *inp, void *out, -+# size_t length, const AES_KEY *key, -+# unsigned char *ivp,const int enc); -+$reserved = $win64?0x40:-0x18; # used in decrypt -+$code.=<<___; -+.globl ${PREFIX}_cbc_encrypt -+.type ${PREFIX}_cbc_encrypt,\@function,6 -+.align 16 -+${PREFIX}_cbc_encrypt: -+ test $len,$len # check length -+ jz .Lcbc_ret -+ -+ mov 240($key),$rnds_ # pull $rounds -+ mov $key,$key_ # backup $key -+ test %r9d,%r9d # 6th argument -+ jz .Lcbc_decrypt -+#--------------------------- CBC ENCRYPT ------------------------------# -+ movups ($ivp),$inout0 # load iv as initial state -+ cmp \$16,$len -+ mov $rnds_,$rounds -+ jb .Lcbc_enc_tail -+ sub \$16,$len -+ jmp .Lcbc_enc_loop -+.align 16 -+.Lcbc_enc_loop: -+ movups ($inp),$inout1 # load input -+ lea 16($inp),$inp -+ pxor $inout1,$inout0 -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ sub \$16,$len -+ lea 16($out),$out -+ mov $rnds_,$rounds # restore $rounds -+ mov $key_,$key # restore $key -+ movups $inout0,-16($out) # store output -+ jnc .Lcbc_enc_loop -+ add \$16,$len -+ jnz .Lcbc_enc_tail -+ movups $inout0,($ivp) -+ jmp .Lcbc_ret -+ -+.Lcbc_enc_tail: -+ mov $len,%rcx # zaps $key -+ xchg $inp,$out # $inp is %rsi and $out is %rdi now -+ .long 0x9066A4F3 # rep movsb -+ mov \$16,%ecx # zero tail -+ sub $len,%rcx -+ xor %eax,%eax -+ .long 0x9066AAF3 # rep stosb -+ lea -16(%rdi),%rdi # rewind $out by 1 block -+ mov $rnds_,$rounds # restore $rounds -+ mov %rdi,%rsi # $inp and $out are the same -+ mov $key_,$key # restore $key -+ xor $len,$len # len=16 -+ jmp .Lcbc_enc_loop # one more spin -+ #--------------------------- CBC DECRYPT ------------------------------# -+.align 16 -+.Lcbc_decrypt: -+___ -+$code.=<<___ if ($win64); -+ lea -0x58(%rsp),%rsp -+ movaps %xmm6,(%rsp) -+ movaps %xmm7,0x10(%rsp) -+ movaps %xmm8,0x20(%rsp) -+ movaps %xmm9,0x30(%rsp) -+.Lcbc_decrypt_body: -+___ -+$code.=<<___; -+ movups ($ivp),$iv -+ sub \$0x40,$len -+ mov $rnds_,$rounds -+ jbe .Lcbc_dec_tail -+ jmp .Lcbc_dec_loop3 -+.align 16 -+.Lcbc_dec_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ movaps $inout0,$in0 -+ movaps $inout1,$in1 -+ movaps $inout2,$in2 -+ call _aesni_decrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movaps $in2,$iv -+ pxor $in1,$inout2 -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lcbc_dec_loop3 -+ -+.Lcbc_dec_tail: -+ add \$0x40,$len -+ movups $iv,($ivp) -+ jz .Lcbc_dec_ret -+ -+ movups ($inp),$inout0 -+ cmp \$0x10,$len -+ movaps $inout0,$in0 -+ jbe .Lcbc_dec_one -+ movups 0x10($inp),$inout1 -+ cmp \$0x20,$len -+ movaps $inout1,$in1 -+ jbe .Lcbc_dec_two -+ movups 0x20($inp),$inout2 -+ cmp \$0x30,$len -+ movaps $inout2,$in2 -+ jbe .Lcbc_dec_three -+ movups 0x30($inp),$inout3 -+ call _aesni_decrypt4 -+ pxor $iv,$inout0 -+ movups 0x30($inp),$iv -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ pxor $in1,$inout2 -+ movups $inout1,0x10($out) -+ pxor $in2,$inout3 -+ movups $inout2,0x20($out) -+ movaps $inout3,$inout0 -+ lea 0x30($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_one: -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ pxor $iv,$inout0 -+ movaps $in0,$iv -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_two: -+ call _aesni_decrypt3 -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ movaps $in1,$iv -+ movaps $inout1,$inout0 -+ lea 0x10($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_three: -+ call _aesni_decrypt3 -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ pxor $in1,$inout2 -+ movups $inout1,0x10($out) -+ movaps $in2,$iv -+ movaps $inout2,$inout0 -+ lea 0x20($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_tail_collected: -+ and \$15,$len -+ movups $iv,($ivp) -+ jnz .Lcbc_dec_tail_partial -+ movups $inout0,($out) -+ jmp .Lcbc_dec_ret -+.Lcbc_dec_tail_partial: -+ movaps $inout0,$reserved(%rsp) -+ mov $out,%rdi -+ mov $len,%rcx -+ lea $reserved(%rsp),%rsi -+ .long 0x9066A4F3 # rep movsb -+ -+.Lcbc_dec_ret: -+___ -+$code.=<<___ if ($win64); -+ movaps (%rsp),%xmm6 -+ movaps 0x10(%rsp),%xmm7 -+ movaps 0x20(%rsp),%xmm8 -+ movaps 0x30(%rsp),%xmm9 -+ lea 0x58(%rsp),%rsp -+___ -+$code.=<<___; -+.Lcbc_ret: -+ ret -+.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt -+___ -+ -+# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, -+# int bits, AES_KEY *key) -+{ my ($inp,$bits,$key) = @_4args; -+ $bits =~ s/%r/%e/; -+ -+$code.=<<___; -+.globl ${PREFIX}_set_decrypt_key -+.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent -+.align 16 -+${PREFIX}_set_decrypt_key: -+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 -+ call _aesni_set_encrypt_key -+ shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key -+ test %eax,%eax -+ jnz .Ldec_key_ret -+ lea 16($key,$bits),$inp # points at the end of key schedule -+ -+ $movkey ($key),%xmm0 # just swap -+ $movkey ($inp),%xmm1 -+ $movkey %xmm0,($inp) -+ $movkey %xmm1,($key) -+ lea 16($key),$key -+ lea -16($inp),$inp -+ -+.Ldec_key_inverse: -+ $movkey ($key),%xmm0 # swap and inverse -+ $movkey ($inp),%xmm1 -+ aesimc %xmm0,%xmm0 -+ aesimc %xmm1,%xmm1 -+ lea 16($key),$key -+ lea -16($inp),$inp -+ cmp $key,$inp -+ $movkey %xmm0,16($inp) -+ $movkey %xmm1,-16($key) -+ ja .Ldec_key_inverse -+ -+ $movkey ($key),%xmm0 # inverse middle -+ aesimc %xmm0,%xmm0 -+ $movkey %xmm0,($inp) -+.Ldec_key_ret: -+ add \$8,%rsp -+ ret -+.LSEH_end_set_decrypt_key: -+.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key -+___ -+ -+# This is based on submission by -+# -+# Huang Ying -+# Vinodh Gopal -+# Kahraman Akdemir -+# -+# Agressively optimized in respect to aeskeygenassist's critical path -+# and is contained in %xmm0-5 to meet Win64 ABI requirement. -+# -+$code.=<<___; -+.globl ${PREFIX}_set_encrypt_key -+.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent -+.align 16 -+${PREFIX}_set_encrypt_key: -+_aesni_set_encrypt_key: -+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 -+ test $inp,$inp -+ mov \$-1,%rax -+ jz .Lenc_key_ret -+ test $key,$key -+ jz .Lenc_key_ret -+ -+ movups ($inp),%xmm0 # pull first 128 bits of *userKey -+ pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0 -+ lea 16($key),%rax -+ cmp \$256,$bits -+ je .L14rounds -+ cmp \$192,$bits -+ je .L12rounds -+ cmp \$128,$bits -+ jne .Lbad_keybits -+ -+.L10rounds: -+ mov \$9,$bits # 10 rounds for 128-bit key -+ $movkey %xmm0,($key) # round 0 -+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 -+ call .Lkey_expansion_128_cold -+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10 -+ call .Lkey_expansion_128 -+ $movkey %xmm0,(%rax) -+ mov $bits,80(%rax) # 240(%rdx) -+ xor %eax,%eax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.L12rounds: -+ movq 16($inp),%xmm2 # remaining 1/3 of *userKey -+ mov \$11,$bits # 12 rounds for 192 -+ $movkey %xmm0,($key) # round 0 -+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 -+ call .Lkey_expansion_192a_cold -+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 -+ call .Lkey_expansion_192b -+ $movkey %xmm0,(%rax) -+ mov $bits,48(%rax) # 240(%rdx) -+ xor %rax, %rax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.L14rounds: -+ movups 16($inp),%xmm2 # remaning half of *userKey -+ mov \$13,$bits # 14 rounds for 256 -+ lea 16(%rax),%rax -+ $movkey %xmm0,($key) # round 0 -+ $movkey %xmm2,16($key) # round 1 -+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 -+ call .Lkey_expansion_256a_cold -+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 -+ call .Lkey_expansion_256a -+ $movkey %xmm0,(%rax) -+ mov $bits,16(%rax) # 240(%rdx) -+ xor %rax,%rax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.Lbad_keybits: -+ mov \$-2,%rax -+.Lenc_key_ret: -+ add \$8,%rsp -+ ret -+.LSEH_end_set_encrypt_key: -+ -+.align 16 -+.Lkey_expansion_128: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_128_cold: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ pxor %xmm4, %xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pxor %xmm4, %xmm0 -+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm0 -+ ret -+ -+.align 16 -+.Lkey_expansion_192a: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_192a_cold: -+ movaps %xmm2, %xmm5 -+.Lkey_expansion_192b_warm: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ movaps %xmm2,%xmm3 -+ pxor %xmm4,%xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pslldq \$4,%xmm3 -+ pxor %xmm4,%xmm0 -+ pshufd \$0b01010101,%xmm1,%xmm1 # critical path -+ pxor %xmm3,%xmm2 -+ pxor %xmm1,%xmm0 -+ pshufd \$0b11111111,%xmm0,%xmm3 -+ pxor %xmm3,%xmm2 -+ ret -+ -+.align 16 -+.Lkey_expansion_192b: -+ movaps %xmm0,%xmm3 -+ shufps \$0b01000100,%xmm0,%xmm5 -+ $movkey %xmm5,(%rax) -+ shufps \$0b01001110,%xmm2,%xmm3 -+ $movkey %xmm3,16(%rax) -+ lea 32(%rax),%rax -+ jmp .Lkey_expansion_192b_warm -+ -+.align 16 -+.Lkey_expansion_256a: -+ $movkey %xmm2,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_256a_cold: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ pxor %xmm4,%xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pxor %xmm4,%xmm0 -+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm0 -+ ret -+ -+.align 16 -+.Lkey_expansion_256b: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+ -+ shufps \$0b00010000,%xmm2,%xmm4 -+ pxor %xmm4,%xmm2 -+ shufps \$0b10001100,%xmm2,%xmm4 -+ pxor %xmm4,%xmm2 -+ pshufd \$0b10101010,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm2 -+ ret -+.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key -+___ -+} -+ -+$code.=<<___; -+.asciz "AES for Intel AES-NI, CRYPTOGAMS by " -+.align 64 -+___ -+ -+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -+# CONTEXT *context,DISPATCHER_CONTEXT *disp) -+if ($win64) { -+$rec="%rcx"; -+$frame="%rdx"; -+$context="%r8"; -+$disp="%r9"; -+ -+$code.=<<___; -+.extern __imp_RtlVirtualUnwind -+.type cbc_se_handler,\@abi-omnipotent -+.align 16 -+cbc_se_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 152($context),%rax # pull context->Rsp -+ mov 248($context),%rbx # pull context->Rip -+ -+ lea .Lcbc_decrypt(%rip),%r10 -+ cmp %r10,%rbx # context->Rip<"prologue" label -+ jb .Lin_prologue -+ -+ lea .Lcbc_decrypt_body(%rip),%r10 -+ cmp %r10,%rbx # context->RipRip>="epilogue" label -+ jae .Lin_prologue -+ -+ lea 0(%rax),%rsi # top of stack -+ lea 512($context),%rdi # &context.Xmm6 -+ mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) -+ .long 0xa548f3fc # cld; rep movsq -+ lea 0x58(%rax),%rax # adjust stack pointer -+ jmp .Lin_prologue -+ -+.Lrestore_rax: -+ mov 120($context),%rax -+.Lin_prologue: -+ mov 8(%rax),%rdi -+ mov 16(%rax),%rsi -+ mov %rax,152($context) # restore context->Rsp -+ mov %rsi,168($context) # restore context->Rsi -+ mov %rdi,176($context) # restore context->Rdi -+ -+ jmp .Lcommon_seh_exit -+.size cbc_se_handler,.-cbc_se_handler -+ -+.type ecb_se_handler,\@abi-omnipotent -+.align 16 -+ecb_se_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 152($context),%rax # pull context->Rsp -+ mov 8(%rax),%rdi -+ mov 16(%rax),%rsi -+ mov %rsi,168($context) # restore context->Rsi -+ mov %rdi,176($context) # restore context->Rdi -+ -+.Lcommon_seh_exit: -+ -+ mov 40($disp),%rdi # disp->ContextRecord -+ mov $context,%rsi # context -+ mov \$154,%ecx # sizeof(CONTEXT) -+ .long 0xa548f3fc # cld; rep movsq -+ -+ mov $disp,%rsi -+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER -+ mov 8(%rsi),%rdx # arg2, disp->ImageBase -+ mov 0(%rsi),%r8 # arg3, disp->ControlPc -+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry -+ mov 40(%rsi),%r10 # disp->ContextRecord -+ lea 56(%rsi),%r11 # &disp->HandlerData -+ lea 24(%rsi),%r12 # &disp->EstablisherFrame -+ mov %r10,32(%rsp) # arg5 -+ mov %r11,40(%rsp) # arg6 -+ mov %r12,48(%rsp) # arg7 -+ mov %rcx,56(%rsp) # arg8, (NULL) -+ call *__imp_RtlVirtualUnwind(%rip) -+ -+ mov \$1,%eax # ExceptionContinueSearch -+ add \$64,%rsp -+ popfq -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %rbp -+ pop %rbx -+ pop %rdi -+ pop %rsi -+ ret -+.size cbc_se_handler,.-cbc_se_handler -+ -+.section .pdata -+.align 4 -+ .rva .LSEH_begin_${PREFIX}_ecb_encrypt -+ .rva .LSEH_end_${PREFIX}_ecb_encrypt -+ .rva .LSEH_info_ecb -+ -+ .rva .LSEH_begin_${PREFIX}_cbc_encrypt -+ .rva .LSEH_end_${PREFIX}_cbc_encrypt -+ .rva .LSEH_info_cbc -+ -+ .rva ${PREFIX}_set_decrypt_key -+ .rva .LSEH_end_set_decrypt_key -+ .rva .LSEH_info_key -+ -+ .rva ${PREFIX}_set_encrypt_key -+ .rva .LSEH_end_set_encrypt_key -+ .rva .LSEH_info_key -+.section .xdata -+.align 8 -+.LSEH_info_ecb: -+ .byte 9,0,0,0 -+ .rva ecb_se_handler -+.LSEH_info_cbc: -+ .byte 9,0,0,0 -+ .rva cbc_se_handler -+.LSEH_info_key: -+ .byte 0x01,0x04,0x01,0x00 -+ .byte 0x04,0x02,0x00,0x00 -+___ -+} -+ -+sub rex { -+ local *opcode=shift; -+ my ($dst,$src)=@_; -+ -+ if ($dst>=8 || $src>=8) { -+ $rex=0x40; -+ $rex|=0x04 if($dst>=8); -+ $rex|=0x01 if($src>=8); -+ push @opcode,$rex; -+ } -+} -+ -+sub aesni { -+ my $line=shift; -+ my @opcode=(0x66); -+ -+ if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { -+ rex(\@opcode,$4,$3); -+ push @opcode,0x0f,0x3a,0xdf; -+ push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M -+ my $c=$2; -+ push @opcode,$c=~/^0/?oct($c):$c; -+ return ".byte\t".join(',',@opcode); -+ } -+ elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { -+ my %opcodelet = ( -+ "aesimc" => 0xdb, -+ "aesenc" => 0xdc, "aesenclast" => 0xdd, -+ "aesdec" => 0xde, "aesdeclast" => 0xdf -+ ); -+ return undef if (!defined($opcodelet{$1})); -+ rex(\@opcode,$3,$2); -+ push @opcode,0x0f,0x38,$opcodelet{$1}; -+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M -+ return ".byte\t".join(',',@opcode); -+ } -+ return $line; -+} -+ -+$code =~ s/\`([^\`]*)\`/eval($1)/gem; -+$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; -+ -+print $code; -+ -+close STDOUT; ---- crypto/aes/Makefile.orig -+++ crypto/aes/Makefile -@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S - - aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl - $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -+aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl -+ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ - - aes-x86_64.s: asm/aes-x86_64.pl - $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@ -+aesni-x86_64.s: asm/aesni-x86_64.pl -+ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ - - aes-sparcv9.s: asm/aes-sparcv9.pl - $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ ---- /dev/null -+++ crypto/engine/eng_aesni.c -@@ -0,0 +1,413 @@ -+/* -+ * Support for Intel AES-NI intruction set -+ * Author: Huang Ying -+ * -+ * Intel AES-NI is a new set of Single Instruction Multiple Data -+ * (SIMD) instructions that are going to be introduced in the next -+ * generation of Intel processor, as of 2009. These instructions -+ * enable fast and secure data encryption and decryption, using the -+ * Advanced Encryption Standard (AES), defined by FIPS Publication -+ * number 197. The architecture introduces six instructions that -+ * offer full hardware support for AES. Four of them support high -+ * performance data encryption and decryption, and the other two -+ * instructions support the AES key expansion procedure. -+ * -+ * The white paper can be downloaded from: -+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf -+ * -+ * This file is based on engines/e_padlock.c -+ */ -+ -+/* ==================================================================== -+ * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * 3. All advertising materials mentioning features or use of this -+ * software must display the following acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" -+ * -+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to -+ * endorse or promote products derived from this software without -+ * prior written permission. For written permission, please contact -+ * licensing@OpenSSL.org. -+ * -+ * 5. Products derived from this software may not be called "OpenSSL" -+ * nor may "OpenSSL" appear in their names without prior written -+ * permission of the OpenSSL Project. -+ * -+ * 6. Redistributions of any form whatsoever must retain the following -+ * acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY -+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR -+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ * ==================================================================== -+ * -+ * This product includes cryptographic software written by Eric Young -+ * (eay@cryptsoft.com). This product includes software written by Tim -+ * Hudson (tjh@cryptsoft.com). -+ * -+ */ -+ -+ -+#include -+ -+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_NI) && !defined(OPENSSL_NO_AES) -+ -+#include -+#include "cryptlib.h" -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* AES-NI is available *ONLY* on some x86 CPUs. Not only that it -+ doesn't exist elsewhere, but it even can't be compiled on other -+ platforms! */ -+#undef COMPILE_HW_AESNI -+#if (defined(__x86_64) || defined(__x86_64__) || \ -+ defined(_M_AMD64) || defined(_M_X64) || \ -+ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) -+#define COMPILE_HW_AESNI -+static ENGINE *ENGINE_aesni (void); -+#endif -+ -+void ENGINE_load_aesni (void) -+{ -+/* On non-x86 CPUs it just returns. */ -+#ifdef COMPILE_HW_AESNI -+ ENGINE *toadd = ENGINE_aesni(); -+ if (!toadd) -+ return; -+ if(ENGINE_add (toadd)) -+ ENGINE_register_complete (toadd); -+ ENGINE_free (toadd); -+ ERR_clear_error (); -+#endif -+} -+ -+#ifdef COMPILE_HW_AESNI -+int aesni_set_encrypt_key(const unsigned char *userKey, int bits, -+ AES_KEY *key); -+int aesni_set_decrypt_key(const unsigned char *userKey, int bits, -+ AES_KEY *key); -+ -+void aesni_encrypt(const unsigned char *in, unsigned char *out, -+ const AES_KEY *key); -+void aesni_decrypt(const unsigned char *in, unsigned char *out, -+ const AES_KEY *key); -+ -+void aesni_ecb_encrypt(const unsigned char *in, -+ unsigned char *out, -+ size_t length, -+ const AES_KEY *key, -+ int enc); -+void aesni_cbc_encrypt(const unsigned char *in, -+ unsigned char *out, -+ size_t length, -+ const AES_KEY *key, -+ unsigned char *ivec, int enc); -+ -+/* Function for ENGINE detection and control */ -+static int aesni_init(ENGINE *e); -+ -+/* Cipher Stuff */ -+static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher, -+ const int **nids, int nid); -+ -+#define AESNI_MIN_ALIGN 16 -+#define AESNI_ALIGN(x) \ -+ ((void *)(((unsigned long)(x)+AESNI_MIN_ALIGN-1)&~(AESNI_MIN_ALIGN-1))) -+ -+/* Engine names */ -+static const char aesni_id[] = "aesni", -+ aesni_name[] = "Intel AES-NI engine", -+ no_aesni_name[] = "Intel AES-NI engine (no-aesni)"; -+ -+/* ===== Engine "management" functions ===== */ -+ -+#if defined(_WIN32) -+typedef unsigned __int64 IA32CAP; -+#else -+typedef unsigned long long IA32CAP; -+#endif -+ -+/* Prepare the ENGINE structure for registration */ -+static int -+aesni_bind_helper(ENGINE *e) -+{ -+ int engage; -+ if (sizeof(OPENSSL_ia32cap_P) > 4) { -+ engage = (OPENSSL_ia32cap_P >> 57) & 1; -+ } else { -+ IA32CAP OPENSSL_ia32_cpuid(void); -+ engage = (OPENSSL_ia32_cpuid() >> 57) & 1; -+ } -+ -+ /* Register everything or return with an error */ -+ if (!ENGINE_set_id(e, aesni_id) || -+ !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) || -+ -+ !ENGINE_set_init_function(e, aesni_init) || -+ (engage && !ENGINE_set_ciphers (e, aesni_ciphers)) -+ ) -+ return 0; -+ -+ /* Everything looks good */ -+ return 1; -+} -+ -+/* Constructor */ -+static ENGINE * -+ENGINE_aesni(void) -+{ -+ ENGINE *eng = ENGINE_new(); -+ -+ if (!eng) { -+ return NULL; -+ } -+ -+ if (!aesni_bind_helper(eng)) { -+ ENGINE_free(eng); -+ return NULL; -+ } -+ -+ return eng; -+} -+ -+/* Check availability of the engine */ -+static int -+aesni_init(ENGINE *e) -+{ -+ return 1; -+} -+ -+#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) -+#define NID_aes_128_cfb NID_aes_128_cfb128 -+#endif -+ -+#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) -+#define NID_aes_128_ofb NID_aes_128_ofb128 -+#endif -+ -+#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) -+#define NID_aes_192_cfb NID_aes_192_cfb128 -+#endif -+ -+#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) -+#define NID_aes_192_ofb NID_aes_192_ofb128 -+#endif -+ -+#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) -+#define NID_aes_256_cfb NID_aes_256_cfb128 -+#endif -+ -+#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) -+#define NID_aes_256_ofb NID_aes_256_ofb128 -+#endif -+ -+/* List of supported ciphers. */ -+static int aesni_cipher_nids[] = { -+ NID_aes_128_ecb, -+ NID_aes_128_cbc, -+ NID_aes_128_cfb, -+ NID_aes_128_ofb, -+ -+ NID_aes_192_ecb, -+ NID_aes_192_cbc, -+ NID_aes_192_cfb, -+ NID_aes_192_ofb, -+ -+ NID_aes_256_ecb, -+ NID_aes_256_cbc, -+ NID_aes_256_cfb, -+ NID_aes_256_ofb, -+}; -+static int aesni_cipher_nids_num = -+ (sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0])); -+ -+typedef struct -+{ -+ AES_KEY ks; -+ unsigned int _pad1[3]; -+} AESNI_KEY; -+ -+static int -+aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key, -+ const unsigned char *iv, int enc) -+{ -+ int ret; -+ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ -+ if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE -+ || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE -+ || enc) -+ ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key); -+ else -+ ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key); -+ -+ if(ret < 0) { -+ EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int aesni_cipher_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ aesni_ecb_encrypt(in, out, inl, key, ctx->encrypt); -+ return 1; -+} -+static int aesni_cipher_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ aesni_cbc_encrypt(in, out, inl, key, -+ ctx->iv, ctx->encrypt); -+ return 1; -+} -+static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv, -+ &ctx->num, ctx->encrypt, -+ (block128_f)aesni_encrypt); -+ return 1; -+} -+static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv, -+ &ctx->num, (block128_f)aesni_encrypt); -+ return 1; -+} -+ -+#define AES_BLOCK_SIZE 16 -+ -+#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE -+#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE -+#define EVP_CIPHER_block_size_OFB 1 -+#define EVP_CIPHER_block_size_CFB 1 -+ -+/* Declaring so many ciphers by hand would be a pain. -+ Instead introduce a bit of preprocessor magic :-) */ -+#define DECLARE_AES_EVP(ksize,lmode,umode) \ -+static const EVP_CIPHER aesni_##ksize##_##lmode = { \ -+ NID_aes_##ksize##_##lmode, \ -+ EVP_CIPHER_block_size_##umode, \ -+ ksize / 8, \ -+ AES_BLOCK_SIZE, \ -+ 0 | EVP_CIPH_##umode##_MODE, \ -+ aesni_init_key, \ -+ aesni_cipher_##lmode, \ -+ NULL, \ -+ sizeof(AESNI_KEY), \ -+ EVP_CIPHER_set_asn1_iv, \ -+ EVP_CIPHER_get_asn1_iv, \ -+ NULL, \ -+ NULL \ -+} -+ -+DECLARE_AES_EVP(128,ecb,ECB); -+DECLARE_AES_EVP(128,cbc,CBC); -+DECLARE_AES_EVP(128,cfb,CFB); -+DECLARE_AES_EVP(128,ofb,OFB); -+ -+DECLARE_AES_EVP(192,ecb,ECB); -+DECLARE_AES_EVP(192,cbc,CBC); -+DECLARE_AES_EVP(192,cfb,CFB); -+DECLARE_AES_EVP(192,ofb,OFB); -+ -+DECLARE_AES_EVP(256,ecb,ECB); -+DECLARE_AES_EVP(256,cbc,CBC); -+DECLARE_AES_EVP(256,cfb,CFB); -+DECLARE_AES_EVP(256,ofb,OFB); -+ -+static int -+aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher, -+ const int **nids, int nid) -+{ -+ /* No specific cipher => return a list of supported nids ... */ -+ if (!cipher) { -+ *nids = aesni_cipher_nids; -+ return aesni_cipher_nids_num; -+ } -+ -+ /* ... or the requested "cipher" otherwise */ -+ switch (nid) { -+ case NID_aes_128_ecb: -+ *cipher = &aesni_128_ecb; -+ break; -+ case NID_aes_128_cbc: -+ *cipher = &aesni_128_cbc; -+ break; -+ case NID_aes_128_cfb: -+ *cipher = &aesni_128_cfb; -+ break; -+ case NID_aes_128_ofb: -+ *cipher = &aesni_128_ofb; -+ break; -+ -+ case NID_aes_192_ecb: -+ *cipher = &aesni_192_ecb; -+ break; -+ case NID_aes_192_cbc: -+ *cipher = &aesni_192_cbc; -+ break; -+ case NID_aes_192_cfb: -+ *cipher = &aesni_192_cfb; -+ break; -+ case NID_aes_192_ofb: -+ *cipher = &aesni_192_ofb; -+ break; -+ -+ case NID_aes_256_ecb: -+ *cipher = &aesni_256_ecb; -+ break; -+ case NID_aes_256_cbc: -+ *cipher = &aesni_256_cbc; -+ break; -+ case NID_aes_256_cfb: -+ *cipher = &aesni_256_cfb; -+ break; -+ case NID_aes_256_ofb: -+ *cipher = &aesni_256_ofb; -+ break; -+ -+ default: -+ /* Sorry, we don't support this NID */ -+ *cipher = NULL; -+ return 0; -+ } -+ -+ return 1; -+} -+ -+#endif /* COMPILE_HW_AESNI */ -+#endif /* !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) && !defined(OPENSSL_NO_AES) */ ---- crypto/engine/eng_all.c.orig -+++ crypto/engine/eng_all.c -@@ -71,6 +71,9 @@ void ENGINE_load_builtin_engines(void) - #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) - ENGINE_load_cryptodev(); - #endif -+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) -+ ENGINE_load_aesni(); -+#endif - ENGINE_load_dynamic(); - #ifndef OPENSSL_NO_STATIC_ENGINE - #ifndef OPENSSL_NO_HW ---- crypto/engine/engine.h.orig -+++ crypto/engine/engine.h -@@ -344,6 +344,7 @@ void ENGINE_load_gost(void); - #endif - #endif - void ENGINE_load_cryptodev(void); -+void ENGINE_load_aesni(void); - void ENGINE_load_builtin_engines(void); - - /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation ---- crypto/engine/Makefile.orig -+++ crypto/engine/Makefile -@@ -21,12 +21,14 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c e - eng_table.c eng_pkey.c eng_fat.c eng_all.c \ - tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \ - tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \ -- eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c -+ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \ -+ eng_aesni.c - LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \ - eng_table.o eng_pkey.o eng_fat.o eng_all.o \ - tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \ - tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \ -- eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o -+ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \ -+ eng_aesni.o - - SRC= $(LIBSRC) - ---- crypto/evp/evp_err.c.orig -+++ crypto/evp/evp_err.c -@@ -1,6 +1,6 @@ - /* crypto/evp/evp_err.c */ - /* ==================================================================== -- * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. -+ * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions -@@ -70,6 +70,7 @@ - - static ERR_STRING_DATA EVP_str_functs[]= - { -+{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, - {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, - {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, - {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, -@@ -86,7 +87,7 @@ static ERR_STRING_DATA EVP_str_functs[]= - {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"}, - {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"}, - {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"}, --{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"}, -+{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"}, - {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"}, - {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"}, - {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"}, ---- crypto/evp/evp.h.orig -+++ crypto/evp/evp.h -@@ -1190,6 +1190,7 @@ void ERR_load_EVP_strings(void); - /* Error codes for the EVP functions. */ - - /* Function codes. */ -+#define EVP_F_AESNI_INIT_KEY 163 - #define EVP_F_AES_INIT_KEY 133 - #define EVP_F_CAMELLIA_INIT_KEY 159 - #define EVP_F_D2I_PKEY 100 ---- /dev/null -+++ test/test_aesni -@@ -0,0 +1,69 @@ -+#!/bin/sh -+ -+PROG=$1 -+ -+if [ -x $PROG ]; then -+ if expr "x`$PROG version`" : "xOpenSSL" > /dev/null; then -+ : -+ else -+ echo "$PROG is not OpenSSL executable" -+ exit 1 -+ fi -+else -+ echo "$PROG is not executable" -+ exit 1; -+fi -+ -+if $PROG engine aesni | grep -v no-aesni; then -+ -+ HASH=`cat $PROG | $PROG dgst -hex` -+ -+ AES_ALGS=" aes-128-ecb aes-192-ecb aes-256-ecb \ -+ aes-128-cbc aes-192-cbc aes-256-cbc \ -+ aes-128-cfb aes-192-cfb aes-256-cfb \ -+ aes-128-ofb aes-192-ofb aes-256-ofb" -+ BUFSIZE="16 32 48 64 80 96 128 144 999" -+ -+ nerr=0 -+ -+ for alg in $AES_ALGS; do -+ echo $alg -+ for bufsize in $BUFSIZE; do -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ -+ $PROG enc -d -k "$HASH" -$alg | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg/$bufsize encrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ for bufsize in $BUFSIZE; do -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg | \ -+ $PROG enc -d -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg/$bufsize decrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg -engine aesni | \ -+ $PROG enc -d -k "$HASH" -$alg -engine aesni | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg en/decrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ -+ if [ $nerr -gt 0 ]; then -+ echo "AESNI engine test failed." -+ exit 1; -+ fi -+else -+ echo "AESNI engine is not available" -+fi -+ -+exit 0 diff --git a/openssl-1.0.0i.tar.bz2 b/openssl-1.0.0i.tar.bz2 deleted file mode 100644 index 8e00852..0000000 --- a/openssl-1.0.0i.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41cc7688650540f918c151eff5676e19b819121065bfe4abf0247f3f022cf49a -size 3241560 diff --git a/openssl-1.0.1c.tar.gz b/openssl-1.0.1c.tar.gz new file mode 100644 index 0000000..275c276 --- /dev/null +++ b/openssl-1.0.1c.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9eb3cd4e8b114eb9179c0d3884d61658e7d8e8bf4984798a5f5bd48e325ebe +size 4457113 diff --git a/openssl-call-engine-reg-comp.patch b/openssl-call-engine-reg-comp.patch deleted file mode 100644 index bf4cd6d..0000000 --- a/openssl-call-engine-reg-comp.patch +++ /dev/null @@ -1,24 +0,0 @@ -Add call to ENGINE_register_all_complete() to ENGINE_load_builtin_engines(), this means that some implementations will be used automatically, e.g. aesni, -Setup cpuid in ENGINE_load_builtin_engines() too as some ENGINEs use it. -Origin: UPSTREAM -URL: http://cvs.openssl.org/chngview?cn=19781 - ---- crypto/engine/eng_all.c.orig -+++ crypto/engine/eng_all.c -@@ -61,6 +61,8 @@ - - void ENGINE_load_builtin_engines(void) - { -+ /* Some ENGINEs need this */ -+ OPENSSL_cpuid_setup(); - #if 0 - /* There's no longer any need for an "openssl" ENGINE unless, one day, - * it is the *only* way for standard builtin implementations to be be -@@ -115,6 +117,7 @@ void ENGINE_load_builtin_engines(void) - ENGINE_load_capi(); - #endif - #endif -+ ENGINE_register_all_complete(); - } - - #if defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV) diff --git a/openssl.changes b/openssl.changes index f691fd8..2f1262e 100644 --- a/openssl.changes +++ b/openssl.changes @@ -1,3 +1,13 @@ +------------------------------------------------------------------- +Thu May 10 19:18:06 UTC 2012 - crrodriguez@opensuse.org + +- Update to version 1.0.1c for the complete list of changes see + NEWS, this only list packaging changes. +- Drop aes-ni patch, no longer needed as it is builtin in openssl + now. +- Define GNU_SOURCE and use -std=gnu99 to build the package. +- Use LFS_CFLAGS in platforms where it matters. + ------------------------------------------------------------------- Fri May 4 12:09:57 UTC 2012 - lnussel@suse.de diff --git a/openssl.spec b/openssl.spec index 6737dd5..cf6bae7 100644 --- a/openssl.spec +++ b/openssl.spec @@ -15,26 +15,30 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # -# norootforbuild +# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# Name: openssl -BuildRequires: bc ed pkg-config zlib-devel +BuildRequires: bc +BuildRequires: ed +BuildRequires: pkg-config +BuildRequires: zlib-devel %define ssletcdir %{_sysconfdir}/ssl -%define num_version %(echo "%{version}" | sed -e "s+[a-zA-Z]++g; s+_.*++g") -License: OpenSSL -Group: Productivity/Networking/Security +#%define num_version %(echo "%{version}" | sed -e "s+[a-zA-Z]++g; s+_.*++g") +%define num_version 1.0.0 Provides: ssl -AutoReqProv: on # bug437293 %ifarch ppc64 Obsoletes: openssl-64bit %endif -Version: 1.0.0i -Release: 1 +Version: 1.0.1c +Release: 0 Summary: Secure Sockets and Transport Layer Security +License: OpenSSL +Group: Productivity/Networking/Security Url: http://www.openssl.org/ -Source: http://www.%{name}.org/source/%{name}-%{version}.tar.bz2 +Source: http://www.%{name}.org/source/%{name}-%{version}.tar.gz # to get mtime of file: Source1: openssl.changes Source2: baselibs.conf @@ -42,19 +46,6 @@ Source10: README.SuSE Patch0: merge_from_0.9.8k.patch Patch1: openssl-1.0.0-c_rehash-compat.diff Patch2: bug610223.patch -#Patch3: CVE-2010-1633_and_CVE-2010-0742.patch -#Patch4: patchset-19727.diff -#Patch5: CVE-2010-2939.patch -#Patch6: CVE-2010-3864.patch -Patch7: openssl-1.0.0b-aesni.patch -#Patch8: CVE-2011-0014.patch -Patch10: openssl-call-engine-reg-comp.patch -#Patch11: Bug748738_Tolerate_bad_MIME_headers.patch -#Patch12: bug749213-Free-headers-after-use.patch -#Patch13: bug749210-Symmetric-crypto-errors-in-PKCS7_decrypt.patch -#Patch14: CVE-2012-1165.patch -#Patch15: CVE-2012-0884.patch -#Patch16: bug749735.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build %description @@ -87,7 +78,6 @@ Authors: Paul C. Sutton %package -n libopenssl1_0_0 -License: OpenSSL Summary: Secure Sockets and Transport Layer Security Group: Productivity/Networking/Security Recommends: openssl-certs @@ -127,12 +117,12 @@ Authors: Paul C. Sutton %package -n libopenssl-devel -License: OpenSSL Summary: Include Files and Libraries mandatory for Development Group: Development/Libraries/C and C++ Obsoletes: openssl-devel < %{version} -Requires: libopenssl1_0_0 = %{version} zlib-devel Requires: %name = %version +Requires: libopenssl1_0_0 = %{version} +Requires: zlib-devel Provides: openssl-devel = %{version} # bug437293 %ifarch ppc64 @@ -158,7 +148,6 @@ Authors: Paul C. Sutton %package doc -License: OpenSSL Summary: Additional Package Documentation Group: Productivity/Networking/Security BuildArch: noarch @@ -185,19 +174,6 @@ Authors: %patch0 -p1 %patch1 -p1 %patch2 -p1 -#%patch3 -p1 -#%patch4 -p1 -#%patch5 -p1 -#%patch6 -p1 -%patch7 -#%patch8 -p1 -%patch10 -#%patch11 -p1 -#%patch12 -p1 -#%patch13 -p1 -#%patch14 -p1 -#%patch15 -p1 -#%patch16 -p1 cp -p %{S:10} . echo "adding/overwriting some entries in the 'table' hash in Configure" # $dso_scheme:$shared_target:$shared_cflag:$shared_ldflag:$shared_extension:$ranlib:$arflags @@ -219,7 +195,7 @@ i "linux-elf-arm","gcc:-DL_ENDIAN ::-D_REENTRANT::-ldl:BN_LLONG:\${no_asm}: $DSO_SCHEME:", "linux-mips", "gcc:-DB_ENDIAN ::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:\${no_asm}: $DSO_SCHEME:", "linux-sparcv7","gcc:-DB_ENDIAN ::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:\${no_asm}: $DSO_SCHEME:", -"linux-sparcv8","gcc:-DB_ENDIAN -DBN_DIV2W -mv8 ::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::asm/sparcv8.o::::::::::::: $DSO_SCHEME:", +#"linux-sparcv8","gcc:-DB_ENDIAN -DBN_DIV2W -mv8 ::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR::asm/sparcv8.o::::::::::::: $DSO_SCHEME:", #"linux-x86_64", "gcc:-DL_ENDIAN -DNO_ASM -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG:\${no_asm}: $DSO_SCHEME:64", #"linux-s390", "gcc:-DB_ENDIAN ::(unknown): :-ldl:BN_LLONG:\${no_asm}: $DSO_SCHEME:", #"linux-s390x", "gcc:-DB_ENDIAN -DNO_ASM -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG:\${no_asm}: $DSO_SCHEME:64", @@ -242,12 +218,14 @@ zlib \ --prefix=%{_prefix} \ --libdir=%{_lib} \ --openssldir=%{ssletcdir} \ -$RPM_OPT_FLAGS \ +$RPM_OPT_FLAGS -std=gnu99 \ -Wa,--noexecstack \ -fomit-frame-pointer \ -DTERMIO \ -DPURIFY \ -DSSL_FORBID_ENULL \ +-D_GNU_SOURCE \ +$(getconf LFS_CFLAGS) \ %ifnarch hppa -Wall \ -fstack-protector "