930 lines
37 KiB
Diff
930 lines
37 KiB
Diff
|
commit 0fbc50ef0cb8894973d4739af62e95be825b7ccf
|
||
|
Author: trigpolynom <trigpolynom@gmail.com>
|
||
|
Date: Tue Oct 17 22:44:45 2023 -0400
|
||
|
|
||
|
aes-gcm-avx512.pl: fix non-reproducibility issue
|
||
|
|
||
|
Replace the random suffix with a counter, to make the
|
||
|
build reproducible.
|
||
|
|
||
|
Fixes #20954
|
||
|
|
||
|
Reviewed-by: Richard Levitte <levitte@openssl.org>
|
||
|
Reviewed-by: Matthias St. Pierre <Matthias.St.Pierre@ncp-e.com>
|
||
|
Reviewed-by: Tom Cosgrove <tom.cosgrove@arm.com>
|
||
|
Reviewed-by: Hugo Landau <hlandau@openssl.org>
|
||
|
(Merged from https://github.com/openssl/openssl/pull/22415)
|
||
|
|
||
|
diff --git a/crypto/modes/asm/aes-gcm-avx512.pl b/crypto/modes/asm/aes-gcm-avx512.pl
|
||
|
index afd2af941a..9f9124373b 100644
|
||
|
--- a/crypto/modes/asm/aes-gcm-avx512.pl
|
||
|
+++ b/crypto/modes/asm/aes-gcm-avx512.pl
|
||
|
@@ -155,6 +155,9 @@ my $STACK_LOCAL_OFFSET = ($STACK_HKEYS_OFFSET + $HKEYS_STORAGE);
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
my ($arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8, $arg9, $arg10, $arg11);
|
||
|
|
||
|
+# ; Counter used for assembly label generation
|
||
|
+my $label_count = 0;
|
||
|
+
|
||
|
# ; This implementation follows the convention: for non-leaf functions (they
|
||
|
# ; must call PROLOG) %rbp is used as a frame pointer, and has fixed offset from
|
||
|
# ; the function entry: $GP_STORAGE + [8 bytes alignment (Windows only)]. This
|
||
|
@@ -200,15 +203,6 @@ my $CTX_OFFSET_HTable = (16 * 6); # ; (Htable) Precomputed table (a
|
||
|
# ;;; Helper functions
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
-# ; Generates "random" local labels
|
||
|
-sub random_string() {
|
||
|
- my @chars = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '_');
|
||
|
- my $length = 15;
|
||
|
- my $str;
|
||
|
- map { $str .= $chars[rand(33)] } 1 .. $length;
|
||
|
- return $str;
|
||
|
-}
|
||
|
-
|
||
|
sub BYTE {
|
||
|
my ($reg) = @_;
|
||
|
if ($reg =~ /%r[abcd]x/i) {
|
||
|
@@ -417,7 +411,7 @@ ___
|
||
|
sub EPILOG {
|
||
|
my ($hkeys_storage_on_stack, $payload_len) = @_;
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
if ($hkeys_storage_on_stack && $CLEAR_HKEYS_STORAGE_ON_EXIT) {
|
||
|
|
||
|
@@ -425,13 +419,13 @@ sub EPILOG {
|
||
|
# ; were stored in the local frame storage
|
||
|
$code .= <<___;
|
||
|
cmpq \$`16*16`,$payload_len
|
||
|
- jbe .Lskip_hkeys_cleanup_${rndsuffix}
|
||
|
+ jbe .Lskip_hkeys_cleanup_${label_suffix}
|
||
|
vpxor %xmm0,%xmm0,%xmm0
|
||
|
___
|
||
|
for (my $i = 0; $i < int($HKEYS_STORAGE / 64); $i++) {
|
||
|
$code .= "vmovdqa64 %zmm0,`$STACK_HKEYS_OFFSET + 64*$i`(%rsp)\n";
|
||
|
}
|
||
|
- $code .= ".Lskip_hkeys_cleanup_${rndsuffix}:\n";
|
||
|
+ $code .= ".Lskip_hkeys_cleanup_${label_suffix}:\n";
|
||
|
}
|
||
|
|
||
|
if ($CLEAR_SCRATCH_REGISTERS) {
|
||
|
@@ -537,11 +531,11 @@ sub precompute_hkeys_on_stack {
|
||
|
&& $HKEYS_RANGE ne "first32"
|
||
|
&& $HKEYS_RANGE ne "last32");
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
test $HKEYS_READY,$HKEYS_READY
|
||
|
- jnz .L_skip_hkeys_precomputation_${rndsuffix}
|
||
|
+ jnz .L_skip_hkeys_precomputation_${label_suffix}
|
||
|
___
|
||
|
|
||
|
if ($HKEYS_RANGE eq "first16" || $HKEYS_RANGE eq "first32" || $HKEYS_RANGE eq "all") {
|
||
|
@@ -615,7 +609,7 @@ ___
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- $code .= ".L_skip_hkeys_precomputation_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_skip_hkeys_precomputation_${label_suffix}:\n";
|
||
|
}
|
||
|
|
||
|
# ;; =============================================================================
|
||
|
@@ -1418,20 +1412,20 @@ sub CALC_AAD_HASH {
|
||
|
|
||
|
my $SHFMSK = $ZT13;
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
mov $A_IN,$T1 # ; T1 = AAD
|
||
|
mov $A_LEN,$T2 # ; T2 = aadLen
|
||
|
or $T2,$T2
|
||
|
- jz .L_CALC_AAD_done_${rndsuffix}
|
||
|
+ jz .L_CALC_AAD_done_${label_suffix}
|
||
|
|
||
|
xor $HKEYS_READY,$HKEYS_READY
|
||
|
vmovdqa64 SHUF_MASK(%rip),$SHFMSK
|
||
|
|
||
|
-.L_get_AAD_loop48x16_${rndsuffix}:
|
||
|
+.L_get_AAD_loop48x16_${label_suffix}:
|
||
|
cmp \$`(48*16)`,$T2
|
||
|
- jl .L_exit_AAD_loop48x16_${rndsuffix}
|
||
|
+ jl .L_exit_AAD_loop48x16_${label_suffix}
|
||
|
___
|
||
|
|
||
|
$code .= <<___;
|
||
|
@@ -1499,15 +1493,15 @@ ___
|
||
|
|
||
|
$code .= <<___;
|
||
|
sub \$`(48*16)`,$T2
|
||
|
- je .L_CALC_AAD_done_${rndsuffix}
|
||
|
+ je .L_CALC_AAD_done_${label_suffix}
|
||
|
|
||
|
add \$`(48*16)`,$T1
|
||
|
- jmp .L_get_AAD_loop48x16_${rndsuffix}
|
||
|
+ jmp .L_get_AAD_loop48x16_${label_suffix}
|
||
|
|
||
|
-.L_exit_AAD_loop48x16_${rndsuffix}:
|
||
|
+.L_exit_AAD_loop48x16_${label_suffix}:
|
||
|
# ; Less than 48x16 bytes remaining
|
||
|
cmp \$`(32*16)`,$T2
|
||
|
- jl .L_less_than_32x16_${rndsuffix}
|
||
|
+ jl .L_less_than_32x16_${label_suffix}
|
||
|
___
|
||
|
|
||
|
$code .= <<___;
|
||
|
@@ -1556,14 +1550,14 @@ ___
|
||
|
|
||
|
$code .= <<___;
|
||
|
sub \$`(32*16)`,$T2
|
||
|
- je .L_CALC_AAD_done_${rndsuffix}
|
||
|
+ je .L_CALC_AAD_done_${label_suffix}
|
||
|
|
||
|
add \$`(32*16)`,$T1
|
||
|
- jmp .L_less_than_16x16_${rndsuffix}
|
||
|
+ jmp .L_less_than_16x16_${label_suffix}
|
||
|
|
||
|
-.L_less_than_32x16_${rndsuffix}:
|
||
|
+.L_less_than_32x16_${label_suffix}:
|
||
|
cmp \$`(16*16)`,$T2
|
||
|
- jl .L_less_than_16x16_${rndsuffix}
|
||
|
+ jl .L_less_than_16x16_${label_suffix}
|
||
|
# ; Get next 16 blocks
|
||
|
vmovdqu64 `64*0`($T1),$ZT1
|
||
|
vmovdqu64 `64*1`($T1),$ZT2
|
||
|
@@ -1588,11 +1582,11 @@ ___
|
||
|
|
||
|
$code .= <<___;
|
||
|
sub \$`(16*16)`,$T2
|
||
|
- je .L_CALC_AAD_done_${rndsuffix}
|
||
|
+ je .L_CALC_AAD_done_${label_suffix}
|
||
|
|
||
|
add \$`(16*16)`,$T1
|
||
|
# ; Less than 16x16 bytes remaining
|
||
|
-.L_less_than_16x16_${rndsuffix}:
|
||
|
+.L_less_than_16x16_${label_suffix}:
|
||
|
# ;; prep mask source address
|
||
|
lea byte64_len_to_mask_table(%rip),$T3
|
||
|
lea ($T3,$T2,8),$T3
|
||
|
@@ -1601,28 +1595,28 @@ ___
|
||
|
add \$15,@{[DWORD($T2)]}
|
||
|
shr \$4,@{[DWORD($T2)]}
|
||
|
cmp \$2,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_1_${rndsuffix}
|
||
|
- je .L_AAD_blocks_2_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_1_${label_suffix}
|
||
|
+ je .L_AAD_blocks_2_${label_suffix}
|
||
|
cmp \$4,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_3_${rndsuffix}
|
||
|
- je .L_AAD_blocks_4_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_3_${label_suffix}
|
||
|
+ je .L_AAD_blocks_4_${label_suffix}
|
||
|
cmp \$6,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_5_${rndsuffix}
|
||
|
- je .L_AAD_blocks_6_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_5_${label_suffix}
|
||
|
+ je .L_AAD_blocks_6_${label_suffix}
|
||
|
cmp \$8,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_7_${rndsuffix}
|
||
|
- je .L_AAD_blocks_8_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_7_${label_suffix}
|
||
|
+ je .L_AAD_blocks_8_${label_suffix}
|
||
|
cmp \$10,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_9_${rndsuffix}
|
||
|
- je .L_AAD_blocks_10_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_9_${label_suffix}
|
||
|
+ je .L_AAD_blocks_10_${label_suffix}
|
||
|
cmp \$12,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_11_${rndsuffix}
|
||
|
- je .L_AAD_blocks_12_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_11_${label_suffix}
|
||
|
+ je .L_AAD_blocks_12_${label_suffix}
|
||
|
cmp \$14,@{[DWORD($T2)]}
|
||
|
- jb .L_AAD_blocks_13_${rndsuffix}
|
||
|
- je .L_AAD_blocks_14_${rndsuffix}
|
||
|
+ jb .L_AAD_blocks_13_${label_suffix}
|
||
|
+ je .L_AAD_blocks_14_${label_suffix}
|
||
|
cmp \$15,@{[DWORD($T2)]}
|
||
|
- je .L_AAD_blocks_15_${rndsuffix}
|
||
|
+ je .L_AAD_blocks_15_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; fall through for 16 blocks
|
||
|
@@ -1635,7 +1629,7 @@ ___
|
||
|
# ;; - jump to reduction code
|
||
|
|
||
|
for (my $aad_blocks = 16; $aad_blocks > 0; $aad_blocks--) {
|
||
|
- $code .= ".L_AAD_blocks_${aad_blocks}_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_AAD_blocks_${aad_blocks}_${label_suffix}:\n";
|
||
|
if ($aad_blocks > 12) {
|
||
|
$code .= "sub \$`12*16*8`, $T3\n";
|
||
|
} elsif ($aad_blocks > 8) {
|
||
|
@@ -1656,11 +1650,11 @@ ___
|
||
|
if ($aad_blocks > 1) {
|
||
|
|
||
|
# ;; fall through to CALC_AAD_done in 1 block case
|
||
|
- $code .= "jmp .L_CALC_AAD_done_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_CALC_AAD_done_${label_suffix}\n";
|
||
|
}
|
||
|
|
||
|
}
|
||
|
- $code .= ".L_CALC_AAD_done_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_CALC_AAD_done_${label_suffix}:\n";
|
||
|
|
||
|
# ;; result in AAD_HASH
|
||
|
}
|
||
|
@@ -1710,13 +1704,13 @@ sub PARTIAL_BLOCK {
|
||
|
my $IA1 = $GPTMP2;
|
||
|
my $IA2 = $GPTMP0;
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
# ;; if no partial block present then LENGTH/DATA_OFFSET will be set to zero
|
||
|
mov ($PBLOCK_LEN),$LENGTH
|
||
|
or $LENGTH,$LENGTH
|
||
|
- je .L_partial_block_done_${rndsuffix} # ;Leave Macro if no partial blocks
|
||
|
+ je .L_partial_block_done_${label_suffix} # ;Leave Macro if no partial blocks
|
||
|
___
|
||
|
|
||
|
&READ_SMALL_DATA_INPUT($XTMP0, $PLAIN_CIPH_IN, $PLAIN_CIPH_LEN, $IA0, $IA2, $MASKREG);
|
||
|
@@ -1755,9 +1749,9 @@ ___
|
||
|
}
|
||
|
$code .= <<___;
|
||
|
sub \$16,$IA1
|
||
|
- jge .L_no_extra_mask_${rndsuffix}
|
||
|
+ jge .L_no_extra_mask_${label_suffix}
|
||
|
sub $IA1,$IA0
|
||
|
-.L_no_extra_mask_${rndsuffix}:
|
||
|
+.L_no_extra_mask_${label_suffix}:
|
||
|
# ;; get the appropriate mask to mask out bottom $LENGTH bytes of $XTMP1
|
||
|
# ;; - mask out bottom $LENGTH bytes of $XTMP1
|
||
|
# ;; sizeof(SHIFT_MASK) == 16 bytes
|
||
|
@@ -1781,7 +1775,7 @@ ___
|
||
|
}
|
||
|
$code .= <<___;
|
||
|
cmp \$0,$IA1
|
||
|
- jl .L_partial_incomplete_${rndsuffix}
|
||
|
+ jl .L_partial_incomplete_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; GHASH computation for the last <16 Byte block
|
||
|
@@ -1793,9 +1787,9 @@ ___
|
||
|
mov $LENGTH,$IA0
|
||
|
mov \$16,$LENGTH
|
||
|
sub $IA0,$LENGTH
|
||
|
- jmp .L_enc_dec_done_${rndsuffix}
|
||
|
+ jmp .L_enc_dec_done_${label_suffix}
|
||
|
|
||
|
-.L_partial_incomplete_${rndsuffix}:
|
||
|
+.L_partial_incomplete_${label_suffix}:
|
||
|
___
|
||
|
if ($win64) {
|
||
|
$code .= <<___;
|
||
|
@@ -1808,7 +1802,7 @@ ___
|
||
|
$code .= <<___;
|
||
|
mov $PLAIN_CIPH_LEN,$LENGTH
|
||
|
|
||
|
-.L_enc_dec_done_${rndsuffix}:
|
||
|
+.L_enc_dec_done_${label_suffix}:
|
||
|
# ;; output encrypted Bytes
|
||
|
|
||
|
lea byte_len_to_mask_table(%rip),$IA0
|
||
|
@@ -1826,7 +1820,7 @@ ___
|
||
|
$code .= <<___;
|
||
|
mov $CIPH_PLAIN_OUT,$IA0
|
||
|
vmovdqu8 $XTMP1,($IA0){$MASKREG}
|
||
|
-.L_partial_block_done_${rndsuffix}:
|
||
|
+.L_partial_block_done_${label_suffix}:
|
||
|
___
|
||
|
}
|
||
|
|
||
|
@@ -2016,7 +2010,7 @@ sub INITIAL_BLOCKS_PARTIAL_GHASH {
|
||
|
my $GM = $_[23]; # [in] ZMM with mid prodcut part
|
||
|
my $GL = $_[24]; # [in] ZMM with lo product part
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
# ;;; - Hash all but the last partial block of data
|
||
|
@@ -2034,7 +2028,7 @@ sub INITIAL_BLOCKS_PARTIAL_GHASH {
|
||
|
# ;; NOTE: the 'jl' is always taken for num_initial_blocks = 16.
|
||
|
# ;; This is run in the context of GCM_ENC_DEC_SMALL for length < 256.
|
||
|
cmp \$16,$LENGTH
|
||
|
- jl .L_small_initial_partial_block_${rndsuffix}
|
||
|
+ jl .L_small_initial_partial_block_${label_suffix}
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
# ;;; Handle a full length final block - encrypt and hash all blocks
|
||
|
@@ -2056,11 +2050,11 @@ ___
|
||
|
&GHASH_1_TO_16($GCM128_CTX, $HASH_IN_OUT, $ZT0, $ZT1, $ZT2, $ZT3, $ZT4,
|
||
|
$ZT5, $ZT6, $ZT7, $ZT8, &ZWORD($HASH_IN_OUT), $DAT0, $DAT1, $DAT2, $DAT3, $NUM_BLOCKS, $GH, $GM, $GL);
|
||
|
}
|
||
|
- $code .= "jmp .L_small_initial_compute_done_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_small_initial_compute_done_${label_suffix}\n";
|
||
|
}
|
||
|
|
||
|
$code .= <<___;
|
||
|
-.L_small_initial_partial_block_${rndsuffix}:
|
||
|
+.L_small_initial_partial_block_${label_suffix}:
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
# ;;; Handle ghash for a <16B final block
|
||
|
@@ -2125,7 +2119,7 @@ ___
|
||
|
# ;; a partial block of data, so xor that into the hash.
|
||
|
vpxorq $LAST_GHASH_BLK,$HASH_IN_OUT,$HASH_IN_OUT
|
||
|
# ;; The result is in $HASH_IN_OUT
|
||
|
- jmp .L_after_reduction_${rndsuffix}
|
||
|
+ jmp .L_after_reduction_${label_suffix}
|
||
|
___
|
||
|
}
|
||
|
|
||
|
@@ -2133,7 +2127,7 @@ ___
|
||
|
# ;;; After GHASH reduction
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
- $code .= ".L_small_initial_compute_done_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_small_initial_compute_done_${label_suffix}:\n";
|
||
|
|
||
|
# ;; If using init/update/finalize, we need to xor any partial block data
|
||
|
# ;; into the hash.
|
||
|
@@ -2144,13 +2138,13 @@ ___
|
||
|
$code .= <<___;
|
||
|
# ;; NOTE: for $NUM_BLOCKS = 16, $LENGTH, stored in [PBlockLen] is never zero
|
||
|
or $LENGTH,$LENGTH
|
||
|
- je .L_after_reduction_${rndsuffix}
|
||
|
+ je .L_after_reduction_${label_suffix}
|
||
|
___
|
||
|
}
|
||
|
$code .= "vpxorq $LAST_GHASH_BLK,$HASH_IN_OUT,$HASH_IN_OUT\n";
|
||
|
}
|
||
|
|
||
|
- $code .= ".L_after_reduction_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_after_reduction_${label_suffix}:\n";
|
||
|
|
||
|
# ;; Final hash is now in HASH_IN_OUT
|
||
|
}
|
||
|
@@ -2266,7 +2260,7 @@ sub GHASH_16_ENCRYPT_N_GHASH_N {
|
||
|
die "GHASH_16_ENCRYPT_N_GHASH_N: num_blocks is out of bounds = $NUM_BLOCKS\n"
|
||
|
if ($NUM_BLOCKS > 16 || $NUM_BLOCKS < 0);
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
my $GH1H = $HASH_IN_OUT;
|
||
|
|
||
|
@@ -2326,16 +2320,16 @@ ___
|
||
|
|
||
|
$code .= <<___;
|
||
|
cmp \$`(256 - $NUM_BLOCKS)`,@{[DWORD($CTR_CHECK)]}
|
||
|
- jae .L_16_blocks_overflow_${rndsuffix}
|
||
|
+ jae .L_16_blocks_overflow_${label_suffix}
|
||
|
___
|
||
|
|
||
|
&ZMM_OPCODE3_DSTR_SRC1R_SRC2R_BLOCKS_0_16(
|
||
|
$NUM_BLOCKS, "vpaddd", $B00_03, $B04_07, $B08_11, $B12_15, $CTR_BE,
|
||
|
$B00_03, $B04_07, $B08_11, $ADDBE_1234, $ADDBE_4x4, $ADDBE_4x4, $ADDBE_4x4);
|
||
|
$code .= <<___;
|
||
|
- jmp .L_16_blocks_ok_${rndsuffix}
|
||
|
+ jmp .L_16_blocks_ok_${label_suffix}
|
||
|
|
||
|
-.L_16_blocks_overflow_${rndsuffix}:
|
||
|
+.L_16_blocks_overflow_${label_suffix}:
|
||
|
vpshufb $SHFMSK,$CTR_BE,$CTR_BE
|
||
|
vpaddd ddq_add_1234(%rip),$CTR_BE,$B00_03
|
||
|
___
|
||
|
@@ -2355,7 +2349,7 @@ ___
|
||
|
$NUM_BLOCKS, "vpshufb", $B00_03, $B04_07, $B08_11, $B12_15, $B00_03,
|
||
|
$B04_07, $B08_11, $B12_15, $SHFMSK, $SHFMSK, $SHFMSK, $SHFMSK);
|
||
|
$code .= <<___;
|
||
|
-.L_16_blocks_ok_${rndsuffix}:
|
||
|
+.L_16_blocks_ok_${label_suffix}:
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
# ;; - pre-load constants
|
||
|
@@ -2805,53 +2799,53 @@ sub GCM_ENC_DEC_LAST {
|
||
|
my $MASKREG = $_[44]; # [clobbered] mask register
|
||
|
my $PBLOCK_LEN = $_[45]; # [in] partial block length
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
mov @{[DWORD($LENGTH)]},@{[DWORD($IA0)]}
|
||
|
add \$15,@{[DWORD($IA0)]}
|
||
|
shr \$4,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_0_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_0_${label_suffix}
|
||
|
|
||
|
cmp \$8,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_8_${rndsuffix}
|
||
|
- jb .L_last_num_blocks_is_7_1_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_8_${label_suffix}
|
||
|
+ jb .L_last_num_blocks_is_7_1_${label_suffix}
|
||
|
|
||
|
|
||
|
cmp \$12,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_12_${rndsuffix}
|
||
|
- jb .L_last_num_blocks_is_11_9_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_12_${label_suffix}
|
||
|
+ jb .L_last_num_blocks_is_11_9_${label_suffix}
|
||
|
|
||
|
# ;; 16, 15, 14 or 13
|
||
|
cmp \$15,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_15_${rndsuffix}
|
||
|
- ja .L_last_num_blocks_is_16_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_15_${label_suffix}
|
||
|
+ ja .L_last_num_blocks_is_16_${label_suffix}
|
||
|
cmp \$14,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_14_${rndsuffix}
|
||
|
- jmp .L_last_num_blocks_is_13_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_14_${label_suffix}
|
||
|
+ jmp .L_last_num_blocks_is_13_${label_suffix}
|
||
|
|
||
|
-.L_last_num_blocks_is_11_9_${rndsuffix}:
|
||
|
+.L_last_num_blocks_is_11_9_${label_suffix}:
|
||
|
# ;; 11, 10 or 9
|
||
|
cmp \$10,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_10_${rndsuffix}
|
||
|
- ja .L_last_num_blocks_is_11_${rndsuffix}
|
||
|
- jmp .L_last_num_blocks_is_9_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_10_${label_suffix}
|
||
|
+ ja .L_last_num_blocks_is_11_${label_suffix}
|
||
|
+ jmp .L_last_num_blocks_is_9_${label_suffix}
|
||
|
|
||
|
-.L_last_num_blocks_is_7_1_${rndsuffix}:
|
||
|
+.L_last_num_blocks_is_7_1_${label_suffix}:
|
||
|
cmp \$4,@{[DWORD($IA0)]}
|
||
|
- je .L_last_num_blocks_is_4_${rndsuffix}
|
||
|
- jb .L_last_num_blocks_is_3_1_${rndsuffix}
|
||
|
+ je .L_last_num_blocks_is_4_${label_suffix}
|
||
|
+ jb .L_last_num_blocks_is_3_1_${label_suffix}
|
||
|
# ;; 7, 6 or 5
|
||
|
cmp \$6,@{[DWORD($IA0)]}
|
||
|
- ja .L_last_num_blocks_is_7_${rndsuffix}
|
||
|
- je .L_last_num_blocks_is_6_${rndsuffix}
|
||
|
- jmp .L_last_num_blocks_is_5_${rndsuffix}
|
||
|
+ ja .L_last_num_blocks_is_7_${label_suffix}
|
||
|
+ je .L_last_num_blocks_is_6_${label_suffix}
|
||
|
+ jmp .L_last_num_blocks_is_5_${label_suffix}
|
||
|
|
||
|
-.L_last_num_blocks_is_3_1_${rndsuffix}:
|
||
|
+.L_last_num_blocks_is_3_1_${label_suffix}:
|
||
|
# ;; 3, 2 or 1
|
||
|
cmp \$2,@{[DWORD($IA0)]}
|
||
|
- ja .L_last_num_blocks_is_3_${rndsuffix}
|
||
|
- je .L_last_num_blocks_is_2_${rndsuffix}
|
||
|
+ ja .L_last_num_blocks_is_3_${label_suffix}
|
||
|
+ je .L_last_num_blocks_is_2_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; fall through for `jmp .L_last_num_blocks_is_1`
|
||
|
@@ -2859,7 +2853,7 @@ ___
|
||
|
# ;; Use rep to generate different block size variants
|
||
|
# ;; - one block size has to be the first one
|
||
|
for my $num_blocks (1 .. 16) {
|
||
|
- $code .= ".L_last_num_blocks_is_${num_blocks}_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_last_num_blocks_is_${num_blocks}_${label_suffix}:\n";
|
||
|
&GHASH_16_ENCRYPT_N_GHASH_N(
|
||
|
$AES_KEYS, $GCM128_CTX, $CIPH_PLAIN_OUT, $PLAIN_CIPH_IN, $DATA_OFFSET,
|
||
|
$LENGTH, $CTR_BE, $CTR_CHECK, $HASHKEY_OFFSET, $GHASHIN_BLK_OFFSET,
|
||
|
@@ -2872,10 +2866,10 @@ ___
|
||
|
$ENC_DEC, $HASH_IN_OUT, $IA0, $IA1, $MASKREG,
|
||
|
$num_blocks, $PBLOCK_LEN);
|
||
|
|
||
|
- $code .= "jmp .L_last_blocks_done_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_last_blocks_done_${label_suffix}\n";
|
||
|
}
|
||
|
|
||
|
- $code .= ".L_last_num_blocks_is_0_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_last_num_blocks_is_0_${label_suffix}:\n";
|
||
|
|
||
|
# ;; if there is 0 blocks to cipher then there are only 16 blocks for ghash and reduction
|
||
|
# ;; - convert mid into end_reduce
|
||
|
@@ -2891,7 +2885,7 @@ ___
|
||
|
$GHASHIN_BLK_OFFSET, 0, "%rsp", $HASHKEY_OFFSET, 0, $HASH_IN_OUT, $ZT00, $ZT01,
|
||
|
$ZT02, $ZT03, $ZT04, $ZT05, $ZT06, $ZT07, $ZT08, $ZT09);
|
||
|
|
||
|
- $code .= ".L_last_blocks_done_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_last_blocks_done_${label_suffix}:\n";
|
||
|
}
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
@@ -2985,20 +2979,20 @@ sub GHASH_16_ENCRYPT_16_PARALLEL {
|
||
|
my $GHDAT1 = $ZT21;
|
||
|
my $GHDAT2 = $ZT22;
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
# ;; prepare counter blocks
|
||
|
|
||
|
$code .= <<___;
|
||
|
cmpb \$`(256 - 16)`,@{[BYTE($CTR_CHECK)]}
|
||
|
- jae .L_16_blocks_overflow_${rndsuffix}
|
||
|
+ jae .L_16_blocks_overflow_${label_suffix}
|
||
|
vpaddd $ADDBE_1234,$CTR_BE,$B00_03
|
||
|
vpaddd $ADDBE_4x4,$B00_03,$B04_07
|
||
|
vpaddd $ADDBE_4x4,$B04_07,$B08_11
|
||
|
vpaddd $ADDBE_4x4,$B08_11,$B12_15
|
||
|
- jmp .L_16_blocks_ok_${rndsuffix}
|
||
|
-.L_16_blocks_overflow_${rndsuffix}:
|
||
|
+ jmp .L_16_blocks_ok_${label_suffix}
|
||
|
+.L_16_blocks_overflow_${label_suffix}:
|
||
|
vpshufb $SHFMSK,$CTR_BE,$CTR_BE
|
||
|
vmovdqa64 ddq_add_4444(%rip),$B12_15
|
||
|
vpaddd ddq_add_1234(%rip),$CTR_BE,$B00_03
|
||
|
@@ -3009,7 +3003,7 @@ sub GHASH_16_ENCRYPT_16_PARALLEL {
|
||
|
vpshufb $SHFMSK,$B04_07,$B04_07
|
||
|
vpshufb $SHFMSK,$B08_11,$B08_11
|
||
|
vpshufb $SHFMSK,$B12_15,$B12_15
|
||
|
-.L_16_blocks_ok_${rndsuffix}:
|
||
|
+.L_16_blocks_ok_${label_suffix}:
|
||
|
___
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
@@ -3338,25 +3332,25 @@ sub ENCRYPT_SINGLE_BLOCK {
|
||
|
my $XMM0 = $_[1]; # ; [in/out]
|
||
|
my $GPR1 = $_[2]; # ; [clobbered]
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
# ; load number of rounds from AES_KEY structure (offset in bytes is
|
||
|
# ; size of the |rd_key| buffer)
|
||
|
mov `4*15*4`($AES_KEY),@{[DWORD($GPR1)]}
|
||
|
cmp \$9,@{[DWORD($GPR1)]}
|
||
|
- je .Laes_128_${rndsuffix}
|
||
|
+ je .Laes_128_${label_suffix}
|
||
|
cmp \$11,@{[DWORD($GPR1)]}
|
||
|
- je .Laes_192_${rndsuffix}
|
||
|
+ je .Laes_192_${label_suffix}
|
||
|
cmp \$13,@{[DWORD($GPR1)]}
|
||
|
- je .Laes_256_${rndsuffix}
|
||
|
- jmp .Lexit_aes_${rndsuffix}
|
||
|
+ je .Laes_256_${label_suffix}
|
||
|
+ jmp .Lexit_aes_${label_suffix}
|
||
|
___
|
||
|
for my $keylen (sort keys %aes_rounds) {
|
||
|
my $nr = $aes_rounds{$keylen};
|
||
|
$code .= <<___;
|
||
|
.align 32
|
||
|
-.Laes_${keylen}_${rndsuffix}:
|
||
|
+.Laes_${keylen}_${label_suffix}:
|
||
|
___
|
||
|
$code .= "vpxorq `16*0`($AES_KEY),$XMM0, $XMM0\n\n";
|
||
|
for (my $i = 1; $i <= $nr; $i++) {
|
||
|
@@ -3364,10 +3358,10 @@ ___
|
||
|
}
|
||
|
$code .= <<___;
|
||
|
vaesenclast `16*($nr+1)`($AES_KEY),$XMM0,$XMM0
|
||
|
- jmp .Lexit_aes_${rndsuffix}
|
||
|
+ jmp .Lexit_aes_${label_suffix}
|
||
|
___
|
||
|
}
|
||
|
- $code .= ".Lexit_aes_${rndsuffix}:\n\n";
|
||
|
+ $code .= ".Lexit_aes_${label_suffix}:\n\n";
|
||
|
}
|
||
|
|
||
|
sub CALC_J0 {
|
||
|
@@ -3562,52 +3556,52 @@ sub GCM_ENC_DEC_SMALL {
|
||
|
my $SHUFMASK = $_[29]; # [in] ZMM with BE/LE shuffle mask
|
||
|
my $PBLOCK_LEN = $_[30]; # [in] partial block length
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
cmp \$8,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_8_${rndsuffix}
|
||
|
- jl .L_small_initial_num_blocks_is_7_1_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_8_${label_suffix}
|
||
|
+ jl .L_small_initial_num_blocks_is_7_1_${label_suffix}
|
||
|
|
||
|
|
||
|
cmp \$12,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_12_${rndsuffix}
|
||
|
- jl .L_small_initial_num_blocks_is_11_9_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_12_${label_suffix}
|
||
|
+ jl .L_small_initial_num_blocks_is_11_9_${label_suffix}
|
||
|
|
||
|
# ;; 16, 15, 14 or 13
|
||
|
cmp \$16,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_16_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_16_${label_suffix}
|
||
|
cmp \$15,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_15_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_15_${label_suffix}
|
||
|
cmp \$14,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_14_${rndsuffix}
|
||
|
- jmp .L_small_initial_num_blocks_is_13_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_14_${label_suffix}
|
||
|
+ jmp .L_small_initial_num_blocks_is_13_${label_suffix}
|
||
|
|
||
|
-.L_small_initial_num_blocks_is_11_9_${rndsuffix}:
|
||
|
+.L_small_initial_num_blocks_is_11_9_${label_suffix}:
|
||
|
# ;; 11, 10 or 9
|
||
|
cmp \$11,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_11_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_11_${label_suffix}
|
||
|
cmp \$10,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_10_${rndsuffix}
|
||
|
- jmp .L_small_initial_num_blocks_is_9_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_10_${label_suffix}
|
||
|
+ jmp .L_small_initial_num_blocks_is_9_${label_suffix}
|
||
|
|
||
|
-.L_small_initial_num_blocks_is_7_1_${rndsuffix}:
|
||
|
+.L_small_initial_num_blocks_is_7_1_${label_suffix}:
|
||
|
cmp \$4,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_4_${rndsuffix}
|
||
|
- jl .L_small_initial_num_blocks_is_3_1_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_4_${label_suffix}
|
||
|
+ jl .L_small_initial_num_blocks_is_3_1_${label_suffix}
|
||
|
# ;; 7, 6 or 5
|
||
|
cmp \$7,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_7_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_7_${label_suffix}
|
||
|
cmp \$6,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_6_${rndsuffix}
|
||
|
- jmp .L_small_initial_num_blocks_is_5_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_6_${label_suffix}
|
||
|
+ jmp .L_small_initial_num_blocks_is_5_${label_suffix}
|
||
|
|
||
|
-.L_small_initial_num_blocks_is_3_1_${rndsuffix}:
|
||
|
+.L_small_initial_num_blocks_is_3_1_${label_suffix}:
|
||
|
# ;; 3, 2 or 1
|
||
|
cmp \$3,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_3_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_3_${label_suffix}
|
||
|
cmp \$2,$NUM_BLOCKS
|
||
|
- je .L_small_initial_num_blocks_is_2_${rndsuffix}
|
||
|
+ je .L_small_initial_num_blocks_is_2_${label_suffix}
|
||
|
|
||
|
# ;; for $NUM_BLOCKS == 1, just fall through and no 'jmp' needed
|
||
|
|
||
|
@@ -3616,7 +3610,7 @@ sub GCM_ENC_DEC_SMALL {
|
||
|
___
|
||
|
|
||
|
for (my $num_blocks = 1; $num_blocks <= 16; $num_blocks++) {
|
||
|
- $code .= ".L_small_initial_num_blocks_is_${num_blocks}_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_small_initial_num_blocks_is_${num_blocks}_${label_suffix}:\n";
|
||
|
&INITIAL_BLOCKS_PARTIAL(
|
||
|
$AES_KEYS, $GCM128_CTX, $CIPH_PLAIN_OUT, $PLAIN_CIPH_IN, $LENGTH, $DATA_OFFSET,
|
||
|
$num_blocks, $CTR, $HASH_IN_OUT, $ENC_DEC, $ZTMP0, $ZTMP1,
|
||
|
@@ -3625,11 +3619,11 @@ ___
|
||
|
$ZTMP14, $IA0, $IA1, $MASKREG, $SHUFMASK, $PBLOCK_LEN);
|
||
|
|
||
|
if ($num_blocks != 16) {
|
||
|
- $code .= "jmp .L_small_initial_blocks_encrypted_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_small_initial_blocks_encrypted_${label_suffix}\n";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- $code .= ".L_small_initial_blocks_encrypted_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_small_initial_blocks_encrypted_${label_suffix}:\n";
|
||
|
}
|
||
|
|
||
|
# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
@@ -3710,7 +3704,7 @@ sub GCM_ENC_DEC {
|
||
|
|
||
|
my $MASKREG = "%k1";
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
# ;; reduction every 48 blocks, depth 32 blocks
|
||
|
# ;; @note 48 blocks is the maximum capacity of the stack frame
|
||
|
@@ -3751,7 +3745,7 @@ sub GCM_ENC_DEC {
|
||
|
} else {
|
||
|
$code .= "or $PLAIN_CIPH_LEN,$PLAIN_CIPH_LEN\n";
|
||
|
}
|
||
|
- $code .= "je .L_enc_dec_done_${rndsuffix}\n";
|
||
|
+ $code .= "je .L_enc_dec_done_${label_suffix}\n";
|
||
|
|
||
|
# Length value from context $CTX_OFFSET_InLen`($GCM128_CTX) is updated in
|
||
|
# 'providers/implementations/ciphers/cipher_aes_gcm_hw_vaes_avx512.inc'
|
||
|
@@ -3778,12 +3772,12 @@ sub GCM_ENC_DEC {
|
||
|
# ;; There may be no more data if it was consumed in the partial block.
|
||
|
$code .= <<___;
|
||
|
sub $DATA_OFFSET,$LENGTH
|
||
|
- je .L_enc_dec_done_${rndsuffix}
|
||
|
+ je .L_enc_dec_done_${label_suffix}
|
||
|
___
|
||
|
|
||
|
$code .= <<___;
|
||
|
cmp \$`(16 * 16)`,$LENGTH
|
||
|
- jbe .L_message_below_equal_16_blocks_${rndsuffix}
|
||
|
+ jbe .L_message_below_equal_16_blocks_${label_suffix}
|
||
|
|
||
|
vmovdqa64 SHUF_MASK(%rip),$SHUF_MASK
|
||
|
vmovdqa64 ddq_addbe_4444(%rip),$ADDBE_4x4
|
||
|
@@ -3815,7 +3809,7 @@ ___
|
||
|
|
||
|
$code .= <<___;
|
||
|
cmp \$`(32 * 16)`,$LENGTH
|
||
|
- jb .L_message_below_32_blocks_${rndsuffix}
|
||
|
+ jb .L_message_below_32_blocks_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; ==== AES-CTR - next 16 blocks
|
||
|
@@ -3836,13 +3830,13 @@ ___
|
||
|
sub \$`(32 * 16)`,$LENGTH
|
||
|
|
||
|
cmp \$`($big_loop_nblocks * 16)`,$LENGTH
|
||
|
- jb .L_no_more_big_nblocks_${rndsuffix}
|
||
|
+ jb .L_no_more_big_nblocks_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; ====
|
||
|
# ;; ==== AES-CTR + GHASH - 48 blocks loop
|
||
|
# ;; ====
|
||
|
- $code .= ".L_encrypt_big_nblocks_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_encrypt_big_nblocks_${label_suffix}:\n";
|
||
|
|
||
|
# ;; ==== AES-CTR + GHASH - 16 blocks, start
|
||
|
$aesout_offset = ($STACK_LOCAL_OFFSET + (32 * 16));
|
||
|
@@ -3893,15 +3887,15 @@ ___
|
||
|
add \$`($big_loop_nblocks * 16)`,$DATA_OFFSET
|
||
|
sub \$`($big_loop_nblocks * 16)`,$LENGTH
|
||
|
cmp \$`($big_loop_nblocks * 16)`,$LENGTH
|
||
|
- jae .L_encrypt_big_nblocks_${rndsuffix}
|
||
|
+ jae .L_encrypt_big_nblocks_${label_suffix}
|
||
|
|
||
|
-.L_no_more_big_nblocks_${rndsuffix}:
|
||
|
+.L_no_more_big_nblocks_${label_suffix}:
|
||
|
|
||
|
cmp \$`(32 * 16)`,$LENGTH
|
||
|
- jae .L_encrypt_32_blocks_${rndsuffix}
|
||
|
+ jae .L_encrypt_32_blocks_${label_suffix}
|
||
|
|
||
|
cmp \$`(16 * 16)`,$LENGTH
|
||
|
- jae .L_encrypt_16_blocks_${rndsuffix}
|
||
|
+ jae .L_encrypt_16_blocks_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;; =====================================================
|
||
|
@@ -3909,7 +3903,7 @@ ___
|
||
|
# ;; ==== GHASH 1 x 16 blocks
|
||
|
# ;; ==== GHASH 1 x 16 blocks (reduction) & encrypt N blocks
|
||
|
# ;; ==== then GHASH N blocks
|
||
|
- $code .= ".L_encrypt_0_blocks_ghash_32_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_encrypt_0_blocks_ghash_32_${label_suffix}:\n";
|
||
|
|
||
|
# ;; calculate offset to the right hash key
|
||
|
$code .= <<___;
|
||
|
@@ -3937,7 +3931,7 @@ ___
|
||
|
$IA0, $IA5, $MASKREG, $PBLOCK_LEN);
|
||
|
|
||
|
$code .= "vpshufb @{[XWORD($SHUF_MASK)]},$CTR_BLOCKx,$CTR_BLOCKx\n";
|
||
|
- $code .= "jmp .L_ghash_done_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_ghash_done_${label_suffix}\n";
|
||
|
|
||
|
# ;; =====================================================
|
||
|
# ;; =====================================================
|
||
|
@@ -3946,7 +3940,7 @@ ___
|
||
|
# ;; ==== GHASH 1 x 16 blocks (reduction)
|
||
|
# ;; ==== GHASH 1 x 16 blocks (reduction) & encrypt N blocks
|
||
|
# ;; ==== then GHASH N blocks
|
||
|
- $code .= ".L_encrypt_32_blocks_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_encrypt_32_blocks_${label_suffix}:\n";
|
||
|
|
||
|
# ;; ==== AES-CTR + GHASH - 16 blocks, start
|
||
|
$aesout_offset = ($STACK_LOCAL_OFFSET + (32 * 16));
|
||
|
@@ -4007,7 +4001,7 @@ ___
|
||
|
$IA0, $IA5, $MASKREG, $PBLOCK_LEN);
|
||
|
|
||
|
$code .= "vpshufb @{[XWORD($SHUF_MASK)]},$CTR_BLOCKx,$CTR_BLOCKx\n";
|
||
|
- $code .= "jmp .L_ghash_done_${rndsuffix}\n";
|
||
|
+ $code .= "jmp .L_ghash_done_${label_suffix}\n";
|
||
|
|
||
|
# ;; =====================================================
|
||
|
# ;; =====================================================
|
||
|
@@ -4015,7 +4009,7 @@ ___
|
||
|
# ;; ==== GHASH 1 x 16 blocks
|
||
|
# ;; ==== GHASH 1 x 16 blocks (reduction) & encrypt N blocks
|
||
|
# ;; ==== then GHASH N blocks
|
||
|
- $code .= ".L_encrypt_16_blocks_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_encrypt_16_blocks_${label_suffix}:\n";
|
||
|
|
||
|
# ;; ==== AES-CTR + GHASH - 16 blocks, start
|
||
|
$aesout_offset = ($STACK_LOCAL_OFFSET + (32 * 16));
|
||
|
@@ -4059,9 +4053,9 @@ ___
|
||
|
|
||
|
$code .= "vpshufb @{[XWORD($SHUF_MASK)]},$CTR_BLOCKx,$CTR_BLOCKx\n";
|
||
|
$code .= <<___;
|
||
|
- jmp .L_ghash_done_${rndsuffix}
|
||
|
+ jmp .L_ghash_done_${label_suffix}
|
||
|
|
||
|
-.L_message_below_32_blocks_${rndsuffix}:
|
||
|
+.L_message_below_32_blocks_${label_suffix}:
|
||
|
# ;; 32 > number of blocks > 16
|
||
|
|
||
|
sub \$`(16 * 16)`,$LENGTH
|
||
|
@@ -4094,9 +4088,9 @@ ___
|
||
|
|
||
|
$code .= "vpshufb @{[XWORD($SHUF_MASK)]},$CTR_BLOCKx,$CTR_BLOCKx\n";
|
||
|
$code .= <<___;
|
||
|
- jmp .L_ghash_done_${rndsuffix}
|
||
|
+ jmp .L_ghash_done_${label_suffix}
|
||
|
|
||
|
-.L_message_below_equal_16_blocks_${rndsuffix}:
|
||
|
+.L_message_below_equal_16_blocks_${label_suffix}:
|
||
|
# ;; Determine how many blocks to process
|
||
|
# ;; - process one additional block if there is a partial block
|
||
|
mov @{[DWORD($LENGTH)]},@{[DWORD($IA1)]}
|
||
|
@@ -4113,13 +4107,13 @@ ___
|
||
|
|
||
|
# ;; fall through to exit
|
||
|
|
||
|
- $code .= ".L_ghash_done_${rndsuffix}:\n";
|
||
|
+ $code .= ".L_ghash_done_${label_suffix}:\n";
|
||
|
|
||
|
# ;; save the last counter block
|
||
|
$code .= "vmovdqu64 $CTR_BLOCKx,`$CTX_OFFSET_CurCount`($GCM128_CTX)\n";
|
||
|
$code .= <<___;
|
||
|
vmovdqu64 $AAD_HASHx,`$CTX_OFFSET_AadHash`($GCM128_CTX)
|
||
|
-.L_enc_dec_done_${rndsuffix}:
|
||
|
+.L_enc_dec_done_${label_suffix}:
|
||
|
___
|
||
|
}
|
||
|
|
||
|
@@ -4155,7 +4149,7 @@ sub INITIAL_BLOCKS_16 {
|
||
|
my $B08_11 = $T7;
|
||
|
my $B12_15 = $T8;
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
my $stack_offset = $BLK_OFFSET;
|
||
|
$code .= <<___;
|
||
|
@@ -4163,13 +4157,13 @@ sub INITIAL_BLOCKS_16 {
|
||
|
# ;; prepare counter blocks
|
||
|
|
||
|
cmpb \$`(256 - 16)`,@{[BYTE($CTR_CHECK)]}
|
||
|
- jae .L_next_16_overflow_${rndsuffix}
|
||
|
+ jae .L_next_16_overflow_${label_suffix}
|
||
|
vpaddd $ADDBE_1234,$CTR,$B00_03
|
||
|
vpaddd $ADDBE_4x4,$B00_03,$B04_07
|
||
|
vpaddd $ADDBE_4x4,$B04_07,$B08_11
|
||
|
vpaddd $ADDBE_4x4,$B08_11,$B12_15
|
||
|
- jmp .L_next_16_ok_${rndsuffix}
|
||
|
-.L_next_16_overflow_${rndsuffix}:
|
||
|
+ jmp .L_next_16_ok_${label_suffix}
|
||
|
+.L_next_16_overflow_${label_suffix}:
|
||
|
vpshufb $SHUF_MASK,$CTR,$CTR
|
||
|
vmovdqa64 ddq_add_4444(%rip),$B12_15
|
||
|
vpaddd ddq_add_1234(%rip),$CTR,$B00_03
|
||
|
@@ -4180,7 +4174,7 @@ sub INITIAL_BLOCKS_16 {
|
||
|
vpshufb $SHUF_MASK,$B04_07,$B04_07
|
||
|
vpshufb $SHUF_MASK,$B08_11,$B08_11
|
||
|
vpshufb $SHUF_MASK,$B12_15,$B12_15
|
||
|
-.L_next_16_ok_${rndsuffix}:
|
||
|
+.L_next_16_ok_${label_suffix}:
|
||
|
vshufi64x2 \$0b11111111,$B12_15,$B12_15,$CTR
|
||
|
addb \$16,@{[BYTE($CTR_CHECK)]}
|
||
|
# ;; === load 16 blocks of data
|
||
|
@@ -4264,7 +4258,7 @@ sub GCM_COMPLETE {
|
||
|
my $GCM128_CTX = $_[0];
|
||
|
my $PBLOCK_LEN = $_[1];
|
||
|
|
||
|
- my $rndsuffix = &random_string();
|
||
|
+ my $label_suffix = $label_count++;
|
||
|
|
||
|
$code .= <<___;
|
||
|
vmovdqu @{[HashKeyByIdx(1,$GCM128_CTX)]},%xmm2
|
||
|
@@ -4276,14 +4270,14 @@ ___
|
||
|
|
||
|
# ;; Process the final partial block.
|
||
|
cmp \$0,$PBLOCK_LEN
|
||
|
- je .L_partial_done_${rndsuffix}
|
||
|
+ je .L_partial_done_${label_suffix}
|
||
|
___
|
||
|
|
||
|
# ;GHASH computation for the last <16 Byte block
|
||
|
&GHASH_MUL("%xmm4", "%xmm2", "%xmm0", "%xmm16", "%xmm17");
|
||
|
|
||
|
$code .= <<___;
|
||
|
-.L_partial_done_${rndsuffix}:
|
||
|
+.L_partial_done_${label_suffix}:
|
||
|
vmovq `$CTX_OFFSET_InLen`($GCM128_CTX), %xmm5
|
||
|
vpinsrq \$1, `$CTX_OFFSET_AadLen`($GCM128_CTX), %xmm5, %xmm5 # ; xmm5 = len(A)||len(C)
|
||
|
vpsllq \$3, %xmm5, %xmm5 # ; convert bytes into bits
|
||
|
@@ -4297,7 +4291,7 @@ ___
|
||
|
vpshufb SHUF_MASK(%rip),%xmm4,%xmm4 # ; perform a 16Byte swap
|
||
|
vpxor %xmm4,%xmm3,%xmm3
|
||
|
|
||
|
-.L_return_T_${rndsuffix}:
|
||
|
+.L_return_T_${label_suffix}:
|
||
|
vmovdqu %xmm3,`$CTX_OFFSET_AadHash`($GCM128_CTX)
|
||
|
___
|
||
|
}
|