Accepting request 1119558 from home:ohollmann:branches:security:tls

- Performance enhancements for cryptography from OpenSSL 3.x [jsc#PED-5086, jsc#PED-3514] * Add patches: - openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch - openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch - openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch - openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch - openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch - openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch OBS-URL: https://build.opensuse.org/request/show/1119558 OBS-URL: https://build.opensuse.org/package/show/security:tls/openssl-1_1?expand=0&rev=148
2023-10-25 07:52:22 +00:00 · 2023-10-25 07:52:22 +00:00 · b51c004cd8
commit b51c004cd8
parent 861b7f632f
8 changed files with 3360 additions and 0 deletions
--- a/openssl-1_1.changes
+++ b/openssl-1_1.changes
@ -1,3 +1,16 @@
 -------------------------------------------------------------------
 Thu Oct 19 15:03:14 UTC 2023 - Otto Hollmann <otto.hollmann@suse.com>
 - Performance enhancements for cryptography from OpenSSL 3.x
  [jsc#PED-5086, jsc#PED-3514]
  * Add patches:
    - openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch
    - openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch
    - openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch
    - openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch
    - openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch
    - openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch
 -------------------------------------------------------------------
 Wed Oct  4 07:15:29 UTC 2023 - Otto Hollmann <otto.hollmann@suse.com>
--- a/openssl-1_1.spec
+++ b/openssl-1_1.spec
@ -177,6 +177,14 @@ Patch106:       openssl-s_client-check-ocsp-status.patch
 Patch107:       openssl-dont-pass-zero-length-input-to-EVP_Cipher.patch
 #PATCH-FIX-SUSE bsc#1215215 FIPS: Add "fips" to version string
 Patch108:       openssl-1_1-fips-bsc1215215_fips_in_version_string.patch
 # PATCH-FIX-UPSTREAM jsc#PED-5086, jsc#PED-3514
 # POWER10 performance enhancements for cryptography
 Patch109:       openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch
 Patch110:       openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch
 Patch111:       openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch
 Patch112:       openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch
 Patch113:       openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch
 Patch114:       openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch
 BuildRequires:  jitterentropy-devel >= 3.4.0
 BuildRequires:  pkgconfig
 BuildRequires:  pkgconfig(zlib)
--- a/openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch
+++ b/openssl-Improve-performance-for-6x-unrolling-with-vpermxor-i.patch
@ -0,0 +1,495 @@
 From 3d3a7ecd1ae5ab08d22041f7b3b035c34f12fa02 Mon Sep 17 00:00:00 2001
 From: Danny Tsen <dtsen@linux.ibm.com>
 Date: Tue, 22 Aug 2023 15:58:53 -0400
 Subject: [PATCH] Improve performance for 6x unrolling with vpermxor
 instruction
 Reviewed-by: Paul Dale <pauli@openssl.org>
 Reviewed-by: Tomas Mraz <tomas@openssl.org>
 (Merged from https://github.com/openssl/openssl/pull/21812)
 ---
 crypto/aes/asm/aesp8-ppc.pl | 145 +++++++++++++++++++++++-------------
 1 file changed, 95 insertions(+), 50 deletions(-)
 diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
 index 60cf86f52aed2..38b9405a283b7 100755
 --- a/crypto/aes/asm/aesp8-ppc.pl
 +++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -99,11 +99,12 @@
 .long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
 .long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
 .long	0,0,0,0						?asis
 +.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
 Lconsts:
 	mflr	r0
 	bcl	20,31,\$+4
 	mflr	$ptr	 #vvvvv "distance between . and rcon
 -	addi	$ptr,$ptr,-0x48
 +	addi	$ptr,$ptr,-0x58
 	mtlr	r0
 	blr
 	.long	0
@@ -2405,7 +2406,7 @@ ()
 my $key_=$key2;
 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
     $x00=0 if ($flavour =~ /osx/);
 -my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
 +my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5)=map("v$_",(0..5));
 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
 my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
@@ -2460,6 +2461,18 @@ ()
 	li		$x70,0x70
 	mtspr		256,r0
 +	# Reverse eighty7 to 0x010101..87
 +	xxlor		2, 32+$eighty7, 32+$eighty7
 +	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
 +	xxlor		1, 32+$eighty7, 32+$eighty7
 +
 +	# Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
 +	mr		$x70, r6
 +	bl		Lconsts
 +	lxvw4x		0, $x40, r6		# load XOR contents
 +	mr		r6, $x70
 +	li		$x70,0x70
 +
 	subi		$rounds,$rounds,3	# -4 in total
 	lvx		$rndkey0,$x00,$key1	# load key schedule
@@ -2502,69 +2515,77 @@ ()
 	?vperm		v31,v31,$twk5,$keyperm
 	lvx		v25,$x10,$key_		# pre-load round[2]
 +	# Switch to use the following codes with 0x010101..87 to generate tweak.
 +	#     eighty7 = 0x010101..87
 +	# vsrab		tmp, tweak, seven	# next tweak value, right shift 7 bits
 +	# vand		tmp, tmp, eighty7	# last byte with carry
 +	# vaddubm	tweak, tweak, tweak	# left shift 1 bit (x2)
 +	# xxlor		vsx, 0, 0
 +	# vpermxor	tweak, tweak, tmp, vsx
 +
 	 vperm		$in0,$inout,$inptail,$inpperm
 	 subi		$inp,$inp,31		# undo "caller"
 	vxor		$twk0,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out0,$in0,$twk0
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in1, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in1
 	 lvx_u		$in1,$x10,$inp
 	vxor		$twk1,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in1,$in1,$in1,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out1,$in1,$twk1
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in2, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in2
 	 lvx_u		$in2,$x20,$inp
 	 andi.		$taillen,$len,15
 	vxor		$twk2,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in2,$in2,$in2,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out2,$in2,$twk2
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in3, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in3
 	 lvx_u		$in3,$x30,$inp
 	 sub		$len,$len,$taillen
 	vxor		$twk3,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in3,$in3,$in3,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out3,$in3,$twk3
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in4, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in4
 	 lvx_u		$in4,$x40,$inp
 	 subi		$len,$len,0x60
 	vxor		$twk4,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in4,$in4,$in4,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out4,$in4,$twk4
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in5, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in5
 	 lvx_u		$in5,$x50,$inp
 	 addi		$inp,$inp,0x60
 	vxor		$twk5,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in5,$in5,$in5,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out5,$in5,$twk5
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in0, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in0
 	vxor		v31,v31,$rndkey0
 	mtctr		$rounds
@@ -2590,6 +2611,8 @@ ()
 	lvx		v25,$x10,$key_		# round[4]
 	bdnz		Loop_xts_enc6x
 +	xxlor		32+$eighty7, 1, 1		# 0x010101..87
 +
 	subic		$len,$len,96		# $len-=96
 	 vxor		$in0,$twk0,v31		# xor with last round key
 	vcipher		$out0,$out0,v24
@@ -2599,7 +2622,6 @@ ()
 	 vaddubm	$tweak,$tweak,$tweak
 	vcipher		$out2,$out2,v24
 	vcipher		$out3,$out3,v24
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipher		$out4,$out4,v24
 	vcipher		$out5,$out5,v24
@@ -2607,7 +2629,8 @@ ()
 	 vand		$tmp,$tmp,$eighty7
 	vcipher		$out0,$out0,v25
 	vcipher		$out1,$out1,v25
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in1, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in1
 	vcipher		$out2,$out2,v25
 	vcipher		$out3,$out3,v25
 	 vxor		$in1,$twk1,v31
@@ -2618,13 +2641,13 @@ ()
 	and		r0,r0,$len
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipher		$out0,$out0,v26
 	vcipher		$out1,$out1,v26
 	 vand		$tmp,$tmp,$eighty7
 	vcipher		$out2,$out2,v26
 	vcipher		$out3,$out3,v26
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in2, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in2
 	vcipher		$out4,$out4,v26
 	vcipher		$out5,$out5,v26
@@ -2638,7 +2661,6 @@ ()
 	 vaddubm	$tweak,$tweak,$tweak
 	vcipher		$out0,$out0,v27
 	vcipher		$out1,$out1,v27
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipher		$out2,$out2,v27
 	vcipher		$out3,$out3,v27
 	 vand		$tmp,$tmp,$eighty7
@@ -2646,7 +2668,8 @@ ()
 	vcipher		$out5,$out5,v27
 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in3, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in3
 	vcipher		$out0,$out0,v28
 	vcipher		$out1,$out1,v28
 	 vxor		$in3,$twk3,v31
@@ -2655,7 +2678,6 @@ ()
 	vcipher		$out2,$out2,v28
 	vcipher		$out3,$out3,v28
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipher		$out4,$out4,v28
 	vcipher		$out5,$out5,v28
 	lvx		v24,$x00,$key_		# re-pre-load round[1]
@@ -2663,7 +2685,8 @@ ()
 	vcipher		$out0,$out0,v29
 	vcipher		$out1,$out1,v29
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in4, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in4
 	vcipher		$out2,$out2,v29
 	vcipher		$out3,$out3,v29
 	 vxor		$in4,$twk4,v31
@@ -2673,14 +2696,14 @@ ()
 	vcipher		$out5,$out5,v29
 	lvx		v25,$x10,$key_		# re-pre-load round[2]
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipher		$out0,$out0,v30
 	vcipher		$out1,$out1,v30
 	 vand		$tmp,$tmp,$eighty7
 	vcipher		$out2,$out2,v30
 	vcipher		$out3,$out3,v30
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in5, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in5
 	vcipher		$out4,$out4,v30
 	vcipher		$out5,$out5,v30
 	 vxor		$in5,$twk5,v31
@@ -2690,7 +2713,6 @@ ()
 	vcipherlast	$out0,$out0,$in0
 	 lvx_u		$in0,$x00,$inp		# load next input block
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vcipherlast	$out1,$out1,$in1
 	 lvx_u		$in1,$x10,$inp
 	vcipherlast	$out2,$out2,$in2
@@ -2703,7 +2725,10 @@ ()
 	vcipherlast	$out4,$out4,$in4
 	 le?vperm	$in2,$in2,$in2,$leperm
 	 lvx_u		$in4,$x40,$inp
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		10, 32+$in0, 32+$in0
 +	 xxlor		32+$in0, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in0
 +	 xxlor		32+$in0, 10, 10
 	vcipherlast	$tmp,$out5,$in5		# last block might be needed
 						# in stealing mode
 	 le?vperm	$in3,$in3,$in3,$leperm
@@ -2736,6 +2761,8 @@ ()
 	mtctr		$rounds
 	beq		Loop_xts_enc6x		# did $len-=96 borrow?
 +	xxlor		32+$eighty7, 2, 2		# 0x870101..01
 +
 	addic.		$len,$len,0x60
 	beq		Lxts_enc6x_zero
 	cmpwi		$len,0x20
@@ -3112,6 +3139,18 @@ ()
 	li		$x70,0x70
 	mtspr		256,r0
 +	# Reverse eighty7 to 0x010101..87
 +	xxlor		2, 32+$eighty7, 32+$eighty7
 +	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
 +	xxlor		1, 32+$eighty7, 32+$eighty7
 +
 +	# Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
 +	mr		$x70, r6
 +	bl		Lconsts
 +	lxvw4x		0, $x40, r6		# load XOR contents
 +	mr		r6, $x70
 +	li		$x70,0x70
 +
 	subi		$rounds,$rounds,3	# -4 in total
 	lvx		$rndkey0,$x00,$key1	# load key schedule
@@ -3159,64 +3198,64 @@ ()
 	vxor		$twk0,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out0,$in0,$twk0
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in1, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in1
 	 lvx_u		$in1,$x10,$inp
 	vxor		$twk1,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in1,$in1,$in1,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out1,$in1,$twk1
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in2, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in2
 	 lvx_u		$in2,$x20,$inp
 	 andi.		$taillen,$len,15
 	vxor		$twk2,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in2,$in2,$in2,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out2,$in2,$twk2
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in3, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in3
 	 lvx_u		$in3,$x30,$inp
 	 sub		$len,$len,$taillen
 	vxor		$twk3,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in3,$in3,$in3,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out3,$in3,$twk3
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in4, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in4
 	 lvx_u		$in4,$x40,$inp
 	 subi		$len,$len,0x60
 	vxor		$twk4,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in4,$in4,$in4,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out4,$in4,$twk4
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in5, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in5
 	 lvx_u		$in5,$x50,$inp
 	 addi		$inp,$inp,0x60
 	vxor		$twk5,$tweak,$rndkey0
 	vsrab		$tmp,$tweak,$seven	# next tweak value
 	vaddubm		$tweak,$tweak,$tweak
 -	vsldoi		$tmp,$tmp,$tmp,15
 	 le?vperm	$in5,$in5,$in5,$leperm
 	vand		$tmp,$tmp,$eighty7
 	 vxor		$out5,$in5,$twk5
 -	vxor		$tweak,$tweak,$tmp
 +	xxlor		32+$in0, 0, 0
 +	vpermxor	$tweak, $tweak, $tmp, $in0
 	vxor		v31,v31,$rndkey0
 	mtctr		$rounds
@@ -3242,6 +3281,8 @@ ()
 	lvx		v25,$x10,$key_		# round[4]
 	bdnz		Loop_xts_dec6x
 +	xxlor		32+$eighty7, 1, 1
 +
 	subic		$len,$len,96		# $len-=96
 	 vxor		$in0,$twk0,v31		# xor with last round key
 	vncipher	$out0,$out0,v24
@@ -3251,7 +3292,6 @@ ()
 	 vaddubm	$tweak,$tweak,$tweak
 	vncipher	$out2,$out2,v24
 	vncipher	$out3,$out3,v24
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipher	$out4,$out4,v24
 	vncipher	$out5,$out5,v24
@@ -3259,7 +3299,8 @@ ()
 	 vand		$tmp,$tmp,$eighty7
 	vncipher	$out0,$out0,v25
 	vncipher	$out1,$out1,v25
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in1, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in1
 	vncipher	$out2,$out2,v25
 	vncipher	$out3,$out3,v25
 	 vxor		$in1,$twk1,v31
@@ -3270,13 +3311,13 @@ ()
 	and		r0,r0,$len
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipher	$out0,$out0,v26
 	vncipher	$out1,$out1,v26
 	 vand		$tmp,$tmp,$eighty7
 	vncipher	$out2,$out2,v26
 	vncipher	$out3,$out3,v26
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in2, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in2
 	vncipher	$out4,$out4,v26
 	vncipher	$out5,$out5,v26
@@ -3290,7 +3331,6 @@ ()
 	 vaddubm	$tweak,$tweak,$tweak
 	vncipher	$out0,$out0,v27
 	vncipher	$out1,$out1,v27
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipher	$out2,$out2,v27
 	vncipher	$out3,$out3,v27
 	 vand		$tmp,$tmp,$eighty7
@@ -3298,7 +3338,8 @@ ()
 	vncipher	$out5,$out5,v27
 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in3, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in3
 	vncipher	$out0,$out0,v28
 	vncipher	$out1,$out1,v28
 	 vxor		$in3,$twk3,v31
@@ -3307,7 +3348,6 @@ ()
 	vncipher	$out2,$out2,v28
 	vncipher	$out3,$out3,v28
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipher	$out4,$out4,v28
 	vncipher	$out5,$out5,v28
 	lvx		v24,$x00,$key_		# re-pre-load round[1]
@@ -3315,7 +3355,8 @@ ()
 	vncipher	$out0,$out0,v29
 	vncipher	$out1,$out1,v29
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in4, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in4
 	vncipher	$out2,$out2,v29
 	vncipher	$out3,$out3,v29
 	 vxor		$in4,$twk4,v31
@@ -3325,14 +3366,14 @@ ()
 	vncipher	$out5,$out5,v29
 	lvx		v25,$x10,$key_		# re-pre-load round[2]
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipher	$out0,$out0,v30
 	vncipher	$out1,$out1,v30
 	 vand		$tmp,$tmp,$eighty7
 	vncipher	$out2,$out2,v30
 	vncipher	$out3,$out3,v30
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		32+$in5, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in5
 	vncipher	$out4,$out4,v30
 	vncipher	$out5,$out5,v30
 	 vxor		$in5,$twk5,v31
@@ -3342,7 +3383,6 @@ ()
 	vncipherlast	$out0,$out0,$in0
 	 lvx_u		$in0,$x00,$inp		# load next input block
 	 vaddubm	$tweak,$tweak,$tweak
 -	 vsldoi		$tmp,$tmp,$tmp,15
 	vncipherlast	$out1,$out1,$in1
 	 lvx_u		$in1,$x10,$inp
 	vncipherlast	$out2,$out2,$in2
@@ -3355,7 +3395,10 @@ ()
 	vncipherlast	$out4,$out4,$in4
 	 le?vperm	$in2,$in2,$in2,$leperm
 	 lvx_u		$in4,$x40,$inp
 -	 vxor		$tweak,$tweak,$tmp
 +	 xxlor		10, 32+$in0, 32+$in0
 +	 xxlor		32+$in0, 0, 0
 +	 vpermxor	$tweak, $tweak, $tmp, $in0
 +	 xxlor		32+$in0, 10, 10
 	vncipherlast	$out5,$out5,$in5
 	 le?vperm	$in3,$in3,$in3,$leperm
 	 lvx_u		$in5,$x50,$inp
@@ -3386,6 +3429,8 @@ ()
 	mtctr		$rounds
 	beq		Loop_xts_dec6x		# did $len-=96 borrow?
 +	xxlor		32+$eighty7, 2, 2
 +
 	addic.		$len,$len,0x60
 	beq		Lxts_dec6x_zero
 	cmpwi		$len,0x20
--- a/openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch
+++ b/openssl-ec-56-bit-Limb-Solinas-Strategy-for-secp384r1.patch
--- a/openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch
+++ b/openssl-ec-Use-static-linkage-on-nistp521-felem_-square-mul-.patch
@ -0,0 +1,65 @@
 From 3e47a286dc3274bda72a196c3a4030a1fc8302f1 Mon Sep 17 00:00:00 2001
 From: Rohan McLure <rohanmclure@linux.ibm.com>
 Date: Fri, 23 Jun 2023 16:41:48 +1000
 Subject: [PATCH] ec: Use static linkage on nistp521 felem_{square,mul}
 wrappers
 Runtime selection of implementations for felem_{square,mul} depends on
 felem_{square,mul}_wrapper functions, which overwrite function pointers in
 a similar design to that of .plt.got sections used by program loaders
 during dynamic linking.
 There's no reason why these functions need to have external linkage.
 Mark static.
 Signed-off-by: Rohan McLure <rohanmclure@linux.ibm.com>
 Reviewed-by: Paul Dale <pauli@openssl.org>
 Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
 Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com>
 Reviewed-by: Todd Short <todd.short@me.com>
 (Merged from https://github.com/openssl/openssl/pull/21471)
 ---
 crypto/ec/ecp_nistp521.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
 diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
 index 97815cac1f13..32a9268ecf17 100644
 --- a/crypto/ec/ecp_nistp521.c
 +++ b/crypto/ec/ecp_nistp521.c
@@ -676,8 +676,8 @@ static void felem_reduce(felem out, const largefelem in)
 }
 #if defined(ECP_NISTP521_ASM)
 -void felem_square_wrapper(largefelem out, const felem in);
 -void felem_mul_wrapper(largefelem out, const felem in1, const felem in2);
 +static void felem_square_wrapper(largefelem out, const felem in);
 +static void felem_mul_wrapper(largefelem out, const felem in1, const felem in2);
 static void (*felem_square_p)(largefelem out, const felem in) =
     felem_square_wrapper;
@@ -691,7 +691,7 @@ void p521_felem_mul(largefelem out, const felem in1, const felem in2);
 #  include "../ppc_arch.h"
 # endif
 -void felem_select(void)
 +static void felem_select(void)
 {
 # if defined(_ARCH_PPC64)
     if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
@@ -707,13 +707,13 @@ void felem_select(void)
     felem_mul_p = felem_mul_ref;
 }
 -void felem_square_wrapper(largefelem out, const felem in)
 +static void felem_square_wrapper(largefelem out, const felem in)
 {
     felem_select();
     felem_square_p(out, in);
 }
 -void felem_mul_wrapper(largefelem out, const felem in1, const felem in2)
 +static void felem_mul_wrapper(largefelem out, const felem in1, const felem in2)
 {
     felem_select();
     felem_mul_p(out, in1, in2);
--- a/openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch
+++ b/openssl-ec-powerpc64le-Add-asm-implementation-of-felem_-squa.patch
@ -0,0 +1,410 @@
 From 966047ee13188e8634af25af348940acceb9316d Mon Sep 17 00:00:00 2001
 From: Rohan McLure <rohanmclure@linux.ibm.com>
 Date: Wed, 31 May 2023 14:32:26 +1000
 Subject: [PATCH] ec: powerpc64le: Add asm implementation of felem_{square,mul}
 Add an assembly implementation of felem_{square,mul}, which will be
 used whenever Altivec support is present and the core implements
 ISA 3.0 (Power 9) or greater.
 Signed-off-by: Rohan McLure <rohanmclure@linux.ibm.com>
 Reviewed-by: Paul Dale <pauli@openssl.org>
 Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
 Reviewed-by: Dmitry Belyavskiy <beldmit@gmail.com>
 Reviewed-by: Todd Short <todd.short@me.com>
 (Merged from https://github.com/openssl/openssl/pull/21471)
 ---
 crypto/ec/asm/ecp_nistp384-ppc64.pl |  355 ++++++++++++++++++++++++++++++++++++
 crypto/ec/build.info                |    2 
 crypto/ec/ecp_nistp384.c            |    9 
 3 files changed, 366 insertions(+)
 create mode 100755 crypto/ec/asm/ecp_nistp384-ppc64.pl
 --- /dev/null
 +++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl
@@ -0,0 +1,355 @@
 +#! /usr/bin/env perl
 +# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
 +#
 +# Licensed under the Apache License 2.0 (the "License").  You may not use
 +# this file except in compliance with the License.  You can obtain a copy
 +# in the file LICENSE in the source distribution or at
 +# https://www.openssl.org/source/license.html
 +#
 +# ====================================================================
 +# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL
 +# project.
 +# ====================================================================
 +#
 +# p384 lower-level primitives for PPC64 using vector instructions.
 +#
 +
 +use strict;
 +use warnings;
 +
 +my $flavour = shift;
 +my $output = "";
 +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
 +if (!$output) {
 +    $output = "-";
 +}
 +
 +my ($xlate, $dir);
 +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 +die "can't locate ppc-xlate.pl";
 +
 +open OUT,"| \"$^X\" $xlate $flavour $output";
 +*STDOUT=*OUT;
 +
 +my $code = "";
 +
 +my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");
 +
 +my $vzero = "v32";
 +
 +sub startproc($)
 +{
 +    my ($name) = @_;
 +
 +    $code.=<<___;
 +    .globl ${name}
 +    .align 5
 +${name}:
 +
 +___
 +}
 +
 +sub endproc($)
 +{
 +    my ($name) = @_;
 +
 +    $code.=<<___;
 +    blr
 +        .size ${name},.-${name}
 +
 +___
 +}
 +
 +
 +sub push_vrs($$)
 +{
 +    my ($min, $max) = @_;
 +
 +    my $count = $max - $min + 1;
 +
 +    $code.=<<___;
 +    mr      $savesp,$sp
 +    stdu        $sp,-16*`$count+1`($sp)
 +
 +___
 +        for (my $i = $min; $i <= $max; $i++) {
 +            my $mult = $max - $i + 1;
 +            $code.=<<___;
 +    stxv        $i,-16*$mult($savesp)
 +___
 +
 +    }
 +
 +    $code.=<<___;
 +
 +___
 +}
 +
 +sub pop_vrs($$)
 +{
 +    my ($min, $max) = @_;
 +
 +    $code.=<<___;
 +    ld      $savesp,0($sp)
 +___
 +    for (my $i = $min; $i <= $max; $i++) {
 +        my $mult = $max - $i + 1;
 +        $code.=<<___;
 +    lxv     $i,-16*$mult($savesp)
 +___
 +    }
 +
 +    $code.=<<___;
 +    mr      $sp,$savesp
 +
 +___
 +}
 +
 +sub load_vrs($$)
 +{
 +    my ($pointer, $reg_list) = @_;
 +
 +    for (my $i = 0; $i <= 6; $i++) {
 +        my $offset = $i * 8;
 +        $code.=<<___;
 +    lxsd        $reg_list->[$i],$offset($pointer)
 +___
 +    }
 +
 +    $code.=<<___;
 +
 +___
 +}
 +
 +sub store_vrs($$)
 +{
 +    my ($pointer, $reg_list) = @_;
 +
 +    for (my $i = 0; $i <= 12; $i++) {
 +        my $offset = $i * 16;
 +        $code.=<<___;
 +    stxv        $reg_list->[$i],$offset($pointer)
 +___
 +    }
 +
 +    $code.=<<___;
 +
 +___
 +}
 +
 +$code.=<<___;
 +.machine    "any"
 +.text
 +
 +___
 +
 +{
 +    # mul/square common
 +    my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43");
 +    my ($zero, $one) = ("r8", "r9");
 +    my $out = "v51";
 +
 +    {
 +        #
 +        # p384_felem_mul
 +        #
 +
 +        my ($in1p, $in2p) = ("r4", "r5");
 +        my @in1 = map("v$_",(44..50));
 +        my @in2 = map("v$_",(35..41));
 +
 +        startproc("p384_felem_mul");
 +
 +        push_vrs(52, 63);
 +
 +        $code.=<<___;
 +    vspltisw    $vzero,0
 +
 +___
 +
 +        load_vrs($in1p, \@in1);
 +        load_vrs($in2p, \@in2);
 +
 +        $code.=<<___;
 +    vmsumudm    $out,$in1[0],$in2[0],$vzero
 +    stxv        $out,0($outp)
 +
 +    xxpermdi    $t1,$in1[0],$in1[1],0b00
 +    xxpermdi    $t2,$in2[1],$in2[0],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    stxv        $out,16($outp)
 +
 +    xxpermdi    $t2,$in2[2],$in2[1],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$in1[2],$in2[0],$out
 +    stxv        $out,32($outp)
 +
 +    xxpermdi    $t2,$in2[1],$in2[0],0b00
 +    xxpermdi    $t3,$in1[2],$in1[3],0b00
 +    xxpermdi    $t4,$in2[3],$in2[2],0b00
 +    vmsumudm    $out,$t1,$t4,$vzero
 +    vmsumudm    $out,$t3,$t2,$out
 +    stxv        $out,48($outp)
 +
 +    xxpermdi    $t2,$in2[4],$in2[3],0b00
 +    xxpermdi    $t4,$in2[2],$in2[1],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$t3,$t4,$out
 +    vmsumudm    $out,$in1[4],$in2[0],$out
 +    stxv        $out,64($outp)
 +
 +    xxpermdi    $t2,$in2[5],$in2[4],0b00
 +    xxpermdi    $t4,$in2[3],$in2[2],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$t3,$t4,$out
 +    xxpermdi    $t4,$in2[1],$in2[0],0b00
 +    xxpermdi    $t1,$in1[4],$in1[5],0b00
 +    vmsumudm    $out,$t1,$t4,$out
 +    stxv        $out,80($outp)
 +
 +    xxpermdi    $t1,$in1[0],$in1[1],0b00
 +    xxpermdi    $t2,$in2[6],$in2[5],0b00
 +    xxpermdi    $t4,$in2[4],$in2[3],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$t3,$t4,$out
 +    xxpermdi    $t2,$in2[2],$in2[1],0b00
 +    xxpermdi    $t1,$in1[4],$in1[5],0b00
 +    vmsumudm    $out,$t1,$t2,$out
 +    vmsumudm    $out,$in1[6],$in2[0],$out
 +    stxv        $out,96($outp)
 +
 +    xxpermdi    $t1,$in1[1],$in1[2],0b00
 +    xxpermdi    $t2,$in2[6],$in2[5],0b00
 +    xxpermdi    $t3,$in1[3],$in1[4],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$t3,$t4,$out
 +    xxpermdi    $t3,$in2[2],$in2[1],0b00
 +    xxpermdi    $t1,$in1[5],$in1[6],0b00
 +    vmsumudm    $out,$t1,$t3,$out
 +    stxv        $out,112($outp)
 +
 +    xxpermdi    $t1,$in1[2],$in1[3],0b00
 +    xxpermdi    $t3,$in1[4],$in1[5],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$t3,$t4,$out
 +    vmsumudm    $out,$in1[6],$in2[2],$out
 +    stxv        $out,128($outp)
 +
 +    xxpermdi    $t1,$in1[3],$in1[4],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    xxpermdi    $t1,$in1[5],$in1[6],0b00
 +    vmsumudm    $out,$t1,$t4,$out
 +    stxv        $out,144($outp)
 +
 +    vmsumudm    $out,$t3,$t2,$vzero
 +    vmsumudm    $out,$in1[6],$in2[4],$out
 +    stxv        $out,160($outp)
 +
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    stxv        $out,176($outp)
 +
 +    vmsumudm    $out,$in1[6],$in2[6],$vzero
 +    stxv        $out,192($outp)
 +___
 +
 +        endproc("p384_felem_mul");
 +    }
 +
 +    {
 +        #
 +        # p384_felem_square
 +        #
 +
 +        my ($inp) = ("r4");
 +        my @in = map("v$_",(44..50));
 +        my @inx2 = map("v$_",(35..41));
 +
 +        startproc("p384_felem_square");
 +
 +        push_vrs(52, 63);
 +
 +        $code.=<<___;
 +    vspltisw    $vzero,0
 +
 +___
 +
 +        load_vrs($inp, \@in);
 +
 +        $code.=<<___;
 +    li        $zero,0
 +    li        $one,1
 +    mtvsrdd        $t1,$one,$zero
 +___
 +
 +        for (my $i = 0; $i <= 6; $i++) {
 +            $code.=<<___;
 +    vsld        $inx2[$i],$in[$i],$t1
 +___
 +        }
 +
 +        $code.=<<___;
 +    vmsumudm    $out,$in[0],$in[0],$vzero
 +    stxv        $out,0($outp)
 +
 +    vmsumudm    $out,$in[0],$inx2[1],$vzero
 +    stxv        $out,16($outp)
 +
 +    vmsumudm    $out,$in[0],$inx2[2],$vzero
 +    vmsumudm    $out,$in[1],$in[1],$out
 +    stxv        $out,32($outp)
 +
 +    xxpermdi    $t1,$in[0],$in[1],0b00
 +    xxpermdi    $t2,$inx2[3],$inx2[2],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    stxv        $out,48($outp)
 +
 +    xxpermdi    $t4,$inx2[4],$inx2[3],0b00
 +    vmsumudm    $out,$t1,$t4,$vzero
 +    vmsumudm    $out,$in[2],$in[2],$out
 +    stxv        $out,64($outp)
 +
 +    xxpermdi    $t2,$inx2[5],$inx2[4],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$in[2],$inx2[3],$out
 +    stxv        $out,80($outp)
 +
 +    xxpermdi    $t2,$inx2[6],$inx2[5],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$in[2],$inx2[4],$out
 +    vmsumudm    $out,$in[3],$in[3],$out
 +    stxv        $out,96($outp)
 +
 +    xxpermdi    $t3,$in[1],$in[2],0b00
 +    vmsumudm    $out,$t3,$t2,$vzero
 +    vmsumudm    $out,$in[3],$inx2[4],$out
 +    stxv        $out,112($outp)
 +
 +    xxpermdi    $t1,$in[2],$in[3],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    vmsumudm    $out,$in[4],$in[4],$out
 +    stxv        $out,128($outp)
 +
 +    xxpermdi    $t1,$in[3],$in[4],0b00
 +    vmsumudm    $out,$t1,$t2,$vzero
 +    stxv        $out,144($outp)
 +
 +    vmsumudm    $out,$in[4],$inx2[6],$vzero
 +    vmsumudm    $out,$in[5],$in[5],$out
 +    stxv        $out,160($outp)
 +
 +    vmsumudm    $out,$in[5],$inx2[6],$vzero
 +    stxv        $out,176($outp)
 +
 +    vmsumudm    $out,$in[6],$in[6],$vzero
 +    stxv        $out,192($outp)
 +___
 +
 +        endproc("p384_felem_square");
 +    }
 +}
 +
 +$code =~ s/\`([^\`]*)\`/eval $1/gem;
 +print $code;
 +close STDOUT or die "error closing STDOUT: $!";
 --- a/crypto/ec/build.info
 +++ b/crypto/ec/build.info
@@ -31,6 +31,8 @@ GENERATE[ecp_nistz256-armv8.S]=asm/ecp_n
 INCLUDE[ecp_nistz256-armv8.o]=..
 GENERATE[ecp_nistz256-ppc64.s]=asm/ecp_nistz256-ppc64.pl $(PERLASM_SCHEME)
 +GENERATE[ecp_nistp384-ppc64.s]=asm/ecp_nistp384-ppc64.pl $(PERLASM_SCHEME)
 +INCLUDE[ecp_nistp384.o]=..
 GENERATE[ecp_nistp521-ppc64.s]=asm/ecp_nistp521-ppc64.pl $(PERLASM_SCHEME)
 GENERATE[x25519-x86_64.s]=asm/x25519-x86_64.pl $(PERLASM_SCHEME)
 --- a/crypto/ec/ecp_nistp384.c
 +++ b/crypto/ec/ecp_nistp384.c
@@ -691,6 +691,15 @@ void p384_felem_mul(widefelem out, const
 static void felem_select(void)
 {
 +# if defined(_ARCH_PPC64)
 +    if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
 +        felem_square_p = p384_felem_square;
 +        felem_mul_p = p384_felem_mul;
 +
 +        return;
 +    }
 +# endif
 +
     /* Default */
     felem_square_p = felem_square_ref;
     felem_mul_p = felem_mul_ref;
--- a/openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch
+++ b/openssl-ecc-Remove-extraneous-parentheses-in-secp384r1.patch
@ -0,0 +1,76 @@
 From 670e73d9084465384b11ef24802ca4a313e1d2f4 Mon Sep 17 00:00:00 2001
 From: Rohan McLure <rohanmclure@linux.ibm.com>
 Date: Tue, 15 Aug 2023 15:20:20 +1000
 Subject: [PATCH] ecc: Remove extraneous parentheses in secp384r1
 Substitutions in the felem_reduce() method feature unnecessary
 parentheses; remove them.
 Signed-off-by: Rohan McLure <rohan.mclure@linux.ibm.com>
 Reviewed-by: Tomas Mraz <tomas@openssl.org>
 Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
 Reviewed-by: Hugo Landau <hlandau@openssl.org>
 (Merged from https://github.com/openssl/openssl/pull/21749)
 ---
 crypto/ec/ecp_nistp384.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
 diff --git a/crypto/ec/ecp_nistp384.c b/crypto/ec/ecp_nistp384.c
 index 14f9530d07c6..ff68f9cc7ad0 100644
 --- a/crypto/ec/ecp_nistp384.c
 +++ b/crypto/ec/ecp_nistp384.c
@@ -540,7 +540,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[7] += in[12] >> 8;
     acc[6] += (in[12] & 0xff) << 48;
     acc[6] -= in[12] >> 16;
 -    acc[5] -= ((in[12] & 0xffff) << 40);
 +    acc[5] -= (in[12] & 0xffff) << 40;
     acc[6] += in[12] >> 48;
     acc[5] += (in[12] & 0xffffffffffff) << 8;
@@ -549,7 +549,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[6] += in[11] >> 8;
     acc[5] += (in[11] & 0xff) << 48;
     acc[5] -= in[11] >> 16;
 -    acc[4] -= ((in[11] & 0xffff) << 40);
 +    acc[4] -= (in[11] & 0xffff) << 40;
     acc[5] += in[11] >> 48;
     acc[4] += (in[11] & 0xffffffffffff) << 8;
@@ -558,7 +558,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[5] += in[10] >> 8;
     acc[4] += (in[10] & 0xff) << 48;
     acc[4] -= in[10] >> 16;
 -    acc[3] -= ((in[10] & 0xffff) << 40);
 +    acc[3] -= (in[10] & 0xffff) << 40;
     acc[4] += in[10] >> 48;
     acc[3] += (in[10] & 0xffffffffffff) << 8;
@@ -567,7 +567,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[4] += in[9] >> 8;
     acc[3] += (in[9] & 0xff) << 48;
     acc[3] -= in[9] >> 16;
 -    acc[2] -= ((in[9] & 0xffff) << 40);
 +    acc[2] -= (in[9] & 0xffff) << 40;
     acc[3] += in[9] >> 48;
     acc[2] += (in[9] & 0xffffffffffff) << 8;
@@ -582,7 +582,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[3] += acc[8] >> 8;
     acc[2] += (acc[8] & 0xff) << 48;
     acc[2] -= acc[8] >> 16;
 -    acc[1] -= ((acc[8] & 0xffff) << 40);
 +    acc[1] -= (acc[8] & 0xffff) << 40;
     acc[2] += acc[8] >> 48;
     acc[1] += (acc[8] & 0xffffffffffff) << 8;
@@ -591,7 +591,7 @@ static void felem_reduce(felem out, const widefelem in)
     acc[2] += acc[7] >> 8;
     acc[1] += (acc[7] & 0xff) << 48;
     acc[1] -= acc[7] >> 16;
 -    acc[0] -= ((acc[7] & 0xffff) << 40);
 +    acc[0] -= (acc[7] & 0xffff) << 40;
     acc[1] += acc[7] >> 48;
     acc[0] += (acc[7] & 0xffffffffffff) << 8;
--- a/openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch
+++ b/openssl-powerpc-ecc-Fix-stack-allocation-secp384r1-asm.patch
@ -0,0 +1,96 @@
 From 50f8b936b00dc18ce1f622a7a6aa46daf03da48b Mon Sep 17 00:00:00 2001
 From: Rohan McLure <rohanmclure@linux.ibm.com>
 Date: Wed, 16 Aug 2023 16:52:47 +1000
 Subject: [PATCH] powerpc: ecc: Fix stack allocation secp384r1 asm
 The secp384r1 assembly acceleration opts not to use any callee-save VSRs, as
 VSX-enabled systems make extensive use of renaming, and so writebacks in
 felem_{mul,square}() can be reordered for best cache effects.
 Remove stack allocations. This in turn fixes unmatched push/pops in
 felem_{mul,square}().
 Signed-off-by: Rohan McLure <rohan.mclure@linux.ibm.com>
 Reviewed-by: Tomas Mraz <tomas@openssl.org>
 Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
 Reviewed-by: Hugo Landau <hlandau@openssl.org>
 (Merged from https://github.com/openssl/openssl/pull/21749)
 ---
 crypto/ec/asm/ecp_nistp384-ppc64.pl | 49 -----------------------------
 1 file changed, 49 deletions(-)
 diff --git a/crypto/ec/asm/ecp_nistp384-ppc64.pl b/crypto/ec/asm/ecp_nistp384-ppc64.pl
 index 3f86b391af69..28f4168e5218 100755
 --- a/crypto/ec/asm/ecp_nistp384-ppc64.pl
 +++ b/crypto/ec/asm/ecp_nistp384-ppc64.pl
@@ -62,51 +62,6 @@ ($)
 ___
 }
 -
 -sub push_vrs($$)
 -{
 -    my ($min, $max) = @_;
 -
 -    my $count = $max - $min + 1;
 -
 -    $code.=<<___;
 -    mr      $savesp,$sp
 -    stdu        $sp,-16*`$count+1`($sp)
 -
 -___
 -        for (my $i = $min; $i <= $max; $i++) {
 -            my $mult = $max - $i + 1;
 -            $code.=<<___;
 -    stxv        $i,-16*$mult($savesp)
 -___
 -
 -    }
 -
 -    $code.=<<___;
 -
 -___
 -}
 -
 -sub pop_vrs($$)
 -{
 -    my ($min, $max) = @_;
 -
 -    $code.=<<___;
 -    ld      $savesp,0($sp)
 -___
 -    for (my $i = $min; $i <= $max; $i++) {
 -        my $mult = $max - $i + 1;
 -        $code.=<<___;
 -    lxv     $i,-16*$mult($savesp)
 -___
 -    }
 -
 -    $code.=<<___;
 -    mr      $sp,$savesp
 -
 -___
 -}
 -
 sub load_vrs($$)
 {
     my ($pointer, $reg_list) = @_;
@@ -162,8 +117,6 @@ ($$)
         startproc("p384_felem_mul");
 -        push_vrs(52, 63);
 -
         $code.=<<___;
     vspltisw    $vzero,0
@@ -268,8 +221,6 @@ ($$)
         startproc("p384_felem_square");
 -        push_vrs(52, 63);
 -
         $code.=<<___;
     vspltisw    $vzero,0