From 25f5d7b85f6657cd2f9f1ab7ae87f319d9bafe54 Mon Sep 17 00:00:00 2001 From: Joerg Schmidbauer Date: Thu, 29 Feb 2024 12:50:05 +0100 Subject: [PATCH] s390x: support CPACF sha3/shake performance improvements On newer machines the SHA3/SHAKE performance of CPACF instructions KIMD and KLMD can be enhanced by using additional modifier bits. This allows the application to omit initializing the ICV, but also affects the internal processing of the instructions. Performance is mostly gained when processing short messages. The new CPACF feature is backwards compatible with older machines, i.e. the new modifier bits are ignored on older machines. However, to save the ICV initialization, the application must detect the MSA level and omit the ICV initialization only if this feature is supported. Signed-off-by: Joerg Schmidbauer Reviewed-by: Paul Dale Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/25235) --- crypto/s390x_arch.h | 3 ++ crypto/s390xcpuid.pl | 4 +-- crypto/sha/sha3.c | 8 +++++- providers/implementations/digests/sha3_prov.c | 28 +++++++++++++++---- 4 files changed, 34 insertions(+), 9 deletions(-) Index: openssl-3.2.3/crypto/s390x_arch.h =================================================================== --- openssl-3.2.3.orig/crypto/s390x_arch.h +++ openssl-3.2.3/crypto/s390x_arch.h @@ -191,6 +191,9 @@ extern int OPENSSL_s390xcex; # define S390X_KMA_LAAD 0x200 # define S390X_KMA_HS 0x400 # define S390X_KDSA_D 0x80 +# define S390X_KIMD_NIP 0x8000 +# define S390X_KLMD_DUFOP 0x4000 +# define S390X_KLMD_NIP 0x8000 # define S390X_KLMD_PS 0x100 # define S390X_KMAC_IKP 0x8000 # define S390X_KMAC_IIMP 0x4000 Index: openssl-3.2.3/crypto/s390xcpuid.pl =================================================================== --- openssl-3.2.3.orig/crypto/s390xcpuid.pl +++ openssl-3.2.3/crypto/s390xcpuid.pl @@ -308,7 +308,7 @@ s390x_kimd: llgfr %r0,$fc lgr %r1,$param - .long 0xb93e0002 # kimd %r0,%r2 + .long 0xb93e8002 # kimd %r0,%r2[,M3] brc 1,.-4 # 
pay attention to "partial completion" br $ra @@ -329,7 +329,7 @@ s390x_klmd: llgfr %r0,$fc l${g} %r1,$stdframe($sp) - .long 0xb93f0042 # klmd %r4,%r2 + .long 0xb93f8042 # klmd %r4,%r2[,M3] brc 1,.-4 # pay attention to "partial completion" br $ra Index: openssl-3.2.3/crypto/sha/sha3.c =================================================================== --- openssl-3.2.3.orig/crypto/sha/sha3.c +++ openssl-3.2.3/crypto/sha/sha3.c @@ -8,13 +8,19 @@ */ #include <string.h> +#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ) +# include "crypto/s390x_arch.h" +#endif #include "internal/sha3.h" void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next); void ossl_sha3_reset(KECCAK1600_CTX *ctx) { - memset(ctx->A, 0, sizeof(ctx->A)); +#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ) + if (!(OPENSSL_s390xcap_P.stfle[1] & S390X_CAPBIT(S390X_MSA12))) +#endif + memset(ctx->A, 0, sizeof(ctx->A)); ctx->bufsz = 0; ctx->xof_state = XOF_STATE_INIT; } Index: openssl-3.2.3/providers/implementations/digests/sha3_prov.c =================================================================== --- openssl-3.2.3.orig/providers/implementations/digests/sha3_prov.c +++ openssl-3.2.3/providers/implementations/digests/sha3_prov.c @@ -187,26 +187,32 @@ static size_t s390x_sha3_absorb(void *vc { KECCAK1600_CTX *ctx = vctx; size_t rem = len % ctx->block_size; + unsigned int fc; if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB)) return 0; + fc = ctx->pad; + fc |= ctx->xof_state == XOF_STATE_INIT ? 
S390X_KIMD_NIP : 0; ctx->xof_state = XOF_STATE_ABSORB; - s390x_kimd(inp, len - rem, ctx->pad, ctx->A); + s390x_kimd(inp, len - rem, fc, ctx->A); return rem; } static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen) { KECCAK1600_CTX *ctx = vctx; + unsigned int fc; if (!ossl_prov_is_running()) return 0; if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB)) return 0; + fc = ctx->pad | S390X_KLMD_DUFOP; + fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0; ctx->xof_state = XOF_STATE_FINAL; - s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, ctx->pad, ctx->A); + s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, fc, ctx->A); memcpy(out, ctx->A, outlen); return 1; } @@ -214,14 +220,17 @@ static int s390x_sha3_final(void *vctx, static int s390x_shake_final(void *vctx, unsigned char *out, size_t outlen) { KECCAK1600_CTX *ctx = vctx; + unsigned int fc; if (!ossl_prov_is_running()) return 0; if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB)) return 0; + fc = ctx->pad | S390X_KLMD_DUFOP; + fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0; ctx->xof_state = XOF_STATE_FINAL; - s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, ctx->pad, ctx->A); + s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A); return 1; } @@ -271,24 +280,28 @@ static int s390x_keccakc_final(void *vct size_t bsz = ctx->block_size; size_t num = ctx->bufsz; size_t needed = outlen; + unsigned int fc; if (!ossl_prov_is_running()) return 0; if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB)) return 0; + fc = ctx->pad; + fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0; ctx->xof_state = XOF_STATE_FINAL; if (outlen == 0) return 1; memset(ctx->buf + num, 0, bsz - num); ctx->buf[num] = padding; ctx->buf[bsz - 1] |= 0x80; - s390x_kimd(ctx->buf, bsz, ctx->pad, ctx->A); + s390x_kimd(ctx->buf, bsz, fc, ctx->A); num = needed > bsz ? 
bsz : needed; memcpy(out, ctx->A, num); needed -= num; if (needed > 0) - s390x_klmd(NULL, 0, out + bsz, needed, ctx->pad | S390X_KLMD_PS, ctx->A); + s390x_klmd(NULL, 0, out + bsz, needed, + ctx->pad | S390X_KLMD_PS | S390X_KLMD_DUFOP, ctx->A); return 1; } @@ -308,6 +321,7 @@ static int s390x_keccakc_squeeze(void *v { KECCAK1600_CTX *ctx = vctx; size_t len; + unsigned int fc; if (!ossl_prov_is_running()) return 0; @@ -323,7 +337,9 @@ static int s390x_keccakc_squeeze(void *v memset(ctx->buf + ctx->bufsz, 0, len); ctx->buf[ctx->bufsz] = padding; ctx->buf[ctx->block_size - 1] |= 0x80; - s390x_kimd(ctx->buf, ctx->block_size, ctx->pad, ctx->A); + fc = ctx->pad; + fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0; + s390x_kimd(ctx->buf, ctx->block_size, fc, ctx->A); ctx->bufsz = 0; /* reuse ctx->bufsz to count bytes squeezed from current sponge */ }