openssl-3/openssl-3-hw-acceleration-aes-xts-s390x.patch

328 lines
11 KiB
Diff

commit 9cd4051e47c8da8398f93f42f0f56750552965f4
Author: Holger Dengler <dengler@linux.ibm.com>
Date: Tue Aug 6 14:00:49 2024 +0200
s390x: Add hardware acceleration for full AES-XTS
The CPACF instruction KM provides support for accelerating the full
AES-XTS algorithm on newer machines for AES_XTS_128 and AES_XTS_256.
Preliminary measurements showed performance improvements of up to 50%,
dependent on the message size.
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/25414)
diff --git a/providers/implementations/ciphers/build.info b/providers/implementations/ciphers/build.info
index 5eb705969f..1837070c21 100644
--- a/providers/implementations/ciphers/build.info
+++ b/providers/implementations/ciphers/build.info
@@ -71,6 +71,19 @@ IF[{- !$disabled{asm} -}]
ENDIF
ENDIF
+IF[{- !$disabled{asm} -}]
+ IF[{- ($target{perlasm_scheme} // '') ne '31' -}]
+ $AESXTSDEF_s390x=AES_XTS_S390X
+ ENDIF
+
+ # Now that we have defined all the arch specific variables, use the
+ # appropriate one, and define the appropriate macros
+
+ IF[$AESXTSDEF_{- $target{asm_arch} -}]
+ $AESXTSDEF=$AESXTSDEF_{- $target{asm_arch} -}
+ ENDIF
+ENDIF
+
# This source is common building blocks for all ciphers in all our providers.
SOURCE[$COMMON_GOAL]=\
ciphercommon.c ciphercommon_hw.c ciphercommon_block.c \
@@ -93,6 +106,7 @@ SOURCE[$AES_GOAL]=\
cipher_aes_cbc_hmac_sha.c \
cipher_aes_cbc_hmac_sha256_hw.c cipher_aes_cbc_hmac_sha1_hw.c \
cipher_cts.c
+DEFINE[$AES_GOAL]=$AESXTSDEF
# Extra code to satisfy the FIPS and non-FIPS separation.
# When the AES-xxx-XTS moves to legacy, cipher_aes_xts_fips.c can be removed.
diff --git a/providers/implementations/ciphers/cipher_aes_xts.c b/providers/implementations/ciphers/cipher_aes_xts.c
index cce2537ea7..2287834d62 100644
--- a/providers/implementations/ciphers/cipher_aes_xts.c
+++ b/providers/implementations/ciphers/cipher_aes_xts.c
@@ -62,6 +62,10 @@ static int aes_xts_check_keys_differ(const unsigned char *key, size_t bytes,
return 1;
}
+#ifdef AES_XTS_S390X
+# include "cipher_aes_xts_s390x.inc"
+#endif
+
/*-
* Provider dispatch functions
*/
@@ -98,6 +102,10 @@ static int aes_xts_einit(void *vctx, const unsigned char *key, size_t keylen,
const unsigned char *iv, size_t ivlen,
const OSSL_PARAM params[])
{
+#ifdef AES_XTS_S390X
+ if (s390x_aes_xts_einit(vctx, key, keylen, iv, ivlen, params) == 1)
+ return 1;
+#endif
return aes_xts_init(vctx, key, keylen, iv, ivlen, params, 1);
}
@@ -105,6 +113,10 @@ static int aes_xts_dinit(void *vctx, const unsigned char *key, size_t keylen,
const unsigned char *iv, size_t ivlen,
const OSSL_PARAM params[])
{
+#ifdef AES_XTS_S390X
+ if (s390x_aes_xts_dinit(vctx, key, keylen, iv, ivlen, params) == 1)
+ return 1;
+#endif
return aes_xts_init(vctx, key, keylen, iv, ivlen, params, 0);
}
@@ -137,6 +149,11 @@ static void *aes_xts_dupctx(void *vctx)
if (!ossl_prov_is_running())
return NULL;
+#ifdef AES_XTS_S390X
+ if (in->plat.s390x.fc)
+ return s390x_aes_xts_dupctx(vctx);
+#endif
+
if (in->xts.key1 != NULL) {
if (in->xts.key1 != &in->ks1)
return NULL;
@@ -157,6 +174,11 @@ static int aes_xts_cipher(void *vctx, unsigned char *out, size_t *outl,
{
PROV_AES_XTS_CTX *ctx = (PROV_AES_XTS_CTX *)vctx;
+#ifdef AES_XTS_S390X
+ if (ctx->plat.s390x.fc)
+ return s390x_aes_xts_cipher(vctx, out, outl, outsize, in, inl);
+#endif
+
if (!ossl_prov_is_running()
|| ctx->xts.key1 == NULL
|| ctx->xts.key2 == NULL
diff --git a/providers/implementations/ciphers/cipher_aes_xts.h b/providers/implementations/ciphers/cipher_aes_xts.h
index afc42ef444..56891ca98c 100644
--- a/providers/implementations/ciphers/cipher_aes_xts.h
+++ b/providers/implementations/ciphers/cipher_aes_xts.h
@@ -22,6 +22,14 @@ PROV_CIPHER_FUNC(void, xts_stream,
const AES_KEY *key1, const AES_KEY *key2,
const unsigned char iv[16]));
+#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__)
+typedef struct S390X_km_xts_params_st {
+ unsigned char key[64];
+ unsigned char tweak[16];
+ unsigned char nap[16];
+} S390X_KM_XTS_PARAMS;
+#endif
+
typedef struct prov_aes_xts_ctx_st {
PROV_CIPHER_CTX base; /* Must be first */
union {
@@ -30,6 +38,23 @@ typedef struct prov_aes_xts_ctx_st {
} ks1, ks2; /* AES key schedules to use */
XTS128_CONTEXT xts;
OSSL_xts_stream_fn stream;
+
+ /* Platform specific data */
+ union {
+ int dummy;
+#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__)
+ struct {
+ union {
+ OSSL_UNION_ALIGN;
+ S390X_KM_XTS_PARAMS km;
+ } param;
+ size_t offset;
+ unsigned int fc;
+ unsigned int iv_set : 1;
+ unsigned int key_set : 1;
+ } s390x;
+#endif
+ } plat;
} PROV_AES_XTS_CTX;
const PROV_CIPHER_HW *ossl_prov_cipher_hw_aes_xts(size_t keybits);
diff --git a/providers/implementations/ciphers/cipher_aes_xts_s390x.inc b/providers/implementations/ciphers/cipher_aes_xts_s390x.inc
new file mode 100644
index 0000000000..77341b3bbd
--- /dev/null
+++ b/providers/implementations/ciphers/cipher_aes_xts_s390x.inc
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2024 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "crypto/s390x_arch.h"
+
+static OSSL_FUNC_cipher_encrypt_init_fn s390x_aes_xts_einit;
+static OSSL_FUNC_cipher_decrypt_init_fn s390x_aes_xts_dinit;
+static OSSL_FUNC_cipher_cipher_fn s390x_aes_xts_cipher;
+static OSSL_FUNC_cipher_dupctx_fn s390x_aes_xts_dupctx;
+
+static int s390x_aes_xts_init(void *vctx, const unsigned char *key,
+ size_t keylen, const unsigned char *iv,
+ size_t ivlen, const OSSL_PARAM params[],
+ unsigned int dec)
+{
+ PROV_AES_XTS_CTX *xctx = (PROV_AES_XTS_CTX *)vctx;
+ S390X_KM_XTS_PARAMS *km = &xctx->plat.s390x.param.km;
+ unsigned int fc, offs;
+
+ switch (xctx->base.keylen) {
+ case 128 / 8 * 2:
+ fc = S390X_XTS_AES_128_MSA10;
+ offs = 32;
+ break;
+ case 256 / 8 * 2:
+ fc = S390X_XTS_AES_256_MSA10;
+ offs = 0;
+ break;
+ default:
+ goto not_supported;
+ }
+
+ if (!(OPENSSL_s390xcap_P.km[1] && S390X_CAPBIT(fc)))
+ goto not_supported;
+
+ if (iv != NULL) {
+ if (ivlen != xctx->base.ivlen
+ || ivlen > sizeof(km->tweak)) {
+ ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_IV_LENGTH);
+ return 0;
+ }
+ memcpy(km->tweak, iv, ivlen);
+ xctx->plat.s390x.iv_set = 1;
+ }
+
+ if (key != NULL) {
+ if (keylen != xctx->base.keylen) {
+ ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_KEY_LENGTH);
+ return 0;
+ }
+ if (!aes_xts_check_keys_differ(key, keylen / 2, !dec))
+ return 0;
+
+ memcpy(km->key + offs, key, keylen);
+ xctx->plat.s390x.key_set = 1;
+ }
+
+ xctx->plat.s390x.fc = fc | dec;
+ xctx->plat.s390x.offset = offs;
+
+ memset(km->nap, 0, sizeof(km->nap));
+ km->nap[0] = 0x1;
+
+ return aes_xts_set_ctx_params(xctx, params);
+
+not_supported:
+ xctx->plat.s390x.fc = 0;
+ xctx->plat.s390x.offset = 0;
+ return 0;
+}
+
+static int s390x_aes_xts_einit(void *vctx, const unsigned char *key,
+ size_t keylen, const unsigned char *iv,
+ size_t ivlen, const OSSL_PARAM params[])
+{
+ return s390x_aes_xts_init(vctx, key, keylen, iv, ivlen, params, 0);
+}
+
+static int s390x_aes_xts_dinit(void *vctx, const unsigned char *key,
+ size_t keylen, const unsigned char *iv,
+ size_t ivlen, const OSSL_PARAM params[])
+{
+ return s390x_aes_xts_init(vctx, key, keylen, iv, ivlen, params,
+ S390X_DECRYPT);
+}
+
+static void *s390x_aes_xts_dupctx(void *vctx)
+{
+ PROV_AES_XTS_CTX *in = (PROV_AES_XTS_CTX *)vctx;
+ PROV_AES_XTS_CTX *ret = OPENSSL_zalloc(sizeof(*in));
+
+ if (ret != NULL)
+ *ret = *in;
+
+ return ret;
+}
+
+static int s390x_aes_xts_cipher(void *vctx, unsigned char *out, size_t *outl,
+ size_t outsize, const unsigned char *in,
+ size_t inl)
+{
+ PROV_AES_XTS_CTX *xctx = (PROV_AES_XTS_CTX *)vctx;
+ S390X_KM_XTS_PARAMS *km = &xctx->plat.s390x.param.km;
+ unsigned char *param = (unsigned char *)km + xctx->plat.s390x.offset;
+ unsigned int fc = xctx->plat.s390x.fc;
+ unsigned char tmp[2][AES_BLOCK_SIZE];
+ unsigned char nap_n1[AES_BLOCK_SIZE];
+ unsigned char drop[AES_BLOCK_SIZE];
+ size_t len_incomplete, len_complete;
+
+ if (!ossl_prov_is_running()
+ || inl < AES_BLOCK_SIZE
+ || in == NULL
+ || out == NULL
+ || !xctx->plat.s390x.iv_set
+ || !xctx->plat.s390x.key_set)
+ return 0;
+
+ /*
+ * Impose a limit of 2^20 blocks per data unit as specified by
+ * IEEE Std 1619-2018. The earlier and obsolete IEEE Std 1619-2007
+ * indicated that this was a SHOULD NOT rather than a MUST NOT.
+ * NIST SP 800-38E mandates the same limit.
+ */
+ if (inl > XTS_MAX_BLOCKS_PER_DATA_UNIT * AES_BLOCK_SIZE) {
+ ERR_raise(ERR_LIB_PROV, PROV_R_XTS_DATA_UNIT_IS_TOO_LARGE);
+ return 0;
+ }
+
+ len_incomplete = inl % AES_BLOCK_SIZE;
+ len_complete = (len_incomplete == 0) ? inl :
+ (inl / AES_BLOCK_SIZE - 1) * AES_BLOCK_SIZE;
+
+ if (len_complete > 0)
+ s390x_km(in, len_complete, out, fc, param);
+ if (len_incomplete == 0)
+ goto out;
+
+ memcpy(tmp, in + len_complete, AES_BLOCK_SIZE + len_incomplete);
+ /* swap NAP for decrypt */
+ if (fc & S390X_DECRYPT) {
+ memcpy(nap_n1, km->nap, AES_BLOCK_SIZE);
+ s390x_km(tmp[0], AES_BLOCK_SIZE, drop, fc, param);
+ }
+ s390x_km(tmp[0], AES_BLOCK_SIZE, tmp[0], fc, param);
+ if (fc & S390X_DECRYPT)
+ memcpy(km->nap, nap_n1, AES_BLOCK_SIZE);
+
+ memcpy(tmp[1] + len_incomplete, tmp[0] + len_incomplete,
+ AES_BLOCK_SIZE - len_incomplete);
+ s390x_km(tmp[1], AES_BLOCK_SIZE, out + len_complete, fc, param);
+ memcpy(out + len_complete + AES_BLOCK_SIZE, tmp[0], len_incomplete);
+
+ /* do not expose temporary data */
+ OPENSSL_cleanse(tmp, sizeof(tmp));
+out:
+ memcpy(xctx->base.iv, km->tweak, AES_BLOCK_SIZE);
+ *outl = inl;
+
+ return 1;
+}