commit 9cd4051e47c8da8398f93f42f0f56750552965f4 Author: Holger Dengler Date: Tue Aug 6 14:00:49 2024 +0200 s390x: Add hardware acceleration for full AES-XTS The CPACF instruction KM provides support for accelerating the full AES-XTS algorithm on newer machines for AES_XTS_128 and AES_XTS_256. Preliminary measurements showed performance improvements of up to 50%, dependent on the message size. Signed-off-by: Holger Dengler Reviewed-by: Tomas Mraz Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/25414) diff --git a/providers/implementations/ciphers/build.info b/providers/implementations/ciphers/build.info index 5eb705969f..1837070c21 100644 --- a/providers/implementations/ciphers/build.info +++ b/providers/implementations/ciphers/build.info @@ -71,6 +71,19 @@ IF[{- !$disabled{asm} -}] ENDIF ENDIF +IF[{- !$disabled{asm} -}] + IF[{- ($target{perlasm_scheme} // '') ne '31' -}] + $AESXTSDEF_s390x=AES_XTS_S390X + ENDIF + + # Now that we have defined all the arch specific variables, use the + # appropriate one, and define the appropriate macros + + IF[$AESXTSDEF_{- $target{asm_arch} -}] + $AESXTSDEF=$AESXTSDEF_{- $target{asm_arch} -} + ENDIF +ENDIF + # This source is common building blocks for all ciphers in all our providers. SOURCE[$COMMON_GOAL]=\ ciphercommon.c ciphercommon_hw.c ciphercommon_block.c \ @@ -93,6 +106,7 @@ SOURCE[$AES_GOAL]=\ cipher_aes_cbc_hmac_sha.c \ cipher_aes_cbc_hmac_sha256_hw.c cipher_aes_cbc_hmac_sha1_hw.c \ cipher_cts.c +DEFINE[$AES_GOAL]=$AESXTSDEF # Extra code to satisfy the FIPS and non-FIPS separation. # When the AES-xxx-XTS moves to legacy, cipher_aes_xts_fips.c can be removed. 
diff --git a/providers/implementations/ciphers/cipher_aes_xts.c b/providers/implementations/ciphers/cipher_aes_xts.c index cce2537ea7..2287834d62 100644 --- a/providers/implementations/ciphers/cipher_aes_xts.c +++ b/providers/implementations/ciphers/cipher_aes_xts.c @@ -62,6 +62,10 @@ static int aes_xts_check_keys_differ(const unsigned char *key, size_t bytes, return 1; } +#ifdef AES_XTS_S390X +# include "cipher_aes_xts_s390x.inc" +#endif + /*- * Provider dispatch functions */ @@ -98,6 +102,10 @@ static int aes_xts_einit(void *vctx, const unsigned char *key, size_t keylen, const unsigned char *iv, size_t ivlen, const OSSL_PARAM params[]) { +#ifdef AES_XTS_S390X + if (s390x_aes_xts_einit(vctx, key, keylen, iv, ivlen, params) == 1) + return 1; +#endif return aes_xts_init(vctx, key, keylen, iv, ivlen, params, 1); } @@ -105,6 +113,10 @@ static int aes_xts_dinit(void *vctx, const unsigned char *key, size_t keylen, const unsigned char *iv, size_t ivlen, const OSSL_PARAM params[]) { +#ifdef AES_XTS_S390X + if (s390x_aes_xts_dinit(vctx, key, keylen, iv, ivlen, params) == 1) + return 1; +#endif return aes_xts_init(vctx, key, keylen, iv, ivlen, params, 0); } @@ -137,6 +149,11 @@ static void *aes_xts_dupctx(void *vctx) if (!ossl_prov_is_running()) return NULL; +#ifdef AES_XTS_S390X + if (in->plat.s390x.fc) + return s390x_aes_xts_dupctx(vctx); +#endif + if (in->xts.key1 != NULL) { if (in->xts.key1 != &in->ks1) return NULL; @@ -157,6 +174,11 @@ static int aes_xts_cipher(void *vctx, unsigned char *out, size_t *outl, { PROV_AES_XTS_CTX *ctx = (PROV_AES_XTS_CTX *)vctx; +#ifdef AES_XTS_S390X + if (ctx->plat.s390x.fc) + return s390x_aes_xts_cipher(vctx, out, outl, outsize, in, inl); +#endif + if (!ossl_prov_is_running() || ctx->xts.key1 == NULL || ctx->xts.key2 == NULL diff --git a/providers/implementations/ciphers/cipher_aes_xts.h b/providers/implementations/ciphers/cipher_aes_xts.h index afc42ef444..56891ca98c 100644 --- a/providers/implementations/ciphers/cipher_aes_xts.h +++ 
b/providers/implementations/ciphers/cipher_aes_xts.h
@@ -22,6 +22,14 @@ PROV_CIPHER_FUNC(void, xts_stream,
                   const AES_KEY *key1, const AES_KEY *key2,
                   const unsigned char iv[16]));
 
+#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__)
+typedef struct S390X_km_xts_params_st {
+    unsigned char key[64];
+    unsigned char tweak[16];
+    unsigned char nap[16];
+} S390X_KM_XTS_PARAMS;
+#endif
+
 typedef struct prov_aes_xts_ctx_st {
     PROV_CIPHER_CTX base; /* Must be first */
     union {
@@ -30,6 +38,23 @@ typedef struct prov_aes_xts_ctx_st {
     } ks1, ks2; /* AES key schedules to use */
     XTS128_CONTEXT xts;
     OSSL_xts_stream_fn stream;
+
+    /* Platform specific data */
+    union {
+        int dummy;
+#if defined(OPENSSL_CPUID_OBJ) && defined(__s390__)
+        struct {
+            union {
+                OSSL_UNION_ALIGN;
+                S390X_KM_XTS_PARAMS km;
+            } param;
+            size_t offset;
+            unsigned int fc;
+            unsigned int iv_set : 1;
+            unsigned int key_set : 1;
+        } s390x;
+#endif
+    } plat;
 } PROV_AES_XTS_CTX;
 
 const PROV_CIPHER_HW *ossl_prov_cipher_hw_aes_xts(size_t keybits);
diff --git a/providers/implementations/ciphers/cipher_aes_xts_s390x.inc b/providers/implementations/ciphers/cipher_aes_xts_s390x.inc
new file mode 100644
index 0000000000..77341b3bbd
--- /dev/null
+++ b/providers/implementations/ciphers/cipher_aes_xts_s390x.inc
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2024 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "crypto/s390x_arch.h"
+
+static OSSL_FUNC_cipher_encrypt_init_fn s390x_aes_xts_einit;
+static OSSL_FUNC_cipher_decrypt_init_fn s390x_aes_xts_dinit;
+static OSSL_FUNC_cipher_cipher_fn s390x_aes_xts_cipher;
+static OSSL_FUNC_cipher_dupctx_fn s390x_aes_xts_dupctx;
+
+/*
+ * Common init for the KM based full AES-XTS implementation.
+ *
+ * Picks the KM function code matching the configured key length, verifies
+ * that the CPU reports that function and stores IV (tweak) and key in the
+ * KM parameter block.  |dec| is 0 for encryption and S390X_DECRYPT for
+ * decryption; it is or'ed into the stored function code.
+ *
+ * Returns 1 on success.  Returns 0 if the key length or the CPU is not
+ * supported (the stored function code is reset to 0 in that case) or if
+ * the IV, the key or |params| are rejected.
+ */
+static int s390x_aes_xts_init(void *vctx, const unsigned char *key,
+                              size_t keylen, const unsigned char *iv,
+                              size_t ivlen, const OSSL_PARAM params[],
+                              unsigned int dec)
+{
+    PROV_AES_XTS_CTX *xctx = (PROV_AES_XTS_CTX *)vctx;
+    S390X_KM_XTS_PARAMS *km = &xctx->plat.s390x.param.km;
+    unsigned int fc, offs;
+
+    /*
+     * |offs| is where the key starts inside the 64 byte key field.  The
+     * shorter AES-128 key pair is stored at the end of the field, so that
+     * key, tweak and nap are contiguous from |offs| on.
+     */
+    switch (xctx->base.keylen) {
+    case 128 / 8 * 2:
+        fc = S390X_XTS_AES_128_MSA10;
+        offs = 32;
+        break;
+    case 256 / 8 * 2:
+        fc = S390X_XTS_AES_256_MSA10;
+        offs = 0;
+        break;
+    default:
+        goto not_supported;
+    }
+
+    /*
+     * Test the capability bit of this function code.  This has to be a
+     * bitwise AND: S390X_CAPBIT() yields a (non-zero) single bit mask, so
+     * a logical AND would only test whether km[1] is non-zero at all.
+     */
+    if (!(OPENSSL_s390xcap_P.km[1] & S390X_CAPBIT(fc)))
+        goto not_supported;
+
+    if (iv != NULL) {
+        if (ivlen != xctx->base.ivlen
+                || ivlen > sizeof(km->tweak)) {
+            ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_IV_LENGTH);
+            return 0;
+        }
+        memcpy(km->tweak, iv, ivlen);
+        xctx->plat.s390x.iv_set = 1;
+    }
+
+    if (key != NULL) {
+        if (keylen != xctx->base.keylen) {
+            ERR_raise(ERR_LIB_PROV, PROV_R_INVALID_KEY_LENGTH);
+            return 0;
+        }
+        if (!aes_xts_check_keys_differ(key, keylen / 2, !dec))
+            return 0;
+
+        memcpy(km->key + offs, key, keylen);
+        xctx->plat.s390x.key_set = 1;
+    }
+
+    xctx->plat.s390x.fc = fc | dec;
+    xctx->plat.s390x.offset = offs;
+
+    /* (re)start with nap = 01 00 .. 00 on every init */
+    memset(km->nap, 0, sizeof(km->nap));
+    km->nap[0] = 0x1;
+
+    return aes_xts_set_ctx_params(xctx, params);
+
+not_supported:
+    xctx->plat.s390x.fc = 0;
+    xctx->plat.s390x.offset = 0;
+    return 0;
+}
+
+/* Encrypt init: s390x_aes_xts_init() without the decrypt modifier */
+static int s390x_aes_xts_einit(void *vctx, const unsigned char *key,
+                               size_t keylen, const unsigned char *iv,
+                               size_t ivlen, const OSSL_PARAM params[])
+{
+    return s390x_aes_xts_init(vctx, key, keylen, iv, ivlen, params, 0);
+}
+
+/* Decrypt init: s390x_aes_xts_init() with the S390X_DECRYPT modifier */
+static int s390x_aes_xts_dinit(void *vctx, const unsigned char *key,
+                               size_t keylen, const unsigned char *iv,
+                               size_t ivlen, const OSSL_PARAM params[])
+{
+    return s390x_aes_xts_init(vctx, key, keylen, iv, ivlen, params,
+                              S390X_DECRYPT);
+}
+
+/* Returns a newly allocated copy of |vctx| or NULL if allocation fails */
+static void *s390x_aes_xts_dupctx(void *vctx)
+{
+    PROV_AES_XTS_CTX *in = (PROV_AES_XTS_CTX *)vctx;
+    PROV_AES_XTS_CTX *ret = OPENSSL_zalloc(sizeof(*in));
+
+    if (ret != NULL)
+        *ret = *in;
+
+    return ret;
+}
+
+/*
+ * Process one data unit of |inl| bytes from |in| to |out| using KM.
+ *
+ * If |inl| is a multiple of the block size, everything is handled by one KM
+ * call.  Otherwise the last complete block and the trailing partial block
+ * are handled separately after the bulk call (ciphertext stealing).
+ *
+ * Returns 1 and sets |*outl| to |inl| on success, 0 on error.  |outsize|
+ * is not evaluated.
+ */
+static int s390x_aes_xts_cipher(void *vctx, unsigned char *out, size_t *outl,
+                                size_t outsize, const unsigned char *in,
+                                size_t inl)
+{
+    PROV_AES_XTS_CTX *xctx = (PROV_AES_XTS_CTX *)vctx;
+    S390X_KM_XTS_PARAMS *km = &xctx->plat.s390x.param.km;
+    unsigned char *param = (unsigned char *)km + xctx->plat.s390x.offset;
+    unsigned int fc = xctx->plat.s390x.fc;
+    unsigned char tmp[2][AES_BLOCK_SIZE];
+    unsigned char nap_n1[AES_BLOCK_SIZE];
+    unsigned char drop[AES_BLOCK_SIZE];
+    size_t len_incomplete, len_complete;
+
+    if (!ossl_prov_is_running()
+            || inl < AES_BLOCK_SIZE
+            || in == NULL
+            || out == NULL
+            || !xctx->plat.s390x.iv_set
+            || !xctx->plat.s390x.key_set)
+        return 0;
+
+    /*
+     * Impose a limit of 2^20 blocks per data unit as specified by
+     * IEEE Std 1619-2018.  The earlier and obsolete IEEE Std 1619-2007
+     * indicated that this was a SHOULD NOT rather than a MUST NOT.
+     * NIST SP 800-38E mandates the same limit.
+     */
+    if (inl > XTS_MAX_BLOCKS_PER_DATA_UNIT * AES_BLOCK_SIZE) {
+        ERR_raise(ERR_LIB_PROV, PROV_R_XTS_DATA_UNIT_IS_TOO_LARGE);
+        return 0;
+    }
+
+    /* with a partial block, the last complete block is kept back as well */
+    len_incomplete = inl % AES_BLOCK_SIZE;
+    len_complete = (len_incomplete == 0) ? inl :
+                       (inl / AES_BLOCK_SIZE - 1) * AES_BLOCK_SIZE;
+
+    if (len_complete > 0)
+        s390x_km(in, len_complete, out, fc, param);
+    if (len_incomplete == 0)
+        goto out;
+
+    /* tmp[0] = last complete block, tmp[1] = partial block (first bytes) */
+    memcpy(tmp, in + len_complete, AES_BLOCK_SIZE + len_incomplete);
+    /*
+     * swap NAP for decrypt: the result of the first KM call is dropped.
+     * NOTE(review): presumably that call only advances the tweak state in
+     * the parameter block - confirm against the KM specification.
+     */
+    if (fc & S390X_DECRYPT) {
+        memcpy(nap_n1, km->nap, AES_BLOCK_SIZE);
+        s390x_km(tmp[0], AES_BLOCK_SIZE, drop, fc, param);
+    }
+    s390x_km(tmp[0], AES_BLOCK_SIZE, tmp[0], fc, param);
+    if (fc & S390X_DECRYPT)
+        memcpy(km->nap, nap_n1, AES_BLOCK_SIZE);
+
+    /* fill up the partial block with the tail of the processed block */
+    memcpy(tmp[1] + len_incomplete, tmp[0] + len_incomplete,
+           AES_BLOCK_SIZE - len_incomplete);
+    s390x_km(tmp[1], AES_BLOCK_SIZE, out + len_complete, fc, param);
+    memcpy(out + len_complete + AES_BLOCK_SIZE, tmp[0], len_incomplete);
+
+    /* do not expose temporary data */
+    OPENSSL_cleanse(tmp, sizeof(tmp));
+    OPENSSL_cleanse(nap_n1, sizeof(nap_n1));
+    OPENSSL_cleanse(drop, sizeof(drop));
+out:
+    memcpy(xctx->base.iv, km->tweak, AES_BLOCK_SIZE);
+    *outl = inl;
+
+    return 1;
+}