218 lines
9.1 KiB
Diff
218 lines
9.1 KiB
Diff
commit 848113a30b431c2fe21ae8de2a366b9b6146fb92
|
||
Author: User <milosprv@gmail.com>
|
||
Date: Wed May 16 13:59:36 2018 -0400
|
||
|
||
bn/bn_exp.c: mitigation of the One-and-Done side-channel attack.
|
||
|
||
The One&Done attack, which is described in a paper to appear in the
|
||
USENIX Security'18 conference, uses EM emanations to recover the values
|
||
of the bits that are obtained using BN_is_bit_set while constructing
|
||
the value of the window in BN_mod_exp_mont_consttime. The EM signal changes
|
||
slightly depending on the value of the bit, and since the lookup of a
|
||
bit is surrounded by highly regular execution (constant-time Montgomery
|
||
multiplications) the attack is able to isolate the (very brief) part of
|
||
the signal that changes depending on the bit. Although the change is
|
||
slight, the attack recovers it successfully >90% of the time on several
|
||
phones and IoT devices (all with ARM processors with clock rates around
|
||
1GHz), so after only one RSA decryption more than 90% of the bits in
|
||
d_p and d_q are recovered correctly, which enables rapid recovery of
|
||
the full RSA key using an algorithm (also described in the paper) that
|
||
modifies the branch-and-prune approach for a situation in which the
|
||
exponents' bits are recovered with errors, i.e. where we do not know
|
||
a priori which bits are correctly recovered.
|
||
|
||
The mitigation for the attack is relatively simple - all the bits of
|
||
the window are obtained at once, along with other bits so that an
|
||
entire integer's worth of bits are obtained together using masking and
|
||
shifts, without unnecessarily considering each bit in isolation. This
|
||
improves performance somewhat (one call to bn_get_bits is faster than
|
||
several calls to BN_is_bit_set), and the attacker now gets one signal
|
||
snippet per window (rather than one per bit) in which the signal is
|
||
affected by all bits in the integer (rather than just the one bit).
|
||
|
||
Reviewed-by: Andy Polyakov <appro@openssl.org>
|
||
Reviewed-by: Rich Salz <rsalz@openssl.org>
|
||
(Merged from https://github.com/openssl/openssl/pull/6276)
|
||
|
||
From 3f0c3d2263cd98dd3bcd366f199f0df7c9887d81 Mon Sep 17 00:00:00 2001
|
||
From: Andy Polyakov <appro@openssl.org>
|
||
Date: Wed, 13 Jun 2018 14:00:04 +0200
|
||
Subject: [PATCH] bn/bn_exp.c: harmonize all code paths with last commit.
|
||
|
||
848113a30b431c2fe21ae8de2a366b9b6146fb92 added mitigation for a
|
||
side-channel attack. This commit extends the approach to all code
|
||
paths for consistency.
|
||
|
||
[It also removes redundant white spaces introduced in last commit.]
|
||
|
||
Reviewed-by: Rich Salz <rsalz@openssl.org>
|
||
(Merged from https://github.com/openssl/openssl/pull/6480)
|
||
|
||
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
|
||
index 36b7ba6..f96aea2 100644
|
||
--- a/crypto/bn/bn_exp.c
|
||
+++ b/crypto/bn/bn_exp.c
|
||
@@ -586,7 +586,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||
return (ret);
|
||
}
|
||
|
||
-#if defined(SPARC_T4_MONT)
|
||
static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
|
||
{
|
||
BN_ULONG ret = 0;
|
||
@@ -605,7 +604,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
|
||
|
||
return ret & BN_MASK2;
|
||
}
|
||
-#endif
|
||
|
||
/*
|
||
* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
|
||
@@ -704,7 +702,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||
const BIGNUM *m, BN_CTX *ctx,
|
||
BN_MONT_CTX *in_mont)
|
||
{
|
||
- int i, bits, ret = 0, window, wvalue;
|
||
+ int i, bits, ret = 0, window, wvalue, wmask, window0;
|
||
int top;
|
||
BN_MONT_CTX *mont = NULL;
|
||
|
||
@@ -956,20 +954,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||
top /= 2;
|
||
bn_flip_t4(np, mont->N.d, top);
|
||
|
||
- bits--;
|
||
- for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
|
||
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
|
||
+ /*
|
||
+ * The exponent may not have a whole number of fixed-size windows.
|
||
+ * To simplify the main loop, the initial window has between 1 and
|
||
+ * full-window-size bits such that what remains is always a whole
|
||
+ * number of windows
|
||
+ */
|
||
+ window0 = (bits - 1) % 5 + 1;
|
||
+ wmask = (1 << window0) - 1;
|
||
+ bits -= window0;
|
||
+ wvalue = bn_get_bits(p, bits) & wmask;
|
||
bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
|
||
|
||
/*
|
||
* Scan the exponent one window at a time starting from the most
|
||
* significant bits.
|
||
*/
|
||
- while (bits >= 0) {
|
||
+ while (bits > 0) {
|
||
if (bits < stride)
|
||
- stride = bits + 1;
|
||
+ stride = bits;
|
||
bits -= stride;
|
||
- wvalue = bn_get_bits(p, bits + 1);
|
||
+ wvalue = bn_get_bits(p, bits);
|
||
|
||
if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
|
||
continue;
|
||
@@ -1077,32 +1082,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||
bn_scatter5(tmp.d, top, powerbuf, i);
|
||
}
|
||
# endif
|
||
- bits--;
|
||
- for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
|
||
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
|
||
+ /*
|
||
+ * The exponent may not have a whole number of fixed-size windows.
|
||
+ * To simplify the main loop, the initial window has between 1 and
|
||
+ * full-window-size bits such that what remains is always a whole
|
||
+ * number of windows
|
||
+ */
|
||
+ window0 = (bits - 1) % 5 + 1;
|
||
+ wmask = (1 << window0) - 1;
|
||
+ bits -= window0;
|
||
+ wvalue = bn_get_bits(p, bits) & wmask;
|
||
bn_gather5(tmp.d, top, powerbuf, wvalue);
|
||
|
||
/*
|
||
* Scan the exponent one window at a time starting from the most
|
||
* significant bits.
|
||
*/
|
||
- if (top & 7)
|
||
- while (bits >= 0) {
|
||
- for (wvalue = 0, i = 0; i < 5; i++, bits--)
|
||
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
|
||
-
|
||
+ if (top & 7) {
|
||
+ while (bits > 0) {
|
||
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||
bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
|
||
- wvalue);
|
||
+ bn_get_bits5(p->d, bits -= 5));
|
||
+ }
|
||
} else {
|
||
- while (bits >= 0) {
|
||
- wvalue = bn_get_bits5(p->d, bits - 4);
|
||
- bits -= 5;
|
||
- bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
|
||
+ while (bits > 0) {
|
||
+ bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top,
|
||
+ bn_get_bits5(p->d, bits -= 5));
|
||
}
|
||
}
|
||
|
||
@@ -1144,27 +1153,44 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
|
||
}
|
||
}
|
||
|
||
- bits--;
|
||
- for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
|
||
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
|
||
+ /*
|
||
+ * The exponent may not have a whole number of fixed-size windows.
|
||
+ * To simplify the main loop, the initial window has between 1 and
|
||
+ * full-window-size bits such that what remains is always a whole
|
||
+ * number of windows
|
||
+ */
|
||
+ window0 = (bits - 1) % window + 1;
|
||
+ wmask = (1 << window0) - 1;
|
||
+ bits -= window0;
|
||
+ wvalue = bn_get_bits(p, bits) & wmask;
|
||
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
|
||
window))
|
||
goto err;
|
||
|
||
+ wmask = (1 << window) - 1;
|
||
/*
|
||
* Scan the exponent one window at a time starting from the most
|
||
* significant bits.
|
||
*/
|
||
- while (bits >= 0) {
|
||
- wvalue = 0; /* The 'value' of the window */
|
||
+ while (bits > 0) {
|
||
|
||
- /* Scan the window, squaring the result as we go */
|
||
- for (i = 0; i < window; i++, bits--) {
|
||
+ /* Square the result window-size times */
|
||
+ for (i = 0; i < window; i++)
|
||
if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx))
|
||
goto err;
|
||
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
|
||
- }
|
||
|
||
+ /*
|
||
+ * Get a window's worth of bits from the exponent
|
||
+ * This avoids calling BN_is_bit_set for each bit, which
|
||
+ * is not only slower but also makes each bit vulnerable to
|
||
+ * EM (and likely other) side-channel attacks like One&Done
|
||
+ * (for details see "One&Done: A Single-Decryption EM-Based
|
||
+ * Attack on OpenSSL’s Constant-Time Blinded RSA" by M. Alam,
|
||
+ * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and
|
||
+ * M. Prvulovic, in USENIX Security'18)
|
||
+ */
|
||
+ bits -= window;
|
||
+ wvalue = bn_get_bits(p, bits) & wmask;
|
||
/*
|
||
* Fetch the appropriate pre-computed value from the pre-buf
|
||
*/
|