diff --git a/crypto/fipsmodule/bn/exponentiation.c b/crypto/fipsmodule/bn/exponentiation.c index a76d7cb17..285c4dd70 100644 --- a/crypto/fipsmodule/bn/exponentiation.c +++ b/crypto/fipsmodule/bn/exponentiation.c @@ -863,28 +863,13 @@ static int copy_from_prebuf(BIGNUM *b, int top, const BN_ULONG *table, int idx, // Window sizes optimized for fixed window size modular exponentiation // algorithm (BN_mod_exp_mont_consttime). // -// To achieve the security goals of BN_mode_exp_mont_consttime, the maximum -// size of the window must not exceed -// log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). -// -// Window size thresholds are defined for cache line sizes of 32 and 64, cache -// line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of -// 7 should only be used on processors that have a 128 byte or greater cache -// line size. -#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 - +// TODO(davidben): These window sizes were originally set for 64-byte cache +// lines with a cache-line-dependent constant-time mitigation. They can probably +// be revised now that our implementation is no longer cache-time-dependent. #define BN_window_bits_for_ctime_exponent_size(b) \ ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) -#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 - -#define BN_window_bits_for_ctime_exponent_size(b) \ - ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) -#define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) - -#endif - // This variant of |BN_mod_exp_mont| uses fixed windows and fixed memory access // patterns to protect secret exponents (cf. the hyper-threading timing attacks // pointed out by Colin Percival, @@ -945,8 +930,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, // paths. If we were to use separate static buffers for each then there is // some chance that both large buffers would be allocated on the stack, // causing the stack space requirement to be truly huge (~10KB). - alignas(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH) BN_ULONG - storage[MOD_EXP_CTIME_STORAGE_LEN]; + alignas(MOD_EXP_CTIME_ALIGN) BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN]; #endif #if defined(RSAZ_ENABLED) // If the size of the operands allow it, perform the optimized RSAZ @@ -991,12 +975,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, assert(powerbuf != NULL || top * BN_BITS2 > 1024); #endif if (powerbuf == NULL) { - powerbufFree = - OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); + powerbufFree = OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_ALIGN); if (powerbufFree == NULL) { goto err; } - powerbuf = align_pointer(powerbufFree, MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); + powerbuf = align_pointer(powerbufFree, MOD_EXP_CTIME_ALIGN); } OPENSSL_memset(powerbuf, 0, powerbufLen); diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h index 9329ce7aa..f7adfe92e 100644 --- a/crypto/fipsmodule/bn/internal.h +++ b/crypto/fipsmodule/bn/internal.h @@ -189,14 +189,20 @@ extern "C" { #define BN_CAN_USE_INLINE_ASM #endif -// |BN_mod_exp_mont_consttime| is based on the assumption that the L1 data -// cache line width of the target processor is at least the following value. -#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH 64 - -// The number of |BN_ULONG|s needed for the |BN_mod_exp_mont_consttime| stack- -// allocated storage buffer. The buffer is just the right size for the RSAZ -// and is about ~1KB larger than what's necessary (4480 bytes) for 1024-bit -// inputs. +// MOD_EXP_CTIME_ALIGN is the alignment needed for |BN_mod_exp_mont_consttime|'s +// tables. +// +// TODO(davidben): Historically, this alignment came from cache line +// assumptions, which we've since removed. Is 64-byte alignment still necessary +// or ideal? The true alignment requirement seems to now be 32 bytes, coming +// from RSAZ's use of VMOVDQA to a YMM register. Non-x86_64 has even fewer +// requirements. +#define MOD_EXP_CTIME_ALIGN 64 + +// MOD_EXP_CTIME_STORAGE_LEN is the number of |BN_ULONG|s needed for the +// |BN_mod_exp_mont_consttime| stack-allocated storage buffer. The buffer is +// just the right size for the RSAZ and is about ~1KB larger than what's +// necessary (4480 bytes) for 1024-bit inputs. #define MOD_EXP_CTIME_STORAGE_LEN \ (((320u * 3u) + (32u * 9u * 16u)) / sizeof(BN_ULONG)) diff --git a/crypto/fipsmodule/bn/rsaz_exp.c b/crypto/fipsmodule/bn/rsaz_exp.c index 7b455b55f..da2503065 100644 --- a/crypto/fipsmodule/bn/rsaz_exp.c +++ b/crypto/fipsmodule/bn/rsaz_exp.c @@ -40,8 +40,8 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0, BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN]) { - static_assert(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH % 64 == 0, - "MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH is too small"); + static_assert(MOD_EXP_CTIME_ALIGN % 64 == 0, + "MOD_EXP_CTIME_ALIGN is too small"); assert((uintptr_t)storage % 64 == 0); BN_ULONG *a_inv, *m, *result, *table_s = storage + 40 * 3, *R2 = table_s; diff --git a/crypto/fipsmodule/bn/rsaz_exp.h b/crypto/fipsmodule/bn/rsaz_exp.h index 67f1cab5c..22ca3ec46 100644 --- a/crypto/fipsmodule/bn/rsaz_exp.h +++ b/crypto/fipsmodule/bn/rsaz_exp.h @@ -32,8 +32,7 @@ extern "C" { // modulo |m_norm|. |base_norm| must be fully-reduced and |exponent| must have // the high bit set (it is 1024 bits wide). |RR| and |k0| must be |RR| and |n0|, // respectively, extracted from |m_norm|'s |BN_MONT_CTX|. |storage_words| is a -// temporary buffer that must be aligned to |MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH| -// bytes. +// temporary buffer that must be aligned to |MOD_EXP_CTIME_ALIGN| bytes. void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16], const BN_ULONG exponent[16], const BN_ULONG m_norm[16], const BN_ULONG RR[16],