Stop clang from un-constant-timing copy_from_prebuf.

Newer versions of clang figure out that copy_from_prebuf (used in builds
that aren't x86_64 with assembly optimizations) has a bunch of no-op
iterations and insert a branch. Add a value barrier to stop it. This was
caught by our valgrind-based constant-time validation.

As part of this, I noticed that OPENSSL_NO_ASM builds turn off value
barriers. This is because the value barriers use an empty inline asm
block. While this is technically correct, it's probably unnecessary.

The clang|gcc check means we know GCC-style inline assembly is
supported.  Disabling inline asm is used by sanitizers to shut off
unintrumentable code, but there's no uninstrumentable code in the empty
string. It's also used by consumers who haven't figured out how to
integrate an assembler into their build system, but that also doesn't
apply. So just remove the condition on the value barriers so
OPENSSL_NO_ASM also get mitigations.

Update-Note: It is possible the above is wrong and some OPENSSL_NO_ASM
relied on value barriers being disabled. If so, this will break that
build and we'll need to reconsider.

Change-Id: I6e3ea3ee705bef3afcf42d3532b17aaabbbcc60b
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56827
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
fips-20230428
David Benjamin 2 years ago committed by Boringssl LUCI CQ
parent aa83c12069
commit 53b876a4d1
  1. 4
      crypto/curve25519/curve25519.c
  2. 6
      crypto/fipsmodule/bn/exponentiation.c
  3. 4
      crypto/fipsmodule/ec/p256.c
  4. 6
      crypto/internal.h

@ -35,10 +35,6 @@
// Various pre-computed constants.
#include "./curve25519_tables.h"
#if defined(OPENSSL_NO_ASM)
#define FIAT_25519_NO_ASM
#endif
#if defined(BORINGSSL_CURVE25519_64BIT)
#include "../../third_party/fiat/curve25519_64.h"
#else

@ -852,7 +852,11 @@ static int copy_from_prebuf(BIGNUM *b, int top, const BN_ULONG *table, int idx,
OPENSSL_memset(b->d, 0, sizeof(BN_ULONG) * top);
const int width = 1 << window;
for (int i = 0; i < width; i++, table += top) {
BN_ULONG mask = constant_time_eq_int(i, idx);
// Use a value barrier to prevent Clang from adding a branch when |i != idx|
// and making this copy not constant time. Clang is still allowed to learn
// that |mask| is constant across the inner loop, so this won't inhibit any
// vectorization it might do.
BN_ULONG mask = value_barrier_w(constant_time_eq_int(i, idx));
for (int j = 0; j < top; j++) {
b->d[j] |= table[j] & mask;
}

@ -30,10 +30,6 @@
#include "../delocate.h"
#include "./internal.h"
#if defined(OPENSSL_NO_ASM)
#define FIAT_P256_NO_ASM
#endif
#if defined(BORINGSSL_HAS_UINT128)
#define BORINGSSL_NISTP256_64BIT 1
#include "../../../third_party/fiat/p256_64.h"

@ -302,7 +302,7 @@ typedef uint32_t crypto_word_t;
// always has the same output for a given input. This allows it to eliminate
// dead code, move computations across loops, and vectorize.
static inline crypto_word_t value_barrier_w(crypto_word_t a) {
#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
#if defined(__GNUC__) || defined(__clang__)
__asm__("" : "+r"(a) : /* no inputs */);
#endif
return a;
@ -310,7 +310,7 @@ static inline crypto_word_t value_barrier_w(crypto_word_t a) {
// value_barrier_u32 behaves like |value_barrier_w| but takes a |uint32_t|.
static inline uint32_t value_barrier_u32(uint32_t a) {
#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
#if defined(__GNUC__) || defined(__clang__)
__asm__("" : "+r"(a) : /* no inputs */);
#endif
return a;
@ -318,7 +318,7 @@ static inline uint32_t value_barrier_u32(uint32_t a) {
// value_barrier_u64 behaves like |value_barrier_w| but takes a |uint64_t|.
static inline uint64_t value_barrier_u64(uint64_t a) {
#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
#if defined(__GNUC__) || defined(__clang__)
__asm__("" : "+r"(a) : /* no inputs */);
#endif
return a;

Loading…
Cancel
Save