From fa6ced95123a19e43598f654b2c3ef6ddda30290 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sun, 3 Oct 2021 01:53:38 -0400 Subject: [PATCH] Extract common rotl/rotr functions. We have a ton of per-file rotation functions, often with generic names that do not tell you whether they are uint32_t vs uint64_t, or rotl vs rotr. Additionally, (x >> r) | (x << (32 - r)) is UB at r = 0. (x >> r) | (x << ((-r) & 31)) works for 0 <= r < 32, which is what cast.c does. GCC and Clang recognize this pattern as a rotate, but MSVC doesn't. MSVC does, however, provide functions for this. We usually rotate by a non-zero constant, which makes this moot, but rotation comes up often enough that it's worth extracting out. Some particular changes to call out: - I've switched sha256.c from rotl to rotr. There was a comment explaining why it differed from the specification. Now that we have both functions, it's simpler to just match the specification. - I've dropped all the inline assembly from sha512.c. Compilers should be able to recognize rotations in 2021. Change-Id: Ia1030e8bfe94dad92514ed1c28777447c48b82f9 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/49765 Reviewed-by: Adam Langley --- crypto/blake2/blake2.c | 10 ++-- crypto/chacha/chacha.c | 85 ++++++++++++++----------------- crypto/evp/scrypt.c | 66 ++++++++++++------------ crypto/fipsmodule/aes/aes_test.cc | 12 ++--- crypto/fipsmodule/des/des.c | 21 ++++---- crypto/fipsmodule/des/internal.h | 4 +- crypto/fipsmodule/md4/md4.c | 9 ++-- crypto/fipsmodule/md5/md5.c | 11 ++-- crypto/fipsmodule/sha/sha1.c | 70 ++++++++++++------------- crypto/fipsmodule/sha/sha256.c | 21 ++++---- crypto/fipsmodule/sha/sha512.c | 47 ++++------------- crypto/internal.h | 39 ++++++++++++++ crypto/siphash/siphash.c | 12 ++--- decrepit/cast/cast.c | 26 ++++------ decrepit/ripemd/ripemd.c | 28 +++++----- 15 files changed, 220 insertions(+), 241 deletions(-) diff --git a/crypto/blake2/blake2.c b/crypto/blake2/blake2.c index e3c560f80..096d61db1 100644 --- a/crypto/blake2/blake2.c +++ b/crypto/blake2/blake2.c @@ -42,19 +42,17 @@ static const uint8_t kSigma[10 * 16] = { // clang-format on }; -#define RIGHT_ROTATE(v, n) (((v) >> (n)) | ((v) << (64 - (n)))) - // https://tools.ietf.org/html/rfc7693#section-3.1 static void blake2b_mix(uint64_t v[16], int a, int b, int c, int d, uint64_t x, uint64_t y) { v[a] = v[a] + v[b] + x; - v[d] = RIGHT_ROTATE(v[d] ^ v[a], 32); + v[d] = CRYPTO_rotr_u64(v[d] ^ v[a], 32); v[c] = v[c] + v[d]; - v[b] = RIGHT_ROTATE(v[b] ^ v[c], 24); + v[b] = CRYPTO_rotr_u64(v[b] ^ v[c], 24); v[a] = v[a] + v[b] + y; - v[d] = RIGHT_ROTATE(v[d] ^ v[a], 16); + v[d] = CRYPTO_rotr_u64(v[d] ^ v[a], 16); v[c] = v[c] + v[d]; - v[b] = RIGHT_ROTATE(v[b] ^ v[c], 63); + v[b] = CRYPTO_rotr_u64(v[b] ^ v[c], 63); } static void blake2b_transform( diff --git a/crypto/chacha/chacha.c b/crypto/chacha/chacha.c index b539f9987..64ca1c48f 100644 --- a/crypto/chacha/chacha.c +++ b/crypto/chacha/chacha.c @@ -25,22 +25,20 @@ #include "internal.h" -#define U8TO32_LITTLE(p) \ - (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ - ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) - // sigma contains the ChaCha constants, which happen to be an ASCII string. static const uint8_t sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k' }; -#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) - // QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. -#define QUARTERROUND(a, b, c, d) \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 16); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROTATE(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROTATE(x[b] ^ x[c], 7); +#define QUARTERROUND(a, b, c, d) \ + x[a] += x[b]; \ + x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 16); \ + x[c] += x[d]; \ + x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 12); \ + x[a] += x[b]; \ + x[d] = CRYPTO_rotl_u32(x[d] ^ x[a], 8); \ + x[c] += x[d]; \ + x[b] = CRYPTO_rotl_u32(x[b] ^ x[c], 7); void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32], const uint8_t nonce[16]) { @@ -71,24 +69,25 @@ void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, uint32_t counter) { assert(!buffers_alias(out, in_len, in, in_len) || in == out); - uint32_t counter_nonce[4]; counter_nonce[0] = counter; - counter_nonce[1] = U8TO32_LITTLE(nonce + 0); - counter_nonce[2] = U8TO32_LITTLE(nonce + 4); - counter_nonce[3] = U8TO32_LITTLE(nonce + 8); + uint32_t counter_nonce[4]; + counter_nonce[0] = counter; + counter_nonce[1] = CRYPTO_load_u32_le(nonce + 0); + counter_nonce[2] = CRYPTO_load_u32_le(nonce + 4); + counter_nonce[3] = CRYPTO_load_u32_le(nonce + 8); const uint32_t *key_ptr = (const uint32_t *)key; #if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) // The assembly expects the key to be four-byte aligned. uint32_t key_u32[8]; if ((((uintptr_t)key) & 3) != 0) { - key_u32[0] = U8TO32_LITTLE(key + 0); - key_u32[1] = U8TO32_LITTLE(key + 4); - key_u32[2] = U8TO32_LITTLE(key + 8); - key_u32[3] = U8TO32_LITTLE(key + 12); - key_u32[4] = U8TO32_LITTLE(key + 16); - key_u32[5] = U8TO32_LITTLE(key + 20); - key_u32[6] = U8TO32_LITTLE(key + 24); - key_u32[7] = U8TO32_LITTLE(key + 28); + key_u32[0] = CRYPTO_load_u32_le(key + 0); + key_u32[1] = CRYPTO_load_u32_le(key + 4); + key_u32[2] = CRYPTO_load_u32_le(key + 8); + key_u32[3] = CRYPTO_load_u32_le(key + 12); + key_u32[4] = CRYPTO_load_u32_le(key + 16); + key_u32[5] = CRYPTO_load_u32_le(key + 20); + key_u32[6] = CRYPTO_load_u32_le(key + 24); + key_u32[7] = CRYPTO_load_u32_le(key + 28); key_ptr = key_u32; } @@ -99,14 +98,6 @@ void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, #else -#define U32TO8_LITTLE(p, v) \ - { \ - (p)[0] = (v >> 0) & 0xff; \ - (p)[1] = (v >> 8) & 0xff; \ - (p)[2] = (v >> 16) & 0xff; \ - (p)[3] = (v >> 24) & 0xff; \ - } - // chacha_core performs 20 rounds of ChaCha on the input words in // |input| and writes the 64 output bytes to |output|. static void chacha_core(uint8_t output[64], const uint32_t input[16]) { @@ -129,7 +120,7 @@ static void chacha_core(uint8_t output[64], const uint32_t input[16]) { x[i] += input[i]; } for (i = 0; i < 16; ++i) { - U32TO8_LITTLE(output + 4 * i, x[i]); + CRYPTO_store_u32_le(output + 4 * i, x[i]); } } @@ -142,25 +133,25 @@ void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, uint8_t buf[64]; size_t todo, i; - input[0] = U8TO32_LITTLE(sigma + 0); - input[1] = U8TO32_LITTLE(sigma + 4); - input[2] = U8TO32_LITTLE(sigma + 8); - input[3] = U8TO32_LITTLE(sigma + 12); + input[0] = CRYPTO_load_u32_le(sigma + 0); + input[1] = CRYPTO_load_u32_le(sigma + 4); + input[2] = CRYPTO_load_u32_le(sigma + 8); + input[3] = CRYPTO_load_u32_le(sigma + 12); - input[4] = U8TO32_LITTLE(key + 0); - input[5] = U8TO32_LITTLE(key + 4); - input[6] = U8TO32_LITTLE(key + 8); - input[7] = U8TO32_LITTLE(key + 12); + input[4] = CRYPTO_load_u32_le(key + 0); + input[5] = CRYPTO_load_u32_le(key + 4); + input[6] = CRYPTO_load_u32_le(key + 8); + input[7] = CRYPTO_load_u32_le(key + 12); - input[8] = U8TO32_LITTLE(key + 16); - input[9] = U8TO32_LITTLE(key + 20); - input[10] = U8TO32_LITTLE(key + 24); - input[11] = U8TO32_LITTLE(key + 28); + input[8] = CRYPTO_load_u32_le(key + 16); + input[9] = CRYPTO_load_u32_le(key + 20); + input[10] = CRYPTO_load_u32_le(key + 24); + input[11] = CRYPTO_load_u32_le(key + 28); input[12] = counter; - input[13] = U8TO32_LITTLE(nonce + 0); - input[14] = U8TO32_LITTLE(nonce + 4); - input[15] = U8TO32_LITTLE(nonce + 8); + input[13] = CRYPTO_load_u32_le(nonce + 0); + input[14] = CRYPTO_load_u32_le(nonce + 4); + input[15] = CRYPTO_load_u32_le(nonce + 8); while (in_len > 0) { todo = sizeof(buf); diff --git a/crypto/evp/scrypt.c b/crypto/evp/scrypt.c index 2feb6504f..7ec6244ab 100644 --- a/crypto/evp/scrypt.c +++ b/crypto/evp/scrypt.c @@ -32,8 +32,6 @@ typedef struct { uint32_t words[16]; } block_t; OPENSSL_STATIC_ASSERT(sizeof(block_t) == 64, "block_t has padding"); -#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) - // salsa208_word_specification implements the Salsa20/8 core function, also // described in RFC 7914, section 3. It modifies the block at |inout| // in-place. @@ -42,38 +40,38 @@ static void salsa208_word_specification(block_t *inout) { OPENSSL_memcpy(&x, inout, sizeof(x)); for (int i = 8; i > 0; i -= 2) { - x.words[4] ^= R(x.words[0] + x.words[12], 7); - x.words[8] ^= R(x.words[4] + x.words[0], 9); - x.words[12] ^= R(x.words[8] + x.words[4], 13); - x.words[0] ^= R(x.words[12] + x.words[8], 18); - x.words[9] ^= R(x.words[5] + x.words[1], 7); - x.words[13] ^= R(x.words[9] + x.words[5], 9); - x.words[1] ^= R(x.words[13] + x.words[9], 13); - x.words[5] ^= R(x.words[1] + x.words[13], 18); - x.words[14] ^= R(x.words[10] + x.words[6], 7); - x.words[2] ^= R(x.words[14] + x.words[10], 9); - x.words[6] ^= R(x.words[2] + x.words[14], 13); - x.words[10] ^= R(x.words[6] + x.words[2], 18); - x.words[3] ^= R(x.words[15] + x.words[11], 7); - x.words[7] ^= R(x.words[3] + x.words[15], 9); - x.words[11] ^= R(x.words[7] + x.words[3], 13); - x.words[15] ^= R(x.words[11] + x.words[7], 18); - x.words[1] ^= R(x.words[0] + x.words[3], 7); - x.words[2] ^= R(x.words[1] + x.words[0], 9); - x.words[3] ^= R(x.words[2] + x.words[1], 13); - x.words[0] ^= R(x.words[3] + x.words[2], 18); - x.words[6] ^= R(x.words[5] + x.words[4], 7); - x.words[7] ^= R(x.words[6] + x.words[5], 9); - x.words[4] ^= R(x.words[7] + x.words[6], 13); - x.words[5] ^= R(x.words[4] + x.words[7], 18); - x.words[11] ^= R(x.words[10] + x.words[9], 7); - x.words[8] ^= R(x.words[11] + x.words[10], 9); - x.words[9] ^= R(x.words[8] + x.words[11], 13); - x.words[10] ^= R(x.words[9] + x.words[8], 18); - x.words[12] ^= R(x.words[15] + x.words[14], 7); - x.words[13] ^= R(x.words[12] + x.words[15], 9); - x.words[14] ^= R(x.words[13] + x.words[12], 13); - x.words[15] ^= R(x.words[14] + x.words[13], 18); + x.words[4] ^= CRYPTO_rotl_u32(x.words[0] + x.words[12], 7); + x.words[8] ^= CRYPTO_rotl_u32(x.words[4] + x.words[0], 9); + x.words[12] ^= CRYPTO_rotl_u32(x.words[8] + x.words[4], 13); + x.words[0] ^= CRYPTO_rotl_u32(x.words[12] + x.words[8], 18); + x.words[9] ^= CRYPTO_rotl_u32(x.words[5] + x.words[1], 7); + x.words[13] ^= CRYPTO_rotl_u32(x.words[9] + x.words[5], 9); + x.words[1] ^= CRYPTO_rotl_u32(x.words[13] + x.words[9], 13); + x.words[5] ^= CRYPTO_rotl_u32(x.words[1] + x.words[13], 18); + x.words[14] ^= CRYPTO_rotl_u32(x.words[10] + x.words[6], 7); + x.words[2] ^= CRYPTO_rotl_u32(x.words[14] + x.words[10], 9); + x.words[6] ^= CRYPTO_rotl_u32(x.words[2] + x.words[14], 13); + x.words[10] ^= CRYPTO_rotl_u32(x.words[6] + x.words[2], 18); + x.words[3] ^= CRYPTO_rotl_u32(x.words[15] + x.words[11], 7); + x.words[7] ^= CRYPTO_rotl_u32(x.words[3] + x.words[15], 9); + x.words[11] ^= CRYPTO_rotl_u32(x.words[7] + x.words[3], 13); + x.words[15] ^= CRYPTO_rotl_u32(x.words[11] + x.words[7], 18); + x.words[1] ^= CRYPTO_rotl_u32(x.words[0] + x.words[3], 7); + x.words[2] ^= CRYPTO_rotl_u32(x.words[1] + x.words[0], 9); + x.words[3] ^= CRYPTO_rotl_u32(x.words[2] + x.words[1], 13); + x.words[0] ^= CRYPTO_rotl_u32(x.words[3] + x.words[2], 18); + x.words[6] ^= CRYPTO_rotl_u32(x.words[5] + x.words[4], 7); + x.words[7] ^= CRYPTO_rotl_u32(x.words[6] + x.words[5], 9); + x.words[4] ^= CRYPTO_rotl_u32(x.words[7] + x.words[6], 13); + x.words[5] ^= CRYPTO_rotl_u32(x.words[4] + x.words[7], 18); + x.words[11] ^= CRYPTO_rotl_u32(x.words[10] + x.words[9], 7); + x.words[8] ^= CRYPTO_rotl_u32(x.words[11] + x.words[10], 9); + x.words[9] ^= CRYPTO_rotl_u32(x.words[8] + x.words[11], 13); + x.words[10] ^= CRYPTO_rotl_u32(x.words[9] + x.words[8], 18); + x.words[12] ^= CRYPTO_rotl_u32(x.words[15] + x.words[14], 7); + x.words[13] ^= CRYPTO_rotl_u32(x.words[12] + x.words[15], 9); + x.words[14] ^= CRYPTO_rotl_u32(x.words[13] + x.words[12], 13); + x.words[15] ^= CRYPTO_rotl_u32(x.words[14] + x.words[13], 18); } for (int i = 0; i < 16; ++i) { diff --git a/crypto/fipsmodule/aes/aes_test.cc b/crypto/fipsmodule/aes/aes_test.cc index 406e949f1..eef2567c7 100644 --- a/crypto/fipsmodule/aes/aes_test.cc +++ b/crypto/fipsmodule/aes/aes_test.cc @@ -403,10 +403,6 @@ static uint32_t aes_ref_sub_word(uint32_t in) { return a0 | (a1 << 8) | (a2 << 16) | (a3 << 24); } -static uint32_t aes_ref_rot_word(uint32_t in, uint32_t n) { - return (in >> n) | (in << (32 - n)); -} - static int aes_ref_set_encrypt_key(const uint8_t *key, int key_bits, AES_KEY *out) { static const uint32_t kRCon[10] = {0x01, 0x02, 0x04, 0x08, 0x10, @@ -431,7 +427,7 @@ static int aes_ref_set_encrypt_key(const uint8_t *key, int key_bits, for (size_t i = words; i < num_subkey_words; i++) { uint32_t tmp = out->rd_key[i - 1]; if (i % words == 0) { - tmp = aes_ref_sub_word(aes_ref_rot_word(tmp, 8)) ^ kRCon[(i / words) - 1]; + tmp = aes_ref_sub_word(CRYPTO_rotr_u32(tmp, 8)) ^ kRCon[(i / words) - 1]; } else if (key_bits == 256 && i % 4 == 0) { tmp = aes_ref_sub_word(tmp); } @@ -532,9 +528,9 @@ print("static const uint32_t kTable[256] = {%s};\n" % body) for (size_t i = 0; i < 4; i++) { uint32_t in = block[i]; block[i] = kInvMixColumn[in >> 24]; - block[i] ^= aes_ref_rot_word(kInvMixColumn[(in >> 16) & 0xff], 8); - block[i] ^= aes_ref_rot_word(kInvMixColumn[(in >> 8) & 0xff], 16); - block[i] ^= aes_ref_rot_word(kInvMixColumn[in & 0xff], 24); + block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[(in >> 16) & 0xff], 8); + block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[(in >> 8) & 0xff], 16); + block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[in & 0xff], 24); } } diff --git a/crypto/fipsmodule/des/des.c b/crypto/fipsmodule/des/des.c index 2b0fdcd70..95c430cac 100644 --- a/crypto/fipsmodule/des/des.c +++ b/crypto/fipsmodule/des/des.c @@ -342,10 +342,10 @@ void DES_set_key(const DES_cblock *key, DES_key_schedule *schedule) { // table contained 0213 4657 t2 = ((t << 16L) | (s & 0x0000ffffL)) & 0xffffffffL; - schedule->subkeys[i][0] = ROTATE(t2, 30) & 0xffffffffL; + schedule->subkeys[i][0] = CRYPTO_rotr_u32(t2, 30); t2 = ((s >> 16L) | (t & 0xffff0000L)); - schedule->subkeys[i][1] = ROTATE(t2, 26) & 0xffffffffL; + schedule->subkeys[i][1] = CRYPTO_rotr_u32(t2, 26); } } @@ -392,8 +392,8 @@ static void DES_encrypt1(uint32_t *data, const DES_key_schedule *ks, int enc) { // <71755.204@CompuServe.COM> for pointing this out. // clear the top bits on machines with 8byte longs // shift left by 2 - r = ROTATE(r, 29) & 0xffffffffL; - l = ROTATE(l, 29) & 0xffffffffL; + r = CRYPTO_rotr_u32(r, 29); + l = CRYPTO_rotr_u32(l, 29); // I don't know if it is worth the effort of loop unrolling the // inner loop @@ -434,8 +434,8 @@ static void DES_encrypt1(uint32_t *data, const DES_key_schedule *ks, int enc) { } // rotate and clear the top bits on machines with 8byte longs - l = ROTATE(l, 3) & 0xffffffffL; - r = ROTATE(r, 3) & 0xffffffffL; + l = CRYPTO_rotr_u32(l, 3); + r = CRYPTO_rotr_u32(r, 3); FP(r, l); data[0] = l; @@ -454,8 +454,8 @@ static void DES_encrypt2(uint32_t *data, const DES_key_schedule *ks, int enc) { // sparc2. Thanks to Richard Outerbridge <71755.204@CompuServe.COM> for // pointing this out. // clear the top bits on machines with 8byte longs - r = ROTATE(r, 29) & 0xffffffffL; - l = ROTATE(l, 29) & 0xffffffffL; + r = CRYPTO_rotr_u32(r, 29); + l = CRYPTO_rotr_u32(l, 29); // I don't know if it is worth the effort of loop unrolling the // inner loop @@ -495,8 +495,8 @@ static void DES_encrypt2(uint32_t *data, const DES_key_schedule *ks, int enc) { D_ENCRYPT(ks, r, l, 0); } // rotate and clear the top bits on machines with 8byte longs - data[0] = ROTATE(l, 3) & 0xffffffffL; - data[1] = ROTATE(r, 3) & 0xffffffffL; + data[0] = CRYPTO_rotr_u32(l, 3); + data[1] = CRYPTO_rotr_u32(r, 3); } void DES_encrypt3(uint32_t *data, const DES_key_schedule *ks1, @@ -782,4 +782,3 @@ void DES_set_key_unchecked(const DES_cblock *key, DES_key_schedule *schedule) { #undef D_ENCRYPT #undef ITERATIONS #undef HALF_ITERATIONS -#undef ROTATE diff --git a/crypto/fipsmodule/des/internal.h b/crypto/fipsmodule/des/internal.h index 1ae3f22a3..3e3992ecf 100644 --- a/crypto/fipsmodule/des/internal.h +++ b/crypto/fipsmodule/des/internal.h @@ -218,7 +218,7 @@ how to use xors :-) I got it to its final state. #define D_ENCRYPT(ks, LL, R, S) \ do { \ LOAD_DATA(ks, R, S, u, t, E0, E1); \ - t = ROTATE(t, 4); \ + t = CRYPTO_rotr_u32(t, 4); \ (LL) ^= \ DES_SPtrans[0][(u >> 2L) & 0x3f] ^ DES_SPtrans[2][(u >> 10L) & 0x3f] ^ \ DES_SPtrans[4][(u >> 18L) & 0x3f] ^ \ @@ -230,8 +230,6 @@ how to use xors :-) I got it to its final state. #define ITERATIONS 16 #define HALF_ITERATIONS 8 -#define ROTATE(a, n) (((a) >> (n)) + ((a) << (32 - (n)))) - #if defined(__cplusplus) } // extern C diff --git a/crypto/fipsmodule/md4/md4.c b/crypto/fipsmodule/md4/md4.c index a505d057f..5b44653b5 100644 --- a/crypto/fipsmodule/md4/md4.c +++ b/crypto/fipsmodule/md4/md4.c @@ -113,24 +113,22 @@ int MD4_Final(uint8_t out[MD4_DIGEST_LENGTH], MD4_CTX *c) { #define G(b, c, d) (((b) & (c)) | ((b) & (d)) | ((c) & (d))) #define H(b, c, d) ((b) ^ (c) ^ (d)) -#define ROTATE(a, n) (((a) << (n)) | ((a) >> (32 - (n)))) - #define R0(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + F((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ } while (0) #define R1(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + G((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ } while (0) #define R2(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + H((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ } while (0) void md4_block_data_order(uint32_t *state, const uint8_t *data, size_t num) { @@ -237,7 +235,6 @@ void md4_block_data_order(uint32_t *state, const uint8_t *data, size_t num) { #undef F #undef G #undef H -#undef ROTATE #undef R0 #undef R1 #undef R2 diff --git a/crypto/fipsmodule/md5/md5.c b/crypto/fipsmodule/md5/md5.c index eba34bce8..169152676 100644 --- a/crypto/fipsmodule/md5/md5.c +++ b/crypto/fipsmodule/md5/md5.c @@ -119,33 +119,31 @@ int MD5_Final(uint8_t out[MD5_DIGEST_LENGTH], MD5_CTX *c) { #define H(b, c, d) ((b) ^ (c) ^ (d)) #define I(b, c, d) (((~(d)) | (b)) ^ (c)) -#define ROTATE(a, n) (((a) << (n)) | ((a) >> (32 - (n)))) - #define R0(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + F((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ (a) += (b); \ } while (0) #define R1(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + G((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ (a) += (b); \ } while (0) #define R2(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + H((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ (a) += (b); \ } while (0) #define R3(a, b, c, d, k, s, t) \ do { \ (a) += ((k) + (t) + I((b), (c), (d))); \ - (a) = ROTATE(a, s); \ + (a) = CRYPTO_rotl_u32(a, s); \ (a) += (b); \ } while (0) @@ -280,7 +278,6 @@ static void md5_block_data_order(uint32_t *state, const uint8_t *data, #undef G #undef H #undef I -#undef ROTATE #undef R0 #undef R1 #undef R2 diff --git a/crypto/fipsmodule/sha/sha1.c b/crypto/fipsmodule/sha/sha1.c index c6293087d..e482c7761 100644 --- a/crypto/fipsmodule/sha/sha1.c +++ b/crypto/fipsmodule/sha/sha1.c @@ -111,11 +111,10 @@ int SHA1_Final(uint8_t out[SHA_DIGEST_LENGTH], SHA_CTX *c) { return 1; } -#define ROTATE(a, n) (((a) << (n)) | ((a) >> (32 - (n)))) -#define Xupdate(a, ix, ia, ib, ic, id) \ - do { \ - (a) = ((ia) ^ (ib) ^ (ic) ^ (id)); \ - (ix) = (a) = ROTATE((a), 1); \ +#define Xupdate(a, ix, ia, ib, ic, id) \ + do { \ + (a) = ((ia) ^ (ib) ^ (ic) ^ (id)); \ + (ix) = (a) = CRYPTO_rotl_u32((a), 1); \ } while (0) #define K_00_19 0x5a827999UL @@ -133,45 +132,47 @@ int SHA1_Final(uint8_t out[SHA_DIGEST_LENGTH], SHA_CTX *c) { #define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d))) #define F_60_79(b, c, d) F_20_39(b, c, d) -#define BODY_00_15(i, a, b, c, d, e, f, xi) \ - do { \ - (f) = (xi) + (e) + K_00_19 + ROTATE((a), 5) + F_00_19((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_00_15(i, a, b, c, d, e, f, xi) \ + do { \ + (f) = (xi) + (e) + K_00_19 + CRYPTO_rotl_u32((a), 5) + \ + F_00_19((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) -#define BODY_16_19(i, a, b, c, d, e, f, xi, xa, xb, xc, xd) \ - do { \ - Xupdate(f, xi, xa, xb, xc, xd); \ - (f) += (e) + K_00_19 + ROTATE((a), 5) + F_00_19((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_16_19(i, a, b, c, d, e, f, xi, xa, xb, xc, xd) \ + do { \ + Xupdate(f, xi, xa, xb, xc, xd); \ + (f) += (e) + K_00_19 + CRYPTO_rotl_u32((a), 5) + F_00_19((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) -#define BODY_20_31(i, a, b, c, d, e, f, xi, xa, xb, xc, xd) \ - do { \ - Xupdate(f, xi, xa, xb, xc, xd); \ - (f) += (e) + K_20_39 + ROTATE((a), 5) + F_20_39((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_20_31(i, a, b, c, d, e, f, xi, xa, xb, xc, xd) \ + do { \ + Xupdate(f, xi, xa, xb, xc, xd); \ + (f) += (e) + K_20_39 + CRYPTO_rotl_u32((a), 5) + F_20_39((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) -#define BODY_32_39(i, a, b, c, d, e, f, xa, xb, xc, xd) \ - do { \ - Xupdate(f, xa, xa, xb, xc, xd); \ - (f) += (e) + K_20_39 + ROTATE((a), 5) + F_20_39((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_32_39(i, a, b, c, d, e, f, xa, xb, xc, xd) \ + do { \ + Xupdate(f, xa, xa, xb, xc, xd); \ + (f) += (e) + K_20_39 + CRYPTO_rotl_u32((a), 5) + F_20_39((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) -#define BODY_40_59(i, a, b, c, d, e, f, xa, xb, xc, xd) \ - do { \ - Xupdate(f, xa, xa, xb, xc, xd); \ - (f) += (e) + K_40_59 + ROTATE((a), 5) + F_40_59((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_40_59(i, a, b, c, d, e, f, xa, xb, xc, xd) \ + do { \ + Xupdate(f, xa, xa, xb, xc, xd); \ + (f) += (e) + K_40_59 + CRYPTO_rotl_u32((a), 5) + F_40_59((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) -#define BODY_60_79(i, a, b, c, d, e, f, xa, xb, xc, xd) \ - do { \ - Xupdate(f, xa, xa, xb, xc, xd); \ - (f) = (xa) + (e) + K_60_79 + ROTATE((a), 5) + F_60_79((b), (c), (d)); \ - (b) = ROTATE((b), 30); \ +#define BODY_60_79(i, a, b, c, d, e, f, xa, xb, xc, xd) \ + do { \ + Xupdate(f, xa, xa, xb, xc, xd); \ + (f) = (xa) + (e) + K_60_79 + CRYPTO_rotl_u32((a), 5) + \ + F_60_79((b), (c), (d)); \ + (b) = CRYPTO_rotl_u32((b), 30); \ } while (0) #ifdef X @@ -338,7 +339,6 @@ static void sha1_block_data_order(uint32_t *state, const uint8_t *data, } #endif -#undef ROTATE #undef Xupdate #undef K_00_19 #undef K_20_39 diff --git a/crypto/fipsmodule/sha/sha256.c b/crypto/fipsmodule/sha/sha256.c index 4394f4aa0..c187c4a18 100644 --- a/crypto/fipsmodule/sha/sha256.c +++ b/crypto/fipsmodule/sha/sha256.c @@ -184,15 +184,17 @@ static const uint32_t K256[64] = { 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL}; -#define ROTATE(a, n) (((a) << (n)) | ((a) >> (32 - (n)))) - -// FIPS specification refers to right rotations, while our ROTATE macro -// is left one. This is why you might notice that rotation coefficients -// differ from those observed in FIPS document by 32-N... -#define Sigma0(x) (ROTATE((x), 30) ^ ROTATE((x), 19) ^ ROTATE((x), 10)) -#define Sigma1(x) (ROTATE((x), 26) ^ ROTATE((x), 21) ^ ROTATE((x), 7)) -#define sigma0(x) (ROTATE((x), 25) ^ ROTATE((x), 14) ^ ((x) >> 3)) -#define sigma1(x) (ROTATE((x), 15) ^ ROTATE((x), 13) ^ ((x) >> 10)) +// See FIPS 180-4, section 4.1.2. +#define Sigma0(x) \ + (CRYPTO_rotr_u32((x), 2) ^ CRYPTO_rotr_u32((x), 13) ^ \ + CRYPTO_rotr_u32((x), 22)) +#define Sigma1(x) \ + (CRYPTO_rotr_u32((x), 6) ^ CRYPTO_rotr_u32((x), 11) ^ \ + CRYPTO_rotr_u32((x), 25)) +#define sigma0(x) \ + (CRYPTO_rotr_u32((x), 7) ^ CRYPTO_rotr_u32((x), 18) ^ ((x) >> 3)) +#define sigma1(x) \ + (CRYPTO_rotr_u32((x), 17) ^ CRYPTO_rotr_u32((x), 19) ^ ((x) >> 10)) #define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) @@ -309,7 +311,6 @@ void SHA256_TransformBlocks(uint32_t state[8], const uint8_t *data, sha256_block_data_order(state, data, num_blocks); } -#undef ROTATE #undef Sigma0 #undef Sigma1 #undef sigma0 diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c index befdd5251..d94de284c 100644 --- a/crypto/fipsmodule/sha/sha512.c +++ b/crypto/fipsmodule/sha/sha512.c @@ -321,42 +321,16 @@ static const uint64_t K512[80] = { UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817), }; -#if defined(__GNUC__) && __GNUC__ >= 2 && !defined(OPENSSL_NO_ASM) -#if defined(__x86_64) || defined(__x86_64__) -#define ROTR(a, n) \ - ({ \ - uint64_t ret; \ - __asm__("rorq %1, %0" : "=r"(ret) : "J"(n), "0"(a) : "cc"); \ - ret; \ - }) -#elif(defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) -#define ROTR(a, n) \ - ({ \ - uint64_t ret; \ - __asm__("rotrdi %0, %1, %2" : "=r"(ret) : "r"(a), "K"(n)); \ - ret; \ - }) -#elif defined(__aarch64__) -#define ROTR(a, n) \ - ({ \ - uint64_t ret; \ - __asm__("ror %0, %1, %2" : "=r"(ret) : "r"(a), "I"(n)); \ - ret; \ - }) -#endif -#elif defined(_MSC_VER) && defined(_WIN64) -#pragma intrinsic(_rotr64) -#define ROTR(a, n) _rotr64((a), n) -#endif - -#ifndef ROTR -#define ROTR(x, s) (((x) >> s) | (x) << (64 - s)) -#endif - -#define Sigma0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39)) -#define Sigma1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41)) -#define sigma0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ ((x) >> 7)) -#define sigma1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ ((x) >> 6)) +#define Sigma0(x) \ + (CRYPTO_rotr_u64((x), 28) ^ CRYPTO_rotr_u64((x), 34) ^ \ + CRYPTO_rotr_u64((x), 39)) +#define Sigma1(x) \ + (CRYPTO_rotr_u64((x), 14) ^ CRYPTO_rotr_u64((x), 18) ^ \ + CRYPTO_rotr_u64((x), 41)) +#define sigma0(x) \ + (CRYPTO_rotr_u64((x), 1) ^ CRYPTO_rotr_u64((x), 8) ^ ((x) >> 7)) +#define sigma1(x) \ + (CRYPTO_rotr_u64((x), 19) ^ CRYPTO_rotr_u64((x), 61) ^ ((x) >> 6)) #define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) @@ -524,7 +498,6 @@ static void sha512_block_data_order(uint64_t *state, const uint8_t *in, #endif // !SHA512_ASM -#undef ROTR #undef Sigma0 #undef Sigma1 #undef sigma0 diff --git a/crypto/internal.h b/crypto/internal.h index 03bb779ca..41c42dd91 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -890,6 +890,45 @@ static inline void CRYPTO_store_word_le(void *out, crypto_word_t v) { } +// Bit rotation functions. +// +// Note these functions use |(-shift) & 31|, etc., because shifting by the bit +// width is undefined. Both Clang and GCC recognize this pattern as a rotation, +// but MSVC does not. Instead, we call MSVC's built-in functions. + +static inline uint32_t CRYPTO_rotl_u32(uint32_t value, int shift) { +#if defined(_MSC_VER) + return _rotl(value, shift); +#else + return (value << shift) | (value >> ((-shift) & 31)); +#endif +} + +static inline uint32_t CRYPTO_rotr_u32(uint32_t value, int shift) { +#if defined(_MSC_VER) + return _rotr(value, shift); +#else + return (value >> shift) | (value << ((-shift) & 31)); +#endif +} + +static inline uint64_t CRYPTO_rotl_u64(uint64_t value, int shift) { +#if defined(_MSC_VER) + return _rotl64(value, shift); +#else + return (value << shift) | (value >> ((-shift) & 63)); +#endif +} + +static inline uint64_t CRYPTO_rotr_u64(uint64_t value, int shift) { +#if defined(_MSC_VER) + return _rotr64(value, shift); +#else + return (value >> shift) | (value << ((-shift) & 63)); +#endif +} + + // FIPS functions. #if defined(BORINGSSL_FIPS) diff --git a/crypto/siphash/siphash.c b/crypto/siphash/siphash.c index f55c3cab4..bb9a0c15a 100644 --- a/crypto/siphash/siphash.c +++ b/crypto/siphash/siphash.c @@ -23,18 +23,18 @@ static void siphash_round(uint64_t v[4]) { v[0] += v[1]; v[2] += v[3]; - v[1] = (v[1] << 13) | (v[1] >> (64 - 13)); - v[3] = (v[3] << 16) | (v[3] >> (64 - 16)); + v[1] = CRYPTO_rotl_u64(v[1], 13); + v[3] = CRYPTO_rotl_u64(v[3], 16); v[1] ^= v[0]; v[3] ^= v[2]; - v[0] = (v[0] << 32) | (v[0] >> 32); + v[0] = CRYPTO_rotl_u64(v[0], 32); v[2] += v[1]; v[0] += v[3]; - v[1] = (v[1] << 17) | (v[1] >> (64 - 17)); - v[3] = (v[3] << 21) | (v[3] >> (64 - 21)); + v[1] = CRYPTO_rotl_u64(v[1], 17); + v[3] = CRYPTO_rotl_u64(v[3], 21); v[1] ^= v[2]; v[3] ^= v[0]; - v[2] = (v[2] << 32) | (v[2] >> 32); + v[2] = CRYPTO_rotl_u64(v[2], 32); } uint64_t SIPHASH_24(const uint64_t key[2], const uint8_t *input, diff --git a/decrepit/cast/cast.c b/decrepit/cast/cast.c index 8fd4e3ab0..dffee5c1f 100644 --- a/decrepit/cast/cast.c +++ b/decrepit/cast/cast.c @@ -84,22 +84,16 @@ void CAST_ecb_encrypt(const uint8_t *in, uint8_t *out, const CAST_KEY *ks, l2n(d[1], out); } -#if defined(OPENSSL_WINDOWS) && defined(_MSC_VER) -#define ROTL(a, n) (_lrotl(a, n)) -#else -#define ROTL(a, n) ((((a) << (n)) | ((a) >> ((-(n))&31))) & 0xffffffffL) -#endif - -#define E_CAST(n, key, L, R, OP1, OP2, OP3) \ - { \ - uint32_t a, b, c, d; \ - t = (key[n * 2] OP1 R) & 0xffffffff; \ - t = ROTL(t, (key[n * 2 + 1])); \ - a = CAST_S_table0[(t >> 8) & 0xff]; \ - b = CAST_S_table1[(t)&0xff]; \ - c = CAST_S_table2[(t >> 24) & 0xff]; \ - d = CAST_S_table3[(t >> 16) & 0xff]; \ - L ^= (((((a OP2 b)&0xffffffffL)OP3 c) & 0xffffffffL)OP1 d) & 0xffffffffL; \ +#define E_CAST(n, key, L, R, OP1, OP2, OP3) \ + { \ + uint32_t a, b, c, d; \ + t = (key[n * 2] OP1 R) & 0xffffffff; \ + t = CRYPTO_rotl_u32(t, (key[n * 2 + 1])); \ + a = CAST_S_table0[(t >> 8) & 0xff]; \ + b = CAST_S_table1[(t)&0xff]; \ + c = CAST_S_table2[(t >> 24) & 0xff]; \ + d = CAST_S_table3[(t >> 16) & 0xff]; \ + L ^= (((((a OP2 b)&0xffffffffL)OP3 c) & 0xffffffffL) OP1 d) & 0xffffffffL; \ } void CAST_encrypt(uint32_t *data, const CAST_KEY *key) { diff --git a/decrepit/ripemd/ripemd.c b/decrepit/ripemd/ripemd.c index 9120cddde..3ae6904f9 100644 --- a/decrepit/ripemd/ripemd.c +++ b/decrepit/ripemd/ripemd.c @@ -112,41 +112,39 @@ int RIPEMD160_Final(uint8_t out[RIPEMD160_DIGEST_LENGTH], RIPEMD160_CTX *c) { #define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) #define F5(x, y, z) (((~(z)) | (y)) ^ (x)) -#define ROTATE(a, n) (((a) << (n)) | (((a)&0xffffffff) >> (32 - (n)))) - -#define RIP1(a, b, c, d, e, w, s) \ - { \ - a += F1(b, c, d) + X(w); \ - a = ROTATE(a, s) + e; \ - c = ROTATE(c, 10); \ +#define RIP1(a, b, c, d, e, w, s) \ + { \ + a += F1(b, c, d) + X(w); \ + a = CRYPTO_rotl_u32(a, s) + e; \ + c = CRYPTO_rotl_u32(c, 10); \ } #define RIP2(a, b, c, d, e, w, s, K) \ { \ a += F2(b, c, d) + X(w) + K; \ - a = ROTATE(a, s) + e; \ - c = ROTATE(c, 10); \ + a = CRYPTO_rotl_u32(a, s) + e; \ + c = CRYPTO_rotl_u32(c, 10); \ } #define RIP3(a, b, c, d, e, w, s, K) \ { \ a += F3(b, c, d) + X(w) + K; \ - a = ROTATE(a, s) + e; \ - c = ROTATE(c, 10); \ + a = CRYPTO_rotl_u32(a, s) + e; \ + c = CRYPTO_rotl_u32(c, 10); \ } #define RIP4(a, b, c, d, e, w, s, K) \ { \ a += F4(b, c, d) + X(w) + K; \ - a = ROTATE(a, s) + e; \ - c = ROTATE(c, 10); \ + a = CRYPTO_rotl_u32(a, s) + e; \ + c = CRYPTO_rotl_u32(c, 10); \ } #define RIP5(a, b, c, d, e, w, s, K) \ { \ a += F5(b, c, d) + X(w) + K; \ - a = ROTATE(a, s) + e; \ - c = ROTATE(c, 10); \ + a = CRYPTO_rotl_u32(a, s) + e; \ + c = CRYPTO_rotl_u32(c, 10); \ } #define KL0 0x00000000L