From c31fb79cfe35f8b14731bb2598d091b2e1394577 Mon Sep 17 00:00:00 2001
From: David Benjamin
Date: Wed, 7 Apr 2021 16:17:47 -0400
Subject: [PATCH] Simplify tls_cbc.c slightly.

This removes the now unnecessary virtual calls. Benchmark differences
are mostly positive but probably noise.

Before:
Did 839000 AES-128-CBC-SHA1 (16 bytes) open operations in 2000497us (6.7 MB/sec)
Did 623000 AES-128-CBC-SHA1 (256 bytes) open operations in 2000409us (79.7 MB/sec)
Did 434000 AES-128-CBC-SHA1 (1350 bytes) open operations in 2002909us (292.5 MB/sec)
Did 146000 AES-128-CBC-SHA1 (8192 bytes) open operations in 2000785us (597.8 MB/sec)
Did 82000 AES-128-CBC-SHA1 (16384 bytes) open operations in 2014268us (667.0 MB/sec)

After:
Did 866000 AES-128-CBC-SHA1 (16 bytes) open operations in 2000697us (6.9 MB/sec) [+3.2%]
Did 616000 AES-128-CBC-SHA1 (256 bytes) open operations in 2001403us (78.8 MB/sec) [-1.2%]
Did 432000 AES-128-CBC-SHA1 (1350 bytes) open operations in 2003898us (291.0 MB/sec) [-0.5%]
Did 148000 AES-128-CBC-SHA1 (8192 bytes) open operations in 2006042us (604.4 MB/sec) [+1.1%]
Did 83000 AES-128-CBC-SHA1 (16384 bytes) open operations in 2010885us (676.3 MB/sec) [+1.4%]

Change-Id: I735e99296ca9a1771518c622b8e7e6979a0d30bc
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/46685
Reviewed-by: Adam Langley
---
 crypto/cipher_extra/tls_cbc.c | 143 +++++++---------------------
 1 file changed, 29 insertions(+), 114 deletions(-)

diff --git a/crypto/cipher_extra/tls_cbc.c b/crypto/cipher_extra/tls_cbc.c
index 5e97a1cd3..f446362dc 100644
--- a/crypto/cipher_extra/tls_cbc.c
+++ b/crypto/cipher_extra/tls_cbc.c
@@ -193,74 +193,19 @@ void EVP_tls_cbc_copy_mac(uint8_t *out, size_t md_size, const uint8_t *in,
     *((p)++) = (uint8_t)((n));       \
   } while (0)
 
-// u64toBE serialises an unsigned, 64-bit number (n) as eight bytes at (p) in
-// big-endian order. The value of p is advanced by eight.
-#define u64toBE(n, p)                \
-  do {                               \
-    *((p)++) = (uint8_t)((n) >> 56); \
-    *((p)++) = (uint8_t)((n) >> 48); \
-    *((p)++) = (uint8_t)((n) >> 40); \
-    *((p)++) = (uint8_t)((n) >> 32); \
-    *((p)++) = (uint8_t)((n) >> 24); \
-    *((p)++) = (uint8_t)((n) >> 16); \
-    *((p)++) = (uint8_t)((n) >> 8);  \
-    *((p)++) = (uint8_t)((n));       \
-  } while (0)
-
-typedef union {
-  SHA_CTX sha1;
-  SHA256_CTX sha256;
-  SHA512_CTX sha512;
-} HASH_CTX;
-
-static void tls1_sha1_transform(HASH_CTX *ctx, const uint8_t *block) {
-  SHA1_Transform(&ctx->sha1, block);
-}
-
-static void tls1_sha256_transform(HASH_CTX *ctx, const uint8_t *block) {
-  SHA256_Transform(&ctx->sha256, block);
-}
-
-static void tls1_sha512_transform(HASH_CTX *ctx, const uint8_t *block) {
-  SHA512_Transform(&ctx->sha512, block);
-}
-
 // These functions serialize the state of a hash and thus perform the standard
 // "final" operation without adding the padding and length that such a function
 // typically does.
-static void tls1_sha1_final_raw(HASH_CTX *ctx, uint8_t *md_out) {
-  SHA_CTX *sha1 = &ctx->sha1;
-  u32toBE(sha1->h[0], md_out);
-  u32toBE(sha1->h[1], md_out);
-  u32toBE(sha1->h[2], md_out);
-  u32toBE(sha1->h[3], md_out);
-  u32toBE(sha1->h[4], md_out);
-}
-
-static void tls1_sha256_final_raw(HASH_CTX *ctx, uint8_t *md_out) {
-  SHA256_CTX *sha256 = &ctx->sha256;
-  for (unsigned i = 0; i < 8; i++) {
-    u32toBE(sha256->h[i], md_out);
-  }
-}
-
-static void tls1_sha512_final_raw(HASH_CTX *ctx, uint8_t *md_out) {
-  SHA512_CTX *sha512 = &ctx->sha512;
-  for (unsigned i = 0; i < 8; i++) {
-    u64toBE(sha512->h[i], md_out);
-  }
+static void tls1_sha1_final_raw(SHA_CTX *ctx, uint8_t *md_out) {
+  u32toBE(ctx->h[0], md_out);
+  u32toBE(ctx->h[1], md_out);
+  u32toBE(ctx->h[2], md_out);
+  u32toBE(ctx->h[3], md_out);
+  u32toBE(ctx->h[4], md_out);
 }
 
 int EVP_tls_cbc_record_digest_supported(const EVP_MD *md) {
-  switch (EVP_MD_type(md)) {
-    case NID_sha1:
-    case NID_sha256:
-    case NID_sha384:
-      return 1;
-
-    default:
-      return 0;
-  }
+  return EVP_MD_type(md) == NID_sha1;
 }
 
 int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
@@ -269,14 +214,6 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
                               size_t data_plus_mac_plus_padding_size,
                               const uint8_t *mac_secret,
                               unsigned mac_secret_length) {
-  HASH_CTX md_state;
-  void (*md_final_raw)(HASH_CTX *ctx, uint8_t *md_out);
-  void (*md_transform)(HASH_CTX *ctx, const uint8_t *block);
-  unsigned md_size, md_block_size = 64, md_block_shift = 6;
-  // md_length_size is the number of bytes in the length field that terminates
-  // the hash.
-  unsigned md_length_size = 8;
-
   // Bound the acceptable input so we can forget about many possible overflows
   // later in this function. This is redundant with the record size limits in
   // TLS.
@@ -285,32 +222,7 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
     return 0;
   }
 
-  switch (EVP_MD_type(md)) {
-    case NID_sha1:
-      SHA1_Init(&md_state.sha1);
-      md_final_raw = tls1_sha1_final_raw;
-      md_transform = tls1_sha1_transform;
-      md_size = SHA_DIGEST_LENGTH;
-      break;
-
-    case NID_sha256:
-      SHA256_Init(&md_state.sha256);
-      md_final_raw = tls1_sha256_final_raw;
-      md_transform = tls1_sha256_transform;
-      md_size = SHA256_DIGEST_LENGTH;
-      break;
-
-    case NID_sha384:
-      SHA384_Init(&md_state.sha512);
-      md_final_raw = tls1_sha512_final_raw;
-      md_transform = tls1_sha512_transform;
-      md_size = SHA384_DIGEST_LENGTH;
-      md_block_size = 128;
-      md_block_shift = 7;
-      md_length_size = 16;
-      break;
-
-    default:
+  if (EVP_MD_type(md) != NID_sha1) {
     // EVP_tls_cbc_record_digest_supported should have been called first to
     // check that the hash function is supported.
     assert(0);
@@ -318,6 +230,16 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
     return 0;
   }
 
+  // TODO(davidben): Further simplify this logic, now that we're down to one
+  // hash function.
+
+  SHA_CTX md_state;
+  SHA1_Init(&md_state);
+  unsigned md_size = SHA_DIGEST_LENGTH, md_block_size = 64, md_block_shift = 6;
+  // md_length_size is the number of bytes in the length field that terminates
+  // the hash.
+  unsigned md_length_size = 8;
+
   assert(md_length_size <= MAX_HASH_BIT_COUNT_BYTES);
   assert(md_block_size <= MAX_HASH_BLOCK_SIZE);
   assert(md_block_size == (1u << md_block_shift));
@@ -393,7 +315,7 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
     hmac_pad[i] ^= 0x36;
   }
 
-  md_transform(&md_state, hmac_pad);
+  SHA1_Transform(&md_state, hmac_pad);
 
   // The length check means |bits| fits in four bytes.
   uint8_t length_bytes[MAX_HASH_BIT_COUNT_BYTES];
@@ -408,9 +330,9 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
     uint8_t first_block[MAX_HASH_BLOCK_SIZE];
     OPENSSL_memcpy(first_block, header, 13);
     OPENSSL_memcpy(first_block + 13, data, md_block_size - 13);
-    md_transform(&md_state, first_block);
+    SHA1_Transform(&md_state, first_block);
     for (size_t i = 1; i < k / md_block_size; i++) {
-      md_transform(&md_state, data + md_block_size * i - 13);
+      SHA1_Transform(&md_state, data + md_block_size * i - 13);
     }
   }
 
@@ -461,32 +383,25 @@ int EVP_tls_cbc_digest_record(const EVP_MD *md, uint8_t *md_out,
       block[j] = b;
     }
 
-    md_transform(&md_state, block);
-    md_final_raw(&md_state, block);
+    SHA1_Transform(&md_state, block);
+    tls1_sha1_final_raw(&md_state, block);
     // If this is index_b, copy the hash value to |mac_out|.
     for (size_t j = 0; j < md_size; j++) {
       mac_out[j] |= block[j] & is_block_b;
    }
  }
 
-  EVP_MD_CTX md_ctx;
-  EVP_MD_CTX_init(&md_ctx);
-  if (!EVP_DigestInit_ex(&md_ctx, md, NULL /* engine */)) {
-    EVP_MD_CTX_cleanup(&md_ctx);
-    return 0;
-  }
+  SHA_CTX md_ctx;
+  SHA1_Init(&md_ctx);
 
   // Complete the HMAC in the standard manner.
   for (size_t i = 0; i < md_block_size; i++) {
     hmac_pad[i] ^= 0x6a;
   }
 
-  EVP_DigestUpdate(&md_ctx, hmac_pad, md_block_size);
-  EVP_DigestUpdate(&md_ctx, mac_out, md_size);
-  unsigned md_out_size_u;
-  EVP_DigestFinal(&md_ctx, md_out, &md_out_size_u);
-  *md_out_size = md_out_size_u;
-  EVP_MD_CTX_cleanup(&md_ctx);
-
+  SHA1_Update(&md_ctx, hmac_pad, md_block_size);
+  SHA1_Update(&md_ctx, mac_out, md_size);
+  SHA1_Final(md_out, &md_ctx);
+  *md_out_size = md_size;
   return 1;
 }
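
Reviewer note, not part of the patch: the change leans on the fact that driving SHA-1 block by block with SHA1_Transform over hand-padded input, then serializing the chaining state big-endian the way tls1_sha1_final_raw does, yields exactly the ordinary SHA-1 digest. Below is a minimal standalone sketch of that property, assuming BoringSSL's SHA_CTX (whose |h| words the patched code reads directly) and a normal libcrypto link; the variable names are illustrative only.

// Sketch: manual Merkle-Damgard padding + SHA1_Transform + raw state dump
// should equal the one-shot SHA1() of the same message.
#include <assert.h>
#include <stdint.h>
#include <string.h>

#include <openssl/sha.h>

int main(void) {
  static const uint8_t msg[] = {'a', 'b', 'c'};

  // Build the single padded block by hand: message, 0x80, zeros, and the
  // 64-bit big-endian bit length.
  uint8_t block[SHA_CBLOCK] = {0};
  memcpy(block, msg, sizeof(msg));
  block[sizeof(msg)] = 0x80;
  uint64_t bits = sizeof(msg) * 8;
  for (size_t i = 0; i < 8; i++) {
    block[SHA_CBLOCK - 1 - i] = (uint8_t)(bits >> (8 * i));
  }

  // SHA1_Transform updates only the chaining state; serialize that state
  // big-endian, mirroring tls1_sha1_final_raw.
  SHA_CTX ctx;
  SHA1_Init(&ctx);
  SHA1_Transform(&ctx, block);
  uint8_t raw[SHA_DIGEST_LENGTH];
  for (size_t i = 0; i < 5; i++) {
    raw[4 * i + 0] = (uint8_t)(ctx.h[i] >> 24);
    raw[4 * i + 1] = (uint8_t)(ctx.h[i] >> 16);
    raw[4 * i + 2] = (uint8_t)(ctx.h[i] >> 8);
    raw[4 * i + 3] = (uint8_t)(ctx.h[i]);
  }

  // The raw serialization must match the ordinary digest.
  uint8_t expected[SHA_DIGEST_LENGTH];
  SHA1(msg, sizeof(msg), expected);
  assert(memcmp(raw, expected, SHA_DIGEST_LENGTH) == 0);
  return 0;
}

This is the relationship that lets EVP_tls_cbc_digest_record hash the secret-length inner input entirely through SHA1_Transform with its own constant-time padding; only the fixed-length outer hash goes through the usual SHA1_Update/SHA1_Final path at the end of the diff.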