|
|
|
@ -27,22 +27,6 @@ |
|
|
|
|
|
|
|
|
|
#include <emmintrin.h> |
|
|
|
|
|
|
|
|
|
static uint32_t load_u32_le(const uint8_t in[4]) { |
|
|
|
|
uint32_t ret; |
|
|
|
|
OPENSSL_memcpy(&ret, in, 4); |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static uint64_t load_u64_le(const uint8_t in[8]) { |
|
|
|
|
uint64_t ret; |
|
|
|
|
OPENSSL_memcpy(&ret, in, 8); |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void store_u64_le(uint8_t out[8], uint64_t v) { |
|
|
|
|
OPENSSL_memcpy(out, &v, 8); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
typedef __m128i xmmi; |
|
|
|
|
|
|
|
|
|
static const alignas(16) uint32_t poly1305_x64_sse2_message_mask[4] = { |
|
|
|
@ -112,8 +96,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) { |
|
|
|
|
uint64_t t0, t1; |
|
|
|
|
|
|
|
|
|
// clamp key
|
|
|
|
|
t0 = load_u64_le(key + 0); |
|
|
|
|
t1 = load_u64_le(key + 8); |
|
|
|
|
t0 = CRYPTO_load_u64_le(key + 0); |
|
|
|
|
t1 = CRYPTO_load_u64_le(key + 8); |
|
|
|
|
r0 = t0 & 0xffc0fffffff; |
|
|
|
|
t0 >>= 44; |
|
|
|
|
t0 |= t1 << 20; |
|
|
|
@ -131,10 +115,10 @@ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) { |
|
|
|
|
p->R22.d[3] = (uint32_t)(r2 >> 32); |
|
|
|
|
|
|
|
|
|
// store pad
|
|
|
|
|
p->R23.d[1] = load_u32_le(key + 16); |
|
|
|
|
p->R23.d[3] = load_u32_le(key + 20); |
|
|
|
|
p->R24.d[1] = load_u32_le(key + 24); |
|
|
|
|
p->R24.d[3] = load_u32_le(key + 28); |
|
|
|
|
p->R23.d[1] = CRYPTO_load_u32_le(key + 16); |
|
|
|
|
p->R23.d[3] = CRYPTO_load_u32_le(key + 20); |
|
|
|
|
p->R24.d[1] = CRYPTO_load_u32_le(key + 24); |
|
|
|
|
p->R24.d[3] = CRYPTO_load_u32_le(key + 28); |
|
|
|
|
|
|
|
|
|
// H = 0
|
|
|
|
|
st->H[0] = _mm_setzero_si128(); |
|
|
|
@ -766,8 +750,8 @@ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
poly1305_donna_atleast16bytes: |
|
|
|
|
t0 = load_u64_le(m + 0); |
|
|
|
|
t1 = load_u64_le(m + 8); |
|
|
|
|
t0 = CRYPTO_load_u64_le(m + 0); |
|
|
|
|
t1 = CRYPTO_load_u64_le(m + 8); |
|
|
|
|
h0 += t0 & 0xfffffffffff; |
|
|
|
|
t0 = shr128_pair(t1, t0, 44); |
|
|
|
|
h1 += t0 & 0xfffffffffff; |
|
|
|
@ -806,8 +790,8 @@ poly1305_donna_atmost15bytes: |
|
|
|
|
OPENSSL_memset(m + leftover, 0, 16 - leftover); |
|
|
|
|
leftover = 16; |
|
|
|
|
|
|
|
|
|
t0 = load_u64_le(m + 0); |
|
|
|
|
t1 = load_u64_le(m + 8); |
|
|
|
|
t0 = CRYPTO_load_u64_le(m + 0); |
|
|
|
|
t1 = CRYPTO_load_u64_le(m + 8); |
|
|
|
|
h0 += t0 & 0xfffffffffff; |
|
|
|
|
t0 = shr128_pair(t1, t0, 44); |
|
|
|
|
h1 += t0 & 0xfffffffffff; |
|
|
|
@ -853,8 +837,8 @@ poly1305_donna_finish: |
|
|
|
|
t1 = (t1 >> 24); |
|
|
|
|
h2 += (t1)+c; |
|
|
|
|
|
|
|
|
|
store_u64_le(mac + 0, ((h0) | (h1 << 44))); |
|
|
|
|
store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24))); |
|
|
|
|
CRYPTO_store_u64_le(mac + 0, ((h0) | (h1 << 44))); |
|
|
|
|
CRYPTO_store_u64_le(mac + 8, ((h1 >> 20) | (h2 << 24))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif // BORINGSSL_HAS_UINT128 && OPENSSL_X86_64
|
|
|
|
|