diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index 68fb65b30..56f9b908b 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -18,6 +18,8 @@ set(
   poly1305/poly1305_arm_asm.S
   ../third_party/fiat/asm/fiat_curve25519_adx_mul.S
   ../third_party/fiat/asm/fiat_curve25519_adx_square.S
+  ../third_party/fiat/asm/fiat_p256_adx_mul.S
+  ../third_party/fiat/asm/fiat_p256_adx_sqr.S
 )
 perlasm(CRYPTO_SOURCES aarch64 chacha/chacha-armv8 chacha/asm/chacha-armv8.pl)
 perlasm(CRYPTO_SOURCES aarch64 cipher_extra/chacha20_poly1305_armv8 cipher_extra/asm/chacha20_poly1305_armv8.pl)
diff --git a/crypto/fipsmodule/ec/p256_test.cc b/crypto/fipsmodule/ec/p256_test.cc
new file mode 100644
index 000000000..2af9319b5
--- /dev/null
+++ b/crypto/fipsmodule/ec/p256_test.cc
@@ -0,0 +1,51 @@
+/* Copyright (c) 2023, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <gtest/gtest.h>
+
+#include "../../internal.h"
+#include "../../test/abi_test.h"
+
+// p256_64.h only declares the ADX routines for GCC-compatible x86-64 builds
+// that target Apple or ELF object formats, where the assembly is built.
+#if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__) && \
+    (defined(__APPLE__) || defined(__ELF__)) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+#include "../../../third_party/fiat/p256_64.h"
+}
+
+// Runs fiat_p256_adx_mul on all-zero inputs under CHECK_ABI.
+TEST(P256Test, AdxMulABI) {
+  static const uint64_t in1[4] = {0}, in2[4] = {0};
+  uint64_t out[4];
+  if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
+      CRYPTO_is_ADX_capable()) {
+    CHECK_ABI(fiat_p256_adx_mul, out, in1, in2);
+  } else {
+    GTEST_SKIP() << "Can't test ABI of ADX code without ADX";
+  }
+}
+
+// Runs fiat_p256_adx_sqr on an all-zero input under CHECK_ABI.
+TEST(P256Test, AdxSquareABI) {
+  static const uint64_t in[4] = {0};
+  uint64_t out[4];
+  if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
+      CRYPTO_is_ADX_capable()) {
+    CHECK_ABI(fiat_p256_adx_sqr, out, in);
+  } else {
+    GTEST_SKIP() << "Can't test ABI of ADX code without ADX";
+  }
+}
+#endif
diff --git a/sources.cmake b/sources.cmake
index d2e15c737..2e7923f7c 100644
--- a/sources.cmake
+++ b/sources.cmake
@@ -38,6 +38,7 @@ set(
   crypto/fipsmodule/cmac/cmac_test.cc
   crypto/fipsmodule/ec/ec_test.cc
   crypto/fipsmodule/ec/p256-nistz_test.cc
+  crypto/fipsmodule/ec/p256_test.cc
   crypto/fipsmodule/ecdsa/ecdsa_test.cc
   crypto/fipsmodule/hkdf/hkdf_test.cc
   crypto/fipsmodule/md5/md5_test.cc
diff --git a/third_party/fiat/asm/fiat_p256_adx_mul.S b/third_party/fiat/asm/fiat_p256_adx_mul.S
new file mode 100644
index 000000000..d7ebd2171
--- /dev/null
+++ b/third_party/fiat/asm/fiat_p256_adx_mul.S
@@ -0,0 +1,189 @@
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+    (defined(__APPLE__) || defined(__ELF__))
+
+/*
+ * void fiat_p256_adx_mul(uint64_t *out, const uint64_t *a, const uint64_t *b)
+ *
+ * Arguments arrive in rdi (out), rsi (a) and rdx (b); four 64-bit limbs are
+ * read through each input pointer and four are stored through out. p256_64.h
+ * calls this in place of the C body of fiat_p256_mul. The code uses MULX, ADCX
+ * and ADOX, so callers must check for BMI2 and ADX support first.
+ *
+ * rbx and r12-r15 are saved below rsp (offsets -0x80 to -0x60) without moving
+ * rsp and are reloaded before returning, which relies on the System V red zone.
+ */
+.intel_syntax noprefix
+.text
+#if defined(__APPLE__)
+.private_extern _fiat_p256_adx_mul
+.global _fiat_p256_adx_mul
+_fiat_p256_adx_mul:
+#else
+.type fiat_p256_adx_mul, @function
+.hidden fiat_p256_adx_mul
+.global fiat_p256_adx_mul
+fiat_p256_adx_mul:
+#endif
+
+.cfi_startproc
+_CET_ENDBR
+push rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbp, -16
+mov rbp, rsp
+mov rax, rdx
+mov rdx, [ rsi + 0x0 ]
+test al, al
+mulx r8, rcx, [ rax + 0x0 ]
+mov [ rsp - 0x80 ], rbx
+.cfi_offset rbx, -16-0x80
+mulx rbx, r9, [ rax + 0x8 ]
+mov [ rsp - 0x68 ], r14
+.cfi_offset r14, -16-0x68
+adc r9, r8
+mov [ rsp - 0x60 ], r15
+.cfi_offset r15, -16-0x60
+mulx r15, r14, [ rax + 0x10 ]
+mov [ rsp - 0x78 ], r12
+.cfi_offset r12, -16-0x78
+adc r14, rbx
+mulx r11, r10, [ rax + 0x18 ]
+mov [ rsp - 0x70 ], r13
+.cfi_offset r13, -16-0x70
+adc r10, r15
+mov rdx, [ rsi + 0x8 ]
+mulx rbx, r8, [ rax + 0x0 ]
+adc r11, 0x0
+xor r15, r15
+adcx r8, r9
+adox rbx, r14
+mov [ rsp - 0x58 ], rdi
+mulx rdi, r9, [ rax + 0x8 ]
+adcx r9, rbx
+adox rdi, r10
+mulx rbx, r14, [ rax + 0x10 ]
+adcx r14, rdi
+adox rbx, r11
+mulx r13, r12, [ rax + 0x18 ]
+adcx r12, rbx
+mov rdx, 0x100000000
+mulx r11, r10, rcx
+adox r13, r15
+adcx r13, r15
+xor rdi, rdi
+adox r10, r8
+mulx r8, rbx, r10
+adox r11, r9
+adcx rbx, r11
+adox r8, r14
+mov rdx, 0xffffffff00000001
+mulx r9, r15, rcx
+adcx r15, r8
+adox r9, r12
+mulx r14, rcx, r10
+mov rdx, [ rsi + 0x10 ]
+mulx r10, r12, [ rax + 0x8 ]
+adcx rcx, r9
+adox r14, r13
+mulx r11, r13, [ rax + 0x0 ]
+mov r9, rdi
+adcx r14, r9
+adox rdi, rdi
+adc rdi, 0x0
+xor r9, r9
+adcx r13, rbx
+adox r11, r15
+mov rdx, [ rsi + 0x10 ]
+mulx r15, r8, [ rax + 0x10 ]
+adox r10, rcx
+mulx rcx, rbx, [ rax + 0x18 ]
+mov rdx, [ rsi + 0x18 ]
+adcx r12, r11
+mulx rsi, r11, [ rax + 0x8 ]
+adcx r8, r10
+adox r15, r14
+adcx rbx, r15
+adox rcx, r9
+adcx rcx, r9
+mulx r15, r10, [ rax + 0x0 ]
+add rcx, rdi
+mov r14, r9
+adc r14, 0
+xor r9, r9
+adcx r10, r12
+adox r15, r8
+adcx r11, r15
+adox rsi, rbx
+mulx r8, r12, [ rax + 0x10 ]
+adox r8, rcx
+mulx rcx, rbx, [ rax + 0x18 ]
+adcx r12, rsi
+adox rcx, r9
+mov rdx, 0x100000000
+adcx rbx, r8
+adc rcx, 0
+mulx rdi, r15, r13
+xor rax, rax
+adcx rcx, r14
+adc rax, 0
+xor r9, r9
+adox r15, r10
+mulx r14, r10, r15
+adox rdi, r11
+mov rdx, 0xffffffff00000001
+adox r14, r12
+adcx r10, rdi
+mulx r12, r11, r13
+adcx r11, r14
+adox r12, rbx
+mulx rbx, r13, r15
+adcx r13, r12
+adox rbx, rcx
+mov r8, r9
+adox rax, r9
+adcx r8, rbx
+adc rax, 0x0
+mov rcx, rax
+mov r15, 0xffffffffffffffff
+mov rdi, r10
+sub rdi, r15
+mov r14, 0xffffffff
+mov r12, r11
+sbb r12, r14
+mov rbx, r13
+sbb rbx, r9
+mov rax, rax
+mov rax, r8
+sbb rax, rdx
+sbb rcx, r9
+cmovc rdi, r10
+mov r10, [ rsp - 0x58 ]
+cmovc rbx, r13
+mov r13, [ rsp - 0x70 ]
+.cfi_restore r13
+cmovc r12, r11
+cmovc rax, r8
+mov [ r10 + 0x10 ], rbx
+mov rbx, [ rsp - 0x80 ]
+.cfi_restore rbx
+mov [ r10 + 0x0 ], rdi
+mov [ r10 + 0x8 ], r12
+mov [ r10 + 0x18 ], rax
+mov r12, [ rsp - 0x78 ]
+.cfi_restore r12
+mov r14, [ rsp - 0x68 ]
+.cfi_restore r14
+mov r15, [ rsp - 0x60 ]
+.cfi_restore r15
+pop rbp
+.cfi_restore rbp
+.cfi_adjust_cfa_offset -8
+ret
+.cfi_endproc
+#if defined(__ELF__)
+.size fiat_p256_adx_mul, .-fiat_p256_adx_mul
+#endif
+
+#endif
diff --git a/third_party/fiat/asm/fiat_p256_adx_sqr.S b/third_party/fiat/asm/fiat_p256_adx_sqr.S
new file mode 100644
index 000000000..cca269f52
--- /dev/null
+++ b/third_party/fiat/asm/fiat_p256_adx_sqr.S
@@ -0,0 +1,178 @@
+#include <openssl/asm_base.h>
+
+#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
+    (defined(__APPLE__) || defined(__ELF__))
+
+/*
+ * void fiat_p256_adx_sqr(uint64_t *out, const uint64_t *a)
+ *
+ * Arguments arrive in rdi (out) and rsi (a); four 64-bit limbs are read
+ * through a and four are stored through out. p256_64.h calls this in place
+ * of the C body of fiat_p256_square. The code uses MULX, ADCX and ADOX, so
+ * callers must check for BMI2 and ADX support first.
+ *
+ * rbx and r12-r15 are saved below rsp (offsets -0x80 to -0x60) without moving
+ * rsp and are reloaded before returning, which relies on the System V red zone.
+ */
+.intel_syntax noprefix
+.text
+#if defined(__APPLE__)
+.private_extern _fiat_p256_adx_sqr
+.global _fiat_p256_adx_sqr
+_fiat_p256_adx_sqr:
+#else
+.type fiat_p256_adx_sqr, @function
+.hidden fiat_p256_adx_sqr
+.global fiat_p256_adx_sqr
+fiat_p256_adx_sqr:
+#endif
+
+.cfi_startproc
+_CET_ENDBR
+push rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset rbp, -16
+mov rbp, rsp
+mov rdx, [ rsi + 0x0 ]
+mulx r10, rax, [ rsi + 0x18 ]
+mulx rcx, r11, rdx
+mulx r9, r8, [ rsi + 0x8 ]
+mov [ rsp - 0x80 ], rbx
+.cfi_offset rbx, -16-0x80
+xor rbx, rbx
+adox r8, r8
+mov [ rsp - 0x78 ], r12
+.cfi_offset r12, -16-0x78
+mulx r12, rbx, [ rsi + 0x10 ]
+mov rdx, [ rsi + 0x8 ]
+mov [ rsp - 0x70 ], r13
+.cfi_offset r13, -16-0x70
+mov [ rsp - 0x68 ], r14
+.cfi_offset r14, -16-0x68
+mulx r14, r13, rdx
+mov [ rsp - 0x60 ], r15
+.cfi_offset r15, -16-0x60
+mov [ rsp - 0x58 ], rdi
+mulx rdi, r15, [ rsi + 0x10 ]
+adcx r12, r15
+mov [ rsp - 0x50 ], r11
+mulx r11, r15, [ rsi + 0x18 ]
+adcx r10, rdi
+mov rdi, 0x0
+adcx r11, rdi
+clc
+adcx rbx, r9
+adox rbx, rbx
+adcx rax, r12
+adox rax, rax
+adcx r15, r10
+adox r15, r15
+mov rdx, [ rsi + 0x10 ]
+mulx r12, r9, [ rsi + 0x18 ]
+adcx r9, r11
+adcx r12, rdi
+mulx r11, r10, rdx
+clc
+adcx rcx, r8
+adcx r13, rbx
+adcx r14, rax
+adox r9, r9
+adcx r10, r15
+mov rdx, [ rsi + 0x18 ]
+mulx rbx, r8, rdx
+adox r12, r12
+adcx r11, r9
+mov rsi, [ rsp - 0x50 ]
+adcx r8, r12
+mov rax, 0x100000000
+mov rdx, rax
+mulx r15, rax, rsi
+adcx rbx, rdi
+adox rbx, rdi
+xor r9, r9
+adox rax, rcx
+adox r15, r13
+mulx rcx, rdi, rax
+adcx rdi, r15
+adox rcx, r14
+mov rdx, 0xffffffff00000001
+mulx r14, r13, rsi
+adox r14, r10
+adcx r13, rcx
+mulx r12, r10, rax
+adox r12, r11
+mov r11, r9
+adox r11, r8
+adcx r10, r14
+mov r8, r9
+adcx r8, r12
+mov rax, r9
+adcx rax, r11
+mov r15, r9
+adox r15, rbx
+mov rdx, 0x100000000
+mulx rcx, rbx, rdi
+mov r14, r9
+adcx r14, r15
+mov r12, r9
+adox r12, r12
+adcx r12, r9
+adox rbx, r13
+mulx r11, r13, rbx
+mov r15, 0xffffffff00000001
+mov rdx, r15
+mulx rsi, r15, rbx
+adox rcx, r10
+adox r11, r8
+mulx r8, r10, rdi
+adcx r13, rcx
+adox r8, rax
+adcx r10, r11
+adox rsi, r14
+mov rdi, r12
+mov rax, r9
+adox rdi, rax
+adcx r15, r8
+mov r14, rax
+adcx r14, rsi
+adcx rdi, r9
+dec r9
+mov rbx, r13
+sub rbx, r9
+mov rcx, 0xffffffff
+mov r11, r10
+sbb r11, rcx
+mov r8, r15
+sbb r8, rax
+mov rsi, r14
+sbb rsi, rdx
+sbb rdi, rax
+cmovc rbx, r13
+cmovc r8, r15
+cmovc r11, r10
+cmovc rsi, r14
+mov rdi, [ rsp - 0x58 ]
+mov [ rdi + 0x18 ], rsi
+mov [ rdi + 0x0 ], rbx
+mov [ rdi + 0x8 ], r11
+mov [ rdi + 0x10 ], r8
+mov rbx, [ rsp - 0x80 ]
+.cfi_restore rbx
+mov r12, [ rsp - 0x78 ]
+.cfi_restore r12
+mov r13, [ rsp - 0x70 ]
+.cfi_restore r13
+mov r14, [ rsp - 0x68 ]
+.cfi_restore r14
+mov r15, [ rsp - 0x60 ]
+.cfi_restore r15
+pop rbp
+.cfi_restore rbp
+.cfi_adjust_cfa_offset -8
+ret
+.cfi_endproc
+#if defined(__ELF__)
+.size fiat_p256_adx_sqr, .-fiat_p256_adx_sqr
+#endif
+
+#endif
diff --git a/third_party/fiat/p256_64.h b/third_party/fiat/p256_64.h
index c77263843..a691407b6 100644
--- a/third_party/fiat/p256_64.h
+++ b/third_party/fiat/p256_64.h
@@ -1,3 +1,16 @@
+#include "../../crypto/internal.h"
+
+// Assembly implementations from asm/fiat_p256_adx_{mul,sqr}.S. The assembly is
+// only emitted for Apple and ELF targets, so the declarations (and every use
+// below) carry the same object-format condition; other GCC-compatible x86-64
+// targets keep using the portable C code. The routines use MULX, ADCX and
+// ADOX, so callers check the BMI1, BMI2 and ADX capability bits first.
+#if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__) && \
+    (defined(__APPLE__) || defined(__ELF__))
+void fiat_p256_adx_mul(uint64_t*, const uint64_t*, const uint64_t*);
+void fiat_p256_adx_sqr(uint64_t*, const uint64_t*);
+#endif
+
 /* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' --inline --static --use-value-barrier p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul square add sub opp from_montgomery to_montgomery nonzero selectznz to_bytes from_bytes one msat divstep divstep_precomp */
 /* curve description: p256 */
 /* machine_wordsize = 64 (from "64") */
@@ -165,6 +178,15 @@ static FIAT_P256_FIAT_INLINE void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p25
  *
  */
 static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1, const fiat_p256_montgomery_domain_field_element arg2) {
+#if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__) && \
+    (defined(__APPLE__) || defined(__ELF__))
+  // Use the assembly routine when the CPU reports BMI1, BMI2 and ADX support.
+  if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
+      CRYPTO_is_ADX_capable()) {
+    fiat_p256_adx_mul(out1, arg1, arg2);
+    return;
+  }
+#endif
   uint64_t x1;
   uint64_t x2;
   uint64_t x3;
@@ -472,6 +494,15 @@ static FIAT_P256_FIAT_INLINE void fiat_p256_mul(fiat_p256_montgomery_domain_fiel
  *
  */
 static FIAT_P256_FIAT_INLINE void fiat_p256_square(fiat_p256_montgomery_domain_field_element out1, const fiat_p256_montgomery_domain_field_element arg1) {
+#if !defined(OPENSSL_NO_ASM) && defined(__GNUC__) && defined(__x86_64__) && \
+    (defined(__APPLE__) || defined(__ELF__))
+  // Use the assembly routine when the CPU reports BMI1, BMI2 and ADX support.
+  if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() &&
+      CRYPTO_is_ADX_capable()) {
+    fiat_p256_adx_sqr(out1, arg1);
+    return;
+  }
+#endif
   uint64_t x1;
   uint64_t x2;
   uint64_t x3;