boringssl/crypto/fipsmodule/ec/p256-nistz.h

/*
 * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright (c) 2014, Intel Corporation. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 *
 * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
 * (1) Intel Corporation, Israel Development Center, Haifa, Israel
 * (2) University of Haifa, Israel
 *
 * Reference:
 * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
 *                          256 Bit Primes"
 */

#ifndef OPENSSL_HEADER_EC_P256_X86_64_H
#define OPENSSL_HEADER_EC_P256_X86_64_H

#include <openssl/base.h>

#include <openssl/bn.h>

#include "../bn/internal.h"

#if defined(__cplusplus)
extern "C" {
#endif


#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) &&   \
    !defined(OPENSSL_SMALL)

// P-256 field operations.
//
// An element mod P in P-256 is represented as a little-endian array of
// |P256_LIMBS| |BN_ULONG|s, spanning the full range of values.
//
// The following functions take fully-reduced inputs mod P and give
// fully-reduced outputs. They may be used in-place.

// P256_LIMBS is the number of |BN_ULONG| words in a 256-bit value, i.e. the
// length of every field-element and scalar array declared in this header.
// NOTE(review): assumes |BN_BITS2| is the bit width of one |BN_ULONG| (it is
// defined outside this header) — confirm.
#define P256_LIMBS (256 / BN_BITS2)

// ecp_nistz256_neg sets |res| to -|a| mod P. |a| must be fully reduced mod P;
// |res| is fully reduced and may alias |a|.
void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);

// ecp_nistz256_mul_mont sets |res| to |a| * |b| * 2^-256 mod P, i.e. the
// Montgomery product of |a| and |b| with R = 2^256. |a| and |b| must be fully
// reduced mod P; |res| is fully reduced and may alias either input.
void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
                           const BN_ULONG a[P256_LIMBS],
                           const BN_ULONG b[P256_LIMBS]);

// ecp_nistz256_sqr_mont sets |res| to |a| * |a| * 2^-256 mod P, i.e. the
// Montgomery square of |a| with R = 2^256. |a| must be fully reduced mod P;
// |res| is fully reduced and may alias |a|.
void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
                           const BN_ULONG a[P256_LIMBS]);

// ecp_nistz256_from_mont converts |in| out of the Montgomery domain and writes
// the result to |res|. The conversion is a single Montgomery multiplication of
// |in| by the integer one.
static inline void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
                                          const BN_ULONG in[P256_LIMBS]) {
  // Limbs are little-endian, so only the first limb is set: the value 1.
  static const BN_ULONG kOne[P256_LIMBS] = {1};
  ecp_nistz256_mul_mont(res, in, kOne);
}


// P-256 scalar operations.
//
// The following functions compute modulo N, where N is the order of P-256. They
// take fully-reduced inputs and give fully-reduced outputs.

// ecp_nistz256_ord_mul_mont sets |res| to |a| * |b| where inputs and outputs
// are in Montgomery form. That is, |res| is |a| * |b| * 2^-256 mod N, where N
// is the order of P-256. |a| and |b| must be fully reduced mod N, and |res| is
// fully reduced.
void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
                               const BN_ULONG a[P256_LIMBS],
                               const BN_ULONG b[P256_LIMBS]);

// ecp_nistz256_ord_sqr_mont sets |res| to |a|^(2*|rep|) where inputs and
// outputs are in Montgomery form. That is, |res| is
// (|a| * 2^-256)^(2*|rep|) * 2^256 mod N.
//
// NOTE(review): if |rep| is the number of successive Montgomery squarings
// performed, the exponent would be 2^|rep| rather than 2*|rep|; the two only
// agree for |rep| = 1 and |rep| = 2. Confirm against the implementation and
// its callers before relying on the formula above.
void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
                               const BN_ULONG a[P256_LIMBS], BN_ULONG rep);

// beeu_mod_inverse_vartime sets |out| = |a|^-1 mod |p| using a Euclidean
// algorithm.
// Assumption: 0 < a < p < 2^(256) and p is odd.
//
// NOTE(review): the meaning of the int return value is not stated here —
// presumably one on success and zero on failure; confirm against the
// implementation. The |_vartime| suffix suggests the running time depends on
// the inputs, so presumably this must only be called with public values —
// confirm.
int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
                             const BN_ULONG a[P256_LIMBS],
                             const BN_ULONG p[P256_LIMBS]);


// P-256 point operations.
//
// The following functions may be used in-place. All coordinates are in the
// Montgomery domain.

// A P256_POINT represents a P-256 point in Jacobian coordinates. Each
// coordinate is a little-endian array of |P256_LIMBS| |BN_ULONG|s; the point
// operations in this header expect all coordinates to be in the Montgomery
// domain.
typedef struct {
  BN_ULONG X[P256_LIMBS];
  BN_ULONG Y[P256_LIMBS];
  BN_ULONG Z[P256_LIMBS];
} P256_POINT;

// A P256_POINT_AFFINE represents a P-256 point in affine coordinates. Infinity
// is encoded as (0, 0). Each coordinate is a little-endian array of
// |P256_LIMBS| |BN_ULONG|s; the point operations in this header expect both
// coordinates to be in the Montgomery domain.
typedef struct {
  BN_ULONG X[P256_LIMBS];
  BN_ULONG Y[P256_LIMBS];
} P256_POINT_AFFINE;

// ecp_nistz256_select_w5 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 16
// and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time. |in_t| is a table of 16 Jacobian points.
//
// NOTE(review): behavior for |index| outside [0, 16] is not specified here.
void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
                            int index);

// ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64
// and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time. |in_t| is a table of 64 affine points.
//
// NOTE(review): behavior for |index| outside [0, 64] is not specified here.
void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
                            const P256_POINT_AFFINE in_t[64], int index);

// ecp_nistz256_point_double sets |r| to |a| doubled. Both points are Jacobian
// with coordinates in the Montgomery domain; |r| may alias |a|.
void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);

// ecp_nistz256_point_add adds |a| to |b| and places the result in |r|. All
// three points are Jacobian with coordinates in the Montgomery domain; |r| may
// alias |a| or |b|.
void ecp_nistz256_point_add(P256_POINT *r, const P256_POINT *a,
                            const P256_POINT *b);

// ecp_nistz256_point_add_affine adds the Jacobian point |a| to the affine
// point |b| and places the Jacobian result in |r|. |a| and |b| must not
// represent the same point unless they are both infinity. All coordinates are
// in the Montgomery domain, and |r| may alias |a|.
void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
                                   const P256_POINT_AFFINE *b);

#endif /* !defined(OPENSSL_NO_ASM) && \
          (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) &&   \
          !defined(OPENSSL_SMALL) */


#if defined(__cplusplus)
}  // extern "C"
#endif

#endif  // OPENSSL_HEADER_EC_P256_X86_64_H
acvp: add CMAC-AES support. Change by Dan Janni. Change-Id: I3f059e7b1a822c6f97128ca92a693499a3f7fa8f Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41984 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: David Benjamin <davidben@google.com> 5 years ago			`/*`
			`* Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.`
			`* Copyright (c) 2014, Intel Corporation. All Rights Reserved.`
			`*`
			`* Licensed under the OpenSSL license (the "License"). You may not use`
			`* this file except in compliance with the License. You can obtain a copy`
			`* in the file LICENSE in the source distribution or at`
			`* https://www.openssl.org/source/license.html`
			`*`
			`* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)`
			`* (1) Intel Corporation, Israel Development Center, Haifa, Israel`
			`* (2) University of Haifa, Israel`
			`*`
			`* Reference:`
			`* S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with`
			`* 256 Bit Primes"`
			`*/`

			`#ifndef OPENSSL_HEADER_EC_P256_X86_64_H`
			`#define OPENSSL_HEADER_EC_P256_X86_64_H`

			`#include <openssl/base.h>`

			`#include <openssl/bn.h>`

			`#include "../bn/internal.h"`

			`#if defined(__cplusplus)`
			`extern "C" {`
			`#endif`


P-256 assembly optimisations for Aarch64. The ARMv8 assembly code in this commit is mostly taken from OpenSSL's `ecp_nistz256-armv8.pl` at https://github.com/openssl/openssl/blob/19e277dd19f2897f6a7b7eb236abe46655e575bf/crypto/ec/asm/ecp_nistz256-armv8.pl (see Note 1), adapting it to the implementation in p256-x86_64.c. Most of the assembly functions found in `crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl` required to support that code have their analogous functions in the imported OpenSSL ARMv8 Perl assembly implementation with the exception of the functions: - ecp_nistz256_select_w5 - ecp_nistz256_select_w7 An implementation for these functions was added. Summary of modifications to the imported code: * Renamed to `p256-armv8-asm.pl` * Modified the location of `arm-xlate.pl` and `arm_arch.h` * Replaced the `scatter-gather subroutines` with `select subroutines`. The `select subroutines` are implemented for ARMv8 similarly to their x86_64 counterparts, `ecp_nistz256_select_w5` and `ecp_nistz256_select_w7`. * `ecp_nistz256_add` is removed because it was conflicting during the static build with the function of the same name in p256-nistz.c. The latter calls another assembly function, `ecp_nistz256_point_add`. * `__ecp_nistz256_add` renamed to `__ecp_nistz256_add_to` to avoid the conflict with the function `ecp_nistz256_add` during the static build. * l. 924 `add sp,sp,#256` the calculation of the constant, 32(12-4), is not left for the assembler to perform. Other modifications: `beeu_mod_inverse_vartime()` was implemented for AArch64 in `p256_beeu-armv8-asm.pl` similarly to its implementation in `p256_beeu-x86_64-asm.pl`. * The files containing `p256-x86_64` in their name were renamed to, `p256-nistz` since the functions and tests defined in them are hereby running on ARMv8 as well, if enabled. * Updated `delocate.go` and `delocate.peg` to handle the offset calculation in the assembly instructions. * Regenerated `delocate.peg.go`. 
Notes: 1- The last commit in the history of the file is in master only, the previous commits are in OpenSSL 3.0.1 2- This change focuses on AArch64 (64-bit architecture of ARMv8). It does not support ARMv4 or ARMv7. Testing the performance on Armv8 platform using -DCMAKE_BUILD_TYPE=Release: Before: ``` Did 2596 ECDH P-256 operations in 1093956us (2373.0 ops/sec) Did 6996 ECDSA P-256 signing operations in 1044630us (6697.1 ops/sec) Did 2970 ECDSA P-256 verify operations in 1084848us (2737.7 ops/sec) ``` After: ``` Did 6699 ECDH P-256 operations in 1091684us (6136.4 ops/sec) Did 20000 ECDSA P-256 signing operations in 1012944us (19744.4 ops/sec) Did 7051 ECDSA P-256 verify operations in 1060000us (6651.9 ops/sec) ``` Change-Id: I9fdef12db365967a9264b5b32c07967b55ea48bd Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/51805 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> 3 years ago			`#if !defined(OPENSSL_NO_ASM) && \`
			`(defined(OPENSSL_X86_64) \|\| defined(OPENSSL_AARCH64)) && \`
acvp: add CMAC-AES support. Change by Dan Janni. Change-Id: I3f059e7b1a822c6f97128ca92a693499a3f7fa8f Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41984 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: David Benjamin <davidben@google.com> 5 years ago			`!defined(OPENSSL_SMALL)`

			`// P-256 field operations.`
			`//`
			`// An element mod P in P-256 is represented as a little-endian array of`
			`// \|P256_LIMBS\| \|BN_ULONG\|s, spanning the full range of values.`
			`//`
			`// The following functions take fully-reduced inputs mod P and give`
			`// fully-reduced outputs. They may be used in-place.`

			`#define P256_LIMBS (256 / BN_BITS2)`

			`// ecp_nistz256_neg sets \|res\| to -\|a\| mod P.`
			`void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);`

			`// ecp_nistz256_mul_mont sets \|res\| to \|a\| * \|b\| * 2^-256 mod P.`
			`void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],`
			`const BN_ULONG a[P256_LIMBS],`
			`const BN_ULONG b[P256_LIMBS]);`

			`// ecp_nistz256_sqr_mont sets \|res\| to \|a\| * \|a\| * 2^-256 mod P.`
			`void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],`
			`const BN_ULONG a[P256_LIMBS]);`

			`// ecp_nistz256_from_mont sets \|res\| to \|in\|, converted from Montgomery domain`
			`// by multiplying with 1.`
			`static inline void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],`
			`const BN_ULONG in[P256_LIMBS]) {`
			`static const BN_ULONG ONE[P256_LIMBS] = { 1 };`
			`ecp_nistz256_mul_mont(res, in, ONE);`
			`}`


			`// P-256 scalar operations.`
			`//`
			`// The following functions compute modulo N, where N is the order of P-256. They`
			`// take fully-reduced inputs and give fully-reduced outputs.`

			`// ecp_nistz256_ord_mul_mont sets \|res\| to \|a\| * \|b\| where inputs and outputs`
			`// are in Montgomery form. That is, \|res\| is \|a\| * \|b\| * 2^-256 mod N.`
			`void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],`
			`const BN_ULONG a[P256_LIMBS],`
			`const BN_ULONG b[P256_LIMBS]);`

			`// ecp_nistz256_ord_sqr_mont sets \|res\| to \|a\|^(2*\|rep\|) where inputs and`
			`// outputs are in Montgomery form. That is, \|res\| is`
			`// (\|a\| * 2^-256)^(2*\|rep\|) * 2^256 mod N.`
			`void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],`
			`const BN_ULONG a[P256_LIMBS], BN_ULONG rep);`

			`// beeu_mod_inverse_vartime sets out = a^-1 mod p using a Euclidean algorithm.`
			`// Assumption: 0 < a < p < 2^(256) and p is odd.`
			`int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],`
			`const BN_ULONG a[P256_LIMBS],`
			`const BN_ULONG p[P256_LIMBS]);`


			`// P-256 point operations.`
			`//`
			`// The following functions may be used in-place. All coordinates are in the`
			`// Montgomery domain.`

			`// A P256_POINT represents a P-256 point in Jacobian coordinates.`
			`typedef struct {`
			`BN_ULONG X[P256_LIMBS];`
			`BN_ULONG Y[P256_LIMBS];`
			`BN_ULONG Z[P256_LIMBS];`
			`} P256_POINT;`

			`// A P256_POINT_AFFINE represents a P-256 point in affine coordinates. Infinity`
			`// is encoded as (0, 0).`
			`typedef struct {`
			`BN_ULONG X[P256_LIMBS];`
			`BN_ULONG Y[P256_LIMBS];`
			`} P256_POINT_AFFINE;`

			`// ecp_nistz256_select_w5 sets \|*val\| to \|in_t[index-1]\| if 1 <= \|index\| <= 16`
			`// and all zeros (the point at infinity) if \|index\| is 0. This is done in`
			`// constant time.`
			`void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],`
			`int index);`

			`// ecp_nistz256_select_w7 sets \|*val\| to \|in_t[index-1]\| if 1 <= \|index\| <= 64`
			`// and all zeros (the point at infinity) if \|index\| is 0. This is done in`
			`// constant time.`
			`void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,`
			`const P256_POINT_AFFINE in_t[64], int index);`

			`// ecp_nistz256_point_double sets \|r\| to \|a\| doubled.`
			`void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);`

			`// ecp_nistz256_point_add adds \|a\| to \|b\| and places the result in \|r\|.`
			`void ecp_nistz256_point_add(P256_POINT *r, const P256_POINT *a,`
			`const P256_POINT *b);`

			`// ecp_nistz256_point_add_affine adds \|a\| to \|b\| and places the result in`
			`// \|r\|. \|a\| and \|b\| must not represent the same point unless they are both`
			`// infinity.`
			`void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,`
			`const P256_POINT_AFFINE *b);`

P-256 assembly optimisations for Aarch64. The ARMv8 assembly code in this commit is mostly taken from OpenSSL's `ecp_nistz256-armv8.pl` at https://github.com/openssl/openssl/blob/19e277dd19f2897f6a7b7eb236abe46655e575bf/crypto/ec/asm/ecp_nistz256-armv8.pl (see Note 1), adapting it to the implementation in p256-x86_64.c. Most of the assembly functions found in `crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl` required to support that code have their analogous functions in the imported OpenSSL ARMv8 Perl assembly implementation with the exception of the functions: - ecp_nistz256_select_w5 - ecp_nistz256_select_w7 An implementation for these functions was added. Summary of modifications to the imported code: * Renamed to `p256-armv8-asm.pl` * Modified the location of `arm-xlate.pl` and `arm_arch.h` * Replaced the `scatter-gather subroutines` with `select subroutines`. The `select subroutines` are implemented for ARMv8 similarly to their x86_64 counterparts, `ecp_nistz256_select_w5` and `ecp_nistz256_select_w7`. * `ecp_nistz256_add` is removed because it was conflicting during the static build with the function of the same name in p256-nistz.c. The latter calls another assembly function, `ecp_nistz256_point_add`. * `__ecp_nistz256_add` renamed to `__ecp_nistz256_add_to` to avoid the conflict with the function `ecp_nistz256_add` during the static build. * l. 924 `add sp,sp,#256` the calculation of the constant, 32(12-4), is not left for the assembler to perform. Other modifications: `beeu_mod_inverse_vartime()` was implemented for AArch64 in `p256_beeu-armv8-asm.pl` similarly to its implementation in `p256_beeu-x86_64-asm.pl`. * The files containing `p256-x86_64` in their name were renamed to, `p256-nistz` since the functions and tests defined in them are hereby running on ARMv8 as well, if enabled. * Updated `delocate.go` and `delocate.peg` to handle the offset calculation in the assembly instructions. * Regenerated `delocate.peg.go`. 
Notes: 1- The last commit in the history of the file is in master only, the previous commits are in OpenSSL 3.0.1 2- This change focuses on AArch64 (64-bit architecture of ARMv8). It does not support ARMv4 or ARMv7. Testing the performance on Armv8 platform using -DCMAKE_BUILD_TYPE=Release: Before: ``` Did 2596 ECDH P-256 operations in 1093956us (2373.0 ops/sec) Did 6996 ECDSA P-256 signing operations in 1044630us (6697.1 ops/sec) Did 2970 ECDSA P-256 verify operations in 1084848us (2737.7 ops/sec) ``` After: ``` Did 6699 ECDH P-256 operations in 1091684us (6136.4 ops/sec) Did 20000 ECDSA P-256 signing operations in 1012944us (19744.4 ops/sec) Did 7051 ECDSA P-256 verify operations in 1060000us (6651.9 ops/sec) ``` Change-Id: I9fdef12db365967a9264b5b32c07967b55ea48bd Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/51805 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com> 3 years ago			`#endif /* !defined(OPENSSL_NO_ASM) && \`
			`(defined(OPENSSL_X86_64) \|\| defined(OPENSSL_AARCH64)) && \`
			`!defined(OPENSSL_SMALL) */`
acvp: add CMAC-AES support. Change by Dan Janni. Change-Id: I3f059e7b1a822c6f97128ca92a693499a3f7fa8f Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41984 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: David Benjamin <davidben@google.com> 5 years ago

			`#if defined(__cplusplus)`
			`} // extern C++`
			`#endif`

			`#endif // OPENSSL_HEADER_EC_P256_X86_64_H`