Mirror of BoringSSL (grpc依赖) https://boringssl.googlesource.com/boringssl
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

233 lines
5.6 KiB

/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
package main
import (
"crypto/aes"
"crypto/cipher"
"crypto/elliptic"
"crypto/rand"
"fmt"
"io"
"math/big"
)
// Package-level state shared by the generator; every variable is assigned in
// init.
var (
// p256 is the curve returned by elliptic.P256().
p256 elliptic.Curve
// zero and one hold the constants 0 and 1. p is the curve's field prime
// (p256.Params().P), R is 2^256 mod p, and Rinv is the inverse of R mod p.
zero, one, p, R, Rinv *big.Int
// deterministicRand is the reader built by newDeterministicRand; it is the
// randomness source handed to rand.Int in randNonZeroInt.
deterministicRand io.Reader
)
// coordinates selects the representation printTestCase uses for an input
// point.
type coordinates int
const (
// affine leaves X and Y as given; printTestCase sets Z to one, or to zero
// when the point is the affine infinity (0, 0).
affine coordinates = iota
// jacobian makes printTestCase convert the point with toJacobian.
jacobian
)
func init() {
p256 = elliptic.P256()
zero = new(big.Int)
one = new(big.Int).SetInt64(1)
p = p256.Params().P
R = new(big.Int)
R.SetBit(R, 256, 1)
R.Mod(R, p)
Rinv = new(big.Int).ModInverse(R, p)
deterministicRand = newDeterministicRand()
}
// modMul sets z to x*y mod p and returns z.
func modMul(z, x, y *big.Int) *big.Int {
	product := z.Mul(x, y) // Mul returns its receiver, so product aliases z.
	return product.Mod(product, p)
}
// toMontgomery sets z to x*R mod p and returns z.
func toMontgomery(z, x *big.Int) *big.Int {
	encoded := modMul(z, x, R)
	return encoded
}
// fromMontgomery sets z to x*Rinv mod p and returns z.
func fromMontgomery(z, x *big.Int) *big.Int {
	decoded := modMul(z, x, Rinv)
	return decoded
}
// isAffineInfinity reports whether (x, y) is the affine encoding of the point
// at infinity, i.e. whether both coordinates are zero.
func isAffineInfinity(x, y *big.Int) bool {
	// Infinity, in affine coordinates, is represented as (0, 0) by
	// Go, p256-x86_64-asm.pl and p256-armv8-asm.pl alike.
	return x.Sign() == 0 && y.Sign() == 0
}
// randNonZeroInt draws values from deterministicRand with rand.Int until it
// gets a non-zero one, and returns it. It panics if rand.Int reports an error.
func randNonZeroInt(max *big.Int) *big.Int {
	for {
		candidate, err := rand.Int(deterministicRand, max)
		switch {
		case err != nil:
			panic(err)
		case candidate.Sign() != 0:
			return candidate
		}
		// candidate was zero: draw again.
	}
}
func randPoint() (x, y *big.Int) {
k := randNonZeroInt(p256.Params().N)
return p256.ScalarBaseMult(k.Bytes())
}
func toJacobian(xIn, yIn *big.Int) (x, y, z *big.Int) {
if isAffineInfinity(xIn, yIn) {
// The Jacobian representation of infinity has Z = 0. Depending
// on the implementation, X and Y may be further constrained.
// Generalizing the curve equation to Jacobian coordinates for
// non-zero Z gives:
//
// y² = x³ - 3x + b, where x = X/Z² and y = Y/Z³
// Y² = X³ + aXZ⁴ + bZ⁶
//
// Taking that formula at Z = 0 gives Y² = X³. This constraint
// allows removing a special case in the point-on-curve check.
//
// BoringSSL, however, historically generated infinities with
// arbitrary X and Y and include the special case. We also have
// not verified that add and double preserve this
// property. Thus, generate test vectors with unrelated X and Y,
P-256 assembly optimisations for Aarch64. The ARMv8 assembly code in this commit is mostly taken from OpenSSL's `ecp_nistz256-armv8.pl` at https://github.com/openssl/openssl/blob/19e277dd19f2897f6a7b7eb236abe46655e575bf/crypto/ec/asm/ecp_nistz256-armv8.pl (see Note 1), adapting it to the implementation in p256-x86_64.c. Most of the assembly functions found in `crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl` required to support that code have their analogous functions in the imported OpenSSL ARMv8 Perl assembly implementation with the exception of the functions: - ecp_nistz256_select_w5 - ecp_nistz256_select_w7 An implementation for these functions was added. Summary of modifications to the imported code: * Renamed to `p256-armv8-asm.pl` * Modified the location of `arm-xlate.pl` and `arm_arch.h` * Replaced the `scatter-gather subroutines` with `select subroutines`. The `select subroutines` are implemented for ARMv8 similarly to their x86_64 counterparts, `ecp_nistz256_select_w5` and `ecp_nistz256_select_w7`. * `ecp_nistz256_add` is removed because it was conflicting during the static build with the function of the same name in p256-nistz.c. The latter calls another assembly function, `ecp_nistz256_point_add`. * `__ecp_nistz256_add` renamed to `__ecp_nistz256_add_to` to avoid the conflict with the function `ecp_nistz256_add` during the static build. * l. 924 `add sp,sp,#256` the calculation of the constant, 32*(12-4), is not left for the assembler to perform. Other modifications: * `beeu_mod_inverse_vartime()` was implemented for AArch64 in `p256_beeu-armv8-asm.pl` similarly to its implementation in `p256_beeu-x86_64-asm.pl`. * The files containing `p256-x86_64` in their name were renamed to, `p256-nistz` since the functions and tests defined in them are hereby running on ARMv8 as well, if enabled. * Updated `delocate.go` and `delocate.peg` to handle the offset calculation in the assembly instructions. * Regenerated `delocate.peg.go`. 
Notes: 1- The last commit in the history of the file is in master only, the previous commits are in OpenSSL 3.0.1 2- This change focuses on AArch64 (64-bit architecture of ARMv8). It does not support ARMv4 or ARMv7. Testing the performance on Armv8 platform using -DCMAKE_BUILD_TYPE=Release: Before: ``` Did 2596 ECDH P-256 operations in 1093956us (2373.0 ops/sec) Did 6996 ECDSA P-256 signing operations in 1044630us (6697.1 ops/sec) Did 2970 ECDSA P-256 verify operations in 1084848us (2737.7 ops/sec) ``` After: ``` Did 6699 ECDH P-256 operations in 1091684us (6136.4 ops/sec) Did 20000 ECDSA P-256 signing operations in 1012944us (19744.4 ops/sec) Did 7051 ECDSA P-256 verify operations in 1060000us (6651.9 ops/sec) ``` Change-Id: I9fdef12db365967a9264b5b32c07967b55ea48bd Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/51805 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com>
3 years ago
// to test that p256-x86_64-asm.pl and p256-armv8-asm.pl correctly
// handle unconstrained representations of infinity.
x = randNonZeroInt(p)
y = randNonZeroInt(p)
z = zero
return
}
z = randNonZeroInt(p)
// X = xZ²
y = modMul(new(big.Int), z, z)
x = modMul(new(big.Int), xIn, y)
// Y = yZ³
modMul(y, y, z)
modMul(y, y, yIn)
return
}
// printMontgomery prints "name = value", where value is a multiplied by R
// mod p, formatted as 64 zero-padded lowercase hex digits. a itself is not
// modified.
func printMontgomery(name string, a *big.Int) {
	encoded := toMontgomery(new(big.Int), a)
	fmt.Printf("%s = %064x\n", name, encoded)
}
// printTestCase prints one PointAdd test vector for A + B.
//
// (ax, ay) and (bx, by) are affine inputs. The expected result is computed
// with p256.Add on those affine values. Each input is then emitted either
// converted by toJacobian (when its coordinates argument is jacobian) or as
// given, with Z set to zero for the affine infinity and one otherwise. All
// values are printed through printMontgomery.
func printTestCase(ax, ay *big.Int, aCoord coordinates, bx, by *big.Int, bCoord coordinates) {
	// Compute the expected sum first, while the inputs are still affine.
	rx, ry := p256.Add(ax, ay, bx, by)

	var az, bz *big.Int

	// A is converted before B so random values are consumed in a fixed order.
	switch {
	case aCoord == jacobian:
		ax, ay, az = toJacobian(ax, ay)
	case isAffineInfinity(ax, ay):
		az = zero
	default:
		az = one
	}

	switch {
	case bCoord == jacobian:
		bx, by, bz = toJacobian(bx, by)
	case isAffineInfinity(bx, by):
		bz = zero
	default:
		bz = one
	}

	fmt.Printf("Test = PointAdd\n")
	fields := []struct {
		name  string
		value *big.Int
	}{
		{"A.X", ax},
		{"A.Y", ay},
		{"A.Z", az},
		{"B.X", bx},
		{"B.Y", by},
		{"B.Z", bz},
		{"Result.X", rx},
		{"Result.Y", ry},
	}
	for _, field := range fields {
		printMontgomery(field.name, field.value)
	}
	fmt.Printf("\n")
}
func main() {
fmt.Printf("# ∞ + ∞ = ∞.\n")
printTestCase(zero, zero, affine, zero, zero, affine)
fmt.Printf("# ∞ + ∞ = ∞, with an alternate representation of ∞.\n")
printTestCase(zero, zero, jacobian, zero, zero, jacobian)
gx, gy := p256.Params().Gx, p256.Params().Gy
fmt.Printf("# g + ∞ = g.\n")
printTestCase(gx, gy, affine, zero, zero, affine)
fmt.Printf("# g + ∞ = g, with an alternate representation of ∞.\n")
printTestCase(gx, gy, affine, zero, zero, jacobian)
fmt.Printf("# g + -g = ∞.\n")
minusGy := new(big.Int).Sub(p, gy)
printTestCase(gx, gy, affine, gx, minusGy, affine)
fmt.Printf("# Test some random Jacobian sums.\n")
for i := 0; i < 4; i++ {
ax, ay := randPoint()
bx, by := randPoint()
printTestCase(ax, ay, jacobian, bx, by, jacobian)
}
fmt.Printf("# Test some random Jacobian doublings.\n")
for i := 0; i < 4; i++ {
ax, ay := randPoint()
printTestCase(ax, ay, jacobian, ax, ay, jacobian)
}
fmt.Printf("# Test some random affine sums.\n")
for i := 0; i < 4; i++ {
ax, ay := randPoint()
bx, by := randPoint()
printTestCase(ax, ay, affine, bx, by, affine)
}
fmt.Printf("# Test some random affine doublings.\n")
for i := 0; i < 4; i++ {
ax, ay := randPoint()
printTestCase(ax, ay, affine, ax, ay, affine)
}
}
// deterministicRandom is an io.Reader that produces the key stream of the
// cipher.Stream it wraps.
type deterministicRandom struct {
	stream cipher.Stream
}

// newDeterministicRand returns a reader backed by AES-128 in CTR mode with an
// all-zero key and an all-zero initial counter block, so every instance
// yields the same byte sequence. It panics if the cipher cannot be created.
func newDeterministicRand() io.Reader {
	var key [128 / 8]byte
	block, err := aes.NewCipher(key[:])
	if err != nil {
		panic(err)
	}
	iv := make([]byte, block.BlockSize())
	return &deterministicRandom{stream: cipher.NewCTR(block, iv)}
}

// Read fills b with the next len(b) bytes of key stream. It always returns
// len(b) and a nil error.
func (r *deterministicRandom) Read(b []byte) (n int, err error) {
	// Zero the buffer first so that XORing in place leaves the raw key stream.
	for i := 0; i < len(b); i++ {
		b[i] = 0
	}
	r.stream.XORKeyStream(b, b)
	return len(b), nil
}