This imports the changes to sha512-armv8.pl from upstream's af0fcf7b4668218b24d9250b95e0b96939ccb4d1. Tweaks needed: - Add an explicit .text because we put .LK$BITS in .rodata for XOM - .LK$bits and code are in separate sections, so use adrp/add instead of plain adr - Where glibc needs feature flags to *enable* pthread_rwlock, Apple interprets _XOPEN_SOURCE as a request to *disable* Apple extensions. Tighten the condition on the _XOPEN_SOURCE check. Added support for macOS and Linux, tested manually on an ARM Mac and a VM, respectively. Fuchsia and Windows do not currently have APIs to expose this bit, so I've left in TODOs. Benchmarks from an Apple M1 Max: Before: Did 4647000 SHA-512 (16 bytes) operations in 1000103us (74.3 MB/sec) Did 1614000 SHA-512 (256 bytes) operations in 1000379us (413.0 MB/sec) Did 439000 SHA-512 (1350 bytes) operations in 1001694us (591.6 MB/sec) Did 76000 SHA-512 (8192 bytes) operations in 1011821us (615.3 MB/sec) Did 39000 SHA-512 (16384 bytes) operations in 1024311us (623.8 MB/sec) After: Did 10369000 SHA-512 (16 bytes) operations in 1000088us (165.9 MB/sec) [+123.1%] Did 3650000 SHA-512 (256 bytes) operations in 1000079us (934.3 MB/sec) [+126.2%] Did 1029000 SHA-512 (1350 bytes) operations in 1000521us (1388.4 MB/sec) [+134.7%] Did 175000 SHA-512 (8192 bytes) operations in 1001874us (1430.9 MB/sec) [+132.5%] Did 89000 SHA-512 (16384 bytes) operations in 1010314us (1443.3 MB/sec) [+131.4%] (This doesn't seem to change the overall SHA-256 vs SHA-512 performance question on ARM, when hashing perf matters. SHA-256 on the same chip gets up to 2454.6 MB/s.) In terms of build coverage, for now, we'll have build coverage everywhere and test coverage on Chromium, which runs this code on macOS CI. We should request a macOS ARM64 bot for our standalone CI. Longer term, we need a QEMU-based builder to test various features. QEMU seems to have pretty good coverage of all this, which will at least give us Linux. 
I haven't added an OPENSSL_STATIC_ARMCAP_SHA512 for now. Instead, we just look at the standard __ARM_FEATURE_SHA512 define. Strangely, the corresponding -march tag is not sha512. Neither GCC nor Clang has -march=armv8-a+sha512. Instead, -march=armv8-a+sha3 implies both __ARM_FEATURE_SHA3 and __ARM_FEATURE_SHA512! Yet everything else seems to describe the SHA512 extension as separate from SHA3. https://developer.arm.com/architectures/system-architectures/software-standards/acle Update-Note: Consumers with a different build setup may need to limit -D_XOPEN_SOURCE=700 to Linux or non-Apple platforms. Otherwise, <sys/types.h> won't define some typedefs needed by <sys/sysctl.h>. If you see a build error about u_char, etc., being undefined in some system header, that is probably the cause. Change-Id: Ia213d3796b84c71b7966bb68e0aec92e5d7d26f0 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50807 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: David Benjamin <davidben@google.com> fips-20220613
parent
af561c221d
commit
1e15682f1a
12 changed files with 257 additions and 32 deletions
@ -0,0 +1,73 @@ |
||||
/* Copyright (c) 2021, Google Inc.
|
||||
* |
||||
* Permission to use, copy, modify, and/or distribute this software for any |
||||
* purpose with or without fee is hereby granted, provided that the above |
||||
* copyright notice and this permission notice appear in all copies. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
||||
|
||||
#include <openssl/cpu.h> |
||||
|
||||
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_APPLE) && \ |
||||
!defined(OPENSSL_STATIC_ARMCAP) |
||||
|
||||
#include <sys/sysctl.h> |
||||
#include <sys/types.h> |
||||
|
||||
#include <openssl/arm_arch.h> |
||||
|
||||
#include "internal.h" |
||||
|
||||
|
||||
extern uint32_t OPENSSL_armcap_P; |
||||
|
||||
// has_hw_feature looks up the sysctl selector |name|, which is expected to be
// integer-valued. It returns one if the lookup succeeds and yields a non-zero
// value, and zero otherwise.
static int has_hw_feature(const char *name) {
  int result;
  size_t result_len = sizeof(result);
  const int rc = sysctlbyname(name, &result, &result_len, NULL, 0);
  if (rc != 0) {
    // sys/sysctl.h documents that a selector which returns an error is not
    // supported on this system, so report the feature as absent.
    return 0;
  }

  if (result_len == sizeof(int)) {
    // sys/sysctl.h documents that a supported feature reports 1 when it is
    // recommended and 0 when it is supported but not expected to help
    // performance, and that later versions may report larger values. Any
    // non-zero value is therefore treated as "present".
    return result != 0;
  }

  // Every selector queried through this helper should be integer-valued, so a
  // different length is unexpected. Flag it in builds with assertions enabled
  // and otherwise report the feature as absent.
  assert(0);
  return 0;
}
||||
|
||||
void OPENSSL_cpuid_setup(void) { |
||||
// Apple ARM64 platforms have NEON and cryptography extensions available
|
||||
// statically, so we do not need to query them. In particular, there sometimes
|
||||
// are no sysctls corresponding to such features. See below.
|
||||
#if !defined(__ARM_NEON) || !defined(__ARM_FEATURE_CRYPTO) |
||||
#error "NEON and crypto extensions should be statically available." |
||||
#endif |
||||
OPENSSL_armcap_P = |
||||
ARMV7_NEON | ARMV8_AES | ARMV8_PMULL | ARMV8_SHA1 | ARMV8_SHA256; |
||||
|
||||
// macOS has sysctls named both like "hw.optional.arm.FEAT_SHA512" and like
|
||||
// "hw.optional.armv8_2_sha512". There does not appear to be documentation on
|
||||
// which to use. The "armv8_2_sha512" style omits statically-available
|
||||
// features, while the "FEAT_SHA512" style includes them. However, the
|
||||
// "FEAT_SHA512" style was added in macOS 12, so we use the older style for
|
||||
// better compatibility and handle static features above.
|
||||
if (has_hw_feature("hw.optional.armv8_2_sha512")) { |
||||
OPENSSL_armcap_P |= ARMV8_SHA512; |
||||
} |
||||
} |
||||
|
||||
#endif // OPENSSL_AARCH64 && OPENSSL_APPLE && !OPENSSL_STATIC_ARMCAP
|
Loading…
Reference in new issue