Mirror of BoringSSL (grpc依赖) https://boringssl.googlesource.com/boringssl
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

204 lines
7.5 KiB

/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#ifndef OPENSSL_HEADER_CPU_H
#define OPENSSL_HEADER_CPU_H
#include <openssl/base.h>
#if defined(__cplusplus)
extern "C" {
#endif
// Runtime CPU feature support
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// OPENSSL_ia32cap_P contains the Intel CPUID bits when running on an x86 or
// x86-64 system.
//
// Index 0:
// EDX for CPUID where EAX = 1
// Bit 20 is always zero
// Bit 28 is adjusted to reflect whether the data cache is shared between
// multiple logical cores
// Bit 30 is used to indicate an Intel CPU
// Index 1:
// ECX for CPUID where EAX = 1
// Bit 11 is used to indicate AMD XOP support, not SDBG
// Index 2:
// EBX for CPUID where EAX = 7
// Index 3:
// ECX for CPUID where EAX = 7
//
// Note: the CPUID bits are pre-adjusted for the OSXSAVE bit and the YMM and XMM
// bits in XCR0, so it is not necessary to check those.
extern uint32_t OPENSSL_ia32cap_P[4];
#if defined(BORINGSSL_FIPS) && !defined(BORINGSSL_SHARED_LIBRARY)
const uint32_t *OPENSSL_ia32cap_get(void);
#else
OPENSSL_INLINE const uint32_t *OPENSSL_ia32cap_get(void) {
return OPENSSL_ia32cap_P;
}
#endif
#endif
#if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
Enable SHA-512 ARM acceleration when available. This imports the changes to sha512-armv8.pl from upstream's af0fcf7b4668218b24d9250b95e0b96939ccb4d1. Tweaks needed: - Add an explicit .text because we put .LK$BITS in .rodata for XOM - .LK$bits and code are in separate sections, so use adrp/add instead of plain adr - Where glibc needs feature flags to *enable* pthread_rwlock, Apple interprets _XOPEN_SOURCE as a request to *disable* Apple extensions. Tighten the condition on the _XOPEN_SOURCE check. Added support for macOS and Linux, tested manually on an ARM Mac and a VM, respectively. Fuchsia and Windows do not currently have APIs to expose this bit, so I've left in TODOs. Benchmarks from an Apple M1 Max: Before: Did 4647000 SHA-512 (16 bytes) operations in 1000103us (74.3 MB/sec) Did 1614000 SHA-512 (256 bytes) operations in 1000379us (413.0 MB/sec) Did 439000 SHA-512 (1350 bytes) operations in 1001694us (591.6 MB/sec) Did 76000 SHA-512 (8192 bytes) operations in 1011821us (615.3 MB/sec) Did 39000 SHA-512 (16384 bytes) operations in 1024311us (623.8 MB/sec) After: Did 10369000 SHA-512 (16 bytes) operations in 1000088us (165.9 MB/sec) [+123.1%] Did 3650000 SHA-512 (256 bytes) operations in 1000079us (934.3 MB/sec) [+126.2%] Did 1029000 SHA-512 (1350 bytes) operations in 1000521us (1388.4 MB/sec) [+134.7%] Did 175000 SHA-512 (8192 bytes) operations in 1001874us (1430.9 MB/sec) [+132.5%] Did 89000 SHA-512 (16384 bytes) operations in 1010314us (1443.3 MB/sec) [+131.4%] (This doesn't seem to change the overall SHA-256 vs SHA-512 performance question on ARM, when hashing perf matters. SHA-256 on the same chip gets up to 2454.6 MB/s.) In terms of build coverage, for now, we'll have build coverage everywhere and test coverage on Chromium, which runs this code on macOS CI. We should request a macOS ARM64 bot for our standalone CI. Longer term, we need a QEMU-based builder to test various features. QEMU seems to have pretty good coverage of all this, which will at least give us Linux. I haven't added an OPENSSL_STATIC_ARMCAP_SHA512 for now. Instead, we just look at the standard __ARM_FEATURE_SHA512 define. Strangely, the corresponding -march tag is not sha512. Neither GCC and nor Clang have -march=armv8-a+sha512. Instead, -march=armv8-a+sha3 implies both __ARM_FEATURE_SHA3 and __ARM_FEATURE_SHA512! Yet everything else seems to describe the SHA512 extension as separate from SHA3. https://developer.arm.com/architectures/system-architectures/software-standards/acle Update-Note: Consumers with a different build setup may need to limit -D_XOPEN_SOURCE=700 to Linux or non-Apple platforms. Otherwise, <sys/types.h> won't define some typedef needed by <sys/sysctl.h>. If you see a build error about u_char, etc., being undefined in some system header, that is probably the cause. Change-Id: Ia213d3796b84c71b7966bb68e0aec92e5d7d26f0 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/50807 Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: David Benjamin <davidben@google.com>
3 years ago
#if defined(OPENSSL_APPLE) && defined(OPENSSL_ARM)
// We do not detect any features at runtime for Apple's 32-bit ARM platforms. On
// 64-bit ARM, we detect some post-ARMv8.0 features.
#define OPENSSL_STATIC_ARMCAP
#endif
#if !defined(OPENSSL_STATIC_ARMCAP)
// CRYPTO_is_NEON_capable_at_runtime returns true if the current CPU has a NEON
// unit. Note that |OPENSSL_armcap_P| also exists and contains the same
// information in a form that's easier for assembly to use.
OPENSSL_EXPORT int CRYPTO_is_NEON_capable_at_runtime(void);
// CRYPTO_is_ARMv8_AES_capable_at_runtime returns true if the current CPU
// supports the ARMv8 AES instruction.
int CRYPTO_is_ARMv8_AES_capable_at_runtime(void);
// CRYPTO_is_ARMv8_PMULL_capable_at_runtime returns true if the current CPU
// supports the ARMv8 PMULL instruction.
int CRYPTO_is_ARMv8_PMULL_capable_at_runtime(void);
#if defined(OPENSSL_ARM)
// CRYPTO_has_broken_NEON returns one if the current CPU is known to have a
// broken NEON unit. See https://crbug.com/341598.
OPENSSL_EXPORT int CRYPTO_has_broken_NEON(void);
// CRYPTO_needs_hwcap2_workaround returns one if the ARMv8 AArch32 AT_HWCAP2
// workaround was needed. See https://crbug.com/boringssl/46.
OPENSSL_EXPORT int CRYPTO_needs_hwcap2_workaround(void);
#endif
#endif // !OPENSSL_STATIC_ARMCAP
// CRYPTO_is_NEON_capable returns true if the current CPU has a NEON unit. If
// this is known statically, it is a constant inline function.
OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || \
defined(OPENSSL_STATIC_ARMCAP_NEON)
return 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
return 0;
#else
return CRYPTO_is_NEON_capable_at_runtime();
#endif
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) {
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_CRYPTO)
return 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
return 0;
#else
return CRYPTO_is_ARMv8_AES_capable_at_runtime();
#endif
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
return 1;
#elif defined(OPENSSL_STATIC_ARMCAP)
return 0;
#else
return CRYPTO_is_ARMv8_PMULL_capable_at_runtime();
#endif
}
#endif // OPENSSL_ARM || OPENSSL_AARCH64
#if defined(OPENSSL_PPC64LE)
// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
// the Vector.AES category of instructions.
int CRYPTO_is_PPC64LE_vcrypto_capable(void);
extern unsigned long OPENSSL_ppc64le_hwcap2;
#endif // OPENSSL_PPC64LE
#if defined(BORINGSSL_DISPATCH_TEST)
// Runtime CPU dispatch testing support
// BORINGSSL_function_hit is an array of flags. The following functions will
// set these flags if BORINGSSL_DISPATCH_TEST is defined.
// 0: aes_hw_ctr32_encrypt_blocks
// 1: aes_hw_encrypt
// 2: aesni_gcm_encrypt
// 3: aes_hw_set_encrypt_key
// 4: vpaes_encrypt
// 5: vpaes_set_encrypt_key
extern uint8_t BORINGSSL_function_hit[7];
#endif // BORINGSSL_DISPATCH_TEST
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CPU_H