mirror of https://github.com/grpc/grpc.git
The C based gRPC (C++, Python, Ruby, Objective-C, PHP, C#)
https://grpc.io/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
223 lines
5.0 KiB
223 lines
5.0 KiB
2 years ago
|
#include <sys/time.h>
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <cassert>
|
||
|
#include <cstdint>
|
||
|
#include <cstdio>
|
||
|
#include <cstring>
|
||
|
#include <vector>
|
||
|
|
||
|
static inline int ascii_std(const uint8_t *data, int len) {
|
||
|
return !std::any_of(data, data + len, [](int8_t b) { return b < 0; });
|
||
|
}
|
||
|
|
||
|
static inline int ascii_u64(const uint8_t *data, int len) {
|
||
|
uint8_t orall = 0;
|
||
|
|
||
|
if (len >= 16) {
|
||
|
uint64_t or1 = 0, or2 = 0;
|
||
|
const uint8_t *data2 = data + 8;
|
||
|
|
||
|
do {
|
||
|
or1 |= *(const uint64_t *)data;
|
||
|
or2 |= *(const uint64_t *)data2;
|
||
|
data += 16;
|
||
|
data2 += 16;
|
||
|
len -= 16;
|
||
|
} while (len >= 16);
|
||
|
|
||
|
/*
|
||
|
* Idea from Benny Halevy <bhalevy@scylladb.com>
|
||
|
* - 7-th bit set ==> orall = !(non-zero) - 1 = 0 - 1 = 0xFF
|
||
|
* - 7-th bit clear ==> orall = !0 - 1 = 1 - 1 = 0x00
|
||
|
*/
|
||
|
orall = !((or1 | or2) & 0x8080808080808080ULL) - 1;
|
||
|
}
|
||
|
|
||
|
while (len--) orall |= *data++;
|
||
|
|
||
|
return orall < 0x80;
|
||
|
}
|
||
|
|
||
|
#if defined(__x86_64__)
|
||
|
#include <x86intrin.h>
|
||
|
|
||
|
static inline int ascii_simd(const uint8_t *data, int len) {
|
||
|
if (len >= 32) {
|
||
|
const uint8_t *data2 = data + 16;
|
||
|
|
||
|
__m128i or1 = _mm_set1_epi8(0), or2 = or1;
|
||
|
|
||
|
while (len >= 32) {
|
||
|
__m128i input1 = _mm_loadu_si128((const __m128i *)data);
|
||
|
__m128i input2 = _mm_loadu_si128((const __m128i *)data2);
|
||
|
|
||
|
or1 = _mm_or_si128(or1, input1);
|
||
|
or2 = _mm_or_si128(or2, input2);
|
||
|
|
||
|
data += 32;
|
||
|
data2 += 32;
|
||
|
len -= 32;
|
||
|
}
|
||
|
|
||
|
or1 = _mm_or_si128(or1, or2);
|
||
|
if (_mm_movemask_epi8(_mm_cmplt_epi8(or1, _mm_set1_epi8(0)))) return 0;
|
||
|
}
|
||
|
|
||
|
return ascii_u64(data, len);
|
||
|
}
|
||
|
|
||
|
#elif defined(__aarch64__)
|
||
|
#include <arm_neon.h>
|
||
|
|
||
|
static inline int ascii_simd(const uint8_t *data, int len) {
|
||
|
if (len >= 32) {
|
||
|
const uint8_t *data2 = data + 16;
|
||
|
|
||
|
uint8x16_t or1 = vdupq_n_u8(0), or2 = or1;
|
||
|
|
||
|
while (len >= 32) {
|
||
|
const uint8x16_t input1 = vld1q_u8(data);
|
||
|
const uint8x16_t input2 = vld1q_u8(data2);
|
||
|
|
||
|
or1 = vorrq_u8(or1, input1);
|
||
|
or2 = vorrq_u8(or2, input2);
|
||
|
|
||
|
data += 32;
|
||
|
data2 += 32;
|
||
|
len -= 32;
|
||
|
}
|
||
|
|
||
|
or1 = vorrq_u8(or1, or2);
|
||
|
if (vmaxvq_u8(or1) >= 0x80) return 0;
|
||
|
}
|
||
|
|
||
|
return ascii_u64(data, len);
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
struct ftab {
|
||
|
const char *name;
|
||
|
int (*func)(const uint8_t *data, int len);
|
||
|
};
|
||
|
|
||
|
static const std::vector<ftab> _f = {
|
||
|
{
|
||
|
.name = "std",
|
||
|
.func = ascii_std,
|
||
|
},
|
||
|
{
|
||
|
.name = "u64",
|
||
|
.func = ascii_u64,
|
||
|
},
|
||
|
{
|
||
|
.name = "simd",
|
||
|
.func = ascii_simd,
|
||
|
},
|
||
|
};
|
||
|
|
||
|
static void load_test_buf(uint8_t *data, int len) {
|
||
|
uint8_t v = 0;
|
||
|
|
||
|
for (int i = 0; i < len; ++i) {
|
||
|
data[i] = v++;
|
||
|
v &= 0x7F;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void bench(const struct ftab &f, const uint8_t *data, int len) {
|
||
|
const int loops = 1024 * 1024 * 1024 / len;
|
||
|
int ret = 1;
|
||
|
double time_aligned, time_unaligned, size;
|
||
|
struct timeval tv1, tv2;
|
||
|
|
||
|
fprintf(stderr, "bench %s (%d bytes)... ", f.name, len);
|
||
|
|
||
|
/* aligned */
|
||
|
gettimeofday(&tv1, 0);
|
||
|
for (int i = 0; i < loops; ++i) ret &= f.func(data, len);
|
||
|
gettimeofday(&tv2, 0);
|
||
|
time_aligned = tv2.tv_usec - tv1.tv_usec;
|
||
|
time_aligned = time_aligned / 1000000 + tv2.tv_sec - tv1.tv_sec;
|
||
|
|
||
|
/* unaligned */
|
||
|
gettimeofday(&tv1, 0);
|
||
|
for (int i = 0; i < loops; ++i) ret &= f.func(data + 1, len);
|
||
|
gettimeofday(&tv2, 0);
|
||
|
time_unaligned = tv2.tv_usec - tv1.tv_usec;
|
||
|
time_unaligned = time_unaligned / 1000000 + tv2.tv_sec - tv1.tv_sec;
|
||
|
|
||
|
printf("%s ", ret ? "pass" : "FAIL");
|
||
|
|
||
|
size = ((double)len * loops) / (1024 * 1024);
|
||
|
printf("%.0f/%.0f MB/s\n", size / time_aligned, size / time_unaligned);
|
||
|
}
|
||
|
|
||
|
static void test(const struct ftab &f, uint8_t *data, int len) {
|
||
|
int error = 0;
|
||
|
|
||
|
fprintf(stderr, "test %s (%d bytes)... ", f.name, len);
|
||
|
|
||
|
/* positive */
|
||
|
error |= !f.func(data, len);
|
||
|
|
||
|
/* negative */
|
||
|
if (len < 100 * 1024) {
|
||
|
for (int i = 0; i < len; ++i) {
|
||
|
data[i] += 0x80;
|
||
|
error |= f.func(data, len);
|
||
|
data[i] -= 0x80;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
printf("%s\n", error ? "FAIL" : "pass");
|
||
|
}
|
||
|
|
||
|
/* ./ascii [test|bench] [alg] */
|
||
|
int main(int argc, const char *argv[]) {
|
||
|
int do_test = 1, do_bench = 1;
|
||
|
const char *alg = NULL;
|
||
|
|
||
|
if (argc > 1) {
|
||
|
do_bench &= !!strcmp(argv[1], "test");
|
||
|
do_test &= !!strcmp(argv[1], "bench");
|
||
|
}
|
||
|
|
||
|
if (do_bench && argc > 2) alg = argv[2];
|
||
|
|
||
|
const std::vector<int> size = {
|
||
|
9, 16 + 1, 32 - 1, 128 + 1,
|
||
|
1024 + 15, 16 * 1024 + 1, 64 * 1024 + 15, 1024 * 1024};
|
||
|
|
||
|
int max_size = *std::max_element(size.begin(), size.end());
|
||
|
uint8_t *_data = new uint8_t[max_size + 1];
|
||
|
assert(((uintptr_t)_data & 7) == 0);
|
||
|
uint8_t *data = _data + 1; /* Unalign buffer address */
|
||
|
|
||
|
_data[0] = 0;
|
||
|
load_test_buf(data, max_size);
|
||
|
|
||
|
if (do_test) {
|
||
|
printf("==================== Test ====================\n");
|
||
|
for (int sz : size) {
|
||
|
for (auto &f : _f) {
|
||
|
test(f, data, sz);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (do_bench) {
|
||
|
printf("==================== Bench ====================\n");
|
||
|
for (int sz : size) {
|
||
|
for (auto &f : _f) {
|
||
|
if (!alg || strcmp(alg, f.name) == 0) bench(f, _data, sz);
|
||
|
}
|
||
|
printf("-----------------------------------------------\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
delete _data;
|
||
|
return 0;
|
||
|
}
|