Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

425 lines
13 KiB

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * UTF-8 to UTF-16 converters exercised by this program. Their definitions
 * are not part of this file.
 *
 *   buf8, len8 - input bytes and their count
 *   buf16      - output buffer
 *   len16      - NOTE(review): every caller in this file presets *len16 to
 *                the size of buf16 before the call and reads it back
 *                afterwards, so it looks like an in/out size. Confirm the
 *                units (bytes vs. 16-bit units) against the implementations.
 *
 * Callers in this file treat a non-zero return value as failure.
 */
int utf8_to16_iconv(const unsigned char *buf8, size_t len8,
unsigned short *buf16, size_t *len16);
int utf8_to16_naive(const unsigned char *buf8, size_t len8,
unsigned short *buf16, size_t *len16);
/*
 * Converter implementations that can be selected by name on the command
 * line. The "iconv" entry is also the reference the others are compared to.
 */
static struct ftab {
    /* Name matched against the [alg] command-line argument. */
    const char *name;
    /* Converter entry point. */
    int (*func)(const unsigned char *buf8, size_t len8,
                unsigned short *buf16, size_t *len16);
} ftab[] = {
    { .name = "iconv", .func = utf8_to16_iconv },
    { .name = "naive", .func = utf8_to16_naive },
};
/*
 * Allocate a synthetic UTF-8 test buffer of len bytes.
 *
 * The buffer holds as many whole copies of the 4-byte sequence
 * F0 90 BF 80 as fit; the 0-3 bytes left over are filled with 0x7F.
 *
 * A non-positive len yields an empty buffer that is still non-NULL and can
 * be passed to free(). The process exits if the allocation fails.
 * The caller owns the returned memory.
 */
static unsigned char *load_test_buf(int len)
{
    static const unsigned char utf8[] = { 0xF0, 0x90, 0xBF, 0x80 };
    const size_t utf8_len = sizeof(utf8);
    /* Clamp: a negative count would otherwise run the fill past the buffer */
    size_t remaining = len > 0 ? (size_t)len : 0;
    /* malloc(0) may legally return NULL, so always request at least 1 byte */
    unsigned char *data = malloc(remaining ? remaining : 1);
    unsigned char *p = data;

    if (data == NULL) {
        printf("Failed to allocate test buffer!\n");
        exit(1);
    }

    while (remaining >= utf8_len) {
        memcpy(p, utf8, utf8_len);
        p += utf8_len;
        remaining -= utf8_len;
    }
    /* Pad the tail with single-byte characters */
    memset(p, 0x7F, remaining);

    return data;
}
/*
 * Read the whole of ../UTF-8-demo.txt into a freshly allocated buffer.
 *
 * On return *len holds the file size in bytes. The caller owns the returned
 * memory. Any failure (open, stat, oversized file, allocation, read) prints
 * a message and exits the process with status 1.
 */
static unsigned char *load_test_file(int *len)
{
    static const char path[] = "../UTF-8-demo.txt";
    unsigned char *data;
    struct stat st;
    size_t size, done = 0;
    int fd;

    fd = open(path, O_RDONLY);
    if (fd == -1) {
        printf("Failed to open %s!\n", path);
        exit(1);
    }
    if (fstat(fd, &st) == -1) {
        printf("Failed to get file size!\n");
        exit(1);
    }
    /* The size is reported through an int, so refuse what does not fit */
    if (st.st_size < 0 || st.st_size > INT_MAX) {
        printf("Test file is too large!\n");
        exit(1);
    }
    size = (size_t)st.st_size;

    /* malloc(0) may legally return NULL, so always request at least 1 byte */
    data = malloc(size ? size : 1);
    if (data == NULL) {
        printf("Failed to allocate file buffer!\n");
        exit(1);
    }

    /* read() may return fewer bytes than requested; keep going until done */
    while (done < size) {
        ssize_t n = read(fd, data + done, size - done);
        if (n <= 0) {
            /* error, or the file shrank underneath us */
            printf("Failed to read file!\n");
            exit(1);
        }
        done += (size_t)n;
    }
    close(fd);

    *len = (int)size;
    return data;
}
/*
 * Dump a test vector to stdout as its length followed by a C-style string
 * literal in which every byte is written as a \xHH escape.
 */
static void print_test(const unsigned char *data, int len)
{
    const unsigned char *cur = data;

    printf(" [len=%d] \"", len);
    for (int left = len; left != 0; --left, ++cur)
        printf("\\x%02X", *cur);
    printf("\"\n");
}
/* One test vector: a byte string together with its explicit length. */
struct test {
/* Raw bytes fed to the converters; may contain embedded NUL bytes */
const unsigned char *data;
/* Number of bytes in data */
int len;
};
/*
 * Fill exactly 1024 bytes of buf by concatenating tokens from pos[],
 * starting at pos_idx and wrapping around at pos_len.
 *
 * As soon as the next token no longer fits into the space that is left, the
 * remainder of the 1024 bytes is zero-filled instead, so no token is ever
 * cut in half.
 */
static void prepare_test_buf(unsigned char *buf, const struct test *pos,
        int pos_len, int pos_idx)
{
    enum { TOTAL = 1024 };
    int filled = 0;

    while (filled < TOTAL) {
        const struct test *tok = &pos[pos_idx];
        const int room = TOTAL - filled;

        if (tok->len > room) {
            /* Token would overflow: pad the tail with NUL bytes */
            memset(buf + filled, 0, room);
            filled = TOTAL;
        } else {
            memcpy(buf + filled, tok->data, tok->len);
            filled += tok->len;
        }

        /* Advance to the next token, cycling through the table */
        pos_idx += 1;
        if (pos_idx == pos_len)
            pos_idx = 0;
    }
}
/*
 * Size handed to the converters as *len16 for the manual tests.
 * test() allocates LEN16 bytes for each of the two scratch buffers.
 */
#define LEN16 4096

/*
 * Convert data[0..len) with both ftab->func and the iconv reference and
 * compare return code, reported output length and output contents.
 *
 * kind is "positive" or "negative" and only affects the failure message.
 * Returns 0 when both agree; otherwise prints the offending input and
 * returns -1.
 *
 * NOTE(review): memcmp() compares len16 *bytes*. If the converters report
 * len16 in 16-bit units, only half of the output is compared -- confirm
 * against the converter implementations.
 */
static int check_against_iconv(const struct ftab *ftab, const char *kind,
        const unsigned char *data, int len,
        unsigned short *out16, unsigned short *ref16)
{
    /* Fresh sizes for every call: the converters update them in place */
    size_t out_len = LEN16, ref_len = LEN16;
    const int ret = ftab->func(data, len, out16, &out_len);
    const int ref_ret = utf8_to16_iconv(data, len, ref16, &ref_len);

    if (ret != ref_ret || out_len != ref_len ||
        memcmp(out16, ref16, out_len)) {
        printf("FAILED %s test(%d:%d, %zu:%zu): ",
               kind, ret, ref_ret, out_len, ref_len);
        print_test(data, len);
        return -1;
    }
    return 0;
}

/* Move buf[0..len) up by one byte and store fill in the freed first byte. */
static void shift_in_front(unsigned char *buf, int len, unsigned char fill)
{
    memmove(buf + 1, buf, len);
    buf[0] = fill;
}

/*
 * Run the hand-written positive and negative test vectors through
 * ftab->func, using utf8_to16_iconv as the reference.
 *
 *   buf16  - scratch output buffer for the converter under test
 *   _buf16 - scratch output buffer for the iconv reference
 *
 * Return 0 on success, -1 on error (the first mismatch is printed).
 */
static int test_manual(const struct ftab *ftab, unsigned short *buf16,
        unsigned short *_buf16)
{
/* The tables initialise unsigned char pointers from plain string literals */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpointer-sign"
    /* positive tests */
    static const struct test pos[] = {
        {"", 0},
        {"\x00", 1},
        {"\x66", 1},
        {"\x7F", 1},
        {"\x00\x7F", 2},
        {"\x7F\x00", 2},
        {"\xC2\x80", 2},
        {"\xDF\xBF", 2},
        {"\xE0\xA0\x80", 3},
        {"\xE0\xA0\xBF", 3},
        {"\xED\x9F\x80", 3},
        {"\xEF\x80\xBF", 3},
        {"\xF0\x90\xBF\x80", 4},
        {"\xF2\x81\xBE\x99", 4},
        {"\xF4\x8F\x88\xAA", 4},
    };
    /* negative tests */
    static const struct test neg[] = {
        {"\x80", 1},
        {"\xBF", 1},
        {"\xC0\x80", 2},
        {"\xC1\x00", 2},
        {"\xC2\x7F", 2},
        {"\xDF\xC0", 2},
        {"\xE0\x9F\x80", 3},
        {"\xE0\xC2\x80", 3},
        {"\xED\xA0\x80", 3},
        {"\xED\x7F\x80", 3},
        {"\xEF\x80\x00", 3},
        {"\xF0\x8F\x80\x80", 4},
        {"\xF0\xEE\x80\x80", 4},
        {"\xF2\x90\x91\x7F", 4},
        {"\xF4\x90\x88\xAA", 4},
        {"\xF4\x00\xBF\xBF", 4},
        {"\x00\x00\x00\x00\x00\xC2\x80\x00\x00\x00\xE1\x80\x80\x00\x00\xC2"
         "\xC2\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
         32},
        {"\x00\x00\x00\x00\x00\xC2\xC2\x80\x00\x00\xE1\x80\x80\x00\x00\x00",
         16},
        {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF1\x80",
         32},
        {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF1",
         32},
        {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF1\x80"
         "\x80", 33},
        {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF1\x80"
         "\xC2\x80", 34},
        {"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xF0"
         "\x80\x80\x80", 35},
    };
/* Restore the diagnostics saved by the push above */
#pragma GCC diagnostic pop

    enum {
        BASE_LEN = 1024,           /* bytes produced by prepare_test_buf() */
        SHIFTS   = 16,             /* one-byte shifts applied per vector   */
        MAX_SIZE = 2 * BASE_LEN    /* working buffer size, see below       */
    };
    const size_t pos_cnt = sizeof(pos) / sizeof(pos[0]);
    const size_t neg_cnt = sizeof(neg) / sizeof(neg[0]);

    /* Test single token */
    for (size_t i = 0; i < pos_cnt; ++i) {
        if (check_against_iconv(ftab, "positive", pos[i].data, pos[i].len,
                                buf16, _buf16))
            return -1;
    }
    for (size_t i = 0; i < neg_cnt; ++i) {
        if (check_against_iconv(ftab, "negative", neg[i].data, neg[i].len,
                                buf16, _buf16))
            return -1;
    }

    /* Test shifted buffer to cover 1k length */
    /* buffer size must be greater than 1024 + 16 + max(test string length) */
    uint64_t buf64[MAX_SIZE / 8 + 2];
    /* Start one byte past an 8-byte boundary so the input is misaligned */
    unsigned char *buf = ((unsigned char *)buf64) + 1;

    for (size_t i = 0; i < pos_cnt; ++i) {
        /* Positive test: shift 16 bytes, validate each shift */
        prepare_test_buf(buf, pos, (int)pos_cnt, (int)i);
        int buf_len = BASE_LEN;
        for (int j = 0; j < SHIFTS; ++j) {
            if (check_against_iconv(ftab, "positive", buf, buf_len,
                                    buf16, _buf16))
                return -1;
            shift_in_front(buf, buf_len, 0x55);
            ++buf_len;
        }

        /*
         * Negative test: strip trailing ASCII bytes, then drop the final
         * byte of what remains so the input ends inside a multi-byte
         * sequence.
         */
        while (buf_len >= 1 && buf[buf_len - 1] <= 0x7F)
            --buf_len;
        if (buf_len &&
            check_against_iconv(ftab, "negative", buf, buf_len - 1,
                                buf16, _buf16))
            return -1;
    }

    /* Negative test */
    for (size_t i = 0; i < neg_cnt; ++i) {
        /* Append one error token, shift 16 bytes, validate each shift */
        prepare_test_buf(buf, pos, (int)pos_cnt, (int)(i % pos_cnt));
        memcpy(buf + BASE_LEN, neg[i].data, neg[i].len);
        int buf_len = BASE_LEN + neg[i].len;
        for (int j = 0; j < SHIFTS; ++j) {
            if (check_against_iconv(ftab, "negative", buf, buf_len,
                                    buf16, _buf16))
                return -1;
            shift_in_front(buf, buf_len, 0x66);
            ++buf_len;
        }
    }

    return 0;
}
/*
 * Check one converter against the iconv reference and print the verdicts.
 *
 *   buf8, len8   - UTF-8 input (test file or synthetic buffer)
 *   buf16, len16 - output buffer for the converter under test and its size
 *   ftab         - converter to check; the "iconv" entry is skipped because
 *                  it is the reference itself
 *
 * Two checks are run: the "standard test" on the supplied input, and the
 * hand-written vectors of test_manual(). The process exits if the reference
 * rejects the input or if a scratch buffer cannot be allocated.
 */
static void test(const unsigned char *buf8, size_t len8,
        unsigned short *buf16, size_t len16, const struct ftab *ftab)
{
    /* Use iconv as the reference answer */
    if (strcmp(ftab->name, "iconv") == 0)
        return;

    printf("%s\n", ftab->name);

    /* Test file or buffer */
    size_t ref_len16 = len16;
    /* malloc(0) may legally return NULL, so always request at least 1 byte */
    unsigned short *ref_buf16 = malloc(ref_len16 ? ref_len16 : 1);
    if (ref_buf16 == NULL) {
        printf("Failed to allocate reference buffer!\n");
        exit(1);
    }
    if (utf8_to16_iconv(buf8, len8, ref_buf16, &ref_len16)) {
        printf("Invalid test file or buffer!\n");
        exit(1);
    }

    printf("standard test: ");
    if (ftab->func(buf8, len8, buf16, &len16) || len16 != ref_len16 ||
        memcmp(buf16, ref_buf16, len16) != 0)
        printf("FAIL\n");
    else
        printf("pass\n");
    free(ref_buf16);

    /* Manual cases: one scratch buffer per converter */
    unsigned short *out16 = malloc(LEN16);
    unsigned short *ref16 = malloc(LEN16);
    if (out16 == NULL || ref16 == NULL) {
        printf("Failed to allocate manual test buffers!\n");
        exit(1);
    }
    printf("manual test: %s\n",
           test_manual(ftab, out16, ref16) ? "FAIL" : "pass");
    free(out16);
    free(ref16);

    printf("\n");
}
/*
 * Benchmark one converter: convert the same input repeatedly until about
 * 1 GiB of UTF-8 has been processed, then print pass/FAIL, elapsed time,
 * data volume and bandwidth.
 *
 *   buf8, len8   - UTF-8 input
 *   buf16, len16 - output buffer and its size
 *   ftab         - converter to measure
 *
 * An empty input is reported and skipped, because the loop count is derived
 * by dividing by len8.
 */
static void bench(const unsigned char *buf8, size_t len8,
        unsigned short *buf16, size_t len16, const struct ftab *ftab)
{
    if (len8 == 0) {
        printf("bench %s: empty input, nothing to measure\n\n", ftab->name);
        return;
    }

    const size_t total = (size_t)1024 * 1024 * 1024;
    const size_t loops = total / len8;
    int ret = 0;
    struct timeval tv1, tv2;

    fprintf(stderr, "bench %s... ", ftab->name);

    gettimeofday(&tv1, 0);
    for (size_t i = 0; i < loops; ++i) {
        /*
         * Hand the converter the full buffer size on every call; it
         * overwrites the value it is given (see how test_manual resets it).
         */
        size_t out_len = len16;
        ret |= ftab->func(buf8, len8, buf16, &out_len);
    }
    gettimeofday(&tv2, 0);

    printf("%s\n", ret ? "FAIL" : "pass");

    /* Microsecond part first (may be negative), then whole seconds */
    double elapsed = tv2.tv_usec - tv1.tv_usec;
    elapsed = elapsed / 1000000 + tv2.tv_sec - tv1.tv_sec;
    const double size = ((double)len8 * (double)loops) / (1024 * 1024);

    printf("time: %.4f s\n", elapsed);
    printf("data: %.0f MB\n", size);
    printf("BW: %.2f MB/s\n", size / elapsed);
    printf("\n");
}
static void usage(const char *bin)
{
printf("Usage:\n");
printf("%s test [alg] ==> test all or one algorithm\n", bin);
printf("%s bench [alg] ==> benchmark all or one algorithm\n", bin);
printf("%s bench size NUM ==> benchmark with specific buffer size\n", bin);
printf("alg = ");
for (int i = 0; i < sizeof(ftab)/sizeof(ftab[0]); ++i)
printf("%s ", ftab[i].name);
printf("\nNUM = buffer size in bytes, 1 ~ 67108864(64M)\n");
}
int main(int argc, char *argv[])
{
int len8 = 0, len16;
unsigned char *buf8;
unsigned short *buf16;
const char *alg = NULL;
void (*tb)(const unsigned char *buf8, size_t len8,
unsigned short *buf16, size_t len16, const struct ftab *ftab);
tb = NULL;
if (argc >= 2) {
if (strcmp(argv[1], "test") == 0)
tb = test;
else if (strcmp(argv[1], "bench") == 0)
tb = bench;
if (argc >= 3) {
alg = argv[2];
if (strcmp(alg, "size") == 0) {
if (argc < 4) {
tb = NULL;
} else {
alg = NULL;
len8 = atoi(argv[3]);
if (len8 <= 0 || len8 > 67108864) {
printf("Buffer size error!\n\n");
tb = NULL;
}
}
}
}
}
if (tb == NULL) {
usage(argv[0]);
return 1;
}
/* Load UTF8 test buffer */
if (len8)
buf8 = load_test_buf(len8);
else
buf8 = load_test_file(&len8);
/* Prepare UTF16 buffer large enough */
len16 = len8 * 2;
buf16 = (unsigned short *)malloc(len16);
if (tb == bench)
printf("============== Bench UTF8 (%d bytes) ==============\n", len8);
for (int i = 0; i < sizeof(ftab)/sizeof(ftab[0]); ++i) {
if (alg && strcmp(alg, ftab[i].name) != 0)
continue;
tb((const unsigned char *)buf8, len8, buf16, len16, &ftab[i]);
}
#if 0
if (tb == bench) {
printf("==================== Bench ASCII ====================\n");
/* Change test buffer to ascii */
for (int i = 0; i < len; i++)
data[i] &= 0x7F;
for (int i = 0; i < sizeof(ftab)/sizeof(ftab[0]); ++i) {
if (alg && strcmp(alg, ftab[i].name) != 0)
continue;
tb((const unsigned char *)data, len, &ftab[i]);
printf("\n");
}
}
#endif
return 0;
}