Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
1.8 KiB
41 lines
1.8 KiB
#include <stdio.h>

/* http://bjoern.hoehrmann.de/utf-8/decoder/dfa */
/* Optimized version based on Rich Felker's variant. */

/*
 * Distinguished states of the validation automaton.  A state value is the
 * offset of that state's row in the transition table (12 entries per row).
 *
 * UTF8_ACCEPT: start state, and the state reached after each complete,
 *              well-formed sequence.
 * UTF8_REJECT: entered on the first invalid byte; it is never left again.
 */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

/*
 * Byte -> character class map (256 entries, indexed by the raw byte value).
 * Grouping bytes into 12 classes keeps the state transition table small:
 * each state only needs one entry per class instead of one per byte.
 */
static const unsigned char utf8d[] = {
    /* 0x00-0x7F: ASCII, class 0 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80-0x8F: class 1, 0x90-0x9F: class 9 (continuation bytes) */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
    /* 0xA0-0xBF: class 7 (continuation bytes) */
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    /* 0xC0-0xC1: class 8 (always invalid), 0xC2-0xDF: class 2 (2-byte lead) */
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xE0: class 10, 0xE1-0xEC/0xEE-0xEF: class 3, 0xED: class 4 (3-byte leads);
     * 0xF0: class 11, 0xF1-0xF3: class 6, 0xF4: class 5 (4-byte leads);
     * 0xF5-0xFF: class 8 (always invalid) */
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
};

/* Note: Splitting the table improves performance on ARM due to its simpler
 * addressing modes not being able to encode x[y + 256]. */

/*
 * State transition table: next_state = utf8s[state + character_class].
 * There are 9 states with 12 class columns each (108 entries); a state's
 * value is the offset of its row, so no multiplication is needed at lookup.
 * State 0 accepts, state 12 rejects.
 */
static const unsigned char utf8s[] = {
    /* class:  0  1  2  3  4  5  6  7  8  9 10 11 */
    /*  0 */   0,12,24,36,60,96,84,12,12,12,48,72,  /* start / accept */
    /* 12 */  12,12,12,12,12,12,12,12,12,12,12,12,  /* reject (absorbing) */
    /* 24 */  12, 0,12,12,12,12,12, 0,12, 0,12,12,  /* one continuation byte left */
    /* 36 */  12,24,12,12,12,12,12,24,12,24,12,12,  /* two continuation bytes left */
    /* 48 */  12,12,12,12,12,12,12,24,12,12,12,12,  /* after class 10: only class 7 allowed */
    /* 60 */  12,24,12,12,12,12,12,12,12,24,12,12,  /* after class 4: only classes 1, 9 allowed */
    /* 72 */  12,12,12,12,12,12,12,36,12,36,12,12,  /* after class 11: only classes 7, 9 allowed */
    /* 84 */  12,36,12,12,12,12,12,36,12,36,12,12,  /* after class 6: any continuation byte */
    /* 96 */  12,36,12,12,12,12,12,12,12,12,12,12   /* after class 5: only class 1 allowed */
};

/* Return 0 on success, -1 on error */ |
|
int utf8_lookup(const unsigned char *data, int len) |
|
{ |
|
int state = 0; |
|
|
|
while (len-- && state != UTF8_REJECT) |
|
state = utf8s[state + utf8d[*data++]]; |
|
|
|
return state == UTF8_ACCEPT ? 0 : -1; |
|
}
|
|
|