Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

200 lines
6.3 KiB

/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdlib.h>
#include "upb/pb/textprinter.h"
struct _upb_textprinter {
upb_bytesink *bytesink;
int indent_depth;
bool single_line;
upb_status status;
};
#define CHECK(x) if ((x) < 0) goto err;
static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release.
// TODO; we could read directly fraom a bytesrc's buffer instead.
// TODO; we could write directly into a bytesink's buffer instead.
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
char buf[strref->len], *src = buf;
char *end = src + strref->len;
upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf);
// I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also.
bool use_hex = false;
bool last_hex_escape = false; // true if last output char was \xNN
for (; src < end; src++) {
if (dstend - dst < 4) {
CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
dst = dstbuf;
}
bool is_hex_escape = false;
switch (*src) {
case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
default:
// Note that if we emit \xNN and the src character after that is a hex
// digit then that digit must be escaped too to prevent it being
// interpreted as part of the character code by C.
if ((!preserve_utf8 || (uint8_t)*src < 0x80) &&
(!isprint(*src) || (last_hex_escape && isxdigit(*src)))) {
sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*src);
is_hex_escape = use_hex;
dst += 4;
} else {
*(dst++) = *src; break;
}
}
last_hex_escape = is_hex_escape;
}
// Flush remaining data.
CHECK(upb_bytesink_write(p->bytesink, dst, dst - dstbuf, &p->status));
return 0;
err:
return -1;
}
static int upb_textprinter_indent(upb_textprinter *p) {
if(!p->single_line)
for(int i = 0; i < p->indent_depth; i++)
CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
return 0;
err:
return -1;
}
static int upb_textprinter_endfield(upb_textprinter *p) {
if(p->single_line) {
CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
} else {
CHECK(upb_bytesink_writestr(p->bytesink, "\n", &p->status));
}
return 0;
err:
return -1;
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
upb_value val) {
upb_textprinter *p = _p;
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
#define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) {
// TODO: figure out what we should really be doing for these
// floating-point formats.
case UPB_TYPE(DOUBLE):
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
case UPB_TYPE(FLOAT):
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
case UPB_TYPE(INT64):
case UPB_TYPE(SFIXED64):
case UPB_TYPE(SINT64):
CASE("%" PRId64, int64)
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
CASE("%" PRIu64, uint64)
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32):
CASE("%" PRIu32, uint32);
case UPB_TYPE(ENUM): {
upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
if (label) {
// We found a corresponding string for this enum. Otherwise we fall
// through to the int32 code path.
CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
break;
}
}
case UPB_TYPE(INT32):
case UPB_TYPE(SFIXED32):
case UPB_TYPE(SINT32):
CASE("%" PRId32, int32)
case UPB_TYPE(BOOL):
CASE("%hhu", bool);
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
f->type == UPB_TYPE(STRING)));
CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
break;
}
}
upb_textprinter_endfield(p);
return UPB_CONTINUE;
err:
return UPB_BREAK;
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p;
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
bool ret = upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name);
if (!ret) return UPB_SBREAK;
if (!p->single_line)
upb_bytesink_writestr(p->bytesink, "\n", &p->status);
p->indent_depth++;
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
return UPB_CONTINUE_WITH(_p);
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
(void)fval;
upb_textprinter *p = _p;
p->indent_depth--;
upb_textprinter_indent(p);
upb_bytesink_writestr(p->bytesink, "}", &p->status);
upb_textprinter_endfield(p);
return UPB_CONTINUE;
}
upb_textprinter *upb_textprinter_new() {
upb_textprinter *p = malloc(sizeof(*p));
return p;
}
void upb_textprinter_free(upb_textprinter *p) {
free(p);
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink,
bool single_line) {
p->bytesink = sink;
p->single_line = single_line;
p->indent_depth = 0;
}
upb_mhandlers *upb_textprinter_reghandlers(upb_handlers *h, upb_msgdef *m) {
upb_handlerset hset = {
NULL, // startmsg
NULL, // endmsg
upb_textprinter_value,
upb_textprinter_startsubmsg,
upb_textprinter_endsubmsg,
NULL, // startseq
NULL, // endseq
};
return upb_handlers_reghandlerset(h, m, &hset);
}