Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

319 lines
9.9 KiB

/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* OPT: This is not optimized at all. It uses printf() which parses the format
* string every time, and it allocates memory for every put.
*/
#include "upb/pb/textprinter.h"
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "upb/sink.h"
// Jumps to the enclosing function's `err` label when `x` evaluates to a
// negative value. The expansion is wrapped in do/while(0) and carries no
// trailing semicolon, so `CHECK(...);` is exactly one statement and is safe
// to use as the body of an unbraced if/else.
#define CHECK(x) do { if ((x) < 0) goto err; } while (0)
// Returns the part of `longname` that follows its last '.', or `longname`
// itself when it contains no '.'. The result points into `longname`.
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
// Writes the indentation for the current nesting depth: two spaces per level
// of p->indent_depth_. Nothing is written in single-line mode.
// Always returns 0.
static int indent(upb_textprinter *p) {
  int i;
  if (!p->single_line_) {
    for (i = 0; i < p->indent_depth_; i++) {
      // The literal must contain two spaces: with a one-character literal a
      // length of 2 would also emit the string's terminating NUL byte.
      upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
    }
  }
  return 0;
}
// Terminates a field: writes a single space in single-line mode and a newline
// otherwise. Always returns 0.
static int endfield(upb_textprinter *p) {
  char terminator;
  if (p->single_line_) {
    terminator = ' ';
  } else {
    terminator = '\n';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &terminator, 1, NULL);
  return 0;
}
// Writes the `len` bytes at `buf` to the output sink with C-style escaping.
// Based on CEscapeInternal() from Google's protobuf release.
//
// Newline, carriage return, tab, both quote characters and backslash get their
// two-character escapes. Any other byte that needs escaping is written as a
// three-digit octal escape. When `preserve_utf8` is true, bytes >= 0x80 are
// passed through unescaped. Output is staged in a stack buffer and flushed to
// the sink whenever it is nearly full. Always returns 0.
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  // I think hex is prettier and more useful, but proto2 uses octal; should
  // investigate whether it can parse hex also.
  const bool use_hex = false;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; buf < end; buf++) {
    // The longest escape is four characters and snprintf() appends a NUL
    // after it, so five bytes must be free or the terminator would land one
    // byte past the end of dstbuf.
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF); plain char may be negative.
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;
    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        // Note that if we emit \xNN and the buf character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          snprintf(dst, (size_t)(dstend - dst),
                   (use_hex ? "\\x%02x" : "\\%03o"), (unsigned)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }

  // Flush remaining data.
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
// printf-style output: formats `fmt` and its arguments into a temporary heap
// buffer and writes the result (without the NUL terminator) to the output
// sink.
//
// Returns false if formatting or allocation fails; otherwise returns the
// result of upb_bytessink_putbuf() converted to bool.
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);

  // Run once to get the length of the string.
  va_list args_copy;
  va_copy(args_copy, args);
  int len = vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);
  if (len < 0) {
    // vsnprintf() reports an encoding error with a negative value; there is
    // nothing meaningful to allocate or print.
    va_end(args);
    return false;
  }

  // + 1 for NULL terminator (vsnprintf() requires it even if we don't).
  char *str = malloc((size_t)len + 1);
  if (!str) {
    // Every va_start() must be paired with va_end() before returning.
    va_end(args);
    return false;
  }
  int written = vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT_VAR(written, written == len);

  bool ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  free(str);
  return ok;
}
/* handlers *******************************************************************/
// Start-of-message handler. At nesting depth 0 it starts the output byte
// sink, storing the sink's sub-closure in p->subc. Always returns true.
static bool startmsg(void *c, const void *hd) {
  upb_textprinter *printer = c;
  const bool at_top_level = (printer->indent_depth_ == 0);
  if (at_top_level) {
    upb_bytessink_start(printer->output_, 0, &printer->subc);
  }
  return true;
}
// End-of-message handler. At nesting depth 0 it ends the output byte sink.
// Always returns true; the status argument is not written.
static bool endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool at_top_level = (printer->indent_depth_ == 0);
  if (at_top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
// Defines put<name>(): a value handler that prints one scalar field as
// "<field name>: <value>", where the value is formatted with the printf
// conversion `fmt`. The line is preceded by indentation and followed by the
// field terminator. The generated handler returns false if indent() or
// endfield() reports a negative result, true otherwise.
#define TYPE(name, ctype, fmt) \
  static bool put ## name(void *closure, const void *handler_data, ctype val) {\
    upb_textprinter *printer = closure; \
    const upb_fielddef *field = handler_data; \
    if (indent(printer) < 0) return false; \
    putf(printer, "%s: " fmt, upb_fielddef_name(field), val); \
    if (endfield(printer) < 0) return false; \
    return true; \
  }
// Value handler for bool fields: prints "<field name>: true" or
// "<field name>: false", preceded by indentation and followed by the field
// terminator. Returns false if indent() or endfield() reports a negative
// result, true otherwise.
static bool putbool(void *closure, const void *handler_data, bool val) {
  upb_textprinter *printer = closure;
  const upb_fielddef *field = handler_data;
  const char *text = val ? "true" : "false";

  if (indent(printer) < 0) {
    return false;
  }
  putf(printer, "%s: %s", upb_fielddef_name(field), text);
  if (endfield(printer) < 0) {
    return false;
  }
  return true;
}
// Two-level stringification: the extra level lets a macro argument such as
// FLT_DIG expand to its value before it is turned into a string literal.
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

// Instantiate the scalar value handlers (putint32, putint64, ...). There are
// deliberately no trailing semicolons: each TYPE() expands to a complete
// function definition, and a stray ';' at file scope is not valid ISO C.
TYPE(int32, int32_t, "%" PRId32)
TYPE(int64, int64_t, "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
// Output a symbolic value from the enum if found, else just print as int32.
//
// `handler_data` is the field's upb_fielddef. Returns false if writing the
// field fails, true otherwise.
static bool putenum(void *closure, const void *handler_data, int32_t val) {
  upb_textprinter *p = closure;
  const upb_fielddef *f = handler_data;
  const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
  const char *label = upb_enumdef_iton(enum_def, val);

  if (!label) {
    // putint32() reports failure as `false`, which a "< 0" test can never
    // detect, so forward its result directly instead.
    return putint32(closure, handler_data, val);
  }

  // Same error handling as the other scalar handlers.
  CHECK(indent(p));
  putf(p, "%s: %s", upb_fielddef_name(f), label);
  CHECK(endfield(p));
  return true;
err:
  return false;
}
static void *startstr(void *closure, const void *handler_data,
size_t size_hint) {
const upb_fielddef *f = handler_data;
UPB_UNUSED(size_hint);
upb_textprinter *p = closure;
putf(p, "%s: \"", upb_fielddef_name(f));
return p;
}
// End-of-string handler: writes the closing quote followed by the field
// terminator. Always returns true.
static bool endstr(void *closure, const void *handler_data) {
  upb_textprinter *printer = closure;
  UPB_UNUSED(handler_data);

  putf(printer, "\"");
  endfield(printer);
  return true;
}
// String-data handler: writes one chunk of string contents in escaped form.
// Bytes >= 0x80 are preserved only when the field's type is UPB_TYPE_STRING.
// Returns `len` on success, or 0 if putescaped() reports a negative result.
static size_t putstr(void *closure, const void *hd, const char *buf,
                     size_t len, const upb_bufhandle *handle) {
  upb_textprinter *printer = closure;
  const upb_fielddef *field = hd;
  UPB_UNUSED(handle);

  const bool keep_utf8 = (upb_fielddef_type(field) == UPB_TYPE_STRING);
  if (putescaped(printer, buf, len, keep_utf8) < 0) {
    return 0;
  }
  return len;
}
// Start-of-submessage handler: writes the indented opener "<name> {" followed
// by a space (single-line mode) or a newline, then increases the nesting
// depth. `handler_data` is the name string to print.
// Returns the printer as the submessage closure, or UPB_BREAK if indent()
// reports a negative result.
static void *startsubmsg(void *closure, const void *handler_data) {
  upb_textprinter *printer = closure;
  const char *label = handler_data;

  if (indent(printer) < 0) {
    return UPB_BREAK;
  }
  const char after_brace = printer->single_line_ ? ' ' : '\n';
  putf(printer, "%s {%c", label, after_brace);
  printer->indent_depth_ += 1;
  return printer;
}
// End-of-submessage handler: decreases the nesting depth, then writes the
// indented closing brace and the field terminator.
// Returns false if indent() or endfield() reports a negative result.
static bool endsubmsg(void *closure, const void *handler_data) {
  upb_textprinter *printer = closure;
  UPB_UNUSED(handler_data);

  // Drop back to the parent's depth first so the brace lines up with the
  // opener.
  printer->indent_depth_ -= 1;
  if (indent(printer) < 0) {
    return false;
  }
  upb_bytessink_putbuf(printer->output_, printer->subc, "}", 1, NULL);
  return endfield(printer) >= 0;
}
/* Public API *****************************************************************/
// Initializes a text printer: multi-line mode, nesting depth 0, and an input
// sink bound to handlers `h` with the printer itself as closure.
// The output sink is not set here.
void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) {
  p->indent_depth_ = 0;
  p->single_line_ = false;
  upb_sink_reset(&p->input_, h, p);
}
// Counterpart of upb_textprinter_init(). Currently does nothing.
void upb_textprinter_uninit(upb_textprinter *p) {
  (void)p;
}
// Resets the printer's formatting state: selects single-line or multi-line
// mode and sets the nesting depth back to 0.
void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
  p->single_line_ = single_line;
  p->indent_depth_ = 0;
}
// Handler-registration callback: installs the text-printing handlers on `h`
// for the message type that `h` describes.
//
// Start/end-message handlers are always set. Each field then gets the handler
// matching its type, with the field's upb_fielddef as handler data, except
// submessage fields, whose handler data is the name string to print.
static void onmreg(void *c, upb_handlers *h) {
  (void)c;
  const upb_msgdef *msg = upb_handlers_msgdef(h);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);

  upb_msg_iter iter;
  upb_msg_begin(&iter, msg);
  while (!upb_msg_done(&iter)) {
    upb_fielddef *field = upb_msg_iter_field(&iter);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, field, NULL);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, field, putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, field, putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, field, putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, field, putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, field, putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        // Strings and bytes share the same start/data/end handler trio.
        upb_handlers_setstartstr(h, field, startstr, &attr);
        upb_handlers_setstring(h, field, putstr, &attr);
        upb_handlers_setendstr(h, field, endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        // Tag-delimited fields are labelled with the short name of their
        // message type; all other submessages use the field name.
        const char *label;
        if (upb_fielddef_istagdelim(field)) {
          label = shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(field)));
        } else {
          label = upb_fielddef_name(field);
        }
        // TODO(haberman): add "setconsthandlerdata"? If we pass NULL for
        // cleanup then we don't need a non-const pointer.
        upb_handlerattr_sethandlerdata(&attr, (void*)label, NULL);
        upb_handlers_setstartsubmsg(h, field, startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, field, endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, putenum, &attr);
        break;
      default:
        // No handler is registered for any other field type.
        break;
    }

    upb_msg_next(&iter);
  }
}
// Creates frozen handlers for message type `m` by calling
// upb_handlers_newfrozen() with onmreg() as the registration callback and no
// callback closure. `owner` is passed through unchanged.
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
                                                const void *owner) {
  const upb_handlers *handlers =
      upb_handlers_newfrozen(m, owner, onmreg, NULL);
  return handlers;
}
// Returns a pointer to the printer's embedded input sink.
upb_sink *upb_textprinter_input(upb_textprinter *p) {
  upb_sink *input = &p->input_;
  return input;
}
// Sets the byte sink that the printer writes its text output to.
// Always returns true.
bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) {
  const bool ok = true;
  p->output_ = output;
  return ok;
}
// Selects single-line output (fields separated by spaces) when `single_line`
// is true, or multi-line output (one field per line, indented) otherwise.
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  const bool mode = single_line;
  p->single_line_ = mode;
}