From cfdb9907cb87d15eaab72ceefbfa42fd7a4c3127 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Sat, 11 May 2013 16:45:38 -0700 Subject: [PATCH] Synced with 3 months of Google-internal development. Major changes: - Got rid of all bytestream interfaces in favor of using regular handlers. - new Pipeline object represents a upb pipeline, does bump allocation internally to manage memory. - proto2 support now can handle extensions. --- bindings/cpp/upb/pb/decoder.hpp | 81 --- bindings/lua/table.c | 8 +- bindings/lua/test.lua | 1 + bindings/lua/upb.c | 192 ++++--- tests/test_cpp.cc | 14 +- tests/test_decoder.cc | 404 +++++++------- tests/test_def.c | 14 +- tests/test_pipeline.c | 116 ++++ tests/test_table.cc | 47 +- tests/test_vs_proto2.cc | 53 +- tools/dump_cinit.lua | 74 ++- upb/bytestream.c | 176 ------ upb/bytestream.h | 627 +--------------------- upb/bytestream.proto | 14 + upb/bytestream.upb.c | 40 ++ upb/bytestream.upb.h | 37 ++ upb/def.c | 344 ++++++++---- upb/def.h | 205 ++++--- upb/descriptor/descriptor.upb.c | 422 +++++++-------- upb/descriptor/descriptor.upb.h | 194 +++++++ upb/descriptor/reader.c | 269 ++++++---- upb/descriptor/reader.h | 106 ++-- upb/google/bridge.cc | 82 +-- upb/google/bridge.h | 5 +- upb/google/cord.h | 48 -- upb/google/proto1.cc | 281 +++++----- upb/google/proto1.h | 8 +- upb/google/proto2.cc | 540 +++++++++++++------ upb/google/proto2.h | 5 +- upb/handlers.c | 137 +++-- upb/handlers.h | 505 ++++++++++++++---- upb/pb/decoder.c | 914 ++++++++++++++++++++------------ upb/pb/decoder.h | 207 +++----- upb/pb/decoder_x64.dasc | 429 ++++++++------- upb/pb/glue.c | 46 +- upb/pb/textprinter.c | 121 ++--- upb/pb/textprinter.h | 4 +- upb/pb/varint.h | 24 +- upb/refcounted.c | 52 +- upb/sink.c | 437 +++++++++++---- upb/sink.h | 465 ++++++++++++++-- upb/symtab.c | 37 +- upb/table.c | 200 ++++--- upb/table.h | 87 +-- upb/upb.h | 199 +++++-- 45 files changed, 4813 insertions(+), 3458 deletions(-) delete mode 100644 bindings/cpp/upb/pb/decoder.hpp 
create mode 100644 tests/test_pipeline.c delete mode 100644 upb/bytestream.c create mode 100644 upb/bytestream.proto create mode 100644 upb/bytestream.upb.c create mode 100644 upb/bytestream.upb.h delete mode 100644 upb/google/cord.h diff --git a/bindings/cpp/upb/pb/decoder.hpp b/bindings/cpp/upb/pb/decoder.hpp deleted file mode 100644 index 950e9e20e9..0000000000 --- a/bindings/cpp/upb/pb/decoder.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// upb::Decoder is a high performance, streaming decoder for protobuf -// data that works by getting its input data from a ubp::ByteRegion and calling -// into a upb::Handlers. -// -// A DecoderPlan contains whatever data structures and generated (JIT-ted) code -// are necessary to decode protobuf data of a specific type to a specific set -// of handlers. By generating the plan ahead of time, we avoid having to -// redo this work every time we decode. -// -// A DecoderPlan is threadsafe, meaning that it can be used concurrently by -// different upb::Decoders in different threads. However, the upb::Decoders are -// *not* thread-safe. - -#ifndef UPB_PB_DECODER_HPP -#define UPB_PB_DECODER_HPP - -#include "upb/pb/decoder.h" - -#include "upb/bytestream.h" -#include "upb/upb.h" - -namespace upb { - -class DecoderPlan : public upb_decoderplan { - public: - static DecoderPlan* New(const Handlers* h, bool allow_jit) { - return static_cast(upb_decoderplan_new(h, allow_jit)); - } - void Unref() { upb_decoderplan_unref(this); } - - // Returns true if the plan contains JIT-ted code. This may not be the same - // as the "allowjit" parameter to the constructor if support for JIT-ting was - // not compiled in. 
- bool HasJitCode() { return upb_decoderplan_hasjitcode(this); } - - private: - DecoderPlan() {} // Only constructed by New -}; - -class Decoder : public upb_decoder { - public: - Decoder() { upb_decoder_init(this); } - ~Decoder() { upb_decoder_uninit(this); } - - // Resets the plan that the decoder will parse from. This will also reset the - // decoder's input to be uninitialized -- ResetInput() must be called before - // parsing can occur. The plan must live until the decoder is destroyed or - // reset to a different plan. - // - // Must be called before ResetInput() or Decode(). - void ResetPlan(DecoderPlan* plan) { upb_decoder_resetplan(this, plan); } - - // Resets the input of the decoder. This puts it in a state where it has not - // seen any data, and expects the next data to be from the beginning of a new - // protobuf. - // - // ResetInput() must be called before Decode() but may be called more than - // once. "input" must live until the decoder destroyed or ResetInput is - // called again. "c" is the closure that will be passed to the handlers. - void ResetInput(ByteRegion* byte_region, void* c) { - upb_decoder_resetinput(this, byte_region, c); - } - - // Decodes serialized data (calling Handlers as the data is parsed) until - // error or EOF (see status() for details). 
- Status::Success Decode() { return upb_decoder_decode(this); } - - const upb::Status& status() { - return static_cast(*upb_decoder_status(this)); - } -}; - -} // namespace upb - -#endif diff --git a/bindings/lua/table.c b/bindings/lua/table.c index 31b92d26bd..450730a109 100644 --- a/bindings/lua/table.c +++ b/bindings/lua/table.c @@ -31,16 +31,16 @@ static void lupbtable_setnum(lua_State *L, int tab, const char *key, lua_setfield(L, tab - 1, key); } -static void lupbtable_pushval(lua_State *L, upb_value val, upb_ctype_t type) { +static void lupbtable_pushval(lua_State *L, _upb_value val, upb_ctype_t type) { switch (type) { case UPB_CTYPE_INT32: - lua_pushnumber(L, upb_value_getint32(val)); + lua_pushnumber(L, val.int32); break; case UPB_CTYPE_PTR: - lupb_def_pushwrapper(L, upb_value_getptr(val), NULL); + lupb_def_pushwrapper(L, val.ptr, NULL); break; case UPB_CTYPE_CSTR: - lua_pushstring(L, upb_value_getcstr(val)); + lua_pushstring(L, val.cstr); break; default: luaL_error(L, "Unexpected type: %d", type); diff --git a/bindings/lua/test.lua b/bindings/lua/test.lua index 6b162a9a53..fc4db70d36 100644 --- a/bindings/lua/test.lua +++ b/bindings/lua/test.lua @@ -13,6 +13,7 @@ function test_fielddef() assert_false(f:is_frozen()) assert_nil(f:number()) assert_nil(f:name()) + assert_nil(f:type()) assert_equal(upb.LABEL_OPTIONAL, f:label()) f:set_name("foo_field") diff --git a/bindings/lua/upb.c b/bindings/lua/upb.c index 1a1d7c0de0..29de9f0cd7 100644 --- a/bindings/lua/upb.c +++ b/bindings/lua/upb.c @@ -14,7 +14,7 @@ #include #include "lauxlib.h" #include "bindings/lua/upb.h" -#include "upb/bytestream.h" +#include "upb/handlers.h" #include "upb/pb/glue.h" // Lua metatable types. @@ -75,7 +75,7 @@ static uint32_t lupb_checkint32(lua_State *L, int narg, const char *name) { // Converts a number or bool from Lua -> upb_value. 
static upb_value lupb_getvalue(lua_State *L, int narg, upb_fieldtype_t type) { upb_value val; - if (type == UPB_TYPE(BOOL)) { + if (type == UPB_TYPE_BOOL) { if (!lua_isboolean(L, narg)) luaL_error(L, "Must explicitly pass true or false for boolean fields"); upb_value_setbool(&val, lua_toboolean(L, narg)); @@ -83,41 +83,35 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fieldtype_t type) { // Numeric type. lua_Number num = luaL_checknumber(L, narg); switch (type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(ENUM): + case UPB_TYPE_INT32: + case UPB_TYPE_ENUM: if (num > INT32_MAX || num < INT32_MIN || num != rint(num)) luaL_error(L, "Cannot convert %f to 32-bit integer", num); upb_value_setint32(&val, num); break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): + case UPB_TYPE_INT64: if (num > INT64_MAX || num < INT64_MIN || num != rint(num)) luaL_error(L, "Cannot convert %f to 64-bit integer", num); upb_value_setint64(&val, num); break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): + case UPB_TYPE_UINT32: if (num > UINT32_MAX || num < 0 || num != rint(num)) luaL_error(L, "Cannot convert %f to unsigned 32-bit integer", num); upb_value_setuint32(&val, num); break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): + case UPB_TYPE_UINT64: if (num > UINT64_MAX || num < 0 || num != rint(num)) luaL_error(L, "Cannot convert %f to unsigned 64-bit integer", num); upb_value_setuint64(&val, num); break; - case UPB_TYPE(DOUBLE): + case UPB_TYPE_DOUBLE: if (num > DBL_MAX || num < -DBL_MAX) { // This could happen if lua_Number was long double. 
luaL_error(L, "Cannot convert %f to double", num); } upb_value_setdouble(&val, num); break; - case UPB_TYPE(FLOAT): + case UPB_TYPE_FLOAT: if (num > FLT_MAX || num < -FLT_MAX) luaL_error(L, "Cannot convert %f to float", num); upb_value_setfloat(&val, num); @@ -131,34 +125,21 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fieldtype_t type) { // Converts a upb_value -> Lua value. static void lupb_pushvalue(lua_State *L, upb_value val, upb_fieldtype_t type) { switch (type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): - case UPB_TYPE(ENUM): + case UPB_TYPE_INT32: + case UPB_TYPE_ENUM: lua_pushnumber(L, upb_value_getint32(val)); break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): + case UPB_TYPE_INT64: lua_pushnumber(L, upb_value_getint64(val)); break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): + case UPB_TYPE_UINT32: lua_pushnumber(L, upb_value_getuint32(val)); break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): + case UPB_TYPE_UINT64: lua_pushnumber(L, upb_value_getuint64(val)); break; - case UPB_TYPE(DOUBLE): + case UPB_TYPE_DOUBLE: lua_pushnumber(L, upb_value_getdouble(val)); break; - case UPB_TYPE(FLOAT): + case UPB_TYPE_FLOAT: lua_pushnumber(L, upb_value_getfloat(val)); break; - case UPB_TYPE(BOOL): + case UPB_TYPE_BOOL: lua_pushboolean(L, upb_value_getbool(val)); break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { - const upb_byteregion *r = upb_value_getbyteregion(val); - size_t len; - const char *str = upb_byteregion_getptr(r, 0, &len); - lua_pushlstring(L, str, len); - } default: luaL_error(L, "internal error"); } } @@ -341,7 +322,7 @@ static void lupb_fielddef_dosetdefault(lua_State *L, upb_fielddef *f, int type = lua_type(L, narg); upb_fieldtype_t upbtype = upb_fielddef_type(f); if (type == LUA_TSTRING) { - if (!upb_fielddef_isstring(f) && upbtype != UPB_TYPE(ENUM)) + if (!upb_fielddef_isstring(f) && upbtype != UPB_TYPE_ENUM) luaL_argerror(L, narg, "field does 
not expect a string default"); size_t len; const char *str = lua_tolstring(L, narg, &len); @@ -387,6 +368,21 @@ static void lupb_fielddef_dosettype(lua_State *L, upb_fielddef *f, int narg) { luaL_argerror(L, narg, "invalid field type"); } +static void lupb_fielddef_dosetintfmt(lua_State *L, upb_fielddef *f, int narg) { + int32_t intfmt = luaL_checknumber(L, narg); + if (!upb_fielddef_setintfmt(f, intfmt)) + luaL_argerror(L, narg, "invalid field intfmt"); +} + +static void lupb_fielddef_dosettagdelim(lua_State *L, upb_fielddef *f, + int narg) { + if (!lua_isboolean(L, narg)) + luaL_argerror(L, narg, "tagdelim value must be boolean"); + bool tagdelim = lua_toboolean(L, narg); /* argument was verified to be a boolean above */ + if (!upb_fielddef_settagdelim(f, tagdelim)) + luaL_argerror(L, narg, "invalid field tagdelim"); +} + // Setter API calls. These use the setter functions above. static int lupb_fielddef_setdefault(lua_State *L) { @@ -425,6 +421,18 @@ static int lupb_fielddef_settype(lua_State *L) { return 0; } +static int lupb_fielddef_setintfmt(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosetintfmt(L, f, 2); + return 0; +} + +static int lupb_fielddef_settagdelim(lua_State *L) { + upb_fielddef *f = lupb_fielddef_checkmutable(L, 1); + lupb_fielddef_dosettagdelim(L, f, 2); + return 0; +} + // Constructor and other methods. 
static int lupb_fielddef_new(lua_State *L) { @@ -470,11 +478,22 @@ static int lupb_fielddef_default(lua_State *L) { const upb_fielddef *f = lupb_fielddef_check(L, 1); upb_fieldtype_t type = upb_fielddef_type(f); if (upb_fielddef_default_is_symbolic(f)) - type = UPB_TYPE(STRING); + type = UPB_TYPE_STRING; lupb_pushvalue(L, upb_fielddef_default(f), type); return 1; } +static int lupb_fielddef_getsel(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + upb_selector_t sel; + if (upb_getselector(f, luaL_checknumber(L, 2), &sel)) { + lua_pushnumber(L, sel); + return 1; + } else { + return 0; + } +} + static int lupb_fielddef_label(lua_State *L) { const upb_fielddef *f = lupb_fielddef_check(L, 1); lua_pushnumber(L, upb_fielddef_label(f)); @@ -530,7 +549,22 @@ static int lupb_fielddef_subdefname(lua_State *L) { static int lupb_fielddef_type(lua_State *L) { const upb_fielddef *f = lupb_fielddef_check(L, 1); - lua_pushnumber(L, upb_fielddef_type(f)); + if (upb_fielddef_typeisset(f)) + lua_pushnumber(L, upb_fielddef_type(f)); + else + lua_pushnil(L); + return 1; +} + +static int lupb_fielddef_intfmt(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushnumber(L, upb_fielddef_intfmt(f)); + return 1; +} + +static int lupb_fielddef_istagdelim(lua_State *L) { + const upb_fielddef *f = lupb_fielddef_check(L, 1); + lua_pushboolean(L, upb_fielddef_istagdelim(f)); return 1; } @@ -545,7 +579,10 @@ static const struct luaL_Reg lupb_fielddef_m[] = { LUPB_COMMON_DEF_METHODS {"default", lupb_fielddef_default}, + {"getsel", lupb_fielddef_getsel}, {"has_subdef", lupb_fielddef_hassubdef}, + {"intfmt", lupb_fielddef_intfmt}, + {"istagdelim", lupb_fielddef_istagdelim}, {"label", lupb_fielddef_label}, {"msgdef", lupb_fielddef_msgdef}, {"name", lupb_def_fullname}, // name() is just an alias for fullname() @@ -561,6 +598,8 @@ static const struct luaL_Reg lupb_fielddef_m[] = { {"set_subdef", lupb_fielddef_setsubdef}, {"set_subdef_name", 
lupb_fielddef_setsubdefname}, {"set_type", lupb_fielddef_settype}, + {"set_intfmt", lupb_fielddef_setintfmt}, + {"set_tagdelim", lupb_fielddef_settagdelim}, // Internal-only. {"_selector_base", lupb_fielddef_selectorbase}, @@ -1040,28 +1079,44 @@ int luaopen_upb(lua_State *L) { lua_call(L, 1, 0); // Register constants. - lupb_setfieldi(L, "LABEL_OPTIONAL", UPB_LABEL(OPTIONAL)); - lupb_setfieldi(L, "LABEL_REQUIRED", UPB_LABEL(REQUIRED)); - lupb_setfieldi(L, "LABEL_REPEATED", UPB_LABEL(REPEATED)); - - lupb_setfieldi(L, "TYPE_DOUBLE", UPB_TYPE(DOUBLE)); - lupb_setfieldi(L, "TYPE_FLOAT", UPB_TYPE(FLOAT)); - lupb_setfieldi(L, "TYPE_INT64", UPB_TYPE(INT64)); - lupb_setfieldi(L, "TYPE_UINT64", UPB_TYPE(UINT64)); - lupb_setfieldi(L, "TYPE_INT32", UPB_TYPE(INT32)); - lupb_setfieldi(L, "TYPE_FIXED64", UPB_TYPE(FIXED64)); - lupb_setfieldi(L, "TYPE_FIXED32", UPB_TYPE(FIXED32)); - lupb_setfieldi(L, "TYPE_BOOL", UPB_TYPE(BOOL)); - lupb_setfieldi(L, "TYPE_STRING", UPB_TYPE(STRING)); - lupb_setfieldi(L, "TYPE_GROUP", UPB_TYPE(GROUP)); - lupb_setfieldi(L, "TYPE_MESSAGE", UPB_TYPE(MESSAGE)); - lupb_setfieldi(L, "TYPE_BYTES", UPB_TYPE(BYTES)); - lupb_setfieldi(L, "TYPE_UINT32", UPB_TYPE(UINT32)); - lupb_setfieldi(L, "TYPE_ENUM", UPB_TYPE(ENUM)); - lupb_setfieldi(L, "TYPE_SFIXED32", UPB_TYPE(SFIXED32)); - lupb_setfieldi(L, "TYPE_SFIXED64", UPB_TYPE(SFIXED64)); - lupb_setfieldi(L, "TYPE_SINT32", UPB_TYPE(SINT32)); - lupb_setfieldi(L, "TYPE_SINT64", UPB_TYPE(SINT64)); + lupb_setfieldi(L, "LABEL_OPTIONAL", UPB_LABEL_OPTIONAL); + lupb_setfieldi(L, "LABEL_REQUIRED", UPB_LABEL_REQUIRED); + lupb_setfieldi(L, "LABEL_REPEATED", UPB_LABEL_REPEATED); + + lupb_setfieldi(L, "TYPE_DOUBLE", UPB_TYPE_DOUBLE); + lupb_setfieldi(L, "TYPE_FLOAT", UPB_TYPE_FLOAT); + lupb_setfieldi(L, "TYPE_INT64", UPB_TYPE_INT64); + lupb_setfieldi(L, "TYPE_UINT64", UPB_TYPE_UINT64); + lupb_setfieldi(L, "TYPE_INT32", UPB_TYPE_INT32); + lupb_setfieldi(L, "TYPE_BOOL", UPB_TYPE_BOOL); + lupb_setfieldi(L, "TYPE_STRING", 
UPB_TYPE_STRING); + lupb_setfieldi(L, "TYPE_MESSAGE", UPB_TYPE_MESSAGE); + lupb_setfieldi(L, "TYPE_BYTES", UPB_TYPE_BYTES); + lupb_setfieldi(L, "TYPE_UINT32", UPB_TYPE_UINT32); + lupb_setfieldi(L, "TYPE_ENUM", UPB_TYPE_ENUM); + + lupb_setfieldi(L, "INTFMT_VARIABLE", UPB_INTFMT_VARIABLE); + lupb_setfieldi(L, "INTFMT_FIXED", UPB_INTFMT_FIXED); + lupb_setfieldi(L, "INTFMT_ZIGZAG", UPB_INTFMT_ZIGZAG); + + lupb_setfieldi(L, "DESCRIPTOR_TYPE_DOUBLE", UPB_DESCRIPTOR_TYPE_DOUBLE); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FLOAT", UPB_DESCRIPTOR_TYPE_FLOAT); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT64", UPB_DESCRIPTOR_TYPE_INT64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT64", UPB_DESCRIPTOR_TYPE_UINT64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_INT32", UPB_DESCRIPTOR_TYPE_INT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED64", UPB_DESCRIPTOR_TYPE_FIXED64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_FIXED32", UPB_DESCRIPTOR_TYPE_FIXED32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_BOOL", UPB_DESCRIPTOR_TYPE_BOOL); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_STRING", UPB_DESCRIPTOR_TYPE_STRING); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_GROUP", UPB_DESCRIPTOR_TYPE_GROUP); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_MESSAGE", UPB_DESCRIPTOR_TYPE_MESSAGE); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_BYTES", UPB_DESCRIPTOR_TYPE_BYTES); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_UINT32", UPB_DESCRIPTOR_TYPE_UINT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_ENUM", UPB_DESCRIPTOR_TYPE_ENUM); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SFIXED32", UPB_DESCRIPTOR_TYPE_SFIXED32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SFIXED64", UPB_DESCRIPTOR_TYPE_SFIXED64); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT32", UPB_DESCRIPTOR_TYPE_SINT32); + lupb_setfieldi(L, "DESCRIPTOR_TYPE_SINT64", UPB_DESCRIPTOR_TYPE_SINT64); lupb_setfieldi(L, "DEF_MSG", UPB_DEF_MSG); lupb_setfieldi(L, "DEF_FIELD", UPB_DEF_FIELD); @@ -1069,6 +1124,21 @@ int luaopen_upb(lua_State *L) { lupb_setfieldi(L, "DEF_SERVICE", UPB_DEF_SERVICE); lupb_setfieldi(L, "DEF_ANY", UPB_DEF_ANY); + 
lupb_setfieldi(L, "UPB_HANDLER_INT32", UPB_HANDLER_INT32); + lupb_setfieldi(L, "UPB_HANDLER_INT64", UPB_HANDLER_INT64); + lupb_setfieldi(L, "UPB_HANDLER_UINT32", UPB_HANDLER_UINT32); + lupb_setfieldi(L, "UPB_HANDLER_UINT64", UPB_HANDLER_UINT64); + lupb_setfieldi(L, "UPB_HANDLER_FLOAT", UPB_HANDLER_FLOAT); + lupb_setfieldi(L, "UPB_HANDLER_DOUBLE", UPB_HANDLER_DOUBLE); + lupb_setfieldi(L, "UPB_HANDLER_BOOL", UPB_HANDLER_BOOL); + lupb_setfieldi(L, "UPB_HANDLER_STARTSTR", UPB_HANDLER_STARTSTR); + lupb_setfieldi(L, "UPB_HANDLER_STRING", UPB_HANDLER_STRING); + lupb_setfieldi(L, "UPB_HANDLER_ENDSTR", UPB_HANDLER_ENDSTR); + lupb_setfieldi(L, "UPB_HANDLER_STARTSUBMSG", UPB_HANDLER_STARTSUBMSG); + lupb_setfieldi(L, "UPB_HANDLER_ENDSUBMSG", UPB_HANDLER_ENDSUBMSG); + lupb_setfieldi(L, "UPB_HANDLER_STARTSEQ", UPB_HANDLER_STARTSEQ); + lupb_setfieldi(L, "UPB_HANDLER_ENDSEQ", UPB_HANDLER_ENDSEQ); + return 1; // Return package table. } diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 59603d9974..db2337ec5c 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -10,9 +10,10 @@ #include #include #include -#include "upb/bytestream.h" #include "upb/def.h" +#include "upb/descriptor/reader.h" #include "upb/handlers.h" +#include "upb/pb/decoder.h" #include "upb/pb/glue.h" #include "upb_test.h" #include "upb/upb.h" @@ -31,16 +32,6 @@ static void TestSymbolTable(const char *descriptor_file) { md->Unref(&md); } -static void TestByteStream() { - upb::StringSource stringsrc; - stringsrc.Reset("testing", 7); - upb::ByteRegion* byteregion = stringsrc.AllBytes(); - ASSERT(byteregion->FetchAll() == UPB_BYTE_OK); - char* str = byteregion->StrDup(); - ASSERT(strcmp(str, "testing") == 0); - free(str); -} - extern "C" { int run_tests(int argc, char *argv[]) { @@ -49,7 +40,6 @@ int run_tests(int argc, char *argv[]) { return 1; } TestSymbolTable(argv[1]); - TestByteStream(); return 0; } diff --git a/tests/test_decoder.cc b/tests/test_decoder.cc index d42c0fe67f..1f0e87e809 100644 --- 
a/tests/test_decoder.cc +++ b/tests/test_decoder.cc @@ -31,11 +31,12 @@ #include #include #include +#include "upb/bytestream.h" #include "upb/handlers.h" #include "upb/pb/decoder.h" #include "upb/pb/varint.h" -#include "upb/upb.h" #include "upb_test.h" +#include "upb/upb.h" #include "third_party/upb/tests/test_decoder_schema.upb.h" uint32_t filter_hash = 0; @@ -186,16 +187,13 @@ void indentbuf(buffer *buf, int depth) { buf->append(" ", 2); } -void indent(void *depth) { - indentbuf(&output, *(int*)depth); -} - #define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \ - bool value_ ## member(void *closure, void *fval, ctype val) { \ - indent(closure); \ - uint32_t *num = static_cast(fval); \ - output.appendf("%" PRIu32 ":%" fmt "\n", *num, val); \ - return true; \ + bool value_ ## member(const upb::SinkFrame *frame, ctype val) { \ + int *depth = (int*)frame->userdata(); \ + indentbuf(&output, *depth); \ + uint32_t *num = static_cast(frame->handler_data()); \ + output.appendf("%" PRIu32 ":%" fmt "\n", *num, val); \ + return true; \ } NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32) @@ -205,68 +203,73 @@ NUMERIC_VALUE_HANDLER(int64, int64_t, PRId64) NUMERIC_VALUE_HANDLER(float, float, "g") NUMERIC_VALUE_HANDLER(double, double, "g") -bool value_bool(void *closure, void *fval, bool val) { - indent(closure); - uint32_t *num = static_cast(fval); +bool value_bool(const upb::SinkFrame *frame, bool val) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); + uint32_t *num = static_cast(frame->handler_data()); output.appendf("%" PRIu32 ":%s\n", *num, val ? 
"true" : "false"); return true; } -void* startstr(void *closure, void *fval, size_t size_hint) { - indent(closure); - uint32_t *num = static_cast(fval); +void* startstr(const upb::SinkFrame* frame, size_t size_hint) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); + uint32_t *num = static_cast(frame->handler_data()); output.appendf("%" PRIu32 ":(%zu)\"", *num, size_hint); - return ((int*)closure) + 1; + return depth + 1; } -size_t value_string(void *closure, void *fval, const char *buf, size_t n) { +size_t value_string(const upb::SinkFrame* frame, const char* buf, size_t n) { output.append(buf, n); return n; } -bool endstr(void *closure, void *fval) { - UPB_UNUSED(fval); +bool endstr(const upb::SinkFrame* frame) { output.append("\"\n"); return true; } -void* startsubmsg(void *closure, void *fval) { - indent(closure); - uint32_t *num = static_cast(fval); +void* startsubmsg(const upb::SinkFrame* frame) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); + uint32_t *num = static_cast(frame->handler_data()); output.appendf("%" PRIu32 ":{\n", *num); - return ((int*)closure) + 1; + return depth + 1; } -bool endsubmsg(void *closure, void *fval) { - UPB_UNUSED(fval); - indent(closure); +bool endsubmsg(const upb::SinkFrame* frame) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); output.append("}\n"); return true; } -void* startseq(void *closure, void *fval) { - indent(closure); - uint32_t *num = static_cast(fval); +void* startseq(const upb::SinkFrame* frame) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); + uint32_t *num = static_cast(frame->handler_data()); output.appendf("%" PRIu32 ":[\n", *num); - return ((int*)closure) + 1; + return depth + 1; } -bool endseq(void *closure, void *fval) { - UPB_UNUSED(fval); - indent(closure); +bool endseq(const upb::SinkFrame* frame) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); output.append("]\n"); return true; } -bool 
startmsg(void *closure) { - indent(closure); +bool startmsg(const upb::SinkFrame* frame) { + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); output.append("<\n"); return true; } -void endmsg(void *closure, upb_status *status) { +void endmsg(const upb::SinkFrame* frame, upb_status* status) { (void)status; - indent(closure); + int *depth = (int*)frame->userdata(); + indentbuf(&output, *depth); output.append(">\n"); } @@ -299,7 +302,7 @@ uint32_t rep_fn(uint32_t fn) { #define UNKNOWN_FIELD 666 template -void reg(upb_handlers *h, upb_fieldtype_t type, +void reg(upb_handlers *h, upb_descriptortype_t type, typename upb::Handlers::Value::Handler *handler) { // We register both a repeated and a non-repeated field for every type. // For the non-repeated field we make the field number the same as the @@ -346,167 +349,111 @@ void reghandlers(upb_handlers *h) { upb_handlers_setendmsg(h, &endmsg); // Register handlers for each type. - reg (h, UPB_TYPE(DOUBLE), &value_double); - reg (h, UPB_TYPE(FLOAT), &value_float); - reg (h, UPB_TYPE(INT64), &value_int64); - reg(h, UPB_TYPE(UINT64), &value_uint64); - reg (h, UPB_TYPE(INT32) , &value_int32); - reg(h, UPB_TYPE(FIXED64), &value_uint64); - reg(h, UPB_TYPE(FIXED32), &value_uint32); - reg (h, UPB_TYPE(BOOL), &value_bool); - reg(h, UPB_TYPE(UINT32), &value_uint32); - reg (h, UPB_TYPE(ENUM), &value_int32); - reg (h, UPB_TYPE(SFIXED32), &value_int32); - reg (h, UPB_TYPE(SFIXED64), &value_int64); - reg (h, UPB_TYPE(SINT32), &value_int32); - reg (h, UPB_TYPE(SINT64), &value_int64); - - reg_str(h, UPB_TYPE(STRING)); - reg_str(h, UPB_TYPE(BYTES)); - reg_str(h, rep_fn(UPB_TYPE(STRING))); - reg_str(h, rep_fn(UPB_TYPE(BYTES))); + reg (h, UPB_DESCRIPTOR_TYPE_DOUBLE, &value_double); + reg (h, UPB_DESCRIPTOR_TYPE_FLOAT, &value_float); + reg (h, UPB_DESCRIPTOR_TYPE_INT64, &value_int64); + reg(h, UPB_DESCRIPTOR_TYPE_UINT64, &value_uint64); + reg (h, UPB_DESCRIPTOR_TYPE_INT32 , &value_int32); + reg(h, 
UPB_DESCRIPTOR_TYPE_FIXED64, &value_uint64); + reg(h, UPB_DESCRIPTOR_TYPE_FIXED32, &value_uint32); + reg (h, UPB_DESCRIPTOR_TYPE_BOOL, &value_bool); + reg(h, UPB_DESCRIPTOR_TYPE_UINT32, &value_uint32); + reg (h, UPB_DESCRIPTOR_TYPE_ENUM, &value_int32); + reg (h, UPB_DESCRIPTOR_TYPE_SFIXED32, &value_int32); + reg (h, UPB_DESCRIPTOR_TYPE_SFIXED64, &value_int64); + reg (h, UPB_DESCRIPTOR_TYPE_SINT32, &value_int32); + reg (h, UPB_DESCRIPTOR_TYPE_SINT64, &value_int64); + + reg_str(h, UPB_DESCRIPTOR_TYPE_STRING); + reg_str(h, UPB_DESCRIPTOR_TYPE_BYTES); + reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_STRING)); + reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_BYTES)); // Register submessage/group handlers that are self-recursive // to this type, eg: message M { optional M m = 1; } - reg_subm(h, UPB_TYPE(MESSAGE)); - reg_subm(h, rep_fn(UPB_TYPE(MESSAGE))); + reg_subm(h, UPB_DESCRIPTOR_TYPE_MESSAGE); + reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE)); // For NOP_FIELD we register no handlers, so we can pad a proto freely without // changing the output. 
} -/* Custom bytesrc that can insert buffer seams in arbitrary places ************/ - -typedef struct { - upb_bytesrc bytesrc; - const char *str; - size_t len, seam1, seam2; - upb_byteregion byteregion; -} upb_seamsrc; - -size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) { - if (ofs < src->seam1) return src->seam1 - ofs; - if (ofs < src->seam2) return src->seam2 - ofs; - return src->len - ofs; -} - -upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) { - upb_seamsrc *src = (upb_seamsrc*)_src; - assert(ofs < src->len); - if (ofs == src->len) { - upb_status_seteof(&src->bytesrc.status); - return UPB_BYTE_EOF; - } - *read = upb_seamsrc_avail(src, ofs); - return UPB_BYTE_OK; -} - -void upb_seamsrc_copy(const void *_src, uint64_t ofs, - size_t len, char *dst) { - const upb_seamsrc *src = (const upb_seamsrc*)_src; - assert(ofs + len <= src->len); - memcpy(dst, src->str + ofs, len); -} - -void upb_seamsrc_discard(void *src, uint64_t ofs) { - (void)src; - (void)ofs; -} - -const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) { - const upb_seamsrc *src = (const upb_seamsrc*)_s; - *len = upb_seamsrc_avail(src, ofs); - return src->str + ofs; -} - -void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) { - static upb_bytesrc_vtbl vtbl = { - &upb_seamsrc_fetch, - &upb_seamsrc_discard, - &upb_seamsrc_copy, - &upb_seamsrc_getptr, - }; - upb_bytesrc_init(&s->bytesrc, &vtbl); - s->seam1 = 0; - s->seam2 = 0; - s->str = str; - s->len = len; - s->byteregion.bytesrc = &s->bytesrc; - s->byteregion.toplevel = true; - s->byteregion.start = 0; - s->byteregion.end = len; -} - -void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) { - assert(seam1 <= seam2); - s->seam1 = seam1; - s->seam2 = seam2; - s->byteregion.discard = 0; - s->byteregion.fetch = 0; -} - -void upb_seamsrc_uninit(upb_seamsrc *s) { (void)s; } - -upb_bytesrc *upb_seamsrc_bytesrc(upb_seamsrc *s) { - return &s->bytesrc; -} - -// Returns the 
top-level upb_byteregion* for this seamsrc. Invalidated when -// the seamsrc is reset. -upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) { - return &s->byteregion; -} - - /* Running of test cases ******************************************************/ -upb_decoderplan *plan; +const upb::Handlers *handlers; +const upb::Handlers *plan; uint32_t Hash(const buffer& proto, const buffer* expected_output) { uint32_t hash = MurmurHash2(proto.buf(), proto.len(), 0); if (expected_output) hash = MurmurHash2(expected_output->buf(), expected_output->len(), hash); - bool hasjit = upb_decoderplan_hasjitcode(plan); + bool hasjit = upb::pb::HasJitCode(plan); hash = MurmurHash2(&hasjit, 1, hash); return hash; } +bool parse( + upb_sink *s, const char *buf, size_t start, size_t end, size_t *ofs) { + start = UPB_MAX(start, *ofs); + if (start <= end) { + size_t len = end - start; + size_t parsed = + s->PutStringBuffer(UPB_BYTESTREAM_BYTES_STRING, buf + start, len); + if (s->pipeline()->status().ok() != (parsed >= len)) { + ASSERT(false); + } + if (!s->pipeline()->status().ok()) + return false; + *ofs += parsed; + } + return true; +} + #define LINE(x) x "\n" void run_decoder(const buffer& proto, const buffer* expected_output) { testhash = Hash(proto, expected_output); if (filter_hash && testhash != filter_hash) return; - upb_seamsrc src; - upb_seamsrc_init(&src, proto.buf(), proto.len()); - upb_decoder d; - upb_decoder_init(&d); - upb_decoder_resetplan(&d, plan); + upb::Pipeline pipeline(NULL, 0, upb_realloc, NULL); + upb::Sink* sink = pipeline.NewSink(handlers); + upb::Sink* decoder_sink = pipeline.NewSink(plan); + upb::pb::Decoder* d = decoder_sink->base()->GetUserdata(); + upb::pb::ResetDecoderSink(d, sink); for (size_t i = 0; i < proto.len(); i++) { for (size_t j = i; j < UPB_MIN(proto.len(), i + 5); j++) { - upb_seamsrc_resetseams(&src, i, j); - upb_byteregion *input = upb_seamsrc_allbytes(&src); + pipeline.Reset(); output.clear(); - upb_decoder_resetinput(&d, input, 
&closures[0]); - upb_success_t success = upb_decoder_decode(&d); - ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK)); + sink->Reset(&closures[0]); + size_t ofs = 0; + bool ok = + decoder_sink->StartMessage() && + decoder_sink->StartString( + UPB_BYTESTREAM_BYTES_STARTSTR, proto.len()) && + parse(decoder_sink, proto.buf(), 0, i, &ofs) && + parse(decoder_sink, proto.buf(), i, j, &ofs) && + parse(decoder_sink, proto.buf(), j, proto.len(), &ofs) && + ofs == proto.len() && + decoder_sink->EndString(UPB_BYTESTREAM_BYTES_ENDSTR); + if (ok) decoder_sink->EndMessage(); if (expected_output) { - ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d)); - // The input should be fully consumed. - ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input)); - ASSERT(upb_byteregion_discardofs(input) == - upb_byteregion_endofs(input)); if (!output.eql(*expected_output)) { fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", output.buf(), expected_output->buf()); } + if (!ok) { + fprintf(stderr, "Failed: %s\n", pipeline.status().GetString()); + } + ASSERT(ok); ASSERT(output.eql(*expected_output)); } else { - ASSERT(success == UPB_ERROR); + if (ok) { + fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n", + output.buf()); + } + ASSERT(!ok); } } } - upb_decoder_uninit(&d); - upb_seamsrc_uninit(&src); testhash = 0; } @@ -540,7 +487,7 @@ void assert_does_not_parse(const buffer& proto) { /* The actual tests ***********************************************************/ -void test_premature_eof_for_type(upb_fieldtype_t type) { +void test_premature_eof_for_type(upb_descriptortype_t type) { // Incomplete values for each wire type. 
static const buffer incompletes[6] = { buffer("\x80"), // UPB_WIRE_TYPE_VARINT @@ -590,10 +537,10 @@ void test_premature_eof_for_type(upb_fieldtype_t type) { assert_does_not_parse_at_eof( cat( tag(UNKNOWN_FIELD, wire_type), varint(1) )); - if (type == UPB_TYPE(MESSAGE)) { + if (type == UPB_DESCRIPTOR_TYPE_MESSAGE) { // Submessage ends in the middle of a value. buffer incomplete_submsg = - cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT), + cat ( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), incompletes[UPB_WIRE_TYPE_VARINT] ); assert_does_not_parse( cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED), @@ -615,7 +562,7 @@ void test_premature_eof_for_type(upb_fieldtype_t type) { // "33" and "66" are just two random values that all numeric types can // represent. -void test_valid_data_for_type(upb_fieldtype_t type, +void test_valid_data_for_type(upb_descriptortype_t type, const buffer& enc33, const buffer& enc66) { uint32_t fieldnum = type; uint32_t rep_fieldnum = rep_fn(type); @@ -653,7 +600,7 @@ void test_valid_data_for_type(upb_fieldtype_t type, LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); } -void test_valid_data_for_signed_type(upb_fieldtype_t type, +void test_valid_data_for_signed_type(upb_descriptortype_t type, const buffer& enc33, const buffer& enc66) { uint32_t fieldnum = type; uint32_t rep_fieldnum = rep_fn(type); @@ -694,22 +641,22 @@ void test_valid_data_for_signed_type(upb_fieldtype_t type, // Test that invalid protobufs are properly detected (without crashing) and // have an error reported. Field numbers match registered handlers above. 
void test_invalid() { - test_premature_eof_for_type(UPB_TYPE(DOUBLE)); - test_premature_eof_for_type(UPB_TYPE(FLOAT)); - test_premature_eof_for_type(UPB_TYPE(INT64)); - test_premature_eof_for_type(UPB_TYPE(UINT64)); - test_premature_eof_for_type(UPB_TYPE(INT32)); - test_premature_eof_for_type(UPB_TYPE(FIXED64)); - test_premature_eof_for_type(UPB_TYPE(FIXED32)); - test_premature_eof_for_type(UPB_TYPE(BOOL)); - test_premature_eof_for_type(UPB_TYPE(STRING)); - test_premature_eof_for_type(UPB_TYPE(BYTES)); - test_premature_eof_for_type(UPB_TYPE(UINT32)); - test_premature_eof_for_type(UPB_TYPE(ENUM)); - test_premature_eof_for_type(UPB_TYPE(SFIXED32)); - test_premature_eof_for_type(UPB_TYPE(SFIXED64)); - test_premature_eof_for_type(UPB_TYPE(SINT32)); - test_premature_eof_for_type(UPB_TYPE(SINT64)); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_DOUBLE); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FLOAT); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT64); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT64); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT32); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED64); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED32); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BOOL); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_STRING); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BYTES); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT32); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_ENUM); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED32); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED64); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT32); + test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT64); // EOF inside a tag's varint. assert_does_not_parse_at_eof( buffer("\x80") ); @@ -734,31 +681,47 @@ void test_invalid() { // Test exceeding the resource limit of stack depth. 
buffer buf; - for (int i = 0; i < UPB_MAX_NESTING; i++) { - buf.assign(submsg(UPB_TYPE(MESSAGE), buf)); + for (int i = 0; i <= UPB_MAX_NESTING; i++) { + buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); } assert_does_not_parse(buf); } void test_valid() { - test_valid_data_for_signed_type(UPB_TYPE(DOUBLE), dbl(33), dbl(-66)); - test_valid_data_for_signed_type(UPB_TYPE(FLOAT), flt(33), flt(-66)); - test_valid_data_for_signed_type(UPB_TYPE(INT64), varint(33), varint(-66)); - test_valid_data_for_signed_type(UPB_TYPE(INT32), varint(33), varint(-66)); - test_valid_data_for_signed_type(UPB_TYPE(ENUM), varint(33), varint(-66)); - test_valid_data_for_signed_type(UPB_TYPE(SFIXED32), uint32(33), uint32(-66)); - test_valid_data_for_signed_type(UPB_TYPE(SFIXED64), uint64(33), uint64(-66)); - test_valid_data_for_signed_type(UPB_TYPE(SINT32), zz32(33), zz32(-66)); - test_valid_data_for_signed_type(UPB_TYPE(SINT64), zz64(33), zz64(-66)); - - test_valid_data_for_type(UPB_TYPE(UINT64), varint(33), varint(66)); - test_valid_data_for_type(UPB_TYPE(UINT32), varint(33), varint(66)); - test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66)); - test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE, + dbl(33), + dbl(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_FLOAT, flt(33), flt(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT64, + varint(33), + varint(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT32, + varint(33), + varint(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_ENUM, + varint(33), + varint(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED32, + uint32(33), + uint32(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED64, + uint64(33), + uint64(-66)); + test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT32, + zz32(33), + zz32(-66)); + 
test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT64, + zz64(33), + zz64(-66)); + + test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT64, varint(33), varint(66)); + test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT32, varint(33), varint(66)); + test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED64, uint64(33), uint64(66)); + test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED32, uint32(33), uint32(66)); // Test implicit startseq/endseq. - uint32_t repfl_fn = rep_fn(UPB_TYPE(FLOAT)); - uint32_t repdb_fn = rep_fn(UPB_TYPE(DOUBLE)); + uint32_t repfl_fn = rep_fn(UPB_DESCRIPTOR_TYPE_FLOAT); + uint32_t repdb_fn = rep_fn(UPB_DESCRIPTOR_TYPE_DOUBLE); assert_successful_parse( cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33), tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ), @@ -772,7 +735,7 @@ void test_valid() { LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn); // Submessage tests. - uint32_t msg_fn = UPB_TYPE(MESSAGE); + uint32_t msg_fn = UPB_DESCRIPTOR_TYPE_MESSAGE; assert_successful_parse( submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer()))), LINE("<") @@ -790,7 +753,7 @@ void test_valid() { LINE("}") LINE(">"), msg_fn, msg_fn, msg_fn); - uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE)); + uint32_t repm_fn = rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE); assert_successful_parse( submsg(repm_fn, submsg(repm_fn, buffer())), LINE("<") @@ -813,11 +776,11 @@ void test_valid() { buffer textbuf; int total = UPB_MAX_NESTING - 1; for (int i = 0; i < total; i++) { - buf.assign(submsg(UPB_TYPE(MESSAGE), buf)); + buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); indentbuf(&textbuf, i); textbuf.append("<\n"); indentbuf(&textbuf, i); - textbuf.appendf("%u:{\n", UPB_TYPE(MESSAGE)); + textbuf.appendf("%u:{\n", UPB_DESCRIPTOR_TYPE_MESSAGE); } indentbuf(&textbuf, total); textbuf.append("<\n"); @@ -848,35 +811,36 @@ int run_tests(int argc, char *argv[]) { // Create an empty handlers to make sure that the decoder can handle empty // messages. 
- upb_handlers *h = upb_handlers_new(UPB_TEST_DECODER_EMPTYMESSAGE, &h); - bool ok = upb_handlers_freeze(&h, 1, NULL); + upb::Handlers *h = upb_handlers_new(UPB_TEST_DECODER_EMPTYMESSAGE, NULL, &h); + bool ok = upb::Handlers::Freeze(&h, 1, NULL); ASSERT(ok); - plan = upb_decoderplan_new(h, true); - upb_handlers_unref(h, &h); - upb_decoderplan_unref(plan); + plan = upb::pb::GetDecoderHandlers(h, true, &plan); + h->Unref(&h); + plan->Unref(&plan); // Construct decoder plan. - h = upb_handlers_new(UPB_TEST_DECODER_DECODERTEST, &h); + h = upb::Handlers::New(UPB_TEST_DECODER_DECODERTEST, NULL, &handlers); reghandlers(h); - ok = upb_handlers_freeze(&h, 1, NULL); + ok = upb::Handlers::Freeze(&h, 1, NULL); + handlers = h; // Test without JIT. - plan = upb_decoderplan_new(h, false); - ASSERT(!upb_decoderplan_hasjitcode(plan)); + plan = upb::pb::GetDecoderHandlers(handlers, false, &plan); + ASSERT(!upb::pb::HasJitCode(plan)); run_tests(); - upb_decoderplan_unref(plan); + plan->Unref(&plan); #ifdef UPB_USE_JIT_X64 // Test JIT. 
- plan = upb_decoderplan_new(h, true); - ASSERT(upb_decoderplan_hasjitcode(plan)); + plan = upb::pb::GetDecoderHandlers(handlers, true, &plan); + ASSERT(upb::pb::HasJitCode(plan)); run_tests(); - upb_decoderplan_unref(plan); + plan->Unref(&plan); #endif plan = NULL; printf("All tests passed, %d assertions.\n", num_assertions); - upb_handlers_unref(h, &h); + handlers->Unref(&handlers); return 0; } diff --git a/tests/test_def.c b/tests/test_def.c index 7f089d7f87..d048b3e925 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -88,15 +88,15 @@ static void test_fielddef_accessors() { ASSERT(!upb_fielddef_isfrozen(f1)); upb_fielddef_setname(f1, "f1"); upb_fielddef_setnumber(f1, 1937); - upb_fielddef_settype(f1, UPB_TYPE(FIXED64)); - upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED)); + upb_fielddef_settype(f1, UPB_TYPE_INT64); + upb_fielddef_setlabel(f1, UPB_LABEL_REPEATED); ASSERT(upb_fielddef_number(f1) == 1937); ASSERT(!upb_fielddef_isfrozen(f2)); upb_fielddef_setname(f2, "f2"); upb_fielddef_setnumber(f2, 1572); - upb_fielddef_settype(f2, UPB_TYPE(BYTES)); - upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED)); + upb_fielddef_settype(f2, UPB_TYPE_BYTES); + upb_fielddef_setlabel(f2, UPB_LABEL_REPEATED); ASSERT(upb_fielddef_number(f2) == 1572); upb_fielddef_unref(f1, &f1); @@ -104,7 +104,7 @@ static void test_fielddef_accessors() { // Test that we don't leak an unresolved subdef name. 
f1 = upb_fielddef_new(&f1); - upb_fielddef_settype(f1, UPB_TYPE(MESSAGE)); + upb_fielddef_settype(f1, UPB_TYPE_MESSAGE); upb_fielddef_setsubdefname(f1, "YO"); upb_fielddef_unref(f1, &f1); } @@ -127,7 +127,7 @@ static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) { return m; } -INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) { +static upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) { upb_enumdef *e = upb_enumdef_new(owner); upb_def_setfullname(upb_upcast(e), name); return e; @@ -138,7 +138,7 @@ static void test_replacement() { upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s); upb_msgdef_addfield(m, newfield( - "field1", 1, UPB_TYPE(ENUM), UPB_LABEL(OPTIONAL), ".MyEnum", &s), &s); + "field1", 1, UPB_TYPE_ENUM, UPB_LABEL_OPTIONAL, ".MyEnum", &s), &s); upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s); upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s); diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c new file mode 100644 index 0000000000..d54d15c94a --- /dev/null +++ b/tests/test_pipeline.c @@ -0,0 +1,116 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2013 Google Inc. See LICENSE for details. + * + * Test of upb_pipeline. + */ + +#include "upb/sink.h" +#include "tests/upb_test.h" + +static void *count_realloc(void *ud, void *ptr, size_t size) { + int *count = ud; + *count += 1; + return upb_realloc(ud, ptr, size); +} + +static void test_empty() { + // A pipeline with no initial memory or allocation function should return + // NULL from attempts to allocate. 
+ upb_pipeline pipeline; + upb_pipeline_init(&pipeline, NULL, 0, NULL, NULL); + ASSERT(upb_pipeline_alloc(&pipeline, 1) == NULL); + ASSERT(upb_pipeline_alloc(&pipeline, 1) == NULL); + ASSERT(upb_pipeline_realloc(&pipeline, NULL, 0, 1) == NULL); + upb_pipeline_uninit(&pipeline); +} + +static void test_only_initial() { + upb_pipeline pipeline; + char initial[152]; // 128 + a conservative 24 bytes overhead. + upb_pipeline_init(&pipeline, initial, sizeof(initial), NULL, NULL); + void *p1 = upb_pipeline_alloc(&pipeline, 64); + void *p2 = upb_pipeline_alloc(&pipeline, 64); + void *p3 = upb_pipeline_alloc(&pipeline, 64); + ASSERT(p1); + ASSERT(p2); + ASSERT(!p3); + ASSERT(p1 != p2); + ASSERT((void*)initial <= p1); + ASSERT(p1 < p2); + ASSERT(p2 < (void*)(initial + sizeof(initial))); + upb_pipeline_uninit(&pipeline); +} + +static void test_with_alloc_func() { + upb_pipeline pipeline; + char initial[152]; // 128 + a conservative 24 bytes overhead. + int count = 0; + upb_pipeline_init(&pipeline, initial, sizeof(initial), count_realloc, &count); + void *p1 = upb_pipeline_alloc(&pipeline, 64); + void *p2 = upb_pipeline_alloc(&pipeline, 64); + ASSERT(p1); + ASSERT(p2); + ASSERT(p1 != p2); + ASSERT(count == 0); + + void *p3 = upb_pipeline_alloc(&pipeline, 64); + ASSERT(p3); + ASSERT(p3 != p2); + ASSERT(count == 1); + + // Allocation larger than internal block size should force another alloc. + char *p4 = upb_pipeline_alloc(&pipeline, 16384); + ASSERT(p4); + p4[16383] = 1; // Verify memory is writable without crashing. + ASSERT(p4[16383] == 1); + ASSERT(count == 2); + + upb_pipeline_uninit(&pipeline); + ASSERT(count == 4); // From two calls to free the memory. +} + +static void test_realloc() { + upb_pipeline pipeline; + char initial[152]; // 128 + a conservative 24 bytes overhead. + int count = 0; + upb_pipeline_init(&pipeline, initial, sizeof(initial), count_realloc, &count); + void *p1 = upb_pipeline_alloc(&pipeline, 64); + // This realloc should work in-place. 
+ void *p2 = upb_pipeline_realloc(&pipeline, p1, 64, 128); + ASSERT(p1); + ASSERT(p2); + ASSERT(p1 == p2); + ASSERT(count == 0); + + // This realloc will *not* work in place, due to size. + void *p3 = upb_pipeline_realloc(&pipeline, p2, 128, 256); + ASSERT(p3); + ASSERT(p3 != p2); + ASSERT(count == 1); + + void *p4 = upb_pipeline_alloc(&pipeline, 64); + void *p5 = upb_pipeline_alloc(&pipeline, 64); + // This realloc will *not* work in place because it was not the last + // allocation. + void *p6 = upb_pipeline_realloc(&pipeline, p4, 64, 128); + ASSERT(p4); + ASSERT(p5); + ASSERT(p6); + ASSERT(p4 != p6); + ASSERT(p4 < p5); + ASSERT(p5 < p6); + ASSERT(count == 1); // These should all fit in the first dynamic block. + + upb_pipeline_uninit(&pipeline); + ASSERT(count == 2); +} + +int run_tests(int argc, char *argv[]) { + test_empty(); + test_only_initial(); + test_with_alloc_func(); + test_realloc(); + return 0; +} diff --git a/tests/test_table.cc b/tests/test_table.cc index bb75fc4686..80b0139b0c 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -46,13 +46,14 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) { /* Test correctness. */ for(uint32_t i = 0; i < keys.size(); i++) { const std::string& key = keys[i]; - const upb_value *v = upb_strtable_lookup(&table, key.c_str()); + upb_value v; + bool found = upb_strtable_lookup(&table, key.c_str(), &v); if(m.find(key) != m.end()) { /* Assume map implementation is correct. */ - ASSERT(v); - ASSERT(upb_value_getint32(*v) == key[0]); + ASSERT(found); + ASSERT(upb_value_getint32(v) == key[0]); ASSERT(m[key] == key[0]); } else { - ASSERT(v == NULL); + ASSERT(!found); } } @@ -88,14 +89,15 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { /* Test correctness. 
*/ for(uint32_t i = 0; i <= largest_key; i++) { - const upb_value *v = upb_inttable_lookup(&table, i); + upb_value v; + bool found = upb_inttable_lookup(&table, i, &v); if(m.find(i) != m.end()) { /* Assume map implementation is correct. */ - ASSERT(v); - ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(found); + ASSERT(upb_value_getuint32(v) == i*2); ASSERT(m[i] == i*2); ASSERT(hm[i] == i*2); } else { - ASSERT(v == NULL); + ASSERT(!found); } } @@ -112,28 +114,30 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { /* Test correctness. */ for(uint32_t i = 0; i <= largest_key; i++) { - const upb_value *v = upb_inttable_lookup(&table, i); + upb_value v; + bool found = upb_inttable_lookup(&table, i, &v); if(m.find(i) != m.end()) { /* Assume map implementation is correct. */ - ASSERT(v); - ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(found); + ASSERT(upb_value_getuint32(v) == i*2); ASSERT(m[i] == i*2); ASSERT(hm[i] == i*2); } else { - ASSERT(v == NULL); + ASSERT(!found); } } // Compact and test correctness again. upb_inttable_compact(&table); for(uint32_t i = 0; i <= largest_key; i++) { - const upb_value *v = upb_inttable_lookup(&table, i); + upb_value v; + bool found = upb_inttable_lookup(&table, i, &v); if(m.find(i) != m.end()) { /* Assume map implementation is correct. 
*/ - ASSERT(v); - ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(found); + ASSERT(upb_value_getuint32(v) == i*2); ASSERT(m[i] == i*2); ASSERT(hm[i] == i*2); } else { - ASSERT(v == NULL); + ASSERT(!found); } } @@ -172,8 +176,9 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { for(i = 0; true; i++) { MAYBE_BREAK; int32_t key = keys[i & mask]; - const upb_value *v = upb_inttable_lookup32(&table, key); - x += (uintptr_t)v; + upb_value v; + bool ok = upb_inttable_lookup32(&table, key, &v); + x += (uintptr_t)ok; } double total = get_usertime() - before; printf("%s/s\n", eng(i/total, 3, false)); @@ -184,8 +189,9 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { for(i = 0; true; i++) { MAYBE_BREAK; int32_t key = keys[rand_order[i & mask]]; - const upb_value *v = upb_inttable_lookup32(&table, key); - x += (uintptr_t)v; + upb_value v; + bool ok = upb_inttable_lookup32(&table, key, &v); + x += (uintptr_t)ok; } total = get_usertime() - before; printf("%s/s\n", eng(i/total, 3, false)); @@ -232,6 +238,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { x += hm[key]; } total = get_usertime() - before; + if (x == INT_MAX) abort(); printf("%s/s\n\n", eng(i/total, 3, false)); upb_inttable_uninit(&table); delete rand_order; diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 5eca399c54..b1a6053e99 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -10,6 +10,7 @@ #define __STDC_LIMIT_MACROS // So we get UINT32_MAX #include #include +#include #include #include #include @@ -17,10 +18,11 @@ #include #include #include "benchmarks/google_messages.pb.h" -#include "bindings/cpp/upb/pb/decoder.hpp" +#include "upb/bytestream.h" #include "upb/def.h" #include "upb/google/bridge.h" #include "upb/handlers.h" +#include "upb/pb/decoder.h" #include "upb/pb/glue.h" #include "upb/pb/varint.h" #include "upb_test.h" @@ -36,25 +38,36 @@ void compare_metadata(const 
google::protobuf::Descriptor* d, ASSERT(proto2_f); ASSERT(upb_f->number() == proto2_f->number()); ASSERT(std::string(upb_f->name()) == proto2_f->name()); - ASSERT(upb_f->type() == static_cast(proto2_f->type())); + ASSERT(upb_f->descriptor_type() == + static_cast(proto2_f->type())); ASSERT(upb_f->IsSequence() == proto2_f->is_repeated()); } } -void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2, - const upb::Handlers *handlers, +void parse_and_compare(google::protobuf::Message *msg1, + google::protobuf::Message *msg2, + const upb::Handlers *protomsg_handlers, const char *str, size_t len, bool allow_jit) { // Parse to both proto2 and upb. ASSERT(msg1->ParseFromArray(str, len)); - upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit); - upb::StringSource src(str, len); - upb::Decoder decoder; - decoder.ResetPlan(plan); - decoder.ResetInput(src.AllBytes(), msg2); + const upb::Handlers* decoder_handlers = upb::pb::GetDecoderHandlers( + protomsg_handlers, allow_jit, &decoder_handlers); + + upb::Pipeline pipeline(NULL, 0, upb_realloc, NULL); + pipeline.DonateRef(decoder_handlers, &decoder_handlers); + upb::Sink* protomsg_sink = pipeline.NewSink(protomsg_handlers); + upb::Sink* decoder_sink = pipeline.NewSink(decoder_handlers); + + protomsg_sink->Reset(msg2); + upb::pb::Decoder* decoder = + decoder_sink->base()->GetUserdata(); + upb::pb::ResetDecoderSink(decoder, protomsg_sink); + msg2->Clear(); - ASSERT(decoder.Decode() == UPB_OK); - plan->Unref(); + bool ok = upb::PutStringToBytestream(decoder_sink, str, len); + ASSERT(ok); + ASSERT(pipeline.status().ok()); // Would like to just compare the message objects themselves, but // unfortunately MessageDifferencer is not part of the open-source release of @@ -110,13 +123,29 @@ int run_tests(int argc, char *argv[]) parse_and_compare(&msg1, &msg2, h, str, len, true); parse_and_compare(&msg1, &msg2, h, str, len, false); parse_and_compare(&msg1, &msg2, h, str, len, true); - printf("All tests passed, %d 
assertions.\n", num_assertions); + h->Unref(&h); + // Test with DynamicMessage. + google::protobuf::DynamicMessageFactory* factory = + new google::protobuf::DynamicMessageFactory; + const google::protobuf::Message* prototype = + factory->GetPrototype(msg1.descriptor()); + google::protobuf::Message* dyn_msg1 = prototype->New(); + google::protobuf::Message* dyn_msg2 = prototype->New(); + h = upb::google::NewWriteHandlers(*dyn_msg1, &h); + parse_and_compare(dyn_msg1, dyn_msg2, h, str, len, false); + parse_and_compare(dyn_msg1, dyn_msg2, h, str, len, true); + delete dyn_msg1; + delete dyn_msg2; + delete factory; h->Unref(&h); + free((void*)str); test_zig_zag(); + printf("All tests passed, %d assertions.\n", num_assertions); + google::protobuf::ShutdownProtobufLibrary(); return 0; } diff --git a/tools/dump_cinit.lua b/tools/dump_cinit.lua index 1447d58ee6..fac3fca235 100644 --- a/tools/dump_cinit.lua +++ b/tools/dump_cinit.lua @@ -45,7 +45,28 @@ function const(obj, name) return "UPB_" .. k end end - assert(false, "Couldn't find constant") + assert(false, "Couldn't find UPB_" .. string.upper(name) .. + " constant for value: " .. val) +end + +function constlist(pattern) + local ret = {} + for k, v in pairs(upb) do + if string.find(k, "^UPB_" .. pattern) then + ret[k] = v + end + end + return ret +end + +function boolstr(val) + if val == true then + return "true" + elseif val == false then + return "false" + else + assert(false, "Bad bool value: " .. tostring(val)) + end end --[[ @@ -128,11 +149,11 @@ function Dumper:new(linktab) return obj end --- Dumps a upb_value, eg: +-- Dumps a _upb_value, eg: -- UPB_VALUE_INIT_INT32(5) -function Dumper:value(val, upbtype) +function Dumper:_value(val, upbtype) if type(val) == "nil" then - return "UPB_VALUE_INIT_NONE" + return "UPB__VALUE_INIT_NONE" elseif type(val) == "number" then -- Use upbtype to disambiguate what kind of number it is. if upbtype == upbtable.CTYPE_INT32 then @@ -164,7 +185,7 @@ end -- Dumps a table entry. 
function Dumper:tabent(ent) local key = self:tabkey(ent.key) - local val = self:value(ent.value, ent.valtype) + local val = self:_value(ent.value, ent.valtype) local next = self.linktab:addr(ent.next) return string.format(' {%s, %s, %s},\n', key, val, next) end @@ -173,7 +194,7 @@ end -- except that nil values have a special value to indicate "empty". function Dumper:arrayval(val) if val.val then - return string.format(" %s,\n", self:value(val.val, val.valtype)) + return string.format(" %s,\n", self:_value(val.val, val.valtype)) else return " UPB_ARRAY_EMPTYENT,\n" end @@ -283,7 +304,7 @@ local function dump_defs_c(symtab, basename, append) append("const upb_enumdef %s;\n", linktab:cdecl(upb.DEF_ENUM)) append("const upb_tabent %s;\n", linktab:cdecl("strentries")) append("const upb_tabent %s;\n", linktab:cdecl("intentries")) - append("const upb_value %s;\n", linktab:cdecl("arrays")) + append("const _upb_value %s;\n", linktab:cdecl("arrays")) append("\n") -- Emit defs. @@ -307,13 +328,23 @@ local function dump_defs_c(symtab, basename, append) if f:has_subdef() then subdef = string.format("upb_upcast(%s)", linktab:addr(f:subdef())) end - -- UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef, - -- selector_base, default_value) - append(' UPB_FIELDDEF_INIT(%s, %s, "%s", %d, %s, %s, %d, %s),\n', - const(f, "label"), const(f, "type"), f:name(), + local intfmt + if f:type() == upb.TYPE_UINT32 or + f:type() == upb.TYPE_INT32 or + f:type() == upb.TYPE_UINT64 or + f:type() == upb.TYPE_INT64 then + intfmt = const(f, "intfmt") + else + intfmt = "0" + end + -- UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, name, num, msgdef, + -- subdef, selector_base, default_value) + append(' UPB_FIELDDEF_INIT(%s, %s, %s, %s, "%s", %d, %s, %s, %d, ' .. 
+ 'UPB_VALUE_INIT_NONE),\n', -- TODO: support default value + const(f, "label"), const(f, "type"), intfmt, + boolstr(f:istagdelim()), f:name(), f:number(), linktab:addr(f:msgdef()), subdef, - f:_selector_base(), - dumper:value(nil) -- TODO + f:_selector_base() ) end append("};\n\n") @@ -343,7 +374,7 @@ local function dump_defs_c(symtab, basename, append) end append("};\n\n"); - append("const upb_value %s = {\n", linktab:cdecl("arrays")) + append("const _upb_value %s = {\n", linktab:cdecl("arrays")) for ent in linktab:objs("arrays") do append(dumper:arrayval(ent)) end @@ -400,6 +431,21 @@ local function dump_defs_h(symtab, basename, append, linktab) end append("\n") + append("// Selector definitions.\n") + local selector_types = constlist("HANDLER_") + for f in linktab:objs(upb.DEF_FIELD) do + for sel_type_name, sel_type_value in pairs(selector_types) do + sel_type_name = sel_type_name:gsub("UPB_HANDLER_", "") + local sel = f:getsel(sel_type_value) + if sel then + local symname = f:msgdef():full_name() .. "." .. f:name() .. + "." .. sel_type_name + append("#define %s %d\n", to_preproc(symname), sel) + end + end + end + append("\n") + append('#ifdef __cplusplus\n') append('}; // extern "C"\n') append('#endif\n\n') diff --git a/upb/bytestream.c b/upb/bytestream.c deleted file mode 100644 index 8473f33fe5..0000000000 --- a/upb/bytestream.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. 
- * Author: Josh Haberman - */ - -#include "upb/bytestream.h" - -#include -#include - - -/* upb_byteregion *************************************************************/ - -char *upb_byteregion_strdup(const upb_byteregion *r) { - char *ret = malloc(upb_byteregion_len(r) + 1); - upb_byteregion_copyall(r, ret); - ret[upb_byteregion_len(r)] = '\0'; - return ret; -} - -upb_byteregion *upb_byteregion_new(const void *str) { - return upb_byteregion_newl(str, strlen(str)); -} - -upb_byteregion *upb_byteregion_newl(const void *str, size_t len) { - upb_stringsrc *src = malloc(sizeof(*src)); - upb_stringsrc_init(src); - char *ptr = malloc(len + 1); - memcpy(ptr, str, len); - ptr[len] = '\0'; - upb_stringsrc_reset(src, ptr, len); - return upb_stringsrc_allbytes(src); -} - -void upb_byteregion_free(upb_byteregion *r) { - if (!r) return; - size_t len; - free((char*)upb_byteregion_getptr(r, 0, &len)); - upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc); - free(r->bytesrc); -} - -void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) { - sink->vtbl = vtbl; - upb_status_init(&sink->status); -} - -void upb_bytesink_uninit(upb_bytesink *sink) { - upb_status_uninit(&sink->status); -} - -void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, - uint64_t ofs, uint64_t len) { - assert(ofs >= upb_byteregion_startofs(src)); - assert(len <= upb_byteregion_remaining(src, ofs)); - r->bytesrc = src->bytesrc; - r->toplevel = false; - r->start = ofs; - r->discard = ofs; - r->end = ofs + len; - r->fetch = UPB_MIN(src->fetch, r->end); -} - -upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) { - uint64_t fetchable = upb_byteregion_remaining(r, r->fetch); - if (fetchable == 0) return UPB_BYTE_EOF; - size_t fetched; - upb_bytesuccess_t ret = upb_bytesrc_fetch(r->bytesrc, r->fetch, &fetched); - if (ret != UPB_BYTE_OK) return false; - r->fetch += UPB_MIN(fetched, fetchable); - return UPB_BYTE_OK; -} - - -/* upb_stringsrc 
**************************************************************/ - -upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) { - upb_stringsrc *src = _src; - assert(ofs < src->len); - if (ofs == src->len) { - upb_status_seteof(&src->bytesrc.status); - return UPB_BYTE_EOF; - } - *read = src->len - ofs; - return UPB_BYTE_OK; -} - -void upb_stringsrc_copy(const void *_src, uint64_t ofs, - size_t len, char *dst) { - const upb_stringsrc *src = _src; - assert(ofs + len <= src->len); - memcpy(dst, src->str + ofs, len); -} - -void upb_stringsrc_discard(void *src, uint64_t ofs) { - (void)src; - (void)ofs; -} - -const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, size_t *len) { - const upb_stringsrc *src = _s; - *len = src->len - ofs; - return src->str + ofs; -} - -void upb_stringsrc_init(upb_stringsrc *s) { - static upb_bytesrc_vtbl vtbl = { - &upb_stringsrc_fetch, - &upb_stringsrc_discard, - &upb_stringsrc_copy, - &upb_stringsrc_getptr, - }; - upb_bytesrc_init(&s->bytesrc, &vtbl); - s->str = NULL; - s->byteregion.bytesrc = &s->bytesrc; - s->byteregion.toplevel = true; -} - -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) { - s->str = str; - s->len = len; - s->byteregion.start = 0; - s->byteregion.discard = 0; - s->byteregion.fetch = 0; - s->byteregion.end = len; -} - -void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; } - -/* upb_stringsink *************************************************************/ - -void upb_stringsink_uninit(upb_stringsink *s) { - free(s->str); -} - -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) { - free(s->str); - s->str = str; - s->len = 0; - s->size = size; -} - -upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { - return &s->bytesink; -} - -static int32_t upb_stringsink_vprintf(void *_s, const char *fmt, va_list args) { - // TODO: detect realloc() errors. 
- upb_stringsink *s = _s; - int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args); - if (ret >= 0) s->len += ret; - return ret; -} - -int upb_stringsink_write(void *_s, const void *buf, int len) { - // TODO: detect realloc() errors. - upb_stringsink *s = _s; - if (s->len + len > s->size) { - while(s->len + len > s->size) s->size *= 2; - s->str = realloc(s->str, s->size); - } - memcpy(s->str + s->len, buf, len); - s->len += len; - return len; -} - -void upb_stringsink_init(upb_stringsink *s) { - static upb_bytesink_vtbl vtbl = { - upb_stringsink_write, - upb_stringsink_vprintf - }; - upb_bytesink_init(&s->bytesink, &vtbl); - s->str = NULL; -} diff --git a/upb/bytestream.h b/upb/bytestream.h index 41f767ac72..a0c4110292 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -1,629 +1,50 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2013 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * This file defines three core interfaces: - * - upb_bytesink: for writing streams of data. - * - upb_bytesrc: for reading streams of data. - * - upb_byteregion: for reading from a specific region of a upb_bytesrc; - * should be used by decoders instead of using upb_bytesrc directly. + * This file contains the standard ByteStream msgdef and some useful routines + * surrounding it. * - * These interfaces are used by streaming encoders and decoders: for example, a - * protobuf parser gets its input from a upb_byteregion. They are virtual base - * classes so concrete implementations can get the data from a fd, a FILE*, a - * string, etc. + * This is a mixed C/C++ interface that offers a full API to both languages. + * See the top-level README for more information. */ -// A upb_byteregion represents a region of data from a bytesrc. -// -// Parsers get data from this interface instead of a bytesrc because we often -// want to parse only a specific region of the input. 
For example, if we parse -// a string from our input but know that the string represents a protobuf, we -// can pass its upb_byteregion to an appropriate protobuf parser. -// -// Since the bytes may be coming from a file or network socket, bytes must be -// fetched before they can be read (though in some cases this fetch may be a -// no-op). "fetch" is the only operation on a byteregion that could fail or -// block, because it is the only operation that actually performs I/O. -// -// Bytes can be discarded when they are no longer needed. Parsers should -// always discard bytes they no longer need, both so the buffers can be freed -// when possible and to give better visibility into what bytes the parser is -// still using. -// -// start discard read fetch end -// ofs ofs ofs ofs ofs -// | |--->discard() | |--->fetch() | -// V V V V V -// +-------------+-------------------------+-----------------+-----------------+ -// | discarded | | | fetchable | -// +-------------+-------------------------+-----------------+-----------------+ -// | <------------- loaded ------------------> | -// | <- available -> | -// | <---------- remaining ----------> | -// -// Note that the start offset may be something other than zero! A byteregion -// is a view into an underlying bytesrc stream, and the region may start -// somewhere other than the beginning of that stream. -// -// The region can be either delimited or nondelimited. A non-delimited region -// will keep returning data until the underlying data source returns EOF. A -// delimited region will return EOF at a predetermined offset. -// -// end -// ofs -// | -// V -// +-----------------------+ -// | delimited region | <-- hard EOF, even if data source has more data. -// +-----------------------+ -// -// +------------------------ -// | nondelimited region Z <-- won't return EOF until data source hits EOF. 
-// +------------------------ -// -// TODO: if 64-bit math for stream offsets is a performance issue on -// non-64-bit machines, we could introduce a upb_off_t typedef that can be -// defined as a 32-bit type for applications that don't need to handle -// streams longer than 4GB. +#ifndef UPB_BYTESTREAM_H_ +#define UPB_BYTESTREAM_H_ +#include "upb/sink.h" +#include "upb/bytestream.upb.h" -#ifndef UPB_BYTESTREAM_H -#define UPB_BYTESTREAM_H +#define UPB_BYTESTREAM_BYTES &upb_bytestream_fields[0] -#include "upb.h" - -#ifdef __cplusplus -namespace upb { -class ByteRegion; -class StringSource; -} // namespace upb -typedef upb::StringSource upb_stringsrc; -extern "C" { -#else -struct upb_stringsrc; -typedef struct upb_stringsrc upb_stringsrc; -#endif - -typedef enum { - UPB_BYTE_OK = UPB_OK, - UPB_BYTE_WOULDBLOCK = UPB_SUSPENDED, - UPB_BYTE_ERROR = UPB_ERROR, - UPB_BYTE_EOF -} upb_bytesuccess_t; - -/* upb_bytesrc ****************************************************************/ - -// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as -// they become available, and to preserve some trailing amount of data before -// it is discarded. Consumers should not use upb_bytesrc directly, but rather -// should use a upb_byteregion (which allows access to a region of a bytesrc). -// -// upb_bytesrc is a virtual base class with implementations that get data from -// eg. a string, a cord, a file descriptor, a FILE*, etc. 
- -typedef upb_bytesuccess_t upb_bytesrc_fetch_func(void*, uint64_t, size_t*); -typedef void upb_bytesrc_discard_func(void*, uint64_t); -typedef void upb_bytesrc_copy_func(const void*, uint64_t, size_t, char*); -typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, size_t*); -typedef struct _upb_bytesrc_vtbl { - upb_bytesrc_fetch_func *fetch; - upb_bytesrc_discard_func *discard; - upb_bytesrc_copy_func *copy; - upb_bytesrc_getptr_func *getptr; -} upb_bytesrc_vtbl; - -typedef struct { - const upb_bytesrc_vtbl *vtbl; - upb_status status; -} upb_bytesrc; - -INLINE void upb_bytesrc_init(upb_bytesrc *src, const upb_bytesrc_vtbl *vtbl) { - src->vtbl = vtbl; - upb_status_init(&src->status); -} - -INLINE void upb_bytesrc_uninit(upb_bytesrc *src) { - upb_status_uninit(&src->status); -} - -// Fetches at least one byte starting at ofs, returning the success or failure -// of the operation. If UPB_BYTE_OK is returned, *read indicates the number of -// of bytes successfully fetched; any error or EOF status will be reflected in -// upb_bytesrc_status(). It is valid for bytes to be fetched multiple times, -// as long as the bytes have not been previously discarded. -INLINE upb_bytesuccess_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, - size_t *read) { - return src->vtbl->fetch(src, ofs, read); -} - -// Discards all data prior to ofs (except data that is pinned, if pinning -// support is added -- see TODO below). -INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) { - src->vtbl->discard(src, ofs); -} - -// Copies "len" bytes of data from ofs to "dst", which must be at least "len" -// bytes long. The given region must not be discarded. -INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, size_t len, - char *dst) { - src->vtbl->copy(src, ofs, len, dst); -} - -// Returns a pointer to the bytesrc's internal buffer, storing in *len how much -// data is available. The given offset must not be discarded. 
The returned -// buffer is valid for as long as its bytes are not discarded (in the case that -// part of the returned buffer is discarded, only the non-discarded bytes -// remain valid). -INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs, - size_t *len) { - return src->vtbl->getptr(src, ofs, len); -} - -// TODO: Add if/when there is a demonstrated need: -// -// // When the caller pins a region (which must not be already discarded), it -// // is guaranteed that the region will not be discarded (nor will the bytesrc -// // be destroyed) until the region is unpinned. However, not all bytesrc's -// // support pinning; a false return indicates that a pin was not possible. -// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, size_t len) { -// return src->vtbl->refregion(src, ofs, len); -// } -// -// // Releases some number of pinned bytes from the beginning of a pinned -// // region (which may be fewer than the total number of bytes pinned). -// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, size_t len, -// size_t bytes_to_release) { -// src->vtbl->unpin(src, ofs, len); -// } -// -// Adding pinning support would also involve adding a "pin_ofs" parameter to -// upb_bytesrc_fetch, so that the fetch can extend an already-pinned region. - - -/* upb_byteregion *************************************************************/ - -#define UPB_NONDELIMITED (0xffffffffffffffffULL) - -#ifdef __cplusplus -} // extern "C" - -class upb::ByteRegion { - public: - static const uint64_t kNondelimited = UPB_NONDELIMITED; - typedef upb_bytesuccess_t ByteSuccess; - - // Accessors for the regions bounds -- the meaning of these is described in - // the diagram above. - uint64_t start_ofs() const; - uint64_t discard_ofs() const; - uint64_t fetch_ofs() const; - uint64_t end_ofs() const; - - // Returns how many bytes are fetched and available for reading starting from - // offset "offset". 
- uint64_t BytesAvailable(uint64_t offset) const; - - // Returns the total number of bytes remaining after offset "offset", or - // kNondelimited if the byteregion is non-delimited. - uint64_t BytesRemaining(uint64_t offset) const; - - uint64_t Length() const; - - // Sets the value of this byteregion to be a subset of the given byteregion's - // data. The caller is responsible for releasing this region before the src - // region is released (unless the region is first pinned, if pinning support - // is added. see below). - void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len); - void Release(); - - // Attempts to fetch more data, extending the fetched range of this - // byteregion. Returns true if the fetched region was extended by at least - // one byte, false on EOF or error (see *s for details). - ByteSuccess Fetch(); - - // Fetches all remaining data, returning false if the operation failed (see - // *s for details). May only be used on delimited byteregions. - ByteSuccess FetchAll(); - - // Discards bytes from the byteregion up until ofs (which must be greater or - // equal to discard_ofs()). It is valid to discard bytes that have not been - // fetched (such bytes will never be fetched) but it is an error to discard - // past the end of a delimited byteregion. - void Discard(uint64_t ofs); - - // Copies "len" bytes of data into "dst", starting at ofs. The specified - // region must be available. - void Copy(uint64_t ofs, size_t len, char *dst) const; - - // Copies all bytes from the byteregion into dst. Requires that the entire - // byteregion is fetched and that none has been discarded. - void CopyAll(char *dst) const; - - // Returns a pointer to the internal buffer for the byteregion starting at - // offset "ofs." Stores the number of bytes available in this buffer in *len. - // The returned buffer is invalidated when the byteregion is reset or - // released, or when the bytes are discarded. 
If the byteregion is not - // currently pinned, the pointer is only valid for the lifetime of the parent - // byteregion. - const char *GetPtr(uint64_t ofs, size_t *len) const; - - // Copies the contents of the byteregion into a newly-allocated, - // NULL-terminated string. Requires that the byteregion is fully fetched. - char *StrDup() const; - - template void AssignToString(T* str); - -#else -struct upb_byteregion { -#endif - uint64_t start; - uint64_t discard; - uint64_t fetch; - uint64_t end; // UPB_NONDELIMITED if nondelimited. - upb_bytesrc *bytesrc; - bool toplevel; // If true, discards hit the underlying bytesrc. -}; - -#ifdef __cplusplus -extern "C" { -#endif - -// Native C API. -INLINE uint64_t upb_byteregion_startofs(const upb_byteregion *r) { - return r->start; -} -INLINE uint64_t upb_byteregion_discardofs(const upb_byteregion *r) { - return r->discard; -} -INLINE uint64_t upb_byteregion_fetchofs(const upb_byteregion *r) { - return r->fetch; -} -INLINE uint64_t upb_byteregion_endofs(const upb_byteregion *r) { - return r->end; -} -INLINE uint64_t upb_byteregion_available(const upb_byteregion *r, uint64_t o) { - assert(o >= upb_byteregion_discardofs(r)); - assert(o <= r->fetch); // Could relax this. - return r->fetch - o; -} -INLINE uint64_t upb_byteregion_remaining(const upb_byteregion *r, uint64_t o) { - return r->end == UPB_NONDELIMITED ? UPB_NONDELIMITED : r->end - o; -} - -INLINE uint64_t upb_byteregion_len(const upb_byteregion *r) { - return upb_byteregion_remaining(r, r->start); -} -void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src, - uint64_t ofs, uint64_t len); -void upb_byteregion_release(upb_byteregion *r); -upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r); -INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) { - assert(upb_byteregion_len(r) != UPB_NONDELIMITED); - upb_bytesuccess_t ret; - do { - ret = upb_byteregion_fetch(r); - } while (ret == UPB_BYTE_OK); - return ret == UPB_BYTE_EOF ? 
UPB_BYTE_OK : ret; -} -INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) { - assert(ofs >= upb_byteregion_discardofs(r)); - assert(ofs <= upb_byteregion_endofs(r)); - r->discard = ofs; - if (ofs > r->fetch) r->fetch = ofs; - if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs); -} -INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs, - size_t len, char *dst) { - assert(ofs >= upb_byteregion_discardofs(r)); - assert(len <= upb_byteregion_available(r, ofs)); - upb_bytesrc_copy(r->bytesrc, ofs, len, dst); -} -INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) { - assert(r->start == r->discard && r->end == r->fetch); - upb_byteregion_copy(r, r->start, upb_byteregion_len(r), dst); -} -INLINE const char *upb_byteregion_getptr(const upb_byteregion *r, - uint64_t ofs, size_t *len) { - assert(ofs >= upb_byteregion_discardofs(r)); - const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len); - *len = UPB_MIN(*len, upb_byteregion_available(r, ofs)); +// A convenience method that handles the start/end calls and tracks overall +// success. +UPB_INLINE bool upb_bytestream_putstr(upb_sink *s, const char *buf, size_t n) { + bool ret = + upb_sink_startmsg(s) && + upb_sink_startstr(s, UPB_BYTESTREAM_BYTES_STARTSTR, n) && + upb_sink_putstring(s, UPB_BYTESTREAM_BYTES_STRING, buf, n) == n && + upb_sink_endstr(s, UPB_BYTESTREAM_BYTES_ENDSTR); + if (ret) upb_sink_endmsg(s); return ret; } -// TODO: add if/when there is a demonstrated need. -// -// // Pins this byteregion's bytes in memory, allowing it to outlive its parent -// // byteregion. Normally a byteregion may only be used while its parent is -// // still valid, but a pinned byteregion may continue to be used until it is -// // reset or released. A byteregion must be fully fetched to be pinned -// // (this implies that the byteregion must be delimited). -// // -// // In some cases this operation may cause the input data to be copied. 
-// // -// // void upb_byteregion_pin(upb_byteregion *r); - -// Convenience functions for creating and destroying a byteregion with a simple -// string as its data. These are relatively inefficient compared with creating -// your own bytesrc (they call malloc() and copy the string data) so should not -// be used on any critical path. -// -// The string data in the returned region is guaranteed to be contiguous and -// NULL-terminated. -upb_byteregion *upb_byteregion_new(const void *str); -upb_byteregion *upb_byteregion_newl(const void *str, size_t len); -// May *only* be called on a byteregion created with upb_byteregion_new[l]()! -void upb_byteregion_free(upb_byteregion *r); - -// Copies the contents of the byteregion into a newly-allocated, NULL-terminated -// string. Requires that the byteregion is fully fetched. -char *upb_byteregion_strdup(const upb_byteregion *r); - - -/* upb_bytesink ***************************************************************/ - -// A bytesink is an interface that allows the caller to push byte-wise data. -// It is very simple -- the only special capability is the ability to "rewind" -// the stream, which is really only a mechanism of having the bytesink ignore -// some subsequent calls. -typedef int upb_bytesink_write_func(void*, const void*, int); -typedef int upb_bytesink_vprintf_func(void*, const char *fmt, va_list args); - -typedef struct { - upb_bytesink_write_func *write; - upb_bytesink_vprintf_func *vprintf; -} upb_bytesink_vtbl; - -typedef struct { - upb_bytesink_vtbl *vtbl; - upb_status status; - uint64_t offset; -} upb_bytesink; - -// Should be called by derived classes. 
-void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl); -void upb_bytesink_uninit(upb_bytesink *sink); - -INLINE int upb_bytesink_write(upb_bytesink *s, const void *buf, int len) { - return s->vtbl->write(s, buf, len); -} - -#define upb_bytesink_writestr(s, buf) upb_bytesink_write(s, buf, strlen(buf)) - -// Returns the number of bytes written or -1 on error. -INLINE int upb_bytesink_printf(upb_bytesink *sink, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - uint32_t ret = sink->vtbl->vprintf(sink, fmt, args); - va_end(args); - return ret; -} - -INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) { - return upb_bytesink_write(sink, &ch, 1); -} - -INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) { - for (int i = 0; i < len; i++) - if (upb_bytesink_write(sink, &ch, 1) < 0) - return -1; - return len; -} - -INLINE uint64_t upb_bytesink_getoffset(upb_bytesink *sink) { - return sink->offset; -} - -// Rewinds the stream to the given offset. This cannot actually "unput" any -// data, it is for situations like: -// -// // If false is returned (because of error), call again later to resume. -// bool write_some_data(upb_bytesink *sink, int indent) { -// uint64_t start_offset = upb_bytesink_getoffset(sink); -// if (upb_bytesink_writestr(sink, "Some data") < 0) goto err; -// if (upb_bytesink_putrepeated(sink, ' ', indent) < 0) goto err; -// return true; -// err: -// upb_bytesink_rewind(sink, start_offset); -// return false; -// } -// -// The subsequent bytesink writes *must* be identical to the writes that were -// rewinded past. -INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) { - // TODO - (void)sink; - (void)offset; -} - -// OPT: add getappendbuf() -// OPT: add writefrombytesrc() -// TODO: add flush() - - -/* upb_stringsrc **************************************************************/ - -// bytesrc/bytesink for a simple contiguous string. 
- #ifdef __cplusplus -} // extern "C" - -class upb::StringSource { - public: - StringSource(); - template explicit StringSource(const T& str); - StringSource(const char *data, size_t len); - ~StringSource(); - - // Resets the stringsrc to a state where it will vend the given string. The - // string data must be valid until the stringsrc is reset again or destroyed. - void Reset(const char* data, size_t len); - template void Reset(const T& str); - - // Returns the top-level upb_byteregion* for this stringsrc. Invalidated - // when the stringsrc is reset. - ByteRegion* AllBytes(); - - upb_bytesrc* ByteSource(); - -#else -struct upb_stringsrc { -#endif - upb_bytesrc bytesrc; - const char *str; - size_t len; - upb_byteregion byteregion; -}; - -#ifdef __cplusplus -extern "C" { -#endif - -// Native C API. -void upb_stringsrc_init(upb_stringsrc *s); -void upb_stringsrc_uninit(upb_stringsrc *s); -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); -INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { - return &s->bytesrc; -} -INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) { - return &s->byteregion; -} - - -/* upb_stringsink *************************************************************/ - -struct _upb_stringsink { - upb_bytesink bytesink; - char *str; - size_t len, size; -}; -typedef struct _upb_stringsink upb_stringsink; - -// Create/free a stringsrc. -void upb_stringsink_init(upb_stringsink *s); -void upb_stringsink_uninit(upb_stringsink *s); - -// Resets the sink's string to "str", which the sink takes ownership of. -// "str" may be NULL, which will make the sink allocate a new string. -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t len); - -// Releases ownership of the returned string (which is "len" bytes long) and -// resets the internal string to be empty again (as if reset were called with -// NULL). 
-const char *upb_stringsink_release(upb_stringsink *s, size_t *len); - -// Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. -upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s); - -#ifdef __cplusplus -} // extern "C" namespace upb { -inline uint64_t ByteRegion::start_ofs() const { - return upb_byteregion_startofs(this); -} -inline uint64_t ByteRegion::discard_ofs() const { - return upb_byteregion_discardofs(this); -} -inline uint64_t ByteRegion::fetch_ofs() const { - return upb_byteregion_fetchofs(this); -} -inline uint64_t ByteRegion::end_ofs() const { - return upb_byteregion_endofs(this); -} -inline uint64_t ByteRegion::BytesAvailable(uint64_t offset) const { - return upb_byteregion_available(this, offset); -} -inline uint64_t ByteRegion::BytesRemaining(uint64_t offset) const { - return upb_byteregion_remaining(this, offset); -} -inline uint64_t ByteRegion::Length() const { - return upb_byteregion_len(this); -} -inline void ByteRegion::Reset( - const upb_byteregion *src, uint64_t ofs, uint64_t len) { - upb_byteregion_reset(this, src, ofs, len); -} -inline void ByteRegion::Release() { - upb_byteregion_release(this); -} -inline ByteRegion::ByteSuccess ByteRegion::Fetch() { - return upb_byteregion_fetch(this); -} -inline ByteRegion::ByteSuccess ByteRegion::FetchAll() { - return upb_byteregion_fetchall(this); -} -inline void ByteRegion::Discard(uint64_t ofs) { - upb_byteregion_discard(this, ofs); -} -inline void ByteRegion::Copy(uint64_t ofs, size_t len, char *dst) const { - upb_byteregion_copy(this, ofs, len, dst); -} -inline void ByteRegion::CopyAll(char *dst) const { - upb_byteregion_copyall(this, dst); -} -inline const char *ByteRegion::GetPtr(uint64_t ofs, size_t *len) const { - return upb_byteregion_getptr(this, ofs, len); -} -inline char *ByteRegion::StrDup() const { - return upb_byteregion_strdup(this); -} -template void ByteRegion::AssignToString(T* str) { - uint64_t ofs = start_ofs(); - size_t len; - const char *ptr = 
GetPtr(ofs, &len); - // Emperically calling reserve() here is counterproductive and slows down - // benchmarks. If the parsing is happening in a tight loop that is reusing - // the string object, there is probably enough data reserved already and - // the reserve() call is extra overhead. - str->assign(ptr, len); - ofs += len; - while (ofs < end_ofs()) { - ptr = GetPtr(ofs, &len); - str->append(ptr, len); - ofs += len; - } -} - -template <> inline ByteRegion* GetValue(Value v) { - return static_cast(upb_value_getbyteregion(v)); -} - -template <> inline Value MakeValue(ByteRegion* v) { - return upb_value_byteregion(v); +inline bool PutStringToBytestream(Sink* s, const char* buf, size_t n) { + return upb_bytestream_putstr(s, buf, n); } -inline StringSource::StringSource() { upb_stringsrc_init(this); } -template StringSource::StringSource(const T& str) { - upb_stringsrc_init(this); - Reset(str); -} -inline StringSource::StringSource(const char *data, size_t len) { - upb_stringsrc_init(this); - Reset(data, len); -} -inline StringSource::~StringSource() { - upb_stringsrc_uninit(this); -} -inline void StringSource::Reset(const char* data, size_t len) { - upb_stringsrc_reset(this, data, len); -} -template void StringSource::Reset(const T& str) { - upb_stringsrc_reset(this, str.c_str(), str.size()); -} -inline ByteRegion* StringSource::AllBytes() { - return upb_stringsrc_allbytes(this); -} -inline upb_bytesrc* StringSource::ByteSource() { - return upb_stringsrc_bytesrc(this); +template bool PutStringToBytestream(Sink* s, T str) { + return upb_bytestream_putstr(s, str.c_str(), str.size()); } } // namespace upb #endif -#endif +#endif // UPB_BYTESTREAM_H_ diff --git a/upb/bytestream.proto b/upb/bytestream.proto new file mode 100644 index 0000000000..12879253de --- /dev/null +++ b/upb/bytestream.proto @@ -0,0 +1,14 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2013 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// This file contains a proto definition for use by handlers that consume a +// simple byte stream (like traditional UNIX pipes). + +package upb; + +message ByteStream { + optional bytes bytes = 1; +} diff --git a/upb/bytestream.upb.c b/upb/bytestream.upb.c new file mode 100644 index 0000000000..03388ab826 --- /dev/null +++ b/upb/bytestream.upb.c @@ -0,0 +1,40 @@ +// This file was generated by upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. + +#include "upb/def.h" + +const upb_msgdef upb_bytestream_msgs[1]; +const upb_fielddef upb_bytestream_fields[1]; +const upb_enumdef upb_bytestream_enums[0]; +const upb_tabent upb_bytestream_strentries[4]; +const upb_tabent upb_bytestream_intentries[0]; +const _upb_value upb_bytestream_arrays[3]; + +const upb_msgdef upb_bytestream_msgs[1] = { + UPB_MSGDEF_INIT("upb.ByteStream", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &upb_bytestream_arrays[0], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &upb_bytestream_strentries[0]), 3), +}; + +const upb_fielddef upb_bytestream_fields[1] = { + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, "bytes", 1, &upb_bytestream_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE), +}; + +const upb_enumdef upb_bytestream_enums[0] = { +}; + +const upb_tabent upb_bytestream_strentries[4] = { + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_STR("bytes"), UPB_VALUE_INIT_CONSTPTR(&upb_bytestream_fields[0]), NULL}, +}; + +const upb_tabent upb_bytestream_intentries[0] = { +}; + +const _upb_value upb_bytestream_arrays[3] = { + UPB_ARRAY_EMPTYENT, + UPB_VALUE_INIT_CONSTPTR(&upb_bytestream_fields[0]), + UPB_ARRAY_EMPTYENT, +}; + diff --git a/upb/bytestream.upb.h b/upb/bytestream.upb.h new file mode 100644 index 0000000000..304c426f37 --- /dev/null +++ b/upb/bytestream.upb.h @@ -0,0 +1,37 @@ +// This file was generated by 
upbc (the upb compiler). +// Do not edit -- your changes will be discarded when the file is +// regenerated. + +#ifndef UPB_BYTESTREAM_UPB_H_ +#define UPB_BYTESTREAM_UPB_H_ + +#include "upb/def.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Enums + +// Do not refer to these forward declarations; use the constants +// below. +extern const upb_msgdef upb_bytestream_msgs[1]; +extern const upb_fielddef upb_bytestream_fields[1]; +extern const upb_enumdef upb_bytestream_enums[0]; + +// Constants for references to defs. +// We hide these behind macros to decouple users from the +// details of how we have statically defined them (ie. whether +// each def has its own symbol or lives in an array of defs). +#define UPB_BYTESTREAM &upb_bytestream_msgs[0] + +// Selector definitions. +#define UPB_BYTESTREAM_BYTES_ENDSTR 2 +#define UPB_BYTESTREAM_BYTES_STRING 0 +#define UPB_BYTESTREAM_BYTES_STARTSTR 1 + +#ifdef __cplusplus +}; // extern "C" +#endif + +#endif // UPB_BYTESTREAM_UPB_H_ diff --git a/upb/def.c b/upb/def.c index d858c39380..aad0e51a90 100644 --- a/upb/def.c +++ b/upb/def.c @@ -9,9 +9,25 @@ #include #include -#include "upb/bytestream.h" +#include "upb/descriptor/descriptor.upb.h" #include "upb/handlers.h" +typedef struct { + size_t len; + char str[1]; // Null-terminated string data follows. +} str_t; + +static str_t *newstr(const char *data, size_t len) { + str_t *ret = malloc(sizeof(*ret) + len); + if (!ret) return NULL; + ret->len = len; + memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; +} + +static void freestr(str_t *s) { free(s); } + // isalpha() etc. from are locale-dependent, which we don't want. 
static bool upb_isbetween(char c, char low, char high) { return c >= low && c <= high; @@ -113,6 +129,10 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) { upb_status_seterrliteral(s, "fielddef must have name and number set"); return false; } + if (!f->type_is_set_) { + upb_status_seterrliteral(s, "fielddef type was not initialized"); + return false; + } if (upb_fielddef_hassubdef(f)) { if (f->subdef_is_symbolic) { upb_status_seterrf(s, @@ -281,7 +301,7 @@ bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, upb_status_seterrliteral(status, "out of memory"); return false; } - if (!upb_inttable_lookup(&e->iton, num) && + if (!upb_inttable_lookup(&e->iton, num, NULL) && !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) { upb_status_seterrliteral(status, "out of memory"); upb_strtable_remove(&e->ntoi, name, NULL); @@ -310,15 +330,18 @@ void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { - const upb_value *v = upb_strtable_lookup(&def->ntoi, name); - if (!v) return false; - if (num) *num = upb_value_getint32(*v); + upb_value v; + if (!upb_strtable_lookup(&def->ntoi, name, &v)) { + return false; + } + if (num) *num = upb_value_getint32(v); return true; } const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { - const upb_value *v = upb_inttable_lookup32(&def->iton, num); - return v ? upb_value_getcstr(*v) : NULL; + upb_value v; + return upb_inttable_lookup32(&def->iton, num, &v) ? 
+ upb_value_getcstr(v) : NULL; } const char *upb_enum_iter_name(upb_enum_iter *iter) { @@ -332,37 +355,11 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter) { /* upb_fielddef ***************************************************************/ -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(ctype, inmemory_type) \ - {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} - -const upb_typeinfo upb_types[UPB_NUM_TYPES] = { - TYPE_INFO(void*, PTR), // (unused) - TYPE_INFO(double, DOUBLE), // DOUBLE - TYPE_INFO(float, FLOAT), // FLOAT - TYPE_INFO(int64_t, INT64), // INT64 - TYPE_INFO(uint64_t, UINT64), // UINT64 - TYPE_INFO(int32_t, INT32), // INT32 - TYPE_INFO(uint64_t, UINT64), // FIXED64 - TYPE_INFO(uint32_t, UINT32), // FIXED32 - TYPE_INFO(bool, BOOL), // BOOL - TYPE_INFO(void*, BYTEREGION), // STRING - TYPE_INFO(void*, PTR), // GROUP - TYPE_INFO(void*, PTR), // MESSAGE - TYPE_INFO(void*, BYTEREGION), // BYTES - TYPE_INFO(uint32_t, UINT32), // UINT32 - TYPE_INFO(int32_t, INT32), // ENUM - TYPE_INFO(int32_t, INT32), // SFIXED32 - TYPE_INFO(int64_t, INT64), // SFIXED64 - TYPE_INFO(int32_t, INT32), // SINT32 - TYPE_INFO(int64_t, INT64), // SINT64 -}; - static void upb_fielddef_init_default(upb_fielddef *f); static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); + if (f->type_is_set_ && f->default_is_string) + freestr(upb_value_getptr(f->defaultval)); } static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, @@ -396,16 +393,21 @@ upb_fielddef *upb_fielddef_new(const void *owner) { f->msgdef = NULL; f->sub.def = NULL; f->subdef_is_symbolic = false; - f->subdef_is_owned = false; - f->label_ = UPB_LABEL(OPTIONAL); - - // These are initialized to be invalid; the user must set them explicitly. - // Could relax this later if it's convenient and non-confusing to have a - // defaults for them. 
- f->type_ = UPB_TYPE_NONE; + f->label_ = UPB_LABEL_OPTIONAL; + f->type_ = UPB_TYPE_INT32; f->number_ = 0; - - upb_fielddef_init_default(f); + f->type_is_set_ = false; + f->tagdelim = false; + + // For the moment we default this to UPB_INTFMT_VARIABLE, since it will work + // with all integer types and is in some sense more "default" since the most + // normal-looking proto2 types int32/int64/uint32/uint64 use variable. + // + // Other options to consider: + // - there is no default; users must set this manually (like type). + // - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to + // be an optimal default for signed integers. + f->intfmt = UPB_INTFMT_VARIABLE; return f; } @@ -417,11 +419,8 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef_setnumber(newf, upb_fielddef_number(f)); upb_fielddef_setname(newf, upb_fielddef_name(f)); if (f->default_is_string) { - upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); - size_t len; - const char *ptr = upb_byteregion_getptr(r, 0, &len); - assert(len == upb_byteregion_len(r)); - upb_fielddef_setdefaultstr(newf, ptr, len); + str_t *s = upb_value_getptr(upb_fielddef_default(f)); + upb_fielddef_setdefaultstr(newf, s->str, s->len); } else { upb_fielddef_setdefault(newf, upb_fielddef_default(f)); } @@ -468,7 +467,12 @@ void upb_fielddef_checkref(const upb_fielddef *f, const void *owner) { upb_def_checkref(upb_upcast(f), owner); } +bool upb_fielddef_typeisset(const upb_fielddef *f) { + return f->type_is_set_; +} + upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + assert(f->type_is_set_); return f->type_; } @@ -476,7 +480,17 @@ upb_label_t upb_fielddef_label(const upb_fielddef *f) { return f->label_; } -uint32_t upb_fielddef_number(const upb_fielddef *f) { return f->number_; } +upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) { + return f->intfmt; +} + +bool upb_fielddef_istagdelim(const upb_fielddef *f) { + return f->tagdelim; +} + 
+uint32_t upb_fielddef_number(const upb_fielddef *f) { + return f->number_; +} const char *upb_fielddef_name(const upb_fielddef *f) { return upb_def_fullname(upb_upcast(f)); @@ -495,34 +509,37 @@ bool upb_fielddef_setname(upb_fielddef *f, const char *name) { } upb_value upb_fielddef_default(const upb_fielddef *f) { + assert(f->type_is_set_); return f->defaultval; } +const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) { + assert(f->type_is_set_); + if (f->default_is_string) { + str_t *str = upb_value_getptr(f->defaultval); + if (len) *len = str->len; + return str->str; + } + return NULL; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { - case UPB_TYPE(DOUBLE): upb_value_setdouble(&f->defaultval, 0); break; - case UPB_TYPE(FLOAT): upb_value_setfloat(&f->defaultval, 0); break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): upb_value_setuint64(&f->defaultval, 0); break; - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): upb_value_setint64(&f->defaultval, 0); break; - case UPB_TYPE(ENUM): - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): upb_value_setint32(&f->defaultval, 0); break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): upb_value_setuint32(&f->defaultval, 0); break; - case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); - f->default_is_string = true; - break; - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; - case UPB_TYPE_NONE: break; + case UPB_TYPE_DOUBLE: upb_value_setdouble(&f->defaultval, 0); break; + case UPB_TYPE_FLOAT: upb_value_setfloat(&f->defaultval, 0); break; + case UPB_TYPE_UINT64: upb_value_setuint64(&f->defaultval, 0); break; + case UPB_TYPE_INT64: upb_value_setint64(&f->defaultval, 0); break; + case UPB_TYPE_ENUM: 
+ case UPB_TYPE_INT32: upb_value_setint32(&f->defaultval, 0); break; + case UPB_TYPE_UINT32: upb_value_setuint32(&f->defaultval, 0); break; + case UPB_TYPE_BOOL: upb_value_setbool(&f->defaultval, false); break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + upb_value_setptr(&f->defaultval, newstr("", 0)); + f->default_is_string = true; + break; + case UPB_TYPE_MESSAGE: upb_value_setptr(&f->defaultval, NULL); break; } } @@ -554,89 +571,199 @@ bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { assert(!upb_fielddef_isfrozen(f)); upb_fielddef_uninit_default(f); f->type_ = type; + f->type_is_set_ = true; upb_fielddef_init_default(f); return true; } +bool upb_fielddef_setdescriptortype(upb_fielddef *f, int type) { + assert(!upb_fielddef_isfrozen(f)); + switch (type) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + upb_fielddef_settype(f, UPB_TYPE_DOUBLE); + break; + case UPB_DESCRIPTOR_TYPE_FLOAT: + upb_fielddef_settype(f, UPB_TYPE_FLOAT); + break; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_SINT64: + upb_fielddef_settype(f, UPB_TYPE_INT64); + break; + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + upb_fielddef_settype(f, UPB_TYPE_UINT64); + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + case UPB_DESCRIPTOR_TYPE_SINT32: + upb_fielddef_settype(f, UPB_TYPE_INT32); + break; + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_FIXED32: + upb_fielddef_settype(f, UPB_TYPE_UINT32); + break; + case UPB_DESCRIPTOR_TYPE_BOOL: + upb_fielddef_settype(f, UPB_TYPE_BOOL); + break; + case UPB_DESCRIPTOR_TYPE_STRING: + upb_fielddef_settype(f, UPB_TYPE_STRING); + break; + case UPB_DESCRIPTOR_TYPE_BYTES: + upb_fielddef_settype(f, UPB_TYPE_BYTES); + break; + case UPB_DESCRIPTOR_TYPE_GROUP: + case UPB_DESCRIPTOR_TYPE_MESSAGE: + upb_fielddef_settype(f, UPB_TYPE_MESSAGE); + break; + case UPB_DESCRIPTOR_TYPE_ENUM: + upb_fielddef_settype(f, UPB_TYPE_ENUM); + break; + 
default: + return false; + } + + if (type == UPB_DESCRIPTOR_TYPE_FIXED64 || + type == UPB_DESCRIPTOR_TYPE_FIXED32 || + type == UPB_DESCRIPTOR_TYPE_SFIXED64 || + type == UPB_DESCRIPTOR_TYPE_SFIXED32) { + upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED); + } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 || + type == UPB_DESCRIPTOR_TYPE_SINT32) { + upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG); + } else { + upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE); + } + + upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP); + + return true; +} + +upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT; + case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE; + case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL; + case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING; + case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES; + case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM; + case UPB_TYPE_INT32: + switch (upb_fielddef_intfmt(f)) { + case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32; + case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32; + case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32; + } + case UPB_TYPE_INT64: + switch (upb_fielddef_intfmt(f)) { + case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64; + case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64; + case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64; + } + case UPB_TYPE_UINT32: + switch (upb_fielddef_intfmt(f)) { + case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32; + case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32; + case UPB_INTFMT_ZIGZAG: return -1; + } + case UPB_TYPE_UINT64: + switch (upb_fielddef_intfmt(f)) { + case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64; + case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64; + case UPB_INTFMT_ZIGZAG: return -1; + } + case UPB_TYPE_MESSAGE: + return upb_fielddef_istagdelim(f) 
? + UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE; + } +} + bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { assert(!upb_fielddef_isfrozen(f)); f->label_ = label; return true; } +bool upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) { + assert(!upb_fielddef_isfrozen(f)); + f->intfmt = fmt; + return true; +} + +bool upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) { + assert(!upb_fielddef_isfrozen(f)); + f->tagdelim = tag_delim; + return true; +} + void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { + assert(f->type_is_set_); assert(!upb_fielddef_isfrozen(f)); assert(!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f)); if (f->default_is_string) { - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); - upb_byteregion_free(bytes); + str_t *s = upb_value_getptr(f->defaultval); + assert(s); + freestr(s); } f->defaultval = value; f->default_is_string = false; } bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { - assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE(ENUM)); - if (f->type_ == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM); + if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false)) + return false; if (f->default_is_string) { - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); - upb_byteregion_free(bytes); + str_t *s = upb_value_getptr(f->defaultval); + assert(s); + freestr(s); } else { - assert(f->type_ == UPB_TYPE(ENUM)); + assert(f->type_ == UPB_TYPE_ENUM); } - upb_byteregion *r = upb_byteregion_newl(str, len); - upb_value_setbyteregion(&f->defaultval, r); - upb_bytesuccess_t ret = upb_byteregion_fetch(r); - UPB_ASSERT_VAR(ret, ret == (len == 0 ? 
UPB_BYTE_EOF : UPB_BYTE_OK)); - assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); + str_t *s = newstr(str, len); + upb_value_setptr(&f->defaultval, s); f->default_is_string = true; return true; } void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { + assert(f->type_is_set_); upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0); } bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { - return f->default_is_string && f->type_ == UPB_TYPE_ENUM; + return f->type_is_set_ && + f->default_is_string && + f->type_ == UPB_TYPE_ENUM; } bool upb_fielddef_resolvedefault(upb_fielddef *f) { if (!upb_fielddef_default_is_symbolic(f)) return true; - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + str_t *s = upb_value_getptr(f->defaultval); const upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef(f)); - assert(bytes); // Points to either a real default or the empty string. + assert(s); // Points to either a real default or the empty string. assert(e); - if (upb_byteregion_len(bytes) == 0) { + if (s->len == 0) { // The "default default" for an enum is the first defined value. upb_value_setint32(&f->defaultval, e->defaultval); } else { - size_t len; int32_t val = 0; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). 
- const char *ptr = upb_byteregion_getptr( - bytes, upb_byteregion_startofs(bytes), &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk - if (!upb_enumdef_ntoi(e, ptr, &val)) { + if (!upb_enumdef_ntoi(e, s->str, &val)) return false; - } upb_value_setint32(&f->defaultval, val); } f->default_is_string = false; - upb_byteregion_free(bytes); + freestr(s); return true; } static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef) { - if (f->type_ == UPB_TYPE(MESSAGE) || f->type_ == UPB_TYPE(GROUP)) + if (f->type_ == UPB_TYPE_MESSAGE) return upb_dyncast_msgdef(subdef) != NULL; - else if (f->type_ == UPB_TYPE(ENUM)) + else if (f->type_ == UPB_TYPE_ENUM) return upb_dyncast_enumdef(subdef) != NULL; else { assert(false); @@ -673,8 +800,7 @@ bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name) { } bool upb_fielddef_issubmsg(const upb_fielddef *f) { - return upb_fielddef_type(f) == UPB_TYPE_GROUP || - upb_fielddef_type(f) == UPB_TYPE_MESSAGE; + return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; } bool upb_fielddef_isstring(const upb_fielddef *f) { @@ -691,7 +817,7 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) { } bool upb_fielddef_hassubdef(const upb_fielddef *f) { - return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM); + return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; } @@ -808,13 +934,15 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, } const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - const upb_value *val = upb_inttable_lookup32(&m->itof, i); - return val ? (const upb_fielddef*)upb_value_getptr(*val) : NULL; + upb_value val; + return upb_inttable_lookup32(&m->itof, i, &val) ? + upb_value_getptr(val) : NULL; } const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { - const upb_value *val = upb_strtable_lookup(&m->ntof, name); - return val ? 
(upb_fielddef*)upb_value_getptr(*val) : NULL; + upb_value val; + return upb_strtable_lookup(&m->ntof, name, &val) ? + upb_value_getptr(val) : NULL; } upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) { diff --git a/upb/def.h b/upb/def.h index 4210e8c861..18528d158b 100644 --- a/upb/def.h +++ b/upb/def.h @@ -153,50 +153,59 @@ bool upb_def_freeze(upb_def *const*defs, int n, upb_status *status); /* upb::FieldDef **************************************************************/ -// We choose these to match descriptor.proto. Clients may use UPB_TYPE() and -// UPB_LABEL() instead of referencing these directly. +// The types a field can have. Note that this list is not identical to the +// types defined in descriptor.proto, which gives INT32 and SINT32 separate +// types (we distinguish the two with the "integer encoding" enum below). typedef enum { - UPB_TYPE_NONE = -1, // Internal-only, may be removed. - UPB_TYPE_DOUBLE = 1, - UPB_TYPE_FLOAT = 2, - UPB_TYPE_INT64 = 3, - UPB_TYPE_UINT64 = 4, - UPB_TYPE_INT32 = 5, - UPB_TYPE_FIXED64 = 6, - UPB_TYPE_FIXED32 = 7, - UPB_TYPE_BOOL = 8, - UPB_TYPE_STRING = 9, - UPB_TYPE_GROUP = 10, - UPB_TYPE_MESSAGE = 11, - UPB_TYPE_BYTES = 12, - UPB_TYPE_UINT32 = 13, - UPB_TYPE_ENUM = 14, - UPB_TYPE_SFIXED32 = 15, - UPB_TYPE_SFIXED64 = 16, - UPB_TYPE_SINT32 = 17, - UPB_TYPE_SINT64 = 18, + UPB_TYPE_FLOAT = 1, + UPB_TYPE_DOUBLE = 2, + UPB_TYPE_BOOL = 3, + UPB_TYPE_STRING = 4, + UPB_TYPE_BYTES = 5, + UPB_TYPE_MESSAGE = 6, + UPB_TYPE_ENUM = 7, // Enum values are int32. + UPB_TYPE_INT32 = 8, + UPB_TYPE_UINT32 = 9, + UPB_TYPE_INT64 = 10, + UPB_TYPE_UINT64 = 11, } upb_fieldtype_t; -#define UPB_NUM_TYPES 19 - +// The repeated-ness of each field; this matches descriptor.proto. typedef enum { UPB_LABEL_OPTIONAL = 1, UPB_LABEL_REQUIRED = 2, UPB_LABEL_REPEATED = 3, } upb_label_t; -// These macros are provided for legacy reasons. 
-#define UPB_TYPE(type) UPB_TYPE_ ## type -#define UPB_LABEL(type) UPB_LABEL_ ## type - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 -} upb_typeinfo; +// How integers should be encoded in serializations that offer multiple +// integer encoding methods. +typedef enum { + UPB_INTFMT_VARIABLE = 1, + UPB_INTFMT_FIXED = 2, + UPB_INTFMT_ZIGZAG = 3, // Only for signed types (INT32/INT64). +} upb_intfmt_t; -extern const upb_typeinfo upb_types[UPB_NUM_TYPES]; +// Descriptor types, as defined in descriptor.proto. +typedef enum { + UPB_DESCRIPTOR_TYPE_DOUBLE = 1, + UPB_DESCRIPTOR_TYPE_FLOAT = 2, + UPB_DESCRIPTOR_TYPE_INT64 = 3, + UPB_DESCRIPTOR_TYPE_UINT64 = 4, + UPB_DESCRIPTOR_TYPE_INT32 = 5, + UPB_DESCRIPTOR_TYPE_FIXED64 = 6, + UPB_DESCRIPTOR_TYPE_FIXED32 = 7, + UPB_DESCRIPTOR_TYPE_BOOL = 8, + UPB_DESCRIPTOR_TYPE_STRING = 9, + UPB_DESCRIPTOR_TYPE_GROUP = 10, + UPB_DESCRIPTOR_TYPE_MESSAGE = 11, + UPB_DESCRIPTOR_TYPE_BYTES = 12, + UPB_DESCRIPTOR_TYPE_UINT32 = 13, + UPB_DESCRIPTOR_TYPE_ENUM = 14, + UPB_DESCRIPTOR_TYPE_SFIXED32 = 15, + UPB_DESCRIPTOR_TYPE_SFIXED64 = 16, + UPB_DESCRIPTOR_TYPE_SINT32 = 17, + UPB_DESCRIPTOR_TYPE_SINT64 = 18, +} upb_descriptortype_t; #ifdef __cplusplus @@ -207,6 +216,8 @@ class upb::FieldDef { public: typedef upb_fieldtype_t Type; typedef upb_label_t Label; + typedef upb_intfmt_t IntegerFormat; + typedef upb_descriptortype_t DescriptorType; // Returns NULL if memory allocation failed. static FieldDef* New(const void *owner); @@ -235,17 +246,27 @@ class upb::FieldDef { bool set_full_name(const char *fullname); bool set_full_name(const std::string& fullname); - Type type() const; // Return UPB_TYPE_NONE if uninitialized. - Label label() const; // Defaults to UPB_LABEL_OPTIONAL. - uint32_t number() const; // Returns 0 if uninitialized. + bool type_is_set() const; // Whether set_type() has been called. 
+ Type type() const; // Requires that type_is_set() == true. + Label label() const; // Defaults to UPB_LABEL_OPTIONAL. + uint32_t number() const; // Returns 0 if uninitialized. const MessageDef* message_def() const; - // "number" and "name" must be set before the fielddef is added to a msgdef. - // For the moment we do not allow these to be set once the fielddef is added - // to a msgdef -- this could be relaxed in the future. + // Gets/sets the field's type according to the enum in descriptor.proto. + // This is not the same as UPB_TYPE_*, because it distinguishes between + // (for example) INT32 and SINT32, whereas our "type" enum does not. + // This return of descriptor_type() is a function of type(), + // integer_format(), and is_tag_delimited(). Likewise set_descriptor_type() + // sets all three appropriately. + DescriptorType descriptor_type() const; + bool set_descriptor_type(DescriptorType type); + + // "number" and "name" must be set before the FieldDef is added to a + // MessageDef, and may not be set after that. "type" must be set explicitly + // before the fielddef is finalized. bool set_number(uint32_t number); - bool set_type(upb_fieldtype_t type); - bool set_label(upb_label_t label); + bool set_type(Type type); + bool set_label(Label label); // These are the same as full_name()/set_full_name(), but since fielddefs // most often use simple, non-qualified names, we provide this accessor @@ -255,15 +276,26 @@ class upb::FieldDef { bool set_name(const std::string& name); const char *name() const; + // Convenient field type tests. bool IsSubMessage() const; bool IsString() const; bool IsSequence() const; bool IsPrimitive() const; - // Returns the default value for this fielddef, which may either be something - // the client set explicitly or the "default default" (0 for numbers, empty - // for strings). The field's type indicates the type of the returned value, - // except for enum fields that are still mutable. + // How integers are encoded. 
Only meaningful for integer types. + // Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes. + IntegerFormat integer_format() const; + bool set_integer_format(IntegerFormat format); + + // Whether a submessage field is tag-delimited or not (if false, then + // length-delimited). Only meaningful when type() == UPB_TYPE_MESSAGE. + bool is_tag_delimited() const; + bool set_tag_delimited(bool tag_delimited); + + // Returns the non-string default value for this fielddef, which may either + // be something the client set explicitly or the "default default" (0 for + // numbers, empty for strings). The field's type indicates the type of the + // returned value, except for enum fields that are still mutable. // // For enums the default can be set either numerically or symbolically -- the // upb_fielddef_default_is_symbolic() function below will indicate which it @@ -273,6 +305,12 @@ class upb::FieldDef { // always have a default of type int32. Value default_value() const; + // Returns the NULL-terminated string default value for this field, or NULL + // if the default for this field is not a string. The user may optionally + // pass "len" to retrieve the length of the default also (this would be + // required to get default values with embedded NULLs). + const char *GetDefaultString(size_t* len) const; + // Sets default value for the field. For numeric types, use // upb_fielddef_setdefault(), and "value" must match the type of the field. // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may @@ -337,6 +375,7 @@ class upb::FieldDef { struct upb_fielddef { #endif upb_def base; + upb_value defaultval; // Only for non-repeated scalars and strings. const upb_msgdef *msgdef; union { const upb_def *def; // If !subdef_is_symbolic. @@ -344,23 +383,20 @@ struct upb_fielddef { } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). 
bool subdef_is_symbolic; bool default_is_string; - bool subdef_is_owned; + bool type_is_set_; // False until type is explicitly set. + upb_intfmt_t intfmt; + bool tagdelim; upb_fieldtype_t type_; upb_label_t label_; uint32_t number_; - upb_value defaultval; // Only for non-repeated scalars and strings. uint32_t selector_base; // Used to index into a upb::Handlers table. }; -// This will only work for static initialization because of the subdef_is_owned -// initialization. Theoretically the other _INIT() macros could possible work -// for non-static initialization, but this has not been tested. -#define UPB_FIELDDEF_INIT(label, type, name, num, msgdef, subdef, \ - selector_base, defaultval) \ - {UPB_DEF_INIT(name, UPB_DEF_FIELD), msgdef, {subdef}, false, \ - type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, \ - false, /* subdef_is_owned: not used since fielddef is not freed. */ \ - type, label, num, defaultval, selector_base} +#define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, name, num, \ + msgdef, subdef, selector_base, defaultval) \ + {UPB_DEF_INIT(name, UPB_DEF_FIELD), defaultval, msgdef, {subdef}, \ + false, type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, \ + intfmt, tagdelim, type, label, num, selector_base} // Native C API. 
#ifdef __cplusplus @@ -381,31 +417,40 @@ void upb_fielddef_checkref(const upb_fielddef *f, const void *owner); const char *upb_fielddef_fullname(const upb_fielddef *f); bool upb_fielddef_setfullname(upb_fielddef *f, const char *fullname); +bool upb_fielddef_typeisset(const upb_fielddef *f); upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f); +upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f); upb_label_t upb_fielddef_label(const upb_fielddef *f); uint32_t upb_fielddef_number(const upb_fielddef *f); const char *upb_fielddef_name(const upb_fielddef *f); const upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f); upb_msgdef *upb_fielddef_msgdef_mutable(upb_fielddef *f); -bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); -bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); -bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number); -bool upb_fielddef_setname(upb_fielddef *f, const char *name); +upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f); +bool upb_fielddef_istagdelim(const upb_fielddef *f); bool upb_fielddef_issubmsg(const upb_fielddef *f); bool upb_fielddef_isstring(const upb_fielddef *f); bool upb_fielddef_isseq(const upb_fielddef *f); bool upb_fielddef_isprimitive(const upb_fielddef *f); upb_value upb_fielddef_default(const upb_fielddef *f); +const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len); +bool upb_fielddef_default_is_symbolic(const upb_fielddef *f); +bool upb_fielddef_hassubdef(const upb_fielddef *f); +const upb_def *upb_fielddef_subdef(const upb_fielddef *f); +const char *upb_fielddef_subdefname(const upb_fielddef *f); + +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); +bool upb_fielddef_setdescriptortype(upb_fielddef *f, int type); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); +bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number); +bool upb_fielddef_setname(upb_fielddef *f, const char *name); +bool 
upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt); +bool upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim); void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str); -bool upb_fielddef_default_is_symbolic(const upb_fielddef *f); bool upb_fielddef_resolvedefault(upb_fielddef *f); -bool upb_fielddef_hassubdef(const upb_fielddef *f); bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef); bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name); -const upb_def *upb_fielddef_subdef(const upb_fielddef *f); -const char *upb_fielddef_subdefname(const upb_fielddef *f); #ifdef __cplusplus } // extern "C" #endif @@ -453,9 +498,9 @@ class upb::MessageDef { // These return NULL if the field is not found. FieldDef* FindFieldByNumber(uint32_t number); - FieldDef* FieldFieldByName(const char *name); + FieldDef* FindFieldByName(const char *name); const FieldDef* FindFieldByNumber(uint32_t number) const; - const FieldDef* FieldFieldByName(const char *name) const; + const FieldDef* FindFieldByName(const char *name) const; // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced @@ -685,18 +730,18 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter); // Downcasts, for when some wants to assert that a def is of a particular type. // These are only checked if we are building debug. 
#define UPB_DEF_CASTS(lower, upper) \ - INLINE const upb_ ## lower *upb_dyncast_ ## lower(const upb_def *def) { \ + UPB_INLINE const upb_ ## lower *upb_dyncast_ ## lower(const upb_def *def) { \ if (upb_def_type(def) != UPB_DEF_ ## upper) return NULL; \ return (upb_ ## lower*)def; \ } \ - INLINE const upb_ ## lower *upb_downcast_ ## lower(const upb_def *def) { \ + UPB_INLINE const upb_ ## lower *upb_downcast_ ## lower(const upb_def *def) { \ assert(upb_def_type(def) == UPB_DEF_ ## upper); \ return (const upb_ ## lower*)def; \ } \ - INLINE upb_ ## lower *upb_dyncast_ ## lower ## _mutable(upb_def *def) { \ + UPB_INLINE upb_ ## lower *upb_dyncast_ ## lower ## _mutable(upb_def *def) { \ return (upb_ ## lower*)upb_dyncast_ ## lower(def); \ } \ - INLINE upb_ ## lower *upb_downcast_ ## lower ## _mutable(upb_def *def) { \ + UPB_INLINE upb_ ## lower *upb_downcast_ ## lower ## _mutable(upb_def *def) { \ return (upb_ ## lower*)upb_downcast_ ## lower(def); \ } UPB_DEF_CASTS(msgdef, MSG); @@ -706,7 +751,7 @@ UPB_DEF_CASTS(enumdef, ENUM); #ifdef __cplusplus -INLINE const char *upb_safecstr(const std::string& str) { +UPB_INLINE const char *upb_safecstr(const std::string& str) { assert(str.size() == std::strlen(str.c_str())); return str.c_str(); } @@ -793,9 +838,15 @@ inline bool FieldDef::set_full_name(const char *fullname) { inline bool FieldDef::set_full_name(const std::string& fullname) { return upb_fielddef_setfullname(this, upb_safecstr(fullname)); } +inline bool FieldDef::type_is_set() const { + return upb_fielddef_typeisset(this); +} inline FieldDef::Type FieldDef::type() const { return upb_fielddef_type(this); } +inline FieldDef::DescriptorType FieldDef::descriptor_type() const { + return upb_fielddef_descriptortype(this); +} inline FieldDef::Label FieldDef::label() const { return upb_fielddef_label(this); } @@ -820,6 +871,9 @@ inline bool FieldDef::set_name(const std::string& name) { inline bool FieldDef::set_type(upb_fieldtype_t type) { return upb_fielddef_settype(this, 
type); } +inline bool FieldDef::set_descriptor_type(FieldDef::DescriptorType type) { + return upb_fielddef_setdescriptortype(this, type); +} inline bool FieldDef::set_label(upb_label_t label) { return upb_fielddef_setlabel(this, label); } @@ -835,6 +889,9 @@ inline bool FieldDef::IsSequence() const { inline Value FieldDef::default_value() const { return upb_fielddef_default(this); } +inline const char *FieldDef::GetDefaultString(size_t* len) const { + return upb_fielddef_defaultstr(this, len); +} inline void FieldDef::set_default_value(Value value) { upb_fielddef_setdefault(this, value); } @@ -914,13 +971,13 @@ inline bool MessageDef::AddField(upb_fielddef *f, const void *ref_donor) { inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { return upb_msgdef_itof_mutable(this, number); } -inline FieldDef* MessageDef::FieldFieldByName(const char *name) { +inline FieldDef* MessageDef::FindFieldByName(const char *name) { return upb_msgdef_ntof_mutable(this, name); } inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const { return upb_msgdef_itof(this, number); } -inline const FieldDef* MessageDef::FieldFieldByName(const char *name) const { +inline const FieldDef* MessageDef::FindFieldByName(const char *name) const { return upb_msgdef_ntof(this, name); } inline MessageDef* MessageDef::Dup(const void *owner) const { diff --git a/upb/descriptor/descriptor.upb.c b/upb/descriptor/descriptor.upb.c index 9a64c5b87f..80d5f594ca 100755 --- a/upb/descriptor/descriptor.upb.c +++ b/upb/descriptor/descriptor.upb.c @@ -9,105 +9,105 @@ const upb_fielddef google_protobuf_fields[73]; const upb_enumdef google_protobuf_enums[4]; const upb_tabent google_protobuf_strentries[192]; const upb_tabent google_protobuf_intentries[66]; -const upb_value google_protobuf_arrays[97]; +const _upb_value google_protobuf_arrays[97]; const upb_msgdef google_protobuf_msgs[20] = { - UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", UPB_INTTABLE_INIT(2, 3, 9, 2, 
&google_protobuf_intentries[0], &google_protobuf_arrays[0], 6, 5), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[0]), 31), + UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[0], &google_protobuf_arrays[0], 6, 5), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[0]), 25), UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[6], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[16]), 2), - UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[10], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[20]), 11), - UPB_MSGDEF_INIT("google.protobuf.EnumOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[4], &google_protobuf_arrays[14], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[24]), 5), - UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[15], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[28]), 7), - UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[6], &google_protobuf_arrays[19], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[32]), 5), - UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[8], &google_protobuf_arrays[20], 6, 5), UPB_STRTABLE_INIT(8, 15, 9, 4, &google_protobuf_strentries[36]), 18), - UPB_MSGDEF_INIT("google.protobuf.FieldOptions", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[12], &google_protobuf_arrays[26], 5, 3), UPB_STRTABLE_INIT(5, 7, 9, 3, &google_protobuf_strentries[52]), 11), - UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", UPB_INTTABLE_INIT(4, 7, 9, 3, &google_protobuf_intentries[16], &google_protobuf_arrays[31], 6, 5), 
UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[60]), 37), - UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[37], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[76]), 5), - UPB_MSGDEF_INIT("google.protobuf.FileOptions", UPB_INTTABLE_INIT(8, 15, 9, 4, &google_protobuf_intentries[24], &google_protobuf_arrays[40], 6, 1), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[80]), 17), - UPB_MSGDEF_INIT("google.protobuf.MessageOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[40], &google_protobuf_arrays[46], 4, 2), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[96]), 7), - UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[50], 5, 4), UPB_STRTABLE_INIT(4, 7, 9, 3, &google_protobuf_strentries[100]), 12), - UPB_MSGDEF_INIT("google.protobuf.MethodOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[42], &google_protobuf_arrays[55], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[108]), 5), - UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[56], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[112]), 11), - UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[44], &google_protobuf_arrays[60], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[116]), 5), - UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[61], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[120]), 5), + UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[10], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[20]), 9), + UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 
UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[4], &google_protobuf_arrays[14], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[24]), 4), + UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[15], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[28]), 6), + UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[6], &google_protobuf_arrays[19], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[32]), 4), + UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[8], &google_protobuf_arrays[20], 6, 5), UPB_STRTABLE_INIT(8, 15, 9, 4, &google_protobuf_strentries[36]), 17), + UPB_MSGDEF_INIT("google.protobuf.FieldOptions", UPB_INTTABLE_INIT(2, 3, 9, 2, &google_protobuf_intentries[12], &google_protobuf_arrays[26], 5, 3), UPB_STRTABLE_INIT(5, 7, 9, 3, &google_protobuf_strentries[52]), 10), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", UPB_INTTABLE_INIT(4, 7, 9, 3, &google_protobuf_intentries[16], &google_protobuf_arrays[31], 6, 5), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[60]), 31), + UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[37], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[76]), 4), + UPB_MSGDEF_INIT("google.protobuf.FileOptions", UPB_INTTABLE_INIT(8, 15, 9, 4, &google_protobuf_intentries[24], &google_protobuf_arrays[40], 6, 1), UPB_STRTABLE_INIT(9, 15, 9, 4, &google_protobuf_strentries[80]), 16), + UPB_MSGDEF_INIT("google.protobuf.MessageOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[40], &google_protobuf_arrays[46], 4, 2), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[96]), 6), + UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, 
&google_protobuf_arrays[50], 5, 4), UPB_STRTABLE_INIT(4, 7, 9, 3, &google_protobuf_strentries[100]), 11), + UPB_MSGDEF_INIT("google.protobuf.MethodOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[42], &google_protobuf_arrays[55], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[108]), 4), + UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[56], 4, 3), UPB_STRTABLE_INIT(3, 3, 9, 2, &google_protobuf_strentries[112]), 9), + UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", UPB_INTTABLE_INIT(1, 1, 9, 1, &google_protobuf_intentries[44], &google_protobuf_arrays[60], 1, 0), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[116]), 4), + UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[61], 3, 1), UPB_STRTABLE_INIT(1, 3, 9, 2, &google_protobuf_strentries[120]), 4), UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[64], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[124]), 6), - UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[46], &google_protobuf_arrays[68], 6, 4), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[128]), 17), + UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", UPB_INTTABLE_INIT(3, 3, 9, 2, &google_protobuf_intentries[46], &google_protobuf_arrays[68], 6, 4), UPB_STRTABLE_INIT(7, 15, 9, 4, &google_protobuf_strentries[128]), 16), UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", UPB_INTTABLE_INIT(0, 0, 9, 0, NULL, &google_protobuf_arrays[74], 4, 2), UPB_STRTABLE_INIT(2, 3, 9, 2, &google_protobuf_strentries[144]), 4), }; const upb_fielddef google_protobuf_fields[73] = { - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "aggregate_value", 8, &google_protobuf_msgs[18], NULL, 10, UPB_VALUE_INIT_NONE), - 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "cc_generic_services", 16, &google_protobuf_msgs[10], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "ctype", 1, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_enums[2]), 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "default_value", 7, &google_protobuf_msgs[6], NULL, 15, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, "dependency", 3, &google_protobuf_msgs[8], NULL, 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "deprecated", 3, &google_protobuf_msgs[7], NULL, 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, "double_value", 6, &google_protobuf_msgs[18], NULL, 13, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "end", 2, &google_protobuf_msgs[1], NULL, 1, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 4, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[2]), 15, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "enum_type", 5, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[2]), 18, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "experimental_map_key", 9, &google_protobuf_msgs[7], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "extendee", 2, &google_protobuf_msgs[6], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 7, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[6]), 34, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension", 6, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 25, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "extension_range", 5, &google_protobuf_msgs[0], 
upb_upcast(&google_protobuf_msgs[1]), 20, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "field", 2, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 5, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "file", 1, &google_protobuf_msgs[9], upb_upcast(&google_protobuf_msgs[8]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "identifier_value", 3, &google_protobuf_msgs[18], NULL, 5, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "input_type", 2, &google_protobuf_msgs[12], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, "is_extension", 2, &google_protobuf_msgs[19], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generate_equals_and_hash", 20, &google_protobuf_msgs[10], NULL, 6, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_generic_services", 17, &google_protobuf_msgs[10], NULL, 4, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "java_multiple_files", 10, &google_protobuf_msgs[10], NULL, 16, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_outer_classname", 8, &google_protobuf_msgs[10], NULL, 12, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "java_package", 1, &google_protobuf_msgs[10], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "label", 4, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[0]), 7, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "location", 1, &google_protobuf_msgs[16], upb_upcast(&google_protobuf_msgs[17]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "message_set_wire_format", 1, &google_protobuf_msgs[11], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, 
UPB_TYPE_MESSAGE, "message_type", 4, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[0]), 13, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "method", 2, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[12]), 5, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[12], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[4], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[14], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[2], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[6], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "name", 2, &google_protobuf_msgs[18], upb_upcast(&google_protobuf_msgs[19]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "name", 1, &google_protobuf_msgs[8], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, "name_part", 1, &google_protobuf_msgs[19], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, "negative_int_value", 5, &google_protobuf_msgs[18], NULL, 9, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "nested_type", 3, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[0]), 10, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "no_standard_descriptor_accessor", 2, &google_protobuf_msgs[11], NULL, 1, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 2, 
&google_protobuf_msgs[4], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "number", 3, &google_protobuf_msgs[6], NULL, 6, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "optimize_for", 9, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_enums[3]), 15, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 4, &google_protobuf_msgs[12], upb_upcast(&google_protobuf_msgs[13]), 9, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[15]), 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[10]), 21, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[3]), 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 7, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[11]), 28, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 8, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_msgs[7]), 9, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "options", 3, &google_protobuf_msgs[4], upb_upcast(&google_protobuf_msgs[5]), 4, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "output_type", 3, &google_protobuf_msgs[12], NULL, 6, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "package", 2, &google_protobuf_msgs[8], NULL, 3, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "packed", 2, &google_protobuf_msgs[7], NULL, 1, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "path", 1, &google_protobuf_msgs[17], NULL, 2, UPB_VALUE_INIT_NONE), - 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, "positive_int_value", 4, &google_protobuf_msgs[18], NULL, 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, "py_generic_services", 18, &google_protobuf_msgs[10], NULL, 5, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "service", 6, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[14]), 29, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, "source_code_info", 9, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[16]), 24, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, "span", 2, &google_protobuf_msgs[17], NULL, 5, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, "start", 1, &google_protobuf_msgs[1], NULL, 0, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, "string_value", 7, &google_protobuf_msgs[18], NULL, 14, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, "type", 5, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[1]), 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, "type_name", 6, &google_protobuf_msgs[6], NULL, 12, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[15], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[11], upb_upcast(&google_protobuf_msgs[18]), 4, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[13], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_msgs[18]), 9, UPB_VALUE_INIT_NONE), - 
UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_msgs[18]), 8, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[3], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "uninterpreted_option", 999, &google_protobuf_msgs[5], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), - UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, "value", 2, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[4]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "aggregate_value", 8, &google_protobuf_msgs[18], NULL, 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "cc_generic_services", 16, &google_protobuf_msgs[10], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, "ctype", 1, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_enums[2]), 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "default_value", 7, &google_protobuf_msgs[6], NULL, 14, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, "dependency", 3, &google_protobuf_msgs[8], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "deprecated", 3, &google_protobuf_msgs[7], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, "double_value", 6, &google_protobuf_msgs[18], NULL, 12, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "end", 2, &google_protobuf_msgs[1], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "enum_type", 4, &google_protobuf_msgs[0], 
upb_upcast(&google_protobuf_msgs[2]), 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "enum_type", 5, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[2]), 17, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "experimental_map_key", 9, &google_protobuf_msgs[7], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "extendee", 2, &google_protobuf_msgs[6], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "extension", 7, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[6]), 29, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "extension", 6, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 21, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "extension_range", 5, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[1]), 17, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "field", 2, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[6]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "file", 1, &google_protobuf_msgs[9], upb_upcast(&google_protobuf_msgs[8]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "identifier_value", 3, &google_protobuf_msgs[18], NULL, 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "input_type", 2, &google_protobuf_msgs[12], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, "is_extension", 2, &google_protobuf_msgs[19], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "java_generate_equals_and_hash", 20, &google_protobuf_msgs[10], NULL, 6, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "java_generic_services", 17, &google_protobuf_msgs[10], NULL, 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "java_multiple_files", 10, &google_protobuf_msgs[10], NULL, 15, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "java_outer_classname", 8, &google_protobuf_msgs[10], NULL, 11, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "java_package", 1, &google_protobuf_msgs[10], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, "label", 4, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[0]), 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "location", 1, &google_protobuf_msgs[16], upb_upcast(&google_protobuf_msgs[17]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "message_set_wire_format", 1, &google_protobuf_msgs[11], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "message_type", 4, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[0]), 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "method", 2, &google_protobuf_msgs[14], upb_upcast(&google_protobuf_msgs[12]), 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[12], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[4], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[14], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[2], NULL, 0, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[6], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "name", 2, &google_protobuf_msgs[18], upb_upcast(&google_protobuf_msgs[19]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[0], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "name", 1, &google_protobuf_msgs[8], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, "name_part", 1, &google_protobuf_msgs[19], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, "negative_int_value", 5, &google_protobuf_msgs[18], NULL, 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "nested_type", 3, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[0]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "no_standard_descriptor_accessor", 2, &google_protobuf_msgs[11], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "number", 2, &google_protobuf_msgs[4], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "number", 3, &google_protobuf_msgs[6], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, "optimize_for", 9, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_enums[3]), 14, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 4, &google_protobuf_msgs[12], upb_upcast(&google_protobuf_msgs[13]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 3, &google_protobuf_msgs[14], 
upb_upcast(&google_protobuf_msgs[15]), 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 8, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[10]), 19, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 3, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[3]), 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 7, &google_protobuf_msgs[0], upb_upcast(&google_protobuf_msgs[11]), 23, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 8, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_msgs[7]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "options", 3, &google_protobuf_msgs[4], upb_upcast(&google_protobuf_msgs[5]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "output_type", 3, &google_protobuf_msgs[12], NULL, 6, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "package", 2, &google_protobuf_msgs[8], NULL, 3, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "packed", 2, &google_protobuf_msgs[7], NULL, 1, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "path", 1, &google_protobuf_msgs[17], NULL, 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, "positive_int_value", 4, &google_protobuf_msgs[18], NULL, 7, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, "py_generic_services", 18, &google_protobuf_msgs[10], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "service", 6, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[14]), 25, UPB_VALUE_INIT_NONE), + 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, "source_code_info", 9, &google_protobuf_msgs[8], upb_upcast(&google_protobuf_msgs[16]), 21, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "span", 2, &google_protobuf_msgs[17], NULL, 5, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, "start", 1, &google_protobuf_msgs[1], NULL, 0, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, "string_value", 7, &google_protobuf_msgs[18], NULL, 13, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, "type", 5, &google_protobuf_msgs[6], upb_upcast(&google_protobuf_enums[1]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, "type_name", 6, &google_protobuf_msgs[6], NULL, 11, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[15], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[11], upb_upcast(&google_protobuf_msgs[18]), 4, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[13], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[10], upb_upcast(&google_protobuf_msgs[18]), 9, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[7], upb_upcast(&google_protobuf_msgs[18]), 8, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[3], 
upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "uninterpreted_option", 999, &google_protobuf_msgs[5], upb_upcast(&google_protobuf_msgs[18]), 2, UPB_VALUE_INIT_NONE), + UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, "value", 2, &google_protobuf_msgs[2], upb_upcast(&google_protobuf_msgs[4]), 5, UPB_VALUE_INIT_NONE), }; const upb_enumdef google_protobuf_enums[4] = { @@ -119,257 +119,257 @@ const upb_enumdef google_protobuf_enums[4] = { const upb_tabent google_protobuf_strentries[192] = { {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("field"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), NULL}, {UPB_TABKEY_STR("extension_range"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[14]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("nested_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[40]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("options"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[8]), &google_protobuf_strentries[14]}, {UPB_TABKEY_STR("start"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[61]), NULL}, {UPB_TABKEY_STR("end"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[7]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[72]), NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[48]), NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[33]), &google_protobuf_strentries[22]}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[42]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[51]), NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[31]), &google_protobuf_strentries[30]}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, 
NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("label"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[25]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[34]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("number"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[43]), &google_protobuf_strentries[49]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("type_name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, {UPB_TABKEY_STR("extendee"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[11]), NULL}, {UPB_TABKEY_STR("type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[63]), &google_protobuf_strentries[48]}, {UPB_TABKEY_STR("default_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, {UPB_TABKEY_STR("experimental_map_key"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), &google_protobuf_strentries[58]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("ctype"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[2]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, 
NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("deprecated"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[5]), NULL}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, {UPB_TABKEY_STR("packed"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[54]), NULL}, {UPB_TABKEY_STR("extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[37]), NULL}, {UPB_TABKEY_STR("service"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("source_code_info"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("dependency"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[4]), NULL}, {UPB_TABKEY_STR("message_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[28]), NULL}, {UPB_TABKEY_STR("package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[53]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL}, {UPB_TABKEY_STR("enum_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[9]), &google_protobuf_strentries[74]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, 
NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("file"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[16]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("cc_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("java_multiple_files"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("java_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), &google_protobuf_strentries[94]}, {UPB_TABKEY_STR("java_generate_equals_and_hash"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("java_package"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[24]), NULL}, {UPB_TABKEY_STR("optimize_for"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, {UPB_TABKEY_STR("py_generic_services"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, {UPB_TABKEY_STR("java_outer_classname"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, {UPB_TABKEY_STR("message_set_wire_format"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[27]), &google_protobuf_strentries[98]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, {UPB_TABKEY_STR("no_standard_descriptor_accessor"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[41]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[30]), NULL}, {UPB_TABKEY_STR("input_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[18]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("output_type"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[52]), NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[45]), NULL}, {UPB_TABKEY_STR("uninterpreted_option"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("options"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[46]), &google_protobuf_strentries[114]}, {UPB_TABKEY_STR("method"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[29]), NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[32]), &google_protobuf_strentries[113]}, {UPB_TABKEY_STR("uninterpreted_option"), 
UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("location"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[26]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("span"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[60]), NULL}, {UPB_TABKEY_STR("path"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[55]), &google_protobuf_strentries[126]}, {UPB_TABKEY_STR("double_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("name"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[35]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("negative_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[39]), NULL}, {UPB_TABKEY_STR("aggregate_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, - 
{UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("positive_int_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[56]), NULL}, {UPB_TABKEY_STR("identifier_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[17]), NULL}, {UPB_TABKEY_STR("string_value"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), &google_protobuf_strentries[142]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("is_extension"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[19]), NULL}, {UPB_TABKEY_STR("name_part"), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[38]), NULL}, {UPB_TABKEY_STR("LABEL_REQUIRED"), UPB_VALUE_INIT_INT32(2), &google_protobuf_strentries[150]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("LABEL_REPEATED"), UPB_VALUE_INIT_INT32(3), NULL}, {UPB_TABKEY_STR("LABEL_OPTIONAL"), UPB_VALUE_INIT_INT32(1), NULL}, {UPB_TABKEY_STR("TYPE_FIXED64"), UPB_VALUE_INIT_INT32(6), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_STRING"), UPB_VALUE_INIT_INT32(9), NULL}, {UPB_TABKEY_STR("TYPE_FLOAT"), UPB_VALUE_INIT_INT32(2), 
&google_protobuf_strentries[181]}, {UPB_TABKEY_STR("TYPE_DOUBLE"), UPB_VALUE_INIT_INT32(1), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_INT32"), UPB_VALUE_INIT_INT32(5), NULL}, {UPB_TABKEY_STR("TYPE_SFIXED32"), UPB_VALUE_INIT_INT32(15), NULL}, {UPB_TABKEY_STR("TYPE_FIXED32"), UPB_VALUE_INIT_INT32(7), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_MESSAGE"), UPB_VALUE_INIT_INT32(11), &google_protobuf_strentries[182]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_INT64"), UPB_VALUE_INIT_INT32(3), &google_protobuf_strentries[179]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_ENUM"), UPB_VALUE_INIT_INT32(14), NULL}, {UPB_TABKEY_STR("TYPE_UINT32"), UPB_VALUE_INIT_INT32(13), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_UINT64"), UPB_VALUE_INIT_INT32(4), &google_protobuf_strentries[178]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("TYPE_SFIXED64"), UPB_VALUE_INIT_INT32(16), NULL}, {UPB_TABKEY_STR("TYPE_BYTES"), UPB_VALUE_INIT_INT32(12), NULL}, {UPB_TABKEY_STR("TYPE_SINT64"), UPB_VALUE_INIT_INT32(18), NULL}, {UPB_TABKEY_STR("TYPE_BOOL"), UPB_VALUE_INIT_INT32(8), NULL}, {UPB_TABKEY_STR("TYPE_GROUP"), UPB_VALUE_INIT_INT32(10), NULL}, 
{UPB_TABKEY_STR("TYPE_SINT32"), UPB_VALUE_INIT_INT32(17), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("CORD"), UPB_VALUE_INIT_INT32(1), NULL}, {UPB_TABKEY_STR("STRING"), UPB_VALUE_INIT_INT32(0), &google_protobuf_strentries[185]}, {UPB_TABKEY_STR("STRING_PIECE"), UPB_VALUE_INIT_INT32(2), NULL}, {UPB_TABKEY_STR("CODE_SIZE"), UPB_VALUE_INIT_INT32(2), NULL}, {UPB_TABKEY_STR("SPEED"), UPB_VALUE_INIT_INT32(1), &google_protobuf_strentries[191]}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_STR("LITE_RUNTIME"), UPB_VALUE_INIT_INT32(3), NULL}, }; const upb_tabent google_protobuf_intentries[66] = { - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[13]), NULL}, {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[49]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[70]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[71]), NULL}, {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[50]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[64]), NULL}, {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[3]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[10]), NULL}, - {UPB_TABKEY_NONE, 
UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[69]), NULL}, {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[47]), NULL}, {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[59]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[58]), NULL}, {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[12]), NULL}, {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[1]), NULL}, {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[21]), NULL}, {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[57]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(20), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[20]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[68]), NULL}, {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[23]), NULL}, {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[44]), NULL}, {UPB_TABKEY_NUM(10), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[22]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, 
UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[66]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[67]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(999), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[65]), NULL}, {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[0]), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(6), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[6]), NULL}, {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[62]), NULL}, {UPB_TABKEY_NUM(16), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED64"), NULL}, {UPB_TABKEY_NUM(17), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT32"), NULL}, {UPB_TABKEY_NUM(18), UPB_VALUE_INIT_CONSTPTR("TYPE_SINT64"), NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, - {UPB_TABKEY_NONE, UPB_VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, + {UPB_TABKEY_NONE, UPB__VALUE_INIT_NONE, NULL}, {UPB_TABKEY_NUM(7), UPB_VALUE_INIT_CONSTPTR("TYPE_FIXED32"), NULL}, {UPB_TABKEY_NUM(8), UPB_VALUE_INIT_CONSTPTR("TYPE_BOOL"), NULL}, {UPB_TABKEY_NUM(9), UPB_VALUE_INIT_CONSTPTR("TYPE_STRING"), NULL}, @@ 
-381,7 +381,7 @@ const upb_tabent google_protobuf_intentries[66] = { {UPB_TABKEY_NUM(15), UPB_VALUE_INIT_CONSTPTR("TYPE_SFIXED32"), NULL}, }; -const upb_value google_protobuf_arrays[97] = { +const _upb_value google_protobuf_arrays[97] = { UPB_ARRAY_EMPTYENT, UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[36]), UPB_VALUE_INIT_CONSTPTR(&google_protobuf_fields[15]), diff --git a/upb/descriptor/descriptor.upb.h b/upb/descriptor/descriptor.upb.h index 4903ae5a55..23c42a00bc 100755 --- a/upb/descriptor/descriptor.upb.h +++ b/upb/descriptor/descriptor.upb.h @@ -83,6 +83,200 @@ extern const upb_enumdef google_protobuf_enums[4]; #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION &google_protobuf_msgs[18] #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART &google_protobuf_msgs[19] +// Selector definitions. +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_ENDSTR 11 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_STRING 9 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_STARTSTR 10 +#define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES_BOOL 3 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_INT32 0 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_ENDSTR 16 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_STRING 14 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_STARTSTR 15 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_ENDSTR 10 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_ENDSEQ 7 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STRING 8 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STARTSTR 9 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STARTSEQ 6 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_BOOL 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE_DOUBLE 12 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END_INT32 1 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 13 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_ENDSEQ 12 
+#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_STARTSEQ 11 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_ENDSUBMSG 14 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 17 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_ENDSEQ 16 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_STARTSEQ 15 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_ENDSUBMSG 18 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_ENDSTR 5 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_STRING 3 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_STARTSTR 4 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_ENDSTR 5 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_STRING 3 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_STARTSTR 4 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_STARTSUBMSG 29 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_ENDSEQ 28 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_STARTSEQ 27 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_ENDSUBMSG 30 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_STARTSUBMSG 21 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_ENDSEQ 20 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_STARTSEQ 19 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_ENDSUBMSG 22 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_STARTSUBMSG 17 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_ENDSEQ 16 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_STARTSEQ 15 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_ENDSUBMSG 18 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_STARTSUBMSG 5 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_ENDSEQ 4 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_STARTSEQ 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_ENDSUBMSG 6 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSEQ 1 +#define 
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSEQ 0 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_ENDSTR 6 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_STRING 4 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_STARTSTR 5 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_ENDSTR 5 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_STRING 3 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_STARTSTR 4 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_BOOL 3 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH_BOOL 6 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES_BOOL 4 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES_BOOL 15 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_ENDSTR 13 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_STRING 11 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_STARTSTR 12 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_ENDSTR 2 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_STRING 0 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_STARTSTR 1 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_INT32 7 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSEQ 1 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSEQ 0 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_BOOL 0 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_STARTSUBMSG 13 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_ENDSEQ 12 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_STARTSEQ 11 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_ENDSUBMSG 14 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSUBMSG 5 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_ENDSEQ 4 +#define 
GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSEQ 3 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_ENDSUBMSG 6 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_ENDSEQ 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSEQ 0 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_ENDSTR 2 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_STRING 0 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_STARTSTR 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_ENDSTR 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STRING 0 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STARTSTR 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE_INT64 8 +#define 
GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_STARTSUBMSG 9 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_ENDSEQ 8 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_STARTSEQ 7 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_ENDSUBMSG 10 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR_BOOL 1 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_INT32 3 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_INT32 6 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR_INT32 14 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 9 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 10 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 7 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 8 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 19 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 20 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 7 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 8 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_STARTSUBMSG 23 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_ENDSUBMSG 24 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 9 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 10 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 4 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 5 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_ENDSTR 8 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_STRING 6 +#define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_STARTSTR 7 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_ENDSTR 5 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_STRING 3 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_STARTSTR 4 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED_BOOL 1 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_ENDSEQ 1 +#define 
GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_INT32 2 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_STARTSEQ 0 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE_UINT64 7 +#define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES_BOOL 5 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_STARTSUBMSG 25 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_ENDSEQ 24 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_STARTSEQ 23 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_ENDSUBMSG 26 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO_STARTSUBMSG 21 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO_ENDSUBMSG 22 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_ENDSEQ 4 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_INT32 5 +#define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_STARTSEQ 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_INT32 0 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_ENDSTR 15 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_STRING 13 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_STARTSTR 14 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 8 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_ENDSTR 13 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_STRING 11 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_STARTSTR 12 +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 1 +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 0 +#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 4 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 3 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 2 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5 +#define 
GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 1 +#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 0 +#define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 9 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 7 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 10 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 8 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 7 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 6 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 9 +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 1 +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 0 +#define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2 +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 1 +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 0 +#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 3 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSUBMSG 5 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_ENDSEQ 4 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSEQ 3 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_ENDSUBMSG 6 + #ifdef __cplusplus }; // extern "C" #endif diff --git a/upb/descriptor/reader.c b/upb/descriptor/reader.c index 16f3b24af8..529d5ef689 100644 --- a/upb/descriptor/reader.c +++ b/upb/descriptor/reader.c @@ -16,8 +16,8 @@ #include #include #include -#include "upb/bytestream.h" #include "upb/def.h" +#include "upb/sink.h" #include 
"upb/descriptor/descriptor.upb.h" static char *upb_strndup(const char *buf, size_t n) { @@ -28,7 +28,8 @@ static char *upb_strndup(const char *buf, size_t n) { return ret; } -// Returns a newly allocated string that joins input strings together, for example: +// Returns a newly allocated string that joins input strings together, for +// example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("", "Baz") -> "Baz" // Caller owns a ref on the returned string. @@ -45,9 +46,21 @@ static char *upb_join(const char *base, const char *name) { } } + +/* upb_deflist ****************************************************************/ + +// upb_deflist is an internal-only dynamic array for storing a growing list of +// upb_defs. +typedef struct { + upb_def **defs; + size_t len; + size_t size; + bool owned; +} upb_deflist; + void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); + l->size = 0; + l->defs = NULL; l->len = 0; l->owned = true; } @@ -56,15 +69,19 @@ void upb_deflist_uninit(upb_deflist *l) { if (l->owned) for(size_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i], &l->defs); - free(l->defs); } -void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); +bool upb_deflist_push(upb_deflist *l, upb_def *d, upb_pipeline *p) { + if(++l->len >= l->size) { + size_t new_size = UPB_MAX(l->size, 4); + new_size *= 2; + l->defs = upb_pipeline_realloc( + p, l->defs, l->size * sizeof(void*), new_size * sizeof(void*)); + if (!l->defs) return false; + l->size = new_size; } - l->defs[l->len++] = d; + l->defs[l->len - 1] = d; + return true; } void upb_deflist_donaterefs(upb_deflist *l, void *owner) { @@ -74,9 +91,6 @@ void upb_deflist_donaterefs(upb_deflist *l, void *owner) { l->owned = false; } - -/* upb_descreader ************************************************************/ - static upb_def *upb_deflist_last(upb_deflist *l) { return 
l->defs[l->len-1]; } @@ -91,17 +105,67 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { } } -void upb_descreader_init(upb_descreader *r) { + +/* upb_descreader ************************************************************/ + +// We keep a stack of all the messages scopes we are currently in, as well as +// the top-level file scope. This is necessary to correctly qualify the +// definitions that are contained inside. "name" tracks the name of the +// message or package (a bare name -- not qualified by any enclosing scopes). +typedef struct { + char *name; + // Index of the first def that is under this scope. For msgdefs, the + // msgdef itself is at start-1. + int start; +} upb_descreader_frame; + +struct upb_descreader { + upb_deflist defs; + upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH]; + int stack_len; + + uint32_t number; + char *name; + bool saw_number; + bool saw_name; + + char *default_string; + + upb_fielddef *f; +}; + +void upb_descreader_init(void *r); +void upb_descreader_uninit(void *r); + +const upb_frametype upb_descreader_frametype = { + sizeof(upb_descreader), + upb_descreader_init, + upb_descreader_uninit, + NULL, +}; + +const upb_frametype *upb_descreader_getframetype() { + return &upb_descreader_frametype; +} + +// Registers handlers that will build the defs. Pass the descreader as the +// closure. 
+const upb_handlers *upb_descreader_gethandlers(const void *owner); + + +/* upb_descreader ************************************************************/ + +void upb_descreader_init(void *_r) { + upb_descreader *r = _r; upb_deflist_init(&r->defs); - upb_status_init(&r->status); r->stack_len = 0; r->name = NULL; r->default_string = NULL; } -void upb_descreader_uninit(upb_descreader *r) { +void upb_descreader_uninit(void *_r) { + upb_descreader *r = _r; free(r->name); - upb_status_uninit(&r->status); upb_deflist_uninit(&r->defs); free(r->default_string); while (r->stack_len > 0) { @@ -149,37 +213,37 @@ void upb_descreader_setscopename(upb_descreader *r, char *str) { } // Handlers for google.protobuf.FileDescriptorProto. -static bool file_startmsg(void *_r) { - upb_descreader *r = _r; +static bool file_startmsg(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_descreader_startcontainer(r); return true; } -static void file_endmsg(void *_r, upb_status *status) { +static void file_endmsg(const upb_sinkframe *frame, upb_status *status) { UPB_UNUSED(status); - upb_descreader *r = _r; + upb_descreader *r = upb_sinkframe_userdata(frame); upb_descreader_endcontainer(r); } -static size_t file_onpackage(void *_r, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t file_onpackage(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // XXX: see comment at the top of the file. upb_descreader_setscopename(r, upb_strndup(buf, n)); return n; } // Handlers for google.protobuf.EnumValueDescriptorProto. 
-static bool enumval_startmsg(void *_r) { - upb_descreader *r = _r; +static bool enumval_startmsg(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); r->saw_number = false; r->saw_name = false; return true; } -static size_t enumval_onname(void *_r, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t enumval_onname(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // XXX: see comment at the top of the file. free(r->name); r->name = upb_strndup(buf, n); @@ -187,16 +251,15 @@ static size_t enumval_onname(void *_r, void *fval, const char *buf, size_t n) { return n; } -static bool enumval_onnumber(void *_r, void *fval, int32_t val) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static bool enumval_onnumber(const upb_sinkframe *frame, int32_t val) { + upb_descreader *r = upb_sinkframe_userdata(frame); r->number = val; r->saw_number = true; return true; } -static void enumval_endmsg(void *_r, upb_status *status) { - upb_descreader *r = _r; +static void enumval_endmsg(const upb_sinkframe *frame, upb_status *status) { + upb_descreader *r = upb_sinkframe_userdata(frame); if(!r->saw_number || !r->saw_name) { upb_status_seterrliteral(status, "Enum value missing name or number."); return; @@ -214,16 +277,17 @@ static void enumval_endmsg(void *_r, upb_status *status) { // Handlers for google.protobuf.EnumDescriptorProto. 
-static bool enum_startmsg(void *_r) { - upb_descreader *r = _r; - upb_deflist_push(&r->defs, upb_upcast(upb_enumdef_new(&r->defs))); +static bool enum_startmsg(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); + upb_pipeline *p = upb_sinkframe_pipeline(frame); + upb_deflist_push(&r->defs, upb_upcast(upb_enumdef_new(&r->defs)), p); return true; } -static void enum_endmsg(void *_r, upb_status *status) { - upb_descreader *r = _r; +static void enum_endmsg(const upb_sinkframe *frame, upb_status *status) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); - if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) { + if (upb_def_fullname(upb_descreader_last(r)) == NULL) { upb_status_seterrliteral(status, "Enum had no name."); return; } @@ -233,9 +297,9 @@ static void enum_endmsg(void *_r, upb_status *status) { } } -static size_t enum_onname(void *_r, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t enum_onname(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // XXX: see comment at the top of the file. 
char *fullname = upb_strndup(buf, n); upb_def_setfullname(upb_descreader_last(r), fullname); @@ -244,8 +308,8 @@ static size_t enum_onname(void *_r, void *fval, const char *buf, size_t n) { } // Handlers for google.protobuf.FieldDescriptorProto -static bool field_startmsg(void *_r) { - upb_descreader *r = _r; +static bool field_startmsg(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; @@ -258,27 +322,19 @@ static bool parse_default(char *str, upb_value *d, int type) { bool success = true; if (str) { switch(type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): upb_value_setint32(d, 0); break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): upb_value_setint64(d, 0); break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): upb_value_setuint32(d, 0); - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): upb_value_setuint64(d, 0); break; - case UPB_TYPE(DOUBLE): upb_value_setdouble(d, 0); break; - case UPB_TYPE(FLOAT): upb_value_setfloat(d, 0); break; - case UPB_TYPE(BOOL): upb_value_setbool(d, false); break; + case UPB_TYPE_INT32: upb_value_setint32(d, 0); break; + case UPB_TYPE_INT64: upb_value_setint64(d, 0); break; + case UPB_TYPE_UINT32: upb_value_setuint32(d, 0); + case UPB_TYPE_UINT64: upb_value_setuint64(d, 0); break; + case UPB_TYPE_FLOAT: upb_value_setfloat(d, 0); break; + case UPB_TYPE_DOUBLE: upb_value_setdouble(d, 0); break; + case UPB_TYPE_BOOL: upb_value_setbool(d, false); break; default: abort(); } } else { char *end; switch (type) { - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(SFIXED32): { + case UPB_TYPE_INT32: { long val = strtol(str, &end, 0); if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) success = false; @@ -286,14 +342,11 @@ static bool parse_default(char *str, upb_value *d, int type) { upb_value_setint32(d, val); break; } - case 
UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): + case UPB_TYPE_INT64: upb_value_setint64(d, strtoll(str, &end, 0)); if (errno == ERANGE || *end) success = false; break; - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): { + case UPB_TYPE_UINT32: { unsigned long val = strtoul(str, &end, 0); if (val > UINT32_MAX || errno == ERANGE || *end) success = false; @@ -301,20 +354,19 @@ static bool parse_default(char *str, upb_value *d, int type) { upb_value_setuint32(d, val); break; } - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): + case UPB_TYPE_UINT64: upb_value_setuint64(d, strtoull(str, &end, 0)); if (errno == ERANGE || *end) success = false; break; - case UPB_TYPE(DOUBLE): + case UPB_TYPE_DOUBLE: upb_value_setdouble(d, strtod(str, &end)); if (errno == ERANGE || *end) success = false; break; - case UPB_TYPE(FLOAT): + case UPB_TYPE_FLOAT: upb_value_setfloat(d, strtof(str, &end)); if (errno == ERANGE || *end) success = false; break; - case UPB_TYPE(BOOL): { + case UPB_TYPE_BOOL: { if (strcmp(str, "false") == 0) upb_value_setbool(d, false); else if (strcmp(str, "true") == 0) @@ -328,8 +380,8 @@ static bool parse_default(char *str, upb_value *d, int type) { return success; } -static void field_endmsg(void *_r, upb_status *status) { - upb_descreader *r = _r; +static void field_endmsg(const upb_sinkframe *frame, upb_status *status) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_fielddef *f = r->f; // TODO: verify that all required fields were present. 
assert(upb_fielddef_number(f) != 0 && upb_fielddef_name(f) != NULL); @@ -340,7 +392,7 @@ static void field_endmsg(void *_r, upb_status *status) { upb_status_seterrliteral(status, "Submessages cannot have defaults."); return; } - if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE(ENUM)) { + if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) { upb_fielddef_setdefaultcstr(f, r->default_string); } else { upb_value val; @@ -356,30 +408,27 @@ static void field_endmsg(void *_r, upb_status *status) { } } -static bool field_ontype(void *_r, void *fval, int32_t val) { - UPB_UNUSED(fval); - upb_descreader *r = _r; - upb_fielddef_settype(r->f, val); +static bool field_ontype(const upb_sinkframe *frame, int32_t val) { + upb_descreader *r = upb_sinkframe_userdata(frame); + upb_fielddef_setdescriptortype(r->f, val); return true; } -static bool field_onlabel(void *_r, void *fval, int32_t val) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static bool field_onlabel(const upb_sinkframe *frame, int32_t val) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_fielddef_setlabel(r->f, val); return true; } -static bool field_onnumber(void *_r, void *fval, int32_t val) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static bool field_onnumber(const upb_sinkframe *frame, int32_t val) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_fielddef_setnumber(r->f, val); return true; } -static size_t field_onname(void *_r, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t field_onname(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // XXX: see comment at the top of the file. 
char *name = upb_strndup(buf, n); upb_fielddef_setname(r->f, name); @@ -387,10 +436,9 @@ static size_t field_onname(void *_r, void *fval, const char *buf, size_t n) { return n; } -static size_t field_ontypename(void *_r, void *fval, const char *buf, - size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t field_ontypename(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // XXX: see comment at the top of the file. char *name = upb_strndup(buf, n); upb_fielddef_setsubdefname(r->f, name); @@ -398,10 +446,9 @@ static size_t field_ontypename(void *_r, void *fval, const char *buf, return n; } -static size_t field_ondefaultval(void *_r, void *fval, const char *buf, - size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t field_ondefaultval(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); // Have to convert from string to the correct type, but we might not know the // type yet, so we save it as a string until the end of the field. // XXX: see comment at the top of the file. @@ -411,15 +458,16 @@ static size_t field_ondefaultval(void *_r, void *fval, const char *buf, } // Handlers for google.protobuf.DescriptorProto (representing a message). 
-static bool msg_startmsg(void *_r) { - upb_descreader *r = _r; - upb_deflist_push(&r->defs, upb_upcast(upb_msgdef_new(&r->defs))); +static bool msg_startmsg(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); + upb_pipeline *p = upb_sinkframe_pipeline(frame); + upb_deflist_push(&r->defs, upb_upcast(upb_msgdef_new(&r->defs)), p); upb_descreader_startcontainer(r); return true; } -static void msg_endmsg(void *_r, upb_status *status) { - upb_descreader *r = _r; +static void msg_endmsg(const upb_sinkframe *frame, upb_status *status) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_msgdef *m = upb_descreader_top(r); if(!upb_def_fullname(upb_upcast(m))) { upb_status_seterrliteral(status, "Encountered message with no name."); @@ -428,9 +476,9 @@ static void msg_endmsg(void *_r, upb_status *status) { upb_descreader_endcontainer(r); } -static size_t msg_onname(void *_r, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static size_t msg_onname(const upb_sinkframe *frame, + const char *buf, size_t n) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_msgdef *m = upb_descreader_top(r); // XXX: see comment at the top of the file. char *name = upb_strndup(buf, n); @@ -439,18 +487,16 @@ static size_t msg_onname(void *_r, void *fval, const char *buf, size_t n) { return n; } -static bool msg_onendfield(void *_r, void *fval) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static bool msg_onendfield(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); upb_msgdef *m = upb_descreader_top(r); upb_msgdef_addfield(m, r->f, &r->defs); r->f = NULL; return true; } -static bool discardfield(void *_r, void *fval) { - UPB_UNUSED(fval); - upb_descreader *r = _r; +static bool discardfield(const upb_sinkframe *frame) { + upb_descreader *r = upb_sinkframe_userdata(frame); // Discard extension field so we don't leak it. 
upb_fielddef_unref(r->f, &r->defs); r->f = NULL; @@ -496,7 +542,8 @@ static void reghandlers(void *closure, upb_handlers *h) { } } -const upb_handlers *upb_descreader_newhandlers(const void *owner) { +const upb_handlers *upb_descreader_gethandlers(const void *owner) { return upb_handlers_newfrozen( - GOOGLE_PROTOBUF_FILEDESCRIPTORSET, owner, reghandlers, NULL); + GOOGLE_PROTOBUF_FILEDESCRIPTORSET, &upb_descreader_frametype, + owner, reghandlers, NULL); } diff --git a/upb/descriptor/reader.h b/upb/descriptor/reader.h index 4312682a8a..87f5f86a54 100644 --- a/upb/descriptor/reader.h +++ b/upb/descriptor/reader.h @@ -4,9 +4,8 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * upb_descreader provides a set of sink handlers that will build defs from a - * data source that uses the descriptor.proto schema (like a protobuf binary - * descriptor). + * upb::descriptor::Reader provides a way of building upb::Defs from + * data in descriptor.proto format. */ #ifndef UPB_DESCRIPTOR_H @@ -15,69 +14,62 @@ #include "upb/handlers.h" #ifdef __cplusplus -extern "C" { -#endif +namespace upb { +namespace descriptor { -/* upb_deflist ****************************************************************/ - -// upb_deflist is an internal-only dynamic array for storing a growing list of -// upb_defs. -typedef struct { - upb_def **defs; - size_t len; - size_t size; - bool owned; -} upb_deflist; - -void upb_deflist_init(upb_deflist *l); -void upb_deflist_uninit(upb_deflist *l); -void upb_deflist_push(upb_deflist *l, upb_def *d); - -/* upb_descreader ************************************************************/ - -// We keep a stack of all the messages scopes we are currently in, as well as -// the top-level file scope. This is necessary to correctly qualify the -// definitions that are contained inside. "name" tracks the name of the -// message or package (a bare name -- not qualified by any enclosing scopes). 
-typedef struct { - char *name; - // Index of the first def that is under this scope. For msgdefs, the - // msgdef itself is at start-1. - int start; -} upb_descreader_frame; - -typedef struct { - upb_deflist defs; - upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH]; - int stack_len; - upb_status status; - - uint32_t number; - char *name; - bool saw_number; - bool saw_name; - - char *default_string; - - upb_fielddef *f; -} upb_descreader; - -void upb_descreader_init(upb_descreader *r); -void upb_descreader_uninit(upb_descreader *r); - -// Registers handlers that will build the defs. Pass the descreader as the -// closure. -const upb_handlers *upb_descreader_newhandlers(const void *owner); +// Frame type that accumulates defs as they are being built from a descriptor +// according to the descriptor.proto schema. +class Reader; // Gets the array of defs that have been parsed and removes them from the // descreader. Ownership of the defs is passed to the caller using the given // owner), but the ownership of the returned array is retained and is // invalidated by any other call into the descreader. The defs will not have // been resolved, and are ready to be added to a symtab. +inline upb::Def** GetDefs(Reader* r, void* owner, int* n); + +// Gets the handlers for reading a FileDescriptorSet, which builds defs and +// accumulates them in a Reader object (which the handlers use as their +// FrameType). +inline const upb::Handlers* GetReaderHandlers(const void* owner); + +} // namespace descriptor +} // namespace upb + +typedef upb::descriptor::Reader upb_descreader; + +extern "C" { +#else +struct upb_descreader; +typedef struct upb_descreader upb_descreader; +#endif + +// C API. +const upb_frametype *upb_descreader_getframetype(); upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n); +const upb_handlers *upb_descreader_gethandlers(const void *owner); + + +// C++ implementation details. 
///////////////////////////////////////////////// #ifdef __cplusplus -} /* extern "C" */ +} // extern "C" + +namespace upb { + +template<> inline const FrameType* GetFrameType() { + return upb_descreader_getframetype(); +} + +namespace descriptor { +inline upb::Def** GetDefs(Reader* r, void* owner, int* n) { + return upb_descreader_getdefs(r, owner, n); +} +inline const upb::Handlers* GetReaderHandlers(const void* owner) { + return upb_descreader_gethandlers(owner); +} +} // namespace descriptor +} // namespace upb #endif -#endif +#endif // UPB_DESCRIPTOR_H diff --git a/upb/google/bridge.cc b/upb/google/bridge.cc index 4d64ab8dd7..f5b664cfe5 100644 --- a/upb/google/bridge.cc +++ b/upb/google/bridge.cc @@ -47,11 +47,15 @@ class me::Defs { const upb::FieldDef* upb_f = i.field(); const goog::FieldDescriptor* proto2_f = d->FindFieldByNumber(upb_f->number()); + if (!proto2_f) { + proto2_f = d->file()->pool()->FindExtensionByNumber(d, upb_f->number()); + } + assert(proto2_f); if (!upb::google::TrySetWriteHandlers(proto2_f, m, upb_f, h) #ifdef UPB_GOOGLE3 && !upb::google::TrySetProto1WriteHandlers(proto2_f, m, upb_f, h) #endif - ) { + ) { // Unsupported reflection class. // // Should we fall back to using the public Reflection interface in this @@ -62,7 +66,7 @@ class me::Defs { } } - static void StaticOnMessage(void *closure, upb::Handlers* handlers) { + static void StaticOnMessage(void* closure, upb::Handlers* handlers) { me::Defs* defs = static_cast(closure); defs->OnMessage(handlers); } @@ -121,51 +125,53 @@ FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, upb_f->set_number(f->number()); upb_f->set_name(f->name()); upb_f->set_label(static_cast(f->label())); - upb_f->set_type(weak_prototype ? - UPB_TYPE_MESSAGE : static_cast(f->type())); + upb_f->set_descriptor_type( + weak_prototype ? 
UPB_DESCRIPTOR_TYPE_MESSAGE : + static_cast(f->type())); if (weak_prototype) { upb_f->set_subdef_name(weak_prototype->GetDescriptor()->full_name()); - } else if (upb_f->IsSubMessage()) { - upb_f->set_subdef_name(f->message_type()->full_name()); - } else if (upb_f->type() == UPB_TYPE(ENUM)) { - // We set the enum default numerically. - upb_f->set_default_value( - MakeValue(static_cast(f->default_value_enum()->number()))); - upb_f->set_subdef_name(f->enum_type()->full_name()); } else { - // Set field default for primitive types. Need to switch on the upb type - // rather than the proto2 type, because upb_f->type() may have been changed - // from BYTES to MESSAGE for a weak field. - switch (upb_types[upb_f->type()].inmemory_type) { - case UPB_CTYPE_INT32: + switch (upb_f->type()) { + case UPB_TYPE_INT32: upb_f->set_default_value(MakeValue(f->default_value_int32())); break; - case UPB_CTYPE_INT64: + case UPB_TYPE_INT64: upb_f->set_default_value( MakeValue(static_cast(f->default_value_int64()))); break; - case UPB_CTYPE_UINT32: + case UPB_TYPE_UINT32: upb_f->set_default_value(MakeValue(f->default_value_uint32())); break; - case UPB_CTYPE_UINT64: + case UPB_TYPE_UINT64: upb_f->set_default_value( MakeValue(static_cast(f->default_value_uint64()))); break; - case UPB_CTYPE_DOUBLE: + case UPB_TYPE_DOUBLE: upb_f->set_default_value(MakeValue(f->default_value_double())); break; - case UPB_CTYPE_FLOAT: + case UPB_TYPE_FLOAT: upb_f->set_default_value(MakeValue(f->default_value_float())); break; - case UPB_CTYPE_BOOL: + case UPB_TYPE_BOOL: upb_f->set_default_value(MakeValue(f->default_value_bool())); break; - case UPB_CTYPE_BYTEREGION: + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: upb_f->set_default_string(f->default_value_string()); break; + case UPB_TYPE_MESSAGE: + upb_f->set_subdef_name(f->message_type()->full_name()); + break; + case UPB_TYPE_ENUM: + // We set the enum default numerically. 
+ upb_f->set_default_value( + MakeValue(static_cast(f->default_value_enum()->number()))); + upb_f->set_subdef_name(f->enum_type()->full_name()); + break; } } + bool ok = md->AddField(upb_f, &upb_f); UPB_ASSERT_VAR(ok, ok); @@ -174,8 +180,7 @@ FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, } else if (f->cpp_type() == goog::FieldDescriptor::CPPTYPE_MESSAGE) { *subm = upb::google::GetFieldPrototype(m, f); #ifdef UPB_GOOGLE3 - if (!*subm) - *subm = upb::google::GetProto1FieldPrototype(m, f); + if (!*subm) *subm = upb::google::GetProto1FieldPrototype(m, f); #endif assert(*subm); } @@ -183,7 +188,7 @@ FieldDef* AddFieldDef(const goog::Message& m, const goog::FieldDescriptor* f, return upb_f; } -upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) { +upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, const void* owner) { upb::EnumDef* e = upb::EnumDef::New(owner); e->set_full_name(desc->full_name()); for (int i = 0; i < desc->value_count(); i++) { @@ -194,25 +199,28 @@ upb::EnumDef* NewEnumDef(const goog::EnumDescriptor* desc, void *owner) { return e; } -static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, +static upb::MessageDef* NewMessageDef(const goog::Message& m, const void* owner, me::Defs* defs) { upb::MessageDef* md = upb::MessageDef::New(owner); + const goog::Descriptor* d = m.GetDescriptor(); md->set_full_name(m.GetDescriptor()->full_name()); // Must do this before processing submessages to prevent infinite recursion. 
defs->AddMessage(&m, md); - const goog::Descriptor* d = m.GetDescriptor(); + vector fields; + d->file()->pool()->FindAllExtensions(d, &fields); for (int i = 0; i < d->field_count(); i++) { - const goog::FieldDescriptor* proto2_f = d->field(i); + fields.push_back(d->field(i)); + } + for (int i = 0; i < fields.size(); i++) { + const goog::FieldDescriptor* proto2_f = fields[i]; + assert(proto2_f); #ifdef UPB_GOOGLE3 // Skip lazy fields for now since we can't properly handle them. if (proto2_f->options().lazy()) continue; #endif - // Extensions not supported yet. - if (proto2_f->is_extension()) continue; - const goog::Message* subm_prototype; upb::FieldDef* f = AddFieldDef(m, proto2_f, md, &subm_prototype); @@ -220,7 +228,7 @@ static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, upb::Def* subdef = defs->FindSymbol(f->subdef_name()); if (!subdef) { - if (f->type() == UPB_TYPE(ENUM)) { + if (f->type() == UPB_TYPE_ENUM) { subdef = NewEnumDef(proto2_f->enum_type(), owner)->Upcast(); defs->AddSymbol(subdef->full_name(), subdef); } else { @@ -231,11 +239,11 @@ static upb::MessageDef* NewMessageDef(const goog::Message& m, void *owner, } f->set_subdef(subdef); } - return md; } -const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) { +const upb::Handlers* NewWriteHandlers(const goog::Message& m, + const void* owner) { me::Defs defs; const upb::MessageDef* md = NewMessageDef(m, owner, &defs); @@ -245,8 +253,8 @@ const upb::Handlers* NewWriteHandlers(const goog::Message& m, void *owner) { bool success = Def::Freeze(defs_vec, &status); UPB_ASSERT_VAR(success, success); - const upb::Handlers* ret = - upb::Handlers::NewFrozen(md, owner, me::Defs::StaticOnMessage, &defs); + const upb::Handlers* ret = upb::Handlers::NewFrozen( + md, NULL, owner, me::Defs::StaticOnMessage, &defs); // Unref all defs, since they're now ref'd by the handlers. 
for (int i = 0; i < static_cast(defs_vec.size()); i++) { diff --git a/upb/google/bridge.h b/upb/google/bridge.h index 8a2256f6fc..5091e233fb 100644 --- a/upb/google/bridge.h +++ b/upb/google/bridge.h @@ -66,9 +66,10 @@ namespace google { // // TODO(haberman): Add handler caching functionality so that we don't use // O(n^2) memory in the worst case when incrementally building handlers. -const upb::Handlers* NewWriteHandlers(const proto2::Message& m, void *owner); +const upb::Handlers* NewWriteHandlers(const proto2::Message& m, + const void* owner); const upb::Handlers* NewWriteHandlers(const ::google::protobuf::Message& m, - void *owner); + const void* owner); } // namespace google } // namespace upb diff --git a/upb/google/cord.h b/upb/google/cord.h deleted file mode 100644 index c579c0c748..0000000000 --- a/upb/google/cord.h +++ /dev/null @@ -1,48 +0,0 @@ -// -// upb - a minimalist implementation of protocol buffers. -// -// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. -// Author: Josh Haberman -// -// Functionality for interoperating with Cord. Only needed inside Google. - -#ifndef UPB_GOOGLE_CORD_H -#define UPB_GOOGLE_CORD_H - -#include "strings/cord.h" -#include "upb/bytestream.h" - -namespace upb { - -namespace proto2_bridge_google3 { class FieldAccessor; } -namespace proto2_bridge_opensource { class FieldAccessor; } - -namespace google { - -class P2R_Handlers; - -class CordSupport { - private: - UPB_DISALLOW_POD_OPS(CordSupport); - - inline static void AssignToCord(const upb::ByteRegion* r, Cord* cord) { - // TODO(haberman): ref source data if source is a cord. 
- cord->Clear(); - uint64_t ofs = r->start_ofs(); - while (ofs < r->end_ofs()) { - size_t len; - const char *buf = r->GetPtr(ofs, &len); - cord->Append(StringPiece(buf, len)); - ofs += len; - } - } - - friend class ::upb::proto2_bridge_google3::FieldAccessor; - friend class ::upb::proto2_bridge_opensource::FieldAccessor; - friend class P2R_Handlers; -}; - -} // namespace google -} // namespace upb - -#endif // UPB_GOOGLE_CORD_H diff --git a/upb/google/proto1.cc b/upb/google/proto1.cc index bb9ff75b23..39677237ad 100644 --- a/upb/google/proto1.cc +++ b/upb/google/proto1.cc @@ -18,23 +18,14 @@ #include "upb/google/proto1.h" -// TODO(haberman): friend upb so that this isn't required. -#define protected public #include "net/proto2/public/repeated_field.h" -#undef private - -// TODO(haberman): friend upb so that this isn't required. -#define private public -#include "net/proto/proto2_reflection.h" -#undef private - #include "net/proto/internal_layout.h" -#include "upb/bytestream.h" +#include "net/proto/proto2_reflection.h" #include "upb/def.h" -#include "upb/google/cord.h" #include "upb/handlers.h" +#include "upb/sink.h" -template static T* GetPointer(void *message, size_t offset) { +template static T* GetPointer(void* message, size_t offset) { return reinterpret_cast(static_cast(message) + offset); } @@ -47,31 +38,32 @@ class P2R_Handlers { // of the FieldDef that are necessary to read/write this field to a // proto2::Message. static bool TrySet(const proto2::FieldDescriptor* proto2_f, - const proto2::Message& m, - const upb::FieldDef* upb_f, upb::Handlers* h) { + const proto2::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { const proto2::Reflection* base_r = m.GetReflection(); // See file comment re: dynamic_cast. const _pi::Proto2Reflection* r = dynamic_cast(base_r); if (!r) return false; - // Extensions not supported yet. - if (proto2_f->is_extension()) return false; + // Extensions don't exist in proto1. 
+ assert(!proto2_f->is_extension()); + +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_##name: \ + case _pi::CREP_OPTIONAL_##name: \ + case _pi::CREP_REPEATED_##name: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); \ + return true; switch (r->GetFieldLayout(proto2_f)->crep) { -#define PRIMITIVE(name, type_name) \ - case _pi::CREP_REQUIRED_ ## name: \ - case _pi::CREP_OPTIONAL_ ## name: \ - case _pi::CREP_REPEATED_ ## name: \ - SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; - PRIMITIVE(DOUBLE, double); - PRIMITIVE(FLOAT, float); - PRIMITIVE(INT64, int64_t); - PRIMITIVE(UINT64, uint64_t); - PRIMITIVE(INT32, int32_t); - PRIMITIVE(FIXED64, uint64_t); - PRIMITIVE(FIXED32, uint32_t); - PRIMITIVE(BOOL, bool); -#undef PRIMITIVE + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); case _pi::CREP_REQUIRED_STRING: case _pi::CREP_OPTIONAL_STRING: case _pi::CREP_REPEATED_STRING: @@ -102,16 +94,19 @@ class P2R_Handlers { case _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2: SetWeakMessageHandlers(proto2_f, m, r, upb_f, h); return true; - default: assert(false); return false; + default: + assert(false); + return false; } } +#undef PRIMITIVE + // If the field "f" in the message "m" is a weak field, returns the prototype // of the submessage (which may be a specific type or may be OpaqueMessage). // Otherwise returns NULL. static const proto2::Message* GetWeakPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f) { + const proto2::Message& m, const proto2::FieldDescriptor* f) { // See file comment re: dynamic_cast. const _pi::Proto2Reflection* r = dynamic_cast(m.GetReflection()); @@ -132,8 +127,7 @@ class P2R_Handlers { // the submessage (which may be OpaqueMessage for a weak field that is not // linked in). Otherwise returns NULL. 
static const proto2::Message* GetFieldPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f) { + const proto2::Message& m, const proto2::FieldDescriptor* f) { // See file comment re: dynamic_cast. const proto2::Message* ret = GetWeakPrototype(m, f); if (ret) { @@ -143,7 +137,7 @@ class P2R_Handlers { // factory. assert(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE); ret = proto2::MessageFactory::generated_factory()->GetPrototype( - f->message_type()); + f->message_type()); assert(ret); return ret; } else { @@ -154,11 +148,9 @@ class P2R_Handlers { private: class FieldOffset { public: - FieldOffset( - const proto2::FieldDescriptor* f, - const _pi::Proto2Reflection* r) - : offset_(GetOffset(f, r)), - is_repeated_(f->is_repeated()) { + FieldOffset(const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { if (!is_repeated_) { int64_t hasbit = GetHasbit(f, r); hasbyte_ = hasbit / 8; @@ -166,7 +158,7 @@ class P2R_Handlers { } } - template T* GetFieldPointer(void* message) const { + template T* GetFieldPointer(void* message) const { return GetPointer(message, offset_); } @@ -193,7 +185,6 @@ class P2R_Handlers { return selector; } - static int16_t GetHasbit(const proto2::FieldDescriptor* f, const _pi::Proto2Reflection* r) { assert(!f->is_repeated()); @@ -211,60 +202,60 @@ class P2R_Handlers { const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, const upb::FieldDef* f, upb::Handlers* h) { assert(f->IsSequence()); - h->SetStartSequenceHandler( - f, &PushOffset, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartSequenceHandler(f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); } - static void* PushOffset(void *m, void *fval) { - const FieldOffset* offset = static_cast(fval); - return offset->GetFieldPointer(m); + static void* PushOffset(const upb::SinkFrame* frame) { + const FieldOffset* offset = + 
static_cast(frame->handler_data()); + return offset->GetFieldPointer(frame->userdata()); } // Primitive Value (numeric, enum, bool) ///////////////////////////////////// - template static void SetPrimitiveHandlers( - const proto2::FieldDescriptor* proto2_f, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + template + static void SetPrimitiveHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetValueHandler(f, &Append, NULL, NULL); } else { - upb::SetStoreValueHandler( - f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + upb::SetStoreValueHandler(f, GetOffset(proto2_f, r), + GetHasbit(proto2_f, r), h); } } template - static bool Append(void *_r, void *fval, T val) { - UPB_UNUSED(fval); + static bool Append(const upb::SinkFrame* frame, T val) { // Proto1's ProtoArray class derives from proto2::RepeatedField. 
- proto2::RepeatedField* r = static_cast*>(_r); + proto2::RepeatedField* r = + static_cast*>(frame->userdata()); r->Add(val); return true; } // String //////////////////////////////////////////////////////////////////// - static void SetStringHandlers( - const proto2::FieldDescriptor* proto2_f, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + static void SetStringHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { h->SetStringHandler(f, &OnStringBuf, NULL, NULL); if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); } else { - h->SetStartStringHandler( - f, &StartString, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartStringHandler(f, &StartString, new FieldOffset(proto2_f, r), + &upb::DeletePointer); } } - static void* StartString(void *m, void *fval, size_t size_hint) { + static void* StartString(const upb::SinkFrame* frame, size_t size_hint) { UPB_UNUSED(size_hint); - const FieldOffset* info = static_cast(fval); + void* m = frame->userdata(); + const FieldOffset* info = + static_cast(frame->handler_data()); info->SetHasbit(m); string* str = info->GetFieldPointer(m); str->clear(); @@ -272,16 +263,18 @@ class P2R_Handlers { return str; } - static size_t OnStringBuf(void *_s, void *fval, const char *buf, size_t n) { - string* s = static_cast(_s); + static size_t OnStringBuf(const upb::SinkFrame* frame, + const char* buf, + size_t n) { + string* s = static_cast(frame->userdata()); s->append(buf, n); return n; } - static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { - UPB_UNUSED(fval); + static void* StartRepeatedString(const upb::SinkFrame* frame, + size_t size_hint) { proto2::RepeatedPtrField* r = - static_cast*>(_r); + static_cast*>(frame->userdata()); string* str = r->Add(); // reserve() here appears to hurt performance rather than help. 
return str; @@ -290,22 +283,24 @@ class P2R_Handlers { // Out-of-line string //////////////////////////////////////////////////////// static void SetOutOfLineStringHandlers( - const proto2::FieldDescriptor* proto2_f, - const _pi::Proto2Reflection* r, + const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r, const upb::FieldDef* f, upb::Handlers* h) { // This type is only used for non-repeated string fields. assert(!f->IsSequence()); - h->SetStartStringHandler( - f, &StartOutOfLineString, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartStringHandler(f, &StartOutOfLineString, + new FieldOffset(proto2_f, r), + &upb::DeletePointer); h->SetStringHandler(f, &OnStringBuf, NULL, NULL); } - static void* StartOutOfLineString(void *m, void *fval, size_t size_hint) { - const FieldOffset* info = static_cast(fval); + static void* StartOutOfLineString(const upb::SinkFrame* frame, + size_t size_hint) { + const FieldOffset* info = + static_cast(frame->handler_data()); + void* m = frame->userdata(); info->SetHasbit(m); - string **str = info->GetFieldPointer(m); - if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + string** str = info->GetFieldPointer(m); + if (*str == &::proto2::internal::GetEmptyString()) *str = new string(); (*str)->clear(); // reserve() here appears to hurt performance rather than help. 
@@ -314,43 +309,43 @@ class P2R_Handlers { // Cord ////////////////////////////////////////////////////////////////////// - static void SetCordHandlers( - const proto2::FieldDescriptor* proto2_f, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + static void SetCordHandlers(const proto2::FieldDescriptor* proto2_f, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { h->SetStringHandler(f, &OnCordBuf, NULL, NULL); if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); } else { - h->SetStartStringHandler( - f, &StartCord, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartStringHandler(f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); } } - static void* StartCord(void *m, void *fval, size_t size_hint) { + static void* StartCord(const upb::SinkFrame* frame, size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); - const FieldOffset* offset = static_cast(fval); + void* m = frame->userdata(); + const FieldOffset* offset = + static_cast(frame->handler_data()); offset->SetHasbit(m); Cord* field = offset->GetFieldPointer(m); field->Clear(); return field; } - static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - Cord* c = static_cast(_c); + static size_t OnCordBuf(const upb::SinkFrame* frame, + const char* buf, + size_t n) { + Cord* c = static_cast(frame->userdata()); c->Append(StringPiece(buf, n)); return true; } - static void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + static void* StartRepeatedCord(const upb::SinkFrame* frame, + size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); proto2::RepeatedField* r = - static_cast*>(_r); + static_cast*>(frame->userdata()); return r->Add(); } @@ -358,14 +353,12 @@ class P2R_Handlers { class SubMessageHandlerData : public FieldOffset { public: - SubMessageHandlerData( - const proto2::Message& 
prototype, - const proto2::FieldDescriptor* f, - const _pi::Proto2Reflection* r) + SubMessageHandlerData(const proto2::Message& prototype, + const proto2::FieldDescriptor* f, + const _pi::Proto2Reflection* r) : FieldOffset(f, r) { prototype_ = GetWeakPrototype(prototype, f); - if (!prototype_) - prototype_ = GetFieldPrototype(prototype, f); + if (!prototype_) prototype_ = GetFieldPrototype(prototype, f); } const proto2::Message* prototype() const { return prototype_; } @@ -375,43 +368,40 @@ class P2R_Handlers { }; static void SetStartSubMessageHandler( - const proto2::FieldDescriptor* proto2_f, - const proto2::Message& m, - const _pi::Proto2Reflection* r, - upb::Handlers::StartFieldHandler* handler, + const proto2::FieldDescriptor* proto2_f, const proto2::Message& m, + const _pi::Proto2Reflection* r, upb::Handlers::StartFieldHandler* handler, const upb::FieldDef* f, upb::Handlers* h) { - h->SetStartSubMessageHandler( - f, handler, - new SubMessageHandlerData(m, proto2_f, r), - &upb::DeletePointer); + h->SetStartSubMessageHandler(f, handler, + new SubMessageHandlerData(m, proto2_f, r), + &upb::DeletePointer); } static void SetRequiredMessageHandlers( - const proto2::FieldDescriptor* proto2_f, - const proto2::Message& m, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + const proto2::FieldDescriptor* proto2_f, const proto2::Message& m, + const _pi::Proto2Reflection* r, const upb::FieldDef* f, + upb::Handlers* h) { if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); } else { - h->SetStartSubMessageHandler( - f, &StartRequiredSubMessage, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartSubMessageHandler(f, &StartRequiredSubMessage, + new FieldOffset(proto2_f, r), + &upb::DeletePointer); } } - static void* StartRequiredSubMessage(void *m, void *fval) { - const FieldOffset* offset = static_cast(fval); + static void* 
StartRequiredSubMessage(const upb::SinkFrame* frame) { + const FieldOffset* offset = + static_cast(frame->handler_data()); + void* m = frame->userdata(); offset->SetHasbit(m); return offset->GetFieldPointer(m); } - static void SetMessageHandlers( - const proto2::FieldDescriptor* proto2_f, - const proto2::Message& m, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + static void SetMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); @@ -420,11 +410,10 @@ class P2R_Handlers { } } - static void SetWeakMessageHandlers( - const proto2::FieldDescriptor* proto2_f, - const proto2::Message& m, - const _pi::Proto2Reflection* r, - const upb::FieldDef* f, upb::Handlers* h) { + static void SetWeakMessageHandlers(const proto2::FieldDescriptor* proto2_f, + const proto2::Message& m, + const _pi::Proto2Reflection* r, + const upb::FieldDef* f, upb::Handlers* h) { if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); SetStartSubMessageHandler(proto2_f, m, r, &StartRepeatedSubMessage, f, h); @@ -433,20 +422,22 @@ class P2R_Handlers { } } - static void* StartSubMessage(void *m, void *fval) { + static void* StartSubMessage(const upb::SinkFrame* frame) { + void* m = frame->userdata(); const SubMessageHandlerData* info = - static_cast(fval); + static_cast(frame->handler_data()); info->SetHasbit(m); - proto2::Message **subm = info->GetFieldPointer(m); + proto2::Message** subm = info->GetFieldPointer(m); if (*subm == info->prototype()) *subm = (*subm)->New(); return *subm; } - static void* StartWeakSubMessage(void *m, void *fval) { + static void* StartWeakSubMessage(const upb::SinkFrame* frame) { + void* m = frame->userdata(); const SubMessageHandlerData* info = - static_cast(fval); + 
static_cast(frame->handler_data()); info->SetHasbit(m); - proto2::Message **subm = info->GetFieldPointer(m); + proto2::Message** subm = info->GetFieldPointer(m); if (*subm == NULL) { *subm = info->prototype()->New(); } @@ -459,19 +450,19 @@ class P2R_Handlers { // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { - (void)t; + UPB_UNUSED(t); assert(false); } }; // Closure is a RepeatedPtrField*, but we access it through // its base class RepeatedPtrFieldBase*. - static void* StartRepeatedSubMessage(void* _r, void *fval) { + static void* StartRepeatedSubMessage(const upb::SinkFrame* frame) { const SubMessageHandlerData* info = - static_cast(fval); - proto2::internal::RepeatedPtrFieldBase *r = - static_cast(_r); - void *submsg = r->AddFromCleared(); + static_cast(frame->handler_data()); + proto2::internal::RepeatedPtrFieldBase* r = + static_cast(frame->userdata()); + void* submsg = r->AddFromCleared(); if (!submsg) { submsg = info->prototype()->New(); r->AddAllocated(submsg); @@ -487,14 +478,12 @@ bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, } const proto2::Message* GetProto1WeakPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f) { + const proto2::Message& m, const proto2::FieldDescriptor* f) { return P2R_Handlers::GetWeakPrototype(m, f); } const proto2::Message* GetProto1FieldPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f) { + const proto2::Message& m, const proto2::FieldDescriptor* f) { return P2R_Handlers::GetFieldPrototype(m, f); } diff --git a/upb/google/proto1.h b/upb/google/proto1.h index f35fb1371e..eb550acf6f 100644 --- a/upb/google/proto1.h +++ b/upb/google/proto1.h @@ -38,14 +38,12 @@ bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f, // Returns a prototype for the given field in "m", if it is weak. 
The returned // message could be the linked-in message type or OpaqueMessage, if the weak // message is *not* linked in. Otherwise returns NULL. -const proto2::Message* GetProto1WeakPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f); +const proto2::Message* GetProto1WeakPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); // Returns a prototype for the given non-weak field in "m". const proto2::Message* GetProto1FieldPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f); + const proto2::Message& m, const proto2::FieldDescriptor* f); } // namespace google } // namespace upb diff --git a/upb/google/proto2.cc b/upb/google/proto2.cc index 264530c59e..d7ad919aff 100644 --- a/upb/google/proto2.cc +++ b/upb/google/proto2.cc @@ -15,42 +15,36 @@ #include "upb/google/proto2.h" -#include "upb/google/proto1.h" -#include "upb/bytestream.h" #include "upb/def.h" +#include "upb/google/proto1.h" #include "upb/handlers.h" +#include "upb/sink.h" namespace upb { -namespace proto2_bridge_google3 { class FieldAccessor; } -namespace proto2_bridge_opensource { class FieldAccessor; } +namespace google_google3 { class GMR_Handlers; } +namespace google_opensource { class GMR_Handlers; } } // namespace upb // BEGIN DOUBLE COMPILATION TRICKERY. ////////////////////////////////////////// #ifdef UPB_GOOGLE3 -// TODO(haberman): friend upb so that this isn't required. 
-#define protected public -#include "net/proto2/public/repeated_field.h" -#undef protected - -#define private public -#include "net/proto2/public/generated_message_reflection.h" -#undef private - #include "net/proto2/proto/descriptor.pb.h" #include "net/proto2/public/descriptor.h" +#include "net/proto2/public/extension_set.h" +#include "net/proto2/public/generated_message_reflection.h" #include "net/proto2/public/lazy_field.h" #include "net/proto2/public/message.h" +#include "net/proto2/public/repeated_field.h" #include "net/proto2/public/string_piece_field_support.h" -#include "upb/google/cord.h" namespace goog = ::proto2; -namespace me = ::upb::proto2_bridge_google3; +namespace me = ::upb::google_google3; #else -// TODO(haberman): friend upb so that this isn't required. +// TODO(haberman): remove these once new versions of protobuf that "friend" +// upb are pervasive in the wild. #define protected public #include "google/protobuf/repeated_field.h" #undef protected @@ -61,10 +55,16 @@ namespace me = ::upb::proto2_bridge_google3; #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/extension_set.h" #include "google/protobuf/message.h" namespace goog = ::google::protobuf; -namespace me = ::upb::proto2_bridge_opensource; +namespace me = ::upb::google_opensource; + +using goog::int32; +using goog::int64; +using goog::uint32; +using goog::uint64; #endif // ifdef UPB_GOOGLE3 @@ -74,9 +74,13 @@ namespace me = ::upb::proto2_bridge_opensource; // an enum value for STRING. 
#define UPB_CTYPE_STRING 0 -template static T* GetPointer(void *message, size_t offset) { +template static T* GetPointer(void* message, size_t offset) { return reinterpret_cast(static_cast(message) + offset); } +template +static const T* GetConstPointer(const void* message, size_t offset) { + return reinterpret_cast(static_cast(message) + offset); +} // This class contains handlers that can write into a proto2 class whose // reflection class is GeneratedMessageReflection. (Despite the name, even @@ -86,57 +90,60 @@ template static T* GetPointer(void *message, size_t offset) { // internal interfaces that are not guaranteed to be stable. This class will // need to be updated if any non-backward-compatible changes are made to // GeneratedMessageReflection. -// -// TODO(haberman): change class name? In retrospect, "FieldAccessor" isn't the -// best (something more specific like GeneratedMessageReflectionHandlers or -// GMR_Handlers would be better) but we're depending on a "friend" declaration -// in proto2 that already specifies "FieldAccessor." No versions of proto2 have -// been released that include the "friend FieldAccessor" declaration, so there's -// still time to change this. On the other hand, perhaps it's simpler to just -// rely on "#define private public" since it may be a long time before new -// versions of proto2 open source are pervasive enough that we can remove this -// anyway. -class me::FieldAccessor { +class me::GMR_Handlers { public: // Returns true if we were able to set an accessor and any other properties // of the FieldDef that are necessary to read/write this field to a // proto2::Message. static bool TrySet(const goog::FieldDescriptor* proto2_f, - const goog::Message& m, - const upb::FieldDef* upb_f, upb::Handlers* h) { + const goog::Message& m, const upb::FieldDef* upb_f, + upb::Handlers* h) { const goog::Reflection* base_r = m.GetReflection(); // See file comment re: dynamic_cast. 
const goog::internal::GeneratedMessageReflection* r = dynamic_cast(base_r); if (!r) return false; - // Extensions not supported yet. - if (proto2_f->is_extension()) return false; + +#define PRIMITIVE_TYPE(cpptype, cident) \ +case goog::FieldDescriptor::cpptype: \ + SetPrimitiveHandlers(proto2_f, r, upb_f, h); \ + return true; switch (proto2_f->cpp_type()) { -#define PRIMITIVE_TYPE(cpptype, cident) \ - case goog::FieldDescriptor::cpptype: \ - SetPrimitiveHandlers(proto2_f, r, upb_f, h); return true; - PRIMITIVE_TYPE(CPPTYPE_INT32, int32_t); - PRIMITIVE_TYPE(CPPTYPE_INT64, int64_t); - PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32_t); - PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64_t); + PRIMITIVE_TYPE(CPPTYPE_INT32, int32); + PRIMITIVE_TYPE(CPPTYPE_INT64, int64); + PRIMITIVE_TYPE(CPPTYPE_UINT32, uint32); + PRIMITIVE_TYPE(CPPTYPE_UINT64, uint64); PRIMITIVE_TYPE(CPPTYPE_DOUBLE, double); - PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); - PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); -#undef PRIMITIVE_TYPE + PRIMITIVE_TYPE(CPPTYPE_FLOAT, float); + PRIMITIVE_TYPE(CPPTYPE_BOOL, bool); case goog::FieldDescriptor::CPPTYPE_ENUM: - SetEnumHandlers(proto2_f, r, upb_f, h); + if (proto2_f->is_extension()) { + SetEnumExtensionHandlers(proto2_f, r, upb_f, h); + } else { + SetEnumHandlers(proto2_f, r, upb_f, h); + } return true; case goog::FieldDescriptor::CPPTYPE_STRING: { + if (proto2_f->is_extension()) { +#ifdef UPB_GOOGLE3 + SetStringExtensionHandlers(proto2_f, r, upb_f, h); +#else + SetStringExtensionHandlers(proto2_f, r, upb_f, h); +#endif + return true; + } + // Old versions of the open-source protobuf release erroneously default // to Cord even though that has never been supported in the open-source // release. int32_t ctype = proto2_f->options().has_ctype() ? 
- proto2_f->options().ctype() : UPB_CTYPE_STRING; + proto2_f->options().ctype() + : UPB_CTYPE_STRING; switch (ctype) { #ifdef UPB_GOOGLE3 case goog::FieldOptions::STRING: - SetStringHandlers(proto2_f, m, r, upb_f, h); + SetStringHandlers(proto2_f, r, upb_f, h); return true; case goog::FieldOptions::CORD: SetCordHandlers(proto2_f, r, upb_f, h); @@ -146,7 +153,7 @@ class me::FieldAccessor { return true; #else case UPB_CTYPE_STRING: - SetStringHandlers(proto2_f, m, r, upb_f, h); + SetStringHandlers(proto2_f, r, upb_f, h); return true; #endif default: @@ -156,23 +163,25 @@ class me::FieldAccessor { case goog::FieldDescriptor::CPPTYPE_MESSAGE: #ifdef UPB_GOOGLE3 if (proto2_f->options().lazy()) { + assert(false); return false; // Not yet implemented. - } else { - SetSubMessageHandlers(proto2_f, m, r, upb_f, h); + } +#endif + if (proto2_f->is_extension()) { + SetSubMessageExtensionHandlers(proto2_f, m, r, upb_f, h); return true; } -#else SetSubMessageHandlers(proto2_f, m, r, upb_f, h); return true; -#endif default: return false; } } +#undef PRIMITIVE_TYPE + static const goog::Message* GetFieldPrototype( - const goog::Message& m, - const goog::FieldDescriptor* f) { + const goog::Message& m, const goog::FieldDescriptor* f) { // We assume that all submessages (and extensions) will be constructed // using the same MessageFactory as this message. This doesn't cover the // case of CodedInputStream::SetExtensionRegistry(). 
@@ -209,11 +218,9 @@ class me::FieldAccessor { class FieldOffset { public: - FieldOffset( - const goog::FieldDescriptor* f, - const goog::internal::GeneratedMessageReflection* r) - : offset_(GetOffset(f, r)), - is_repeated_(f->is_repeated()) { + FieldOffset(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) { if (!is_repeated_) { int64_t hasbit = GetHasbit(f, r); hasbyte_ = hasbit / 8; @@ -221,7 +228,7 @@ class me::FieldAccessor { } } - template T* GetFieldPointer(void *message) const { + template T* GetFieldPointer(void* message) const { return GetPointer(message, offset_); } @@ -240,6 +247,29 @@ class me::FieldAccessor { int8_t mask_; }; + class ExtensionFieldData { + public: + ExtensionFieldData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r) + : offset_(r->extensions_offset_), + number_(proto2_f->number()), + type_(proto2_f->type()) { + } + + int number() const { return number_; } + goog::internal::FieldType type() const { return type_; } + + goog::internal::ExtensionSet* GetExtensionSet(goog::Message* m) const { + return GetPointer(m, offset_); + } + + private: + const size_t offset_; + int number_; + goog::internal::FieldType type_; + }; + // StartSequence ///////////////////////////////////////////////////////////// static void SetStartSequenceHandler( @@ -247,14 +277,14 @@ class me::FieldAccessor { const goog::internal::GeneratedMessageReflection* r, const upb::FieldDef* f, upb::Handlers* h) { assert(f->IsSequence()); - h->SetStartSequenceHandler( - f, &PushOffset, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartSequenceHandler(f, &PushOffset, new FieldOffset(proto2_f, r), + &upb::DeletePointer); } - static void* PushOffset(void *m, void *fval) { - const FieldOffset* offset = static_cast(fval); - return offset->GetFieldPointer(m); + static void* PushOffset(const upb::SinkFrame* frame) { + const 
FieldOffset* offset = + static_cast(frame->handler_data()); + return offset->GetFieldPointer(frame->userdata()); } // Primitive Value (numeric, bool) /////////////////////////////////////////// @@ -262,38 +292,68 @@ class me::FieldAccessor { template static void SetPrimitiveHandlers( const goog::FieldDescriptor* proto2_f, const goog::internal::GeneratedMessageReflection* r, - const upb::FieldDef* f, - upb::Handlers* h) { - if (f->IsSequence()) { - SetStartSequenceHandler(proto2_f, r, f, h); - h->SetValueHandler(f, &AppendPrimitive, NULL, NULL); + const upb::FieldDef* f, upb::Handlers* h) { + if (proto2_f->is_extension()) { + ExtensionFieldData* data = new ExtensionFieldData(proto2_f, r); + upb::Handlers::Free* free = &upb::DeletePointer; + if (f->IsSequence()) { + h->SetValueHandler(f, &AppendPrimitiveExtension, data, free); + } else { + h->SetValueHandler(f, &SetPrimitiveExtension, data, free); + } } else { - upb::SetStoreValueHandler( - f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r), h); + if (f->IsSequence()) { + SetStartSequenceHandler(proto2_f, r, f, h); + h->SetValueHandler(f, &AppendPrimitive, NULL, NULL); + } else { + upb::SetStoreValueHandler(f, GetOffset(proto2_f, r), + GetHasbit(proto2_f, r), h); + } } } template - static bool AppendPrimitive(void *_r, void *fval, T val) { - UPB_UNUSED(fval); - goog::RepeatedField* r = static_cast*>(_r); + static bool AppendPrimitive(const upb::SinkFrame* frame, T val) { + goog::RepeatedField* r = + static_cast*>(frame->userdata()); r->Add(val); return true; } + template + static bool AppendPrimitiveExtension(const upb::SinkFrame* frame, T val) { + goog::Message* m = frame->GetUserdata(); + ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + goog::internal::RepeatedPrimitiveTypeTraits::Add( + data->number(), data->type(), true, val, set); + return true; + } + + template + static bool 
SetPrimitiveExtension(const upb::SinkFrame* frame, T val) { + goog::Message* m = frame->GetUserdata(); + ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + goog::internal::PrimitiveTypeTraits::Set(data->number(), data->type(), + val, set); + return true; + } + // Enum ////////////////////////////////////////////////////////////////////// class EnumHandlerData : public FieldOffset { public: - EnumHandlerData( - const goog::FieldDescriptor* proto2_f, - const goog::internal::GeneratedMessageReflection* r, - const upb::FieldDef* f) + EnumHandlerData(const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f) : FieldOffset(proto2_f, r), field_number_(f->number()), unknown_fields_offset_(r->unknown_fields_offset_), - enum_(upb_downcast_enumdef(f->subdef())) { - } + enum_(upb_downcast_enumdef(f->subdef())) {} bool IsValidValue(int32_t val) const { return enum_->FindValueByNumber(val) != NULL; @@ -314,21 +374,22 @@ class me::FieldAccessor { static void SetEnumHandlers( const goog::FieldDescriptor* proto2_f, const goog::internal::GeneratedMessageReflection* r, - const upb::FieldDef* f, - upb::Handlers* h) { + const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); EnumHandlerData* data = new EnumHandlerData(proto2_f, r, f); if (f->IsSequence()) { - h->SetInt32Handler( - f, &AppendEnum, data, &upb::DeletePointer); + h->SetInt32Handler(f, &AppendEnum, data, + &upb::DeletePointer); } else { - h->SetInt32Handler( - f, &SetEnum, data, &upb::DeletePointer); + h->SetInt32Handler(f, &SetEnum, data, + &upb::DeletePointer); } } - static bool SetEnum(void *_m, void *fval, int32_t val) { - goog::Message* m = static_cast(_m); - const EnumHandlerData* data = static_cast(fval); + static bool SetEnum(const upb::SinkFrame* frame, int32_t val) { + goog::Message* m = static_cast(frame->userdata()); + const EnumHandlerData* data = 
+ static_cast(frame->handler_data()); if (data->IsValidValue(val)) { int32_t* message_val = data->GetFieldPointer(m); *message_val = val; @@ -339,15 +400,16 @@ class me::FieldAccessor { return true; } - static bool AppendEnum(void *_m, void *fval, int32_t val) { + static bool AppendEnum(const upb::SinkFrame* frame, int32_t val) { // Closure is the enclosing message. We can't use the RepeatedField<> as // the closure because we need to go back to the message for unrecognized // enum values, which go into the unknown field set. - goog::Message* m = static_cast(_m); - const EnumHandlerData* data = static_cast(fval); + goog::Message* m = static_cast(frame->userdata()); + const EnumHandlerData* data = + static_cast(frame->handler_data()); if (data->IsValidValue(val)) { goog::RepeatedField* r = - data->GetFieldPointer >(m); + data->GetFieldPointer>(m); r->Add(val); } else { data->mutable_unknown_fields(m)->AddVarint(data->field_number(), val); @@ -355,26 +417,56 @@ class me::FieldAccessor { return true; } + // EnumExtension ///////////////////////////////////////////////////////////// + + static void SetEnumExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + ExtensionFieldData* data = new ExtensionFieldData(proto2_f, r); + if (f->IsSequence()) { + h->SetInt32Handler(f, &AppendEnumExtension, data, + upb::DeletePointer); + } else { + h->SetInt32Handler(f, &SetEnumExtension, data, + upb::DeletePointer); + } + } + + static bool SetEnumExtension(const upb::SinkFrame* frame, int32_t val) { + goog::Message* m = frame->GetUserdata(); + const ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + set->SetEnum(data->number(), data->type(), val, NULL); + return true; + } + + static bool AppendEnumExtension(const upb::SinkFrame* frame, int32_t val) { + 
goog::Message* m = frame->GetUserdata(); + const ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + // TODO(haberman): give an accurate value for "packed" + set->AddEnum(data->number(), data->type(), true, val, NULL); + return true; + } + // String //////////////////////////////////////////////////////////////////// // For scalar (non-repeated) string fields. - template - class StringHandlerData : public FieldOffset { + template class StringHandlerData : public FieldOffset { public: StringHandlerData(const goog::FieldDescriptor* proto2_f, - const goog::internal::GeneratedMessageReflection* r, - const goog::Message& prototype) - : FieldOffset(proto2_f, r) { - // "prototype" isn't guaranteed to be empty, so we create a copy to get - // the default string instance. - goog::Message* empty = prototype.New(); - prototype_ = &r->GetStringReference(*empty, proto2_f, NULL); - delete empty; - } + const goog::internal::GeneratedMessageReflection* r) + : FieldOffset(proto2_f, r), + prototype_(*GetConstPointer(r->default_instance_, + GetOffset(proto2_f, r))) {} const T* prototype() const { return prototype_; } - T** GetStringPointer(void *message) const { + T** GetStringPointer(void* message) const { return GetFieldPointer(message); } @@ -384,27 +476,28 @@ class me::FieldAccessor { template static void SetStringHandlers( const goog::FieldDescriptor* proto2_f, - const goog::Message& m, const goog::internal::GeneratedMessageReflection* r, const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); h->SetStringHandler(f, &OnStringBuf, NULL, NULL); if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetStartStringHandler(f, &StartRepeatedString, NULL, NULL); } else { - StringHandlerData* data = new StringHandlerData(proto2_f, r, m); - h->SetStartStringHandler( - f, &StartString, data, &upb::DeletePointer >); + StringHandlerData* data = new 
StringHandlerData(proto2_f, r); + h->SetStartStringHandler(f, &StartString, data, + &upb::DeletePointer>); } } // This needs to be templated because google3 string is not std::string. - template static void* StartString( - void *m, void *fval, size_t size_hint) { + template + static void* StartString(const upb::SinkFrame* frame, size_t size_hint) { UPB_UNUSED(size_hint); + goog::Message* m = static_cast(frame->userdata()); const StringHandlerData* data = - static_cast*>(fval); + static_cast*>(frame->handler_data()); T** str = data->GetStringPointer(m); data->SetHasbit(m); // If it points to the default instance, we must create a new instance. @@ -414,37 +507,75 @@ class me::FieldAccessor { return *str; } - template static size_t OnStringBuf( - void *_str, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - T* str = static_cast(_str); + template + static size_t OnStringBuf(const upb::SinkFrame* frame, + const char* buf, size_t n) { + T* str = static_cast(frame->userdata()); str->append(buf, n); return n; } - template - static void* StartRepeatedString(void *_r, void *fval, size_t size_hint) { + static void* StartRepeatedString(const upb::SinkFrame* frame, + size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); - goog::RepeatedPtrField* r = static_cast*>(_r); + goog::RepeatedPtrField* r = + static_cast*>(frame->userdata()); T* str = r->Add(); str->clear(); // reserve() here appears to hurt performance rather than help. 
return str; } + // StringExtension /////////////////////////////////////////////////////////// + + template + static void SetStringExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, upb::Handlers* h) { + assert(proto2_f->is_extension()); + h->SetStringHandler(f, &OnStringBuf, NULL, NULL); + ExtensionFieldData* data = new ExtensionFieldData(proto2_f, r); + if (f->IsSequence()) { + h->SetStartStringHandler(f, &StartRepeatedStringExtension, data, + upb::DeletePointer); + } else { + h->SetStartStringHandler(f, &StartStringExtension, data, + upb::DeletePointer); + } + } + + // google3 string is not std::string, but we avoid needing to template + // because we do not actually have to declare the string type. + static void* StartStringExtension(const upb::SinkFrame* frame, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::Message* m = frame->GetUserdata(); + const ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->MutableString(data->number(), data->type(), NULL); + } + + static void* StartRepeatedStringExtension(const upb::SinkFrame* frame, + size_t size_hint) { + UPB_UNUSED(size_hint); + goog::Message* m = frame->GetUserdata(); + const ExtensionFieldData* data = + static_cast(frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->AddString(data->number(), data->type(), NULL); + } + // SubMessage //////////////////////////////////////////////////////////////// class SubMessageHandlerData : public FieldOffset { public: - SubMessageHandlerData( - const goog::FieldDescriptor* f, - const goog::internal::GeneratedMessageReflection* r, - const goog::Message* prototype) - : FieldOffset(f, r), - prototype_(prototype) { - } + SubMessageHandlerData(const goog::FieldDescriptor* f, + const goog::internal::GeneratedMessageReflection* r, + const 
goog::Message* prototype) + : FieldOffset(f, r), prototype_(prototype) {} const goog::Message* prototype() const { return prototype_; } @@ -453,13 +584,12 @@ class me::FieldAccessor { }; static void SetSubMessageHandlers( - const goog::FieldDescriptor* proto2_f, - const goog::Message& m, + const goog::FieldDescriptor* proto2_f, const goog::Message& m, const goog::internal::GeneratedMessageReflection* r, - const upb::FieldDef* f, - upb::Handlers* h) { + const upb::FieldDef* f, upb::Handlers* h) { + const goog::Message* field_prototype = GetFieldPrototype(m, proto2_f); SubMessageHandlerData* data = - new SubMessageHandlerData(proto2_f, r, GetFieldPrototype(m, proto2_f)); + new SubMessageHandlerData(proto2_f, r, field_prototype); upb::Handlers::Free* free = &upb::DeletePointer; if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); @@ -469,11 +599,13 @@ class me::FieldAccessor { } } - static void* StartSubMessage(void *m, void *fval) { + static void* StartSubMessage(const upb::SinkFrame* frame) { + void* m = frame->userdata(); const SubMessageHandlerData* data = - static_cast(fval); + static_cast(frame->handler_data()); data->SetHasbit(m); - goog::Message **subm = data->GetFieldPointer(m); + goog::Message** subm = + data->GetFieldPointer(frame->userdata()); if (*subm == NULL || *subm == data->prototype()) { *subm = data->prototype()->New(); } @@ -486,19 +618,19 @@ class me::FieldAccessor { // AddAllocated() calls this, but only if other objects are sitting // around waiting for reuse, which we will not do. static void Delete(Type* t) { - (void)t; + UPB_UNUSED(t); assert(false); } }; // Closure is a RepeatedPtrField*, but we access it through // its base class RepeatedPtrFieldBase*. 
- static void* StartRepeatedSubMessage(void* _r, void *fval) { + static void* StartRepeatedSubMessage(const upb::SinkFrame* frame) { const SubMessageHandlerData* data = - static_cast(fval); - goog::internal::RepeatedPtrFieldBase *r = - static_cast(_r); - void *submsg = r->AddFromCleared(); + static_cast(frame->handler_data()); + goog::internal::RepeatedPtrFieldBase* r = + static_cast(frame->userdata()); + void* submsg = r->AddFromCleared(); if (!submsg) { submsg = data->prototype()->New(); r->AddAllocated(submsg); @@ -506,7 +638,63 @@ class me::FieldAccessor { return submsg; } - // TODO(haberman): handle Extensions, Unknown Fields. + // SubMessageExtension /////////////////////////////////////////////////////// + + class SubMessageExtensionHandlerData : public ExtensionFieldData { + public: + SubMessageExtensionHandlerData( + const goog::FieldDescriptor* proto2_f, + const goog::internal::GeneratedMessageReflection* r, + const goog::Message* prototype) + : ExtensionFieldData(proto2_f, r), + prototype_(prototype) { + } + + const goog::Message* prototype() const { return prototype_; } + + private: + const goog::Message* const prototype_; + }; + + static void SetSubMessageExtensionHandlers( + const goog::FieldDescriptor* proto2_f, + const goog::Message& m, + const goog::internal::GeneratedMessageReflection* r, + const upb::FieldDef* f, + upb::Handlers* h) { + const goog::Message* field_prototype = GetFieldPrototype(m, proto2_f); + SubMessageExtensionHandlerData* data = + new SubMessageExtensionHandlerData(proto2_f, r, field_prototype); + upb::Handlers::Free* free = &upb::DeletePointer; + if (f->IsSequence()) { + h->SetStartSubMessageHandler(f, &StartRepeatedSubMessageExtension, data, + free); + } else { + h->SetStartSubMessageHandler(f, &StartSubMessageExtension, data, free); + } + } + + static void* StartRepeatedSubMessageExtension(const upb::SinkFrame* frame) { + goog::Message* m = frame->GetUserdata(); + const SubMessageExtensionHandlerData* data = + static_cast( + 
frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->AddMessage(data->number(), data->type(), *data->prototype(), + NULL); + } + + static void* StartSubMessageExtension(const upb::SinkFrame* frame) { + goog::Message* m = frame->GetUserdata(); + const SubMessageExtensionHandlerData* data = + static_cast( + frame->handler_data()); + goog::internal::ExtensionSet* set = data->GetExtensionSet(m); + return set->MutableMessage(data->number(), data->type(), *data->prototype(), + NULL); + } + + // TODO(haberman): handle Unknown Fields. #ifdef UPB_GOOGLE3 // Handlers for types/features only included in internal proto2 release: @@ -519,38 +707,40 @@ class me::FieldAccessor { const proto2::FieldDescriptor* proto2_f, const proto2::internal::GeneratedMessageReflection* r, const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); h->SetStringHandler(f, &OnCordBuf, NULL, NULL); if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetStartStringHandler(f, &StartRepeatedCord, NULL, NULL); } else { - h->SetStartStringHandler( - f, &StartCord, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartStringHandler(f, &StartCord, new FieldOffset(proto2_f, r), + &upb::DeletePointer); } } - static void* StartCord(void *m, void *fval, size_t size_hint) { + static void* StartCord(const upb::SinkFrame* frame, size_t size_hint) { UPB_UNUSED(size_hint); - const FieldOffset* offset = static_cast(fval); + void* m = frame->userdata(); + const FieldOffset* offset = + static_cast(frame->handler_data()); offset->SetHasbit(m); Cord* field = offset->GetFieldPointer(m); field->Clear(); return field; } - static size_t OnCordBuf(void *_c, void *fval, const char *buf, size_t n) { - UPB_UNUSED(fval); - Cord* c = static_cast(_c); + static size_t OnCordBuf(const upb::SinkFrame* frame, + const char* buf, size_t n) { + Cord* c = static_cast(frame->userdata()); c->Append(StringPiece(buf, n)); return n; } - static 
void* StartRepeatedCord(void *_r, void *fval, size_t size_hint) { + static void* StartRepeatedCord(const upb::SinkFrame* frame, + size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); proto2::RepeatedField* r = - static_cast*>(_r); + static_cast*>(frame->userdata()); return r->Add(); } @@ -560,27 +750,27 @@ class me::FieldAccessor { const proto2::FieldDescriptor* proto2_f, const proto2::internal::GeneratedMessageReflection* r, const upb::FieldDef* f, upb::Handlers* h) { + assert(!proto2_f->is_extension()); h->SetStringHandler(f, &OnStringPieceBuf, NULL, NULL); if (f->IsSequence()) { SetStartSequenceHandler(proto2_f, r, f, h); h->SetStartStringHandler(f, &StartRepeatedStringPiece, NULL, NULL); } else { - h->SetStartStringHandler( - f, &StartStringPiece, new FieldOffset(proto2_f, r), - &upb::DeletePointer); + h->SetStartStringHandler(f, &StartStringPiece, + new FieldOffset(proto2_f, r), + &upb::DeletePointer); } } - static size_t OnStringPieceBuf(void *_f, void *fval, - const char *buf, size_t len) { - UPB_UNUSED(fval); + static size_t OnStringPieceBuf(const upb::SinkFrame* frame, + const char* buf, size_t len) { // TODO(haberman): alias if possible and enabled on the input stream. // TODO(haberman): add a method to StringPieceField that lets us avoid // this copy/malloc/free. 
proto2::internal::StringPieceField* field = - static_cast(_f); + static_cast(frame->userdata()); size_t new_len = field->size() + len; - char *data = new char[new_len]; + char* data = new char[new_len]; memcpy(data, field->data(), field->size()); memcpy(data + field->size(), buf, len); field->CopyFrom(StringPiece(data, new_len)); @@ -588,9 +778,12 @@ class me::FieldAccessor { return len; } - static void* StartStringPiece(void *m, void *fval, size_t size_hint) { + static void* StartStringPiece(const upb::SinkFrame* frame, + size_t size_hint) { UPB_UNUSED(size_hint); - const FieldOffset* offset = static_cast(fval); + void* m = frame->userdata(); + const FieldOffset* offset = + static_cast(frame->handler_data()); offset->SetHasbit(m); proto2::internal::StringPieceField* field = offset->GetFieldPointer(m); @@ -598,13 +791,13 @@ class me::FieldAccessor { return field; } - static void* StartRepeatedStringPiece(void* _r, void *fval, + static void* StartRepeatedStringPiece(const upb::SinkFrame* frame, size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); - typedef proto2::RepeatedPtrField - RepeatedStringPiece; - RepeatedStringPiece* r = static_cast(_r); + typedef proto2::RepeatedPtrField< + proto2::internal::StringPieceField> RepeatedStringPiece; + RepeatedStringPiece* r = + static_cast(frame->userdata()); proto2::internal::StringPieceField* field = r->Add(); field->Clear(); return field; @@ -619,13 +812,12 @@ namespace google { bool TrySetWriteHandlers(const goog::FieldDescriptor* proto2_f, const goog::Message& prototype, const upb::FieldDef* upb_f, upb::Handlers* h) { - return me::FieldAccessor::TrySet(proto2_f, prototype, upb_f, h); + return me::GMR_Handlers::TrySet(proto2_f, prototype, upb_f, h); } -const goog::Message* GetFieldPrototype( - const goog::Message& m, - const goog::FieldDescriptor* f) { - return me::FieldAccessor::GetFieldPrototype(m, f); +const goog::Message* GetFieldPrototype(const goog::Message& m, + const goog::FieldDescriptor* f) { + return 
me::GMR_Handlers::GetFieldPrototype(m, f); } } // namespace google diff --git a/upb/google/proto2.h b/upb/google/proto2.h index f2662ea520..516b7fdcf7 100644 --- a/upb/google/proto2.h +++ b/upb/google/proto2.h @@ -49,9 +49,8 @@ bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f, // Returns a prototype for the given field in "m", if it is weak. The returned // message could be the linked-in message type or OpaqueMessage, if the weak // message is *not* linked in. Otherwise returns NULL. -const proto2::Message* GetFieldPrototype( - const proto2::Message& m, - const proto2::FieldDescriptor* f); +const proto2::Message* GetFieldPrototype(const proto2::Message& m, + const proto2::FieldDescriptor* f); const ::google::protobuf::Message* GetFieldPrototype( const ::google::protobuf::Message& m, const ::google::protobuf::FieldDescriptor* f); diff --git a/upb/handlers.c b/upb/handlers.c index 8263c9af7e..b7458f2ec6 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -10,36 +10,22 @@ #include #include +#include "upb/sink.h" + // Defined for the sole purpose of having a unique pointer value for // UPB_NO_CLOSURE. char _upb_noclosure; -typedef struct { - upb_func *handler; - - // Could put either or both of these in a separate table to save memory when - // they are sparse. - void *data; - upb_handlerfree *cleanup; - - // TODO(haberman): this is wasteful; only the first "fieldhandler" of a - // submessage field needs this. To reduce memory footprint we should either: - // - put the subhandlers in a separate "fieldhandler", stored as part of - // a union with one of the above fields. - // - count selector offsets by individual pointers instead of by whole - // fieldhandlers. 
- const upb_handlers *subhandlers; -} fieldhandler; - -static const fieldhandler *getfh( +static const upb_fieldhandler *getfh( const upb_handlers *h, upb_selector_t selector) { assert(selector < upb_handlers_msgdef(h)->selector_count); - fieldhandler* fhbase = (void*)&h->fh_base; + upb_fieldhandler* fhbase = (void*)&h->fh_base; return &fhbase[selector]; } -static fieldhandler *getfh_mutable(upb_handlers *h, upb_selector_t selector) { - return (fieldhandler*)getfh(h, selector); +static upb_fieldhandler *getfh_mutable(upb_handlers *h, + upb_selector_t selector) { + return (upb_fieldhandler*)getfh(h, selector); } bool upb_handlers_isfrozen(const upb_handlers *h) { @@ -52,28 +38,22 @@ uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { uint32_t ret = 1; - if (upb_fielddef_isstring(f)) ret += 2; // STARTSTR/ENDSTR + if (upb_fielddef_isstring(f)) ret += 2; // [STARTSTR]/STRING/ENDSTR if (upb_fielddef_isseq(f)) ret += 2; // STARTSEQ/ENDSEQ - if (upb_fielddef_issubmsg(f)) ret += 2; // STARTSUBMSG/ENDSUBMSG + if (upb_fielddef_issubmsg(f)) ret += 1; // [STARTSUBMSG]/ENDSUBMSG return ret; } upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: - case UPB_TYPE_SINT32: - case UPB_TYPE_SFIXED32: case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; case UPB_TYPE_INT64: - case UPB_TYPE_SINT64: - case UPB_TYPE_SFIXED64: return UPB_HANDLER_INT64; case UPB_TYPE_UINT32: - case UPB_TYPE_FIXED32: return UPB_HANDLER_UINT32; case UPB_TYPE_UINT64: - case UPB_TYPE_FIXED64: return UPB_HANDLER_UINT64; case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; @@ -103,11 +83,11 @@ bool upb_getselector( return false; *s = f->selector_base; break; - case UPB_HANDLER_STARTSTR: + case UPB_HANDLER_STRING: if (!upb_fielddef_isstring(f)) return false; *s = f->selector_base; break; - case UPB_HANDLER_STRING: + case UPB_HANDLER_STARTSTR: if (!upb_fielddef_isstring(f)) 
return false; *s = f->selector_base + 1; break; @@ -125,11 +105,11 @@ bool upb_getselector( break; case UPB_HANDLER_STARTSUBMSG: if (!upb_fielddef_issubmsg(f)) return false; - *s = f->selector_base + 1; + *s = f->selector_base; break; case UPB_HANDLER_ENDSUBMSG: if (!upb_fielddef_issubmsg(f)) return false; - *s = f->selector_base + 2; + *s = f->selector_base + 1; break; } assert(*s < upb_fielddef_msgdef(f)->selector_count); @@ -157,7 +137,7 @@ static void do_cleanup(upb_handlers* h, const upb_fielddef *f, upb_handlertype_t type) { upb_selector_t selector; if (!upb_getselector(f, type, &selector)) return; - fieldhandler *fh = getfh_mutable(h, selector); + upb_fieldhandler *fh = getfh_mutable(h, selector); if (fh->cleanup) fh->cleanup(fh->data); fh->cleanup = NULL; fh->data = NULL; @@ -187,13 +167,15 @@ static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, } } -upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { +upb_handlers *upb_handlers_new(const upb_msgdef *md, const upb_frametype *ft, + const void *owner) { assert(upb_msgdef_isfrozen(md)); static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; - size_t fhandlers_size = sizeof(fieldhandler) * md->selector_count; + size_t fhandlers_size = sizeof(upb_fieldhandler) * md->selector_count; upb_handlers *h = calloc(sizeof(*h) - sizeof(void*) + fhandlers_size, 1); if (!h) return NULL; h->msg = md; + h->ft = ft; upb_msgdef_ref(h->msg, h); if (!upb_refcounted_init(upb_upcast(h), &vtbl, owner)) goto oom; @@ -212,6 +194,10 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } +const upb_frametype *upb_handlers_frametype(const upb_handlers *h) { + return h->ft; +} + void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler) { assert(!upb_handlers_isfrozen(h)); h->startmsg = handler; @@ -232,13 +218,18 @@ upb_endmsg_handler 
*upb_handlers_getendmsg(const upb_handlers *h) { // For now we stuff the subhandlers pointer into the fieldhandlers* // corresponding to the UPB_HANDLER_STARTSUBMSG handler. +static const upb_handlers **subhandlersptr_sel(upb_handlers *h, + upb_selector_t startsubmsg) { + return &getfh_mutable(h, startsubmsg)->subhandlers; +} + static const upb_handlers **subhandlersptr(upb_handlers *h, const upb_fielddef *f) { assert(upb_fielddef_issubmsg(f)); upb_selector_t selector; bool ok = upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector); UPB_ASSERT_VAR(ok, ok); - return &getfh_mutable(h, selector)->subhandlers; + return subhandlersptr_sel(h, selector); } bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, @@ -263,6 +254,12 @@ const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, return *stored; } +const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, + upb_selector_t sel) { + const upb_handlers **stored = subhandlersptr_sel((upb_handlers*)h, sel); + return *stored; +} + #define SETTER(name, handlerctype, handlertype) \ bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ handlerctype val, void *data, \ @@ -273,7 +270,7 @@ const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, bool ok = upb_getselector(f, handlertype, &selector); \ if (!ok) return false; \ do_cleanup(h, f, handlertype); \ - fieldhandler *fh = getfh_mutable(h, selector); \ + upb_fieldhandler *fh = getfh_mutable(h, selector); \ fh->handler = (upb_func*)val; \ fh->data = (upb_func*)data; \ fh->cleanup = (upb_func*)cleanup; \ @@ -294,6 +291,47 @@ SETTER(startseq, upb_startfield_handler*, UPB_HANDLER_STARTSEQ); SETTER(startsubmsg, upb_startfield_handler*, UPB_HANDLER_STARTSUBMSG); SETTER(endsubmsg, upb_endfield_handler*, UPB_HANDLER_ENDSUBMSG); SETTER(endseq, upb_endfield_handler*, UPB_HANDLER_ENDSEQ); + +// Our current implementation of these "alt" functions is, according to the +// letter of the standard, 
undefined behavior, because we store the +// upb_int32_handler2* to memory and then read it back (and call it) as a +// upb_int32_handler*. Even though both function pointer types take 32-bit +// integer arguments, they are still technically different types (because one +// takes an "int" argument and one takes a "long" argument), and calling a +// function through a pointer to an incompatible type is undefined behavior. +// +// I think it is exceedingly unlikely that "int" and "long" would ever have +// incompatible calling conventions when both are known to be 32 bit signed +// two's complement integers. But if absolute standards-compliance is ever +// required, either due to a practical problem with the undefined behavior or a +// tool that notices the incongruity, we have an available option for being +// perfectly standard-compliant; we can store a bool for every function pointer +// indicating whether it is an "alt" pointer or not. Then at the call site +// (inside upb_sink) we can do: +// +// if (is_alt) { +// upb_int32_handler2 *func = fp; +// func(...); +// } else { +// upb_int32_handler *func = fp; +// func(...); +// } +// +// We could do this now, but it adds complexity and wastes the memory to store +// these useless bools. The bools are useless because the compiler will almost +// certainly optimize away this branch and elide the two calls into a single +// call with the 32-bit parameter calling convention. 
+ +#ifdef UPB_TWO_32BIT_TYPES +SETTER(int32alt, upb_int32_handler2*, UPB_HANDLER_INT32); +SETTER(uint32alt, upb_uint32_handler2*, UPB_HANDLER_UINT32); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +SETTER(int64alt, upb_int64_handler2*, UPB_HANDLER_INT64); +SETTER(uint64alt, upb_uint64_handler2*, UPB_HANDLER_UINT64); +#endif + #undef SETTER upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s) { @@ -310,9 +348,10 @@ typedef struct { void *closure; } dfs_state; -static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, +static upb_handlers *newformsg(const upb_msgdef *m, const upb_frametype *ft, + const void *owner, dfs_state *s) { - upb_handlers *h = upb_handlers_new(m, owner); + upb_handlers *h = upb_handlers_new(m, ft, owner); if (!h) return NULL; if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; @@ -326,11 +365,11 @@ static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, if (!upb_fielddef_issubmsg(f)) continue; const upb_msgdef *subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); - const upb_value *subm_ent = upb_inttable_lookupptr(&s->tab, subdef); - if (subm_ent) { - upb_handlers_setsubhandlers(h, f, upb_value_getptr(*subm_ent)); + upb_value subm_ent; + if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) { + upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent)); } else { - upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); + upb_handlers *sub_mh = newformsg(subdef, ft, &sub_mh, s); if (!sub_mh) goto oom; upb_handlers_setsubhandlers(h, f, sub_mh); upb_handlers_unref(sub_mh, &sub_mh); @@ -344,6 +383,7 @@ oom: } const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const upb_frametype *ft, const void *owner, upb_handlers_callback *callback, void *closure) { @@ -352,7 +392,7 @@ const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, state.closure = closure; if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; - upb_handlers *ret = newformsg(m, owner, 
&state); + upb_handlers *ret = newformsg(m, ft, owner, &state); if (!ret) return NULL; upb_refcounted *r = upb_upcast(ret); upb_status status = UPB_STATUS_INIT; @@ -365,10 +405,9 @@ const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, } #define STDMSG_WRITER(type, ctype) \ - bool upb_stdmsg_set ## type (void *_m, void *fval, ctype val) { \ - assert(_m != NULL); \ - const upb_stdmsg_fval *f = fval; \ - uint8_t *m = _m; \ + bool upb_stdmsg_set ## type (const upb_sinkframe *frame, ctype val) { \ + const upb_stdmsg_fval *f = upb_sinkframe_handlerdata(frame); \ + uint8_t *m = upb_sinkframe_userdata(frame); \ if (f->hasbit > 0) \ *(uint8_t*)&m[f->hasbit / 8] |= 1 << (f->hasbit % 8); \ *(ctype*)&m[f->offset] = val; \ diff --git a/upb/handlers.h b/upb/handlers.h index 094702ec99..582ba43685 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -25,11 +25,23 @@ #include "upb/def.h" #ifdef __cplusplus -namespace upb { class Handlers; } +struct upb_frametype; +namespace upb { +typedef upb_frametype FrameType; +class Handlers; +class SinkFrame; +} +typedef upb::FrameType upb_frametype; typedef upb::Handlers upb_handlers; +typedef upb::SinkFrame upb_sinkframe; +UPB_INLINE void *upb_sinkframe_handlerdata(const upb_sinkframe* frame); #else +struct upb_frametype; struct upb_handlers; +struct upb_sinkframe; +typedef struct upb_frametype upb_frametype; typedef struct upb_handlers upb_handlers; +typedef struct upb_sinkframe upb_sinkframe; #endif // All the different types of handlers that can be registered. @@ -61,7 +73,7 @@ extern char _upb_noclosure; // A selector refers to a specific field handler in the Handlers object // (for example: the STARTSUBMSG handler for field "field15"). 
-typedef uint32_t upb_selector_t; +typedef int32_t upb_selector_t; #ifdef __cplusplus @@ -78,24 +90,34 @@ class upb::Handlers { typedef upb_selector_t Selector; typedef upb_handlertype_t Type; - typedef bool StartMessageHandler(void* closure); - typedef void EndMessageHandler(void* closure, Status* status); - typedef void* StartFieldHandler(void* closure, void* data); - typedef bool EndFieldHandler(void *closure, void *data); - typedef void* StartStringHandler(void *c, void *d, size_t size_hint); - typedef size_t StringHandler(void *c, void *d, const char *buf, size_t len); + typedef bool StartMessageHandler(const SinkFrame*); + typedef void EndMessageHandler(const SinkFrame*, Status* status); + typedef void* StartFieldHandler(const SinkFrame*); + typedef bool EndFieldHandler(const SinkFrame*); + typedef void* StartStringHandler(const SinkFrame* c, size_t size_hint); + typedef size_t StringHandler(const SinkFrame* c, const char* buf, size_t len); template struct Value { - typedef bool Handler(void* closure, void* data, T val); + typedef bool Handler(const SinkFrame*, T val); }; - typedef Value::Handler Int32Handler; - typedef Value::Handler Int64Handler; - typedef Value::Handler Uint32Handler; - typedef Value::Handler Uint64Handler; - typedef Value::Handler FloatHandler; - typedef Value::Handler DoubleHandler; - typedef Value::Handler BoolHandler; + typedef Value::Handler Int32Handler; + typedef Value::Handler Int64Handler; + typedef Value::Handler UInt32Handler; + typedef Value::Handler UInt64Handler; + typedef Value::Handler FloatHandler; + typedef Value::Handler DoubleHandler; + typedef Value::Handler BoolHandler; + +#ifdef UPB_TWO_32BIT_TYPES + typedef Value::Handler Int32Handler2; + typedef Value::Handler UInt32Handler2; +#endif + +#ifdef UPB_TWO_64BIT_TYPES + typedef Value::Handler Int64Handler2; + typedef Value::Handler UInt64Handler2; +#endif // Any function pointer can be converted to this and converted back to its // correct type. 
@@ -106,17 +128,23 @@ class upb::Handlers { typedef void HandlersCallback(void *closure, upb_handlers *h); - // Returns a new handlers object for the given frozen msgdef. A single ref - // will belong to the given owner. + // Returns a new handlers object for the given frozen msgdef that will use + // the given FrameType as its top-level state (can be NULL, for now). A + // single ref on the returned object will belong to the given owner. // Returns NULL if memory allocation failed. - static Handlers* New(const MessageDef* m, const void *owner); + static Handlers* New(const MessageDef* m, + const FrameType* ft, + const void *owner); // Convenience function for registering a graph of handlers that mirrors the // graph of msgdefs for some message. For "m" and all its children a new set // of handlers will be created and the given callback will be invoked, // allowing the client to register handlers for this message. Note that any - // subhandlers set by the callback will be overwritten. - static const Handlers* NewFrozen(const MessageDef *m, const void *owner, + // subhandlers set by the callback will be overwritten. A single ref on the + // returned object will belong to the given owner. + static const Handlers* NewFrozen(const MessageDef *m, + const FrameType* ft, + const void *owner, HandlersCallback *callback, void *closure); // Functionality from upb::RefCounted. @@ -126,6 +154,9 @@ class upb::Handlers { void DonateRef(const void *from, const void *to) const; void CheckRef(const void *owner) const; + // Top-level frame type. + const FrameType* frame_type() const; + // Freezes the given set of handlers. You may not freeze a handler without // also freezing any handlers they point to. 
In the future we may want to // require that all fields of the submessage have had subhandlers set for @@ -137,7 +168,7 @@ class upb::Handlers { // Sets the startmsg handler for the message, which is defined as follows: // - // bool startmsg(void *closure) { + // bool startmsg(const upb::SinkFrame* frame) { // // Called when the message begins. Returns true if processing should // // continue. // return true; @@ -147,7 +178,7 @@ class upb::Handlers { // Sets the endmsg handler for the message, which is defined as follows: // - // void endmsg(void *closure, upb_status *status) { + // void endmsg(const upb::SinkFrame* frame, upb_status *status) { // // Called when processing of this message ends, whether in success or // // failure. "status" indicates the final status of processing, and // // can also be modified in-place to update the final status. @@ -159,7 +190,7 @@ class upb::Handlers { // (this is for an int32 field; other field types will pass their native // C/C++ type for "val"): // - // bool value(void *closure, void *d, int32_t val) { + // bool value(const upb::SinkFrame *frame, upb_int32_t val) { // // Called when the field's value is encountered. "d" contains // // whatever data was bound to this field when it was registered. // // Returns true if processing should continue. @@ -168,24 +199,72 @@ class upb::Handlers { // // The value type must exactly match f->type(). // For example, SetInt32Handler() may only be used for fields of type - // UPB_TYPE_INT32, UPB_TYPE_SINT32, UPB_TYPE_SFIXED32, and UPB_TYPE_ENUM. + // UPB_TYPE_INT32 and UPB_TYPE_ENUM. // // "d" is the data that will be bound to this callback and passed to it. // If "fr" is non-NULL it will be run when the data is no longer needed. // // Returns "false" if "f" does not belong to this message or has the wrong // type for this handler. + // + // NOTE: the prototype above uses "upb_int32_t" and not "int32_t" from + // stdint.h. 
For C++ any int32 typedef will work correctly thanks to + // function overloading on the function pointer type. But in C things are + // more complicated; "int" and "long" could both be 32-bit types, but the + // two are incompatible with each other when it comes to function pointers. + // Since we don't know what the underlying type of int32_t is, we have to + // define our own which we *do* know the underlying type of. The easiest + // and most portable choice is to define handlers in C with the upb_intXX_t + // types. bool SetInt32Handler (const FieldDef* f, Int32Handler* h, void* d, Free* fr); bool SetInt64Handler (const FieldDef* f, Int64Handler* h, void* d, Free* fr); - bool SetUint32Handler(const FieldDef* f, Uint32Handler* h, void* d, Free* fr); - bool SetUint64Handler(const FieldDef* f, Uint64Handler* h, void* d, Free* fr); + bool SetUInt32Handler(const FieldDef* f, UInt32Handler* h, void* d, Free* fr); + bool SetUInt64Handler(const FieldDef* f, UInt64Handler* h, void* d, Free* fr); bool SetFloatHandler (const FieldDef* f, FloatHandler* h, void* d, Free* fr); bool SetDoubleHandler(const FieldDef* f, DoubleHandler* h, void* d, Free* fr); bool SetBoolHandler (const FieldDef* f, BoolHandler* h, void* d, Free* fr); + // Convenience versions that look up the field by name first. These return + // false if no field with this name exists, or for any of the other reasons + // that the FieldDef* version returns false. 
+ bool SetInt32Handler (const char *name, Int32Handler* h, void* d, Free* fr); + bool SetInt64Handler (const char *name, Int64Handler* h, void* d, Free* fr); + bool SetUInt32Handler(const char *name, UInt32Handler* h, void* d, Free* fr); + bool SetUInt64Handler(const char *name, UInt64Handler* h, void* d, Free* fr); + bool SetFloatHandler (const char *name, FloatHandler* h, void* d, Free* fr); + bool SetDoubleHandler(const char *name, DoubleHandler* h, void* d, Free* fr); + bool SetBoolHandler (const char *name, BoolHandler* h, void* d, Free* fr); + + // On platforms where there are two 32-bit or 64-bit integer types, provide + // registration functions for both. Function overloading should make this + // all transparent to the user. +#ifdef UPB_TWO_32BIT_TYPES + bool SetInt32Handler (const FieldDef* f, Int32Handler2* h, void* d, Free* x); + bool SetUInt32Handler(const FieldDef* f, UInt32Handler2* h, void* d, Free* x); + bool SetInt32Handler (const char *name, Int32Handler2* h, void* d, Free* x); + bool SetUInt32Handler(const char *name, UInt32Handler2* h, void* d, Free* x); +#endif + +#ifdef UPB_TWO_64BIT_TYPES + bool SetInt64Handler (const FieldDef* f, Int64Handler2* h, void* d, Free* x); + bool SetUInt64Handler(const FieldDef* f, UInt64Handler2* h, void* d, Free* x); + bool SetInt64Handler (const char *name, Int64Handler2* h, void* d, Free* x); + bool SetUInt64Handler(const char *name, UInt64Handler2* h, void* d, Free* x); +#endif + + // Like the above, but these are templated on the type of the value. For + // example, templating on int64_t is equivalent to calling SetInt64Handler. + // Attempts to template on a type that does not map to a UPB_TYPE_* type + // (like int8_t, since protobufs have no 8-bit type) will get an "undefined + // function" compilation error. 
+ template bool SetValueHandler( + const FieldDef* f, typename Value::Handler* h, void* d, Free* fr); + template bool SetValueHandler( + const char* name, typename Value::Handler* h, void* d, Free* fr); + // Sets handlers for a string field, which are defined as follows: // - // void* startstr(void *closure, void *data, size_t size_hint) { + // void* startstr(const upb::SinkFrame *frame, size_t size_hint) { // // Called when a string value begins. The return value indicates the // // closure for the string. "size_hint" indicates the size of the // // string if it is known, however if the string is length-delimited @@ -200,7 +279,7 @@ class upb::Handlers { // return closure; // } // - // size_t str(void *closure, void *data, const char *str, size_t len) { + // size_t str(const upb::SinkFrame* frame, const char *str, size_t len) { // // Called for each buffer of string data; the multiple physical buffers // // are all part of the same logical string. The return value indicates // // how many bytes were consumed. If this number is less than "len", @@ -211,7 +290,7 @@ class upb::Handlers { // return len; // } // - // bool endstr(void *closure, void *data) { + // bool endstr(const upb::SinkFrame* frame) { // // Called when a string value ends. // return true; // } @@ -221,13 +300,18 @@ class upb::Handlers { bool SetEndStringHandler(const FieldDef* f, EndFieldHandler* h, void* d, Free* fr); - // A setter that is templated on the type of the value. - template bool SetValueHandler( - const FieldDef* f, typename Value::Handler* h, void* d, Free* fr); + // Convenience versions that look up the field by name first. These return + // false if no field with this name exists, or for any of the other reasons + // that the FieldDef* version returns false. 
+ bool SetStartStringHandler(const char* name, StartStringHandler* h, + void* d, Free* fr); + bool SetStringHandler(const char* name, StringHandler* h, void* d, Free* fr); + bool SetEndStringHandler(const char* name, EndFieldHandler* h, + void* d, Free* fr); // Sets the startseq handler, which is defined as follows: // - // void *startseq(void *closure, void *data) { + // void *startseq(const upb::SinkFrame* frame) { // // Called when a sequence (repeated field) begins. The returned // // pointer indicates the closure for the sequence (or UPB_BREAK // // to interrupt processing). @@ -241,11 +325,13 @@ class upb::Handlers { // If "cleanup" is non-NULL it will be run when the data is no longer needed. bool SetStartSequenceHandler(const FieldDef* f, StartFieldHandler *handler, void* data, Free* cleanup); + bool SetStartSequenceHandler(const char* name, StartFieldHandler *handler, + void* data, Free* cleanup); // Sets the startsubmsg handler for the given field, which is defined as // follows: // - // void *startsubmsg(void *closure, void *data) { + // void *startsubmsg(const upb::SinkFrame *frame) { // // Called when a submessage begins. The returned pointer indicates the // // closure for the sequence (or UPB_BREAK to interrupt processing). // return closure; @@ -258,11 +344,13 @@ class upb::Handlers { // submessage/group field. bool SetStartSubMessageHandler(const FieldDef* f, StartFieldHandler *handler, void* data, Free* cleanup); + bool SetStartSubMessageHandler(const char* name, StartFieldHandler *handler, + void* data, Free* cleanup); // Sets the endsubmsg handler for the given field, which is defined as // follows: // - // bool endsubmsg(void *closure, void *data) { + // bool endsubmsg(const upb::SinkFrame *frame) { // // Called when a submessage ends. Returns true to continue processing. // return true; // } @@ -274,11 +362,13 @@ class upb::Handlers { // submessage/group field. 
bool SetEndSubMessageHandler(const FieldDef* f, EndFieldHandler *handler, void* data, Free* cleanup); + bool SetEndSubMessageHandler(const char* name, EndFieldHandler *handler, + void* data, Free* cleanup); // Starts the endsubseq handler for the given field, which is defined as // follows: // - // bool endseq(void *closure, void *data) { + // bool endseq(const upb::SinkFrame *frame) { // // Called when a sequence ends. Returns true continue processing. // return true; // } @@ -290,16 +380,17 @@ class upb::Handlers { // repeated field. bool SetEndSequenceHandler(const FieldDef* f, EndFieldHandler *handler, void* data, Free* cleanup); + bool SetEndSequenceHandler(const char* name, EndFieldHandler *handler, + void* data, Free* cleanup); // Sets or gets the object that specifies handlers for the given field, which // must be a submessage or group. Returns NULL if no handlers are set. bool SetSubHandlers(const FieldDef* f, const Handlers* sub); const Handlers* GetSubHandlers(const FieldDef* f) const; - // NOTE: The remaining functions in this class are mostly of interest to - // byte-code/JIT compilers (or upb internals); most users will not need them. - // These functions also require more care, since passing a selector that - // does not match the type of these handlers yields undefined behavior. + // Equivalent to GetSubHandlers, but takes the STARTSUBMSG selector for the + // field. + const Handlers* GetSubHandlers(Selector startsubmsg) const; // A selector refers to a specific field handler in the Handlers object // (for example: the STARTSUBMSG handler for field "field15"). @@ -309,6 +400,9 @@ class upb::Handlers { // contains this FieldDef. static bool GetSelector(const FieldDef* f, Type type, Selector* s); + // Given a START selector of any kind, returns the corresponding END selector. + static Selector GetEndSelector(Selector start_selector); + // Returns the function pointer for this handler. 
It is the client's // responsibility to cast to the correct function type before calling it. GenericFunction* GetHandler(Selector selector); @@ -325,18 +419,19 @@ class upb::Handlers { // // const FieldDef* GetFieldDef(Selector selector); // static bool IsSequence(Selector selector); - // Selector GetEndSelector(Selector start_selector); private: UPB_DISALLOW_POD_OPS(Handlers); + friend void* ::upb_sinkframe_handlerdata(const upb_sinkframe* frame); #else struct upb_handlers { #endif upb_refcounted base; const upb_msgdef *msg; - bool (*startmsg)(void*); - void (*endmsg)(void*, upb_status*); + const upb_frametype *ft; + bool (*startmsg)(const upb_sinkframe*); + void (*endmsg)(const upb_sinkframe*, upb_status*); void *fh_base[1]; // Start of dynamically-sized field handler array. }; @@ -344,26 +439,40 @@ struct upb_handlers { #ifdef __cplusplus extern "C" { #endif -typedef bool upb_startmsg_handler(void *c); -typedef void upb_endmsg_handler(void *c, upb_status *status); -typedef void* upb_startfield_handler(void *closure, void *d); -typedef bool upb_endfield_handler(void *closure, void *d); +typedef bool upb_startmsg_handler(const upb_sinkframe *frame); +typedef void upb_endmsg_handler(const upb_sinkframe *frame, upb_status *status); +typedef void* upb_startfield_handler(const upb_sinkframe *frame); +typedef bool upb_endfield_handler(const upb_sinkframe *frame); typedef void upb_handlers_callback(void *closure, upb_handlers *h); typedef void upb_handlerfree(void *d); typedef void upb_func(); -typedef bool upb_int32_handler(void *c, void *d, int32_t val); -typedef bool upb_int64_handler(void *c, void *d, int64_t val); -typedef bool upb_uint32_handler(void *c, void *d, uint32_t val); -typedef bool upb_uint64_handler(void *c, void *d, uint64_t val); -typedef bool upb_float_handler(void *c, void *d, float val); -typedef bool upb_double_handler(void *c, void *d, double val); -typedef bool upb_bool_handler(void *c, void *d, bool val); -typedef void* 
upb_startstr_handler(void *closure, void *d, size_t size_hint); -typedef size_t upb_string_handler(void *c, void *d, const char *buf, size_t n); - -upb_handlers *upb_handlers_new(const upb_msgdef *m, const void *owner); +typedef bool upb_int32_handler(const upb_sinkframe *f, upb_int32_t val); +typedef bool upb_int64_handler(const upb_sinkframe *f, upb_int64_t val); +typedef bool upb_uint32_handler(const upb_sinkframe *f, upb_uint32_t val); +typedef bool upb_uint64_handler(const upb_sinkframe *f, upb_uint64_t val); +typedef bool upb_float_handler(const upb_sinkframe *f, float val); +typedef bool upb_double_handler(const upb_sinkframe *f, double val); +typedef bool upb_bool_handler(const upb_sinkframe *f, bool val); +typedef void* upb_startstr_handler(const upb_sinkframe *f, size_t size_hint); +typedef size_t upb_string_handler( + const upb_sinkframe *f, const char *buf, size_t n); + +#ifdef UPB_TWO_32BIT_TYPES +typedef bool upb_int32_handler2(const upb_sinkframe *f, upb_int32alt_t val); +typedef bool upb_uint32_handler2(const upb_sinkframe *f, upb_uint32alt_t val); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +typedef bool upb_int64_handler2(const upb_sinkframe *f, upb_int64alt_t val); +typedef bool upb_uint64_handler2(const upb_sinkframe *f, upb_uint64alt_t val); +#endif + +upb_handlers *upb_handlers_new(const upb_msgdef *m, + const upb_frametype *ft, + const void *owner); const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, + const upb_frametype *ft, const void *owner, upb_handlers_callback *callback, void *closure); @@ -378,6 +487,7 @@ void upb_handlers_checkref(const upb_handlers *h, const void *owner); bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s); const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); +const upb_frametype *upb_handlers_frametype(const upb_handlers *h); void upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handler *handler); upb_startmsg_handler *upb_handlers_getstartmsg(const upb_handlers *h); 
void upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handler *handler); @@ -428,13 +538,36 @@ bool upb_handlers_setsubhandlers( upb_handlers *h, const upb_fielddef *f, const upb_handlers *sub); const upb_handlers *upb_handlers_getsubhandlers( const upb_handlers *h, const upb_fielddef *f); +const upb_handlers *upb_handlers_getsubhandlers_sel( + const upb_handlers *h, upb_selector_t sel); upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); bool upb_getselector( const upb_fielddef *f, upb_handlertype_t type, upb_selector_t *s); +UPB_INLINE upb_selector_t upb_getendselector(upb_selector_t start) { + return start + 1; +} upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s); void *upb_handlers_gethandlerdata(const upb_handlers *h, upb_selector_t s); size_t upb_gethandleroffset(upb_selector_t s); +#ifdef UPB_TWO_32BIT_TYPES +bool upb_handlers_setint32alt( + upb_handlers *h, const upb_fielddef *f, upb_int32_handler2 *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint32alt( + upb_handlers *h, const upb_fielddef *f, upb_uint32_handler2 *handler, + void *d, upb_handlerfree *fr); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +bool upb_handlers_setint64alt( + upb_handlers *h, const upb_fielddef *f, upb_int64_handler2 *handler, + void *d, upb_handlerfree *fr); +bool upb_handlers_setuint64alt( + upb_handlers *h, const upb_fielddef *f, upb_uint64_handler2 *handler, + void *d, upb_handlerfree *fr); +#endif + // Internal-only. uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f); uint32_t upb_handlers_selectorcount(const upb_fielddef *f); @@ -444,11 +577,12 @@ uint32_t upb_handlers_selectorcount(const upb_fielddef *f); // Convenience versions of the above that first look up the field by name. 
#define DEFINE_NAME_SETTER(slot, type) \ - INLINE void upb_handlers_set ## slot ## _n( \ + UPB_INLINE bool upb_handlers_set ## slot ## _n( \ upb_handlers *h, const char *name, type val, \ void *d, upb_handlerfree *fr) { \ - upb_handlers_set ## slot(h, upb_msgdef_ntof( \ - upb_handlers_msgdef(h), name), val, d, fr); \ + const upb_fielddef *f = upb_msgdef_ntof(upb_handlers_msgdef(h), name); \ + if (!f) return false; \ + return upb_handlers_set ## slot(h, f, val, d, fr); \ } DEFINE_NAME_SETTER(int32, upb_int32_handler*); DEFINE_NAME_SETTER(int64, upb_int64_handler*); @@ -464,6 +598,17 @@ DEFINE_NAME_SETTER(startseq, upb_startfield_handler*); DEFINE_NAME_SETTER(startsubmsg, upb_startfield_handler*); DEFINE_NAME_SETTER(endsubmsg, upb_endfield_handler*); DEFINE_NAME_SETTER(endseq, upb_endfield_handler*); + +#ifdef UPB_TWO_32BIT_TYPES +DEFINE_NAME_SETTER(int32alt, upb_int32_handler2*); +DEFINE_NAME_SETTER(uint32alt, upb_uint32_handler2*); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +DEFINE_NAME_SETTER(int64alt, upb_int64_handler2*); +DEFINE_NAME_SETTER(uint64alt, upb_uint64_handler2*); +#endif + #undef DEFINE_NAME_SETTER // Value writers for every in-memory type: write the data to a known offset @@ -488,13 +633,13 @@ typedef struct upb_stdmsg_fval { #ifdef __cplusplus extern "C" { #endif -bool upb_stdmsg_setint32(void *c, void *d, int32_t val); -bool upb_stdmsg_setint64(void *c, void *d, int64_t val); -bool upb_stdmsg_setuint32(void *c, void *d, uint32_t val); -bool upb_stdmsg_setuint64(void *c, void *d, uint64_t val); -bool upb_stdmsg_setfloat(void *c, void *d, float val); -bool upb_stdmsg_setdouble(void *c, void *d, double val); -bool upb_stdmsg_setbool(void *c, void *d, bool val); +bool upb_stdmsg_setint32(const upb_sinkframe *frame, int32_t val); +bool upb_stdmsg_setint64(const upb_sinkframe *frame, int64_t val); +bool upb_stdmsg_setuint32(const upb_sinkframe *frame, uint32_t val); +bool upb_stdmsg_setuint64(const upb_sinkframe *frame, uint64_t val); +bool 
upb_stdmsg_setfloat(const upb_sinkframe *frame, float val); +bool upb_stdmsg_setdouble(const upb_sinkframe *frame, double val); +bool upb_stdmsg_setbool(const upb_sinkframe *frame, bool val); #ifdef __cplusplus } // extern "C" #endif @@ -503,14 +648,18 @@ bool upb_stdmsg_setbool(void *c, void *d, bool val); namespace upb { +// This function should be specialized by types that have a FrameType. +template inline const FrameType* GetFrameType() { return NULL; } + // C++ Wrappers -inline Handlers* Handlers::New(const MessageDef* m, const void *owner) { - return upb_handlers_new(m, owner); +inline Handlers* Handlers::New(const MessageDef* m, const FrameType* ft, + const void *owner) { + return upb_handlers_new(m, ft, owner); } inline const Handlers* Handlers::NewFrozen( - const MessageDef *m, const void *owner, + const MessageDef *m, const FrameType* ft, const void *owner, upb_handlers_callback *callback, void *closure) { - return upb_handlers_newfrozen(m, owner, callback, closure); + return upb_handlers_newfrozen(m, ft, owner, callback, closure); } inline bool Handlers::IsFrozen() const { return upb_handlers_isfrozen(this); @@ -530,6 +679,9 @@ inline void Handlers::CheckRef(const void *owner) const { inline bool Handlers::Freeze(Handlers*const* handlers, int n, Status* s) { return upb_handlers_freeze(handlers, n, s); } +inline const FrameType* Handlers::frame_type() const { + return upb_handlers_frametype(this); +} inline const MessageDef* Handlers::message_def() const { return upb_handlers_msgdef(this); } @@ -551,13 +703,13 @@ inline bool Handlers::SetInt64Handler( void *d, Handlers::Free *fr) { return upb_handlers_setint64(this, f, handler, d, fr); } -inline bool Handlers::SetUint32Handler( - const FieldDef *f, Handlers::Uint32Handler *handler, +inline bool Handlers::SetUInt32Handler( + const FieldDef *f, Handlers::UInt32Handler *handler, void *d, Handlers::Free *fr) { return upb_handlers_setuint32(this, f, handler, d, fr); } -inline bool Handlers::SetUint64Handler( 
- const FieldDef *f, Handlers::Uint64Handler *handler, +inline bool Handlers::SetUInt64Handler( + const FieldDef *f, Handlers::UInt64Handler *handler, void *d, Handlers::Free *fr) { return upb_handlers_setuint64(this, f, handler, d, fr); } @@ -615,6 +767,76 @@ inline bool Handlers::SetSubHandlers( const FieldDef* f, const Handlers* sub) { return upb_handlers_setsubhandlers(this, f, sub); } +inline bool Handlers::SetInt32Handler( + const char* name, Handlers::Int32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32_n(this, name, handler, d, fr); +} +inline bool Handlers::SetInt64Handler( + const char* name, Handlers::Int64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64_n(this, name, handler, d, fr); +} +inline bool Handlers::SetUInt32Handler( + const char* name, Handlers::UInt32Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32_n(this, name, handler, d, fr); +} +inline bool Handlers::SetUInt64Handler( + const char* name, Handlers::UInt64Handler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64_n(this, name, handler, d, fr); +} +inline bool Handlers::SetFloatHandler( + const char* name, Handlers::FloatHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setfloat_n(this, name, handler, d, fr); +} +inline bool Handlers::SetDoubleHandler( + const char* name, Handlers::DoubleHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setdouble_n(this, name, handler, d, fr); +} +inline bool Handlers::SetBoolHandler( + const char* name, Handlers::BoolHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setbool_n(this, name, handler, d, fr); +} +inline bool Handlers::SetStartStringHandler( + const char* name, Handlers::StartStringHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setstartstr_n(this, name, handler, d, fr); +} +inline bool Handlers::SetEndStringHandler( + const char* 
name, Handlers::EndFieldHandler* handler, + void* d, Handlers::Free* fr) { + return upb_handlers_setendstr_n(this, name, handler, d, fr); +} +inline bool Handlers::SetStringHandler( + const char* name, Handlers::StringHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstring_n(this, name, handler, d, fr); +} +inline bool Handlers::SetStartSequenceHandler( + const char* name, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartseq_n(this, name, handler, d, fr); +} +inline bool Handlers::SetStartSubMessageHandler( + const char* name, Handlers::StartFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setstartsubmsg_n(this, name, handler, d, fr); +} +inline bool Handlers::SetEndSubMessageHandler( + const char* name, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendsubmsg_n(this, name, handler, d, fr); +} +inline bool Handlers::SetEndSequenceHandler( + const char* name, Handlers::EndFieldHandler *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setendseq_n(this, name, handler, d, fr); +} inline Handlers::StartMessageHandler *Handlers::GetStartMessageHandler() const { return upb_handlers_getstartmsg(this); } @@ -625,10 +847,17 @@ inline const Handlers* Handlers::GetSubHandlers( const FieldDef* f) const { return upb_handlers_getsubhandlers(this, f); } +inline const Handlers* Handlers::GetSubHandlers( + Handlers::Selector sel) const { + return upb_handlers_getsubhandlers_sel(this, sel); +} inline bool Handlers::GetSelector( const FieldDef* f, Handlers::Type type, Handlers::Selector* s) { return upb_getselector(f, type, s); } +inline Handlers::Selector Handlers::GetEndSelector(Handlers::Selector start) { + return upb_getendselector(start); +} inline Handlers::GenericFunction* Handlers::GetHandler( Handlers::Selector selector) { return upb_handlers_gethandler(this, selector); @@ -640,6 +869,52 @@ inline size_t 
Handlers::GetHandlerOffset(Handlers::Selector selector) { return upb_gethandleroffset(selector); } +#ifdef UPB_TWO_32BIT_TYPES +inline bool Handlers::SetInt32Handler( + const FieldDef *f, Handlers::Int32Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32alt(this, f, handler, d, fr); +} +inline bool Handlers::SetUInt32Handler( + const FieldDef *f, Handlers::UInt32Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32alt(this, f, handler, d, fr); +} +inline bool Handlers::SetInt32Handler( + const char* name, Handlers::Int32Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint32alt_n(this, name, handler, d, fr); +} +inline bool Handlers::SetUInt32Handler( + const char* name, Handlers::UInt32Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint32alt_n(this, name, handler, d, fr); +} +#endif + +#ifdef UPB_TWO_64BIT_TYPES +inline bool Handlers::SetInt64Handler( + const FieldDef *f, Handlers::Int64Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64alt(this, f, handler, d, fr); +} +inline bool Handlers::SetUInt64Handler( + const FieldDef *f, Handlers::UInt64Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64alt(this, f, handler, d, fr); +} +inline bool Handlers::SetInt64Handler( + const char* name, Handlers::Int64Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setint64alt_n(this, name, handler, d, fr); +} +inline bool Handlers::SetUInt64Handler( + const char* name, Handlers::UInt64Handler2 *handler, + void *d, Handlers::Free *fr) { + return upb_handlers_setuint64alt_n(this, name, handler, d, fr); +} +#endif + #define SET_VALUE_HANDLER(type, ctype) \ template<> \ inline bool Handlers::SetValueHandler( \ @@ -647,14 +922,32 @@ inline size_t Handlers::GetHandlerOffset(Handlers::Selector selector) { typename Handlers::Value::Handler* handler, \ void* data, Handlers::Free* 
cleanup) { \ return upb_handlers_set ## type(this, f, handler, data, cleanup); \ + } \ + template<> \ + inline bool Handlers::SetValueHandler( \ + const char* f, \ + typename Handlers::Value::Handler* handler, \ + void* data, Handlers::Free* cleanup) { \ + return upb_handlers_set ## type ## _n(this, f, handler, data, cleanup); \ } SET_VALUE_HANDLER(double, double); SET_VALUE_HANDLER(float, float); -SET_VALUE_HANDLER(uint64, uint64_t); -SET_VALUE_HANDLER(uint32, uint32_t); -SET_VALUE_HANDLER(int64, int64_t); -SET_VALUE_HANDLER(int32, int32_t); +SET_VALUE_HANDLER(uint64, upb_uint64_t); +SET_VALUE_HANDLER(uint32, upb_uint32_t); +SET_VALUE_HANDLER(int64, upb_int64_t); +SET_VALUE_HANDLER(int32, upb_int32_t); SET_VALUE_HANDLER(bool, bool); + +#ifdef UPB_TWO_32BIT_TYPES +SET_VALUE_HANDLER(int32alt, upb_int32alt_t); +SET_VALUE_HANDLER(uint32alt, upb_uint32alt_t); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +SET_VALUE_HANDLER(int64alt, upb_int64alt_t); +SET_VALUE_HANDLER(uint64alt, upb_uint64alt_t); +#endif + #undef SET_VALUE_HANDLER template void DeletePointer(void *p) { delete static_cast(p); } @@ -665,25 +958,55 @@ void SetStoreValueHandler( // A handy templated function that will retrieve a value handler for a given // C++ type. 
-#define SET_STORE_VALUE_HANDLER(type, ctype) \ +#define SET_STORE_VALUE_HANDLER(type, ctype, handlerctype) \ template <> \ inline void SetStoreValueHandler(const FieldDef* f, size_t offset, \ int32_t hasbit, Handlers* h) { \ - h->SetValueHandler( \ + h->SetValueHandler( \ f, upb_stdmsg_set ## type, new upb_stdmsg_fval(offset, hasbit), \ &upb::DeletePointer); \ } -SET_STORE_VALUE_HANDLER(double, double); -SET_STORE_VALUE_HANDLER(float, float); -SET_STORE_VALUE_HANDLER(uint64, uint64_t); -SET_STORE_VALUE_HANDLER(uint32, uint32_t); -SET_STORE_VALUE_HANDLER(int64, int64_t); -SET_STORE_VALUE_HANDLER(int32, int32_t); -SET_STORE_VALUE_HANDLER(bool, bool); -#undef GET_VALUE_HANDLER +SET_STORE_VALUE_HANDLER(double, double, double); +SET_STORE_VALUE_HANDLER(float, float, float); +SET_STORE_VALUE_HANDLER(uint64, upb_uint64_t, uint64_t); +SET_STORE_VALUE_HANDLER(uint32, upb_uint32_t, uint32_t); +SET_STORE_VALUE_HANDLER(int64, upb_int64_t, int64_t); +SET_STORE_VALUE_HANDLER(int32, upb_int32_t, int32_t); +SET_STORE_VALUE_HANDLER(bool, bool, bool); + +#ifdef UPB_TWO_32BIT_TYPES +SET_STORE_VALUE_HANDLER(int32, upb_int32alt_t, int32_t); +SET_STORE_VALUE_HANDLER(uint32, upb_uint32alt_t, uint32_t); +#endif + +#ifdef UPB_TWO_64BIT_TYPES +SET_STORE_VALUE_HANDLER(int64, upb_int64alt_t, int64_t); +SET_STORE_VALUE_HANDLER(uint64, upb_uint64alt_t, uint64_t); +#endif + +#undef SET_STORE_VALUE_HANDLER } // namespace upb #endif +// Implementation detail, put in the header file only so +// upb_sinkframe_handlerdata() can be inlined. +typedef struct { + upb_func *handler; + + // Could put either or both of these in a separate table to save memory when + // they are sparse. + void *data; + upb_handlerfree *cleanup; + + // TODO(haberman): this is wasteful; only the first "fieldhandler" of a + // submessage field needs this. To reduce memory footprint we should either: + // - put the subhandlers in a separate "fieldhandler", stored as part of + // a union with one of the above fields. 
+ // - count selector offsets by individual pointers instead of by whole + // fieldhandlers. + const upb_handlers *subhandlers; +} upb_fieldhandler; + #endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 065c495d4f..2bfc71713b 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -6,12 +6,101 @@ */ #include +#include #include #include #include "upb/bytestream.h" #include "upb/pb/decoder.h" #include "upb/pb/varint.h" +#define UPB_NONDELIMITED (0xffffffffffffffffULL) + +/* upb_pbdecoder ****************************************************************/ + +struct dasm_State; + +typedef struct { + const upb_fielddef *f; + uint64_t end_ofs; + uint32_t group_fieldnum; // UINT32_MAX for non-groups. + bool is_sequence; // frame represents seq or submsg/str? (f might be both). + bool is_packed; // true for packed primitive sequences. +} frame; + +struct upb_pbdecoder { + // Where we push parsed data (not owned). + upb_sink *sink; + + // Current input buffer and its stream offset. + const char *buf, *ptr, *end, *checkpoint; + uint64_t bufstart_ofs; + + // Buffer for residual bytes not parsed from the previous buffer. + char residual[16]; + char *residual_end; + + // Stores the user buffer passed to our decode function. + const char *buf_param; + size_t size_param; + + // Equal to size_param while we are in the residual buf, 0 otherwise. + size_t userbuf_remaining; + + // Used to temporarily store the return value before calling longjmp(). + size_t ret; + + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; + +#ifdef UPB_USE_JIT_X64 + // For JIT, which doesn't do bounds checks in the middle of parsing a field. + const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) + + // Used momentarily by the generated code to store a value while a user + // function is called. + uint32_t tmp_len; + + const void *saved_rbp; +#endif + + // Our internal stack. 
+ frame *top, *limit; + frame stack[UPB_MAX_NESTING]; + + // For exiting the decoder on error. + jmp_buf exitjmp; +}; + +typedef struct { + // The top-level handlers that this plan calls into. We own a ref. + const upb_handlers *dest_handlers; + +#ifdef UPB_USE_JIT_X64 + // JIT-generated machine code (else NULL). + char *jit_code; + size_t jit_size; + char *debug_info; + + // For storing upb_jitmsginfo, which contains per-msg runtime data needed + // by the JIT. + // Maps upb_handlers* -> upb_jitmsginfo. + upb_inttable msginfo; + + // The following members are used only while the JIT is being built. + + // This pointer is allocated by dasm_init() and freed by dasm_free(). + struct dasm_State *dynasm; + + // For storing pclabel bases while we are building the JIT. + // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base + upb_inttable pclabels; + + // This is not the same as len(pclabels) because the table only contains base + // offsets for each def, but each def can have many pclabels. + uint32_t pclabel_count; +#endif +} decoderplan; + typedef struct { uint8_t native_wire_type; bool is_numeric; @@ -39,12 +128,21 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { {UPB_WIRE_TYPE_VARINT, true}, // SINT64 }; -/* upb_decoderplan ************************************************************/ +static upb_selector_t getselector(const upb_fielddef *f, + upb_handlertype_t type) { + upb_selector_t selector; + bool ok = upb_getselector(f, type, &selector); + UPB_ASSERT_VAR(ok, ok); + return selector; +} + + +/* decoderplan ****************************************************************/ #ifdef UPB_USE_JIT_X64 // These defines are necessary for DynASM codegen. // See dynasm/dasm_proto.h for more info. 
-#define Dst_DECL upb_decoderplan *plan +#define Dst_DECL decoderplan *plan #define Dst_REF (plan->dynasm) #define Dst (plan) @@ -58,39 +156,49 @@ static const upb_decoder_typeinfo upb_decoder_types[] = { #include "upb/pb/decoder_x64.h" #endif -upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit) { - UPB_UNUSED(allowjit); - upb_decoderplan *p = malloc(sizeof(*p)); - assert(upb_handlers_isfrozen(h)); - p->handlers = h; - upb_handlers_ref(h, p); -#ifdef UPB_USE_JIT_X64 - p->jit_code = NULL; - if (allowjit) upb_decoderplan_makejit(p); -#endif - return p; -} - -void upb_decoderplan_unref(upb_decoderplan *p) { - // TODO: make truly refcounted. - upb_handlers_unref(p->handlers, p); +void freeplan(void *_p) { + decoderplan *p = _p; + upb_handlers_unref(p->dest_handlers, p); #ifdef UPB_USE_JIT_X64 if (p->jit_code) upb_decoderplan_freejit(p); #endif free(p); } -bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { +static decoderplan *getdecoderplan(const upb_handlers *h) { + if (upb_handlers_frametype(h) != upb_pbdecoder_getframetype()) + return NULL; + upb_selector_t sel; + if (!upb_getselector(UPB_BYTESTREAM_BYTES, UPB_HANDLER_STRING, &sel)) + return NULL; + return upb_handlers_gethandlerdata(h, sel); +} + +bool upb_pbdecoder_isdecoder(const upb_handlers *h) { + return getdecoderplan(h) != NULL; +} + +bool upb_pbdecoder_hasjitcode(const upb_handlers *h) { #ifdef UPB_USE_JIT_X64 + decoderplan *p = getdecoderplan(h); + if (!p) return false; return p->jit_code != NULL; #else - (void)p; + UPB_UNUSED(h); return false; #endif } +const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h) { + decoderplan *p = getdecoderplan(h); + if (!p) return NULL; + return p->dest_handlers; +} + + +/* upb_pbdecoder ****************************************************************/ -/* upb_decoder ****************************************************************/ +static bool in_residual_buf(const upb_pbdecoder *d, const char *p); // It's unfortunate that we 
have to micro-manage the compiler this way, // especially since this tuning is necessarily specific to one hardware @@ -100,98 +208,73 @@ bool upb_decoderplan_hasjitcode(upb_decoderplan *p) { #define FORCEINLINE static inline __attribute__((always_inline)) #define NOINLINE static __attribute__((noinline)) -UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) { - // Resumable decoder would back out to completed_ptr (and possibly get a - // previous buffer). - _longjmp(d->exitjmp, 1); +static upb_status *decoder_status(upb_pbdecoder *d) { + // TODO(haberman): encapsulate this access to pipeline->status, but not sure + // exactly what that interface should look like. + return &d->sink->pipeline_->status_; } -UPB_NORETURN static void upb_decoder_exitjmp2(void *d) { - upb_decoder_exitjmp(d); + +UPB_NORETURN static void exitjmp(upb_pbdecoder *d) { + _longjmp(d->exitjmp, 1); } -UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) { - upb_status_seterrliteral(&d->status, msg); - upb_decoder_exitjmp(d); + +UPB_NORETURN static void abortjmp(upb_pbdecoder *d, const char *msg) { + d->ret = in_residual_buf(d, d->checkpoint) ? 0 : (d->checkpoint - d->buf); + upb_status_seterrliteral(decoder_status(d), msg); + exitjmp(d); } /* Buffering ******************************************************************/ -// We operate on one buffer at a time, which may be a subset of the currently -// loaded byteregion data. When data for the buffer is completely gone we pull -// the next one. When we've committed our progress we discard any previous -// buffers' regions. +// We operate on one buffer at a time, which is either the user's buffer passed +// to our "decode" callback or some residual bytes from the previous buffer. -static size_t upb_decoder_bufleft(upb_decoder *d) { +// How many bytes can be safely read from d->ptr. 
+static size_t bufleft(upb_pbdecoder *d) { assert(d->end >= d->ptr); return d->end - d->ptr; } -static void upb_decoder_advance(upb_decoder *d, size_t len) { - assert(upb_decoder_bufleft(d) >= len); +// Overall offset of d->ptr. +uint64_t offset(const upb_pbdecoder *d) { + return d->bufstart_ofs + (d->ptr - d->buf); +} + +// Advances d->ptr. +static void advance(upb_pbdecoder *d, size_t len) { + assert(bufleft(d) >= len); d->ptr += len; } -uint64_t upb_decoder_offset(upb_decoder *d) { - return d->bufstart_ofs + (d->ptr - d->buf); +// Commits d->ptr progress; should be called when an entire atomic value +// (ie tag+value) has been successfully consumed. +static void checkpoint(upb_pbdecoder *d) { + d->checkpoint = d->ptr; } -uint64_t upb_decoder_bufendofs(upb_decoder *d) { - return d->bufstart_ofs + (d->end - d->buf); +static bool in_buf(const char *p, const char *buf, const char *end) { + return p >= buf && p <= end; } -static bool upb_decoder_islegalend(upb_decoder *d) { - if (d->top == d->stack) return true; - if (d->top - 1 == d->stack && - d->top->is_sequence && !d->top->is_packed) return true; - return false; +static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { + return in_buf(p, d->residual, d->residual_end); } -// Calculates derived values that we cache for speed. These reflect a -// combination of the current buffer and the stack, so must be called whenever -// either is updated. -static void upb_decoder_setmsgend(upb_decoder *d) { - upb_decoder_frame *f = d->top; +// Calculates the delim_end value, which represents a combination of the +// current buffer and the stack, so must be called whenever either is updated. +static void set_delim_end(upb_pbdecoder *d) { + frame *f = d->top; size_t delimlen = f->end_ofs - d->bufstart_ofs; size_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. 
- d->top_is_packed = f->is_packed; -} - -static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) { - assert(ofs >= upb_decoder_offset(d)); - if (ofs > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - d->buf = NULL; - d->ptr = NULL; - d->end = NULL; - d->delim_end = NULL; -#ifdef UPB_USE_JIT_X64 - d->jit_end = NULL; -#endif - d->bufstart_ofs = ofs; } -static bool upb_trypullbuf(upb_decoder *d) { - assert(upb_decoder_bufleft(d) == 0); - upb_decoder_skiptonewbuf(d, upb_decoder_offset(d)); - if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) { - switch (upb_byteregion_fetch(d->input)) { - case UPB_BYTE_OK: - assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0); - break; - case UPB_BYTE_EOF: return false; - case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input"); - // Decoder resuming is not yet supported. - case UPB_BYTE_WOULDBLOCK: - upb_decoder_abortjmp(d, "Input returned WOULDBLOCK"); - } - } - size_t len; - d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len); - assert(len > 0); - d->ptr = d->buf; - d->end = d->buf + len; - upb_decoder_setmsgend(d); +static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { + d->ptr = buf; + d->buf = buf; + d->end = end; + set_delim_end(d); #ifdef UPB_USE_JIT_X64 // If we start parsing a value, we can parse up to 20 bytes without // having to bounds-check anything (2 10-byte varints). Since the @@ -199,172 +282,232 @@ static bool upb_trypullbuf(upb_decoder *d) { // JIT bails if there are not 20 bytes available. 
d->jit_end = d->end - 20; #endif - assert(upb_decoder_bufleft(d) > 0); - return true; } -static void upb_pullbuf(upb_decoder *d) { - if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF"); +static void suspendjmp(upb_pbdecoder *d) { + switchtobuf(d, d->residual, d->residual_end); + exitjmp(d); +} + +static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { + assert(len >= 0); + assert(d->ptr == d->end); + d->bufstart_ofs += (d->ptr - d->buf); + switchtobuf(d, buf, buf + len); +} + +static void skip(upb_pbdecoder *d, size_t bytes) { + size_t avail = bufleft(d); + size_t total_avail = avail + d->userbuf_remaining; + if (avail >= bytes) { + // Skipped data is all in current buffer. + advance(d, bytes); + } else if (total_avail >= bytes) { + // Skipped data is all in residual buf and param buffer. + assert(in_residual_buf(d, d->ptr)); + advance(d, avail); + advancetobuf(d, d->buf_param, d->size_param); + d->userbuf_remaining = 0; + advance(d, bytes - avail); + } else { + // Skipped data extends beyond currently available buffers. + // TODO: we need to do a checkdelim() equivalent that pops any frames that + // we just skipped past. + d->bufstart_ofs = offset(d) + bytes; + d->residual_end = d->residual; + d->ret += bytes - total_avail; + suspendjmp(d); + } +} + +static void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) { + assert(bytes <= bufleft(d)); + memcpy(buf, d->ptr, bytes); + advance(d, bytes); } -static void upb_decoder_checkpoint(upb_decoder *d) { - upb_byteregion_discard(d->input, upb_decoder_offset(d)); +NOINLINE void getbytes_slow(upb_pbdecoder *d, void *buf, size_t bytes) { + const size_t avail = bufleft(d); + if (avail + d->userbuf_remaining >= bytes) { + // Remaining residual buffer and param buffer together can satisfy. + // (We are only called from getbytes() which has already verified that + // the current buffer alone cannot satisfy). 
+ assert(in_residual_buf(d, d->ptr)); + consumebytes(d, buf, avail); + advancetobuf(d, d->buf_param, d->size_param); + consumebytes(d, buf + avail, bytes - avail); + d->userbuf_remaining = 0; + } else { + // There is not enough remaining data, save residual bytes (if any) + // starting at the last committed checkpoint and exit. + if (in_buf(d->checkpoint, d->buf_param, d->buf_param + d->size_param)) { + // Checkpoint was in user buf; old residual bytes not needed. + d->ptr = d->checkpoint; + size_t save = bufleft(d); + assert(save <= sizeof(d->residual)); + memcpy(d->residual, d->ptr, save); + d->residual_end = d->residual + save; + d->bufstart_ofs = offset(d); + } else { + // Checkpoint was in residual buf; append user byte(s) to residual buf. + assert(d->checkpoint == d->residual); + assert((d->residual_end - d->residual) + d->size_param <= + sizeof(d->residual)); + if (!in_residual_buf(d, d->ptr)) { + d->bufstart_ofs -= (d->residual_end - d->residual); + } + memcpy(d->residual_end, d->buf_param, d->size_param); + d->residual_end += d->size_param; + } + suspendjmp(d); + } } -static void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) { - if (ofs <= upb_decoder_bufendofs(d)) { - upb_decoder_advance(d, ofs - upb_decoder_offset(d)); +FORCEINLINE void getbytes(upb_pbdecoder *d, void *buf, size_t bytes) { + if (bufleft(d) >= bytes) { + // Buffer has enough data to satisfy. 
+ consumebytes(d, buf, bytes); } else { - upb_decoder_skiptonewbuf(d, ofs); + getbytes_slow(d, buf, bytes); } - upb_decoder_checkpoint(d); } -static void upb_decoder_discard(upb_decoder *d, size_t bytes) { - upb_decoder_discardto(d, upb_decoder_offset(d) + bytes); +FORCEINLINE uint8_t getbyte(upb_pbdecoder *d) { + uint8_t byte; + getbytes(d, &byte, 1); + return byte; } /* Decoding of wire types *****************************************************/ -NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) { +NOINLINE uint64_t decode_varint_slow(upb_pbdecoder *d) { uint8_t byte = 0x80; uint64_t u64 = 0; int bitpos; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d); - u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos; - upb_decoder_advance(d, 1); + u64 |= ((uint64_t)((byte = getbyte(d)) & 0x7F)) << bitpos; } if(bitpos == 70 && (byte & 0x80)) - upb_decoder_abortjmp(d, "Unterminated varint"); + abortjmp(d, "Unterminated varint"); return u64; } +NOINLINE uint32_t decode_v32_slow(upb_pbdecoder *d) { + uint64_t u64 = decode_varint_slow(d); + if (u64 > UINT32_MAX) abortjmp(d, "Unterminated 32-bit varint"); + return (uint32_t)u64; +} + // For tags and delimited lengths, which must be <=32bit and are usually small. -FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) { - const char *p = d->ptr; - uint32_t ret; - uint64_t u64; +FORCEINLINE uint32_t decode_v32(upb_pbdecoder *d) { // Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048). - if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely. - ret = *p & 0x7f; - if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order - ret |= (*p & 0x7f) << 7; - if ((*(p++) & 0x80) == 0) goto done; // likely -slow: - u64 = upb_decode_varint_slow(d); - if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint"); - ret = (uint32_t)u64; - p = d->ptr; // Turn the next line into a nop. 
-done: - upb_decoder_advance(d, p - d->ptr); - return ret; -} - -// Returns true on success or false if we've hit a valid EOF. -FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { - if (upb_decoder_bufleft(d) == 0 && - upb_decoder_islegalend(d) && - !upb_trypullbuf(d)) { - return false; + if (bufleft(d) >= 2) { + uint32_t ret = d->ptr[0] & 0x7f; + if ((d->ptr[0] & 0x80) == 0) { + advance(d, 1); + return ret; + } + ret |= (d->ptr[1] & 0x7f) << 7; + if ((d->ptr[1] & 0x80) == 0) { + advance(d, 2); + return ret; + } } - *val = upb_decode_varint32(d); - return true; + return decode_v32_slow(d); } -FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { - if (upb_decoder_bufleft(d) >= 10) { +FORCEINLINE uint64_t decode_varint(upb_pbdecoder *d) { + if (bufleft(d) >= 10) { // Fast case. upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint"); - upb_decoder_advance(d, r.p - d->ptr); + if (r.p == NULL) abortjmp(d, "Unterminated varint"); + advance(d, r.p - d->ptr); return r.val; - } else if (upb_decoder_bufleft(d) > 0) { - // Intermediate case -- worth it? - char tmpbuf[10]; - memset(tmpbuf, 0x80, 10); - memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d)); - upb_decoderet r = upb_vdecode_fast(tmpbuf); - if (r.p != NULL) { - upb_decoder_advance(d, r.p - tmpbuf); - return r.val; - } - } - // Slow case -- varint spans buffer seam. - return upb_decode_varint_slow(d); -} - -FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { - if (upb_decoder_bufleft(d) >= bytes) { - // Fast case. - memcpy(buf, d->ptr, bytes); - upb_decoder_advance(d, bytes); } else { - // Slow case. - size_t read = 0; - while (1) { - size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read); - memcpy(buf + read, d->ptr, avail); - upb_decoder_advance(d, avail); - read += avail; - if (read == bytes) break; - upb_pullbuf(d); - } + // Slow case -- varint spans buffer seam. 
+ return decode_varint_slow(d); } } -FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { +FORCEINLINE uint32_t decode_fixed32(upb_pbdecoder *d) { uint32_t u32; - upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t)); + getbytes(d, &u32, 4); return u32; // TODO: proper byte swapping for big-endian machines. } -FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { + +FORCEINLINE uint64_t decode_fixed64(upb_pbdecoder *d) { uint64_t u64; - upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t)); + getbytes(d, &u64, 8); return u64; // TODO: proper byte swapping for big-endian machines. } -INLINE void upb_push_msg(upb_decoder *d, const upb_fielddef *f, uint64_t end) { - upb_decoder_frame *fr = d->top + 1; - if (!upb_sink_startsubmsg(&d->sink, f) || fr > d->limit) { - upb_decoder_abortjmp(d, "Nesting too deep."); - } +static void push(upb_pbdecoder *d, const upb_fielddef *f, bool is_sequence, + bool is_packed, int32_t group_fieldnum, uint64_t end) { + frame *fr = d->top + 1; + if (fr >= d->limit) abortjmp(d, "Nesting too deep."); fr->f = f; - fr->is_sequence = false; - fr->is_packed = false; + fr->is_sequence = is_sequence; + fr->is_packed = is_packed; fr->end_ofs = end; - fr->group_fieldnum = end == UPB_NONDELIMITED ? 
- (int32_t)upb_fielddef_number(f) : -1; + fr->group_fieldnum = group_fieldnum; d->top = fr; - upb_decoder_setmsgend(d); + set_delim_end(d); } -INLINE void upb_push_seq(upb_decoder *d, const upb_fielddef *f, bool packed, - uint64_t end_ofs) { - upb_decoder_frame *fr = d->top + 1; - if (!upb_sink_startseq(&d->sink, f) || fr > d->limit) { - upb_decoder_abortjmp(d, "Nesting too deep."); - } - fr->f = f; - fr->is_sequence = true; - fr->group_fieldnum = -1; - fr->is_packed = packed; - fr->end_ofs = end_ofs; - d->top = fr; - upb_decoder_setmsgend(d); +static void push_msg(upb_pbdecoder *d, const upb_fielddef *f, uint64_t end) { + if (!upb_sink_startsubmsg(d->sink, getselector(f, UPB_HANDLER_STARTSUBMSG))) + abortjmp(d, "startsubmsg failed."); + int32_t group_fieldnum = (end == UPB_NONDELIMITED) ? + (int32_t)upb_fielddef_number(f) : -1; + push(d, f, false, false, group_fieldnum, end); +} + +static void push_seq(upb_pbdecoder *d, const upb_fielddef *f, bool packed, + uint64_t end_ofs) { + if (!upb_sink_startseq(d->sink, getselector(f, UPB_HANDLER_STARTSEQ))) + abortjmp(d, "startseq failed."); + push(d, f, true, packed, -1, end_ofs); +} + +static void push_str(upb_pbdecoder *d, const upb_fielddef *f, size_t len, + uint64_t end) { + if (!upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), len)) + abortjmp(d, "startseq failed."); + push(d, f, false, false, -1, end); } -INLINE void upb_pop_submsg(upb_decoder *d) { - upb_sink_endsubmsg(&d->sink, d->top->f); +static void pop_submsg(upb_pbdecoder *d) { + upb_sink_endsubmsg(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSUBMSG)); d->top--; - upb_decoder_setmsgend(d); + set_delim_end(d); } -INLINE void upb_pop_seq(upb_decoder *d) { - upb_sink_endseq(&d->sink, d->top->f); +static void pop_seq(upb_pbdecoder *d) { + upb_sink_endseq(d->sink, getselector(d->top->f, UPB_HANDLER_ENDSEQ)); d->top--; - upb_decoder_setmsgend(d); + set_delim_end(d); +} + +static void pop_string(upb_pbdecoder *d) { + upb_sink_endstr(d->sink, 
getselector(d->top->f, UPB_HANDLER_ENDSTR)); + d->top--; + set_delim_end(d); +} + +static void checkdelim(upb_pbdecoder *d) { + while (d->delim_end && d->ptr >= d->delim_end) { + // TODO(haberman): not sure what to do about this; if we detect this error + // we can possibly violate the promise that errors are always signaled by a + // short "parsed byte" count (because all bytes might have been successfully + // parsed prior to detecting this error). + // if (d->ptr > d->delim_end) abortjmp(d, "Bad submessage end"); + if (d->top->is_sequence) { + pop_seq(d); + } else { + pop_submsg(d); + } + } } @@ -374,95 +517,79 @@ INLINE void upb_pop_seq(upb_decoder *d) { // properly sign-extended. We could detect this and error about the data loss, // but proto2 does not do this, so we pass. -#define T(type, wt, name, convfunc) \ - INLINE void upb_decode_ ## type(upb_decoder *d, const upb_fielddef *f) { \ - upb_sink_put ## name(&d->sink, f, (convfunc)(upb_decode_ ## wt(d))); \ +#define T(type, sel, wt, name, convfunc) \ + static void decode_ ## type(upb_pbdecoder *d, const upb_fielddef *f) { \ + upb_sink_put ## name(d->sink, getselector(f, UPB_HANDLER_ ## sel), \ + (convfunc)(decode_ ## wt(d))); \ } \ static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } -T(INT32, varint, int32, int32_t) -T(INT64, varint, int64, int64_t) -T(UINT32, varint, uint32, uint32_t) -T(UINT64, varint, uint64, uint64_t) -T(FIXED32, fixed32, uint32, uint32_t) -T(FIXED64, fixed64, uint64, uint64_t) -T(SFIXED32, fixed32, int32, int32_t) -T(SFIXED64, fixed64, int64, int64_t) -T(BOOL, varint, bool, bool) -T(ENUM, varint, int32, int32_t) -T(DOUBLE, fixed64, double, upb_asdouble) -T(FLOAT, fixed32, float, upb_asfloat) -T(SINT32, varint, int32, upb_zzdec_32) -T(SINT64, varint, int64, upb_zzdec_64) +T(INT32, INT32, varint, int32, int32_t) +T(INT64, INT64, varint, int64, int64_t) +T(UINT32, UINT32, varint, 
uint32, uint32_t) +T(UINT64, UINT64, varint, uint64, uint64_t) +T(FIXED32, UINT32, fixed32, uint32, uint32_t) +T(FIXED64, UINT64, fixed64, uint64, uint64_t) +T(SFIXED32, INT32, fixed32, int32, int32_t) +T(SFIXED64, INT64, fixed64, int64, int64_t) +T(BOOL, BOOL, varint, bool, bool) +T(ENUM, INT32, varint, int32, int32_t) +T(DOUBLE, DOUBLE, fixed64, double, upb_asdouble) +T(FLOAT, FLOAT, fixed32, float, upb_asfloat) +T(SINT32, INT32, varint, int32, upb_zzdec_32) +T(SINT64, INT64, varint, int64, upb_zzdec_64) #undef T -static void upb_decode_GROUP(upb_decoder *d, const upb_fielddef *f) { - upb_push_msg(d, f, UPB_NONDELIMITED); -} - -static void upb_decode_MESSAGE(upb_decoder *d, const upb_fielddef *f) { - uint32_t len = upb_decode_varint32(d); - upb_push_msg(d, f, upb_decoder_offset(d) + len); -} - -static void upb_decode_STRING(upb_decoder *d, const upb_fielddef *f) { - uint32_t strlen = upb_decode_varint32(d); - uint64_t offset = upb_decoder_offset(d); - uint64_t end = offset + strlen; - if (end > upb_byteregion_endofs(d->input)) - upb_decoder_abortjmp(d, "Unexpected EOF"); - upb_sink_startstr(&d->sink, f, strlen); - while (strlen > 0) { - if (upb_byteregion_available(d->input, offset) == 0) - upb_pullbuf(d); - size_t len; - const char *ptr = upb_byteregion_getptr(d->input, offset, &len); - len = UPB_MIN(len, strlen); - len = upb_sink_putstring(&d->sink, f, ptr, len); - if (len > strlen) - upb_decoder_abortjmp(d, "Skipped too many bytes."); - offset += len; - strlen -= len; - upb_decoder_discardto(d, offset); - } - upb_sink_endstr(&d->sink, f); +static void decode_GROUP(upb_pbdecoder *d, const upb_fielddef *f) { + push_msg(d, f, UPB_NONDELIMITED); } +static void decode_MESSAGE(upb_pbdecoder *d, const upb_fielddef *f) { + uint32_t len = decode_v32(d); + push_msg(d, f, offset(d) + len); +} -/* The main decoding loop *****************************************************/ - -static void upb_decoder_checkdelim(upb_decoder *d) { - // TODO: This doesn't work for the case 
that no buffer is currently loaded - // (ie. d->buf == NULL) because delim_end is NULL even if we are at - // end-of-delim. Need to add a test that exercises this by putting a buffer - // seam in the middle of the final delimited value in a proto that we skip - // for some reason (like because it's unknown and we have no unknown field - // handler). - while (d->delim_end != NULL && d->ptr >= d->delim_end) { - if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end"); - if (d->top->is_sequence) { - upb_pop_seq(d); - } else { - upb_pop_submsg(d); +static void decode_STRING(upb_pbdecoder *d, const upb_fielddef *f) { + uint32_t strlen = decode_v32(d); + if (strlen <= bufleft(d)) { + upb_sink_startstr(d->sink, getselector(f, UPB_HANDLER_STARTSTR), strlen); + if (strlen) + upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), + d->ptr, strlen); + upb_sink_endstr(d->sink, getselector(f, UPB_HANDLER_ENDSTR)); + advance(d, strlen); + } else { + // Buffer ends in the middle of the string; need to push a decoder frame + // for it. + push_str(d, f, strlen, offset(d) + strlen); + if (bufleft(d)) { + upb_sink_putstring(d->sink, getselector(f, UPB_HANDLER_STRING), + d->ptr, bufleft(d)); + advance(d, bufleft(d)); } + d->bufstart_ofs = offset(d); + d->residual_end = d->residual; + suspendjmp(d); } } -INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { + +/* The main decoding loop *****************************************************/ + +static const upb_fielddef *decode_tag(upb_pbdecoder *d) { while (1) { - uint32_t tag; - if (!upb_trydecode_varint32(d, &tag)) return NULL; + uint32_t tag = decode_v32(d); uint8_t wire_type = tag & 0x7; uint32_t fieldnum = tag >> 3; const upb_fielddef *f = NULL; - const upb_handlers *h = upb_sink_tophandlers(&d->sink); + const upb_handlers *h = upb_sinkframe_handlers(upb_sink_top(d->sink)); f = upb_msgdef_itof(upb_handlers_msgdef(h), fieldnum); bool packed = false; if (f) { // Wire type check. 
- upb_fieldtype_t type = upb_fielddef_type(f); + upb_descriptortype_t type = upb_fielddef_descriptortype(f); if (wire_type == upb_decoder_types[type].native_wire_type) { // Wire type is ok. } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && @@ -477,18 +604,19 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { // There are no explicit "startseq" or "endseq" markers in protobuf // streams, so we have to infer them by noticing when a repeated field // starts or ends. - upb_decoder_frame *fr = d->top; + frame *fr = d->top; if (fr->is_sequence && fr->f != f) { - upb_pop_seq(d); + pop_seq(d); fr = d->top; } if (f && upb_fielddef_isseq(f) && !fr->is_sequence) { if (packed) { - uint32_t len = upb_decode_varint32(d); - upb_push_seq(d, f, true, upb_decoder_offset(d) + len); + uint32_t len = decode_v32(d); + push_seq(d, f, true, offset(d) + len); + checkpoint(d); } else { - upb_push_seq(d, f, false, fr->end_ofs); + push_seq(d, f, false, fr->end_ofs); } } @@ -496,118 +624,202 @@ INLINE const upb_fielddef *upb_decode_tag(upb_decoder *d) { // Unknown field or ENDGROUP. 
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER) - upb_decoder_abortjmp(d, "Invalid field number"); + abortjmp(d, "Invalid field number"); switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; - case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break; - case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break; - case UPB_WIRE_TYPE_DELIMITED: - upb_decoder_discard(d, upb_decode_varint32(d)); break; + case UPB_WIRE_TYPE_VARINT: decode_varint(d); break; + case UPB_WIRE_TYPE_32BIT: skip(d, 4); break; + case UPB_WIRE_TYPE_64BIT: skip(d, 8); break; + case UPB_WIRE_TYPE_DELIMITED: skip(d, decode_v32(d)); break; case UPB_WIRE_TYPE_START_GROUP: - upb_decoder_abortjmp(d, "Can't handle unknown groups yet"); + abortjmp(d, "Can't handle unknown groups yet"); case UPB_WIRE_TYPE_END_GROUP: if (fieldnum != fr->group_fieldnum) - upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag"); - upb_sink_endsubmsg(&d->sink, fr->f); - d->top--; - upb_decoder_setmsgend(d); + abortjmp(d, "Unmatched ENDGROUP tag"); + pop_submsg(d); break; default: - upb_decoder_abortjmp(d, "Invalid wire type"); + abortjmp(d, "Invalid wire type"); } // TODO: deliver to unknown field callback. - upb_decoder_checkpoint(d); - upb_decoder_checkdelim(d); + checkpoint(d); + checkdelim(d); } } -upb_success_t upb_decoder_decode(upb_decoder *d) { - assert(d->input); +void *start(const upb_sinkframe *fr, size_t size_hint) { + UPB_UNUSED(size_hint); + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + assert(d); + assert(d->sink); + upb_sink_startmsg(d->sink); + return d; +} + +bool end(const upb_sinkframe *fr) { + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + + if (d->residual_end > d->residual) { + // We have preserved bytes. + upb_status_seterrliteral(decoder_status(d), "Unexpected EOF"); + return false; + } + + // We may need to dispatch a top-level implicit frame. 
+ if (d->top == d->stack + 1 && + d->top->is_sequence && + !d->top->is_packed) { + assert(upb_sinkframe_depth(upb_sink_top(d->sink)) == 1); + pop_seq(d); + } + if (d->top != d->stack) { + upb_status_seterrliteral( + decoder_status(d), "Ended inside delimited field."); + return false; + } + upb_sink_endmsg(d->sink); + return true; +} + +size_t decode(const upb_sinkframe *fr, const char *buf, size_t size) { + upb_pbdecoder *d = upb_sinkframe_userdata(fr); + decoderplan *plan = upb_sinkframe_handlerdata(fr); + UPB_UNUSED(plan); + assert(upb_sinkframe_handlers(upb_sink_top(d->sink)) == plan->dest_handlers); + + if (size == 0) return 0; + // Assume we'll consume the whole buffer unless this is overwritten. + d->ret = size; + if (_setjmp(d->exitjmp)) { - assert(!upb_ok(&d->status)); - return UPB_ERROR; + // Hit end-of-buffer or error. + return d->ret; + } + + d->buf_param = buf; + d->size_param = size; + if (d->residual_end > d->residual) { + // We have residual bytes from the last buffer. + d->userbuf_remaining = size; + } else { + d->userbuf_remaining = 0; + advancetobuf(d, buf, size); + + if (d->top != d->stack && + upb_fielddef_isstring(d->top->f) && + !d->top->is_sequence) { + // Last buffer ended in the middle of a string; deliver more of it. + size_t len = d->top->end_ofs - offset(d); + if (size >= len) { + upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), + d->ptr, len); + advance(d, len); + pop_string(d); + } else { + upb_sink_putstring(d->sink, getselector(d->top->f, UPB_HANDLER_STRING), + d->ptr, size); + advance(d, size); + d->residual_end = d->residual; + advancetobuf(d, d->residual, 0); + return size; + } + } } - upb_sink_startmsg(&d->sink); - // Prime the buf so we can hit the JIT immediately. 
- upb_trypullbuf(d); + checkpoint(d); + const upb_fielddef *f = d->top->f; while(1) { #ifdef UPB_USE_JIT_X64 - upb_decoder_enterjit(d); - upb_decoder_checkpoint(d); - upb_decoder_setmsgend(d); + upb_decoder_enterjit(d, plan); + checkpoint(d); + set_delim_end(d); // JIT doesn't keep this current. #endif - upb_decoder_checkdelim(d); - if (!d->top_is_packed) f = upb_decode_tag(d); - if (!f) { - // Sucessful EOF. We may need to dispatch a top-level implicit frame. - if (d->top->is_sequence) { - assert(d->sink.top == d->sink.stack + 1); - upb_pop_seq(d); - } - assert(d->top == d->stack); - upb_sink_endmsg(&d->sink, &d->status); - return UPB_OK; + checkdelim(d); + if (!d->top->is_packed) { + f = decode_tag(d); } - switch (upb_fielddef_type(f)) { - case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; - case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; - case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; - case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break; - case UPB_TYPE(INT32): upb_decode_INT32(d, f); break; - case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break; - case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break; - case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break; - case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break; - case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break; - case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break; - case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break; - case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break; - case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break; - case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break; - case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break; - case UPB_TYPE_NONE: assert(false); break; + switch (upb_fielddef_descriptortype(f)) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: decode_DOUBLE(d, f); break; + case UPB_DESCRIPTOR_TYPE_FLOAT: decode_FLOAT(d, f); break; + case UPB_DESCRIPTOR_TYPE_INT64: 
decode_INT64(d, f); break; + case UPB_DESCRIPTOR_TYPE_UINT64: decode_UINT64(d, f); break; + case UPB_DESCRIPTOR_TYPE_INT32: decode_INT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_FIXED64: decode_FIXED64(d, f); break; + case UPB_DESCRIPTOR_TYPE_FIXED32: decode_FIXED32(d, f); break; + case UPB_DESCRIPTOR_TYPE_BOOL: decode_BOOL(d, f); break; + case UPB_DESCRIPTOR_TYPE_STRING: UPB_FALLTHROUGH_INTENDED; + case UPB_DESCRIPTOR_TYPE_BYTES: decode_STRING(d, f); break; + case UPB_DESCRIPTOR_TYPE_GROUP: decode_GROUP(d, f); break; + case UPB_DESCRIPTOR_TYPE_MESSAGE: decode_MESSAGE(d, f); break; + case UPB_DESCRIPTOR_TYPE_UINT32: decode_UINT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_ENUM: decode_ENUM(d, f); break; + case UPB_DESCRIPTOR_TYPE_SFIXED32: decode_SFIXED32(d, f); break; + case UPB_DESCRIPTOR_TYPE_SFIXED64: decode_SFIXED64(d, f); break; + case UPB_DESCRIPTOR_TYPE_SINT32: decode_SINT32(d, f); break; + case UPB_DESCRIPTOR_TYPE_SINT64: decode_SINT64(d, f); break; } - upb_decoder_checkpoint(d); + checkpoint(d); } } -void upb_decoder_init(upb_decoder *d) { - upb_status_init(&d->status); - d->plan = NULL; - d->input = NULL; +void init(void *_d) { + upb_pbdecoder *d = _d; d->limit = &d->stack[UPB_MAX_NESTING]; + d->sink = NULL; + // reset() must be called before decoding; this is guaranteed by assert() in + // start(). } -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p) { - d->plan = p; - d->input = NULL; - upb_sink_init(&d->sink, p->handlers); -} - -void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, - void *c) { - assert(d->plan); - upb_status_clear(&d->status); - upb_sink_reset(&d->sink, c); - d->input = input; - +void reset(void *_d) { + upb_pbdecoder *d = _d; d->top = d->stack; d->top->is_sequence = false; d->top->is_packed = false; d->top->group_fieldnum = UINT32_MAX; d->top->end_ofs = UPB_NONDELIMITED; - - // Protect against assert in skiptonewbuf(). 
d->bufstart_ofs = 0; - d->ptr = NULL; - d->buf = NULL; - upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input)); + d->ptr = d->residual; + d->buf = d->residual; + d->end = d->residual; + d->residual_end = d->residual; } -void upb_decoder_uninit(upb_decoder *d) { - upb_status_uninit(&d->status); +bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink* sink) { + // TODO(haberman): typecheck the sink, and test whether the decoder is in the + // middle of decoding. Return false if either assumption is violated. + d->sink = sink; + reset(d); + return true; +} + +const upb_frametype upb_pbdecoder_frametype = { + sizeof(upb_pbdecoder), + init, + NULL, + reset, +}; + +const upb_frametype *upb_pbdecoder_getframetype() { + return &upb_pbdecoder_frametype; +} + +const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, + bool allowjit, + const void *owner) { + UPB_UNUSED(allowjit); + decoderplan *p = malloc(sizeof(*p)); + assert(upb_handlers_isfrozen(dest)); + p->dest_handlers = dest; + upb_handlers_ref(dest, p); +#ifdef UPB_USE_JIT_X64 + p->jit_code = NULL; + if (allowjit) upb_decoderplan_makejit(p); +#endif + + upb_handlers *h = upb_handlers_new( + UPB_BYTESTREAM, &upb_pbdecoder_frametype, owner); + upb_handlers_setstartstr(h, UPB_BYTESTREAM_BYTES, start, NULL, NULL); + upb_handlers_setstring(h, UPB_BYTESTREAM_BYTES, decode, p, freeplan); + upb_handlers_setendstr(h, UPB_BYTESTREAM_BYTES, end, NULL, NULL); + return h; } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 690ebb9cd5..4307434e58 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -4,163 +4,96 @@ * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * upb_decoder implements a high performance, streaming decoder for protobuf - * data that works by getting its input data from a upb_byteregion and calling - * into a upb_handlers. 
+ * upb::Decoder implements a high performance, streaming decoder for protobuf + * data that works by parsing input data one buffer at a time and calling into + * a upb::Handlers. */ #ifndef UPB_DECODER_H_ #define UPB_DECODER_H_ -#include -#include "upb/bytestream.h" #include "upb/sink.h" #ifdef __cplusplus -extern "C" { -#endif +namespace upb { +namespace pb { -/* upb_decoderplan ************************************************************/ - -// A decoderplan contains whatever data structures and generated (JIT-ted) code -// are necessary to decode protobuf data of a specific type to a specific set -// of handlers. By generating the plan ahead of time, we avoid having to -// redo this work every time we decode. -// -// A decoderplan is threadsafe, meaning that it can be used concurrently by -// different upb_decoders in different threads. However, the upb_decoders are -// *not* thread-safe. -struct _upb_decoderplan; -typedef struct _upb_decoderplan upb_decoderplan; - -// TODO(haberman): -// - add support for letting any message in the plan be at the top level. -// - make this object a handlers instead (when bytesrc/bytesink are merged -// into handlers). -// - add support for sharing code with previously-built plans/handlers. -upb_decoderplan *upb_decoderplan_new(const upb_handlers *h, bool allowjit); -void upb_decoderplan_unref(upb_decoderplan *p); - -// Returns true if the plan contains JIT-ted code. This may not be the same as -// the "allowjit" parameter to the constructor if support for JIT-ting was not -// compiled in. -bool upb_decoderplan_hasjitcode(upb_decoderplan *p); - - -/* upb_decoder ****************************************************************/ - -struct dasm_State; - -typedef struct { - const upb_fielddef *f; - uint64_t end_ofs; - uint32_t group_fieldnum; // UINT32_MAX for non-groups. - bool is_sequence; // frame represents seq or submsg? (f might be both). 
- bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX - // (strings aren't pushed). -} upb_decoder_frame; - -typedef struct _upb_decoder { - upb_decoderplan *plan; - upb_byteregion *input; // Input data (serialized), not owned. - upb_status status; // Where we store errors that occur. - - // Where we push parsed data. - // TODO(haberman): make this a pointer and make upb_decoder_resetinput() take - // one of these instead of a void*. - upb_sink sink; - - // Our internal stack. - upb_decoder_frame *top, *limit; - upb_decoder_frame stack[UPB_MAX_NESTING]; - - // Current input buffer and its stream offset. - const char *buf, *ptr, *end; - uint64_t bufstart_ofs; - - // End of the delimited region, relative to ptr, or NULL if not in this buf. - const char *delim_end; - // True if the top stack frame represents a packed field. - bool top_is_packed; - -#ifdef UPB_USE_JIT_X64 - // For JIT, which doesn't do bounds checks in the middle of parsing a field. - const char *jit_end, *effective_end; // == MIN(jit_end, delim_end) - - // Used momentarily by the generated code to store a value while a user - // function is called. - uint32_t tmp_len; -#endif - - // For exiting the decoder on error. - jmp_buf exitjmp; -} upb_decoder; - -void upb_decoder_init(upb_decoder *d); -void upb_decoder_uninit(upb_decoder *d); - -// Resets the plan that the decoder will parse from. "msg_offset" indicates -// which message from the plan will be used as the top-level message. -// -// This will also reset the decoder's input to be uninitialized -- -// upb_decoder_resetinput() must be called before parsing can occur. The plan -// must live until the decoder is destroyed or reset to a different plan. -// -// Must be called before upb_decoder_resetinput() or upb_decoder_decode(). -void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p); - -// Resets the input of an already-allocated decoder. 
This puts it in a state -// where it has not seen any data, and expects the next data to be from the -// beginning of a new protobuf. Decoders must have their input reset before -// they can be used. A decoder can have its input reset multiple times. -// "input" must live until the decoder is destroyed or has it input reset -// again. "c" is the closure that will be passed to the handlers. -// -// Must be called before upb_decoder_decode(). -void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c); - -// Decodes serialized data (calling handlers as the data is parsed), returning -// the success of the operation (call upb_decoder_status() for details). -upb_success_t upb_decoder_decode(upb_decoder *d); - -INLINE const upb_status *upb_decoder_status(upb_decoder *d) { - return &d->status; -} +// Frame type that encapsulates decoder state. +class Decoder; -// Implementation details +// Resets the sink of the Decoder. This must be called at least once before +// the decoder can be used. It may only be called when the decoder is in a +// state where it was just created or reset. The given sink must be from the +// same pipeline as this decoder. +inline bool ResetDecoderSink(Decoder* d, Sink* sink); -struct _upb_decoderplan { - // The top-level handlers that this plan calls into. We own a ref. - const upb_handlers *handlers; +// Gets the handlers suitable for parsing protobuf data according to the given +// destination handlers. The protobuf schema to parse is taken from dest. +inline const upb::Handlers *GetDecoderHandlers(const upb::Handlers *dest, + bool allowjit, + const void *owner); -#ifdef UPB_USE_JIT_X64 - // JIT-generated machine code (else NULL). - char *jit_code; - size_t jit_size; - char *debug_info; +// Returns true if these handlers represent a upb::pb::Decoder. +bool IsDecoder(const upb::Handlers *h); - // For storing upb_jitmsginfo, which contains per-msg runtime data needed - // by the JIT. - // Maps upb_handlers* -> upb_jitmsginfo.
- upb_inttable msginfo; +// Returns true if IsDecoder(h) and the given handlers have JIT code. +inline bool HasJitCode(const upb::Handlers* h); - // The following members are used only while the JIT is being built. +// Returns the destination handlers if IsDecoder(h), otherwise returns NULL. +const upb::Handlers* GetDestHandlers(const upb::Handlers* h); - // This pointer is allocated by dasm_init() and freed by dasm_free(). - struct dasm_State *dynasm; +} // namespace pb +} // namespace upb - // For storing pclabel bases while we are building the JIT. - // Maps (upb_handlers* or upb_fielddef*) -> int32 pclabel_base - upb_inttable pclabels; +typedef upb::pb::Decoder upb_pbdecoder; - // This is not the same as len(pclabels) because the table only contains base - // offsets for each def, but each def can have many pclabels. - uint32_t pclabel_count; +extern "C" { +#else +struct upb_pbdecoder; +typedef struct upb_pbdecoder upb_pbdecoder; #endif -}; + +// C API. +const upb_frametype *upb_pbdecoder_getframetype(); +bool upb_pbdecoder_resetsink(upb_pbdecoder *d, upb_sink *sink); +const upb_handlers *upb_pbdecoder_gethandlers(const upb_handlers *dest, + bool allowjit, + const void *owner); +bool upb_pbdecoder_isdecoder(const upb_handlers *h); +bool upb_pbdecoder_hasjitcode(const upb_handlers *h); +const upb_handlers *upb_pbdecoder_getdesthandlers(const upb_handlers *h); + +// C++ implementation details. 
///////////////////////////////////////////////// #ifdef __cplusplus -} /* extern "C" */ +} // extern "C" + +namespace upb { + +template<> inline const FrameType* GetFrameType() { + return upb_pbdecoder_getframetype(); +} + +namespace pb { +inline bool ResetDecoderSink(Decoder* r, Sink* sink) { + return upb_pbdecoder_resetsink(r, sink); +} +inline const upb::Handlers* GetDecoderHandlers(const upb::Handlers* dest, + bool allowjit, + const void* owner) { + return upb_pbdecoder_gethandlers(dest, allowjit, owner); +} +inline bool IsDecoder(const upb::Handlers* h) { + return upb_pbdecoder_isdecoder(h); +} +inline bool HasJitCode(const upb::Handlers* h) { + return upb_pbdecoder_hasjitcode(h); +} +inline const upb::Handlers* GetDestHandlers(const upb::Handlers* h) { + return upb_pbdecoder_getdesthandlers(h); +} +} // namespace pb +} // namespace upb #endif #endif /* UPB_DECODER_H_ */ diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index cd09cfeb99..7d4c537b06 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -4,7 +4,7 @@ |// Copyright (c) 2011 Google Inc. See LICENSE for details. |// Author: Josh Haberman |// -|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which +|// JIT compiler for upb_pbdecoder on x86. Given a decoderplan object (which |// contains an embedded set of upb_handlers), generates code specialized to |// parsing the specific message and calling specific handlers. 
|// @@ -54,17 +54,19 @@ typedef struct { void *jit_func; } upb_jitmsginfo; -static uint32_t upb_getpclabel(upb_decoderplan *plan, const void *obj, int n) { - const upb_value *v = upb_inttable_lookupptr(&plan->pclabels, obj); - assert(v); - return upb_value_getuint32(*v) + n; +static uint32_t upb_getpclabel(decoderplan *plan, const void *obj, int n) { + upb_value v; + bool found = upb_inttable_lookupptr(&plan->pclabels, obj, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getuint32(v) + n; } -static upb_jitmsginfo *upb_getmsginfo(upb_decoderplan *plan, +static upb_jitmsginfo *upb_getmsginfo(decoderplan *plan, const upb_handlers *h) { - const upb_value *v = upb_inttable_lookupptr(&plan->msginfo, h); - assert(v); - return upb_value_getptr(*v); + upb_value v; + bool found = upb_inttable_lookupptr(&plan->msginfo, h, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getptr(v); } // To debug JIT-ted code with GDB we need to tell GDB about the JIT-ted code @@ -109,7 +111,7 @@ void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); } -void upb_reg_jit_gdb(upb_decoderplan *plan) { +void upb_reg_jit_gdb(decoderplan *plan) { // Create debug info. size_t elf_len = sizeof(upb_jit_debug_elf_file); plan->debug_info = malloc(elf_len); @@ -135,7 +137,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) { #else -void upb_reg_jit_gdb(upb_decoderplan *plan) { +void upb_reg_jit_gdb(decoderplan *plan) { (void)plan; } @@ -154,10 +156,9 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// Calling conventions. Note -- this will need to be changed for |// Windows, which uses a different calling convention! |.define ARG1_64, rdi -|.define ARG2_8, sil +|.define ARG2_8, r6b // DynASM's equivalent to "sil" -- low byte of esi. 
|.define ARG2_32, esi |.define ARG2_64, rsi -|.define ARG3_8, dl |.define ARG3_32, edx |.define ARG3_64, rdx |.define ARG4_64, rcx @@ -170,9 +171,10 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |// conventions, but of course when calling to user callbacks we must. |.define PTR, rbx // Writing this to DECODER->ptr commits our progress. |.define CLOSURE, r12 -|.type SINKFRAME, upb_sink_frame, r13 -|.type FRAME, upb_decoder_frame, r14 -|.type DECODER, upb_decoder, r15 +|.type SINKFRAME, upb_sinkframe, r13 +|.type FRAME, frame, r14 +|.type DECODER, upb_pbdecoder, r15 +|.type SINK, upb_sink | |.macro callp, addr || upb_assert_notnull(addr); @@ -187,6 +189,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } || } |.endmacro | +|.macro load_handler_data, h, f, type +||{ +|| uintptr_t data = (uintptr_t)gethandlerdata(h, f, type); +|| if (data > 0xffffffff) { +| mov64 rax, data +| mov SINKFRAME->u.handler_data, rax +|| } else if (data > 0x7fffffff) { +| mov eax, data +| mov SINKFRAME->u.handler_data, rax +|| } else { +| mov qword SINKFRAME->u.handler_data, data +|| } +|| } +|.endmacro +| |// Checkpoints our progress by writing PTR to DECODER, and |// checks for end-of-buffer. |.macro checkpoint, h @@ -205,25 +222,33 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | jz ->exit_jit |.endmacro | -|// Decodes varint from [PTR + offset] -> ARG3. -|// Saves new pointer as rax. +|// Decodes varint into ARG2. +|// Inputs: +|// - ecx: first 4 bytes of varint +|// - offset: offset from PTR where varint begins +|// Outputs: +|// - ARG2: contains decoded varint +|// - rax: new PTR |.macro decode_loaded_varint, offset | // Check for <=2 bytes inline, otherwise jump to 2-10 byte decoder. 
| lea rax, [PTR + offset + 1] -| mov ARG3_32, ecx -| and ARG3_32, 0x7f +| mov ARG2_32, ecx +| and ARG2_32, 0x7f | test cl, cl | jns >9 | lea rax, [PTR + offset + 2] -| movzx esi, ch -| and esi, 0x7f -| shl esi, 7 -| or ARG3_32, esi +| movzx edx, ch +| and edx, 0x7f +| shl edx, 7 +| or ARG2_32, edx | test cx, cx | jns >9 | mov ARG1_64, rax -| mov ARG2_32, ARG3_32 +|// XXX: I don't think this handles 64-bit values correctly. +|// Test with UINT64_MAX | callp upb_vdecode_max8_fast +|// rax return from function will contain new pointer +| mov ARG2_64, rdx | check_ptr_ret // Check for unterminated, >10-byte varint. |9: |.endmacro @@ -234,17 +259,22 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov PTR, rax |.endmacro | -|// Decode the tag -> edx. +|// Table-based field dispatch. +|// Inputs: +|// - ecx: first 4 bytes of tag +|// Outputs: +|// - edx: field number +|// - esi: wire type |// Could specialize this by avoiding the value masking: could just key the |// table on the raw (length-masked) varint to save 3-4 cycles of latency. |// Currently only support tables where all entries are in the array part. |.macro dyndispatch_, h |=>upb_getpclabel(plan, h, DYNDISPATCH): | decode_loaded_varint, 0 -| mov ecx, edx +| mov ecx, esi | shr ecx, 3 -| and edx, 0x7 // Note: this value is used in the FIELD pclabel below. -| cmp edx, UPB_WIRE_TYPE_END_GROUP +| and esi, 0x7 // Note: this value is used in the FIELD pclabel below. +| cmp esi, UPB_WIRE_TYPE_END_GROUP | je >1 || upb_jitmsginfo *mi = upb_getmsginfo(plan, h); | cmp ecx, mi->max_field_number // Bounds-check the field. @@ -278,10 +308,31 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | .endmacro |.endif | -|// Push a stack frame (not the CPU stack, the upb_decoder stack). 
-|.macro pushframe, h, field, end_offset_, endtype +|.macro pushsinkframe, handlers, field, endtype +| mov rax, DECODER->sink +| mov dword SINKFRAME->u.selector, getselector(field, endtype) +| lea rcx, [SINKFRAME + sizeof(upb_sinkframe)] // rcx for short addressing +| cmp rcx, SINK:rax->limit +| jae ->exit_jit // Frame stack overflow. +| mov64 r9, (uintptr_t)handlers +| mov SINKFRAME:rcx->h, r9 +| mov SINKFRAME:rcx->closure, CLOSURE +| mov SINK:rax->top_, rcx +| mov SINKFRAME:rcx->sink_, rax +| mov SINKFRAME, rcx +|.endmacro +| +|.macro popsinkframe +| sub SINKFRAME, sizeof(upb_sinkframe) +| mov rax, DECODER->sink +| mov SINK:rax->top_, SINKFRAME +| mov CLOSURE, SINKFRAME->closure +|.endmacro +| +|// Push a stack frame (not the CPU stack, the upb_pbdecoder stack). +|.macro pushframe, handlers, field, end_offset_, endtype |// Decoder Frame. -| lea rax, [FRAME + sizeof(upb_decoder_frame)] // rax for short addressing +| lea rax, [FRAME + sizeof(frame)] // rax for short addressing | cmp rax, DECODER->limit | jae ->exit_jit // Frame stack overflow. | mov64 r10, (uintptr_t)field @@ -289,36 +340,21 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } | mov qword FRAME:rax->end_ofs, end_offset_ | mov byte FRAME:rax->is_sequence, (endtype == UPB_HANDLER_ENDSEQ) | mov byte FRAME:rax->is_packed, 0 -|| if (upb_fielddef_type(field) == UPB_TYPE_GROUP && -|| endtype == UPB_HANDLER_ENDSUBMSG) { +|| if (upb_fielddef_istagdelim(field) && endtype == UPB_HANDLER_ENDSUBMSG) { | mov dword FRAME:rax->group_fieldnum, upb_fielddef_number(field) || } else { | mov dword FRAME:rax->group_fieldnum, 0xffffffff || } | mov DECODER->top, rax | mov FRAME, rax -|// Sink Frame. -| lea rcx, [SINKFRAME + sizeof(upb_sink_frame)] // rcx for short addressing -| cmp rcx, DECODER->sink.limit -| jae ->exit_jit // Frame stack overflow. 
-| mov dword SINKFRAME:rcx->end, getselector(field, endtype) -|| if (upb_fielddef_issubmsg(field)) { -| mov64 r9, (uintptr_t)upb_handlers_getsubhandlers(h, field) -|| } else { -| mov64 r9, (uintptr_t)h -|| } -| mov SINKFRAME:rcx->h, r9 -| mov DECODER->sink.top, rcx -| mov SINKFRAME, rcx +| pushsinkframe handlers, field, endtype |.endmacro | |.macro popframe -| sub FRAME, sizeof(upb_decoder_frame) +| sub FRAME, sizeof(frame) | mov DECODER->top, FRAME -| sub SINKFRAME, sizeof(upb_sink_frame) -| mov DECODER->sink.top, SINKFRAME +| popsinkframe | setmsgend -| mov CLOSURE, SINKFRAME->closure |.endmacro | |.macro setmsgend @@ -369,14 +405,6 @@ static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } #include #include "upb/pb/varint.h" -static upb_selector_t getselector(const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_getselector(f, type, &selector); - UPB_ASSERT_VAR(ok, ok); - return selector; -} - static upb_func *gethandler(const upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type) { return upb_handlers_gethandler(h, getselector(f, type)); @@ -387,73 +415,74 @@ static uintptr_t gethandlerdata(const upb_handlers *h, const upb_fielddef *f, return (uintptr_t)upb_handlers_gethandlerdata(h, getselector(f, type)); } -// Decodes the next val into ARG3, advances PTR. -static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, - uint8_t type, size_t tag_size, +// Decodes the next val into ARG2, advances PTR. +static void upb_decoderplan_jit_decodefield(decoderplan *plan, + size_t tag_size, const upb_handlers *h, const upb_fielddef *f) { // Decode the value into arg 3 for the callback. 
- switch (type) { - case UPB_TYPE(DOUBLE): + switch (upb_fielddef_descriptortype(f)) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: | movsd XMMARG1, qword [PTR + tag_size] | add PTR, 8 + tag_size break; - case UPB_TYPE(FIXED64): - case UPB_TYPE(SFIXED64): - | mov ARG3_64, qword [PTR + tag_size] + case UPB_DESCRIPTOR_TYPE_FIXED64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + | mov ARG2_64, qword [PTR + tag_size] | add PTR, 8 + tag_size break; - case UPB_TYPE(FLOAT): + case UPB_DESCRIPTOR_TYPE_FLOAT: | movss XMMARG1, dword [PTR + tag_size] | add PTR, 4 + tag_size break; - case UPB_TYPE(FIXED32): - case UPB_TYPE(SFIXED32): - | mov ARG3_32, dword [PTR + tag_size] + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + | mov ARG2_32, dword [PTR + tag_size] | add PTR, 4 + tag_size break; - case UPB_TYPE(BOOL): + case UPB_DESCRIPTOR_TYPE_BOOL: // Can't assume it's one byte long, because bool must be wire-compatible // with all of the varint integer types. | decode_varint tag_size - | test ARG3_64, ARG3_64 - | setne ARG3_8 // Other bytes left with val, should be ok. + | test ARG2_64, ARG2_64 + | setne al + | movzx ARG2_32, al break; - case UPB_TYPE(INT64): - case UPB_TYPE(UINT64): - case UPB_TYPE(INT32): - case UPB_TYPE(UINT32): - case UPB_TYPE(ENUM): + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_ENUM: | decode_varint tag_size break; - case UPB_TYPE(SINT64): + case UPB_DESCRIPTOR_TYPE_SINT64: // 64-bit zig-zag decoding. | decode_varint tag_size - | mov rax, ARG3_64 - | shr ARG3_64, 1 + | mov rax, ARG2_64 + | shr ARG2_64, 1 | and rax, 1 | neg rax - | xor ARG3_64, rax + | xor ARG2_64, rax break; - case UPB_TYPE(SINT32): + case UPB_DESCRIPTOR_TYPE_SINT32: // 32-bit zig-zag decoding. 
| decode_varint tag_size - | mov eax, ARG3_32 - | shr ARG3_32, 1 + | mov eax, ARG2_32 + | shr ARG2_32, 1 | and eax, 1 | neg eax - | xor ARG3_32, eax + | xor ARG2_32, eax break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { // We only handle the case where the entire string is in our current // buf, which sidesteps any security problems. The C path has more // robust checks. @@ -461,39 +490,46 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, | decode_loaded_varint tag_size | mov rdi, DECODER->end | sub rdi, rax - | cmp ARG3_64, rdi // if (len > d->end - str) + | cmp ARG2_64, rdi // if (len > d->end - str) | ja ->exit_jit // Can't deliver, whole string not in buf. | mov PTR, rax upb_func *handler = gethandler(h, f, UPB_HANDLER_STARTSTR); if (handler) { - | mov DECODER->tmp_len, ARG3_64 - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSTR) + | mov DECODER->tmp_len, ARG2_32 + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSTR | callp handler | check_ptr_ret - | mov ARG1_64, rax // sub-closure - | mov ARG4_64, DECODER->tmp_len + | mov CLOSURE, rax + | mov ARG3_32, DECODER->tmp_len } else { - | mov ARG1_64, CLOSURE - | mov ARG4_64, ARG3_64 + | mov ARG3_64, ARG2_64 } handler = gethandler(h, f, UPB_HANDLER_STRING); if (handler) { - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STRING) - | mov ARG3_64, PTR + // TODO: push a real frame so we can resume into the string. + // (but maybe do this only if the string breaks). + | pushsinkframe h, f, UPB_HANDLER_ENDSTR + + // size_t str(const upb_sinkframe *frame, const char *buf, size_t len) + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STRING + | mov ARG2_64, PTR | callp handler // TODO: properly handle returns other than "n" (the whole string). 
| add PTR, rax + | popsinkframe } else { - | add PTR, ARG4_64 + | add PTR, ARG3_64 } handler = gethandler(h, f, UPB_HANDLER_ENDSTR); if (handler) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSTR) + // bool endstr(const upb_sinkframe *frame); + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSTR | callp handler | check_bool_ret } @@ -501,10 +537,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } // Will dispatch callbacks and call submessage in a second. - case UPB_TYPE(MESSAGE): + case UPB_DESCRIPTOR_TYPE_MESSAGE: | decode_varint tag_size break; - case UPB_TYPE(GROUP): + case UPB_DESCRIPTOR_TYPE_GROUP: | add PTR, tag_size break; @@ -512,52 +548,58 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } } -static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, +static void upb_decoderplan_jit_callcb(decoderplan *plan, const upb_handlers *h, const upb_fielddef *f) { // Call callbacks. Specializing the append accessors didn't yield a speed // increase in benchmarks. if (upb_fielddef_issubmsg(f)) { - if (upb_fielddef_type(f) == UPB_TYPE(MESSAGE)) { - | mov rsi, PTR - | sub rsi, DECODER->buf - | add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len - } else { - assert(upb_fielddef_type(f) == UPB_TYPE(GROUP)); - | mov rsi, UPB_NONDELIMITED - } - | pushframe h, f, rsi, UPB_HANDLER_ENDSUBMSG - // Call startsubmsg handler (if any). 
upb_func *startsubmsg = gethandler(h, f, UPB_HANDLER_STARTSUBMSG); if (startsubmsg) { - // upb_sflow_t startsubmsg(void *closure, upb_value fval) - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSUBMSG); + // upb_sflow_t startsubmsg(const upb_sinkframe *frame) + | mov DECODER->tmp_len, ARG2_32 + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSUBMSG | callp startsubmsg | check_ptr_ret | mov CLOSURE, rax } - | mov qword SINKFRAME->closure, CLOSURE - // TODO: have to decide what to do with NULLs subhandlers (or whether to - // disallow them and require a full handlers tree to match the def tree). const upb_handlers *sub_h = upb_handlers_getsubhandlers(h, f); - assert(sub_h); - | call =>upb_getpclabel(plan, sub_h, STARTMSG) - | popframe + if (sub_h) { + if (upb_fielddef_istagdelim(f)) { + | mov rdx, UPB_NONDELIMITED + } else { + | mov esi, DECODER->tmp_len + | mov rdx, PTR + | sub rdx, DECODER->buf + | add rdx, DECODER->bufstart_ofs + | add rdx, rsi // = d->bufstart_ofs + (d->ptr - d->buf) + delim_len + } + | pushframe sub_h, f, rdx, UPB_HANDLER_ENDSUBMSG + | call =>upb_getpclabel(plan, sub_h, STARTMSG) + | popframe + } else { + if (upb_fielddef_istagdelim(f)) { + // Groups with no handlers not supported yet. + assert(false); + } else { + | mov esi, DECODER->tmp_len + | add PTR, rsi + } + } // Call endsubmsg handler (if any). 
upb_func *endsubmsg = gethandler(h, f, UPB_HANDLER_ENDSUBMSG); if (endsubmsg) { // upb_flow_t endsubmsg(void *closure, upb_value fval); - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSUBMSG); + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSUBMSG | callp endsubmsg | check_bool_ret } } else if (!upb_fielddef_isstring(f)) { - | mov ARG1_64, CLOSURE upb_handlertype_t handlertype = upb_handlers_getprimitivehandlertype(f); upb_func *handler = gethandler(h, f, handlertype); const upb_stdmsg_fval *fv = (void*)gethandlerdata(h, f, handlertype); @@ -565,24 +607,25 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, // Can't switch() on function pointers. if (handler == (void*)&upb_stdmsg_setint64 || handler == (void*)&upb_stdmsg_setuint64) { - | mov [ARG1_64 + fv->offset], ARG3_64 + | mov [CLOSURE + fv->offset], ARG2_64 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setdouble) { - | movsd qword [ARG1_64 + fv->offset], XMMARG1 + | movsd qword [CLOSURE + fv->offset], XMMARG1 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setint32 || handler == (void*)&upb_stdmsg_setuint32) { - | mov [ARG1_64 + fv->offset], ARG3_32 + | mov [CLOSURE + fv->offset], ARG2_32 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setfloat) { - | movss dword [ARG1_64 + fv->offset], XMMARG1 + | movss dword [CLOSURE + fv->offset], XMMARG1 | sethas CLOSURE, fv->hasbit } else if (handler == (void*)&upb_stdmsg_setbool) { - | mov [ARG1_64 + fv->offset], ARG3_8 + | mov [CLOSURE + fv->offset], ARG2_8 | sethas CLOSURE, fv->hasbit } else if (handler) { - // Load closure and fval into arg registers. 
- | mov64 ARG2_64, gethandlerdata(h, f, handlertype); + // bool value(const upb_sinkframe* frame, ctype val) + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, handlertype | callp handler | check_bool_ret } @@ -591,15 +634,27 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, static uint64_t upb_get_encoded_tag(const upb_fielddef *f) { uint32_t tag = (upb_fielddef_number(f) << 3) | - upb_decoder_types[upb_fielddef_type(f)].native_wire_type; + upb_decoder_types[upb_fielddef_descriptortype(f)].native_wire_type; uint64_t encoded_tag = upb_vencode32(tag); // No tag should be greater than 5 bytes. assert(encoded_tag <= 0xffffffffff); return encoded_tag; } +static void upb_decoderplan_jit_endseq(decoderplan *plan, + const upb_handlers *h, + const upb_fielddef *f) { + | popframe + upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); + if (endseq) { + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_ENDSEQ + | callp endseq + } +} + // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, +static void upb_decoderplan_jit_field(decoderplan *plan, const upb_handlers *h, const upb_fielddef *f, const upb_fielddef *next_f) { @@ -608,45 +663,51 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, int tag_size = upb_value_size(tag); // PC-label for the dispatch table. - // We check the wire type (which must be loaded in edx) because the + // We check the wire type (which must be loaded in esi) because the // table is keyed on field number, not type. |=>upb_getpclabel(plan, f, FIELD): - | cmp edx, (tag & 0x7) + | cmp esi, (tag & 0x7) | jne ->exit_jit // In the future: could be an unknown field or packed. 
|=>upb_getpclabel(plan, f, FIELD_NO_TYPECHECK): if (upb_fielddef_isseq(f)) { - | mov rsi, FRAME->end_ofs - | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ upb_func *startseq = gethandler(h, f, UPB_HANDLER_STARTSEQ); if (startseq) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_STARTSEQ); - | callp startseq + | mov ARG1_64, SINKFRAME + | load_handler_data h, f, UPB_HANDLER_STARTSEQ + | callp startseq | check_ptr_ret - | mov CLOSURE, rax + | mov CLOSURE, rax } - | mov qword SINKFRAME->closure, CLOSURE + | mov rsi, FRAME->end_ofs + | pushframe h, f, rsi, UPB_HANDLER_ENDSEQ } |1: // Label for repeating this field. - upb_decoderplan_jit_decodefield(plan, upb_fielddef_type(f), tag_size, h, f); + upb_decoderplan_jit_decodefield(plan, tag_size, h, f); upb_decoderplan_jit_callcb(plan, h, f); - // Epilogue: load next tag, check for repeated field. - | checkpoint h - | mov rcx, qword [PTR] + // This is kind of gross; future redesign should take into account how to + // make this work nicely. The difficult part is that the sequence can be + // broken either by end-of-message or by seeing a different field; in both + // cases we need to call the endseq handler, but what we do after that + // depends on which case triggered the end-of-sequence. + | mov DECODER->ptr, PTR + | cmp PTR, DECODER->jit_end + | jae ->exit_jit + | cmp PTR, DECODER->effective_end + | jb >2 + if (upb_fielddef_isseq(f)) { + upb_decoderplan_jit_endseq(plan, h, f); + } + | jmp =>upb_getpclabel(plan, h, ENDOFMSG) + |2: + | mov rcx, qword [PTR] if (upb_fielddef_isseq(f)) { | checktag tag | je <1 - upb_func *endseq = gethandler(h, f, UPB_HANDLER_ENDSEQ); - if (endseq) { - | mov ARG1_64, CLOSURE - | mov64 ARG2_64, gethandlerdata(h, f, UPB_HANDLER_ENDSEQ); - | callp endseq - } - | popframe - // Load next tag again (popframe clobbered it). + upb_decoderplan_jit_endseq(plan, h, f); + // Load next tag again (popframe/endseq clobbered it). 
| mov rcx, qword [PTR] } @@ -663,22 +724,22 @@ static int upb_compare_uint32(const void *a, const void *b) { return *(uint32_t*)a - *(uint32_t*)b; } -static void upb_decoderplan_jit_msg(upb_decoderplan *plan, +static void upb_decoderplan_jit_msg(decoderplan *plan, const upb_handlers *h) { |=>upb_getpclabel(plan, h, AFTER_STARTMSG): - // There was a call to get here, so we need to align the stack. - | sub rsp, 8 + | push rbp + | mov rbp, rsp | jmp >1 |=>upb_getpclabel(plan, h, STARTMSG): - // There was a call to get here, so we need to align the stack. - | sub rsp, 8 + | push rbp + | mov rbp, rsp // Call startmsg handler (if any): upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); if (startmsg) { // upb_flow_t startmsg(void *closure); - | mov ARG1_64, SINKFRAME->closure + | mov ARG1_64, SINKFRAME | callp startmsg | check_bool_ret } @@ -731,17 +792,18 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_endmsg_handler *endmsg = upb_handlers_getendmsg(h); if (endmsg) { // void endmsg(void *closure, upb_status *status) { - | mov ARG1_64, SINKFRAME->closure - | lea ARG2_64, DECODER->sink.status + | mov ARG1_64, SINKFRAME + | mov ARG2_64, DECODER->sink + | mov ARG2_64, SINK:ARG2_64->pipeline_ + | add ARG2_64, offsetof(upb_pipeline, status_) | callp endmsg } - // Counter previous alignment. - | add rsp, 8 + | leave | ret } -static void upb_decoderplan_jit(upb_decoderplan *plan) { +static void upb_decoderplan_jit(decoderplan *plan) { // The JIT prologue/epilogue trampoline that is generated in this function // does not depend on the handlers, so it will never vary. Ideally we would // put it in an object file and just link it into upb so we could have only a @@ -763,18 +825,18 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { // Align stack. 
| sub rsp, 8 | mov DECODER, ARG1_64 + | mov DECODER->saved_rbp, rbp | mov FRAME, DECODER:ARG1_64->top - | mov SINKFRAME, DECODER:ARG1_64->sink.top + | mov rax, DECODER:ARG1_64->sink + | mov SINKFRAME, SINK:rax->top_ | mov CLOSURE, SINKFRAME->closure | mov PTR, DECODER->ptr // TODO: push return addresses for re-entry (will be necessary for multiple // buffer support). | call ARG2_64 - |->exit_jit: - // Restore stack pointer to where it was before any "call" instructions - // inside our generated code. + | mov rbp, DECODER->saved_rbp | lea rsp, [rbp - 48] // Counter previous alignment. | add rsp, 8 @@ -794,10 +856,10 @@ static void upb_decoderplan_jit(upb_decoderplan *plan) { } } -static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, +static void upb_decoderplan_jit_assignpclabels(decoderplan *plan, const upb_handlers *h) { // Limit the DFS. - if (upb_inttable_lookupptr(&plan->pclabels, h)) return; + if (upb_inttable_lookupptr(&plan->pclabels, h, NULL)) return; upb_inttable_insertptr(&plan->pclabels, h, upb_value_uint32(plan->pclabel_count)); @@ -832,14 +894,14 @@ static void upb_decoderplan_jit_assignpclabels(upb_decoderplan *plan, info->tablearray = malloc((info->max_field_number + 1) * sizeof(void*)); } -static void upb_decoderplan_makejit(upb_decoderplan *plan) { +static void upb_decoderplan_makejit(decoderplan *plan) { upb_inttable_init(&plan->msginfo, UPB_CTYPE_PTR); plan->debug_info = NULL; // Assign pclabels. 
plan->pclabel_count = 0; upb_inttable_init(&plan->pclabels, UPB_CTYPE_UINT32); - upb_decoderplan_jit_assignpclabels(plan, plan->handlers); + upb_decoderplan_jit_assignpclabels(plan, plan->dest_handlers); void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals)); dasm_init(plan, 1); @@ -867,7 +929,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { const upb_handlers *h = (const upb_handlers*)upb_inttable_iter_key(&i); upb_jitmsginfo *mi = upb_getmsginfo(plan, h); // We jump to after the startmsg handler since it is called before entering - // the JIT (either by upb_decoder or by a previous call to the JIT). + // the JIT (either by upb_pbdecoder or by a previous call to the JIT). mi->jit_func = plan->jit_code + dasm_getpclabel(plan, upb_getpclabel(plan, h, AFTER_STARTMSG)); for (uint32_t j = 0; j <= mi->max_field_number; j++) { @@ -899,7 +961,7 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { #endif } -static void upb_decoderplan_freejit(upb_decoderplan *plan) { +static void upb_decoderplan_freejit(decoderplan *plan) { upb_inttable_iter i; upb_inttable_begin(&i, &plan->msginfo); for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { @@ -913,9 +975,10 @@ static void upb_decoderplan_freejit(upb_decoderplan *plan) { // TODO: unregister } -static void upb_decoder_enterjit(upb_decoder *d) { - if (d->plan->jit_code && - d->sink.top == d->sink.stack && +static void upb_decoder_enterjit(upb_pbdecoder *d, decoderplan *plan) { + if (plan->jit_code && + d->top == d->stack && + d->sink->top_ == d->sink->stack && d->ptr && d->ptr < d->jit_end) { #ifndef NDEBUG register uint64_t rbx asm ("rbx") = 11; @@ -926,8 +989,8 @@ static void upb_decoder_enterjit(upb_decoder *d) { #endif // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. 
- void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code; - upb_jitmsginfo *mi = upb_getmsginfo(d->plan, d->plan->handlers); + void (*upb_jit_decode)(upb_pbdecoder *d, void*) = (void*)plan->jit_code; + upb_jitmsginfo *mi = upb_getmsginfo(plan, plan->dest_handlers); assert(mi); upb_jit_decode(d, mi->jit_func); assert(d->ptr <= d->end); diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 4e69c0cd56..bcde039d94 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -16,33 +16,37 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, void *owner, upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); + // Create handlers. + const upb_handlers *reader_h = upb_descreader_gethandlers(&reader_h); + const upb_handlers *decoder_h = + upb_pbdecoder_gethandlers(reader_h, false, &decoder_h); - const upb_handlers *h = upb_descreader_newhandlers(&h); - upb_decoderplan *p = upb_decoderplan_new(h, false); - upb_decoder d; - upb_decoder_init(&d); - upb_handlers_unref(h, &h); - upb_descreader r; - upb_descreader_init(&r); - upb_decoder_resetplan(&d, p); - upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r); + // Create pipeline. + upb_pipeline pipeline; + upb_pipeline_init(&pipeline, NULL, 0, upb_realloc, NULL); + upb_pipeline_donateref(&pipeline, reader_h, &reader_h); + upb_pipeline_donateref(&pipeline, decoder_h, &decoder_h); - upb_success_t ret = upb_decoder_decode(&d); - if (status) upb_status_copy(status, upb_decoder_status(&d)); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_decoderplan_unref(p); - if (ret != UPB_OK) { - upb_descreader_uninit(&r); + // Create sinks. + upb_sink *reader_sink = upb_pipeline_newsink(&pipeline, reader_h); + upb_sink *decoder_sink = upb_pipeline_newsink(&pipeline, decoder_h); + upb_pbdecoder *d = upb_sinkframe_userdata(upb_sink_base(decoder_sink)); + upb_pbdecoder_resetsink(d, reader_sink); + + // Push input data. 
+ bool ok = upb_bytestream_putstr(decoder_sink, str, len); + + if (status) upb_status_copy(status, upb_pipeline_status(&pipeline)); + if (!ok) { + upb_pipeline_uninit(&pipeline); return NULL; } - upb_def **defs = upb_descreader_getdefs(&r, owner, n); + + upb_descreader *r = upb_sinkframe_userdata(upb_sink_base(reader_sink)); + upb_def **defs = upb_descreader_getdefs(r, owner, n); upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); - upb_descreader_uninit(&r); + upb_pipeline_uninit(&pipeline); return defscopy; } diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 3770afcd56..91c1e2d746 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -14,8 +14,9 @@ #include #include +#include "upb/sink.h" + struct _upb_textprinter { - upb_bytesink *sink; int indent_depth; bool single_line; upb_status status; @@ -24,18 +25,17 @@ struct _upb_textprinter { #define CHECK(x) if ((x) < 0) goto err; static int indent(upb_textprinter *p) { + int i; if (!p->single_line) - CHECK(upb_bytesink_putrepeated(p->sink, ' ', p->indent_depth*2)); + for (i = 0; i < p->indent_depth * 2; i++) + putchar(' '); return 0; -err: return -1; } static int endfield(upb_textprinter *p) { - CHECK(upb_bytesink_putc(p->sink, p->single_line ? ' ' : '\n')); + putchar(p->single_line ? ' ' : '\n'); return 0; -err: - return -1; } static int putescaped(upb_textprinter *p, const char *buf, size_t len, @@ -51,7 +51,7 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len, for (; buf < end; buf++) { if (dstend - dst < 4) { - CHECK(upb_bytesink_write(p->sink, dstbuf, dst - dstbuf)); + fwrite(dstbuf, dst - dstbuf, 1, stdout); dst = dstbuf; } @@ -79,24 +79,35 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len, last_hex_escape = is_hex_escape; } // Flush remaining data. 
- CHECK(upb_bytesink_write(p->sink, dst, dst - dstbuf)); + fwrite(dst, dst - dstbuf, 1, stdout); return 0; -err: - return -1; } #define TYPE(name, ctype, fmt) \ - static bool put ## name(void *_p, void *fval, ctype val) { \ - upb_textprinter *p = _p; \ - const upb_fielddef *f = fval; \ - CHECK(indent(p)); \ - CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ - CHECK(upb_bytesink_writestr(p->sink, ": ")); \ - CHECK(upb_bytesink_printf(p->sink, fmt, val)); \ - CHECK(endfield(p)); \ - return true; \ - err: \ - return false; \ + static bool put ## name(const upb_sinkframe *frame, ctype val) { \ + upb_textprinter *p = upb_sinkframe_userdata(frame); \ + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); \ + CHECK(indent(p)); \ + puts(upb_fielddef_name(f)); \ + puts(": "); \ + printf(fmt, val); \ + CHECK(endfield(p)); \ + return true; \ + err: \ + return false; \ +} + +static bool putbool(const upb_sinkframe *frame, bool val) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); + CHECK(indent(p)); + puts(upb_fielddef_name(f)); + puts(": "); + puts(val ? "true" : "false"); + CHECK(endfield(p)); + return true; +err: + return false; } #define STRINGIFY_HELPER(x) #x @@ -108,72 +119,61 @@ TYPE(uint32, uint32_t, "%" PRIu32); TYPE(uint64, uint64_t, "%" PRIu64) TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") -TYPE(bool, bool, "%hhu"); // Output a symbolic value from the enum if found, else just print as int32. 
-static bool putenum(void *_p, void *fval, int32_t val) { - - upb_textprinter *p = _p; - const upb_fielddef *f = fval; +static bool putenum(const upb_sinkframe *frame, int32_t val) { + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f)); const char *label = upb_enumdef_iton(enum_def, val); if (label) { - CHECK(upb_bytesink_writestr(p->sink, label)); + puts(label); } else { - CHECK(putint32(_p, fval, val)); + CHECK(putint32(frame, val)); } return true; err: return false; } -static void *startstr(void *_p, void *fval, size_t size_hint) { +static void *startstr(const upb_sinkframe *frame, size_t size_hint) { UPB_UNUSED(size_hint); - UPB_UNUSED(fval); - upb_textprinter *p = _p; - CHECK(upb_bytesink_putc(p->sink, '"')); + upb_textprinter *p = upb_sinkframe_userdata(frame); + putchar('"'); return p; -err: - return UPB_BREAK; } -static bool endstr(void *_p, void *fval) { - UPB_UNUSED(fval); - upb_textprinter *p = _p; - CHECK(upb_bytesink_putc(p->sink, '"')); +static bool endstr(const upb_sinkframe *frame) { + putchar('"'); return true; -err: - return false; } -static size_t putstr(void *_p, void *fval, const char *buf, size_t len) { - upb_textprinter *p = _p; - const upb_fielddef *f = fval; - CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE(STRING))); +static size_t putstr(const upb_sinkframe *frame, const char *buf, size_t len) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); + CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); return len; err: return 0; } -static void *startsubmsg(void *_p, void *fval) { - upb_textprinter *p = _p; - const upb_fielddef *f = fval; +static void *startsubmsg(const upb_sinkframe *frame) { + upb_textprinter *p = upb_sinkframe_userdata(frame); + const upb_fielddef *f = upb_sinkframe_handlerdata(frame); CHECK(indent(p)); - CHECK(upb_bytesink_printf(p->sink, "%s 
{", upb_fielddef_name(f))); + printf("%s {", upb_fielddef_name(f)); if (!p->single_line) - CHECK(upb_bytesink_putc(p->sink, '\n')); + putchar('\n'); p->indent_depth++; - return _p; + return p; err: return UPB_BREAK; } -static bool endsubmsg(void *_p, void *fval) { - UPB_UNUSED(fval); - upb_textprinter *p = _p; +static bool endsubmsg(const upb_sinkframe *frame) { + upb_textprinter *p = upb_sinkframe_userdata(frame); p->indent_depth--; CHECK(indent(p)); - CHECK(upb_bytesink_putc(p->sink, '}')); + putchar('}'); CHECK(endfield(p)); return true; err: @@ -187,9 +187,7 @@ upb_textprinter *upb_textprinter_new() { void upb_textprinter_free(upb_textprinter *p) { free(p); } -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line) { - p->sink = sink; +void upb_textprinter_reset(upb_textprinter *p, bool single_line) { p->single_line = single_line; p->indent_depth = 0; } @@ -202,21 +200,15 @@ static void onmreg(void *c, upb_handlers *h) { upb_fielddef *f = upb_msg_iter_field(&i); switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: - case UPB_TYPE_SINT32: - case UPB_TYPE_SFIXED32: upb_handlers_setint32(h, f, putint32, f, NULL); break; - case UPB_TYPE_SINT64: - case UPB_TYPE_SFIXED64: case UPB_TYPE_INT64: upb_handlers_setint64(h, f, putint64, f, NULL); break; case UPB_TYPE_UINT32: - case UPB_TYPE_FIXED32: upb_handlers_setuint32(h, f, putuint32, f, NULL); break; case UPB_TYPE_UINT64: - case UPB_TYPE_FIXED64: upb_handlers_setuint64(h, f, putuint64, f, NULL); break; case UPB_TYPE_FLOAT: @@ -234,7 +226,6 @@ static void onmreg(void *c, upb_handlers *h) { upb_handlers_setstring(h, f, putstr, f, NULL); upb_handlers_setendstr(h, f, endstr, f, NULL); break; - case UPB_TYPE_GROUP: case UPB_TYPE_MESSAGE: upb_handlers_setstartsubmsg(h, f, &startsubmsg, f, NULL); upb_handlers_setendsubmsg(h, f, &endsubmsg, f, NULL); @@ -250,5 +241,5 @@ static void onmreg(void *c, upb_handlers *h) { const upb_handlers *upb_textprinter_newhandlers(const void *owner, const 
upb_msgdef *m) { - return upb_handlers_newfrozen(m, owner, &onmreg, NULL); + return upb_handlers_newfrozen(m, NULL, owner, &onmreg, NULL); } diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h index 6d111d2827..7b653e7741 100644 --- a/upb/pb/textprinter.h +++ b/upb/pb/textprinter.h @@ -8,7 +8,6 @@ #ifndef UPB_TEXT_H_ #define UPB_TEXT_H_ -#include "upb/bytestream.h" #include "upb/handlers.h" #ifdef __cplusplus @@ -20,8 +19,7 @@ typedef struct _upb_textprinter upb_textprinter; upb_textprinter *upb_textprinter_new(); void upb_textprinter_free(upb_textprinter *p); -void upb_textprinter_reset(upb_textprinter *p, upb_bytesink *sink, - bool single_line); +void upb_textprinter_reset(upb_textprinter *p, bool single_line); const upb_handlers *upb_textprinter_newhandlers(const void *owner, const upb_msgdef *m); diff --git a/upb/pb/varint.h b/upb/pb/varint.h index c4d67baee0..d33872dc76 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -36,10 +36,14 @@ typedef enum { /* Zig-zag encoding/decoding **************************************************/ -INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } -INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } -INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } -INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } +UPB_INLINE int32_t upb_zzdec_32(uint32_t n) { + return (n >> 1) ^ -(int32_t)(n & 1); +} +UPB_INLINE int64_t upb_zzdec_64(uint64_t n) { + return (n >> 1) ^ -(int64_t)(n & 1); +} +UPB_INLINE uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } +UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } /* Decoding *******************************************************************/ @@ -65,7 +69,7 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r); // up to 10 bytes, so it must not be used unless there are at least ten bytes // left in the buffer! 
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ +UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ uint8_t *p = (uint8_t*)_p; \ if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \ upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \ @@ -81,21 +85,21 @@ UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino); // Our canonical functions for decoding varints, based on the currently // favored best-performing implementations. -INLINE upb_decoderet upb_vdecode_fast(const char *p) { +UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { if (sizeof(long) == 8) return upb_vdecode_check2_massimino(p); else return upb_vdecode_check2_branch32(p); } -INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { +UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) { return upb_vdecode_max8_massimino(r); } /* Encoding *******************************************************************/ -INLINE int upb_value_size(uint64_t val) { +UPB_INLINE int upb_value_size(uint64_t val) { #ifdef __GNUC__ int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0. #else @@ -110,7 +114,7 @@ INLINE int upb_value_size(uint64_t val) { // bytes long), returning how many bytes were used. // // TODO: benchmark and optimize if necessary. -INLINE size_t upb_vencode64(uint64_t val, char *buf) { +UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { if (val == 0) { buf[0] = 0; return 1; } size_t i = 0; while (val) { @@ -123,7 +127,7 @@ INLINE size_t upb_vencode64(uint64_t val, char *buf) { } // Encodes a 32-bit varint, *not* sign-extended. 
-INLINE uint64_t upb_vencode32(uint32_t val) { +UPB_INLINE uint64_t upb_vencode32(uint32_t val) { char buf[UPB_PB_VARINT_MAX_LEN]; size_t bytes = upb_vencode64(val, buf); uint64_t ret = 0; diff --git a/upb/refcounted.c b/upb/refcounted.c index 54ad735db9..1e517b7182 100644 --- a/upb/refcounted.c +++ b/upb/refcounted.c @@ -136,8 +136,8 @@ static const void *unobfuscate_v(upb_value x) { static upb_inttable reftracks = UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR); static upb_inttable *trygettab(const void *p) { - const upb_value *v = upb_inttable_lookupptr(&reftracks, p); - return v ? upb_value_getptr(*v) : NULL; + upb_value v; + return upb_inttable_lookupptr(&reftracks, p, &v) ? upb_value_getptr(v) : NULL; } // Gets or creates the tracking table for the given owner. @@ -155,9 +155,9 @@ static upb_inttable *gettab(const void *p) { static void track(const upb_refcounted *r, const void *owner, bool ref2) { upb_lock(); upb_inttable *refs = gettab(owner); - const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); - if (v) { - trackedref *ref = (trackedref*)unobfuscate_v(*v); + upb_value v; + if (upb_inttable_lookup(refs, obfuscate(r), &v)) { + trackedref *ref = (trackedref*)unobfuscate_v(v); // Since we allow multiple ref2's for the same to/from pair without // allocating separate memory for each one, we lose the fine-grained // tracking behavior we get with regular refs. Since ref2s only happen @@ -177,10 +177,11 @@ static void track(const upb_refcounted *r, const void *owner, bool ref2) { static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { upb_lock(); upb_inttable *refs = gettab(owner); - const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); + upb_value v; + bool found = upb_inttable_lookup(refs, obfuscate(r), &v); // This assert will fail if an owner attempts to release a ref it didn't have. 
- assert(v); - trackedref *ref = (trackedref*)unobfuscate_v(*v); + UPB_ASSERT_VAR(found, found); + trackedref *ref = (trackedref*)unobfuscate_v(v); assert(ref->is_ref2 == ref2); if (--ref->count == 0) { free(ref); @@ -197,9 +198,10 @@ static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { upb_lock(); upb_inttable *refs = gettab(owner); - const upb_value *v = upb_inttable_lookup(refs, obfuscate(r)); - assert(v); - trackedref *ref = (trackedref*)unobfuscate_v(*v); + upb_value v; + bool found = upb_inttable_lookup(refs, obfuscate(r), &v); + UPB_ASSERT_VAR(found, found); + trackedref *ref = (trackedref*)unobfuscate_v(v); assert(ref->obj == r); assert(ref->is_ref2 == ref2); upb_unlock(); @@ -339,14 +341,16 @@ UPB_NORETURN static void oom(tarjan *t) { } uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { - const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); - return v ? upb_value_getuint64(*v) : 0; + upb_value v; + return upb_inttable_lookupptr(&t->objattr, r, &v) ? 
+ upb_value_getuint64(v) : 0; } uint64_t getattr(const tarjan *t, const upb_refcounted *r) { - const upb_value *v = upb_inttable_lookupptr(&t->objattr, r); - assert(v); - return upb_value_getuint64(*v); + upb_value v; + bool found = upb_inttable_lookupptr(&t->objattr, r, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getuint64(v); } void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { @@ -420,9 +424,10 @@ static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { uint32_t *group(tarjan *t, upb_refcounted *r) { assert(color(t, r) == WHITE); uint64_t groupnum = getattr(t, r) >> 8; - const upb_value *v = upb_inttable_lookup(&t->groups, groupnum); - assert(v); - return upb_value_getptr(*v); + upb_value v; + bool found = upb_inttable_lookup(&t->groups, groupnum, &v); + UPB_ASSERT_VAR(found, found); + return upb_value_getptr(v); } // If the group leader for this object's group has not previously been set, @@ -430,10 +435,11 @@ uint32_t *group(tarjan *t, upb_refcounted *r) { static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { assert(color(t, r) == WHITE); uint64_t leader_slot = (getattr(t, r) >> 8) + 1; - const upb_value *v = upb_inttable_lookup(&t->groups, leader_slot); - assert(v); - if (upb_value_getptr(*v)) { - return upb_value_getptr(*v); + upb_value v; + bool found = upb_inttable_lookup(&t->groups, leader_slot, &v); + UPB_ASSERT_VAR(found, found); + if (upb_value_getptr(v)) { + return upb_value_getptr(v); } else { upb_inttable_remove(&t->groups, leader_slot, NULL); upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); diff --git a/upb/sink.c b/upb/sink.c index d829fa97ff..f3af23d8f2 100644 --- a/upb/sink.c +++ b/upb/sink.c @@ -7,61 +7,258 @@ #include "upb/sink.h" +#include +#include + +static void upb_sink_init(upb_sink *s, const upb_handlers *h, upb_pipeline *p); +static void upb_sink_resetobj(void *obj); +static const upb_frametype upb_sink_frametype; + static bool chkstack(upb_sink *s) { - if 
(s->top + 1 >= s->limit) { - upb_status_seterrliteral(&s->status, "Nesting too deep."); + if (s->top_ + 1 >= s->limit) { + upb_status_seterrliteral(&s->pipeline_->status_, "Nesting too deep."); return false; } else { return true; } } -static upb_selector_t getselector(const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_getselector(f, type, &selector); - UPB_ASSERT_VAR(ok, ok); - return selector; +#define alignof(type) offsetof (struct { char c; type member; }, member) + +typedef union { double u; void *p; long l; } maxalign_t; +static const size_t maxalign = alignof(maxalign_t); + +static void *align_up(void *p) { + if (!p) return NULL; + uintptr_t val = (uintptr_t)p; + uintptr_t aligned = + val % maxalign == 0 ? val : val + maxalign - (val % maxalign); + return (void*)aligned; +} + +void *upb_realloc(void *ud, void *ptr, size_t size) { + UPB_UNUSED(ud); + return realloc(ptr, size); +} + + +/* upb_pipeline ***************************************************************/ + +// For the moment we get fixed-size blocks of this size, but we could change +// this strategy if necessary. +#define BLOCK_SIZE 8192 + +struct region { + struct region *prev; + maxalign_t data[1]; // Region data follows. +}; + +size_t regionsize(size_t usable_size) { + return sizeof(struct region) - sizeof(maxalign_t) + usable_size; +} + +struct obj { + struct obj *prev; + const upb_frametype *ft; + maxalign_t data; // Region data follows. +}; + +size_t objsize(size_t memsize) { + return sizeof(struct obj) - sizeof(maxalign_t) + memsize; +} + +void upb_pipeline_init(upb_pipeline *p, void *initial_mem, size_t initial_size, + void *(*realloc)(void *ud, void *ptr, size_t bytes), + void *ud) { + p->realloc = realloc; + p->ud = ud; + p->bump_top = initial_mem; + p->bump_limit = initial_mem ? 
initial_mem + initial_size : NULL; + p->region_head = NULL; + p->obj_head = NULL; + p->last_alloc = NULL; + upb_status_init(&p->status_); +} + +void upb_pipeline_uninit(upb_pipeline *p) { + for (struct obj *o = p->obj_head; o; o = o->prev) { + if (o->ft->uninit) + o->ft->uninit(&o->data); + } + + for (struct region *r = p->region_head; r; ) { + struct region *prev = r->prev; + p->realloc(p->ud, r, 0); + r = prev; + } + upb_status_uninit(&p->status_); +} + +void *upb_pipeline_alloc(upb_pipeline *p, size_t bytes) { + void *mem = align_up(p->bump_top); + if (!mem || mem > p->bump_limit || p->bump_limit - mem < bytes) { + size_t size = regionsize(UPB_MAX(BLOCK_SIZE, bytes)); + struct region *r; + if (!p->realloc || !(r = p->realloc(p->ud, NULL, size))) { + return NULL; + } + r->prev = p->region_head; + p->region_head = r; + p->bump_limit = (char*)r + size; + mem = &r->data[0]; + assert(p->bump_limit > mem); + assert(p->bump_limit - mem >= bytes); + } + p->bump_top = mem + bytes; + p->last_alloc = mem; + return mem; +} + +void *upb_pipeline_realloc(upb_pipeline *p, void *ptr, + size_t oldsize, size_t bytes) { + if (ptr && ptr == p->last_alloc && + p->bump_limit - ptr >= bytes) { + p->bump_top = ptr + bytes; + return ptr; + } else { + void *mem = upb_pipeline_alloc(p, bytes); + if (mem && ptr) memcpy(mem, ptr, oldsize < bytes ? oldsize : bytes); /* NULL-safe; never copy past the new block */ + return mem; + } +} + +void *upb_pipeline_allocobj(upb_pipeline *p, const upb_frametype *ft) { + struct obj *obj = upb_pipeline_alloc(p, objsize(ft->size)); + if (!obj) return NULL; + + obj->prev = p->obj_head; + obj->ft = ft; + p->obj_head = obj; + if (ft->init) ft->init(&obj->data); + return &obj->data; +} + +void upb_pipeline_reset(upb_pipeline *p) { + upb_status_clear(&p->status_); + for (struct obj *o = p->obj_head; o; o = o->prev) { + if (o->ft->reset) + o->ft->reset(&o->data); + } +} + +upb_sink *upb_pipeline_newsink(upb_pipeline *p, const upb_handlers *handlers) { + upb_sink *s = upb_pipeline_allocobj(p, &upb_sink_frametype); + if (s) upb_sink_init(s, handlers, p); /* allocobj returns NULL on OOM */ +
return s; +} + +const upb_status *upb_pipeline_status(const upb_pipeline *p) { + return &p->status_; +} + +typedef struct { + const upb_handlers *h; +} handlersref_t; + +static void freehandlersref(void *r) { + handlersref_t *ref = r; + upb_handlers_unref(ref->h, &ref->h); +} + +static const upb_frametype handlersref_frametype = { + sizeof(handlersref_t), + NULL, + freehandlersref, + NULL, +}; + +void upb_pipeline_donateref( + upb_pipeline *p, const upb_handlers *h, const void *owner) { + handlersref_t *ref = upb_pipeline_allocobj(p, &handlersref_frametype); + upb_handlers_donateref(h, owner, &ref->h); + ref->h = h; } -void upb_sink_init(upb_sink *s, const upb_handlers *h) { + +/* upb_sinkframe **************************************************************/ + +int upb_sinkframe_depth(const upb_sinkframe* frame) { + return frame - frame->sink_->stack; +} + +const upb_handlers* upb_sinkframe_handlers(const upb_sinkframe* frame) { + return frame->h; +} + +upb_pipeline *upb_sinkframe_pipeline(const upb_sinkframe* frame) { + return frame->sink_->pipeline_; +} + + +/* upb_sink *******************************************************************/ + +static const upb_frametype upb_sink_frametype = { + sizeof(upb_sink), + NULL, + NULL, + upb_sink_resetobj, +}; + +void upb_sink_reset(upb_sink *s, void *closure) { + s->top_ = s->stack; + s->top_->closure = closure; +} + +static void upb_sink_resetobj(void *obj) { + upb_sink *s = obj; + s->top_ = s->stack; +} + +static void upb_sink_init(upb_sink *s, const upb_handlers *h, upb_pipeline *p) { + s->pipeline_ = p; s->limit = &s->stack[UPB_MAX_NESTING]; - s->top = NULL; s->stack[0].h = h; - upb_status_init(&s->status); + s->top_ = s->stack; + s->top_->sink_ = s; /* base frame needs its back-pointer too */ + s->top_->closure = + h->ft ? upb_pipeline_allocobj(p, h->ft) : NULL; } -void upb_sink_reset(upb_sink *s, void *closure) { - s->top = s->stack; - s->top->closure = closure; +const upb_sinkframe *upb_sink_top(const upb_sink *s) { + return s->top_; +} + +const upb_sinkframe
*upb_sink_base(const upb_sink *s) { + return s->stack; } -void upb_sink_uninit(upb_sink *s) { - upb_status_uninit(&s->status); +upb_pipeline *upb_sink_pipeline(const upb_sink *s) { + return s->pipeline_; } bool upb_sink_startmsg(upb_sink *s) { - const upb_handlers *h = s->top->h; + const upb_handlers *h = s->top_->h; upb_startmsg_handler *startmsg = upb_handlers_getstartmsg(h); - return startmsg ? startmsg(s->top->closure) : true; + return startmsg ? startmsg(s->top_) : true; } -void upb_sink_endmsg(upb_sink *s, upb_status *status) { - UPB_UNUSED(status); - assert(s->top == s->stack); - upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top->h); - if (endmsg) endmsg(s->top->closure, &s->status); +void upb_sink_endmsg(upb_sink *s) { + assert(s->top_ == s->stack); + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top_->h); + if (endmsg) { + endmsg(s->top_, &s->pipeline_->status_); + } } #define PUTVAL(type, ctype, htype) \ - bool upb_sink_put ## type(upb_sink *s, const upb_fielddef *f, ctype val) { \ - upb_selector_t selector; \ - if (!upb_getselector(f, UPB_HANDLER_ ## htype, &selector)) return false; \ + bool upb_sink_put ## type(upb_sink *s, upb_selector_t sel, ctype val) { \ + const upb_handlers *h = s->top_->h; \ upb_ ## type ## _handler *handler = (upb_ ## type ## _handler*) \ - upb_handlers_gethandler(s->top->h, selector); \ + upb_handlers_gethandler(h, sel); \ if (handler) { \ - void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ - if (!handler(s->top->closure, data, val)) return false; \ + s->top_->u.handler_data = upb_handlers_gethandlerdata(h, sel); \ + bool ok = handler(s->top_, val); \ + if (!ok) return false; \ } \ return true; \ } @@ -75,131 +272,153 @@ PUTVAL(double, double, DOUBLE); PUTVAL(bool, bool, BOOL); #undef PUTVAL -size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, +size_t upb_sink_putstring(upb_sink *s, upb_selector_t sel, const char *buf, size_t n) { - upb_selector_t selector; - if (!upb_getselector(f, 
UPB_HANDLER_STRING, &selector)) return false; - upb_string_handler *handler = (upb_string_handler*) - upb_handlers_gethandler(s->top->h, selector); + const upb_handlers *h = s->top_->h; + upb_string_handler *handler = + (upb_string_handler*)upb_handlers_gethandler(h, sel); + if (handler) { - void *data = upb_handlers_gethandlerdata(s->top->h, selector); \ - return handler(s->top->closure, data, buf, n); + s->top_->u.handler_data = upb_handlers_gethandlerdata(h, sel);; + n = handler(s->top_, buf, n); } + return n; } -bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f) { - assert(upb_fielddef_isseq(f)); +bool upb_sink_startseq(upb_sink *s, upb_selector_t sel) { if (!chkstack(s)) return false; - void *subc = s->top->closure; - const upb_handlers *h = s->top->h; - upb_selector_t selector; - if (!upb_getselector(f, UPB_HANDLER_STARTSEQ, &selector)) return false; + void *subc = s->top_->closure; + const upb_handlers *h = s->top_->h; upb_startfield_handler *startseq = - (upb_startfield_handler*)upb_handlers_gethandler(h, selector); + (upb_startfield_handler*)upb_handlers_gethandler(h, sel); + if (startseq) { - subc = startseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)); - if (!subc) return false; + s->top_->u.handler_data = upb_handlers_gethandlerdata(h, sel); + subc = startseq(s->top_); + if (subc == UPB_BREAK) { + return false; + } } - ++s->top; - s->top->end = getselector(f, UPB_HANDLER_ENDSEQ); - s->top->h = h; - s->top->closure = subc; + s->top_->u.selector = upb_getendselector(sel); + ++s->top_; + s->top_->h = h; + s->top_->closure = subc; + s->top_->sink_ = s; return true; } -bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f) { - upb_selector_t selector = s->top->end; - assert(selector == getselector(f, UPB_HANDLER_ENDSEQ)); - --s->top; +bool upb_sink_endseq(upb_sink *s, upb_selector_t sel) { + --s->top_; + assert(sel == s->top_->u.selector); - const upb_handlers *h = s->top->h; + const upb_handlers *h = s->top_->h; upb_endfield_handler 
*endseq = - (upb_endfield_handler*)upb_handlers_gethandler(h, selector); - return endseq ? - endseq(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : - true; + (upb_endfield_handler*)upb_handlers_gethandler(h, sel); + + if (endseq) { + bool ok = endseq(s->top_); + if (!ok) { + ++s->top_; + return false; + } + } + + return true; } -bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint) { - assert(upb_fielddef_isstring(f)); +bool upb_sink_startstr(upb_sink *s, upb_selector_t sel, size_t size_hint) { if (!chkstack(s)) return false; - void *subc = s->top->closure; - const upb_handlers *h = s->top->h; - upb_selector_t selector; - if (!upb_getselector(f, UPB_HANDLER_STARTSTR, &selector)) return false; + void *subc = s->top_->closure; + const upb_handlers *h = s->top_->h; upb_startstr_handler *startstr = - (upb_startstr_handler*)upb_handlers_gethandler(h, selector); + (upb_startstr_handler*)upb_handlers_gethandler(h, sel); + if (startstr) { - subc = startstr( - s->top->closure, upb_handlers_gethandlerdata(h, selector), size_hint); - if (!subc) return false; + s->top_->u.handler_data = upb_handlers_gethandlerdata(h, sel); + subc = startstr(s->top_, size_hint); + if (subc == UPB_BREAK) { + return false; + } } - ++s->top; - s->top->end = getselector(f, UPB_HANDLER_ENDSTR); - s->top->h = h; - s->top->closure = subc; + s->top_->u.selector = upb_getendselector(sel); + ++s->top_; + s->top_->h = h; + s->top_->closure = subc; + s->top_->sink_ = s; return true; } -bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f) { - upb_selector_t selector = s->top->end; - assert(selector == getselector(f, UPB_HANDLER_ENDSTR)); - --s->top; - - const upb_handlers *h = s->top->h; +bool upb_sink_endstr(upb_sink *s, upb_selector_t sel) { + --s->top_; + assert(sel == s->top_->u.selector); + const upb_handlers *h = s->top_->h; upb_endfield_handler *endstr = - (upb_endfield_handler*)upb_handlers_gethandler(h, selector); - return endstr ? 
- endstr(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : - true; + (upb_endfield_handler*)upb_handlers_gethandler(h, sel); + + if (endstr) { + bool ok = endstr(s->top_); + if (!ok) { + ++s->top_; + return false; + } + } + + return true; } -bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f) { - assert(upb_fielddef_issubmsg(f)); +bool upb_sink_startsubmsg(upb_sink *s, upb_selector_t sel) { if (!chkstack(s)) return false; - const upb_handlers *h = s->top->h; - upb_selector_t selector; - if (!upb_getselector(f, UPB_HANDLER_STARTSUBMSG, &selector)) return false; + void *subc = s->top_->closure; + const upb_handlers *h = s->top_->h; upb_startfield_handler *startsubmsg = - (upb_startfield_handler*)upb_handlers_gethandler(h, selector); - void *subc = s->top->closure; + (upb_startfield_handler*)upb_handlers_gethandler(h, sel); if (startsubmsg) { - void *data = upb_handlers_gethandlerdata(h, selector); - subc = startsubmsg(s->top->closure, data); - if (!subc) return false; + s->top_->u.handler_data = upb_handlers_gethandlerdata(h, sel); + subc = startsubmsg(s->top_); + if (subc == UPB_BREAK) { + return false; + } } - ++s->top; - s->top->end = getselector(f, UPB_HANDLER_ENDSUBMSG); - s->top->h = upb_handlers_getsubhandlers(h, f); - s->top->closure = subc; + s->top_->u.selector= upb_getendselector(sel); + ++s->top_; + s->top_->h = upb_handlers_getsubhandlers_sel(h, sel); + // TODO: should add support for submessages without any handlers + assert(s->top_->h); + s->top_->closure = subc; + s->top_->sink_ = s; upb_sink_startmsg(s); return true; } -bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f) { - upb_selector_t selector = s->top->end; - assert(selector == getselector(f, UPB_HANDLER_ENDSUBMSG)); +bool upb_sink_endsubmsg(upb_sink *s, upb_selector_t sel) { + upb_endmsg_handler *endmsg = upb_handlers_getendmsg(s->top_->h); + if (endmsg) endmsg(s->top_, &s->pipeline_->status_); + --s->top_; - upb_endmsg_handler *endmsg = 
upb_handlers_getendmsg(s->top->h); - if (endmsg) endmsg(s->top->closure, &s->status); - --s->top; + assert(sel == s->top_->u.selector); + const upb_handlers *h = s->top_->h; + upb_endfield_handler *endsubmsg = + (upb_endfield_handler*)upb_handlers_gethandler(h, sel); - const upb_handlers *h = s->top->h; - upb_endfield_handler *endfield = - (upb_endfield_handler*)upb_handlers_gethandler(h, selector); - return endfield ? - endfield(s->top->closure, upb_handlers_gethandlerdata(h, selector)) : - true; + if (endsubmsg) { + bool ok = endsubmsg(s->top_); + if (!ok) { + ++s->top_; + return false; + } + } + + return true; } const upb_handlers *upb_sink_tophandlers(upb_sink *s) { - return s->top->h; + return s->top_->h; } diff --git a/upb/sink.h b/upb/sink.h index 2c0f0370cd..333575fae3 100644 --- a/upb/sink.h +++ b/upb/sink.h @@ -23,60 +23,451 @@ #include "upb/handlers.h" #ifdef __cplusplus -extern "C" { +namespace upb { +class Pipeline; +class Sink; +template class SeededPipeline; +} +typedef upb::Pipeline upb_pipeline; +typedef upb::Sink upb_sink; +UPB_INLINE upb_sink* upb_sinkframe_sink(const upb_sinkframe* frame); +UPB_INLINE void* upb_sinkframe_userdata(const upb_sinkframe* frame); +UPB_INLINE void* upb_sinkframe_handlerdata(const upb_sinkframe* frame); +#else +struct upb_pipeline; +struct upb_sink; +typedef struct upb_pipeline upb_pipeline; +typedef struct upb_sink upb_sink; +#endif + +struct upb_frametype { + size_t size; + void (*init)(void* obj); + void (*uninit)(void* obj); + void (*reset)(void* obj); +}; + +#ifdef __cplusplus + +// A upb::Pipeline is a set of sinks that can send data to each other. The +// pipeline object also contains an arena allocator that the sinks and their +// associated processing state can use for fast memory allocation. This makes +// pipelines very fast to construct and destroy, especially if the arena is +// supplied with an initial block of memory. 
If this initial block of memory +// is from the C stack and is large enough, then actual heap allocation can be +// avoided entirely, which significantly reduces overhead in some cases. +// +// All sinks and processing state are automatically freed when the pipeline is +// destroyed, so Free() is not necessary or possible. Allocated objects can +// optionally specify a Reset() callback that will be called whenever the +// pipeline is Reset() or destroyed. This can be used to free any outside +// resources the object is holding. +// +// Pipelines (and sinks/objects allocated from them) are not thread-safe! +class upb::Pipeline { + public: + // Initializes the pipeline's arena with the given initial memory that will + // be used before allocating memory using the given allocation function. + // The "ud" pointer will be passed as the first parameter to the realloc + // callback, and can be used to pass user-specific state. + Pipeline(void *initial_mem, size_t initial_size, + void *(*realloc)(void *ud, void *ptr, size_t size), void *ud); + ~Pipeline(); + + // Returns a newly-allocated Sink for the given handlers. The sink will + // live as long as the pipeline does. Caller retains ownership of the + // handlers object, which must outlive the pipeline. + // + // TODO(haberman): add an option for the sink to take a ref, so the handlers + // don't have to outlive it? This would be simpler but imposes a minimum cost. + // Taking an atomic ref is not *so* bad in the single-threaded case, but this + // can degrade heavily under contention, so we need a way to avoid it in + // cases where this overhead would be significant and the caller can easily + // guarantee the outlive semantics. + Sink* NewSink(const Handlers* handlers); + + // Accepts a ref donated from the given owner. Will unref the Handlers when + // the Pipeline is destroyed. + void DonateRef(const Handlers* h, const void* owner); + + // The current error status for the pipeline.
+ const upb::Status& status() const; + + // Calls "reset" on all Sinks and resettable state objects in the arena, and + // resets the error status. Useful for resetting processing state so new + // input can be accepted. + void Reset(); + + // Allocates/reallocates memory of the given size, or returns NULL if no + // memory is available. It is not necessary (or possible) to manually free + // the memory obtained from these functions. + void* Alloc(size_t size); + void* Realloc(void* ptr, size_t old_size, size_t size); + + // Allocates an object with the given FrameType. Note that this object may + // *not* be resized with Realloc(). + void* AllocObject(const FrameType* type); + + private: +#else +struct upb_pipeline { +#endif + void *(*realloc)(void *ud, void *ptr, size_t size); + void *ud; + void *bump_top; // Current alloc offset, either from initial or dyn region. + void *bump_limit; // Limit of current alloc block. + void *obj_head; // Linked list of objects with "reset" functions. + void *region_head; // Linked list of dyn regions we got from user's realloc(). + void *last_alloc; + upb_status status_; +}; + +#ifdef __cplusplus + +// For convenience, a template for a pipeline with an array of initial memory. +template +class upb::SeededPipeline : public upb::Pipeline { + public: + SeededPipeline(void *(*realloc)(void *ud, void *ptr, size_t size), void *ud) + : Pipeline(mem_, initial_size, realloc, ud) { + } + + private: + char mem_[initial_size]; +}; + +class upb::SinkFrame { + public: + // Returns the sink that this frame belongs to. + Sink* sink() const; + + // Returns the pipeline that this sink and frame belong to. + Pipeline* pipeline() const; + + // The depth of this frame (counts all kind of frames (sequence, submessage, + // and string frames). + int depth() const; + + // The Handlers object for this frame. 
+ const Handlers* handlers() const; + + // Returns the user data that is bound to this sink frame (as returned + // by the Start{SubMessage,String,Sequence} handler, or passed to + // Sink::Reset()). + void* userdata() const; + + // A templated version of userdata() that type-checks the templated return + // type. + // + // TODO(haberman): this isn't truly robust until sequence and string frames + // have distinct FrameTypes in the Handlers. + template <class T> + T* GetUserdata() const { +#ifdef NDEBUG + return static_cast<T*>(userdata()); +#else + const FrameType* type = handlers()->frame_type(); + if (!type || type == GetFrameType<T>()) { + return static_cast<T*>(userdata()); + } else { + assert(false); + return NULL; + } #endif + } + // Returns the data that was bound to the currently-executing callback in the + // Handlers object. If not currently in a handler, the results are undefined. + void* handler_data() const; -/* upb_sink *******************************************************************/ + private: + UPB_DISALLOW_POD_OPS(SinkFrame); + friend class upb::Sink; + friend upb_sink* ::upb_sinkframe_sink(const upb_sinkframe* frame); + friend void* ::upb_sinkframe_userdata(const upb_sinkframe* frame); + friend void* ::upb_sinkframe_handlerdata(const upb_sinkframe* frame); -typedef struct { - upb_selector_t end; // From the enclosing message (unused at top-level). +#else +struct upb_sinkframe { +#endif + upb_sink *sink_; const upb_handlers *h; void *closure; -} upb_sink_frame; -typedef struct { - upb_sink_frame *top, *limit; - upb_sink_frame stack[UPB_MAX_NESTING]; - upb_status status; -} upb_sink; + union { + // For the top frame (sink->top), the handler_data for the + // currently-executing callback, otherwise undefined. + // TODO(haberman): have a special pointer value to indicate "not in a + // callback"; this will be a way to enforce non-reentrancy of a sink. + void *handler_data; -// Caller retains ownership of the handlers object. 
-void upb_sink_init(upb_sink *s, const upb_handlers *h); + // For other frames, the END* callback that will run when the subframe is + // popped (for example, for a "sequence" frame the frame above it will be a + // UPB_HANDLER_ENDSEQ handler). But this is only necessary for assertion + // checking inside upb_sink and can be omitted if the sink has only one + // caller. + // TODO(haberman): have a mechanism for ensuring that a sink only has one + // caller. + upb_selector_t selector; + } u; +}; -// Resets the state of the sink so that it is ready to accept new input. -// Any state from previously received data is discarded. "Closure" will be -// used as the top-level closure. -void upb_sink_reset(upb_sink *s, void *closure); +#ifdef __cplusplus + +// A upb::Sink is an object that binds a upb::Handlers object to some runtime +// state. It is the object that can actually call a set of handlers. +// +// Unlike upb::Def and upb::Handlers, upb::Sink is never frozen, immutable, or +// thread-safe. You can create as many of them as you want, but each one may +// only be used in a single thread at a time. +// +// If we compare with class-based OOP, you can think of a upb::Def as an +// abstract base class, a upb::Handlers as a concrete derived class, and a +// upb::Sink as an object (class instance). +// +// Each upb::Sink lives in exactly one pipeline. +class upb::Sink { + public: -void upb_sink_uninit(upb_sink *s); + // Resets the state of the sink so that it is ready to accept new input. + // Any state from previously received data is discarded. "Closure" will be + // used as the top-level closure. + void Reset(void *closure); -// Returns the handlers at the top of the stack. -const upb_handlers *upb_sink_tophandlers(upb_sink *s); + // Returns the top-most and base (lowest) frame of the stack, respectively. + const SinkFrame* top() const; + const SinkFrame* base() const; -// Functions for pushing data into the sink. 
-// These return false if processing should stop (either due to error or just -// to suspend). + // Returns the pipeline that this sink comes from. + Pipeline* pipeline() const; + + // Functions for pushing data into the sink. + // + // These return false if processing should stop (either due to error or just + // to suspend). + // + // These may not be called from within one of the same sink's handlers (in + // other words, handlers are not re-entrant). + + // Should be called at the start and end of processing. + bool StartMessage(); + void EndMessage(); + + // Putting of individual values. These work for both repeated and + // non-repeated fields, but for repeated fields you must wrap them in + // calls to StartSequence()/EndSequence(). + bool PutInt32(Handlers::Selector s, int32_t val); + bool PutInt64(Handlers::Selector s, int64_t val); + bool PutUInt32(Handlers::Selector s, uint32_t val); + bool PutUInt64(Handlers::Selector s, uint64_t val); + bool PutFloat(Handlers::Selector s, float val); + bool PutDouble(Handlers::Selector s, double val); + bool PutBool(Handlers::Selector s, bool val); + + // Putting of string/bytes values. Each string can consist of zero or more + // non-contiguous buffers of data. + bool StartString(Handlers::Selector s, size_t size_hint); + size_t PutStringBuffer(Handlers::Selector s, const char *buf, size_t len); + bool EndString(Handlers::Selector s); + + // For submessage fields. + bool StartSubMessage(Handlers::Selector s); + bool EndSubMessage(Handlers::Selector s); + + // For repeated fields of any type, the sequence of values must be wrapped in + // these calls. + bool StartSequence(Handlers::Selector s); + bool EndSequence(Handlers::Selector s); + + private: + UPB_DISALLOW_POD_OPS(Sink); +#else +struct upb_sink { +#endif + upb_pipeline *pipeline_; + upb_sinkframe *top_, *limit; + upb_sinkframe stack[UPB_MAX_NESTING]; +}; + +// C API. 
+UPB_INLINE upb_sink *upb_sinkframe_sink(const upb_sinkframe* frame) { + return frame->sink_; +} + +UPB_INLINE void *upb_sinkframe_userdata(const upb_sinkframe* frame) { + return frame->closure; +} + +UPB_INLINE void *upb_sinkframe_handlerdata(const upb_sinkframe* frame) { + return frame->u.handler_data; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void *upb_realloc(void *ud, void *ptr, size_t size); +void upb_pipeline_init(upb_pipeline *p, void *initial_mem, size_t initial_size, + void *(*realloc)(void *ud, void *ptr, size_t size), + void *ud); +void upb_pipeline_uninit(upb_pipeline *p); +void *upb_pipeline_alloc(upb_pipeline *p, size_t size); +void *upb_pipeline_realloc( + upb_pipeline *p, void *ptr, size_t old_size, size_t size); +void *upb_pipeline_allocobj(upb_pipeline *p, const upb_frametype *type); +void upb_pipeline_reset(upb_pipeline *p); +void upb_pipeline_donateref( + upb_pipeline *p, const upb_handlers *h, const void *owner); +upb_sink *upb_pipeline_newsink(upb_pipeline *p, const upb_handlers *h); +const upb_status *upb_pipeline_status(const upb_pipeline *p); + +int upb_sinkframe_depth(const upb_sinkframe* frame); +const upb_handlers* upb_sinkframe_handlers(const upb_sinkframe* frame); +upb_pipeline* upb_sinkframe_pipeline(const upb_sinkframe* frame); + +void upb_sink_reset(upb_sink *s, void *closure); +upb_pipeline *upb_sink_pipeline(const upb_sink *s); +const upb_sinkframe *upb_sink_top(const upb_sink *s); +const upb_sinkframe *upb_sink_base(const upb_sink *s); bool upb_sink_startmsg(upb_sink *s); -void upb_sink_endmsg(upb_sink *s, upb_status *status); -bool upb_sink_putint32(upb_sink *s, const upb_fielddef *f, int32_t val); -bool upb_sink_putint64(upb_sink *s, const upb_fielddef *f, int64_t val); -bool upb_sink_putuint32(upb_sink *s, const upb_fielddef *f, uint32_t val); -bool upb_sink_putuint64(upb_sink *s, const upb_fielddef *f, uint64_t val); -bool upb_sink_putfloat(upb_sink *s, const upb_fielddef *f, float val); -bool 
upb_sink_putdouble(upb_sink *s, const upb_fielddef *f, double val); -bool upb_sink_putbool(upb_sink *s, const upb_fielddef *f, bool val); -bool upb_sink_startstr(upb_sink *s, const upb_fielddef *f, size_t size_hint); -size_t upb_sink_putstring(upb_sink *s, const upb_fielddef *f, const char *buf, +void upb_sink_endmsg(upb_sink *s); +bool upb_sink_putint32(upb_sink *s, upb_selector_t sel, int32_t val); +bool upb_sink_putint64(upb_sink *s, upb_selector_t sel, int64_t val); +bool upb_sink_putuint32(upb_sink *s, upb_selector_t sel, uint32_t val); +bool upb_sink_putuint64(upb_sink *s, upb_selector_t sel, uint64_t val); +bool upb_sink_putfloat(upb_sink *s, upb_selector_t sel, float val); +bool upb_sink_putdouble(upb_sink *s, upb_selector_t sel, double val); +bool upb_sink_putbool(upb_sink *s, upb_selector_t sel, bool val); +bool upb_sink_startstr(upb_sink *s, upb_selector_t sel, size_t size_hint); +size_t upb_sink_putstring(upb_sink *s, upb_selector_t sel, const char *buf, size_t len); -bool upb_sink_endstr(upb_sink *s, const upb_fielddef *f); -bool upb_sink_startsubmsg(upb_sink *s, const upb_fielddef *f); -bool upb_sink_endsubmsg(upb_sink *s, const upb_fielddef *f); -bool upb_sink_startseq(upb_sink *s, const upb_fielddef *f); -bool upb_sink_endseq(upb_sink *s, const upb_fielddef *f); +bool upb_sink_endstr(upb_sink *s, upb_selector_t sel); +bool upb_sink_startsubmsg(upb_sink *s, upb_selector_t sel); +bool upb_sink_endsubmsg(upb_sink *s, upb_selector_t sel); +bool upb_sink_startseq(upb_sink *s, upb_selector_t sel); +bool upb_sink_endseq(upb_sink *s, upb_selector_t sel); #ifdef __cplusplus } /* extern "C" */ #endif +#ifdef __cplusplus + +namespace upb { + + +inline Pipeline::Pipeline(void *initial_mem, size_t initial_size, + void *(*realloc)(void *ud, void *ptr, size_t size), + void *ud) { + upb_pipeline_init(this, initial_mem, initial_size, realloc, ud); +} +inline Pipeline::~Pipeline() { + upb_pipeline_uninit(this); +} +inline void* Pipeline::Alloc(size_t size) { + return 
upb_pipeline_alloc(this, size); +} +inline void* Pipeline::Realloc(void* ptr, size_t old_size, size_t size) { + return upb_pipeline_realloc(this, ptr, old_size, size); +} +inline void* Pipeline::AllocObject(const upb::FrameType* type) { + return upb_pipeline_allocobj(this, type); +} +inline void Pipeline::Reset() { + upb_pipeline_reset(this); +} +inline const upb::Status& Pipeline::status() const { + return *upb_pipeline_status(this); +} +inline Sink* Pipeline::NewSink(const upb::Handlers* handlers) { + return upb_pipeline_newsink(this, handlers); +} +inline void Pipeline::DonateRef(const upb::Handlers* h, const void *owner) { + return upb_pipeline_donateref(this, h, owner); +} + +inline Sink* SinkFrame::sink() const { + return upb_sinkframe_sink(this); +} +inline Pipeline* SinkFrame::pipeline() const { + return upb_sinkframe_pipeline(this); +} +inline void* SinkFrame::userdata() const { + return upb_sinkframe_userdata(this); +} +inline void* SinkFrame::handler_data() const { + return upb_sinkframe_handlerdata(this); +} +inline int SinkFrame::depth() const { + return upb_sinkframe_depth(this); +} +inline const Handlers* SinkFrame::handlers() const { + return upb_sinkframe_handlers(this); +} + +inline void Sink::Reset(void *closure) { + upb_sink_reset(this, closure); +} +inline Pipeline* Sink::pipeline() const { + return upb_sink_pipeline(this); +} +inline const SinkFrame* Sink::top() const { + return upb_sink_top(this); +} +inline const SinkFrame* Sink::base() const { + return upb_sink_base(this); +} +inline bool Sink::StartMessage() { + return upb_sink_startmsg(this); +} +inline void Sink::EndMessage() { + upb_sink_endmsg(this); +} +inline bool Sink::PutInt32(Handlers::Selector sel, int32_t val) { + return upb_sink_putint32(this, sel, val); +} +inline bool Sink::PutInt64(Handlers::Selector sel, int64_t val) { + return upb_sink_putint64(this, sel, val); +} +inline bool Sink::PutUInt32(Handlers::Selector sel, uint32_t val) { + return upb_sink_putuint32(this, sel, 
val); +} +inline bool Sink::PutUInt64(Handlers::Selector sel, uint64_t val) { + return upb_sink_putuint64(this, sel, val); +} +inline bool Sink::PutFloat(Handlers::Selector sel, float val) { + return upb_sink_putfloat(this, sel, val); +} +inline bool Sink::PutDouble(Handlers::Selector sel, double val) { + return upb_sink_putdouble(this, sel, val); +} +inline bool Sink::PutBool(Handlers::Selector sel, bool val) { + return upb_sink_putbool(this, sel, val); +} +inline bool Sink::StartString(Handlers::Selector sel, size_t size_hint) { + return upb_sink_startstr(this, sel, size_hint); +} +inline size_t Sink::PutStringBuffer(Handlers::Selector sel, const char *buf, + size_t len) { + return upb_sink_putstring(this, sel, buf, len); +} +inline bool Sink::EndString(Handlers::Selector sel) { + return upb_sink_endstr(this, sel); +} +inline bool Sink::StartSubMessage(Handlers::Selector sel) { + return upb_sink_startsubmsg(this, sel); +} +inline bool Sink::EndSubMessage(Handlers::Selector sel) { + return upb_sink_endsubmsg(this, sel); +} +inline bool Sink::StartSequence(Handlers::Selector sel) { + return upb_sink_startseq(this, sel); +} +inline bool Sink::EndSequence(Handlers::Selector sel) { + return upb_sink_endseq(this, sel); +} + +} // namespace upb +#endif + #endif diff --git a/upb/symtab.c b/upb/symtab.c index cd82bddc91..2092787c43 100644 --- a/upb/symtab.c +++ b/upb/symtab.c @@ -10,8 +10,6 @@ #include #include -#include "upb/bytestream.h" - bool upb_symtab_isfrozen(const upb_symtab *s) { return upb_refcounted_isfrozen(upb_upcast(s)); } @@ -77,16 +75,18 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, upb_deftype_t type, const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = v ? upb_value_getptr(*v) : NULL; + upb_value v; + upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ? 
+ upb_value_getptr(v) : NULL; if (ret) upb_def_ref(ret, owner); return ret; } const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, const void *owner) { - const upb_value *v = upb_strtable_lookup(&s->symtab, sym); - upb_def *def = v ? upb_value_getptr(*v) : NULL; + upb_value v; + upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? + upb_value_getptr(v) : NULL; upb_msgdef *ret = NULL; if(def && def->type == UPB_DEF_MSG) { ret = upb_downcast_msgdef_mutable(def); @@ -103,8 +103,8 @@ static upb_def *upb_resolvename(const upb_strtable *t, if(sym[0] == UPB_SYMBOL_SEPARATOR) { // Symbols starting with '.' are absolute, so we do a single lookup. // Slice to omit the leading '.' - const upb_value *v = upb_strtable_lookup(t, sym + 1); - return v ? upb_value_getptr(*v) : NULL; + upb_value v; + return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL; } else { // Remove components from base until we find an entry or run out. // TODO: This branch is totally broken, but currently not used. @@ -134,8 +134,9 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, upb_status *s) { // Memoize results of this function for efficiency (since we're traversing a // DAG this is not needed to limit the depth of the search). - const upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); - if (v) return upb_value_getbool(*v); + upb_value v; + if (upb_inttable_lookup(seen, (uintptr_t)def, &v)) + return upb_value_getbool(v); // Visit submessages for all messages in the SCC. bool need_dup = false; @@ -143,10 +144,10 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, do { assert(upb_def_isfrozen(def)); if (def->type == UPB_DEF_FIELD) continue; - const upb_value *v = upb_strtable_lookup(addtab, upb_def_fullname(def)); - if (v) { + upb_value v; + if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) { // Because we memoize we should not visit a node after we have dup'd it. 
- assert(((upb_def*)upb_value_getptr(*v))->came_from_user); + assert(((upb_def*)upb_value_getptr(v))->came_from_user); need_dup = true; } const upb_msgdef *m = upb_dyncast_msgdef(def); @@ -169,7 +170,7 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, do { if (def->type == UPB_DEF_FIELD) continue; const char *name = upb_def_fullname(def); - if (upb_strtable_lookup(addtab, name) == NULL) { + if (!upb_strtable_lookup(addtab, name, NULL)) { upb_def *newdef = upb_def_dup(def, new_owner); if (!newdef) goto oom; newdef->came_from_user = false; @@ -210,7 +211,7 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, status, "Anonymous defs cannot be added to a symtab"); goto err; } - if (upb_strtable_lookup(&addtab, fullname) != NULL) { + if (upb_strtable_lookup(&addtab, fullname, NULL)) { upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); goto err; } @@ -263,10 +264,8 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, } if (!upb_fielddef_resolvedefault(f)) { - upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); - size_t len; - const char *ptr = upb_byteregion_getptr(r, 0, &len); - upb_status_seterrf(status, "couldn't resolve enum default '%s'", ptr); + upb_status_seterrf(status, "couldn't resolve enum default '%s'", + upb_fielddef_defaultstr(f, NULL)); goto err; } } diff --git a/upb/table.c b/upb/table.c index 21457a05b7..a54e7151d1 100644 --- a/upb/table.c +++ b/upb/table.c @@ -38,22 +38,22 @@ char *upb_strdup(const char *s) { return p; } -static upb_tabkey upb_strkey(const char *str) { +static upb_tabkey strkey(const char *str) { upb_tabkey k; k.str = (char*)str; return k; } -typedef const upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); -typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2); +typedef const upb_tabent *hashfunc_t(const upb_table *t, upb_tabkey key); +typedef bool eqlfunc_t(upb_tabkey k1, upb_tabkey k2); /* Base table 
(shared code) ***************************************************/ -static bool upb_table_isfull(upb_table *t) { +static bool isfull(upb_table *t) { return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; } -static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) { +static bool init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) { t->count = 0; t->type = type; t->size_lg2 = size_lg2; @@ -69,29 +69,41 @@ static bool upb_table_init(upb_table *t, upb_ctype_t type, uint8_t size_lg2) { return true; } -static void upb_table_uninit(upb_table *t) { free((void*)t->entries); } +static void uninit(upb_table *t) { free((void*)t->entries); } -static upb_tabent *upb_table_emptyent(upb_table *t) { +static upb_tabent *emptyent(upb_table *t) { upb_tabent *e = (upb_tabent*)t->entries + upb_table_size(t); while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static const upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, - upb_hashfunc_t *hash, - upb_eqlfunc_t *eql) { +static const upb_tabent *findentry(const upb_table *t, upb_tabkey key, + hashfunc_t *hash, eqlfunc_t *eql) { if (t->size_lg2 == 0) return NULL; const upb_tabent *e = hash(t, key); if (upb_tabent_isempty(e)) return NULL; while (1) { - if (eql(e->key, key)) return &e->val; + if (eql(e->key, key)) return e; if ((e = e->next) == NULL) return NULL; } } +static bool lookup(const upb_table *t, upb_tabkey key, upb_value *v, + hashfunc_t *hash, eqlfunc_t *eql) { + const upb_tabent *e = findentry(t, key, hash, eql); + if (e) { + if (v) { + _upb_value_setval(v, e->val, t->type); + } + return true; + } else { + return false; + } +} + // The given key must not already exist in the table. 
-static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, - upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { - assert(upb_table_lookup(t, key, hash, eql) == NULL); +static void insert(upb_table *t, upb_tabkey key, upb_value val, + hashfunc_t *hash, eqlfunc_t *eql) { + assert(findentry(t, key, hash, eql) == NULL); assert(val.type == t->type); t->count++; upb_tabent *mainpos_e = (upb_tabent*)hash(t, key); @@ -101,7 +113,7 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, our_e->next = NULL; } else { // Collision. - upb_tabent *new_e = upb_table_emptyent(t); + upb_tabent *new_e = emptyent(t); // Head of collider's chain. upb_tabent *chain = (upb_tabent*)hash(t, mainpos_e->key); if (chain == mainpos_e) { @@ -125,26 +137,27 @@ static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, } } our_e->key = key; - our_e->val = val; - assert(upb_table_lookup(t, key, hash, eql) == &our_e->val); + our_e->val = val.val; + assert(findentry(t, key, hash, eql) == our_e); } -static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, - upb_tabkey *removed, - upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { +static bool rm(upb_table *t, upb_tabkey key, upb_value *val, + upb_tabkey *removed, hashfunc_t *hash, eqlfunc_t *eql) { upb_tabent *chain = (upb_tabent*)hash(t, key); if (upb_tabent_isempty(chain)) return false; if (eql(chain->key, key)) { // Element to remove is at the head of its chain. t->count--; - if (val) *val = chain->val; + if (val) { + _upb_value_setval(val, chain->val, t->type); + } if (chain->next) { upb_tabent *move = (upb_tabent*)chain->next; *chain = *move; - *removed = move->key; + if (removed) *removed = move->key; move->key.num = 0; // Make the slot empty. } else { - *removed = chain->key; + if (removed) *removed = chain->key; chain->key.num = 0; // Make the slot empty. 
} return true; @@ -154,11 +167,13 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, chain = (upb_tabent*)chain->next; if (chain->next) { // Found element to remove. - if (val) *val = chain->next->val; - upb_tabent *remove = (upb_tabent*)chain->next; - *removed = remove->key; - remove->key.num = 0; - chain->next = remove->next; + if (val) { + _upb_value_setval(val, chain->next->val, t->type); + } + upb_tabent *rm = (upb_tabent*)chain->next; + if (removed) *removed = rm->key; + rm->key.num = 0; + chain->next = rm->next; t->count--; return true; } else { @@ -167,8 +182,7 @@ static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, } } -static const upb_tabent *upb_table_next(const upb_table *t, - const upb_tabent *e) { +static const upb_tabent *next(const upb_table *t, const upb_tabent *e) { const upb_tabent *end = t->entries + upb_table_size(t); do { if (++e == end) return NULL; } while(e->key.num == 0); return e; @@ -176,8 +190,8 @@ static const upb_tabent *upb_table_next(const upb_table *t, // TODO: is calculating t->entries - 1 undefined behavior? If so find a better // solution. -static const upb_tabent *upb_table_begin(const upb_table *t) { - return upb_table_next(t, t->entries - 1); +static const upb_tabent *begin(const upb_table *t) { + return next(t, t->entries - 1); } @@ -185,30 +199,30 @@ static const upb_tabent *upb_table_begin(const upb_table *t) { // A simple "subclass" of upb_table that only adds a hash function for strings. -static const upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { +static const upb_tabent *strhash(const upb_table *t, upb_tabkey key) { // Could avoid the strlen() by using a hash function that terminates on NULL. 
return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); } -static bool upb_streql(upb_tabkey k1, upb_tabkey k2) { +static bool streql(upb_tabkey k1, upb_tabkey k2) { return strcmp(k1.str, k2.str) == 0; } bool upb_strtable_init(upb_strtable *t, upb_ctype_t type) { - return upb_table_init(&t->t, type, 2); + return init(&t->t, type, 2); } void upb_strtable_uninit(upb_strtable *t) { for (size_t i = 0; i < upb_table_size(&t->t); i++) free((void*)t->t.entries[i].key.str); - upb_table_uninit(&t->t); + uninit(&t->t); } bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { - if (upb_table_isfull(&t->t)) { + if (isfull(&t->t)) { // Need to resize. New table of double the size, add old elements to it. upb_strtable new_table; - if (!upb_table_init(&new_table.t, t->t.type, t->t.size_lg2 + 1)) + if (!init(&new_table.t, t->t.type, t->t.size_lg2 + 1)) return false; upb_strtable_iter i; upb_strtable_begin(&i, t); @@ -220,29 +234,31 @@ bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { *t = new_table; } if ((k = upb_strdup(k)) == NULL) return false; - upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql); + insert(&t->t, strkey(k), v, &strhash, &streql); return true; } -const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { - return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql); +bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v) { + return lookup(&t->t, strkey(key), v, &strhash, &streql); } bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) { - upb_tabkey removed; - bool found = upb_table_remove( - &t->t, upb_strkey(key), val, &removed, &upb_strhash, &upb_streql); - if (found) free((void*)removed.str); - return found; + upb_tabkey tabkey; + if (rm(&t->t, strkey(key), val, &tabkey, &strhash, &streql)) { + free((void*)tabkey.str); + return true; + } else { + return false; + } } void 
upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { i->t = t; - i->e = upb_table_begin(&t->t); + i->e = begin(&t->t); } void upb_strtable_next(upb_strtable_iter *i) { - i->e = upb_table_next(&i->t->t, i->e); + i->e = next(&i->t->t, i->e); } @@ -251,7 +267,7 @@ void upb_strtable_next(upb_strtable_iter *i) { // For inttables we use a hybrid structure where small keys are kept in an // array and large keys are put in the hash table. -static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) { +static bool inteql(upb_tabkey k1, upb_tabkey k2) { return k1.num == k2.num; } @@ -259,9 +275,23 @@ size_t upb_inttable_count(const upb_inttable *t) { return t->t.count + t->array_count; } +static void check(upb_inttable *t) { + UPB_UNUSED(t); +#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) + // This check is very expensive (makes inserts/deletes O(N)). + size_t count = 0; + upb_inttable_iter i; + upb_inttable_begin(&i, t); + for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { + assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL)); + } + assert(count == upb_inttable_count(t)); +#endif +} + bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, size_t asize, int hsize_lg2) { - if (!upb_table_init(&t->t, type, hsize_lg2)) return false; + if (!init(&t->t, type, hsize_lg2)) return false; // Always make the array part at least 1 long, so that we know key 0 // won't be in the hash part, which simplifies things. 
t->array_size = UPB_MAX(1, asize); @@ -269,10 +299,11 @@ bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t type, size_t array_bytes = t->array_size * sizeof(upb_value); t->array = malloc(array_bytes); if (!t->array) { - upb_table_uninit(&t->t); + uninit(&t->t); return false; } memset((void*)t->array, 0xff, array_bytes); + check(t); return true; } @@ -281,58 +312,50 @@ bool upb_inttable_init(upb_inttable *t, upb_ctype_t type) { } void upb_inttable_uninit(upb_inttable *t) { - upb_table_uninit(&t->t); + uninit(&t->t); free((void*)t->array); } -static void upb_inttable_check(upb_inttable *t) { - UPB_UNUSED(t); -#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) - // This check is very expensive (makes inserts/deletes O(N)). - size_t count = 0; - upb_inttable_iter i; - upb_inttable_begin(&i, t); - for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { - const upb_value *v = upb_inttable_lookup(t, upb_inttable_iter_key(&i)); - assert(v); - } - assert(count == upb_inttable_count(t)); -#endif -} - bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { - assert(upb_arrhas(val)); + assert(upb_arrhas(val.val)); if (key < t->array_size) { assert(!upb_arrhas(t->array[key])); t->array_count++; - ((upb_value*)t->array)[key] = val; + ((_upb_value*)t->array)[key] = val.val; } else { - if (upb_table_isfull(&t->t)) { + if (isfull(&t->t)) { // Need to resize the hash part, but we re-use the array part. 
upb_table new_table; - if (!upb_table_init(&new_table, t->t.type, t->t.size_lg2 + 1)) + if (!init(&new_table, t->t.type, t->t.size_lg2 + 1)) return false; const upb_tabent *e; - for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e)) - upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql); + for (e = begin(&t->t); e; e = next(&t->t, e)) { + upb_value v; + _upb_value_setval(&v, e->val, t->t.type); + insert(&new_table, e->key, v, &upb_inthash, &inteql); + } assert(t->t.count == new_table.count); - upb_table_uninit(&t->t); + uninit(&t->t); t->t = new_table; } - upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); + insert(&t->t, upb_intkey(key), val, &upb_inthash, &inteql); } - upb_inttable_check(t); + check(t); return true; } -const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { +bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { if (key < t->array_size) { - const upb_value *v = &t->array[key]; - return upb_arrhas(*v) ? 
v : NULL; + bool ret = upb_arrhas(t->array[key]); + if (ret && v) { + _upb_value_setval(v, t->array[key], t->t.type); + } + return ret; + } else { + return lookup(&t->t, upb_intkey(key), v, &upb_inthash, &inteql); } - return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql); } bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { @@ -340,7 +363,9 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { if (key < t->array_size) { if (upb_arrhas(t->array[key])) { t->array_count--; - if (val) *val = t->array[key]; + if (val) { + _upb_value_setval(val, t->array[key], t->t.type); + } ((upb_value*)t->array)[key] = upb_value_uint64(-1); success = true; } else { @@ -348,10 +373,9 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { } } else { upb_tabkey removed; - success = upb_table_remove( - &t->t, upb_intkey(key), val, &removed, &upb_inthash, &upb_inteql); + success = rm(&t->t, upb_intkey(key), val, &removed, &upb_inthash, &inteql); } - upb_inttable_check(t); + check(t); return success; } @@ -370,9 +394,9 @@ bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) { return upb_inttable_insert(t, (uintptr_t)key, val); } -const upb_value *upb_inttable_lookupptr(const upb_inttable *t, - const void *key) { - return upb_inttable_lookup(t, (uintptr_t)key); +bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, + upb_value *v) { + return upb_inttable_lookup(t, (uintptr_t)key, v); } bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { @@ -426,7 +450,7 @@ void upb_inttable_next(upb_inttable_iter *iter) { iter->array_part = false; iter->ptr.ent = t->t.entries - 1; } - iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent); + iter->ptr.ent = next(&t->t, iter->ptr.ent); } #ifdef UPB_UNALIGNED_READS_OK diff --git a/upb/table.h b/upb/table.h index 80f6813762..2b4c80c747 100644 --- a/upb/table.h +++ b/upb/table.h @@ -47,11 +47,7 @@ typedef 
union { typedef struct _upb_tabent { upb_tabkey key; - // Storing a upb_value here wastes a bit of memory in debug mode because - // we are storing the type for each value even though we enforce that all - // values are the same. But since this only affects debug mode, we don't - // worry too much about it. The same applies to upb_inttable.array below. - upb_value val; + _upb_value val; // Internal chaining. This is const so we can create static initializers for // tables. We cast away const sometimes, but *only* when the containing // upb_table is known to be non-const. This requires a bit of care, but @@ -76,7 +72,7 @@ typedef struct { typedef struct { upb_table t; // For entries that don't fit in the array part. - const upb_value *array; // Array part of the table. + const _upb_value *array; // Array part of the table. size_t array_size; // Array part size. size_t array_count; // Array part number of elements. } upb_inttable; @@ -89,7 +85,7 @@ typedef struct { #define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(-1) -INLINE size_t upb_table_size(const upb_table *t) { +UPB_INLINE size_t upb_table_size(const upb_table *t) { if (t->size_lg2 == 0) return 0; else @@ -97,12 +93,22 @@ INLINE size_t upb_table_size(const upb_table *t) { } // Internal-only functions, in .h file only out of necessity. 
-INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } -INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } -INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { +UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { + return e->key.num == 0; +} + +UPB_INLINE upb_tabkey upb_intkey(uintptr_t key) { + upb_tabkey k = {key}; return k; +} + +UPB_INLINE const upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { return t->entries + ((uint32_t)key.num & t->mask); } -INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } + +UPB_INLINE bool upb_arrhas(_upb_value v) { + return v.uint64 != (uint64_t)-1; +} + uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); // Initialize and uninitialize a table, respectively. If memory allocation @@ -114,7 +120,9 @@ void upb_strtable_uninit(upb_strtable *table); // Returns the number of values in the table. size_t upb_inttable_count(const upb_inttable *t); -INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } +UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { + return t->t.count; +} // Inserts the given key into the hashtable with the given value. The key must // not already exist in the hash table. For string tables, the key must be @@ -129,8 +137,8 @@ bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); // Looks up key in this table, returning a pointer to the table's internal copy // of the user's inserted data, or NULL if this key is not in the table. The // returned pointer is invalidated by inserts. -const upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); -const upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); +bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v); +bool upb_strtable_lookup(const upb_strtable *t, const char *key, upb_value *v); // Removes an item from the table. 
Returns true if the remove was successful, // and stores the removed item in *val if non-NULL. @@ -145,7 +153,8 @@ upb_value upb_inttable_pop(upb_inttable *t); // Convenience routines for inttables with pointer keys. bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val); bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); -const upb_value *upb_inttable_lookupptr(const upb_inttable *t, const void *key); +bool upb_inttable_lookupptr( + const upb_inttable *t, const void *key, upb_value *val); // Optimizes the table for the current set of entries, for both memory use and // lookup time. Client should call this after all entries have been inserted; @@ -153,17 +162,26 @@ const upb_value *upb_inttable_lookupptr(const upb_inttable *t, const void *key); void upb_inttable_compact(upb_inttable *t); // A special-case inlinable version of the lookup routine for 32-bit integers. -INLINE const upb_value *upb_inttable_lookup32(const upb_inttable *t, - uint32_t key) { +UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, + upb_value *v) { if (key < t->array_size) { - const upb_value *v = &t->array[key]; - return upb_arrhas(*v) ? 
v : NULL; - } - const upb_tabent *e; - if (t->t.entries == NULL) return NULL; - for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { - if ((uint32_t)e->key.num == key) return &e->val; - if (e->next == NULL) return NULL; + _upb_value arrval = t->array[key]; + if (upb_arrhas(arrval)) { + _upb_value_setval(v, arrval, t->t.type); + return true; + } else { + return false; + } + } else { + const upb_tabent *e; + if (t->t.entries == NULL) return NULL; + for (e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + if ((uint32_t)e->key.num == key) { + _upb_value_setval(v, e->val, t->t.type); + return true; + } + if (e->next == NULL) return false; + } } } @@ -185,12 +203,12 @@ typedef struct { void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); -INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } -INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { +UPB_INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } +UPB_INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { return i->e->key.str; } -INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { - return i->e->val; +UPB_INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { + return _upb_value_val(i->e->val, i->t->t.type); } @@ -208,7 +226,7 @@ typedef struct { const upb_inttable *t; union { const upb_tabent *ent; // For hash iteration. - const upb_value *val; // For array iteration. + const _upb_value *val; // For array iteration. 
} ptr; uintptr_t arrkey; bool array_part; @@ -216,14 +234,15 @@ typedef struct { void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t); void upb_inttable_next(upb_inttable_iter *i); -INLINE bool upb_inttable_done(upb_inttable_iter *i) { +UPB_INLINE bool upb_inttable_done(upb_inttable_iter *i) { return i->ptr.ent == NULL; } -INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) { +UPB_INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) { return i->array_part ? i->arrkey : i->ptr.ent->key.num; } -INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) { - return i->array_part ? *i->ptr.val : i->ptr.ent->val; +UPB_INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) { + return _upb_value_val( + i->array_part ? *i->ptr.val : i->ptr.ent->val, i->t->t.type); } #ifdef __cplusplus diff --git a/upb/upb.h b/upb/upb.h index a7a3ed1fea..cf221dcf09 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -14,6 +14,7 @@ #define UPB_H_ #include +#include #include #include #include @@ -24,8 +25,10 @@ extern "C" { #endif // inline if possible, emit standalone code if required. -#ifndef INLINE -#define INLINE static inline +#ifdef __cplusplus +#define UPB_INLINE inline +#else +#define UPB_INLINE static inline #endif #if __STDC_VERSION__ >= 199901L @@ -50,20 +53,107 @@ extern "C" { void operator=(const class_name&); #endif +#if defined(__clang__) && defined(LANG_CXX11) && defined(__has_warning) +#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +#define UPB_FALLTHROUGH_INTENDED [[clang::fallthrough]] +#endif +#endif + +#ifndef UPB_FALLTHROUGH_INTENDED +#define UPB_FALLTHROUGH_INTENDED do { } while (0) +#endif + #ifdef __GNUC__ #define UPB_NORETURN __attribute__((__noreturn__)) #else #define UPB_NORETURN #endif -#ifndef UINT16_MAX -#define UINT16_MAX 0xffff +// Type detection and typedefs for integer types. 
+// +// We unfortunately cannot just use stdint.h types in all cases, because some +// platforms have more than one 32-bit type (or 64-bit type). For example, on +// x86-64, both "long" and "long long" are 64-bit types, but they are +// unfortunately incompatible with each other despite being the same size. +// Since the types are incompatible, function pointers between them are +// incompatible also, which leads to trouble since handlers are declared in +// terms of function pointers. Since we don't know which of these types +// stdint.h will use (and we have no way of inspecting the typedefs, either at +// preprocessing or compilation time), we are forced to declare our own +// typedefs that we *do* know the real underlying type of. +// +// If any platform existed where three integer types were the same size, +// this would have to become more complicated. For example, short, int, and +// long could all be 32 bits. Even more diabolically, short, int, long, and +// long long could all be 64 bits and still be standard-compliant. However, +// few platforms are this strange, and it's unlikely that upb will be used on +// the strangest ones. +// +// For more information, see: +// http://blog.reverberate.org/2013/03/cc-gripe-1-integer-types.html + +// Can't count on stdint.h limits like INT32_MAX, because in C++ these are +// only defined when __STDC_LIMIT_MACROS is defined before the *first* include +// of stdint.h. We can't guarantee that someone else didn't include it first +// without defining __STDC_LIMIT_MACROS. 
+#define UPB_INT32_MAX 0x7fffffffLL +#define UPB_INT32_MIN (-UPB_INT32_MAX - 1) +#define UPB_INT64_MAX 0x7fffffffffffffffLL +#define UPB_INT64_MIN (-UPB_INT64_MAX - 1) + +#if INT_MAX == UPB_INT32_MAX && INT_MIN == UPB_INT32_MIN +#define UPB_INT_IS_32BITS 1 +#endif + +#if LONG_MAX == UPB_INT32_MAX && LONG_MIN == UPB_INT32_MIN +#define UPB_LONG_IS_32BITS 1 +#endif + +#if LONG_MAX == UPB_INT64_MAX && LONG_MIN == UPB_INT64_MIN +#define UPB_LONG_IS_64BITS 1 #endif -#ifndef UINT32_MAX -#define UINT32_MAX 0xffffffff +#if LLONG_MAX == UPB_INT64_MAX && LLONG_MIN == UPB_INT64_MIN +#define UPB_LLONG_IS_64BITS 1 #endif +#if UPB_INT_IS_32BITS +typedef int upb_int32_t; +typedef unsigned int upb_uint32_t; +#define UPB_INT32_CTYPE i + +#if UPB_LONG_IS_32BITS +#define UPB_TWO_32BIT_TYPES 1 +typedef long upb_int32alt_t; +typedef unsigned long upb_uint32alt_t; +#define UPB_INT32_CTYPE2 l +#endif // UPB_LONG_IS_32BITS + +#elif UPB_LONG_IS_32BITS // && !UPB_INT_IS_32BITS +typedef long upb_int32_t; +typedef unsigned long upb_uint32_t; +#define UPB_INT32_CTYPE l +#endif // UPB_INT_IS_32BITS + + +#if UPB_LONG_IS_64BITS +typedef long upb_int64_t; +typedef unsigned long upb_uint64_t; +#define UPB_INT64_CTYPE l + +#if UPB_LLONG_IS_64BITS +#define UPB_TWO_64BIT_TYPES 1 +typedef long long upb_int64alt_t; +typedef unsigned long long upb_uint64alt_t; +#define UPB_INT64_CTYPE2 ll +#endif // UPB_LLONG_IS_64BITS + +#elif UPB_LLONG_IS_64BITS // && !UPB_LONG_IS_64BITS +typedef long long upb_int64_t; +typedef unsigned long long upb_uint64_t; +#define UPB_INT64_CTYPE ll +#endif // UPB_LONG_IS_64BITS + #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? 
(x) : (y)) @@ -147,8 +237,8 @@ class upb::Status { Status(); ~Status(); - bool ok(); - bool eof(); + bool ok() const; + bool eof() const; const char *GetString() const; void SetEof(); @@ -225,31 +315,23 @@ typedef enum { UPB_CTYPE_FIELDDEF = 11, } upb_ctype_t; -#ifdef __cplusplus -namespace upb { class ByteRegion; } -typedef upb::ByteRegion upb_byteregion; -#else -struct upb_byteregion; -typedef struct upb_byteregion upb_byteregion; -#endif +typedef union { + uint64_t uint64; + int32_t int32; + int64_t int64; + uint32_t uint32; + double _double; + float _float; + bool _bool; + char *cstr; + void *ptr; + const void *constptr; +} _upb_value; // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. typedef struct { - union { - uint64_t uint64; - int32_t int32; - int64_t int64; - uint32_t uint32; - double _double; - float _float; - bool _bool; - char *cstr; - void *ptr; - const void *constptr; - upb_byteregion *byteregion; - } val; - + _upb_value val; #ifndef NDEBUG // In debug mode we carry the value type around also so we can check accesses // to be sure the right member is being read. @@ -258,30 +340,44 @@ typedef struct { } upb_value; #ifdef UPB_C99 -#define UPB_VAL_INIT(v, member) {.member = v} +#define UPB_VALUE_INIT(v, member) {.member = v} #endif // TODO(haberman): C++ +// +// +#define UPB__VALUE_INIT_NONE UPB_VALUE_INIT(NULL, ptr) #ifdef NDEBUG #define SET_TYPE(dest, val) -#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member)} +#define UPB_VALUE_INIT_NONE {UPB__VALUE_INIT_NONE} #else #define SET_TYPE(dest, val) dest = val -#define UPB_VALUE_INIT(v, member, type) {UPB_VAL_INIT(v, member), type} +// Non-existent type, all reads will fail. 
+#define UPB_VALUE_INIT_NONE {UPB__VALUE_INIT_NONE, -1} #endif -#define UPB_VALUE_INIT_INT32(v) UPB_VALUE_INIT(v, int32, UPB_CTYPE_INT32) -#define UPB_VALUE_INIT_INT64(v) UPB_VALUE_INIT(v, int64, UPB_CTYPE_INT64) -#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32, UPB_CTYPE_UINT32) -#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64, UPB_CTYPE_UINT64) -#define UPB_VALUE_INIT_DOUBLE(v) UPB_VALUE_INIT(v, _double, UPB_CTYPE_DOUBLE) -#define UPB_VALUE_INIT_FLOAT(v) UPB_VALUE_INIT(v, _float, UPB_CTYPE_FLOAT) -#define UPB_VALUE_INIT_BOOL(v) UPB_VALUE_INIT(v, _bool, UPB_CTYPE_BOOL) -#define UPB_VALUE_INIT_CSTR(v) UPB_VALUE_INIT(v, cstr, UPB_CTYPE_CSTR) -#define UPB_VALUE_INIT_PTR(v) UPB_VALUE_INIT(v, ptr, UPB_CTYPE_PTR) -#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr, UPB_CTYPE_PTR) -// Non-existent type, all reads will fail. -#define UPB_VALUE_INIT_NONE UPB_VALUE_INIT(NULL, ptr, -1) +#define UPB_VALUE_INIT_INT32(v) UPB_VALUE_INIT(v, int32) +#define UPB_VALUE_INIT_INT64(v) UPB_VALUE_INIT(v, int64) +#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32) +#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64) +#define UPB_VALUE_INIT_DOUBLE(v) UPB_VALUE_INIT(v, _double) +#define UPB_VALUE_INIT_FLOAT(v) UPB_VALUE_INIT(v, _float) +#define UPB_VALUE_INIT_BOOL(v) UPB_VALUE_INIT(v, _bool) +#define UPB_VALUE_INIT_CSTR(v) UPB_VALUE_INIT(v, cstr) +#define UPB_VALUE_INIT_PTR(v) UPB_VALUE_INIT(v, ptr) +#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr) + +UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val, + upb_ctype_t type) { + v->val = val; + SET_TYPE(v->type, type); +} + +UPB_INLINE upb_value _upb_value_val(_upb_value val, upb_ctype_t type) { + upb_value ret; + _upb_value_setval(&ret, val, type); + return ret; +} // For each value type, define the following set of functions: // @@ -293,12 +389,12 @@ typedef struct { // upb_value upb_value_int32(int32_t val); #define WRITERS(name, membername, ctype, proto_type) \ - 
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ + UPB_INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ val->val.uint64 = 0; \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } \ - INLINE upb_value upb_value_ ## name(ctype val) { \ + UPB_INLINE upb_value upb_value_ ## name(ctype val) { \ upb_value ret; \ upb_value_set ## name(&ret, val); \ return ret; \ @@ -307,17 +403,17 @@ typedef struct { #define ALL(name, membername, ctype, proto_type) \ /* Can't reuse WRITERS() here unfortunately because "bool" is a macro \ * that expands to _Bool, so it ends up defining eg. upb_value_set_Bool */ \ - INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ + UPB_INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ val->val.uint64 = 0; \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } \ - INLINE upb_value upb_value_ ## name(ctype val) { \ + UPB_INLINE upb_value upb_value_ ## name(ctype val) { \ upb_value ret; \ upb_value_set ## name(&ret, val); \ return ret; \ } \ - INLINE ctype upb_value_get ## name(upb_value val) { \ + UPB_INLINE ctype upb_value_get ## name(upb_value val) { \ assert(val.type == proto_type); \ return val.val.membername; \ } @@ -329,7 +425,6 @@ ALL(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); ALL(bool, _bool, bool, UPB_CTYPE_BOOL); ALL(cstr, cstr, char*, UPB_CTYPE_CSTR); ALL(ptr, ptr, void*, UPB_CTYPE_PTR); -ALL(byteregion, byteregion, upb_byteregion*, UPB_CTYPE_BYTEREGION); #ifdef __KERNEL__ // Linux kernel modules are compiled without SSE and therefore are incapable @@ -387,8 +482,8 @@ template inline Value MakePtrValue(T* v) { // C++ Wrappers inline Status::Status() { upb_status_init(this); } inline Status::~Status() { upb_status_uninit(this); } -inline bool Status::ok() { return upb_ok(this); } -inline bool Status::eof() { return upb_eof(this); } +inline bool Status::ok() const { return upb_ok(this); } +inline bool Status::eof() const { return upb_eof(this); } inline 
const char *Status::GetString() const { return upb_status_getstr(this); } inline void Status::SetEof() { upb_status_seteof(this); } inline void Status::SetErrorLiteral(const char* msg) {