Google-internal development.

pull/13171/head
Josh Haberman 10 years ago
parent 87fc2c516b
commit 3bd691a497
  1. 13
      tests/bindings/googlepb/test_vs_proto2.cc
  2. 110
      tests/json/test_json.cc
  3. 51
      tests/pb/test_decoder.cc
  4. 103
      tests/test_cpp.cc
  5. 91
      tests/test_def.c
  6. 3
      upb/bindings/googlepb/bridge.cc
  7. 9
      upb/bindings/googlepb/proto2.cc
  8. 29
      upb/bindings/lua/upb.c
  9. 30
      upb/bindings/lua/upb/pb.c
  10. 41
      upb/bindings/python/upb.c
  11. 39
      upb/bindings/ruby/upb.c
  12. 429
      upb/def.c
  13. 639
      upb/def.h
  14. 117
      upb/descriptor/reader.c
  15. 78
      upb/descriptor/reader.h
  16. 259
      upb/env.c
  17. 256
      upb/env.h
  18. 36
      upb/handlers-inl.h
  19. 27
      upb/handlers.c
  20. 2
      upb/handlers.h
  21. 466
      upb/json/parser.c
  22. 82
      upb/json/parser.h
  23. 394
      upb/json/parser.rl
  24. 340
      upb/json/printer.c
  25. 70
      upb/json/printer.h
  26. 15
      upb/pb/compile_decoder.c
  27. 10
      upb/pb/compile_decoder_x64.dasc
  28. 24
      upb/pb/compile_decoder_x64.h
  29. 176
      upb/pb/decoder.c
  30. 173
      upb/pb/decoder.h
  31. 92
      upb/pb/decoder.int.h
  32. 150
      upb/pb/encoder.c
  33. 116
      upb/pb/encoder.h
  34. 18
      upb/pb/glue.c
  35. 57
      upb/pb/textprinter.c
  36. 38
      upb/pb/textprinter.h
  37. 21
      upb/sink.h
  38. 12
      upb/symtab.c
  39. 8
      upb/table.c
  40. 3
      upb/table.int.h
  41. 9
      upb/upb.h

@ -40,8 +40,8 @@ const unsigned char message2_data[] = {
void compare_metadata(const google::protobuf::Descriptor* d,
const upb::MessageDef *upb_md) {
ASSERT(d->field_count() == upb_md->field_count());
for (upb::MessageDef::const_iterator i = upb_md->begin(); i != upb_md->end();
++i) {
for (upb::MessageDef::const_field_iterator i = upb_md->field_begin();
i != upb_md->field_end(); ++i) {
const upb::FieldDef* upb_f = *i;
const google::protobuf::FieldDescriptor *proto2_f =
d->FindFieldByNumber(upb_f->number());
@ -77,13 +77,14 @@ void parse_and_compare(google::protobuf::Message *msg1,
cache.GetDecoderMethod(upb::pb::DecoderMethodOptions(protomsg_handlers)));
upb::Status status;
upb::pb::Decoder decoder(decoder_method.get(), &status);
upb::Environment env;
env.ReportErrorsTo(&status);
upb::Sink protomsg_sink(protomsg_handlers, msg2);
decoder.ResetOutput(&protomsg_sink);
upb::pb::Decoder* decoder =
upb::pb::Decoder::Create(&env, decoder_method.get(), &protomsg_sink);
msg2->Clear();
bool ok = upb::BufferSource::PutBuffer(str, len, decoder.input());
bool ok = upb::BufferSource::PutBuffer(str, len, decoder->input());
if (!ok) {
fprintf(stderr, "error parsing: %s\n", status.error_message());
print_diff(*msg1, *msg2);

@ -85,6 +85,33 @@ static TestCase kTestRoundtripMessages[] = {
TEST("{\"optional_string\":\"\\uFFFF\"}"),
EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}")
},
// map-field tests
{
TEST("{\"map_string_string\":{\"a\":\"value1\",\"b\":\"value2\","
"\"c\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_int32_string\":{\"1\":\"value1\",\"-1\":\"value2\","
"\"1234\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_bool_string\":{\"false\":\"value1\",\"true\":\"value2\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_int32\":{\"asdf\":1234,\"jkl;\":-1}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_bool\":{\"asdf\":true,\"jkl;\":false}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_msg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"),
EXPECT_SAME
},
TEST_SENTINEL
};
@ -115,6 +142,53 @@ static const upb::MessageDef* BuildTestMessage(
submsg->set_full_name("SubMessage", &st);
AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false);
// Create MapEntryStringString.
upb::reffed_ptr<upb::MessageDef> mapentry_string_string(
upb::MessageDef::New());
mapentry_string_string->set_full_name("MapEntry_String_String", &st);
mapentry_string_string->setmapentry(true);
AddField(mapentry_string_string.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryInt32String.
upb::reffed_ptr<upb::MessageDef> mapentry_int32_string(
upb::MessageDef::New());
mapentry_int32_string->set_full_name("MapEntry_Int32_String", &st);
mapentry_int32_string->setmapentry(true);
AddField(mapentry_int32_string.get(), 1, "key", UPB_TYPE_INT32, false);
AddField(mapentry_int32_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryBoolString.
upb::reffed_ptr<upb::MessageDef> mapentry_bool_string(
upb::MessageDef::New());
mapentry_bool_string->set_full_name("MapEntry_Bool_String", &st);
mapentry_bool_string->setmapentry(true);
AddField(mapentry_bool_string.get(), 1, "key", UPB_TYPE_BOOL, false);
AddField(mapentry_bool_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryStringInt32.
upb::reffed_ptr<upb::MessageDef> mapentry_string_int32(
upb::MessageDef::New());
mapentry_string_int32->set_full_name("MapEntry_String_Int32", &st);
mapentry_string_int32->setmapentry(true);
AddField(mapentry_string_int32.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_int32.get(), 2, "value", UPB_TYPE_INT32, false);
// Create MapEntryStringBool.
upb::reffed_ptr<upb::MessageDef> mapentry_string_bool(upb::MessageDef::New());
mapentry_string_bool->set_full_name("MapEntry_String_Bool", &st);
mapentry_string_bool->setmapentry(true);
AddField(mapentry_string_bool.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_bool.get(), 2, "value", UPB_TYPE_BOOL, false);
// Create MapEntryStringMessage.
upb::reffed_ptr<upb::MessageDef> mapentry_string_msg(upb::MessageDef::New());
mapentry_string_msg->set_full_name("MapEntry_String_Message", &st);
mapentry_string_msg->setmapentry(true);
AddField(mapentry_string_msg.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_msg.get(), 2, "value", UPB_TYPE_MESSAGE, false,
upb::upcast(submsg.get()));
// Create MyEnum.
upb::reffed_ptr<upb::EnumDef> myenum(upb::EnumDef::New());
myenum->set_full_name("MyEnum", &st);
@ -150,13 +224,33 @@ static const upb::MessageDef* BuildTestMessage(
AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true,
upb::upcast(myenum.get()));
AddField(md.get(), 20, "map_string_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_string.get()));
AddField(md.get(), 21, "map_int32_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_int32_string.get()));
AddField(md.get(), 22, "map_bool_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_bool_string.get()));
AddField(md.get(), 23, "map_string_int32", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_int32.get()));
AddField(md.get(), 24, "map_string_bool", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_bool.get()));
AddField(md.get(), 25, "map_string_msg", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_msg.get()));
// Add all of the defs to our symtab.
upb::Def* defs[3] = {
upb::Def* defs[9] = {
upb::upcast(submsg.ReleaseTo(&defs)),
upb::upcast(myenum.ReleaseTo(&defs)),
upb::upcast(md.ReleaseTo(&defs)),
upb::upcast(mapentry_string_string.ReleaseTo(&defs)),
upb::upcast(mapentry_int32_string.ReleaseTo(&defs)),
upb::upcast(mapentry_bool_string.ReleaseTo(&defs)),
upb::upcast(mapentry_string_int32.ReleaseTo(&defs)),
upb::upcast(mapentry_string_bool.ReleaseTo(&defs)),
upb::upcast(mapentry_string_msg.ReleaseTo(&defs)),
};
symtab->Add(defs, 3, &defs, &st);
symtab->Add(defs, 9, &defs, &st);
ASSERT(st.ok());
// Return TestMessage.
return symtab->LookupMessage("TestMessage");
@ -198,14 +292,14 @@ void test_json_roundtrip_message(const char* json_src,
const upb::Handlers* serialize_handlers,
int seam) {
upb::Status st;
upb::json::Parser parser(&st);
upb::json::Printer printer(serialize_handlers);
upb::Environment env;
env.ReportErrorsTo(&st);
StringSink data_sink;
upb::json::Printer* printer =
upb::json::Printer::Create(&env, serialize_handlers, data_sink.Sink());
upb::json::Parser* parser = upb::json::Parser::Create(&env, printer->input());
parser.ResetOutput(printer.input());
printer.ResetOutput(data_sink.Sink());
upb::BytesSink* input = parser.input();
upb::BytesSink* input = parser->input();
void *sub;
size_t len = strlen(json_src);
size_t ofs = 0;

@ -64,6 +64,8 @@
(float)completed * 100 / total); \
}
#define MAX_NESTING 64
uint32_t filter_hash = 0;
double completed;
double total;
@ -210,7 +212,7 @@ string submsg(uint32_t fn, const string& buf) {
// using the closure depth to test that the stack of closures is properly
// handled.
int closures[UPB_DECODER_MAX_NESTING];
int closures[MAX_NESTING];
string output;
void indentbuf(string *buf, int depth) {
@ -508,6 +510,15 @@ upb::reffed_ptr<const upb::Handlers> NewHandlers(TestMode mode) {
const upb::Handlers *global_handlers;
const upb::pb::DecoderMethod *global_method;
// Test helper: creates a decoder in |env| for |method| that outputs to |sink|.
// Asserts that creation succeeded and sets the decoder's maximum nesting to
// MAX_NESTING before handing it back.
upb::pb::Decoder* CreateDecoder(upb::Environment* env,
                                const upb::pb::DecoderMethod* method,
                                upb::Sink* sink) {
  upb::pb::Decoder* const decoder =
      upb::pb::Decoder::Create(env, method, sink);
  ASSERT(decoder != NULL);
  decoder->set_max_nesting(MAX_NESTING);
  return decoder;
}
uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2) {
uint32_t hash = MurmurHash2(proto.c_str(), proto.size(), 0);
@ -545,19 +556,21 @@ static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf,
#define LINE(x) x "\n"
void run_decoder(const string& proto, const string* expected_output) {
upb::Status status;
upb::pb::Decoder decoder(global_method, &status);
upb::Sink sink(global_handlers, &closures[0]);
decoder.ResetOutput(&sink);
for (size_t i = 0; i < proto.size(); i++) {
for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
// TODO(haberman): hoist this again once the environment supports reset.
upb::Environment env;
env.ReportErrorsTo(&status);
upb::pb::Decoder *decoder = CreateDecoder(&env, global_method, &sink);
testhash = Hash(proto, expected_output, i, j);
if (filter_hash && testhash != filter_hash) continue;
if (test_mode != COUNT_ONLY) {
decoder.Reset();
output.clear();
status.Clear();
size_t ofs = 0;
upb::BytesSink* input = decoder.input();
upb::BytesSink* input = decoder->input();
void *sub;
if (filter_hash) {
@ -576,9 +589,9 @@ void run_decoder(const string& proto, const string* expected_output) {
}
bool ok = input->Start(proto.size(), &sub) &&
parse(&decoder, sub, proto.c_str(), 0, i, &ofs, &status) &&
parse(&decoder, sub, proto.c_str(), i, j, &ofs, &status) &&
parse(&decoder, sub, proto.c_str(), j, proto.size(), &ofs,
parse(decoder, sub, proto.c_str(), 0, i, &ofs, &status) &&
parse(decoder, sub, proto.c_str(), i, j, &ofs, &status) &&
parse(decoder, sub, proto.c_str(), j, proto.size(), &ofs,
&status) &&
ofs == proto.size();
@ -852,7 +865,7 @@ void test_invalid() {
// Test exceeding the resource limit of stack depth.
string buf;
for (int i = 0; i <= UPB_DECODER_MAX_NESTING; i++) {
for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
}
assert_does_not_parse(buf);
@ -871,11 +884,12 @@ void test_valid() {
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
upb::pb::Decoder decoder(global_method, &status);
upb::Environment env;
env.ReportErrorsTo(&status);
upb::Sink sink(global_handlers, &closures[0]);
decoder.ResetOutput(&sink);
upb::pb::Decoder* decoder = CreateDecoder(&env, global_method, &sink);
output.clear();
bool ok = upb::BufferSource::PutBuffer("", 0, decoder.input());
bool ok = upb::BufferSource::PutBuffer("", 0, decoder->input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
@ -1076,7 +1090,7 @@ void test_valid() {
// Staying within the stack limit should work properly.
string buf;
string textbuf;
int total = UPB_DECODER_MAX_NESTING - 1;
int total = MAX_NESTING - 1;
for (int i = 0; i < total; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
indentbuf(&textbuf, i);
@ -1135,11 +1149,12 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
{ NULL, 0 },
};
for (int i = 0; testdata[i].data; i++) {
upb::Environment env;
upb::Status status;
upb::pb::Decoder decoder(method.get(), &status);
upb::Sink sink(global_handlers, &closures[0]);
decoder.ResetOutput(&sink);
upb::BytesSink* input = decoder.input();
env.ReportErrorsTo(&status);
upb::Sink sink(method->dest_handlers(), &closures[0]);
upb::pb::Decoder* decoder = CreateDecoder(&env, method.get(), &sink);
upb::BytesSink* input = decoder->input();
void* subc;
ASSERT(input->Start(0, &subc));
size_t ofs = 0;
@ -1182,7 +1197,7 @@ extern "C" {
int run_tests(int argc, char *argv[]) {
if (argc > 1)
filter_hash = strtol(argv[1], NULL, 16);
for (int i = 0; i < UPB_DECODER_MAX_NESTING; i++) {
for (int i = 0; i < MAX_NESTING; i++) {
closures[i] = i;
}

@ -12,8 +12,10 @@
#include <iostream>
#include <set>
#include <sstream>
#include "upb/def.h"
#include "upb/env.h"
#include "upb/descriptor/reader.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
@ -164,7 +166,7 @@ static void TestSymbolTable(const char *descriptor_file) {
#ifdef UPB_CXX11
// Test range-based for.
std::set<const upb::FieldDef*> fielddefs;
for (const upb::FieldDef* f : *md.get()) {
for (const upb::FieldDef* f : md.get()->fields()) {
AssertInsert(&fielddefs, f);
ASSERT(f->containing_type() == md.get());
}
@ -1117,6 +1119,103 @@ void TestHandlerDataDestruction() {
ASSERT(x == 0);
}
// Exercises the C++ OneofDef API: building a oneof with several fields,
// attaching it to a MessageDef, iterating over its fields (and over the
// message's fields/oneofs), adding fields afterwards, and verifying that
// REQUIRED and REPEATED fields are rejected.
void TestOneofs() {
  upb::Status status;
  upb::reffed_ptr<upb::MessageDef> md(upb::MessageDef::New());
  upb::reffed_ptr<upb::OneofDef> o(upb::OneofDef::New());

  o->set_name("test_oneof", &status);
  ASSERT(status.ok());

  // Populate the oneof with five optional int32 fields numbered 1..5.
  for (int i = 0; i < 5; i++) {
    std::ostringstream fieldname;
    fieldname << "field_" << i;
    upb::reffed_ptr<upb::FieldDef> f(upb::FieldDef::New());
    f->set_name(fieldname.str(), &status);
    ASSERT(status.ok());
    f->set_type(UPB_TYPE_INT32);
    f->set_number(i + 1, &status);
    ASSERT(status.ok());
    f->set_label(UPB_LABEL_OPTIONAL);
    o->AddField(f.get(), &status);
    ASSERT(status.ok());
  }

  md->AddOneof(o.get(), &status);
  ASSERT(status.ok());

  // Iterate the oneof's fields directly.
  int field_count = 0;
  for (upb::OneofDef::iterator it = o->begin(); it != o->end(); ++it) {
    upb::FieldDef* f = *it;
    ASSERT(f->type() == UPB_TYPE_INT32);
    field_count++;
  }
  ASSERT(field_count == 5);

  // The message should expose the oneof we just added.
  upb::MessageDef::oneof_iterator msg_it = md->oneof_begin();
  ASSERT(msg_it != md->oneof_end());
  ASSERT((*msg_it) == o.get());

#ifdef UPB_CXX11
  // Test range-based for on both fields and oneofs (with the iterator adaptor).
  field_count = 0;
  for (auto* field : md->fields()) {
    UPB_UNUSED(field);
    field_count++;
  }
  ASSERT(field_count == 5);

  int oneof_count = 0;
  for (auto* oneof : md->oneofs()) {
    UPB_UNUSED(oneof);
    oneof_count++;
  }
  ASSERT(oneof_count == 1);
#endif  // UPB_CXX11

  // Test that we can add a new field to the oneof and that it becomes a member
  // of the msgdef as well.
  upb::reffed_ptr<upb::FieldDef> newf(upb::FieldDef::New());
  newf->set_name("new_field_10", &status);
  ASSERT(status.ok());
  newf->set_number(10, &status);
  ASSERT(status.ok());
  newf->set_label(UPB_LABEL_OPTIONAL);
  newf->set_type(UPB_TYPE_INT32);
  o->AddField(newf.get(), &status);
  ASSERT(status.ok());
  ASSERT(newf->containing_type() == md.get());

  // Test that we can add a new field to the msgdef first and then to the oneof.
  upb::reffed_ptr<upb::FieldDef> newf2(upb::FieldDef::New());
  newf2->set_name("new_field_11", &status);
  ASSERT(status.ok());
  newf2->set_number(11, &status);
  ASSERT(status.ok());
  newf2->set_label(UPB_LABEL_OPTIONAL);
  newf2->set_type(UPB_TYPE_INT32);
  md->AddField(newf2.get(), &status);
  ASSERT(status.ok());
  o->AddField(newf2.get(), &status);
  ASSERT(status.ok());
  ASSERT(newf2->containing_oneof() == o.get());

  // Test that we cannot add REQUIRED or REPEATED fields to the oneof.
  upb::reffed_ptr<upb::FieldDef> newf3(upb::FieldDef::New());
  newf3->set_name("new_field_12", &status);
  ASSERT(status.ok());
  newf3->set_number(12, &status);
  ASSERT(status.ok());
  newf3->set_label(UPB_LABEL_REQUIRED);
  newf3->set_type(UPB_TYPE_INT32);
  o->AddField(newf3.get(), &status);
  ASSERT(!status.ok());

  // Reset the status so the next assertion reflects the REPEATED attempt
  // rather than the error left over from the REQUIRED attempt, and relabel
  // the field we are actually about to add (newf3, not newf).
  status.Clear();
  newf3->set_label(UPB_LABEL_REPEATED);
  o->AddField(newf3.get(), &status);
  ASSERT(!status.ok());
}
extern "C" {
int run_tests(int argc, char *argv[]) {
@ -1173,6 +1272,8 @@ int run_tests(int argc, char *argv[]) {
TestHandlerDataDestruction();
TestOneofs();
return 0;
}

@ -189,7 +189,9 @@ static upb_fielddef *newfield(
ASSERT(upb_fielddef_setnumber(f, num, NULL));
upb_fielddef_settype(f, type);
upb_fielddef_setlabel(f, label);
ASSERT(upb_fielddef_setsubdefname(f, type_name, NULL));
if (type_name) {
ASSERT(upb_fielddef_setsubdefname(f, type_name, NULL));
}
return f;
}
@ -342,6 +344,91 @@ static void test_descriptor_flags() {
upb_msgdef_unref(m2, &m2);
}
// Verifies that a message marked as a map entry may only be referenced by a
// repeated field: adding the defs to a symtab must fail while the referring
// field is OPTIONAL and succeed once it has been relabelled REPEATED.
static void test_mapentry_check() {
  upb_status s = UPB_STATUS_INIT;

  // TestMessage with a single optional message-typed field that names
  // ".MapEntry" as its subdef.
  upb_msgdef *m = upb_msgdef_new(&m);
  upb_msgdef_setfullname(m, "TestMessage", &s);
  upb_fielddef *f = upb_fielddef_new(&f);
  upb_fielddef_setname(f, "field1", &s);
  upb_fielddef_setnumber(f, 1, &s);
  upb_fielddef_setlabel(f, UPB_LABEL_OPTIONAL);
  upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
  upb_fielddef_setsubdefname(f, ".MapEntry", &s);
  upb_msgdef_addfield(m, f, &f, &s);
  ASSERT(upb_ok(&s));

  // The referenced message, flagged as a map entry.
  upb_msgdef *subm = upb_msgdef_new(&subm);
  upb_msgdef_setfullname(subm, "MapEntry", &s);
  upb_msgdef_setmapentry(subm, true);
  ASSERT(upb_ok(&s));

  upb_symtab *symtab = upb_symtab_new(&symtab);
  upb_def *defs[] = {UPB_UPCAST(m), UPB_UPCAST(subm)};
  upb_symtab_add(symtab, defs, 2, NULL, &s);
  // Should not have succeeded: non-repeated field pointing to a MapEntry.
  ASSERT(!upb_ok(&s));

  // Clear the expected error so the check below reflects only the second
  // attempt rather than the stale failure recorded above.
  upb_status_clear(&s);
  upb_fielddef_setlabel(f, UPB_LABEL_REPEATED);
  upb_symtab_add(symtab, defs, 2, NULL, &s);
  ASSERT(upb_ok(&s));

  upb_symtab_unref(symtab, &symtab);
  upb_msgdef_unref(subm, &subm);
  upb_msgdef_unref(m, &m);
}
// Exercises the C oneof API: builds a oneof with two fields, attaches it to a
// message, adds the message to a symtab, and checks the lookup-by-name
// functions for both the oneof and one of its fields.
static void test_oneofs() {
  upb_status s = UPB_STATUS_INIT;
  bool ok;
  upb_symtab *symtab = upb_symtab_new(&symtab);
  ASSERT(symtab != NULL);

  // Create a test message for fields to refer to.
  upb_msgdef *subm = upb_msgdef_newnamed("SubMessage", &symtab);
  ok = upb_msgdef_addfield(subm, newfield("field1", 1, UPB_TYPE_INT32,
                                          UPB_LABEL_OPTIONAL, NULL, &symtab),
                           &symtab, &s);
  ASSERT_STATUS(ok, &s);
  upb_def *subm_defs[] = {UPB_UPCAST(subm)};
  ASSERT_STATUS(upb_symtab_add(symtab, subm_defs, 1, &symtab, &s), &s);

  upb_msgdef *m = upb_msgdef_newnamed("TestMessage", &symtab);
  ASSERT(upb_msgdef_numoneofs(m) == 0);

  // A freshly created oneof has no fields and no name.
  upb_oneofdef *o = upb_oneofdef_new(&o);
  ASSERT(upb_oneofdef_numfields(o) == 0);
  ASSERT(upb_oneofdef_name(o) == NULL);

  ok = upb_oneofdef_setname(o, "test_oneof", &s);
  ASSERT_STATUS(ok, &s);

  // Pass &s (not NULL) so that ASSERT_STATUS has the failure details to
  // report if adding a field is rejected.
  ok = upb_oneofdef_addfield(o, newfield("field1", 1, UPB_TYPE_INT32,
                                         UPB_LABEL_OPTIONAL, NULL, &symtab),
                             &symtab, &s);
  ASSERT_STATUS(ok, &s);
  ok = upb_oneofdef_addfield(o, newfield("field2", 2, UPB_TYPE_MESSAGE,
                                         UPB_LABEL_OPTIONAL, ".SubMessage",
                                         &symtab),
                             &symtab, &s);
  ASSERT_STATUS(ok, &s);

  ok = upb_msgdef_addoneof(m, o, NULL, &s);
  ASSERT_STATUS(ok, &s);

  upb_def *defs[] = {UPB_UPCAST(m)};
  ASSERT_STATUS(upb_symtab_add(symtab, defs, 1, &symtab, &s), &s);

  // The oneof and its fields must be reachable by name.
  ASSERT(upb_msgdef_numoneofs(m) == 1);
  const upb_oneofdef *lookup_o = upb_msgdef_ntooz(m, "test_oneof");
  ASSERT(lookup_o == o);

  const upb_fielddef *lookup_field = upb_oneofdef_ntofz(o, "field1");
  ASSERT(lookup_field != NULL && upb_fielddef_number(lookup_field) == 1);

  upb_symtab_unref(symtab, &symtab);
  upb_oneofdef_unref(o, &o);
}
int run_tests(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: test_def <test.proto.pb>\n");
@ -358,5 +445,7 @@ int run_tests(int argc, char *argv[]) {
test_partial_freeze();
test_noreftracking();
test_descriptor_flags();
test_mapentry_check();
test_oneofs();
return 0;
}

@ -246,7 +246,8 @@ const Handlers* CodeCache::GetMaybeUnfrozenWriteHandlers(
to_freeze_.push_back(h);
const goog::Descriptor* d = m.GetDescriptor();
for (upb::MessageDef::const_iterator i = md->begin(); i != md->end(); ++i) {
for (upb::MessageDef::const_field_iterator i = md->field_begin();
i != md->field_end(); ++i) {
const FieldDef* upb_f = *i;
const goog::FieldDescriptor* proto2_f =

@ -946,14 +946,14 @@ case goog::FieldDescriptor::cpptype: \
public:
typedef goog::Message Type;
#ifdef GOOGLE_PROTOBUF_HAS_ARENAS
static ::proto2::Arena* GetArena(Type* t) {
static goog::Arena* GetArena(Type* t) {
return t->GetArena();
}
static void* GetMaybeArenaPointer(Type* t) {
return t->GetMaybeArenaPointer();
}
static inline Type* NewFromPrototype(
const Type* prototype, ::proto2::Arena* arena = NULL) {
const Type* prototype, goog::Arena* arena = NULL) {
return prototype->New(arena);
}
static void Delete(Type* t, goog::Arena* arena = NULL) {
@ -1277,6 +1277,11 @@ case goog::FieldDescriptor::cpptype: \
return lazy_field_.SetAllocated(static_cast<proto2::Message*>(message));
}
virtual void UnsafeArenaSetAllocatedMessage(proto2::MessageLite* message) {
return lazy_field_.UnsafeArenaSetAllocated(
static_cast<proto2::Message*>(message));
}
virtual proto2::MessageLite* ReleaseMessage(
const proto2::MessageLite& prototype) {
return lazy_field_.ReleaseByPrototype(

@ -139,7 +139,8 @@ bool lupb_openlib(lua_State *L, void *ptr, const char *name,
// Pushes a new userdata with the given metatable and ensures that it has a
// uservalue.
static void *newudata_with_userval(lua_State *L, size_t size, const char *type) {
static void *newudata_with_userval(lua_State *L, size_t size,
const char *type) {
void *ret = lua_newuserdata(L, size);
// Set metatable.
@ -952,17 +953,17 @@ static int lupb_msgdef_field(lua_State *L) {
}
static int lupb_msgiter_next(lua_State *L) {
upb_msg_iter *i = lua_touserdata(L, lua_upvalueindex(1));
if (upb_msg_done(i)) return 0;
upb_msg_field_iter *i = lua_touserdata(L, lua_upvalueindex(1));
if (upb_msg_field_done(i)) return 0;
lupb_def_pushwrapper(L, UPB_UPCAST(upb_msg_iter_field(i)), NULL);
upb_msg_next(i);
upb_msg_field_next(i);
return 1;
}
static int lupb_msgdef_fields(lua_State *L) {
const upb_msgdef *m = lupb_msgdef_check(L, 1);
upb_msg_iter *i = lua_newuserdata(L, sizeof(upb_msg_iter));
upb_msg_begin(i, m);
upb_msg_field_iter *i = lua_newuserdata(L, sizeof(upb_msg_field_iter));
upb_msg_field_begin(i, m);
// Need to guarantee that the msgdef outlives the iter.
lua_pushvalue(L, 1);
lua_pushcclosure(L, &lupb_msgiter_next, 2);
@ -1416,8 +1417,10 @@ static lupb_msgdef *lupb_msg_assignoffsets(lua_State *L, int narg) {
size_t userval_idx = 1;
// Assign offsets.
upb_msg_iter i;
for (upb_msg_begin(&i, lmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for (upb_msg_field_begin(&i, lmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (in_userval(f)) {
offsets[upb_fielddef_index(f)] = userval_idx++;
@ -1442,7 +1445,9 @@ static lupb_msgdef *lupb_msg_assignoffsets(lua_State *L, int narg) {
lua_newtable(L); // This will be our userval.
int idx = 1;
for (upb_msg_begin(&i, lmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
for (upb_msg_field_begin(&i, lmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
bool created = lupb_def_pushwrapper(L, upb_fielddef_subdef(f), NULL);
@ -1660,9 +1665,9 @@ void callback(const void *closure, upb_handlers *h) {
lua_State *L = (lua_State*)closure;
lupb_def_pushwrapper(L, UPB_UPCAST(upb_handlers_msgdef(h)), NULL);
lupb_msgdef *lmd = lupb_msg_assignoffsets(L, -1);
upb_msg_iter i;
upb_msg_begin(&i, upb_handlers_msgdef(h));
for (; !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
upb_msg_field_begin(&i, upb_handlers_msgdef(h));
for (; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
int hasbit = upb_fielddef_index(f);
uint16_t ofs = lmd->field_offsets[upb_fielddef_index(f)];

@ -41,6 +41,13 @@ static int lupb_pbdecodermethod_new(lua_State *L) {
return 1; // The DecoderMethod wrapper.
}
// We implement upb's allocation function by allocating a Lua userdata.
// This is a raw hunk of memory that will be GC'd by Lua.
static void *lua_alloc(void *ud, size_t size) {
lua_State *L = ud;
return lua_newuserdata(L, size);
}
// Unlike most of our exposed Lua functions, this does not correspond to an
// actual method on the underlying DecoderMethod. But it's convenient, and
// important to implement in C because we can do stack allocation and
@ -61,19 +68,22 @@ static int lupb_pbdecodermethod_parse(lua_State *L) {
// Handlers need this.
lua_getuservalue(L, -1);
upb_pbdecoder decoder;
upb_status status = UPB_STATUS_INIT;
upb_pbdecoder_init(&decoder, method, &status);
upb_env env;
upb_env_init(&env);
upb_env_reporterrorsto(&env, &status);
upb_sink sink;
upb_sink_reset(&sink, handlers, msg);
upb_pbdecoder_resetoutput(&decoder, &sink);
upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(&decoder));
// TODO: Our need to call uninit isn't longjmp-safe; what if the decode
// triggers a Lua error? uninit is only needed if the decoder
// dynamically-allocated a growing stack -- ditch this feature and live with
// the compile-time limit? Or have a custom allocation function that
// allocates Lua GC-rooted memory?
upb_pbdecoder_uninit(&decoder);
upb_pbdecoder *decoder = upb_pbdecoder_create(&env, method, &sink);
upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder));
// This won't get called in the error case, which longjmps across us. But
// since we made our alloc function allocate only GC-able memory, that
// shouldn't matter. It *would* matter if the environment had references to
// any non-memory resources (i.e. file handles). As an alternative to this, we
// could make the environment itself a userdata.
upb_env_uninit(&env);
lupb_checkstatus(L, &status);
lua_pop(L, 1); // Uservalue.

@ -298,7 +298,8 @@ static PyObject *PyUpb_MessageDef_new(PyTypeObject *subtype,
static PyObject *PyUpb_MessageDef_add_fields(PyObject *o, PyObject *args);
static int PyUpb_MessageDef_init(PyObject *self, PyObject *args, PyObject *kwds) {
static int PyUpb_MessageDef_init(
PyObject *self, PyObject *args, PyObject *kwds) {
if (!kwds) return 0;
PyObject *key, *value;
Py_ssize_t pos = 0;
@ -323,7 +324,8 @@ static PyObject *PyUpb_MessageDef_getattro(PyObject *obj, PyObject *attr_name) {
return PyObject_GenericGetAttr(obj, attr_name);
}
static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val) {
static int PyUpb_MessageDef_setattro(
PyObject *o, PyObject *key, PyObject *val) {
upb_msgdef *m = Check_MessageDef(o, -1);
if (!upb_def_ismutable(UPB_UPCAST(m))) {
PyErr_SetString(PyExc_TypeError, "MessageDef is not mutable.");
@ -343,9 +345,11 @@ static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val)
// Python method "fields": builds and returns a new list holding, for every
// field of the MessageDef, the object produced by
// PyUpb_FieldDef_GetOrCreate().  |args| is unused (METH_NOARGS).
static PyObject *PyUpb_MessageDef_fields(PyObject *obj, PyObject *args) {
  upb_msgdef *m = Check_MessageDef(obj, NULL);
  PyObject *ret = PyList_New(0);
  if (!ret) return NULL;  // Allocation failed; the Python error is already set.
  upb_msg_field_iter i;
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    PyList_Append(ret, PyUpb_FieldDef_GetOrCreate(f));
  }
  return ret;
}
@ -374,9 +378,12 @@ static PyObject *PyUpb_MessageDef_add_field(PyObject *o, PyObject *field) {
}
static PyMethodDef PyUpb_MessageDef_methods[] = {
{"add_field", &PyUpb_MessageDef_add_field, METH_O, "Adds a list of fields."},
{"add_fields", &PyUpb_MessageDef_add_fields, METH_O, "Adds a list of fields."},
{"fields", &PyUpb_MessageDef_fields, METH_NOARGS, "Returns list of fields."},
{"add_field", &PyUpb_MessageDef_add_field, METH_O,
"Adds a list of fields."},
{"add_fields", &PyUpb_MessageDef_add_fields, METH_O,
"Adds a list of fields."},
{"fields", &PyUpb_MessageDef_fields, METH_NOARGS,
"Returns list of fields."},
{NULL, NULL}
};
@ -448,7 +455,8 @@ static PyObject *PyUpb_SymbolTable_new(PyTypeObject *subtype,
return PyUpb_ObjCacheGet(upb_symtab_new(), subtype);
}
static int PyUpb_SymbolTable_init(PyObject *self, PyObject *args, PyObject *kwds) {
static int PyUpb_SymbolTable_init(
PyObject *self, PyObject *args, PyObject *kwds) {
return 0;
}
@ -475,8 +483,10 @@ static PyObject *PyUpb_SymbolTable_add_defs(PyObject *o, PyObject *defs) {
cdefs[i++] = def;
upb_msgdef *md = upb_dyncast_msgdef(def);
if (!md) continue;
upb_msg_iter j;
for(j = upb_msg_begin(md); !upb_msg_done(j); j = upb_msg_next(md, j)) {
upb_msg_field_iter j;
for(upb_msg_field_begin(&j, md);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(j);
upb_fielddef_setaccessor(f, PyUpb_AccessorForField(f));
}
@ -601,7 +611,8 @@ static upb_sflow_t PyUpb_Message_StartSubmessage(void *m, upb_value fval) {
return UPB_CONTINUE_WITH(*submsg);
}
static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval) {
static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(
void *a, upb_value fval) {
(void)fval;
PyObject **elem = upb_stdarray_append(a, sizeof(void*));
PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(a))->alt_type;
@ -609,7 +620,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval
return UPB_CONTINUE_WITH(*elem);
}
static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) {
static upb_flow_t PyUpb_Message_StringValue(
void *m, upb_value fval, upb_value val) {
PyObject **str = PyUpb_Accessor_GetPtr(m, fval);
if (*str) { Py_DECREF(*str); }
*str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
@ -618,7 +630,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v
return UPB_CONTINUE;
}
static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) {
static upb_flow_t PyUpb_Message_AppendStringValue(
void *a, upb_value fval, upb_value val) {
(void)fval;
PyObject **elem = upb_stdarray_append(a, sizeof(void*));
*elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);

@ -273,9 +273,11 @@ static size_t rupb_sizeof(const upb_fielddef *f) {
static void assign_offsets(rb_msglayout *layout, const upb_msgdef *md) {
layout->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));
size_t ofs = 0;
upb_msg_iter i;
upb_msg_field_iter i;
for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
for (upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
size_t field_size = rupb_sizeof(f);
@ -301,8 +303,10 @@ static void make_prototype(rb_msglayout *layout, const upb_msgdef *md) {
// more specific initialization.
memset(prototype, 0, layout->size);
upb_msg_iter i;
for (upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for (upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
if (is_ruby_value(f)) {
size_t ofs = layout->field_offsets[upb_fielddef_index(f)];
@ -373,8 +377,10 @@ static void msgdef_mark(void *_rmd) {
rb_gc_mark(rmd->klass);
// Mark all submessage types.
upb_msg_iter i;
for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for (upb_msg_field_begin(&i, rmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (upb_fielddef_issubmsg(f)) {
// If we were trying to be more aggressively lazy, the submessage might
@ -495,8 +501,10 @@ static void msg_mark(void *p) {
// We need to mark all references to other Ruby values: strings, arrays, and
// submessages that we point to.
upb_msg_iter i;
for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for (upb_msg_field_begin(&i, rmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (is_ruby_value(f)) {
size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
@ -903,7 +911,8 @@ static void *submsg_handler(void *closure, const void *hd) {
const submsg_handlerdata_t *submsgdata = hd;
if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
DEREF(msg, submsgdata->ofs, VALUE) = msg_new(msgdef_getwrapper(submsgdata->md));
DEREF(msg, submsgdata->ofs, VALUE) =
msg_new(msgdef_getwrapper(submsgdata->md));
}
VALUE submsg = DEREF(msg, submsgdata->ofs, VALUE);
@ -912,9 +921,11 @@ static void *submsg_handler(void *closure, const void *hd) {
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
const rupb_MessageDef *rmd = get_rbmsgdef(upb_handlers_msgdef(h));
upb_msg_iter i;
upb_msg_field_iter i;
for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
for (upb_msg_field_begin(&i, rmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
size_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];
@ -1085,8 +1096,10 @@ static void putmsg(rupb_Message *msg, const rupb_MessageDef *rmd,
upb_sink *sink) {
upb_sink_startmsg(sink);
upb_msg_iter i;
for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for (upb_msg_field_begin(&i, rmd->md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
uint32_t ofs = rmd->layout.field_offsets[upb_fielddef_index(f)];

@ -211,6 +211,21 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
}
// Ensure that MapEntry submessages only appear as repeated fields, not
// optional/required (singular) fields.
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
upb_fielddef_msgsubdef(f) != NULL) {
const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
upb_status_seterrf(s,
"Field %s refers to mapentry message but is not "
"a repeated field",
upb_fielddef_name(f) ? upb_fielddef_name(f) :
"(unnamed)");
return false;
}
}
return true;
}
@ -248,10 +263,12 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
upb_fielddef **fields = malloc(n * sizeof(*fields));
if (!fields) return false;
upb_msg_iter j;
upb_msg_field_iter j;
int i;
m->submsg_field_count = 0;
for(i = 0, upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j), i++) {
for(i = 0, upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j), i++) {
upb_fielddef *f = upb_msg_iter_field(&j);
assert(f->msg.def == m);
if (!upb_validate_field(f, s)) {
@ -287,7 +304,9 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
upb_selector_t sel;
upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
for(upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(&j);
// These calls will assert-fail in upb_table if the value already exists.
TRY(UPB_HANDLER_INT32);
@ -545,6 +564,9 @@ static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
if (upb_fielddef_containingtype(f)) {
visit(r, UPB_UPCAST2(upb_fielddef_containingtype(f)), closure);
}
if (upb_fielddef_containingoneof(f)) {
visit(r, UPB_UPCAST2(upb_fielddef_containingoneof(f)), closure);
}
if (upb_fielddef_subdef(f)) {
visit(r, UPB_UPCAST(upb_fielddef_subdef(f)), closure);
}
@ -620,6 +642,7 @@ upb_fielddef *upb_fielddef_new(const void *owner) {
}
f->msg.def = NULL;
f->sub.def = NULL;
f->oneof = NULL;
f->subdef_is_symbolic = false;
f->msg_is_symbolic = false;
f->label_ = UPB_LABEL_OPTIONAL;
@ -749,6 +772,10 @@ const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
return f->msg_is_symbolic ? NULL : f->msg.def;
}
// Returns the oneof recorded in |f|'s |oneof| member, or NULL if the field has
// not been added to a oneof.
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
return f->oneof;
}
// Same lookup as upb_fielddef_containingtype(), with the const qualifier cast
// away from the result.
upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
return (upb_msgdef*)upb_fielddef_containingtype(f);
}
@ -777,6 +804,10 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
}
// Sets the field's name.  Fails (recording an error in |s|) if the field has
// already been added to a message or a oneof; otherwise the result is whatever
// upb_def_setfullname() returns.
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
// Containers index their fields by name (see the ntof tables), so the name is
// not allowed to change once the field is attached.
if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
upb_status_seterrmsg(s, "Already added to message or oneof");
return false;
}
return upb_def_setfullname(UPB_UPCAST(f), name, s);
}
@ -1227,6 +1258,11 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) {
return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
}
bool upb_fielddef_ismap(const upb_fielddef *f) {
return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
// A field has a subdef when it is a submessage field or an enum field.
bool upb_fielddef_hassubdef(const upb_fielddef *f) {
  if (upb_fielddef_issubmsg(f)) return true;
  return upb_fielddef_type(f) == UPB_TYPE_ENUM;
}
@ -1248,15 +1284,25 @@ bool upb_fielddef_checkdescriptortype(int32_t type) {
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
void *closure) {
const upb_msgdef *m = (const upb_msgdef*)r;
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
visit(r, UPB_UPCAST2(f), closure);
}
upb_msg_oneof_iter o;
for(upb_msg_oneof_begin(&o, m);
!upb_msg_oneof_done(&o);
upb_msg_oneof_next(&o)) {
upb_oneofdef *f = upb_msg_iter_oneof(&o);
visit(r, UPB_UPCAST2(f), closure);
}
}
static void freemsg(upb_refcounted *r) {
upb_msgdef *m = (upb_msgdef*)r;
upb_strtable_uninit(&m->ntoo);
upb_strtable_uninit(&m->ntof);
upb_inttable_uninit(&m->itof);
upb_def_uninit(UPB_UPCAST(m));
@ -1268,14 +1314,17 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
upb_msgdef *m = malloc(sizeof(*m));
if (!m) return NULL;
if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
m->map_entry = false;
return m;
err1:
upb_inttable_uninit(&m->itof);
upb_strtable_uninit(&m->ntof);
err2:
upb_inttable_uninit(&m->itof);
err3:
free(m);
return NULL;
}
@ -1287,14 +1336,28 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
upb_def_fullname(UPB_UPCAST(m)), NULL);
newm->map_entry = m->map_entry;
UPB_ASSERT_VAR(ok, ok);
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
// Fields in oneofs are dup'd below.
if (upb_fielddef_containingoneof(f)) continue;
if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
upb_msgdef_unref(newm, owner);
return NULL;
}
}
upb_msg_oneof_iter o;
for(upb_msg_oneof_begin(&o, m);
!upb_msg_oneof_done(&o);
upb_msg_oneof_next(&o)) {
upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
upb_msgdef_unref(newm, owner);
return NULL;
}
}
return newm;
}
@ -1333,6 +1396,35 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
return upb_def_setfullname(UPB_UPCAST(m), fullname, s);
}
// Validates that |f| could be inserted into |m|: the field must not already
// belong to a message, must have both a name and a nonzero number, and neither
// may collide with a field that |m| already contains.  On the first violation
// the reason is stored in |s| and false is returned; otherwise returns true.
static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
                            upb_status *s) {
  const char *problem = NULL;

  if (upb_fielddef_containingtype(f) != NULL) {
    problem = "fielddef already belongs to a message";
  } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    problem = "field name or number were not set";
  } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
             upb_msgdef_itof(m, upb_fielddef_number(f))) {
    problem = "duplicate field name or number for field";
  }

  if (problem == NULL) return true;
  upb_status_seterrmsg(s, problem);
  return false;
}
// Unconditionally attaches |f| to |m|: points the field at the message, indexes
// it by number and by name, and links the two objects with ref2s in both
// directions.  If |ref_donor| is non-NULL, the ref it held on |f| is released.
// Performs no validation; callers run check_field_add() first.
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
// NOTE(review): release_containingtype() is defined outside this view;
// presumably it drops a symbolic containing-type name before msg.def is
// overwritten -- confirm.
release_containingtype(f);
f->msg.def = m;
f->msg_is_symbolic = false;
upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
upb_ref2(f, m);
upb_ref2(m, f);
// The donor's ref is dropped only after the ref2s above have been taken.
if (ref_donor) upb_fielddef_unref(f, ref_donor);
}
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
upb_status *s) {
// TODO: extensions need to have a separate namespace, because proto2 allows a
@ -1346,28 +1438,65 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
// We also need to validate that the field number is in an extension range iff
// it is an extension.
// This method is idempotent. Check if |f| is already part of this msgdef and
// return immediately if so.
if (upb_fielddef_containingtype(f) == m) {
return true;
}
// Check constraints for all fields before performing any action.
if (upb_fielddef_containingtype(f) != NULL) {
upb_status_seterrmsg(s, "fielddef already belongs to a message");
if (!check_field_add(m, f, s)) {
return false;
} else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
upb_status_seterrmsg(s, "field name or number were not set");
return false;
} else if(upb_msgdef_itof(m, upb_fielddef_number(f)) ||
upb_msgdef_ntofz(m, upb_fielddef_name(f))) {
upb_status_seterrmsg(s, "duplicate field name or number");
} else if (upb_fielddef_containingoneof(f) != NULL) {
// Fields in a oneof can only be added by adding the oneof to the msgdef.
upb_status_seterrmsg(s, "fielddef is part of a oneof");
return false;
}
// Constraint checks ok, perform the action.
release_containingtype(f);
f->msg.def = m;
f->msg_is_symbolic = false;
upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
upb_ref2(f, m);
upb_ref2(m, f);
if (ref_donor) upb_fielddef_unref(f, ref_donor);
add_field(m, f, ref_donor);
return true;
}
// Adds oneof |o|, together with every field it currently contains, to message
// |m|.  All checks run before anything is modified, so on failure (false, with
// the reason in |s|) the message is left unchanged.  On success the ref held by
// |ref_donor| (if non-NULL) on |o| is released.
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  // The oneof must be unattached, named, and uniquely named within |m|.
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
    upb_status_seterrmsg(s, "duplicate oneof name");
    return false;
  }

  // Validation pass: every member field must be addable to |m| on its own.
  upb_oneof_iter member;
  upb_oneof_begin(&member, o);
  while (!upb_oneof_done(&member)) {
    const upb_fielddef *candidate = upb_oneof_iter_field(&member);
    if (!check_field_add(m, candidate, s)) {
      return false;
    }
    upb_oneof_next(&member);
  }

  // Commit pass.  Register the oneof itself first...
  o->parent = m;
  upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  // ...then register each of its fields directly on the message.
  upb_oneof_begin(&member, o);
  while (!upb_oneof_done(&member)) {
    upb_fielddef *field = upb_oneof_iter_field(&member);
    add_field(m, field, NULL);
    upb_oneof_next(&member);
  }

  if (ref_donor) upb_oneofdef_unref(o, ref_donor);

  return true;
}
@ -1385,10 +1514,21 @@ const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
upb_value_getptr(val) : NULL;
}
// Looks up a oneof of |m| by name (|name| need not be NUL-terminated; |len|
// gives its length).  Returns NULL when no oneof with that name is present.
const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
                                    size_t len) {
  upb_value found;
  if (!upb_strtable_lookup2(&m->ntoo, name, len, &found)) {
    return NULL;
  }
  return upb_value_getptr(found);
}
// Returns the number of entries in the message's name-to-field table.
int upb_msgdef_numfields(const upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
// Returns the number of entries in the message's name-to-oneof table.
int upb_msgdef_numoneofs(const upb_msgdef *m) {
return upb_strtable_count(&m->ntoo);
}
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
assert(!upb_msgdef_isfrozen(m));
m->map_entry = map_entry;
@ -1398,18 +1538,245 @@ bool upb_msgdef_mapentry(const upb_msgdef *m) {
return m->map_entry;
}
void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
// Starts iteration over the fields of |m| by initializing |iter| over the
// message's number-to-field table (itof).
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
upb_inttable_begin(iter, &m->itof);
}
void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); }
// Advances a field iterator; forwards to upb_inttable_next().
void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
// Reports whether a field iterator is finished; forwards to
// upb_inttable_done().
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
return upb_inttable_done(iter);
}
// Returns the fielddef stored as the table value at the iterator's current
// position.  The result is non-const regardless of how the msgdef was obtained.
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
}
// Forwards to upb_inttable_iter_setdone() to put |iter| into its "done" state.
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
upb_inttable_iter_setdone(iter);
}
// Starts iteration over the oneofs of |m| by initializing |iter| over the
// message's name-to-oneof table (ntoo).
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
upb_strtable_begin(iter, &m->ntoo);
}
// Advances a oneof iterator; forwards to upb_strtable_next().
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
bool upb_msg_done(const upb_msg_iter *iter) { return upb_inttable_done(iter); }
// Reports whether a oneof iterator is finished; forwards to
// upb_strtable_done().
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
return upb_strtable_done(iter);
}
// Returns the oneofdef stored as the table value at the iterator's current
// position.
upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
}
// Forwards to upb_strtable_iter_setdone() to put |iter| into its "done" state.
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
upb_strtable_iter_setdone(iter);
}
/* upb_oneofdef ***************************************************************/
// Refcounting "visit" callback for a oneofdef: invokes |visit| once for each
// member field and then, if the oneof is attached to a message, once for that
// message.
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *self = (const upb_oneofdef*)r;

  upb_oneof_iter member;
  upb_oneof_begin(&member, self);
  while (!upb_oneof_done(&member)) {
    const upb_fielddef *field = upb_oneof_iter_field(&member);
    visit(r, UPB_UPCAST2(field), closure);
    upb_oneof_next(&member);
  }

  if (self->parent != NULL) {
    visit(r, UPB_UPCAST2(self->parent), closure);
  }
}
// Refcounting "free" callback for a oneofdef: tears down both lookup tables and
// the upb_def base, then releases the object's own storage.
static void freeoneof(upb_refcounted *r) {
upb_oneofdef *o = (upb_oneofdef*)r;
upb_strtable_uninit(&o->ntof);
upb_inttable_uninit(&o->itof);
upb_def_uninit(UPB_UPCAST(o));
free(o);
}
// Allocates a new, empty oneofdef that is not attached to any message.
// |owner| is passed to upb_def_init() as the owner of the initial ref.
// Returns NULL if the allocation or any initialization step fails.
upb_oneofdef *upb_oneofdef_new(const void *owner) {
  static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
  upb_oneofdef *o = malloc(sizeof(*o));
  // malloc() may return NULL, so check before writing to the object.
  if (!o) return NULL;
  o->parent = NULL;
  // NOTE(review): the error paths below free |o| without calling
  // upb_def_uninit(); confirm upb_def_init() holds nothing that would need
  // releasing when a later table init fails.
  if (!upb_def_init(UPB_UPCAST(o), UPB_DEF_ONEOF, &vtbl, owner)) goto err2;
  if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
  if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
  return o;

err1:
  upb_inttable_uninit(&o->itof);
err2:
  free(o);
  return NULL;
}
// Returns a new oneofdef, owned by |owner|, with the same name as |o| and a
// duplicate of each of |o|'s fields.  The copy is not attached to any message.
// Returns NULL if an allocation fails or a duplicated field cannot be added.
upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
  upb_oneofdef *newo = upb_oneofdef_new(owner);
  if (!newo) return NULL;
  bool ok = upb_def_setfullname(UPB_UPCAST(newo),
                                upb_def_fullname(UPB_UPCAST(o)), NULL);
  UPB_ASSERT_VAR(ok, ok);
  upb_oneof_iter i;
  for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
    // Each duplicate is created with &f as its temporary owner; a successful
    // upb_oneofdef_addfield() releases that ref on our behalf.
    upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
    if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
      // addfield only consumes the donor ref on success, so on failure the
      // duplicate is still ours to release.
      if (f) upb_fielddef_unref(f, &f);
      upb_oneofdef_unref(newo, owner);
      return NULL;
    }
  }
  return newo;
}
// Forwards to upb_def_isfrozen() on the oneof's upb_def base.
bool upb_oneofdef_isfrozen(const upb_oneofdef *o) {
return upb_def_isfrozen(UPB_UPCAST(o));
}
// Takes a ref on |o| for |owner|; forwards to upb_def_ref().
void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner) {
upb_def_ref(UPB_UPCAST(o), owner);
}
// Releases |owner|'s ref on |o|; forwards to upb_def_unref().
void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner) {
upb_def_unref(UPB_UPCAST(o), owner);
}
// Transfers a ref on |o| from owner |from| to owner |to|; forwards to
// upb_def_donateref().
void upb_oneofdef_donateref(const upb_oneofdef *o, const void *from,
const void *to) {
upb_def_donateref(UPB_UPCAST(o), from, to);
}
// Forwards to upb_def_checkref() for |o| and |owner|.
void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner) {
upb_def_checkref(UPB_UPCAST(o), owner);
}
// Returns the oneof's name, which is stored as the "full name" of its upb_def
// base.  Callers in this file treat a NULL result as "name not set".
const char *upb_oneofdef_name(const upb_oneofdef *o) {
return upb_def_fullname(UPB_UPCAST(o));
}
// Sets the oneof's name.  Fails (recording an error in |s|) once the oneof has
// been added to a message; otherwise the result is whatever
// upb_def_setfullname() returns.
bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
upb_status *s) {
// The parent message indexes its oneofs by name (ntoo), so the name is not
// allowed to change after attachment.
if (upb_oneofdef_containingtype(o)) {
upb_status_seterrmsg(s, "oneof already added to a message");
return false;
}
return upb_def_setfullname(UPB_UPCAST(o), fullname, s);
}
// Returns the message this oneof has been added to, or NULL if it has not been
// added to one (|parent| starts out NULL in upb_oneofdef_new()).
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
return o->parent;
}
// Returns the number of entries in the oneof's name-to-field table.
int upb_oneofdef_numfields(const upb_oneofdef *o) {
return upb_strtable_count(&o->ntof);
}
// Adds field |f| to oneof |o|.
//
// Requires (asserted) that |o| and, if attached, its parent message are not
// frozen.  Returns true if |f| is already in |o| or was added; returns false
// with the reason in |s| if a check fails.  All checks run before |o|'s own
// tables are touched.  If |o| is attached to a message and |f| is not, |f| is
// first added to that message.  On a fresh add, the ref held by |ref_donor|
// (if non-NULL) on |f| is released; the early "already present" return does
// not touch |ref_donor|.
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
const void *ref_donor,
upb_status *s) {
assert(!upb_oneofdef_isfrozen(o));
assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
// This method is idempotent. Check if |f| is already part of this oneofdef
// and return immediately if so.
if (upb_fielddef_containingoneof(f) == o) {
return true;
}
// The field must have an OPTIONAL label.
if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
return false;
}
// Check that no field with this name or number exists already in the oneof.
// Also check that the field is not already part of a oneof.
if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
upb_status_seterrmsg(s, "field name or number were not set");
return false;
} else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
upb_status_seterrmsg(s, "duplicate field name or number");
return false;
} else if (upb_fielddef_containingoneof(f) != NULL) {
upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
return false;
}
// We allow adding a field to the oneof either if the field is not part of a
// msgdef, or if it is and we are also part of the same msgdef.
if (o->parent == NULL) {
// If we're not in a msgdef, the field cannot be either. Otherwise we would
// need to magically add this oneof to a msgdef to remain consistent, which
// is surprising behavior.
if (upb_fielddef_containingtype(f) != NULL) {
upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
"oneof does not");
return false;
}
} else {
// If we're in a msgdef, the user can add fields that either aren't in any
// msgdef (in which case they're added to our msgdef) or already a part of
// our msgdef.
if (upb_fielddef_containingtype(f) != NULL &&
upb_fielddef_containingtype(f) != o->parent) {
upb_status_seterrmsg(s, "fielddef belongs to a different message "
"than oneof");
return false;
}
}
// Commit phase. First add the field to our parent msgdef, if any, because
// that may fail; then add the field to our own tables.
if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
// f->oneof is still NULL at this point, so upb_msgdef_addfield() does not
// reject the field for being part of a oneof.
if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
return false;
}
}
// NOTE(review): release_containingtype() is defined outside this view and is
// called here even though f->msg is not reassigned afterwards -- confirm it
// is a no-op when the field's containing type is not symbolic.
release_containingtype(f);
f->oneof = o;
upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
upb_ref2(f, o);
upb_ref2(o, f);
if (ref_donor) upb_fielddef_unref(f, ref_donor);
return true;
}
// Looks up a member field of |o| by name (|name| need not be NUL-terminated;
// |length| gives its length).  Returns NULL when the oneof has no such field.
const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
                                      const char *name, size_t length) {
  upb_value found;
  if (!upb_strtable_lookup2(&o->ntof, name, length, &found)) {
    return NULL;
  }
  return upb_value_getptr(found);
}
// Looks up a member field of |o| by field number.  Returns NULL when the oneof
// has no field with that number.
const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
  upb_value found;
  if (!upb_inttable_lookup32(&o->itof, num, &found)) {
    return NULL;
  }
  return upb_value_getptr(found);
}
// Starts iteration over the fields of oneof |o| by initializing |iter| over
// the oneof's number-to-field table (itof).
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
upb_inttable_begin(iter, &o->itof);
}
// Advances a oneof field iterator; forwards to upb_inttable_next().
void upb_oneof_next(upb_oneof_iter *iter) {
upb_inttable_next(iter);
}
// Reports whether a oneof field iterator is finished; forwards to
// upb_inttable_done().
// NOTE(review): |iter| is non-const here, unlike upb_msg_field_done() which
// takes a const iterator for the same underlying call -- confirm whether the
// declaration should be made const as well.
bool upb_oneof_done(upb_oneof_iter *iter) {
return upb_inttable_done(iter);
}
upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter) {
// Returns the fielddef stored as the table value at the iterator's current
// position.
upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
}
void upb_msg_iter_setdone(upb_msg_iter *iter) {
// Forwards to upb_inttable_iter_setdone() to put |iter| into its "done" state.
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
upb_inttable_iter_setdone(iter);
}

@ -34,6 +34,7 @@ class Def;
class EnumDef;
class FieldDef;
class MessageDef;
class OneofDef;
}
#endif
@ -41,6 +42,7 @@ UPB_DECLARE_TYPE(upb::Def, upb_def);
UPB_DECLARE_TYPE(upb::EnumDef, upb_enumdef);
UPB_DECLARE_TYPE(upb::FieldDef, upb_fielddef);
UPB_DECLARE_TYPE(upb::MessageDef, upb_msgdef);
UPB_DECLARE_TYPE(upb::OneofDef, upb_oneofdef);
// Maximum field number allowed for FieldDefs. This is an inherent limit of the
// protobuf wire format.
@ -64,6 +66,7 @@ typedef enum {
UPB_DEF_MSG,
UPB_DEF_FIELD,
UPB_DEF_ENUM,
UPB_DEF_ONEOF,
UPB_DEF_SERVICE, // Not yet implemented.
UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
} upb_deftype_t;
@ -348,6 +351,10 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
const MessageDef* containing_type() const;
const char* containing_type_name();
// The OneofDef to which this field belongs, or NULL if this field is not part
// of a oneof.
const OneofDef* containing_oneof() const;
// The field's type according to the enum in descriptor.proto. This is not
// the same as UPB_TYPE_*, because it distinguishes between (for example)
// INT32 and SINT32, whereas our "type" enum does not. This return of
@ -361,6 +368,7 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
bool IsString() const;
bool IsSequence() const;
bool IsPrimitive() const;
bool IsMap() const;
// How integers are encoded. Only meaningful for integer types.
// Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes.
@ -521,6 +529,7 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def,
} sub; // The msgdef or enumdef for this field, if upb_hassubdef(f).
bool subdef_is_symbolic;
bool msg_is_symbolic;
const upb_oneofdef *oneof;
bool default_is_string;
bool type_is_set_; // False until type is explicitly set.
bool is_extension_;
@ -536,11 +545,11 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def,
));
#define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, \
packed, name, num, msgdef, subdef, selector_base, \
packed, name, num, msgdef, subdef, selector_base, \
index, defaultval, refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef}, \
{subdef}, false, false, \
{subdef}, NULL, false, false, \
type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, is_extension, \
lazy, packed, intfmt, tagdelim, type, label, num, selector_base, index \
}
@ -574,6 +583,7 @@ bool upb_fielddef_isextension(const upb_fielddef *f);
bool upb_fielddef_lazy(const upb_fielddef *f);
bool upb_fielddef_packed(const upb_fielddef *f);
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f);
upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f);
const char *upb_fielddef_containingtypename(upb_fielddef *f);
upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f);
@ -583,6 +593,7 @@ bool upb_fielddef_issubmsg(const upb_fielddef *f);
bool upb_fielddef_isstring(const upb_fielddef *f);
bool upb_fielddef_isseq(const upb_fielddef *f);
bool upb_fielddef_isprimitive(const upb_fielddef *f);
bool upb_fielddef_ismap(const upb_fielddef *f);
int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
@ -641,7 +652,8 @@ UPB_END_EXTERN_C // }
/* upb::MessageDef ************************************************************/
typedef upb_inttable_iter upb_msg_iter;
typedef upb_inttable_iter upb_msg_field_iter;
typedef upb_strtable_iter upb_msg_oneof_iter;
// Structure that describes a single .proto message type.
//
@ -671,14 +683,37 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
// The number of fields that belong to the MessageDef.
int field_count() const;
// The number of oneofs that belong to the MessageDef.
int oneof_count() const;
// Adds a field (upb_fielddef object) to a msgdef. Requires that the msgdef
// and the fielddefs are mutable. The fielddef's name and number must be
// set, and the message may not already contain any field with this name or
// number, and this fielddef may not be part of another message. In error
// cases false is returned and the msgdef is unchanged.
//
// If the given field is part of a oneof, this call succeeds if and only if
// that oneof is already part of this msgdef. (Note that adding a oneof to a
// msgdef automatically adds all of its fields to the msgdef at the time that
// the oneof is added, so it is usually more idiomatic to add the oneof's
// fields first then add the oneof to the msgdef. This case is supported for
// convenience.)
//
// If |f| is already part of this MessageDef, this method performs no action
// and returns true (success). Thus, this method is idempotent.
bool AddField(FieldDef* f, Status* s);
bool AddField(const reffed_ptr<FieldDef>& f, Status* s);
// Adds a oneof (upb_oneofdef object) to a msgdef. Requires that the msgdef,
// oneof, and any fielddefs are mutable, that the fielddefs contained in the
// oneof do not have any name or number conflicts with existing fields in the
// msgdef, and that the oneof's name is unique among all oneofs in the msgdef.
// If the oneof is added successfully, all of its fields will be added
// directly to the msgdef as well. In error cases, false is returned and the
// msgdef is unchanged.
bool AddOneof(OneofDef* o, Status* s);
bool AddOneof(const reffed_ptr<OneofDef>& o, Status* s);
// These return NULL if the field is not found.
FieldDef* FindFieldByNumber(uint32_t number);
FieldDef* FindFieldByName(const char *name, size_t len);
@ -702,6 +737,25 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
return FindFieldByName(str.c_str(), str.size());
}
OneofDef* FindOneofByName(const char* name, size_t len);
const OneofDef* FindOneofByName(const char* name, size_t len) const;
OneofDef* FindOneofByName(const char* name) {
return FindOneofByName(name, strlen(name));
}
const OneofDef* FindOneofByName(const char* name) const {
return FindOneofByName(name, strlen(name));
}
template<class T>
OneofDef* FindOneofByName(const T& str) {
return FindOneofByName(str.c_str(), str.size());
}
template<class T>
const OneofDef* FindOneofByName(const T& str) const {
return FindOneofByName(str.c_str(), str.size());
}
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced
// with just the name of the submessage. Returns NULL if memory allocation
@ -717,39 +771,117 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
bool mapentry() const;
// Iteration over fields. The order is undefined.
class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
class field_iterator
: public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
explicit iterator(MessageDef* md);
static iterator end(MessageDef* md);
explicit field_iterator(MessageDef* md);
static field_iterator end(MessageDef* md);
void operator++();
FieldDef* operator*() const;
bool operator!=(const iterator& other) const;
bool operator==(const iterator& other) const;
bool operator!=(const field_iterator& other) const;
bool operator==(const field_iterator& other) const;
private:
upb_msg_iter iter_;
upb_msg_field_iter iter_;
};
class const_iterator
class const_field_iterator
: public std::iterator<std::forward_iterator_tag, const FieldDef*> {
public:
explicit const_iterator(const MessageDef* md);
static const_iterator end(const MessageDef* md);
explicit const_field_iterator(const MessageDef* md);
static const_field_iterator end(const MessageDef* md);
void operator++();
const FieldDef* operator*() const;
bool operator!=(const const_iterator& other) const;
bool operator==(const const_iterator& other) const;
bool operator!=(const const_field_iterator& other) const;
bool operator==(const const_field_iterator& other) const;
private:
upb_msg_iter iter_;
upb_msg_field_iter iter_;
};
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
// Iteration over oneofs. The order is undefined.
class oneof_iterator
: public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
explicit oneof_iterator(MessageDef* md);
static oneof_iterator end(MessageDef* md);
void operator++();
OneofDef* operator*() const;
bool operator!=(const oneof_iterator& other) const;
bool operator==(const oneof_iterator& other) const;
private:
upb_msg_oneof_iter iter_;
};
// Forward iterator over the oneofs of a const MessageDef.  Dereferencing
// yields const OneofDef*, so that is the value type advertised through
// std::iterator.
class const_oneof_iterator
    : public std::iterator<std::forward_iterator_tag, const OneofDef*> {
 public:
  explicit const_oneof_iterator(const MessageDef* md);
  static const_oneof_iterator end(const MessageDef* md);

  void operator++();
  const OneofDef* operator*() const;
  bool operator!=(const const_oneof_iterator& other) const;
  bool operator==(const const_oneof_iterator& other) const;

 private:
  upb_msg_oneof_iter iter_;
};
// Range adaptor returned by MessageDef::fields(): begin()/end() forward to the
// message's field_begin()/field_end().  Both are const because they do not
// modify the accessor itself, which lets a const accessor be iterated.
class FieldAccessor {
 public:
  explicit FieldAccessor(MessageDef* msg) : msg_(msg) {}
  field_iterator begin() const { return msg_->field_begin(); }
  field_iterator end() const { return msg_->field_end(); }

 private:
  MessageDef* msg_;
};
// Range adaptor returned by the const overload of MessageDef::fields():
// begin()/end() forward to the const field_begin()/field_end().  Both are
// const because they do not modify the accessor itself, which lets a const
// accessor be iterated.
class ConstFieldAccessor {
 public:
  explicit ConstFieldAccessor(const MessageDef* msg) : msg_(msg) {}
  const_field_iterator begin() const { return msg_->field_begin(); }
  const_field_iterator end() const { return msg_->field_end(); }

 private:
  const MessageDef* msg_;
};
// Range adaptor returned by MessageDef::oneofs(): begin()/end() forward to the
// message's oneof_begin()/oneof_end().  Both are const because they do not
// modify the accessor itself, which lets a const accessor be iterated.
class OneofAccessor {
 public:
  explicit OneofAccessor(MessageDef* msg) : msg_(msg) {}
  oneof_iterator begin() const { return msg_->oneof_begin(); }
  oneof_iterator end() const { return msg_->oneof_end(); }

 private:
  MessageDef* msg_;
};
// Range adaptor returned by the const overload of MessageDef::oneofs():
// begin()/end() forward to the const oneof_begin()/oneof_end().  Both are
// const because they do not modify the accessor itself, which lets a const
// accessor be iterated.
class ConstOneofAccessor {
 public:
  explicit ConstOneofAccessor(const MessageDef* msg) : msg_(msg) {}
  const_oneof_iterator begin() const { return msg_->oneof_begin(); }
  const_oneof_iterator end() const { return msg_->oneof_end(); }

 private:
  const MessageDef* msg_;
};
field_iterator field_begin();
field_iterator field_end();
const_field_iterator field_begin() const;
const_field_iterator field_end() const;
oneof_iterator oneof_begin();
oneof_iterator oneof_end();
const_oneof_iterator oneof_begin() const;
const_oneof_iterator oneof_end() const;
FieldAccessor fields() { return FieldAccessor(this); }
ConstFieldAccessor fields() const { return ConstFieldAccessor(this); }
OneofAccessor oneofs() { return OneofAccessor(this); }
ConstOneofAccessor oneofs() const { return ConstOneofAccessor(this); }
private:
UPB_DISALLOW_POD_OPS(MessageDef, upb::MessageDef);
@ -762,6 +894,9 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// Tables for looking up oneofs by name.
upb_strtable ntoo; // name to oneof
// Is this a map-entry message?
// TODO: set this flag properly for static descriptors; regenerate
// descriptor.upb.c.
@ -770,11 +905,14 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
// TODO(haberman): proper extension ranges (there can be multiple).
));
// TODO: also support static initialization of the oneofs table. This will be
// needed if we compile in descriptors that contain oneofs.
#define UPB_MSGDEF_INIT(name, selector_count, submsg_field_count, itof, ntof, \
refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \
submsg_field_count, itof, ntof, false \
submsg_field_count, itof, ntof, \
UPB_EMPTY_STRTABLE_INIT(UPB_CTYPE_PTR), false \
}
UPB_BEGIN_EXTERN_C // {
@ -798,6 +936,8 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s);
upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner);
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
upb_status *s);
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
upb_status *s);
// Field lookup in a couple of different variations:
// - itof = int to field
@ -822,11 +962,38 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
}
// Oneof lookup:
// - ntoo = name to oneof
// - ntooz = name to oneof, null-terminated string.
const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
size_t len);
int upb_msgdef_numoneofs(const upb_msgdef *m);
// Convenience form of upb_msgdef_ntoo() for NUL-terminated names: the length
// is taken with strlen(), so |name| must not be NULL.
UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m,
const char *name) {
return upb_msgdef_ntoo(m, name, strlen(name));
}
// Same lookup as upb_msgdef_ntoo(), for a mutable msgdef: the const qualifier
// is cast away from the result.
UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m,
const char *name, size_t len) {
return (upb_oneofdef *)upb_msgdef_ntoo(m, name, len);
}
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
bool upb_msgdef_mapentry(const upb_msgdef *m);
// upb_msg_iter i;
// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
// Well-known field tag numbers for map-entry messages.
#define UPB_MAPENTRY_KEY 1
#define UPB_MAPENTRY_VALUE 2
const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m,
const char *name);
int upb_msgdef_numoneofs(const upb_msgdef *m);
// upb_msg_field_iter i;
// for(upb_msg_field_begin(&i, m);
// !upb_msg_field_done(&i);
// upb_msg_field_next(&i)) {
// upb_fielddef *f = upb_msg_iter_field(&i);
// // ...
// }
@ -834,11 +1001,18 @@ bool upb_msgdef_mapentry(const upb_msgdef *m);
// For C we don't have separate iterators for const and non-const.
// It is the caller's responsibility to cast the upb_fielddef* to
// const if the upb_msgdef* is const.
void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m);
void upb_msg_next(upb_msg_iter *iter);
bool upb_msg_done(const upb_msg_iter *iter);
upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter);
void upb_msg_iter_setdone(upb_msg_iter *iter);
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m);
void upb_msg_field_next(upb_msg_field_iter *iter);
bool upb_msg_field_done(const upb_msg_field_iter *iter);
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter);
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter);
// Similar to above, we also support iterating through the oneofs in a msgdef.
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m);
void upb_msg_oneof_next(upb_msg_oneof_iter *iter);
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter);
upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter);
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter);
UPB_END_EXTERN_C // }
@ -980,6 +1154,172 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter);
UPB_END_EXTERN_C // }
/* upb::OneofDef **************************************************************/
typedef upb_inttable_iter upb_oneof_iter;
// Class that represents a oneof. Its base class is upb::Def (convert with
// upb::upcast()).
UPB_DEFINE_DEF(upb::OneofDef, oneofdef, ONEOF, UPB_QUOTE(
public:
// Returns NULL if memory allocation failed.
static reffed_ptr<OneofDef> New();
// Functionality from upb::RefCounted.
bool IsFrozen() const;
void Ref(const void* owner) const;
void Unref(const void* owner) const;
void DonateRef(const void* from, const void* to) const;
void CheckRef(const void* owner) const;
// Functionality from upb::Def.
const char* full_name() const;
// Returns the MessageDef that owns this OneofDef.
const MessageDef* containing_type() const;
// Returns the name of this oneof. This is the name used to look up the oneof
// by name once added to a message def.
const char* name() const;
bool set_name(const char* name, Status* s);
// Returns the number of fields currently defined in the oneof.
int field_count() const;
// Adds a field to the oneof. The field must not have been added to any other
// oneof or msgdef. If the oneof is not yet part of a msgdef, then when the
// oneof is eventually added to a msgdef, all fields added to the oneof will
// also be added to the msgdef at that time. If the oneof is already part of a
// msgdef, the field must either be a part of that msgdef already, or must not
// be a part of any msgdef; in the latter case, the field is added to the
// msgdef as a part of this operation.
//
// The field may only have an OPTIONAL label, never REQUIRED or REPEATED.
//
// If |field| is already part of this OneofDef, this method performs no action
// and returns true (success). Thus, this method is idempotent.
bool AddField(FieldDef* field, Status* s);
bool AddField(const reffed_ptr<FieldDef>& field, Status* s);
// Looks up by name.
const FieldDef* FindFieldByName(const char* name, size_t len) const;
FieldDef* FindFieldByName(const char* name, size_t len);
const FieldDef* FindFieldByName(const char* name) const {
return FindFieldByName(name, strlen(name));
}
FieldDef* FindFieldByName(const char* name) {
return FindFieldByName(name, strlen(name));
}
template <class T>
FieldDef* FindFieldByName(const T& str) {
return FindFieldByName(str.c_str(), str.size());
}
template <class T>
const FieldDef* FindFieldByName(const T& str) const {
return FindFieldByName(str.c_str(), str.size());
}
// Looks up by tag number.
const FieldDef* FindFieldByNumber(uint32_t num) const;
// Returns a new OneofDef with all the same fields. The OneofDef will be owned
// by the given owner.
OneofDef* Dup(const void* owner) const;
// Iteration over fields. The order is undefined.
class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
explicit iterator(OneofDef* md);
static iterator end(OneofDef* md);
void operator++();
FieldDef* operator*() const;
bool operator!=(const iterator& other) const;
bool operator==(const iterator& other) const;
private:
upb_oneof_iter iter_;
};
class const_iterator
: public std::iterator<std::forward_iterator_tag, const FieldDef*> {
public:
explicit const_iterator(const OneofDef* md);
static const_iterator end(const OneofDef* md);
void operator++();
const FieldDef* operator*() const;
bool operator!=(const const_iterator& other) const;
bool operator==(const const_iterator& other) const;
private:
upb_oneof_iter iter_;
};
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
private:
UPB_DISALLOW_POD_OPS(OneofDef, upb::OneofDef);
),
UPB_DEFINE_STRUCT(upb_oneofdef, upb_def,
upb_strtable ntof;
upb_inttable itof;
const upb_msgdef *parent;
));
// Static initializer for a upb_oneofdef.  The def must be tagged with the
// oneof def type (the enum tag here was a copy-paste slip that would make a
// statically-initialized oneof identify itself as an enum).  The trailing
// "parent" member is not listed, so aggregate initialization leaves it zeroed.
#define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \
  { UPB_DEF_INIT(name, UPB_DEF_ONEOF, refs, ref2s), ntof, itof }
UPB_BEGIN_EXTERN_C // {
// Native C API.
upb_oneofdef *upb_oneofdef_new(const void *owner);
upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner);
// From upb_refcounted.
void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner);
bool upb_oneofdef_isfrozen(const upb_oneofdef *e);
void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner);
void upb_oneofdef_donateref(const upb_oneofdef *m, const void *from,
const void *to);
void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner);
const char *upb_oneofdef_name(const upb_oneofdef *o);
bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s);
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o);
int upb_oneofdef_numfields(const upb_oneofdef *o);
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
const void *ref_donor,
upb_status *s);
// Oneof lookups:
// - ntof: look up a field by name.
// - ntofz: look up a field by name (as a null-terminated string).
// - itof: look up a field by number.
const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
const char *name, size_t length);
// Convenience form of upb_oneofdef_ntof() for a NUL-terminated field name.
UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o,
                                                  const char *name) {
  size_t name_len = strlen(name);
  return upb_oneofdef_ntof(o, name, name_len);
}
const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num);
// upb_oneof_iter i;
// for(upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
// // ...
// }
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o);
void upb_oneof_next(upb_oneof_iter *iter);
bool upb_oneof_done(upb_oneof_iter *iter);
upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter);
void upb_oneof_iter_setdone(upb_oneof_iter *iter);
UPB_END_EXTERN_C // }
#ifdef __cplusplus
@ -1106,6 +1446,9 @@ inline void FieldDef::set_packed(bool packed) {
inline const MessageDef* FieldDef::containing_type() const {
return upb_fielddef_containingtype(this);
}
inline const OneofDef* FieldDef::containing_oneof() const {
return upb_fielddef_containingoneof(this);
}
inline const char* FieldDef::containing_type_name() {
return upb_fielddef_containingtypename(this);
}
@ -1142,6 +1485,7 @@ inline bool FieldDef::IsSubMessage() const {
}
inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); }
inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); }
inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); }
inline int64_t FieldDef::default_int64() const {
return upb_fielddef_defaultint64(this);
}
@ -1256,12 +1600,21 @@ inline bool MessageDef::Freeze(Status* status) {
inline int MessageDef::field_count() const {
return upb_msgdef_numfields(this);
}
inline int MessageDef::oneof_count() const {
return upb_msgdef_numoneofs(this);
}
inline bool MessageDef::AddField(upb_fielddef* f, Status* s) {
return upb_msgdef_addfield(this, f, NULL, s);
}
inline bool MessageDef::AddField(const reffed_ptr<FieldDef>& f, Status* s) {
return upb_msgdef_addfield(this, f.get(), NULL, s);
}
inline bool MessageDef::AddOneof(upb_oneofdef* o, Status* s) {
return upb_msgdef_addoneof(this, o, NULL, s);
}
inline bool MessageDef::AddOneof(const reffed_ptr<OneofDef>& o, Status* s) {
return upb_msgdef_addoneof(this, o.get(), NULL, s);
}
inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) {
return upb_msgdef_itof_mutable(this, number);
}
@ -1275,6 +1628,13 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name,
size_t len) const {
return upb_msgdef_ntof(this, name, len);
}
inline OneofDef* MessageDef::FindOneofByName(const char* name, size_t len) {
return upb_msgdef_ntoo_mutable(this, name, len);
}
inline const OneofDef* MessageDef::FindOneofByName(const char* name,
size_t len) const {
return upb_msgdef_ntoo(this, name, len);
}
inline MessageDef* MessageDef::Dup(const void *owner) const {
return upb_msgdef_dup(this, owner);
}
@ -1284,55 +1644,127 @@ inline void MessageDef::setmapentry(bool map_entry) {
inline bool MessageDef::mapentry() const {
return upb_msgdef_mapentry(this);
}
inline MessageDef::iterator MessageDef::begin() { return iterator(this); }
inline MessageDef::iterator MessageDef::end() { return iterator::end(this); }
inline MessageDef::const_iterator MessageDef::begin() const {
return const_iterator(this);
inline MessageDef::field_iterator MessageDef::field_begin() {
return field_iterator(this);
}
inline MessageDef::const_iterator MessageDef::end() const {
return const_iterator::end(this);
inline MessageDef::field_iterator MessageDef::field_end() {
return field_iterator::end(this);
}
inline MessageDef::const_field_iterator MessageDef::field_begin() const {
return const_field_iterator(this);
}
inline MessageDef::const_field_iterator MessageDef::field_end() const {
return const_field_iterator::end(this);
}
inline MessageDef::oneof_iterator MessageDef::oneof_begin() {
return oneof_iterator(this);
}
inline MessageDef::oneof_iterator MessageDef::oneof_end() {
return oneof_iterator::end(this);
}
inline MessageDef::const_oneof_iterator MessageDef::oneof_begin() const {
return const_oneof_iterator(this);
}
inline MessageDef::const_oneof_iterator MessageDef::oneof_end() const {
return const_oneof_iterator::end(this);
}
inline MessageDef::iterator::iterator(MessageDef* md) {
upb_msg_begin(&iter_, md);
inline MessageDef::field_iterator::field_iterator(MessageDef* md) {
upb_msg_field_begin(&iter_, md);
}
inline MessageDef::iterator MessageDef::iterator::end(MessageDef* md) {
MessageDef::iterator iter(md);
upb_msg_iter_setdone(&iter.iter_);
inline MessageDef::field_iterator MessageDef::field_iterator::end(
MessageDef* md) {
MessageDef::field_iterator iter(md);
upb_msg_field_iter_setdone(&iter.iter_);
return iter;
}
inline FieldDef* MessageDef::iterator::operator*() const {
inline FieldDef* MessageDef::field_iterator::operator*() const {
return upb_msg_iter_field(&iter_);
}
inline void MessageDef::iterator::operator++() { return upb_msg_next(&iter_); }
inline bool MessageDef::iterator::operator==(const iterator &other) const {
inline void MessageDef::field_iterator::operator++() {
return upb_msg_field_next(&iter_);
}
inline bool MessageDef::field_iterator::operator==(
const field_iterator &other) const {
return upb_inttable_iter_isequal(&iter_, &other.iter_);
}
inline bool MessageDef::iterator::operator!=(const iterator &other) const {
inline bool MessageDef::field_iterator::operator!=(
const field_iterator &other) const {
return !(*this == other);
}
inline MessageDef::const_iterator::const_iterator(const MessageDef* md) {
upb_msg_begin(&iter_, md);
inline MessageDef::const_field_iterator::const_field_iterator(
const MessageDef* md) {
upb_msg_field_begin(&iter_, md);
}
inline MessageDef::const_iterator MessageDef::const_iterator::end(
inline MessageDef::const_field_iterator MessageDef::const_field_iterator::end(
const MessageDef *md) {
MessageDef::const_iterator iter(md);
upb_msg_iter_setdone(&iter.iter_);
MessageDef::const_field_iterator iter(md);
upb_msg_field_iter_setdone(&iter.iter_);
return iter;
}
inline const FieldDef* MessageDef::const_iterator::operator*() const {
inline const FieldDef* MessageDef::const_field_iterator::operator*() const {
return upb_msg_iter_field(&iter_);
}
inline void MessageDef::const_iterator::operator++() {
return upb_msg_next(&iter_);
inline void MessageDef::const_field_iterator::operator++() {
return upb_msg_field_next(&iter_);
}
inline bool MessageDef::const_iterator::operator==(
const const_iterator &other) const {
inline bool MessageDef::const_field_iterator::operator==(
const const_field_iterator &other) const {
return upb_inttable_iter_isequal(&iter_, &other.iter_);
}
inline bool MessageDef::const_iterator::operator!=(
const const_iterator &other) const {
inline bool MessageDef::const_field_iterator::operator!=(
const const_field_iterator &other) const {
return !(*this == other);
}
inline MessageDef::oneof_iterator::oneof_iterator(MessageDef* md) {
upb_msg_oneof_begin(&iter_, md);
}
inline MessageDef::oneof_iterator MessageDef::oneof_iterator::end(
MessageDef* md) {
MessageDef::oneof_iterator iter(md);
upb_msg_oneof_iter_setdone(&iter.iter_);
return iter;
}
inline OneofDef* MessageDef::oneof_iterator::operator*() const {
return upb_msg_iter_oneof(&iter_);
}
inline void MessageDef::oneof_iterator::operator++() {
return upb_msg_oneof_next(&iter_);
}
inline bool MessageDef::oneof_iterator::operator==(
const oneof_iterator &other) const {
return upb_strtable_iter_isequal(&iter_, &other.iter_);
}
inline bool MessageDef::oneof_iterator::operator!=(
const oneof_iterator &other) const {
return !(*this == other);
}
inline MessageDef::const_oneof_iterator::const_oneof_iterator(
const MessageDef* md) {
upb_msg_oneof_begin(&iter_, md);
}
inline MessageDef::const_oneof_iterator MessageDef::const_oneof_iterator::end(
const MessageDef *md) {
MessageDef::const_oneof_iterator iter(md);
upb_msg_oneof_iter_setdone(&iter.iter_);
return iter;
}
inline const OneofDef* MessageDef::const_oneof_iterator::operator*() const {
return upb_msg_iter_oneof(&iter_);
}
inline void MessageDef::const_oneof_iterator::operator++() {
return upb_msg_oneof_next(&iter_);
}
inline bool MessageDef::const_oneof_iterator::operator==(
const const_oneof_iterator &other) const {
return upb_strtable_iter_isequal(&iter_, &other.iter_);
}
inline bool MessageDef::const_oneof_iterator::operator!=(
const const_oneof_iterator &other) const {
return !(*this == other);
}
@ -1400,6 +1832,105 @@ inline const char* EnumDef::Iterator::name() {
}
inline bool EnumDef::Iterator::Done() { return upb_enum_done(&iter_); }
inline void EnumDef::Iterator::Next() { return upb_enum_next(&iter_); }
// Creates a new OneofDef through upb_oneofdef_new().  The address of the local
// "o" is used as the owner token, both for the C constructor and for the
// reffed_ptr constructor that wraps the result.
// NOTE(review): presumably reffed_ptr takes over the ref held under &o and
// handles a NULL "o" (allocation failure) -- confirm against reffed_ptr.
inline reffed_ptr<OneofDef> OneofDef::New() {
upb_oneofdef *o = upb_oneofdef_new(&o);
return reffed_ptr<OneofDef>(o, &o);
}
inline bool OneofDef::IsFrozen() const { return upb_oneofdef_isfrozen(this); }
inline void OneofDef::Ref(const void* owner) const {
return upb_oneofdef_ref(this, owner);
}
inline void OneofDef::Unref(const void* owner) const {
return upb_oneofdef_unref(this, owner);
}
inline void OneofDef::DonateRef(const void* from, const void* to) const {
return upb_oneofdef_donateref(this, from, to);
}
inline void OneofDef::CheckRef(const void* owner) const {
return upb_oneofdef_checkref(this, owner);
}
inline const char* OneofDef::full_name() const {
return upb_oneofdef_name(this);
}
inline const MessageDef* OneofDef::containing_type() const {
return upb_oneofdef_containingtype(this);
}
inline const char* OneofDef::name() const {
return upb_oneofdef_name(this);
}
inline bool OneofDef::set_name(const char* name, Status* s) {
return upb_oneofdef_setname(this, name, s);
}
inline int OneofDef::field_count() const {
return upb_oneofdef_numfields(this);
}
inline bool OneofDef::AddField(FieldDef* field, Status* s) {
return upb_oneofdef_addfield(this, field, NULL, s);
}
inline bool OneofDef::AddField(const reffed_ptr<FieldDef>& field, Status* s) {
return upb_oneofdef_addfield(this, field.get(), NULL, s);
}
inline const FieldDef* OneofDef::FindFieldByName(const char* name,
size_t len) const {
return upb_oneofdef_ntof(this, name, len);
}
inline const FieldDef* OneofDef::FindFieldByNumber(uint32_t num) const {
return upb_oneofdef_itof(this, num);
}
inline OneofDef::iterator OneofDef::begin() { return iterator(this); }
inline OneofDef::iterator OneofDef::end() { return iterator::end(this); }
inline OneofDef::const_iterator OneofDef::begin() const {
return const_iterator(this);
}
inline OneofDef::const_iterator OneofDef::end() const {
return const_iterator::end(this);
}
inline OneofDef::iterator::iterator(OneofDef* o) {
upb_oneof_begin(&iter_, o);
}
// Builds the past-the-end iterator for "o": start a normal iterator, then
// force it into the "done" state.
inline OneofDef::iterator OneofDef::iterator::end(OneofDef* o) {
  OneofDef::iterator past_the_end(o);
  upb_oneof_iter_setdone(&past_the_end.iter_);
  return past_the_end;
}
inline FieldDef* OneofDef::iterator::operator*() const {
return upb_oneof_iter_field(&iter_);
}
inline void OneofDef::iterator::operator++() { return upb_oneof_next(&iter_); }
inline bool OneofDef::iterator::operator==(const iterator &other) const {
return upb_inttable_iter_isequal(&iter_, &other.iter_);
}
inline bool OneofDef::iterator::operator!=(const iterator &other) const {
return !(*this == other);
}
inline OneofDef::const_iterator::const_iterator(const OneofDef* md) {
upb_oneof_begin(&iter_, md);
}
// Builds the past-the-end const iterator for "md": start a normal iterator,
// then force it into the "done" state.
inline OneofDef::const_iterator OneofDef::const_iterator::end(
    const OneofDef *md) {
  OneofDef::const_iterator past_the_end(md);
  upb_oneof_iter_setdone(&past_the_end.iter_);
  return past_the_end;
}
// Returns the field at the iterator's current position.  This goes through the
// oneof iterator accessor (as OneofDef::iterator::operator*() does) rather
// than the message field-iterator accessor, since iter_ is a upb_oneof_iter.
inline const FieldDef* OneofDef::const_iterator::operator*() const {
  return upb_oneof_iter_field(&iter_);
}
inline void OneofDef::const_iterator::operator++() {
return upb_oneof_next(&iter_);
}
inline bool OneofDef::const_iterator::operator==(
const const_iterator &other) const {
return upb_inttable_iter_isequal(&iter_, &other.iter_);
}
inline bool OneofDef::const_iterator::operator!=(
const const_iterator &other) const {
return !(*this == other);
}
} // namespace upb
#endif

@ -20,6 +20,54 @@
#include "upb/sink.h"
#include "upb/descriptor/descriptor.upb.h"
// upb_deflist is an internal-only dynamic array for storing a growing list of
// upb_defs.
typedef struct {
upb_def **defs;
size_t len;
size_t size;
bool owned;
} upb_deflist;
// We keep a stack of all the messages scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct {
char *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
int start;
} upb_descreader_frame;
// The maximum number of nested declarations that are allowed, ie.
// message Foo {
// message Bar {
// message Baz {
// }
// }
// }
//
// This is a resource limit that affects how big our runtime stack can grow.
// TODO: make this a runtime-settable property of the Reader instance.
#define UPB_MAX_MESSAGE_NESTING 64
// State of a descriptor reader.  Instances are allocated and initialized by
// upb_descreader_create() and released by descreader_cleanup().
struct upb_descreader {
// Input sink; handed out by upb_descreader_input().
upb_sink sink;
// Defs built so far; handed out by upb_descreader_getdefs().
upb_deflist defs;
// Stack of enclosing scopes; only the first stack_len frames are live.
upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
int stack_len;
uint32_t number;
// Heap-allocated string (or NULL); freed by descreader_cleanup().
char *name;
bool saw_number;
bool saw_name;
// Heap-allocated string (or NULL); freed by descreader_cleanup().
char *default_string;
upb_fielddef *f;
};
static char *upb_strndup(const char *buf, size_t n) {
char *ret = malloc(n + 1);
if (!ret) return NULL;
@ -99,36 +147,6 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
/* upb_descreader ************************************************************/
void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
upb_status *status) {
UPB_UNUSED(status);
upb_deflist_init(&r->defs);
upb_sink_reset(upb_descreader_input(r), handlers, r);
r->stack_len = 0;
r->name = NULL;
r->default_string = NULL;
}
void upb_descreader_uninit(upb_descreader *r) {
free(r->name);
upb_deflist_uninit(&r->defs);
free(r->default_string);
while (r->stack_len > 0) {
upb_descreader_frame *f = &r->stack[--r->stack_len];
free(f->name);
}
}
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
*n = r->defs.len;
upb_deflist_donaterefs(&r->defs, owner);
return r->defs.defs;
}
upb_sink *upb_descreader_input(upb_descreader *r) {
return &r->sink;
}
static upb_msgdef *upb_descreader_top(upb_descreader *r) {
assert(r->stack_len > 1);
int index = r->stack[r->stack_len-1].start - 1;
@ -568,6 +586,45 @@ static void reghandlers(const void *closure, upb_handlers *h) {
#undef D
// Cleanup callback registered with the environment by upb_descreader_create().
// Releases everything the reader owns on the heap: its pending name, its def
// list, its pending default string and the name of every live stack frame.
// The reader structure itself is not freed here.
void descreader_cleanup(void *_r) {
  upb_descreader *reader = _r;

  free(reader->name);
  upb_deflist_uninit(&reader->defs);
  free(reader->default_string);

  // Pop frames from the top of the stack, freeing each frame's name.
  while (reader->stack_len > 0) {
    reader->stack_len--;
    free(reader->stack[reader->stack_len].name);
  }
}
/* Public API ****************************************************************/
// Allocates a descriptor reader from environment "e" and wires its input sink
// to handlers "h".  Returns NULL if either the allocation or the registration
// of the cleanup callback fails.
upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
  upb_descreader *reader = upb_env_malloc(e, sizeof(*reader));

  if (reader == NULL) return NULL;
  if (!upb_env_addcleanup(e, descreader_cleanup, reader)) return NULL;

  // Fields inspected by descreader_cleanup() must start out in a known state.
  reader->default_string = NULL;
  reader->name = NULL;
  reader->stack_len = 0;

  upb_deflist_init(&reader->defs);
  upb_sink_reset(upb_descreader_input(reader), h, reader);
  return reader;
}
// Stores the number of defs built so far in *n, donates the reader's refs on
// them to "owner", and returns the reader's internal array of defs.
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
  upb_deflist *list = &r->defs;

  *n = list->len;
  upb_deflist_donaterefs(list, owner);
  return list->defs;
}
// Returns a pointer to the reader's embedded input sink (the one that
// upb_descreader_create() resets with the reader's handlers).
upb_sink *upb_descreader_input(upb_descreader *r) {
return &r->sink;
}
const upb_handlers *upb_descreader_newhandlers(const void *owner) {
const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
const upb_handlers *h = upb_handlers_newfrozen(

@ -11,6 +11,7 @@
#ifndef UPB_DESCRIPTOR_H
#define UPB_DESCRIPTOR_H
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -23,45 +24,11 @@ class Reader;
UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader);
// Internal-only structs used by Reader.
// upb_deflist is an internal-only dynamic array for storing a growing list of
// upb_defs.
typedef struct {
UPB_PRIVATE_FOR_CPP
upb_def **defs;
size_t len;
size_t size;
bool owned;
} upb_deflist;
// We keep a stack of all the messages scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct {
UPB_PRIVATE_FOR_CPP
char *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
int start;
} upb_descreader_frame;
// The maximum number of nested declarations that are allowed, ie.
// message Foo {
// message Bar {
// message Baz {
// }
// }
// }
//
// This is a resource limit that affects how big our runtime stack can grow.
// TODO: make this a runtime-settable property of the Reader instance.
#define UPB_MAX_MESSAGE_NESTING 64
#ifdef __cplusplus
// Class that receives descriptor data according to the descriptor.proto schema
// and use it to build upb::Defs corresponding to that schema.
UPB_DEFINE_CLASS0(upb::descriptor::Reader,
class upb::descriptor::Reader {
public:
// These handlers must have come from NewHandlers() and must outlive the
// Reader.
@ -71,11 +38,7 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// to build/memory-manage the handlers at runtime at all). Unfortunately this
// is a bit tricky to implement for Handlers, but necessary to simplify this
// interface.
Reader(const Handlers* handlers, Status* status);
~Reader();
// Resets the reader's state and discards any defs it may have built.
void Reset();
static Reader* Create(Environment* env, const Handlers* handlers);
// The reader's input; this is where descriptor.proto data should be sent.
Sink* input();
@ -91,45 +54,30 @@ UPB_DEFINE_CLASS0(upb::descriptor::Reader,
// Builds and returns handlers for the reader, owned by "owner."
static Handlers* NewHandlers(const void* owner);
,
UPB_DEFINE_STRUCT0(upb_descreader,
upb_sink sink;
upb_deflist defs;
upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
int stack_len;
uint32_t number;
char *name;
bool saw_number;
bool saw_name;
private:
UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader);
};
char *default_string;
upb_fielddef *f;
));
#endif
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
// C API.
void upb_descreader_init(upb_descreader *r, const upb_handlers *handlers,
upb_status *status);
void upb_descreader_uninit(upb_descreader *r);
void upb_descreader_reset(upb_descreader *r);
upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h);
upb_sink *upb_descreader_input(upb_descreader *r);
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
const upb_handlers *upb_descreader_newhandlers(const void *owner);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ implementation details. /////////////////////////////////////////////////
namespace upb {
namespace descriptor {
inline Reader::Reader(const Handlers *h, Status *s) {
upb_descreader_init(this, h, s);
inline Reader* Reader::Create(Environment* e, const Handlers *h) {
return upb_descreader_create(e, h);
}
inline Reader::~Reader() { upb_descreader_uninit(this); }
inline void Reader::Reset() { upb_descreader_reset(this); }
inline Sink* Reader::input() { return upb_descreader_input(this); }
inline upb::Def** Reader::GetDefs(void* owner, int* n) {
return upb_descreader_getdefs(this, owner, n);

@ -0,0 +1,259 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2014 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb/env.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// One node in an environment's singly-linked list of cleanup callbacks.
// upb_env_addcleanup() pushes nodes at the head; upb_env_uninit() walks the
// list and invokes each callback.
typedef struct cleanup_ent {
upb_cleanup_func *cleanup;  // Callback to invoke.
void *ud;                   // Opaque argument passed to the callback.
struct cleanup_ent *next;   // Next node, or NULL at the end of the list.
} cleanup_ent;
static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
/* Default allocator **********************************************************/
// Just use realloc, keeping all allocated blocks in a linked list to destroy at
// the end.
// Header placed directly in front of every block handed out by
// default_alloc().  All live blocks are chained together so that
// default_alloc_cleanup() can free them in one pass.
typedef struct mem_block {
  // List is doubly-linked, because in cases where realloc() moves an existing
  // block, we need to be able to remove the old pointer from the list
  // efficiently.
  struct mem_block *prev, *next;
#ifndef NDEBUG
  size_t size;  // Doesn't include mem_block structure.
#endif
  char data[];
} mem_block;

// State of the default allocator: head of the block list (NULL when empty).
typedef struct {
  mem_block *head;
} default_alloc_ud;

// realloc()-backed allocation function.
//
//   _ud      points to a default_alloc_ud.
//   ptr      NULL for a fresh allocation, otherwise a pointer previously
//            returned by this function that should be resized.
//   oldsize  size the caller believes "ptr" has (checked only in debug builds).
//   size     requested size in bytes, not counting the block header.
//
// Returns the user-visible data pointer, or NULL if the request cannot be
// satisfied (in which case an existing block is left untouched).
static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
  default_alloc_ud *ud = (default_alloc_ud *)_ud;
  // Step back from the user pointer to the header that precedes it.  The cast
  // to char* keeps the arithmetic within ISO C (no void* arithmetic).
  mem_block *from =
      ptr ? (mem_block *)((char *)ptr - sizeof(mem_block)) : NULL;
  mem_block *block;

  (void)oldsize;  // Only consulted by the debug assertion below.

#ifndef NDEBUG
  if (from) {
    assert(oldsize <= from->size);
  }
#endif

  // Refuse requests whose total size would wrap around size_t.
  if (size > (size_t)-1 - sizeof(mem_block)) return NULL;

  block = (mem_block *)realloc(from, size + sizeof(mem_block));
  if (!block) return NULL;

#ifndef NDEBUG
  block->size = size;
#endif

  if (from) {
    if (block != from) {
      // The block was moved, so pointers in next and prev blocks must be
      // updated to its new location.
      if (block->next) block->next->prev = block;
      if (block->prev) {
        block->prev->next = block;
      } else {
        // No predecessor means this block is the list head; without this
        // update ud->head would keep pointing at the freed old location.
        ud->head = block;
      }
    }
  } else {
    // Insert at head of linked list.
    block->prev = NULL;
    block->next = ud->head;
    if (block->next) block->next->prev = block;
    ud->head = block;
  }

  return block->data;
}
// Frees every block on the default allocator's list.  The successor pointer is
// read before each block is freed.  The list head itself is not reset.
static void default_alloc_cleanup(void *_ud) {
  default_alloc_ud *state = _ud;
  mem_block *cur;
  mem_block *following;

  for (cur = state->head; cur != NULL; cur = following) {
    following = cur->next;
    free(cur);
  }
}
/* Standard error functions ***************************************************/
// Default error function installed by upb_env_init(): prints the status
// message to stderr and returns false.  The user-data pointer is unused.
static bool default_err(void *ud, const upb_status *status) {
UPB_UNUSED(ud);
fprintf(stderr, "upb error: %s\n", upb_status_errmsg(status));
return false;
}
static bool write_err_to(void *ud, const upb_status *status) {
upb_status *copy_to = ud;
upb_status_copy(copy_to, status);
return false;
}
/* upb_env ********************************************************************/
// Initializes an environment: no errors seen, nothing allocated, no cleanup
// callbacks, an empty built-in block list, and the default allocation and
// error functions installed.
void upb_env_init(upb_env *e) {
  default_alloc_ud *builtin = (default_alloc_ud *)&e->default_alloc_ud;

  builtin->head = NULL;
  e->cleanup_head = NULL;
  e->bytes_allocated = 0;
  e->ok_ = true;

  // Set default functions.
  upb_env_setallocfunc(e, default_alloc, builtin);
  upb_env_seterrorfunc(e, default_err, NULL);
}
// Tears down an environment: runs every registered cleanup callback (most
// recently added first, since the list is pushed at the head), then releases
// the default allocator's memory if the default allocator is still installed.
void upb_env_uninit(upb_env *e) {
  cleanup_ent *node;

  for (node = e->cleanup_head; node != NULL; node = node->next) {
    node->cleanup(node->ud);
  }

  // Must do this after running cleanup functions, because this will delete
  // the memory we store our cleanup entries in!
  if (e->alloc != default_alloc) return;
  default_alloc_cleanup(e->alloc_ud);
}
// Installs "alloc" as the environment's allocation function; "ud" is the
// opaque pointer that will be passed to it on every call.
UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
                                          void *ud) {
  e->alloc_ud = ud;
  e->alloc = alloc;
}
// Installs "func" as the environment's error function; "ud" is the opaque
// pointer that will be passed to it on every call.
UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
                                          void *ud) {
  e->err_ud = ud;
  e->err = func;
}
// Routes reported errors into "status": installs write_err_to as the error
// function with "status" as its user data.
void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
  upb_env_seterrorfunc(e, write_err_to, status);
}
// Returns the environment's ok_ flag, which upb_env_init() sets to true and
// upb_env_reporterror() clears.
bool upb_env_ok(const upb_env *e) {
return e->ok_;
}
// Records that an error occurred (clearing the ok_ flag) and forwards "status"
// to the installed error function, whose return value is passed back.
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  bool handler_result;

  e->ok_ = false;
  handler_result = e->err(e->err_ud, status);
  return handler_result;
}
// Registers "func" to be called with "ud" when the environment is
// uninitialized.  The list node is allocated from the environment itself.
// Returns false if that allocation fails, true otherwise.
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  cleanup_ent *node = upb_env_malloc(e, sizeof(*node));

  if (node == NULL) return false;

  node->cleanup = func;
  node->ud = ud;

  // Push onto the front of the list.
  node->next = e->cleanup_head;
  e->cleanup_head = node;
  return true;
}
// Allocates "size" bytes from the environment's allocation function and adds
// "size" to the running bytes_allocated total (whether or not the allocation
// succeeds).
void *upb_env_malloc(upb_env *e, size_t size) {
  e->bytes_allocated += size;

  if (e->alloc != seeded_alloc) {
    return e->alloc(e->alloc_ud, NULL, 0, size);
  }

  // This is equivalent to the generic call above, but allows inlining for a
  // measurable perf benefit.
  return seeded_alloc(e->alloc_ud, NULL, 0, size);
}
// Grows the block "ptr" from "oldsize" to "size" bytes using the environment's
// allocation function.  "oldsize" must not exceed "size".  Returns the
// (possibly moved) block, or NULL if the allocation function fails.
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  char *ret;

  assert(oldsize <= size);
  ret = e->alloc(e->alloc_ud, ptr, oldsize, size);

#ifndef NDEBUG
  // Overwrite non-preserved memory to ensure callers are passing the oldsize
  // that they truly require.  Skipped when the allocation failed, so that an
  // out-of-memory condition is reported to the caller instead of crashing
  // debug builds on a NULL dereference.
  if (ret) memset(ret + oldsize, 0xff, size - oldsize);
#endif

  return ret;
}
// Returns the running total of sizes requested through upb_env_malloc().
size_t upb_env_bytesallocated(const upb_env *e) {
return e->bytes_allocated;
}
/* upb_seededalloc ************************************************************/
// Granularity to which seeded allocation sizes are rounded.  Be conservative
// and choose 16 in case anyone is using SSE.
static const size_t maxalign = 16;

// Rounds "size" up to the nearest multiple of maxalign.
static size_t align_up(size_t size) {
  const size_t bumped = size + (maxalign - 1);
  return bumped - (bumped % maxalign);
}
UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
size_t size) {
upb_seededalloc *a = ud;
size = align_up(size);
if (oldsize == 0 && size <= a->mem_limit - a->mem_ptr) {
// Fast path: we can satisfy from the initial allocation.
void *ret = a->mem_ptr;
a->mem_ptr += size;
return ret;
} else {
// Slow path: fallback to other allocator.
a->need_cleanup = true;
return a->alloc(a->alloc_ud, ptr, oldsize, size);
}
}
// Initializes a seeded allocator over the caller-provided buffer
// [mem, mem + len).  The whole buffer starts out free, and the default
// realloc()-based allocator is installed as the fallback.
void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
  default_alloc_ud *ud = (default_alloc_ud *)&a->default_alloc_ud;

  a->mem_base = mem;
  a->mem_ptr = mem;
  // Arithmetic on void* is a compiler extension, not ISO C; go through char*.
  a->mem_limit = (char *)mem + len;
  a->need_cleanup = false;
  a->returned_allocfunc = false;

  ud->head = NULL;
  upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
}
// Releases memory obtained from the default fallback allocator.  Nothing is
// done if the fallback was never used or if a custom fallback is installed.
void upb_seededalloc_uninit(upb_seededalloc *a) {
  if (!a->need_cleanup) return;
  if (a->alloc != default_alloc) return;
  default_alloc_cleanup(a->alloc_ud);
}
// Installs the allocator used once the seed buffer cannot satisfy a request.
// Asserts that upb_seededalloc_getallocfunc() has not been called yet.
UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
                                                      upb_alloc_func *alloc,
                                                      void *ud) {
  assert(!a->returned_allocfunc);
  a->alloc_ud = ud;
  a->alloc = alloc;
}
// Returns the allocation function for this seeded allocator (seeded_alloc).
// Also sets returned_allocfunc, after which
// upb_seededalloc_setfallbackalloc() will assert if called.
upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
a->returned_allocfunc = true;
return seeded_alloc;
}

@ -0,0 +1,256 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2014 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* A upb::Environment provides a means for injecting malloc and an
* error-reporting callback into encoders/decoders. This allows them to be
* independent of nearly all assumptions about their actual environment.
*
* It is also a container for allocating the encoders/decoders themselves that
* insulates clients from knowing their actual size. This provides ABI
* compatibility even if the size of the objects changes. And this allows the
* structure definitions to be in the .c files instead of the .h files, making
* the .h files smaller and more readable.
*/
#include "upb/upb.h"
#ifndef UPB_ENV_H_
#define UPB_ENV_H_
#ifdef __cplusplus
namespace upb {
class Environment;
class SeededAllocator;
}
#endif
UPB_DECLARE_TYPE(upb::Environment, upb_env);
UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc);
typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size);
typedef void upb_cleanup_func(void *ud);
typedef bool upb_error_func(void *ud, const upb_status *status);
// An environment is *not* thread-safe.
// upb::Environment is the C++ face of the C struct upb_env declared in the
// second half of this macro invocation.
UPB_DEFINE_CLASS0(upb::Environment,
public:
Environment();
~Environment();
// Set a custom memory allocation function for the environment. May ONLY
// be called before any calls to Malloc()/Realloc()/AddCleanup() below.
// If this is not called, the system realloc() function will be used.
// The given user pointer "ud" will be passed to the allocation function.
//
// The allocation function will not receive corresponding "free" calls. It
// must ensure that the memory is valid for the lifetime of the Environment,
// but it may be reclaimed any time thereafter. The likely usage is that
// "ud" points to a stateful allocator, and that the allocator frees all
// memory, arena-style, when it is destroyed. In this case the allocator must
// outlive the Environment. Another possibility is that the allocation
// function returns GC-able memory that is guaranteed to be GC-rooted for the
// life of the Environment.
void SetAllocationFunction(upb_alloc_func* alloc, void* ud);
// Convenience form of SetAllocationFunction(): installs
// allocator->GetAllocationFunction() with the allocator object itself as the
// user pointer. T must therefore provide GetAllocationFunction().
template<class T>
void SetAllocator(T* allocator) {
SetAllocationFunction(allocator->GetAllocationFunction(), allocator);
}
// Set a custom error reporting function. "ud" is the user pointer for it.
void SetErrorFunction(upb_error_func* func, void* ud);
// Set the error reporting function to simply copy the status to the given
// status and abort.
void ReportErrorsTo(Status* status);
// Returns true if all allocations and AddCleanup() calls have succeeded,
// and no errors were reported with ReportError() (except ones that recovered
// successfully).
bool ok() const;
//////////////////////////////////////////////////////////////////////////////
// Functions for use by encoders/decoders.
// Reports an error to this environment's callback, returning true if
// the caller should try to recover.
bool ReportError(const Status* status);
// Allocate memory. Uses the environment's allocation function.
//
// There is no need to free(). All memory will be freed automatically, but is
// guaranteed to outlive the Environment.
// NOTE(review): "outlive" presumably means "remain valid for at least the
// lifetime of the Environment" -- confirm against the allocator contract
// described on SetAllocationFunction().
void* Malloc(size_t size);
// Reallocate memory. Preserves "oldsize" bytes from the existing buffer.
// Requires: oldsize <= existing_size.
//
// TODO(haberman): should we also enforce that oldsize <= size?
void* Realloc(void* ptr, size_t oldsize, size_t size);
// Add a cleanup function to run when the environment is destroyed.
// Returns false on out-of-memory.
//
// The first call to AddCleanup() after SetAllocationFunction() is guaranteed
// to return true -- this makes it possible to robustly set a cleanup handler
// for a custom allocation function.
bool AddCleanup(upb_cleanup_func* func, void* ud);
// Total number of bytes that have been allocated. It is undefined what
// Realloc() does to this counter.
size_t BytesAllocated() const;
private:
UPB_DISALLOW_COPY_AND_ASSIGN(Environment);
,
UPB_DEFINE_STRUCT0(upb_env,
// Result reported by ok() / upb_env_ok().
bool ok_;
// Counter reported by BytesAllocated() / upb_env_bytesallocated().
size_t bytes_allocated;
// Alloc function and its user pointer.
upb_alloc_func *alloc;
void *alloc_ud;
// Error-reporting function and its user pointer.
upb_error_func *err;
void *err_ud;
// Userdata for default alloc func.
void *default_alloc_ud;
// Cleanup entries. Pointer to a cleanup_ent, defined in env.c
void *cleanup_head;
// For future expansion, since the size of this struct is exposed to users.
void *future1;
void *future2;
));
// C API for upb_env. The inline upb::Environment methods later in this header
// forward one-to-one to these functions.
UPB_BEGIN_EXTERN_C
// Lifecycle: called by the Environment constructor/destructor respectively.
void upb_env_init(upb_env *e);
void upb_env_uninit(upb_env *e);
// Configuration of the allocation and error-reporting callbacks.
void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud);
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);
void upb_env_reporterrorsto(upb_env *e, upb_status *status);
// Status query and error reporting.
bool upb_env_ok(const upb_env *e);
bool upb_env_reporterror(upb_env *e, const upb_status *status);
// Allocation, cleanup registration and accounting.
void *upb_env_malloc(upb_env *e, size_t size);
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size);
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud);
size_t upb_env_bytesallocated(const upb_env *e);
UPB_END_EXTERN_C
// An allocator that allocates from an initial memory region (likely the stack)
// before falling back to another allocator.
UPB_DEFINE_CLASS0(upb::SeededAllocator,
public:
// "mem" points to the initial region and "len" is its size in bytes.
// NOTE(review): the region is presumably referenced rather than copied, so
// it must stay valid while the allocator is in use -- confirm in env.c.
SeededAllocator(void *mem, size_t len);
~SeededAllocator();
// Set a custom fallback memory allocation function for the allocator, to use
// once the initial region runs out.
//
// May ONLY be called before GetAllocationFunction(). If this is not
// called, the system realloc() will be the fallback allocator.
void SetFallbackAllocator(upb_alloc_func *alloc, void *ud);
// Gets the allocation function for this allocator.
upb_alloc_func* GetAllocationFunction();
private:
UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator);
,
UPB_DEFINE_STRUCT0(upb_seededalloc,
// Fallback alloc function.
upb_alloc_func *alloc;
upb_cleanup_func *alloc_cleanup;
void *alloc_ud;
// NOTE(review): presumably records whether the fallback allocator was ever
// used, so uninit knows whether there is anything to clean up -- confirm.
bool need_cleanup;
// NOTE(review): presumably set once the allocation function has been handed
// out, after which the fallback may no longer be changed -- confirm.
bool returned_allocfunc;
// Userdata for default alloc func.
void *default_alloc_ud;
// Pointers for the initial memory region.
void *mem_base;
void *mem_ptr;
void *mem_limit;
// For future expansion, since the size of this struct is exposed to users.
void *future1;
void *future2;
));
// C API for upb_seededalloc. The inline upb::SeededAllocator methods later in
// this header forward one-to-one to these functions.
UPB_BEGIN_EXTERN_C
void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len);
void upb_seededalloc_uninit(upb_seededalloc *a);
void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func,
void *ud);
upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a);
UPB_END_EXTERN_C
#ifdef __cplusplus
// Inline C++ method definitions. Each one forwards to the C function of the
// corresponding name, passing "this" as the upb_env / upb_seededalloc.
namespace upb {
// Environment: construction and destruction.
inline Environment::Environment() {
upb_env_init(this);
}
inline Environment::~Environment() {
upb_env_uninit(this);
}
// Environment: configuration.
inline void Environment::SetAllocationFunction(upb_alloc_func *alloc,
void *ud) {
upb_env_setallocfunc(this, alloc, ud);
}
inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) {
upb_env_seterrorfunc(this, func, ud);
}
inline void Environment::ReportErrorsTo(Status* status) {
upb_env_reporterrorsto(this, status);
}
// Environment: status and error reporting.
inline bool Environment::ok() const {
return upb_env_ok(this);
}
inline bool Environment::ReportError(const Status* status) {
return upb_env_reporterror(this, status);
}
// Environment: allocation, cleanup and accounting.
inline void *Environment::Malloc(size_t size) {
return upb_env_malloc(this, size);
}
inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) {
return upb_env_realloc(this, ptr, oldsize, size);
}
inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) {
return upb_env_addcleanup(this, func, ud);
}
inline size_t Environment::BytesAllocated() const {
return upb_env_bytesallocated(this);
}
// SeededAllocator.
inline SeededAllocator::SeededAllocator(void *mem, size_t len) {
upb_seededalloc_init(this, mem, len);
}
inline SeededAllocator::~SeededAllocator() {
upb_seededalloc_uninit(this);
}
inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc,
void *ud) {
upb_seededalloc_setfallbackalloc(this, alloc, ud);
}
inline upb_alloc_func *SeededAllocator::GetAllocationFunction() {
return upb_seededalloc_getallocfunc(this);
}
} // namespace upb
#endif // __cplusplus
#endif // UPB_ENV_H_

@ -147,12 +147,17 @@ template <class T> struct disable_if_same<T, T> {};
// Deletes "p" after casting it to T. T is used directly as the static_cast
// target, so it must itself be a pointer type (e.g. DeletePointer<Foo*>).
template <class T> void DeletePointer(void *p) { delete static_cast<T>(p); }
template <class T1, class T2>
struct FirstUnlessVoid {
struct FirstUnlessVoidOrBool {
typedef T1 value;
};
template <class T2>
struct FirstUnlessVoid<void, T2> {
struct FirstUnlessVoidOrBool<void, T2> {
typedef T2 value;
};
template <class T2>
struct FirstUnlessVoidOrBool<bool, T2> {
typedef T2 value;
};
@ -534,10 +539,14 @@ inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*f)(P1, P2, P3, P4)) {
//
// 1. If the function returns void, make it return the expected type and with
// a value that always indicates success.
// 2. If the function is expected to return void* but doesn't, wrap it so it
// does (either by returning the closure param if the wrapped function
// returns void or by casting a different pointer type to void* for
// return).
// 2. If the function returns bool, make it return the expected type with a
// value that indicates success or failure.
//
// The "expected type" for return is:
// 1. void* for start handlers. If the closure parameter has a different type
// we will cast it to void* for the return in the success case.
// 2. size_t for string buffer handlers.
// 3. bool for everything else.
// Template parameters are FuncN type and desired return type.
template <class F, class R, class Enable = void>
@ -926,10 +935,13 @@ inline Handler<T>::Handler(F func)
attr_.SetClosureType(UniquePtrForType<typename F::FuncInfo::Closure>());
// We use the closure type (from the first parameter) if the return type is
// void. This is all nonsense for non START* handlers, but it doesn't matter
// because in that case the value will be ignored.
typedef typename FirstUnlessVoid<typename F::FuncInfo::Return,
typename F::FuncInfo::Closure>::value
// void or bool, since these are the two cases we wrap to return the closure's
// type anyway.
//
// This is all nonsense for non START* handlers, but it doesn't matter because
// in that case the value will be ignored.
typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
typename F::FuncInfo::Closure>::value
EffectiveReturn;
attr_.SetReturnClosureType(UniquePtrForType<EffectiveReturn>());
}
@ -1124,9 +1136,7 @@ inline BytesHandler::BytesHandler() {
upb_byteshandler_init(this);
}
inline BytesHandler::~BytesHandler() {
upb_byteshandler_uninit(this);
}
inline BytesHandler::~BytesHandler() {}
} // namespace upb

@ -40,8 +40,10 @@ static void freehandlers(upb_refcounted *r) {
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
void *closure) {
const upb_handlers *h = (const upb_handlers*)r;
upb_msg_iter i;
for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, h->msg);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (!upb_fielddef_issubmsg(f)) continue;
const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
@ -70,8 +72,10 @@ static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
// For each submessage field, get or create a handlers object and set it as
// the subhandlers.
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (!upb_fielddef_issubmsg(f)) continue;
@ -172,7 +176,14 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
if (closure_type && *context_closure_type &&
closure_type != *context_closure_type) {
// TODO(haberman): better message for debugging.
upb_status_seterrmsg(&h->status_, "closure type does not match");
if (f) {
upb_status_seterrf(&h->status_,
"closure type does not match for field %s",
upb_fielddef_name(f));
} else {
upb_status_seterrmsg(
&h->status_, "closure type does not match for message-level handler");
}
return false;
}
@ -428,8 +439,10 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
// Check that there are no closure mismatches due to missing Start* handlers
// or subhandlers with different type-level types.
upb_msg_iter j;
for(upb_msg_begin(&j, h->msg); !upb_msg_done(&j); upb_msg_next(&j)) {
upb_msg_field_iter j;
for(upb_msg_field_begin(&j, h->msg);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
const upb_fielddef *f = upb_msg_iter_field(&j);
if (upb_fielddef_isseq(f)) {

@ -755,10 +755,8 @@ UPB_DEFINE_STRUCT0(upb_byteshandler,
));
void upb_byteshandler_init(upb_byteshandler *h);
void upb_byteshandler_uninit(upb_byteshandler *h);
// Caller must ensure that "d" outlives the handlers.
// TODO(haberman): support handlerfree function for the data.
// TODO(haberman): should this have a "freeze" operation? It's not necessary
// for memory management, but could be useful to force immutability and provide
// a convenient moment to verify that all registration succeeded.

@ -33,6 +33,71 @@
#include "upb/json/parser.h"
#define UPB_JSON_MAX_DEPTH 64
// One entry of the parser's scope stack (upb_json_parser.stack); a new frame
// is pushed for each nested submessage, sequence, string or map entry.
typedef struct {
// Sink that receives the handler calls emitted for this scope.
upb_sink sink;
// The current message in which we're parsing, and the field whose value we're
// expecting next.
const upb_msgdef *m;
const upb_fielddef *f;
// We are in a repeated-field context, ready to emit mapentries as
// submessages. This flag alters the start-of-object (open-brace) behavior to
// begin a sequence of mapentry messages rather than a single submessage.
bool is_map;
// We are in a map-entry message context. This flag is set when parsing the
// value field of a single map entry and indicates to all value-field parsers
// (subobjects, strings, numbers, and bools) that the map-entry submessage
// should end as soon as the value is parsed.
bool is_mapentry;
// If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
// message's map field that we're currently parsing. This differs from |f|
// because |f| is the field in the *current* message (i.e., the map-entry
// message itself), not the parent's field that leads to this map.
const upb_fielddef *mapfield;
} upb_jsonparser_frame;
// Complete state of one JSON parser instance.
struct upb_json_parser {
// Environment this parser belongs to.
// NOTE(review): presumably also the source of the parser's own memory and of
// its accumulate buffer -- confirm against the create/realloc code.
upb_env *env;
// Handler table and sink that form the parser's byte-stream input.
upb_byteshandler input_handler_;
upb_bytessink input_;
// Stack to track the JSON scopes we are in.
// |top| is the current frame.
// NOTE(review): |limit| is presumably one past the last usable frame
// (stack + UPB_JSON_MAX_DEPTH) -- confirm.
upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
upb_jsonparser_frame *top;
upb_jsonparser_frame *limit;
// Destination for error messages produced while parsing.
upb_status *status;
// Ragel's internal parsing stack for the parsing state machine.
int current_state;
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// The handle for the current buffer.
const upb_bufhandle *handle;
// Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
};
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@ -235,12 +300,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
size_t old_size = p->accumulate_buf_size;
size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
void *mem = realloc(p->accumulate_buf, new_size);
void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@ -262,16 +328,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
return true;
}
if (p->accumulate_buf_size - p->accumulated_len < len) {
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
}
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
}
if (!accumulate_realloc(p, need)) {
return false;
}
if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
return false;
}
if (p->accumulated != p->accumulate_buf) {
@ -510,16 +574,28 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
static bool parse_number(upb_json_parser *p);
// Called at the end of a numeric token: closes the input capture that
// start_number() opened and then converts the captured text via
// parse_number().  Returns false if either step fails.
static bool end_number(upb_json_parser *p, const char *ptr) {
  // Short-circuit keeps parse_number() from running when the capture fails.
  return capture_end(p, ptr) && parse_number(p);
}
static bool parse_number(upb_json_parser *p) {
// strtol() and friends unfortunately do not support specifying the length of
// the input string, so we need to force a copy into a NULL-terminated buffer.
if (!multipart_text(p, "\0", 1, false)) {
return false;
}
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len;
char *end;
const char *myend = buf + len - 1; // One for NULL.
char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@ -575,10 +651,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
return true;
err:
upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len);
upb_status_seterrf(p->status, "error parsing number: %s", buf);
multipart_end(p);
return false;
}
@ -593,6 +670,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
}
@ -609,6 +687,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -686,6 +766,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
return ok;
}
@ -694,54 +775,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
static bool end_member(upb_json_parser *p) {
assert(!p->top->f);
// Helper: invoked during parse_mapentry() to emit the mapentry message's key
// field based on the current contents of the accumulate buffer.
static bool parse_mapentry_key(upb_json_parser *p) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
// Emit the key field. We do a bit of ad-hoc parsing here because the
// parser state machine has already decided that this is a string field
// name, and we are reinterpreting it as some arbitrary key type. In
// particular, integer and bool keys are quoted, so we need to parse the
// quoted string contents here.
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke end_number. The accum buffer has the number's text already.
if (!parse_number(p)) {
return false;
}
break;
case UPB_TYPE_BOOL:
if (len == 4 && !strncmp(buf, "true", 4)) {
if (!parser_putbool(p, true)) {
return false;
}
} else if (len == 5 && !strncmp(buf, "false", 5)) {
if (!parser_putbool(p, false)) {
return false;
}
} else {
upb_status_seterrmsg(p->status,
"Map bool key not 'true' or 'false'");
return false;
}
multipart_end(p);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
upb_sink subsink;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, len, &subsink);
sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
upb_sink_putstring(&subsink, sel, buf, len, NULL);
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&subsink, sel);
multipart_end(p);
break;
}
default:
upb_status_seterrmsg(p->status, "Invalid field type for map key");
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
// Helper: emit one map entry (as a submessage in the map field sequence).  This
// is invoked from end_membername(), at the end of the map entry's key string,
// with the map key in the accumulate buffer.  It parses the key from that
// buffer, emits the handler calls to start the mapentry submessage (setting up
// its subframe in the process), and sets up state in the subframe so that the
// value parser (invoked next) will emit the mapentry's value field and then
// end the mapentry message.
//
// Returns false (with p->status describing the problem) if the stack is
// exhausted, the key cannot be parsed, or the mapentry message has no value
// field.
static bool handle_mapentry(upb_json_parser *p) {
  // Map entry: p->top->sink is the seq frame, so we need to start a frame
  // for the mapentry itself, and then set |f| in that frame so that the map
  // value field is parsed, and also set a flag to end the frame after the
  // map-entry value is parsed.
  if (!check_stack(p)) return false;

  const upb_fielddef *mapfield = p->top->mapfield;
  const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);

  upb_jsonparser_frame *inner = p->top + 1;
  p->top->f = mapfield;
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
  inner->m = mapentrymsg;
  inner->mapfield = mapfield;
  inner->is_map = false;

  // Don't set this to true *yet* -- we reuse parsing handlers below to push
  // the key field value to the sink, and these handlers will pop the frame
  // if they see is_mapentry (when invoked by the parser state machine, they
  // would have just seen the map-entry value, not key).
  inner->is_mapentry = false;
  p->top = inner;

  // send STARTMSG in submsg frame.
  upb_sink_startmsg(&p->top->sink);

  // Propagate key failures instead of silently emitting a keyless entry; the
  // error message has already been recorded in p->status.
  if (!parse_mapentry_key(p)) {
    return false;
  }

  // Set up the value field to receive the map-entry value.
  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
  p->top->is_mapentry = true;  // set up to pop frame after value is parsed.
  p->top->mapfield = mapfield;
  if (p->top->f == NULL) {
    upb_status_seterrmsg(p->status, "mapentry message has no value");
    return false;
  }

  return true;
}
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
// Called when the closing quote of an object member's name has been seen; the
// name text is in the accumulate buffer.
//
// In a map context the name is a map key and is handed to handle_mapentry().
// Otherwise it is looked up as a field of the current message and becomes the
// field whose value is expected next.  Returns false if no such field exists.
static bool end_membername(upb_json_parser *p) {
  assert(!p->top->f);

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field) {
    p->top->f = field;
    multipart_end(p);
    return true;
  }

  // TODO(haberman): Ignore unknown fields if requested/configured to do so.
  upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
  return false;
}
// Called after an object member's value has been parsed.  When that value was
// the value of a map entry, the entry's frame is closed as well: ENDMSG is
// sent on the entry's sink, the frame is popped, and ENDSUBMSG is sent on the
// enclosing sequence's sink.  In every case the (now current) frame's pending
// field is cleared.
static void end_member(upb_json_parser *p) {
  upb_jsonparser_frame *entry = p->top;

  if (entry->is_mapentry) {
    assert(p->top > p->stack);

    // send ENDMSG on submsg.
    upb_status s = UPB_STATUS_INIT;
    upb_sink_endmsg(&entry->sink, &s);

    // Remember the map field before leaving the entry's frame.
    const upb_fielddef *mapfield = entry->mapfield;

    // send ENDSUBMSG in repeated-field-of-mapentries frame.
    p->top--;
    upb_selector_t sel;
    bool ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
    UPB_ASSERT_VAR(ok, ok);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }

  p->top->f = NULL;
}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_issubmsg(p->top->f)) {
if (upb_fielddef_ismap(p->top->f)) {
// Beginning of a map. Start a new parser frame in a repeated-field
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
p->top = inner;
return true;
} else if (upb_fielddef_issubmsg(p->top->f)) {
// Beginning of a subobject. Start a new parser frame in the submsg
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
} else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
return true;
}
static void end_subobject(upb_json_parser *p) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
if (p->top->is_map) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
} else {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
}
static bool start_array(upb_json_parser *p) {
@ -761,6 +1005,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
@ -775,12 +1021,16 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
upb_sink_startmsg(&p->top->sink);
if (!p->top->is_map) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
if (!p->top->is_map) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
}
}
@ -805,11 +1055,11 @@ static void end_object(upb_json_parser *p) {
// final state once, when the closing '"' is seen.
#line 901 "upb/json/parser.rl"
#line 1151 "upb/json/parser.rl"
#line 813 "upb/json/parser.c"
#line 1063 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1,
@ -960,7 +1210,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
#line 904 "upb/json/parser.rl"
#line 1154 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@ -980,7 +1230,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
#line 984 "upb/json/parser.c"
#line 1234 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@ -1055,118 +1305,118 @@ _match:
switch ( *_acts++ )
{
case 0:
#line 816 "upb/json/parser.rl"
#line 1066 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
#line 817 "upb/json/parser.rl"
#line 1067 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
#line 821 "upb/json/parser.rl"
#line 1071 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
#line 822 "upb/json/parser.rl"
#line 1072 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
#line 828 "upb/json/parser.rl"
#line 1078 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 5:
#line 829 "upb/json/parser.rl"
#line 1079 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 6:
#line 830 "upb/json/parser.rl"
#line 1080 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
#line 836 "upb/json/parser.rl"
#line 1086 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
#line 842 "upb/json/parser.rl"
#line 1092 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
#line 845 "upb/json/parser.rl"
#line 1095 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
#line 847 "upb/json/parser.rl"
#line 1097 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
#line 852 "upb/json/parser.rl"
#line 1102 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 12:
#line 853 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_member(parser)); }
#line 1103 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 13:
#line 856 "upb/json/parser.rl"
{ clear_member(parser); }
#line 1106 "upb/json/parser.rl"
{ end_member(parser); }
break;
case 14:
#line 862 "upb/json/parser.rl"
#line 1112 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 15:
#line 865 "upb/json/parser.rl"
#line 1115 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 16:
#line 871 "upb/json/parser.rl"
#line 1121 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
#line 875 "upb/json/parser.rl"
#line 1125 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 18:
#line 880 "upb/json/parser.rl"
#line 1130 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 19:
#line 881 "upb/json/parser.rl"
#line 1131 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
#line 883 "upb/json/parser.rl"
#line 1133 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
#line 884 "upb/json/parser.rl"
#line 1134 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
#line 886 "upb/json/parser.rl"
#line 1136 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
#line 888 "upb/json/parser.rl"
#line 1138 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
#line 890 "upb/json/parser.rl"
#line 1140 "upb/json/parser.rl"
{ /* null value */ }
break;
case 25:
#line 892 "upb/json/parser.rl"
#line 1142 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
#line 893 "upb/json/parser.rl"
#line 1143 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
#line 898 "upb/json/parser.rl"
#line 1148 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
#line 1170 "upb/json/parser.c"
#line 1420 "upb/json/parser.c"
}
}
@ -1179,7 +1429,7 @@ _again:
_out: {}
}
#line 923 "upb/json/parser.rl"
#line 1173 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@ -1201,52 +1451,58 @@ bool end(void *closure, const void *hd) {
return true;
}
/* Public API *****************************************************************/
void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
p->accumulate_buf = NULL;
p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
upb_bytessink_reset(&p->input_, &p->input_handler_, p);
p->status = status;
}
void upb_json_parser_uninit(upb_json_parser *p) {
upb_byteshandler_uninit(&p->input_handler_);
free(p->accumulate_buf);
}
void upb_json_parser_reset(upb_json_parser *p) {
static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
int cs;
int top;
// Emit Ragel initialization of the parser.
#line 1232 "upb/json/parser.c"
#line 1465 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
#line 971 "upb/json/parser.rl"
#line 1204 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
p->accumulated = NULL;
}
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
upb_json_parser_reset(p);
upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
p->top->m = upb_handlers_msgdef(sink->handlers);
p->accumulated = NULL;
/* Public API *****************************************************************/
// Allocates a JSON parser from "env" and wires it to push its results to
// "output".  The message type parsed at the top level is taken from the
// output sink's handlers.
//
// Returns NULL if the environment cannot supply the memory.  The parser is
// never freed explicitly; its memory belongs to "env".
upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
#ifndef NDEBUG
  const size_t size_before = upb_env_bytesallocated(env);
#endif
  upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
  if (!p) return NULL;

  p->env = env;
  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
  p->accumulate_buf = NULL;
  p->accumulate_buf_size = 0;

  // Nothing in this function supplies a status object, and the error paths
  // pass p->status to upb_status_seterr*(); give it a defined value rather
  // than leaving an indeterminate pointer.
  // NOTE(review): assumes the upb_status setters tolerate NULL -- confirm, or
  // route errors through upb_env_reporterror(p->env, ...) instead.
  p->status = NULL;

  upb_byteshandler_init(&p->input_handler_);
  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
  upb_bytessink_reset(&p->input_, &p->input_handler_, p);

  json_parser_reset(p);
  upb_sink_reset(&p->top->sink, output->handlers, output->closure);
  p->top->m = upb_handlers_msgdef(output->handlers);

  // If this fails, uncomment and increase the value in parser.h.
  // fprintf(stderr, "%zu\n", upb_env_bytesallocated(env) - size_before);
  assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
  return p;
}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {

@ -11,6 +11,7 @@
#ifndef UPB_JSON_PARSER_H_
#define UPB_JSON_PARSER_H_
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -23,78 +24,32 @@ class Parser;
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
// Internal-only struct used by the parser.
typedef struct {
UPB_PRIVATE_FOR_CPP
upb_sink sink;
const upb_msgdef *m;
const upb_fielddef *f;
} upb_jsonparser_frame;
/* upb::json::Parser **********************************************************/
#define UPB_JSON_MAX_DEPTH 64
// Preallocation hint: parser won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the parser library is upgraded without recompiling the application,
// it may be an underestimate.
#define UPB_JSON_PARSER_SIZE 3568
#ifdef __cplusplus
// Parses an incoming BytesStream, pushing the results to the destination sink.
UPB_DEFINE_CLASS0(upb::json::Parser,
class upb::json::Parser {
public:
Parser(Status* status);
~Parser();
static Parser* Create(Environment* env, Sink* output);
// Resets the state of the parser, so that it will expect to begin a new
// document.
void Reset();
// Resets the output pointer which will serve as our closure. Implies
// Reset().
void ResetOutput(Sink* output);
// The input to the parser.
BytesSink* input();
,
UPB_DEFINE_STRUCT0(upb_json_parser,
upb_byteshandler input_handler_;
upb_bytessink input_;
// Stack to track the JSON scopes we are in.
upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
upb_jsonparser_frame *top;
upb_jsonparser_frame *limit;
upb_status *status;
private:
UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser);
};
// Ragel's internal parsing stack for the parsing state machine.
int current_state;
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// The handle for the current buffer.
const upb_bufhandle *handle;
// Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
));
#endif
UPB_BEGIN_EXTERN_C
void upb_json_parser_init(upb_json_parser *p, upb_status *status);
void upb_json_parser_uninit(upb_json_parser *p);
void upb_json_parser_reset(upb_json_parser *p);
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *output);
upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
upb_bytessink *upb_json_parser_input(upb_json_parser *p);
UPB_END_EXTERN_C
@ -103,11 +58,8 @@ UPB_END_EXTERN_C
namespace upb {
namespace json {
inline Parser::Parser(Status* status) { upb_json_parser_init(this, status); }
inline Parser::~Parser() { upb_json_parser_uninit(this); }
inline void Parser::Reset() { upb_json_parser_reset(this); }
inline void Parser::ResetOutput(Sink* output) {
upb_json_parser_resetoutput(this, output);
inline Parser* Parser::Create(Environment* env, Sink* output) {
return upb_json_parser_create(env, output);
}
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);

@ -31,6 +31,71 @@
#include "upb/json/parser.h"
#define UPB_JSON_MAX_DEPTH 64
// One entry of the parser's scope stack.  A new frame is pushed whenever the
// parser descends into a nested scope (submessage, map, array, or string
// value) and popped when that scope ends.
typedef struct {
// Sink that receives the data parsed inside this scope.
upb_sink sink;
// The current message in which we're parsing, and the field whose value we're
// expecting next.  |f| is NULL while no member name has been parsed yet.
const upb_msgdef *m;
const upb_fielddef *f;
// We are in a repeated-field context, ready to emit mapentries as
// submessages. This flag alters the start-of-object (open-brace) behavior to
// begin a sequence of mapentry messages rather than a single submessage.
bool is_map;
// We are in a map-entry message context. This flag is set when parsing the
// value field of a single map entry and indicates to all value-field parsers
// (subobjects, strings, numbers, and bools) that the map-entry submessage
// should end as soon as the value is parsed.
bool is_mapentry;
// If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
// message's map field that we're currently parsing. This differs from |f|
// because |f| is the field in the *current* message (i.e., the map-entry
// message itself), not the parent's field that leads to this map.
// Frames pushed for plain submessages, arrays, and strings do not set it.
const upb_fielddef *mapfield;
} upb_jsonparser_frame;
// Complete state of a JSON parser instance.
struct upb_json_parser {
// Environment the parser was allocated from; also used to grow the
// accumulate buffer.
upb_env *env;
// Handler/sink pair through which input bytes are fed to the parser.  The
// sink's closure is the parser itself.
upb_byteshandler input_handler_;
upb_bytessink input_;
// Stack to track the JSON scopes we are in.
upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
// Frame for the innermost scope currently being parsed.
upb_jsonparser_frame *top;
// One past the last frame of |stack| (stack + UPB_JSON_MAX_DEPTH).
upb_jsonparser_frame *limit;
// Where parse errors are reported (via upb_status_seterrmsg/seterrf).
upb_status *status;
// Ragel's internal parsing stack for the parsing state machine.
int current_state;
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// The handle for the current buffer.
const upb_bufhandle *handle;
// Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
};
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
@ -233,12 +298,13 @@ static void accumulate_clear(upb_json_parser *p) {
// Used internally by accumulate_append().
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
size_t old_size = p->accumulate_buf_size;
size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
void *mem = realloc(p->accumulate_buf, new_size);
void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@ -260,16 +326,14 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
return true;
}
if (p->accumulate_buf_size - p->accumulated_len < len) {
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
}
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
}
if (!accumulate_realloc(p, need)) {
return false;
}
if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
return false;
}
if (p->accumulated != p->accumulate_buf) {
@ -508,16 +572,28 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
static bool parse_number(upb_json_parser *p);
// Called at the end of a number token: closes the capture that ends at |ptr|
// and then converts the captured text.  Returns false if either step fails;
// the conversion is not attempted when ending the capture fails.
static bool end_number(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr) && parse_number(p);
}
static bool parse_number(upb_json_parser *p) {
// strtol() and friends unfortunately do not support specifying the length of
// the input string, so we need to force a copy into a NULL-terminated buffer.
if (!multipart_text(p, "\0", 1, false)) {
return false;
}
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len;
char *end;
const char *myend = buf + len - 1; // One for NULL.
char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@ -573,10 +649,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
return true;
err:
upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len);
upb_status_seterrf(p->status, "error parsing number: %s", buf);
multipart_end(p);
return false;
}
@ -591,6 +668,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
}
@ -607,6 +685,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -684,6 +764,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
return ok;
}
@ -692,54 +773,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
static bool end_member(upb_json_parser *p) {
assert(!p->top->f);
// Helper: invoked during parse_mapentry() to emit the mapentry message's key
// field based on the current contents of the accumulate buffer.
static bool parse_mapentry_key(upb_json_parser *p) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
// Emit the key field. We do a bit of ad-hoc parsing here because the
// parser state machine has already decided that this is a string field
// name, and we are reinterpreting it as some arbitrary key type. In
// particular, integer and bool keys are quoted, so we need to parse the
// quoted string contents here.
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke parse_number(). The accum buffer has the number's text already.
if (!parse_number(p)) {
return false;
}
break;
case UPB_TYPE_BOOL:
if (len == 4 && !strncmp(buf, "true", 4)) {
if (!parser_putbool(p, true)) {
return false;
}
} else if (len == 5 && !strncmp(buf, "false", 5)) {
if (!parser_putbool(p, false)) {
return false;
}
} else {
upb_status_seterrmsg(p->status,
"Map bool key not 'true' or 'false'");
return false;
}
multipart_end(p);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
upb_sink subsink;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, len, &subsink);
sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
upb_sink_putstring(&subsink, sel, buf, len, NULL);
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&subsink, sel);
multipart_end(p);
break;
}
default:
upb_status_seterrmsg(p->status, "Invalid field type for map key");
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
// Helper: emit one map entry (as a submessage in the map field sequence). This
// is invoked from end_membername(), at the end of the map entry's key string,
// with the map key in the accumulate buffer. It parses the key from that
// buffer, emits the handler calls to start the mapentry submessage (setting up
// its subframe in the process), and sets up state in the subframe so that the
// value parser (invoked next) will emit the mapentry's value field and then
// end the mapentry message.
// Emits one map entry as a submessage of the enclosing map sequence.
//
// Expects p->top to be the map (sequence) frame and the accumulate buffer to
// hold the entry's key text.  On success a new frame for the mapentry message
// is on top of the stack, the key field has been emitted, and the frame is
// primed so that the next parsed value becomes the entry's value field.
//
// Returns false (with an error recorded by the failing step) if the stack is
// full, the key cannot be parsed, or the mapentry message has no value field.
static bool handle_mapentry(upb_json_parser *p) {
  // Map entry: p->top->sink is the seq frame, so we need to start a frame
  // for the mapentry itself, and then set |f| in that frame so that the map
  // value field is parsed, and also set a flag to end the frame after the
  // map-entry value is parsed.
  if (!check_stack(p)) return false;

  const upb_fielddef *mapfield = p->top->mapfield;
  const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);

  upb_jsonparser_frame *inner = p->top + 1;
  // The selector lookup below reads p->top->f, so point it at the map field.
  p->top->f = mapfield;
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
  inner->m = mapentrymsg;
  inner->mapfield = mapfield;
  inner->is_map = false;

  // Don't set this to true *yet* -- we reuse parsing handlers below to push
  // the key field value to the sink, and these handlers will pop the frame
  // if they see is_mapentry (when invoked by the parser state machine, they
  // would have just seen the map-entry value, not key).
  inner->is_mapentry = false;
  p->top = inner;

  // send STARTMSG in submsg frame.
  upb_sink_startmsg(&p->top->sink);

  // A key that fails to parse has already recorded its error; stop here
  // rather than going on to parse a value for an entry without a valid key.
  if (!parse_mapentry_key(p)) return false;

  // Set up the value field to receive the map-entry value.
  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
  p->top->is_mapentry = true;  // set up to pop frame after value is parsed.
  p->top->mapfield = mapfield;
  if (p->top->f == NULL) {
    upb_status_seterrmsg(p->status, "mapentry message has no value");
    return false;
  }

  return true;
}
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
// Called when the name (key string) of an object member has been fully
// accumulated.  Inside a map the name is a map key and is handed to
// handle_mapentry(); otherwise it is looked up as a field of the current
// message and stored in the top frame.  Returns false on an unknown field or
// if map-entry handling fails.
static bool end_membername(upb_json_parser *p) {
  assert(!p->top->f);

  if (p->top->is_map) return handle_mapentry(p);

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field == NULL) {
    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
    return false;
  }

  p->top->f = field;
  multipart_end(p);
  return true;
}
// Called after a member's value has been parsed.  If the value belonged to a
// map entry, closes the mapentry submessage and pops its frame.  In all cases
// clears the "current field" of the (possibly new) top frame.
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    // A map-entry frame always sits above the map's sequence frame.
    assert(p->top > p->stack);
    upb_jsonparser_frame *entry = p->top;

    // send ENDMSG on submsg.
    upb_status endmsg_status = UPB_STATUS_INIT;
    upb_sink_endmsg(&entry->sink, &endmsg_status);
    const upb_fielddef *mapfield = entry->mapfield;

    // send ENDSUBMSG in repeated-field-of-mapentries frame.
    p->top = entry - 1;
    upb_selector_t endsubmsg_sel;
    bool found = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG,
                                          &endsubmsg_sel);
    UPB_ASSERT_VAR(found, found);
    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_issubmsg(p->top->f)) {
if (upb_fielddef_ismap(p->top->f)) {
// Beginning of a map. Start a new parser frame in a repeated-field
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
p->top = inner;
return true;
} else if (upb_fielddef_issubmsg(p->top->f)) {
// Beginning of a subobject. Start a new parser frame in the submsg
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
} else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
return true;
}
static void end_subobject(upb_json_parser *p) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
if (p->top->is_map) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
} else {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
}
static bool start_array(upb_json_parser *p) {
@ -759,6 +1003,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
@ -773,12 +1019,16 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
upb_sink_startmsg(&p->top->sink);
if (!p->top->is_map) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
if (!p->top->is_map) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
}
}
@ -850,10 +1100,10 @@ static void end_object(upb_json_parser *p) {
ws
string
>{ start_member(parser); }
@{ CHECK_RETURN_TOP(end_member(parser)); }
@{ CHECK_RETURN_TOP(end_membername(parser)); }
ws ":" ws
value2
%{ clear_member(parser); }
%{ end_member(parser); }
ws;
object =
@ -941,28 +1191,11 @@ bool end(void *closure, const void *hd) {
return true;
}
/* Public API *****************************************************************/
// Initializes a caller-allocated parser.  Errors will be reported to
// |status|.  The accumulate buffer starts out empty, and the input handlers
// are bound with the parser itself as their closure.
void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
  upb_byteshandler *handler = &p->input_handler_;

  p->status = status;
  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
  p->accumulate_buf = NULL;
  p->accumulate_buf_size = 0;

  upb_byteshandler_init(handler);
  upb_byteshandler_setstring(handler, parse, NULL);
  upb_byteshandler_setendstr(handler, end, NULL);
  upb_bytessink_reset(&p->input_, handler, p);
}
// Tears down a parser previously set up with upb_json_parser_init():
// uninitializes the input bytes handler and frees the accumulate buffer.
void upb_json_parser_uninit(upb_json_parser *p) {
  upb_byteshandler_uninit(&p->input_handler_);

  // free(NULL) is a no-op, so this is fine even if nothing was accumulated.
  free(p->accumulate_buf);
}
void upb_json_parser_reset(upb_json_parser *p) {
static void json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
int cs;
int top;
@ -973,13 +1206,36 @@ void upb_json_parser_reset(upb_json_parser *p) {
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
p->accumulated = NULL;
}
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
upb_json_parser_reset(p);
upb_sink_reset(&p->top->sink, sink->handlers, sink->closure);
p->top->m = upb_handlers_msgdef(sink->handlers);
p->accumulated = NULL;
/* Public API *****************************************************************/
// Creates a JSON parser whose memory comes from |env| and whose parsed output
// is pushed to |output|.
//
// Returns the new parser, or NULL if the allocation from |env| fails.  In
// debug builds, asserts that the bytes allocated from |env| during creation
// do not exceed UPB_JSON_PARSER_SIZE.
//
// NOTE(review): p->status is not assigned here, nor in json_parser_reset() as
// far as is visible; confirm it is set before any error is reported through
// it.
upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
#ifndef NDEBUG
  const size_t size_before = upb_env_bytesallocated(env);
#endif
  upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
  // This function returns a pointer, so report failure as NULL (not false).
  if (!p) return NULL;

  p->env = env;
  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
  p->accumulate_buf = NULL;
  p->accumulate_buf_size = 0;

  // The parser itself is the closure for its own input handlers.
  upb_byteshandler_init(&p->input_handler_);
  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
  upb_bytessink_reset(&p->input_, &p->input_handler_, p);

  // Must run before the top frame is configured: it points p->top at the
  // bottom of the stack.
  json_parser_reset(p);
  upb_sink_reset(&p->top->sink, output->handlers, output->closure);
  p->top->m = upb_handlers_msgdef(output->handlers);

  // If this fails, uncomment and increase the value in parser.h.
  // fprintf(stderr, "%zu\n", upb_env_bytesallocated(env) - size_before);
  assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
  return p;
}
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {

@ -15,6 +15,27 @@
#include <string.h>
#include <stdint.h>
// Complete state of a JSON printer instance.
struct upb_json_printer {
// Sink through which data to be printed arrives; its closure is the printer.
upb_sink input_;
// BytesSink closure.
void *subc_;
// Destination for the generated JSON text.
upb_bytessink *output_;
// We track the depth so that we know when to emit startstr/endstr on the
// output.
int depth_;
// Have we emitted the first element? This state is necessary to emit commas
// without leaving a trailing comma in arrays/maps. We keep this state per
// frame depth.  The entry for a frame is set to true when the frame is
// opened.
//
// Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
// We count frames (contexts in which we separate elements by commas) as both
// repeated fields and messages (maps), and the worst case is a
// message->repeated field->submessage->repeated field->... nesting.
bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
// StringPiece; a pointer plus a length.
typedef struct {
const char *ptr;
@ -182,13 +203,23 @@ static bool putkey(void *closure, const void *handler_data) {
return true; \
} \
static bool repeated_##type(void *closure, const void *handler_data, \
type val) { \
type val) { \
upb_json_printer *p = closure; \
print_comma(p); \
CHK(put##type(closure, handler_data, val)); \
return true; \
}
// Defines putmapkey_<type>(), the handler for a map key of scalar type
// |type|.  It emits the value via put<type>() wrapped in double quotes and
// followed by ':' -- i.e. '"<value>":' -- since JSON object keys must be
// strings.  |fmt_func| is not referenced by the expansion; it only mirrors
// the TYPE_HANDLERS() parameter list.
#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \
static bool putmapkey_##type(void *closure, const void *handler_data, \
type val) { \
upb_json_printer *p = closure; \
print_data(p, "\"", 1); \
CHK(put##type(closure, handler_data, val)); \
print_data(p, "\":", 2); \
return true; \
}
TYPE_HANDLERS(double, fmt_double);
TYPE_HANDLERS(float, fmt_float);
TYPE_HANDLERS(bool, fmt_bool);
@ -197,7 +228,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64);
TYPE_HANDLERS(int64_t, fmt_int64);
TYPE_HANDLERS(uint64_t, fmt_uint64);
// double and float are not allowed to be map keys.
TYPE_HANDLERS_MAPKEY(bool, fmt_bool);
TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64);
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
typedef struct {
void *keyname;
@ -222,20 +261,36 @@ static bool scalar_enum(void *closure, const void *handler_data,
return true;
}
static bool repeated_enum(void *closure, const void *handler_data,
int32_t val) {
const EnumHandlerData *hd = handler_data;
upb_json_printer *p = closure;
print_comma(p);
const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
static void print_enum_symbolic_name(upb_json_printer *p,
const upb_enumdef *def,
int32_t val) {
const char *symbolic_name = upb_enumdef_iton(def, val);
if (symbolic_name) {
print_data(p, "\"", 1);
putstring(p, symbolic_name, strlen(symbolic_name));
print_data(p, "\"", 1);
} else {
putint32_t(closure, NULL, val);
putint32_t(p, NULL, val);
}
}
// Handler for one element of a repeated enum field: emits the element
// separator (if needed) followed by the enum value, using its symbolic name
// when the enum definition has one.  Always returns true.
static bool repeated_enum(void *closure, const void *handler_data,
                          int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_hd = handler_data;

  print_comma(printer);
  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
  return true;
}
// Handler for an enum-typed map value: emits just the enum value (symbolic
// name when available).  Unlike repeated_enum() no comma is printed here.
// Always returns true.
static bool mapvalue_enum(void *closure, const void *handler_data,
                          int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_hd = handler_data;

  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
  return true;
}
@ -251,25 +306,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) {
return closure;
}
static bool startmap(void *closure, const void *handler_data) {
// Opens a new comma-separated context: descends one level, marks the new
// level as not having printed an element yet, and emits the opening brace.
static void start_frame(upb_json_printer *p) {
  const int depth = ++p->depth_;
  p->first_elem_[depth] = true;
  print_data(p, "{", 1);
}
// Closes the current comma-separated context: emits the closing brace and
// ascends one level.
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
static bool printer_startmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
if (p->depth_++ == 0) {
if (p->depth_ == 0) {
upb_bytessink_start(p->output_, 0, &p->subc_);
}
p->first_elem_[p->depth_] = true;
print_data(p, "{", 1);
start_frame(p);
return true;
}
static bool endmap(void *closure, const void *handler_data, upb_status *s) {
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
UPB_UNUSED(handler_data);
UPB_UNUSED(s);
upb_json_printer *p = closure;
if (--p->depth_ == 0) {
end_frame(p);
if (p->depth_ == 0) {
upb_bytessink_end(p->output_);
}
print_data(p, "}", 1);
return true;
}
@ -290,6 +355,23 @@ static bool endseq(void *closure, const void *handler_data) {
return true;
}
// Start-of-sequence handler for a map field: prints the field's key and then
// opens a '{' frame for the entries.  Returns the closure to use for the
// entries (the printer itself).
static void *startmap(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  CHK(putkey(closure, handler_data));
  // Same steps as opening a message frame: depth++, reset first-element
  // flag, emit "{".
  start_frame(printer);
  return closure;
}
// End-of-sequence handler for a map field: closes the '{' frame opened by
// startmap().  Always returns true.
static bool endmap(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  upb_json_printer *printer = closure;
  // Emits "}" and then decrements the depth.
  end_frame(printer);
  return true;
}
static size_t putstr(void *closure, const void *handler_data, const char *str,
size_t len, const upb_bufhandle *handle) {
UPB_UNUSED(handler_data);
@ -404,6 +486,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) {
return true;
}
// Start-of-string handler shared by string map keys and string map values:
// emits the opening double quote.  Returns the printer as the closure for
// the string's data.
static void *mapkeyval_startstr(void *closure, const void *handler_data,
                                size_t size_hint) {
  UPB_UNUSED(size_hint);
  UPB_UNUSED(handler_data);
  upb_json_printer *printer = closure;
  print_data(printer, "\"", 1);
  return printer;
}
// String-data handler for a string map key: forwards the chunk to putstr()
// and reports all |len| bytes as consumed.  The surrounding quotes and the
// trailing ':' are emitted by the key's startstr/endstr handlers.
static size_t mapkey_str(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
// CHK returns early from this function if putstr() reports failure.
CHK(putstr(closure, handler_data, str, len, handle));
return len;
}
// End-of-string handler for a string map key: emits the closing quote and
// the ':' that separates the key from its value.  Always returns true.
static bool mapkey_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);
  print_data(printer, "\":", 2);
  return true;
}
// End-of-string handler for a string map value: emits only the closing
// quote.  Always returns true.
static bool mapvalue_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);
  print_data(printer, "\"", 1);
  return true;
}
static size_t scalar_bytes(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
@ -421,31 +533,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data,
return len;
}
void printer_sethandlers(const void *closure, upb_handlers *h) {
// String-data handler for a bytes-typed map key: emits the bytes through
// putbytes() and then the ':' separating the key from its value.  Reports
// all |len| bytes as consumed.
static size_t mapkey_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  upb_json_printer *printer = closure;
  CHK(putbytes(closure, handler_data, str, len, handle));
  print_data(printer, ":", 1);
  return len;
}
// Builds the handler data for enum field |f| (its enum definition plus its
// key name) and stores it in |attr|.  The data is registered with |h| so
// that it is freed along with the handlers.
//
// NOTE(review): the malloc() result is used without a NULL check.
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_data = malloc(sizeof(EnumHandlerData));

  enum_data->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  enum_data->keyname = newstrpc(h, f);

  upb_handlers_addcleanup(h, enum_data, free);
  upb_handlerattr_sethandlerdata(attr, enum_data);
}
// Set up handlers for a mapentry submessage (i.e., an individual key/value pair
// in a map).
//
// TODO: Handle missing key, missing value, out-of-order key/value, or repeated
// key or value cases properly. The right way to do this is to allocate a
// temporary structure at the start of a mapentry submessage, store key and
// value data in it as key and value handlers are called, and then print the
// key/value pair once at the end of the submessage. If we don't do this, we
// should at least detect the case and throw an error. However, so far all of
// our sources that emit mapentry messages do so canonically (with one key
// field, and then one value field), so this is not a pressing concern at the
// moment.
// Registers on |h| the handlers for the key and value fields of the mapentry
// message that |h| is bound to.  |closure| is unused.
void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
// A mapentry message is printed simply as '"key": value'. Rather than
// special-case key and value for every type below, we just handle both
// fields explicitly here.
const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
// No handler data is attached to any of the key/value handlers except the
// enum value handler below.
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
// Key handlers: every key is emitted as a quoted string followed by ':'.
switch (upb_fielddef_type(key_field)) {
case UPB_TYPE_INT32:
upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
break;
case UPB_TYPE_INT64:
upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
break;
case UPB_TYPE_UINT32:
upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
break;
case UPB_TYPE_UINT64:
upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
break;
case UPB_TYPE_BOOL:
upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
break;
case UPB_TYPE_STRING:
upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
break;
case UPB_TYPE_BYTES:
// Only a string-data handler is registered for bytes keys; mapkey_bytes
// emits the trailing ':' itself.
upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
break;
default:
// Any other key type is treated as a programming error.
assert(false);
break;
}
// Value handlers: values are emitted bare (no key prefix, no comma).
switch (upb_fielddef_type(value_field)) {
case UPB_TYPE_INT32:
upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
break;
case UPB_TYPE_INT64:
upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
break;
case UPB_TYPE_UINT32:
upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
break;
case UPB_TYPE_UINT64:
upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
break;
case UPB_TYPE_BOOL:
upb_handlers_setbool(h, value_field, putbool, &empty_attr);
break;
case UPB_TYPE_FLOAT:
upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
break;
case UPB_TYPE_DOUBLE:
upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
break;
case UPB_TYPE_STRING:
upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
upb_handlers_setstring(h, value_field, putstr, &empty_attr);
upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
break;
case UPB_TYPE_BYTES:
upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
break;
case UPB_TYPE_ENUM: {
// Enum values need the enum definition as handler data so that the
// symbolic name can be looked up at print time.
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
set_enum_hd(h, value_field, &enum_attr);
upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
upb_handlerattr_uninit(&enum_attr);
break;
}
case UPB_TYPE_MESSAGE:
// No handler necessary -- the submsg handlers will print the message
// as appropriate.
break;
}
upb_handlerattr_uninit(&empty_attr);
}
void printer_sethandlers(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
bool is_mapentry = upb_msgdef_mapentry(md);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlers_setstartmsg(h, startmap, &empty_attr);
upb_handlers_setendmsg(h, endmap, &empty_attr);
#define TYPE(type, name, ctype) \
case type: \
if (upb_fielddef_isseq(f)) { \
upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
} else { \
upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
} \
if (is_mapentry) {
// mapentry messages are sufficiently different that we handle them
// separately.
printer_sethandlers_mapentry(closure, h);
return;
}
upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
#define TYPE(type, name, ctype) \
case type: \
if (upb_fielddef_isseq(f)) { \
upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
} else { \
upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
} \
break;
upb_msg_iter i;
upb_msg_begin(&i, upb_handlers_msgdef(h));
for(; !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
upb_msg_field_begin(&i, md);
for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
if (upb_fielddef_isseq(f)) {
if (upb_fielddef_ismap(f)) {
upb_handlers_setstartseq(h, f, startmap, &name_attr);
upb_handlers_setendseq(h, f, endmap, &name_attr);
} else if (upb_fielddef_isseq(f)) {
upb_handlers_setstartseq(h, f, startseq, &name_attr);
upb_handlers_setendseq(h, f, endseq, &empty_attr);
}
@ -462,12 +704,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
// For now, we always emit symbolic names for enums. We may want an
// option later to control this behavior, but we will wait for a real
// need first.
EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
hd->keyname = newstrpc(h, f);
upb_handlers_addcleanup(h, hd, free);
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&enum_attr, hd);
set_enum_hd(h, f, &enum_attr);
if (upb_fielddef_isseq(f)) {
upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
@ -514,25 +752,29 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
#undef TYPE
}
/* Public API *****************************************************************/
void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h) {
p->output_ = NULL;
static void json_printer_reset(upb_json_printer *p) {
p->depth_ = 0;
upb_sink_reset(&p->input_, h, p);
}
void upb_json_printer_uninit(upb_json_printer *p) {
UPB_UNUSED(p);
}
void upb_json_printer_reset(upb_json_printer *p) {
p->depth_ = 0;
}
/* Public API *****************************************************************/
upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
upb_bytessink *output) {
#ifndef NDEBUG
size_t size_before = upb_env_bytesallocated(e);
#endif
upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
if (!p) return NULL;
void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output) {
upb_json_printer_reset(p);
p->output_ = output;
json_printer_reset(p);
upb_sink_reset(&p->input_, h, p);
// If this fails, increase the value in printer.h.
assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
return p;
}
upb_sink *upb_json_printer_input(upb_json_printer *p) {

@ -11,6 +11,7 @@
#ifndef UPB_JSON_TYPED_PRINTER_H_
#define UPB_JSON_TYPED_PRINTER_H_
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -26,71 +27,48 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
/* upb::json::Printer *********************************************************/
// Prints an incoming stream of data to a BytesSink in JSON format.
UPB_DEFINE_CLASS0(upb::json::Printer,
public:
Printer(const upb::Handlers* handlers);
~Printer();
#define UPB_JSON_PRINTER_SIZE 168
// Resets the state of the printer, so that it will expect to begin a new
// document.
void Reset();
#ifdef __cplusplus
// Resets the output pointer which will serve as our closure. Implies
// Reset().
void ResetOutput(BytesSink* output);
// Prints an incoming stream of data to a BytesSink in JSON format.
class upb::json::Printer {
public:
static Printer* Create(Environment* env, const upb::Handlers* handlers,
BytesSink* output);
// The input to the printer.
Sink* input();
// Returns handlers for printing according to the specified schema.
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
,
UPB_DEFINE_STRUCT0(upb_json_printer,
upb_sink input_;
// BytesSink closure.
void *subc_;
upb_bytessink *output_;
// We track the depth so that we know when to emit startstr/endstr on the
// output.
int depth_;
// Have we emitted the first element? This state is necessary to emit commas
// without leaving a trailing comma in arrays/maps. We keep this state per
// frame depth.
//
// Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
// We count frames (contexts in which we separate elements by commas) as both
// repeated fields and messages (maps), and the worst case is a
// message->repeated field->submessage->repeated field->... nesting.
bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
));
UPB_BEGIN_EXTERN_C // {
// Native C API.
static const size_t kSize = UPB_JSON_PRINTER_SIZE;
void upb_json_printer_init(upb_json_printer *p, const upb_handlers *h);
void upb_json_printer_uninit(upb_json_printer *p);
void upb_json_printer_reset(upb_json_printer *p);
void upb_json_printer_resetoutput(upb_json_printer *p, upb_bytessink *output);
private:
UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer);
};
#endif
UPB_BEGIN_EXTERN_C
// Native C API.
upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
upb_bytessink *output);
upb_sink *upb_json_printer_input(upb_json_printer *p);
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
const void *owner);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace json {
inline Printer::Printer(const upb::Handlers* handlers) {
upb_json_printer_init(this, handlers);
}
inline Printer::~Printer() { upb_json_printer_uninit(this); }
inline void Printer::Reset() { upb_json_printer_reset(this); }
inline void Printer::ResetOutput(BytesSink* output) {
upb_json_printer_resetoutput(this, output);
// Forwards to the C API function upb_json_printer_create() with the given
// environment, handlers and output sink, and returns whatever it returns.
inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers,
BytesSink* output) {
return upb_json_printer_create(env, handlers, output);
}
inline Sink* Printer::input() { return upb_json_printer_input(this); }
inline reffed_ptr<const Handlers> Printer::NewHandlers(

@ -64,7 +64,6 @@ mgroup *newgroup(const void *owner) {
static void freemethod(upb_refcounted *r) {
upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
upb_byteshandler_uninit(&method->input_handler_);
if (method->dest_handlers_) {
upb_handlers_unref(method->dest_handlers_, method);
@ -762,8 +761,10 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
uint32_t* start_pc = c->pc;
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_fieldtype_t type = upb_fielddef_type(f);
@ -813,9 +814,11 @@ static void find_methods(compiler *c, const upb_handlers *h) {
newmethod(h, c->group);
// Find submethods.
upb_msg_iter i;
upb_msg_field_iter i;
const upb_msgdef *md = upb_handlers_msgdef(h);
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
for(upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
const upb_handlers *sub_h;
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
@ -857,7 +860,7 @@ static void set_bytecode_handlers(mgroup *g) {
}
/* JIT setup. ******************************************************************/
/* JIT setup. *****************************************************************/
#ifdef UPB_USE_JIT_X64

@ -242,7 +242,7 @@ static void emit_static_asm(jitcompiler *jc) {
|
|2:
| // Resume decoder.
| lea ARG2_64, DECODER->callstack
| mov ARG2_64, DECODER->callstack
| sub rsp, ARG3_64
| mov ARG1_64, rsp
| callp memcpy // Restore stack.
@ -255,7 +255,7 @@ static void emit_static_asm(jitcompiler *jc) {
asmlabel(jc, "exitjit");
|->exitjit:
| // Save the stack into DECODER->callstack.
| lea ARG1_64, DECODER->callstack
| mov ARG1_64, DECODER->callstack
| mov ARG2_64, rsp
| mov ARG3_64, DECODER->saved_rsp
| sub ARG3_64, rsp
@ -300,11 +300,11 @@ static void emit_static_asm(jitcompiler *jc) {
| sub rcx, rdx
| jb ->err // Len is greater than enclosing message.
| mov FRAME->end_ofs, rcx
| cmp FRAME, DECODER->limit
| je >3 // Stack overflow
| add FRAME, sizeof(upb_pbdecoder_frame)
| mov DELIMEND, PTR
| add DELIMEND, rdx
| cmp FRAME, DECODER->limit
| je >3 // Stack overflow
| mov dword FRAME->groupnum, 0
| test rcx, rcx
| jz >2
@ -1071,9 +1071,9 @@ static void jitbytecode(jitcompiler *jc) {
| // code with the packed code-path. If this is changed later, this
| // store can be removed.
| mov qword FRAME->end_ofs, 0
| add FRAME, sizeof(upb_pbdecoder_frame)
| cmp FRAME, DECODER->limit
| je ->err
| add FRAME, sizeof(upb_pbdecoder_frame)
| mov dword FRAME->groupnum, arg
break;
case OP_PUSHLENDELIM:

@ -28,8 +28,8 @@ static const unsigned char upb_jit_actionlist[2162] = {
73,139,159,233,77,139,167,233,77,139,174,233,73,139,174,233,73,43,175,233,
73,3,175,233,73,139,151,233,72,133,210,15,133,244,248,252,255,208,73,139,
135,233,73,199,135,233,0,0,0,0,248,1,255,91,65,92,65,93,65,94,65,95,93,195,
248,2,73,141,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228,
72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,141,191,233,
248,2,73,139,183,233,72,41,212,72,137,231,72,184,237,237,65,84,73,137,228,
72,129,228,239,252,255,208,76,137,228,65,92,195,255,248,11,73,139,191,233,
72,137,230,73,139,151,233,72,41,226,73,137,151,233,137,195,72,184,237,237,
65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,137,216,73,139,
167,233,91,65,92,65,93,65,94,65,95,93,195,255,248,12,73,57,159,233,15,132,
@ -40,7 +40,7 @@ static const unsigned char upb_jit_actionlist[2162] = {
255,76,57,227,15,132,244,253,255,76,137,225,72,41,217,72,131,252,249,1,15,
130,244,253,255,15,182,19,132,210,15,137,244,254,248,7,232,244,14,248,8,72,
131,195,1,72,137,252,233,72,41,217,72,41,209,15,130,244,15,73,137,142,233,
73,129,198,239,72,137,221,72,1,213,77,59,183,233,15,132,244,249,65,199,134,
77,59,183,233,15,132,244,249,73,129,198,239,72,137,221,72,1,213,65,199,134,
233,0,0,0,0,72,133,201,15,132,244,248,77,139,167,233,72,57,252,235,15,135,
244,248,76,57,229,15,135,244,248,255,73,137,252,236,248,2,195,248,3,73,139,
159,233,76,137,252,255,255,72,190,237,237,255,190,237,255,49,252,246,255,
@ -122,8 +122,8 @@ static const unsigned char upb_jit_actionlist[2162] = {
1,248,2,255,72,137,218,76,137,225,72,41,217,77,139,135,233,72,184,237,237,
65,84,73,137,228,72,129,228,239,252,255,208,76,137,228,65,92,72,1,195,255,
76,57,227,15,132,244,249,232,244,29,248,3,255,76,137,227,255,72,57,252,235,
15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,73,129,198,239,
77,59,183,233,15,132,244,15,65,199,134,233,237,255,232,244,13,255,73,129,
15,133,244,1,248,4,255,77,137,174,233,73,199,134,233,0,0,0,0,77,59,183,233,
15,132,244,15,73,129,198,239,65,199,134,233,237,255,232,244,13,255,73,129,
252,238,239,77,139,174,233,255,77,139,167,233,73,3,174,233,73,59,175,233,
15,130,244,247,76,57,229,15,135,244,247,73,137,252,236,248,1,255,72,57,221,
15,132,245,255,232,245,255,248,9,72,131,196,8,195,255
@ -419,7 +419,7 @@ static void emit_static_asm(jitcompiler *jc) {
//|
//|2:
//| // Resume decoder.
//| lea ARG2_64, DECODER->callstack
//| mov ARG2_64, DECODER->callstack
//| sub rsp, ARG3_64
//| mov ARG1_64, rsp
//| callp memcpy // Restore stack.
@ -434,7 +434,7 @@ static void emit_static_asm(jitcompiler *jc) {
asmlabel(jc, "exitjit");
//|->exitjit:
//| // Save the stack into DECODER->callstack.
//| lea ARG1_64, DECODER->callstack
//| mov ARG1_64, DECODER->callstack
//| mov ARG2_64, rsp
//| mov ARG3_64, DECODER->saved_rsp
//| sub ARG3_64, rsp
@ -490,11 +490,11 @@ static void emit_static_asm(jitcompiler *jc) {
//| sub rcx, rdx
//| jb ->err // Len is greater than enclosing message.
//| mov FRAME->end_ofs, rcx
//| cmp FRAME, DECODER->limit
//| je >3 // Stack overflow
//| add FRAME, sizeof(upb_pbdecoder_frame)
//| mov DELIMEND, PTR
//| add DELIMEND, rdx
//| cmp FRAME, DECODER->limit
//| je >3 // Stack overflow
//| mov dword FRAME->groupnum, 0
//| test rcx, rcx
//| jz >2
@ -504,7 +504,7 @@ static void emit_static_asm(jitcompiler *jc) {
//| cmp DELIMEND, DATAEND
//| ja >2
//| mov DATAEND, DELIMEND // If DELIMEND >= PTR && DELIMEND < DATAEND
dasm_put(Dst, 337, Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), Dt2(->end));
dasm_put(Dst, 337, Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), Dt2(->end));
# 317 "upb/pb/compile_decoder_x64.dasc"
//|2:
//| ret
@ -1609,11 +1609,11 @@ static void jitbytecode(jitcompiler *jc) {
//| // code with the packed code-path. If this is changed later, this
//| // store can be removed.
//| mov qword FRAME->end_ofs, 0
//| add FRAME, sizeof(upb_pbdecoder_frame)
//| cmp FRAME, DECODER->limit
//| je ->err
//| add FRAME, sizeof(upb_pbdecoder_frame)
//| mov dword FRAME->groupnum, arg
dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), sizeof(upb_pbdecoder_frame), Dt2(->limit), Dt1(->groupnum), arg);
dasm_put(Dst, 2070, Dt1(->sink.closure), Dt1(->end_ofs), Dt2(->limit), sizeof(upb_pbdecoder_frame), Dt1(->groupnum), arg);
# 1078 "upb/pb/compile_decoder_x64.dasc"
break;
case OP_PUSHLENDELIM:

@ -19,10 +19,7 @@
*/
#include <inttypes.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
@ -70,18 +67,17 @@ static bool consumes_input(opcode op) {
static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But emperically on a Core i7, performance increases 30-50%
// with these annotations. Every instance where these appear, gcc 4.2.1 made
// the wrong decision and degraded performance in benchmarks.
#define FORCEINLINE static inline __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
// It's unfortunate that we have to micro-manage the compiler with
// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
// specific to one hardware configuration. But empirically on a Core i7,
// performance increases 30-50% with these annotations. Every instance where
// these appear, gcc 4.2.1 made the wrong decision and degraded performance in
// benchmarks.
static void seterr(upb_pbdecoder *d, const char *msg) {
// TODO(haberman): encapsulate this access to pipeline->status, but not sure
// exactly what that interface should look like.
upb_status_seterrmsg(d->status, msg);
upb_status status = UPB_STATUS_INIT;
upb_status_seterrmsg(&status, msg);
upb_env_reporterror(d->env, &status);
}
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
@ -249,7 +245,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
// Copies the next "bytes" bytes into "buf" and advances the stream.
// Requires that this many bytes are available in the current buffer.
FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
size_t bytes) {
assert(bytes <= curbufleft(d));
memcpy(buf, d->ptr, bytes);
advance(d, bytes);
@ -258,8 +255,8 @@ FORCEINLINE void consumebytes(upb_pbdecoder *d, void *buf, size_t bytes) {
// Slow path for getting the next "bytes" bytes, regardless of whether they are
// available in the current buffer or not. Returns a status code as described
// in decoder.int.h.
static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
size_t bytes) {
UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
size_t bytes) {
const size_t avail = curbufleft(d);
consumebytes(d, buf, avail);
bytes -= avail;
@ -280,7 +277,8 @@ static NOINLINE int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
// Gets the next "bytes" bytes, regardless of whether they are available in the
// current buffer or not. Returns a status code as described in decoder.int.h.
FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
size_t bytes) {
if (curbufleft(d) >= bytes) {
// Buffer has enough data to satisfy.
consumebytes(d, buf, bytes);
@ -290,8 +288,8 @@ FORCEINLINE int32_t getbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
}
}
static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
size_t bytes) {
UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
size_t bytes) {
size_t ret = curbufleft(d);
memcpy(buf, d->ptr, ret);
if (in_residual_buf(d, d->ptr)) {
@ -302,7 +300,8 @@ static NOINLINE size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
return ret;
}
FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
size_t bytes) {
if (curbufleft(d) >= bytes) {
memcpy(buf, d->ptr, bytes);
return bytes;
@ -316,8 +315,8 @@ FORCEINLINE size_t peekbytes(upb_pbdecoder *d, void *buf, size_t bytes) {
// Slow path for decoding a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
uint64_t *u64) {
UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
uint64_t *u64) {
*u64 = 0;
uint8_t byte = 0x80;
int bitpos;
@ -335,7 +334,7 @@ NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
// Decodes a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
*u64 = *d->ptr;
advance(d, 1);
@ -358,7 +357,7 @@ FORCEINLINE int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
// Decodes a 32-bit varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
uint64_t u64;
int32_t ret = decode_varint(d, &u64);
if (ret >= 0) return ret;
@ -377,14 +376,14 @@ FORCEINLINE int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
// Decodes a fixed32 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
FORCEINLINE int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
return getbytes(d, u32, 4);
}
// Decodes a fixed64 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
FORCEINLINE int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
return getbytes(d, u64, 8);
}
@ -408,7 +407,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
if (end > fr->end_ofs) {
seterr(d, "Submessage end extends past enclosing submessage.");
return false;
} else if ((fr + 1) == d->limit) {
} else if (fr == d->limit) {
seterr(d, kPbDecoderStackOverflow);
return false;
}
@ -435,8 +434,8 @@ static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
// Pops a frame from the decoder stack.
static void decoder_pop(upb_pbdecoder *d) { d->top--; }
NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
uint64_t expected) {
UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
uint64_t expected) {
uint64_t data = 0;
size_t bytes = upb_value_size(expected);
size_t read = peekbytes(d, &data, bytes);
@ -814,7 +813,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
// Decoder entry point for the bytecode path. Re-initializes the per-stream
// state of the decoder passed as "closure" (top frame end offset, stream
// offset, call stack, program counter) and returns the decoder itself.
// "size_hint" is ignored.
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
  upb_pbdecoder *decoder = closure;
  UPB_UNUSED(size_hint);

  // Stream position: start at offset zero with an unbounded top-level frame.
  decoder->bufstart_ofs = 0;
  decoder->top->end_ofs = UINT64_MAX;

  // Execution state: begin at "pc" with a one-entry call stack whose only
  // entry is the halt instruction.
  decoder->pc = pc;
  decoder->callstack[0] = &halt;
  decoder->call_len = 1;

  return decoder;
}
@ -823,6 +825,8 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
UPB_UNUSED(hd);
UPB_UNUSED(size_hint);
upb_pbdecoder *d = closure;
d->top->end_ofs = UINT64_MAX;
d->bufstart_ofs = 0;
d->call_len = 0;
return d;
}
@ -879,55 +883,115 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
return true;
}
void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *m,
upb_status *s) {
d->limit = &d->stack[UPB_DECODER_MAX_NESTING];
upb_bytessink_reset(&d->input_, &m->input_handler_, d);
d->method_ = m;
d->callstack[0] = &halt;
d->status = s;
upb_pbdecoder_reset(d);
}
void upb_pbdecoder_reset(upb_pbdecoder *d) {
d->top = d->stack;
d->top->end_ofs = UINT64_MAX;
d->top->groupnum = 0;
d->bufstart_ofs = 0;
d->ptr = d->residual;
d->buf = d->residual;
d->end = d->residual;
d->residual_end = d->residual;
d->call_len = 1;
}
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
return offset(d);
// Returns the number of bytes occupied by "entries" decoder frames. The
// decoder argument is accepted but not consulted.
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);
  UPB_UNUSED(d);
  return frame_bytes * entries;
}
// Not currently required, but to support outgrowing the static stack we need
// this.
void upb_pbdecoder_uninit(upb_pbdecoder *d) {
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
UPB_UNUSED(d);
}
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
return d->method_;
#ifdef UPB_USE_JIT_X64
if (d->method_->is_native_) {
// Each native stack frame needs two pointers, plus we need a few frames for
// the enter/exit trampolines.
size_t ret = entries * sizeof(void*) * 2;
ret += sizeof(void*) * 10;
return ret;
}
#endif
return entries * sizeof(uint32_t*);
}
bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink* sink) {
// TODO(haberman): do we need to test whether the decoder is already on the
// stack (like calling this from within a callback)? Should we support
// rebinding the output at all?
upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
upb_sink *sink) {
const size_t default_max_nesting = 64;
#ifndef NDEBUG
size_t size_before = upb_env_bytesallocated(e);
#endif
upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
if (!d) return NULL;
d->method_ = m;
d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
if (!d->stack || !d->callstack) {
return NULL;
}
d->env = e;
d->limit = d->stack + default_max_nesting - 1;
d->stack_size = default_max_nesting;
upb_pbdecoder_reset(d);
upb_bytessink_reset(&d->input_, &m->input_handler_, d);
assert(sink);
if (d->method_->dest_handlers_) {
if (sink->handlers != d->method_->dest_handlers_)
return false;
return NULL;
}
upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
return true;
// If this fails, increase the value in decoder.h.
assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
return d;
}
// Returns the decoder's current stream offset as computed by offset(d); the
// public header documents this as the number of bytes successfully parsed.
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
return offset(d);
}
// Returns the decoder method this decoder was created with.
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
return d->method_;
}
// Returns a pointer to the bytes sink embedded in the decoder, on which the
// decoder receives its input.
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
return &d->input_;
}
size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
return d->stack_size;
}
// Sets the maximum number of frames (including the top-level frame) that the
// decoder stack may hold, growing the frame stack and call stack if needed.
//
// Returns false, leaving the limit unchanged, if "max" is smaller than the
// number of frames currently in use (this includes max == 0, since the
// top-level frame always occupies one slot) or if memory allocation fails.
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  // Frames currently occupied: d->stack[0] through *d->top inclusive.
  const size_t in_use = (size_t)(d->top - d->stack) + 1;

  if (max < in_use) {
    // Can't set a limit smaller than what we are currently at. The overflow
    // check compares the current frame to d->limit for equality, so a limit
    // below d->top would never trigger and pushes would run off the stack.
    return false;
  }

  if (max > d->stack_size) {
    // Need to reallocate stack and callstack to accommodate.
    // Capture the limit as an index first: the old pointers must not be used
    // once the block has been reallocated.
    const size_t limit_idx = (size_t)(d->limit - d->stack);
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    upb_pbdecoder_frame *new_stack =
        upb_env_realloc(d->env, d->stack, old_size, new_size);
    void *p;
    if (!new_stack) {
      return false;
    }
    // The frames may have moved; rebase every pointer into the stack right
    // away so the decoder stays consistent even if the next allocation fails.
    d->stack = new_stack;
    d->top = new_stack + (in_use - 1);
    d->limit = new_stack + limit_idx;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  // max >= 1 here, so this never points before the start of the stack.
  d->limit = d->stack + max - 1;
  return true;
}

@ -18,7 +18,7 @@
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
#include "upb/table.int.h"
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -37,44 +37,6 @@ UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder);
UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod);
UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts);
// The maximum that any submessages can be nested. Matches proto2's limit.
// This specifies the size of the decoder's statically-sized array and therefore
// setting it high will cause the upb::pb::Decoder object to be larger.
//
// If necessary we can add a runtime-settable property to Decoder that allow
// this to be larger than the compile-time setting, but this would add
// complexity, particularly since we would have to decide how/if to give users
// the ability to set a custom memory allocation function.
#define UPB_DECODER_MAX_NESTING 64
// Internal-only struct used by the decoder.
typedef struct {
UPB_PRIVATE_FOR_CPP
// Space optimization note: we store two pointers here that the JIT
// doesn't need at all; the upb_handlers* inside the sink and
// the dispatch table pointer. We can optimze so that the JIT uses
// smaller stack frames than the interpreter. The only thing we need
// to guarantee is that the fallback routines can find end_ofs.
upb_sink sink;
// The absolute stream offset of the end-of-frame delimiter.
// Non-delimited frames (groups and non-packed repeated fields) reuse the
// delimiter of their parent, even though the frame may not end there.
//
// NOTE: the JIT stores a slightly different value here for non-top frames.
// It stores the value relative to the end of the enclosed message. But the
// top frame is still stored the same way, which is important for ensuring
// that calls from the JIT into C work correctly.
uint64_t end_ofs;
const uint32_t *base;
// 0 indicates a length-delimited field.
// A positive number indicates a known group.
// A negative number indicates an unknown group.
int32_t groupnum;
upb_inttable *dispatch; // Not used by the JIT.
} upb_pbdecoder_frame;
// The parameters one uses to construct a DecoderMethod.
// TODO(haberman): move allowjit here? Seems more convenient for users.
UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions,
@ -152,22 +114,31 @@ UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
upb_inttable dispatch;
));
// Preallocation hint: decoder won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the decoder library is upgraded without recompiling the application,
// it may be an underestimate.
#define UPB_PB_DECODER_SIZE 4400
#ifdef __cplusplus
// A Decoder receives binary protobuf data on its input sink and pushes the
// decoded data to its output sink.
UPB_DEFINE_CLASS0(upb::pb::Decoder,
class upb::pb::Decoder {
public:
// Constructs a decoder instance for the given method, which must outlive this
// decoder. Any errors during parsing will be set on the given status, which
// must also outlive this decoder.
Decoder(const DecoderMethod* method, Status* status);
~Decoder();
//
// The sink must match the given method.
static Decoder* Create(Environment* env, const DecoderMethod* method,
Sink* output);
// Returns the DecoderMethod this decoder is parsing from.
// TODO(haberman): Do users need to be able to rebind this?
const DecoderMethod* method() const;
// Resets the state of the decoder.
void Reset();
// The sink on which this decoder receives input.
BytesSink* input();
// Returns number of bytes successfully parsed.
//
@ -178,76 +149,25 @@ UPB_DEFINE_CLASS0(upb::pb::Decoder,
// callback.
uint64_t BytesParsed() const;
// Resets the output sink of the Decoder.
// The given sink must match method()->dest_handlers().
// Gets/sets the parsing nesting limit. If the total number of nested
// submessages and repeated fields hits this limit, parsing will fail. This
// is a resource limit that controls the amount of memory used by the parsing
// stack.
//
// This must be called at least once before the decoder can be used. It may
// only be called with the decoder is in a state where it was just created or
// reset with pipeline.Reset(). The given sink must be from the same pipeline
// as this decoder.
bool ResetOutput(Sink* sink);
// The sink on which this decoder receives input.
BytesSink* input();
private:
UPB_DISALLOW_COPY_AND_ASSIGN(Decoder);
,
UPB_DEFINE_STRUCT0(upb_pbdecoder, UPB_QUOTE(
// Our input sink.
upb_bytessink input_;
// The decoder method we are parsing with (owned).
const upb_pbdecodermethod *method_;
size_t call_len;
const uint32_t *pc, *last;
// Setting the limit will fail if the parser is currently suspended at a depth
// greater than this, or if memory allocation of the stack fails.
size_t max_nesting() const;
bool set_max_nesting(size_t max);
// Current input buffer and its stream offset.
const char *buf, *ptr, *end, *checkpoint;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
// End of the delimited region, relative to ptr, or end if not in this buf.
const char *data_end;
// Overall stream offset of "buf."
uint64_t bufstart_ofs;
// Buffer for residual bytes not parsed from the previous buffer.
// The maximum number of residual bytes we require is 12; a five-byte
// unknown tag plus an eight-byte value, less one because the value
// is only a partial value.
char residual[12];
char *residual_end;
// Stores the user buffer passed to our decode function.
const char *buf_param;
size_t size_param;
const upb_bufhandle *handle;
void Reset();
#ifdef UPB_USE_JIT_X64
// Used momentarily by the generated code to store a value while a user
// function is called.
uint32_t tmp_len;
static const size_t kSize = UPB_PB_DECODER_SIZE;
const void *saved_rsp;
#endif
private:
UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder);
};
upb_status *status;
// Our internal stack.
upb_pbdecoder_frame *top, *limit;
upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING];
#ifdef UPB_USE_JIT_X64
// Each native stack frame needs two pointers, plus we need a few frames for
// the enter/exit trampolines.
const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10];
#else
const uint32_t *callstack[UPB_DECODER_MAX_NESTING];
#endif
)));
#endif // __cplusplus
// A class for caching protobuf processing code, whether bytecode for the
// interpreted decoder or machine code for the JIT.
@ -296,14 +216,15 @@ UPB_DEFINE_STRUCT0(upb_pbcodecache,
UPB_BEGIN_EXTERN_C // {
void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method,
upb_status *status);
void upb_pbdecoder_uninit(upb_pbdecoder *d);
void upb_pbdecoder_reset(upb_pbdecoder *d);
upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
const upb_pbdecodermethod *method,
upb_sink *output);
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink);
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
void upb_pbdecoder_reset(upb_pbdecoder *d);
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
const upb_handlers *h);
@ -338,27 +259,27 @@ namespace upb {
namespace pb {
inline Decoder::Decoder(const DecoderMethod* m, Status* s) {
upb_pbdecoder_init(this, m, s);
}
inline Decoder::~Decoder() {
upb_pbdecoder_uninit(this);
// static
// Forwards to the C API function upb_pbdecoder_create() and returns its
// result.
inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
Sink* sink) {
return upb_pbdecoder_create(env, m, sink);
}
// Forwards to the C API function upb_pbdecoder_method().
inline const DecoderMethod* Decoder::method() const {
return upb_pbdecoder_method(this);
}
inline void Decoder::Reset() {
upb_pbdecoder_reset(this);
inline BytesSink* Decoder::input() {
return upb_pbdecoder_input(this);
}
// Forwards to the C API function upb_pbdecoder_bytesparsed().
inline uint64_t Decoder::BytesParsed() const {
return upb_pbdecoder_bytesparsed(this);
}
inline bool Decoder::ResetOutput(Sink* sink) {
return upb_pbdecoder_resetoutput(this, sink);
inline size_t Decoder::max_nesting() const {
return upb_pbdecoder_maxnesting(this);
}
inline BytesSink* Decoder::input() {
return upb_pbdecoder_input(this);
inline bool Decoder::set_max_nesting(size_t max) {
return upb_pbdecoder_setmaxnesting(this, max);
}
inline void Decoder::Reset() { upb_pbdecoder_reset(this); }
inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) {
upb_pbdecodermethodopts_init(this, h);

@ -13,8 +13,9 @@
#include <stdlib.h>
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/sink.h"
#include "upb/pb/decoder.h"
#include "upb/sink.h"
#include "upb/table.int.h"
// Opcode definitions. The canonical meaning of each opcode is its
// implementation in the interpreter (the JIT is written to match this).
@ -112,6 +113,95 @@ typedef struct {
#endif
} mgroup;
// The maximum that any submessages can be nested. Matches proto2's limit.
// This specifies the size of the decoder's statically-sized array and therefore
// setting it high will cause the upb::pb::Decoder object to be larger.
//
// If necessary we can add a runtime-settable property to Decoder that allows
// this to be larger than the compile-time setting, but this would add
// complexity, particularly since we would have to decide how/if to give users
// the ability to set a custom memory allocation function.
#define UPB_DECODER_MAX_NESTING 64
// Internal-only struct used by the decoder.
typedef struct {
// Space optimization note: we store two pointers here that the JIT
// doesn't need at all; the upb_handlers* inside the sink and
// the dispatch table pointer. We can optimize so that the JIT uses
// smaller stack frames than the interpreter. The only thing we need
// to guarantee is that the fallback routines can find end_ofs.
upb_sink sink;
// The absolute stream offset of the end-of-frame delimiter.
// Non-delimited frames (groups and non-packed repeated fields) reuse the
// delimiter of their parent, even though the frame may not end there.
//
// NOTE: the JIT stores a slightly different value here for non-top frames.
// It stores the value relative to the end of the enclosed message. But the
// top frame is still stored the same way, which is important for ensuring
// that calls from the JIT into C work correctly.
uint64_t end_ofs;
const uint32_t *base;
// 0 indicates a length-delimited field.
// A positive number indicates a known group.
// A negative number indicates an unknown group.
int32_t groupnum;
upb_inttable *dispatch; // Not used by the JIT.
} upb_pbdecoder_frame;
// Complete state of one protobuf binary decoder instance. The layout is
// internal; the members under UPB_USE_JIT_X64 exist only in JIT builds.
struct upb_pbdecoder {
// Environment this decoder is associated with.
// NOTE(review): presumably the source of its allocations and error
// reporting -- confirm in decoder.c.
upb_env *env;
// Our input sink.
upb_bytessink input_;
// The decoder method we are parsing with (owned).
const upb_pbdecodermethod *method_;
// NOTE(review): call_len, pc and last are undocumented in the original.
// pc/last look like positions in the uint32_t opcode stream (current and
// previous?) -- confirm in decoder.c.
size_t call_len;
const uint32_t *pc, *last;
// Current input buffer and its stream offset.
const char *buf, *ptr, *end, *checkpoint;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
// End of the delimited region, relative to ptr, or end if not in this buf.
const char *data_end;
// Overall stream offset of "buf."
uint64_t bufstart_ofs;
// Buffer for residual bytes not parsed from the previous buffer.
// The maximum number of residual bytes we require is 12; a five-byte
// unknown tag plus an eight-byte value, less one because the value
// is only a partial value.
char residual[12];
// NOTE(review): presumably points just past the last valid byte stored in
// "residual" -- confirm in decoder.c.
char *residual_end;
// Stores the user buffer passed to our decode function.
const char *buf_param;
size_t size_param;
const upb_bufhandle *handle;
// Our internal stack.
upb_pbdecoder_frame *stack, *top, *limit;
// NOTE(review): callstack and stack_size are undocumented in the original;
// presumably a stack of saved opcode positions and the capacity of the
// stacks above -- confirm in decoder.c.
const uint32_t **callstack;
size_t stack_size;
// NOTE(review): presumably where decode errors are reported -- confirm.
upb_status *status;
#ifdef UPB_USE_JIT_X64
// Used momentarily by the generated code to store a value while a user
// function is called.
uint32_t tmp_len;
const void *saved_rsp;
#endif
};
// Decoder entry points; used as handlers.
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);

@ -62,6 +62,68 @@
#include <stdlib.h>
// The output buffer is divided into segments; a segment is a string of data
// that is "ready to go" -- it does not need any varint lengths inserted into
// the middle. The seams between segments are where varints will be inserted
// once they are known.
//
// We also use the concept of a "run", which is a range of encoded bytes that
// occur at a single submessage level. Every segment contains one or more runs.
//
// A segment can span messages. Consider:
//
// .--Submessage lengths---------.
// | | |
// | V V
// V | |--------------- | |-----------------
// Submessages: | |-----------------------------------------------
// Top-level msg: ------------------------------------------------------------
//
// Segments: ----- ------------------- -----------------
// Runs: *---- *--------------*--- *----------------
// (* marks the start)
//
// Note that the top-level message is not in any segment because it does not
// have any length preceding it.
//
// A segment is only interrupted when another length needs to be inserted. So
// observe how the second segment spans both the inner submessage and part of
// the next enclosing message.
// One entry of the encoder's segment list (see the description above): the
// byte length of a segment plus the message length that must be
// varint-encoded in front of it.
typedef struct {
uint32_t msglen; // The length to varint-encode before this segment.
uint32_t seglen; // Length of the segment.
} upb_pb_encoder_segment;
// Complete state of one protobuf binary encoder instance.
struct upb_pb_encoder {
// Environment the encoder was created in. The output and segment buffers
// are grown by calling upb_env_realloc() on it.
upb_env *env;
// Our input and output.
upb_sink input_;
upb_bytessink *output_;
// The "subclosure" -- used as the inner closure as part of the bytessink
// protocol.
void *subc;
// The output buffer and limit, and our current write position. "buf" is
// allocated from the env when the encoder is created and is reallocated
// through the env if we need to grow beyond its current size.
char *buf, *ptr, *limit;
// The beginning of the current run, or undefined if we are at the top level.
char *runbegin;
// The list of segments we are accumulating.
upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
// The stack of enclosing submessages. Each entry in the stack points to the
// segment where this submessage's length is being accumulated.
int *stack, *top, *stacklimit;
// Depth of startmsg/endmsg calls.
int depth;
};
/* low-level buffering ********************************************************/
// Low-level functions for interacting with the output buffer.
@ -80,24 +142,22 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
// e->ptr. Returns false if the bytes could not be allocated.
static bool reserve(upb_pb_encoder *e, size_t bytes) {
if ((e->limit - e->ptr) < bytes) {
// Grow buffer.
size_t needed = bytes + (e->ptr - e->buf);
size_t old_size = e->limit - e->buf;
size_t new_size = old_size;
while (new_size < needed) {
new_size *= 2;
}
char *realloc_from = (e->buf == e->initbuf) ? NULL : e->buf;
char *new_buf = realloc(realloc_from, new_size);
char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
if (realloc_from == NULL) {
memcpy(new_buf, e->initbuf, old_size);
}
e->ptr = new_buf + (e->ptr - e->buf);
e->runbegin = new_buf + (e->runbegin - e->buf);
e->limit = new_buf + new_size;
@ -166,21 +226,17 @@ static bool start_delim(upb_pb_encoder *e) {
}
if (++e->segptr == e->seglimit) {
upb_pb_encoder_segment *realloc_from =
(e->segbuf == e->seginitbuf) ? NULL : e->segbuf;
// Grow segment buffer.
size_t old_size =
(e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
size_t new_size = old_size * 2;
upb_pb_encoder_segment *new_buf = realloc(realloc_from, new_size);
upb_pb_encoder_segment *new_buf =
upb_env_realloc(e->env, e->segbuf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
if (realloc_from == NULL) {
memcpy(new_buf, e->seginitbuf, old_size);
}
e->segptr = new_buf + (e->segptr - e->segbuf);
e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
e->segbuf = new_buf;
@ -378,8 +434,10 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
upb_handlers_setendmsg(h, endmsg, NULL);
const upb_msgdef *m = upb_handlers_msgdef(h);
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
upb_fielddef_packed(f);
@ -449,6 +507,12 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
}
}
// Clears the encoder's per-message bookkeeping: the startmsg/endmsg depth
// goes back to zero and both the submessage-stack top and the segment cursor
// are set to NULL. The buffers themselves are left untouched.
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
/* public API *****************************************************************/
@ -457,40 +521,42 @@ const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
}
#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h) {
e->output_ = NULL;
e->subc = NULL;
e->buf = e->initbuf;
e->ptr = e->buf;
e->limit = e->buf + ARRAYSIZE(e->initbuf);
e->segbuf = e->seginitbuf;
e->seglimit = e->segbuf + ARRAYSIZE(e->seginitbuf);
e->stacklimit = e->stack + ARRAYSIZE(e->stack);
upb_sink_reset(&e->input_, h, e);
}
void upb_pb_encoder_uninit(upb_pb_encoder *e) {
if (e->buf != e->initbuf) {
free(e->buf);
upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
upb_bytessink *output) {
const size_t initial_bufsize = 256;
const size_t initial_segbufsize = 16;
// TODO(haberman): make this configurable.
const size_t stack_size = 64;
#ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env);
#endif
upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
if (!e) return NULL;
e->buf = upb_env_malloc(env, initial_bufsize);
e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
if (!e->buf || !e->segbuf || !e->stack) {
return NULL;
}
if (e->segbuf != e->seginitbuf) {
free(e->segbuf);
}
}
e->limit = e->buf + initial_bufsize;
e->seglimit = e->segbuf + initial_segbufsize;
e->stacklimit = e->stack + stack_size;
void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output) {
upb_pb_encoder_reset(e);
upb_sink_reset(&e->input_, h, e);
e->env = env;
e->output_ = output;
e->subc = output->closure;
}
e->ptr = e->buf;
void upb_pb_encoder_reset(upb_pb_encoder *e) {
e->segptr = NULL;
e->top = NULL;
e->depth = 0;
// If this fails, increase the value in encoder.h.
assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
return e;
}
// Returns a pointer to the encoder's embedded input sink (e->input_). The
// pointer refers to storage inside *e, so it is only valid while e is.
upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }

@ -15,6 +15,7 @@
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -31,101 +32,42 @@ UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
/* upb::pb::Encoder ***********************************************************/
// The output buffer is divided into segments; a segment is a string of data
// that is "ready to go" -- it does not need any varint lengths inserted into
// the middle. The seams between segments are where varints will be inserted
// once they are known.
//
// We also use the concept of a "run", which is a range of encoded bytes that
// occur at a single submessage level. Every segment contains one or more runs.
//
// A segment can span messages. Consider:
//
// .--Submessage lengths---------.
// | | |
// | V V
// V | |--------------- | |-----------------
// Submessages: | |-----------------------------------------------
// Top-level msg: ------------------------------------------------------------
//
// Segments: ----- ------------------- -----------------
// Runs: *---- *--------------*--- *----------------
// (* marks the start)
//
// Note that the top-level message is not in any segment because it does not
// have any length preceding it.
//
// A segment is only interrupted when another length needs to be inserted. So
// observe how the second segment spans both the inner submessage and part of
// the next enclosing message.
typedef struct {
UPB_PRIVATE_FOR_CPP
uint32_t msglen; // The length to varint-encode before this segment.
uint32_t seglen; // Length of the segment.
} upb_pb_encoder_segment;
UPB_DEFINE_CLASS0(upb::pb::Encoder,
public:
Encoder(const upb::Handlers* handlers);
~Encoder();
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* msg);
// Preallocation hint: encoder won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the encoder library is upgraded without recompiling the application,
// it may be an underestimate.
#define UPB_PB_ENCODER_SIZE 768
// Resets the state of the printer, so that it will expect to begin a new
// document.
void Reset();
#ifdef __cplusplus
// Resets the output pointer which will serve as our closure.
void ResetOutput(BytesSink* output);
class upb::pb::Encoder {
public:
// Creates a new encoder in the given environment. The Handlers must have
// come from NewHandlers() below.
static Encoder* Create(Environment* env, const Handlers* handlers,
BytesSink* output);
// The input to the encoder.
Sink* input();
private:
UPB_DISALLOW_COPY_AND_ASSIGN(Encoder);
,
UPB_DEFINE_STRUCT0(upb_pb_encoder, UPB_QUOTE(
// Our input and output.
upb_sink input_;
upb_bytessink *output_;
// The "subclosure" -- used as the inner closure as part of the bytessink
// protocol.
void *subc;
// The output buffer and limit, and our current write position. "buf"
// initially points to "initbuf", but is dynamically allocated if we need to
// grow beyond the initial size.
char *buf, *ptr, *limit;
// Creates a new set of handlers for this MessageDef.
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
// The beginning of the current run, or undefined if we are at the top level.
char *runbegin;
static const size_t kSize = UPB_PB_ENCODER_SIZE;
// The list of segments we are accumulating.
upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
// The stack of enclosing submessages. Each entry in the stack points to the
// segment where this submessage's length is being accumulated.
int stack[UPB_PBENCODER_MAX_NESTING], *top, *stacklimit;
// Depth of startmsg/endmsg calls.
int depth;
private:
UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder);
};
// Initial buffers for the output buffer and segment buffer. If we outgrow
// these we will dynamically allocate bigger ones.
char initbuf[256];
upb_pb_encoder_segment seginitbuf[32];
)));
#endif
UPB_BEGIN_EXTERN_C
const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
const void *owner);
void upb_pb_encoder_reset(upb_pb_encoder *e);
upb_sink *upb_pb_encoder_input(upb_pb_encoder *p);
void upb_pb_encoder_init(upb_pb_encoder *e, const upb_handlers *h);
void upb_pb_encoder_resetoutput(upb_pb_encoder *e, upb_bytessink *output);
void upb_pb_encoder_uninit(upb_pb_encoder *e);
upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h,
upb_bytessink* output);
UPB_END_EXTERN_C
@ -133,17 +75,9 @@ UPB_END_EXTERN_C
namespace upb {
namespace pb {
inline Encoder::Encoder(const upb::Handlers* handlers) {
upb_pb_encoder_init(this, handlers);
}
inline Encoder::~Encoder() {
upb_pb_encoder_uninit(this);
}
inline void Encoder::Reset() {
upb_pb_encoder_reset(this);
}
inline void Encoder::ResetOutput(BytesSink* output) {
upb_pb_encoder_resetoutput(this, output);
// C++ wrapper: forwards all three arguments to upb_pb_encoder_create() and
// returns its result, which is NULL when one of the encoder's allocations
// from `env` fails.
inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers,
BytesSink* output) {
return upb_pb_encoder_create(env, handlers, output);
}
inline Sink* Encoder::input() {
return upb_pb_encoder_input(this);

@ -22,26 +22,26 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
const upb_pbdecodermethod *decoder_m =
upb_pbdecodermethod_new(&opts, &decoder_m);
upb_pbdecoder decoder;
upb_descreader reader;
upb_env env;
upb_env_init(&env);
upb_env_reporterrorsto(&env, status);
upb_pbdecoder_init(&decoder, decoder_m, status);
upb_descreader_init(&reader, reader_h, status);
upb_pbdecoder_resetoutput(&decoder, upb_descreader_input(&reader));
upb_descreader *reader = upb_descreader_create(&env, reader_h);
upb_pbdecoder *decoder =
upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
// Push input data.
bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(&decoder));
bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
upb_def **ret = NULL;
if (!ok) goto cleanup;
upb_def **defs = upb_descreader_getdefs(&reader, owner, n);
upb_def **defs = upb_descreader_getdefs(reader, owner, n);
ret = malloc(sizeof(upb_def*) * (*n));
memcpy(ret, defs, sizeof(upb_def*) * (*n));
cleanup:
upb_pbdecoder_uninit(&decoder);
upb_descreader_uninit(&reader);
upb_env_uninit(&env);
upb_handlers_unref(reader_h, &reader_h);
upb_pbdecodermethod_unref(decoder_m, &decoder_m);
return ret;

@ -19,6 +19,14 @@
#include "upb/sink.h"
// State of one text printer instance.
struct upb_textprinter {
// Input sink; upb_textprinter_input() returns a pointer to it.
upb_sink input_;
// Byte sink passed to upb_textprinter_create().
upb_bytessink *output_;
// Reset to 0 by textprinter_reset().
// NOTE(review): presumably the current nesting level used for indentation --
// confirm in the print handlers.
int indent_depth_;
// Single-line mode flag; set by upb_textprinter_setsingleline().
bool single_line_;
// NOTE(review): not assigned anywhere in the code visible here; by analogy
// with the encoder's field of the same name this is presumably the bytessink
// subclosure -- confirm in the start-message handler.
void *subc;
};
#define CHECK(x) if ((x) < 0) goto err;
static const char *shortname(const char *longname) {
@ -236,24 +244,6 @@ err:
return false;
}
/* Public API *****************************************************************/
void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h) {
p->single_line_ = false;
p->indent_depth_ = 0;
upb_sink_reset(&p->input_, h, p);
}
void upb_textprinter_uninit(upb_textprinter *p) {
UPB_UNUSED(p);
}
void upb_textprinter_reset(upb_textprinter *p, bool single_line) {
p->single_line_ = single_line;
p->indent_depth_ = 0;
}
static void onmreg(const void *c, upb_handlers *h) {
UPB_UNUSED(c);
const upb_msgdef *m = upb_handlers_msgdef(h);
@ -261,8 +251,10 @@ static void onmreg(const void *c, upb_handlers *h) {
upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, f);
@ -311,6 +303,26 @@ static void onmreg(const void *c, upb_handlers *h) {
}
}
// Puts the printer's formatting state back to its starting point: the indent
// depth returns to zero and single-line mode is set to `single_line`.
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
/* Public API *****************************************************************/
// Allocates a text printer from `env`, binds its input sink to handlers `h`
// (with the printer itself as the closure), records `output` as the
// destination byte sink and starts it in multi-line mode with no indentation.
// Returns NULL if the allocation fails.
upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
                                        upb_bytessink *output) {
  upb_textprinter *printer = upb_env_malloc(env, sizeof(*printer));
  if (printer == NULL) {
    return NULL;
  }

  textprinter_reset(printer, false);
  upb_sink_reset(&printer->input_, h, printer);
  printer->output_ = output;

  return printer;
}
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
const void *owner) {
return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
@ -318,11 +330,6 @@ const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
// Returns a pointer to the printer's embedded input sink (p->input_). The
// pointer refers to storage inside *p, so it is only valid while p is.
upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output) {
p->output_ = output;
return true;
}
// Stores `single_line` as the printer's single-line mode flag. Unlike
// textprinter_reset(), this leaves the indent depth unchanged.
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
p->single_line_ = single_line;
}

@ -8,6 +8,7 @@
#ifndef UPB_TEXT_H_
#define UPB_TEXT_H_
#include "upb/env.h"
#include "upb/sink.h"
#ifdef __cplusplus
@ -20,58 +21,51 @@ class TextPrinter;
UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter);
UPB_DEFINE_CLASS0(upb::pb::TextPrinter,
#ifdef __cplusplus
class upb::pb::TextPrinter {
public:
// The given handlers must have come from NewHandlers(). It must outlive the
// TextPrinter.
explicit TextPrinter(const upb::Handlers* handlers);
static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
BytesSink *output);
void SetSingleLineMode(bool single_line);
bool ResetOutput(BytesSink* output);
Sink* input();
// If handler caching becomes a requirement we can add a code cache as in
// decoder.h
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
};
private:
,
UPB_DEFINE_STRUCT0(upb_textprinter,
upb_sink input_;
upb_bytessink *output_;
int indent_depth_;
bool single_line_;
void *subc;
));
#endif
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
// C API.
void upb_textprinter_init(upb_textprinter *p, const upb_handlers *h);
void upb_textprinter_uninit(upb_textprinter *p);
bool upb_textprinter_resetoutput(upb_textprinter *p, upb_bytessink *output);
upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
upb_bytessink *output);
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
upb_sink *upb_textprinter_input(upb_textprinter *p);
const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
const void *owner);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace pb {
inline TextPrinter::TextPrinter(const upb::Handlers* handlers) {
upb_textprinter_init(this, handlers);
// C++ wrapper: forwards all three arguments to upb_textprinter_create() and
// returns its result, which is NULL when the allocation from `env` fails.
inline TextPrinter *TextPrinter::Create(Environment *env,
const upb::Handlers *handlers,
BytesSink *output) {
return upb_textprinter_create(env, handlers, output);
}
// C++ wrapper: forwards to upb_textprinter_setsingleline(), which stores the
// flag on this printer.
inline void TextPrinter::SetSingleLineMode(bool single_line) {
upb_textprinter_setsingleline(this, single_line);
}
inline bool TextPrinter::ResetOutput(BytesSink* output) {
return upb_textprinter_resetoutput(this, output);
}
// C++ wrapper: returns the printer's input sink by forwarding to
// upb_textprinter_input().
inline Sink* TextPrinter::input() {
return upb_textprinter_input(this);
}

@ -34,27 +34,6 @@ UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc);
UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink);
UPB_DECLARE_TYPE(upb::Sink, upb_sink);
// Internal-only struct for the sink.
struct upb_sinkframe {
UPB_PRIVATE_FOR_CPP
const upb_handlers *h;
void *closure;
// For any frames besides the top, this is the END* callback that will run
// when the subframe is popped (for example, for a "sequence" frame the frame
// above it will be a UPB_HANDLER_ENDSEQ handler). But this is only
// necessary for assertion checking inside upb_sink and can be omitted if the
// sink has only one caller.
//
// TODO(haberman): have a mechanism for ensuring that a sink only has one
// caller.
upb_selector_t selector;
};
// The maximum nesting depth that upb::Sink will allow. Matches proto2's limit.
// TODO: make this a runtime-settable property of Sink.
#define UPB_SINK_MAX_NESTING 64
// A upb::Sink is an object that binds a upb::Handlers object to some runtime
// state. It represents an endpoint to which data can be sent.
//

@ -139,8 +139,10 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
// For messages, continue the recursion by visiting all subdefs.
const upb_msgdef *m = upb_dyncast_msgdef(def);
if (m) {
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (!upb_fielddef_hassubdef(f)) continue;
// |= to avoid short-circuit; we need its side-effects.
@ -293,8 +295,10 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
// Type names are resolved relative to the message in which they appear.
const char *base = upb_msgdef_fullname(m);
upb_msg_iter j;
for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) {
upb_msg_field_iter j;
for(upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(&j);
const char *name = upb_fielddef_subdefname(f);
if (name && !upb_fielddef_subdef(f)) {

@ -40,12 +40,16 @@ char *upb_strdup(const char *s) {
}
// Duplicates the first `len` bytes of `s` into a newly malloc()ed buffer of
// len + 1 bytes and appends a terminating NUL byte.
//
// Returns the new buffer (the caller releases it with free()), or NULL if
// `len` is SIZE_MAX (len + 1 would overflow) or the allocation fails.
char *upb_strdup2(const char *s, size_t len) {
  // Prevent overflow errors.
  if (len == SIZE_MAX) return NULL;
  // Always null-terminate, even if binary data; but don't rely on the input to
  // have a null-terminating byte since it may be a raw binary buffer.
  size_t n = len + 1;
  char *p = (char *)malloc(n);
  // Only touch the buffer when the allocation succeeded; writing the
  // terminator unconditionally would dereference NULL on failure.
  if (p) {
    memcpy(p, s, len);
    p[len] = 0;
  }
  return p;
}

@ -207,6 +207,9 @@ typedef struct {
#define UPB_STRTABLE_INIT(count, mask, ctype, size_lg2, entries) \
{{count, mask, ctype, size_lg2, entries}}
// Static initializer for a table with no entries: expands to
// UPB_STRTABLE_INIT with count, mask and size_lg2 all 0 and a NULL entries
// pointer, keeping only the caller-supplied ctype.
#define UPB_EMPTY_STRTABLE_INIT(ctype) \
UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL)
typedef struct {
upb_table t; // For entries that don't fit in the array part.
const _upb_value *array; // Array part of the table. See const note above.

@ -25,6 +25,15 @@
#define UPB_INLINE static inline
#endif
// For use in C/C++ source files (not headers), forces inlining within the file.
// When __GNUC__ is defined, UPB_FORCEINLINE and UPB_NOINLINE map to the
// always_inline and noinline function attributes. Otherwise both expand to
// nothing, so annotated functions are left to the compiler's own heuristics.
#ifdef __GNUC__
#define UPB_FORCEINLINE inline __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#else
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#endif
#if __STDC_VERSION__ >= 199901L
#define UPB_C99
#endif

Loading…
Cancel
Save