diff --git a/Makefile b/Makefile index 6aef58112f..a12e7ef5e2 100644 --- a/Makefile +++ b/Makefile @@ -83,11 +83,15 @@ deps: Makefile $(ALLSRC) CORE= \ upb/upb.c \ upb/handlers.c \ - upb/descriptor.c \ + upb/descriptor/reader.c \ upb/table.c \ + upb/refcount.c \ upb/def.c \ upb/msg.c \ upb/bytestream.c \ + bindings/cpp/upb/proto2_bridge.cc \ + +# TODO: the proto2 bridge should be built as a separate library. # Library for the protocol buffer format (both text and binary). PB= \ @@ -122,8 +126,9 @@ LIBUPB_PIC=upb/libupb_pic.a lib: $(LIBUPB) -OBJ=$(patsubst %.c,%.o,$(SRC)) -PICOBJ=$(patsubst %.c,%.lo,$(SRC)) +OBJ=$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(SRC))) +PICOBJ=$(patsubst %.cc,%.lo,$(patsubst %.c,%.lo,$(SRC))) + ifdef USE_JIT upb/pb/decoder.o upb/pb/decoder.lo: upb/pb/decoder_x64.h @@ -139,10 +144,18 @@ $(LIBUPB_PIC): $(PICOBJ) $(E) CC $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< +%.o : %.cc + $(E) CXX $< + $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< + %.lo : %.c $(E) 'CC -fPIC' $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC +%.lo : %.cc + $(E) 'CXX -fPIC' $< + $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC + # Override the optimization level for def.o, because it is not in the # critical path but gets very large when -O3 is used. upb/def.o: upb/def.c @@ -197,47 +210,39 @@ tests/test.proto.pb: tests/test.proto SIMPLE_TESTS= \ tests/test_def \ tests/test_varint \ - tests/tests \ - -# Too many tests in this binary to run Valgrind (it takes minutes). 
-SLOW_TESTS= \ - tests/test_decoder \ SIMPLE_CXX_TESTS= \ tests/test_table \ tests/test_cpp \ + tests/test_decoder \ VARIADIC_TESTS= \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ -TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) $(SLOW_TESTS) -tests: $(TESTS) +TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) + + +tests: $(TESTS) $(INTERACTIVE_TESTS) $(TESTS): $(LIBUPB) -tests/tests: tests/test.proto.pb +tests/test_def: tests/test.proto.pb $(SIMPLE_TESTS): % : %.c $(E) CC $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB) -VALGRIND=valgrind --leak-check=full --error-exitcode=1 +VALGRIND=valgrind --leak-check=full --error-exitcode=1 test: tests @echo Running all tests under valgrind. @set -e # Abort on error. @for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \ if [ -x ./$$test ] ; then \ - echo !!! $(VALGRIND) ./$$test tests/test.proto.pb; \ + echo !!! $(VALGRIND) ./$$test; \ $(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \ fi \ done; - @for test in "$(SLOW_TESTS)"; do \ - if [ -x ./$$test ] ; then \ - echo !!! ./$$test; \ - ./$$test || exit 1; \ - fi \ - done; - @$(VALGRIND) tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat - @$(VALGRIND) tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat + @$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_message1.dat || exit 1; + @$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_message2.dat || exit 1; @echo "All tests passed!" 
tests/t.test_vs_proto2.googlemessage1 \ @@ -273,15 +278,11 @@ tests/tests: upb/libupb.a # Benchmarks UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \ benchmarks/b.parsestream_googlemessage2.upb_table \ - benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \ ifdef USE_JIT UPB_BENCHMARKS += \ benchmarks/b.parsestream_googlemessage1.upb_jit \ benchmarks/b.parsestream_googlemessage2.upb_jit \ - benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \ - benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval \ benchmarks/b.parsetoproto2_googlemessage1.upb_jit \ benchmarks/b.parsetoproto2_googlemessage2.upb_jit endif @@ -318,21 +319,21 @@ benchmarks/google_messages.pb.cc: benchmarks/google_messages.proto # want to make these command-line parameters -- it makes it more annoying to # debug or profile them. -benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \ -benchmarks/b.parsetostruct_googlemessage2.upb_table_byval: \ +benchmarks/b.parsetostruct_googlemessage1.upb_table \ +benchmarks/b.parsetostruct_googlemessage2.upb_table: \ benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb - $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byval, nojit)' - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table_byval $< \ + $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, nojit)' + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_table $< \ -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ -DMESSAGE_FILE=\"google_message1.dat\" \ - -DBYREF=false -DJIT=false $(LIBUPB) - $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byref, nojit)' - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table_byval $< \ + -DJIT=false $(LIBUPB) + $(E) 'CC benchmarks/parsetostruct.upb.c 
(benchmarks.SpeedMessage2, nojit)' + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_table $< \ -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ -DMESSAGE_FILE=\"google_message2.dat\" \ - -DBYREF=false -DJIT=false $(LIBUPB) + -DJIT=false $(LIBUPB) benchmarks/b.parsestream_googlemessage1.upb_table \ benchmarks/b.parsestream_googlemessage2.upb_table: \ @@ -351,21 +352,21 @@ benchmarks/b.parsestream_googlemessage2.upb_table: \ $(LIBUPB) ifdef USE_JIT -benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval \ -benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval: \ +benchmarks/b.parsetostruct_googlemessage1.upb_jit \ +benchmarks/b.parsetostruct_googlemessage2.upb_jit: \ benchmarks/parsetostruct.upb.c $(LIBUPB) benchmarks/google_messages.proto.pb - $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, byref, jit)' - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit_byval $< \ + $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage1, jit)' + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage1.upb_jit $< \ -DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \ -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ -DMESSAGE_FILE=\"google_message1.dat\" -DJIT=true \ - -DBYREF=true -DJIT=true $(LIBUPB) - $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, byval, jit)' - $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit_byval $< \ + -DJIT=true $(LIBUPB) + $(E) 'CC benchmarks/parsetostruct.upb.c (benchmarks.SpeedMessage2, jit)' + $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsetostruct_googlemessage2.upb_jit $< \ -DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \ -DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \ -DMESSAGE_FILE=\"google_message2.dat\" -DJIT=true \ - -DBYREF=false -DJIT=true $(LIBUPB) + -DJIT=true $(LIBUPB) 
benchmarks/b.parsestream_googlemessage1.upb_jit \ benchmarks/b.parsestream_googlemessage2.upb_jit: \ diff --git a/benchmarks/google_messages.proto b/benchmarks/google_messages.proto index b43e94b307..b367954556 100644 --- a/benchmarks/google_messages.proto +++ b/benchmarks/google_messages.proto @@ -3,6 +3,11 @@ package benchmarks; option optimize_for = SPEED; +enum Foo { + FOO_VALUE = 1; + FOO_VALUE2 = 2; +} + message SpeedMessage1 { required string field1 = 1; optional string field9 = 9; @@ -45,6 +50,7 @@ message SpeedMessage1 { optional int32 field128 = 128 [default=0]; optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"]; optional int32 field131 = 131 [default=0]; + optional Foo field132 = 132 [default=FOO_VALUE]; } message SpeedMessage1SubMessage { diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c index 0316a86916..e9164d0284 100644 --- a/benchmarks/parsestream.upb.c +++ b/benchmarks/parsestream.upb.c @@ -39,7 +39,7 @@ static bool initialize() return false; } - def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME)); + def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME, &def)); if(!def) { fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); return false; @@ -68,7 +68,7 @@ static bool initialize() static void cleanup() { free(input_str); - upb_def_unref(UPB_UPCAST(def)); + upb_def_unref(UPB_UPCAST(def), &def); upb_decoder_uninit(&decoder); upb_decoderplan_unref(plan); upb_stringsrc_uninit(&stringsrc); diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc index 988faade91..5023b0e882 100644 --- a/benchmarks/parsetoproto2.upb.cc +++ b/benchmarks/parsetoproto2.upb.cc @@ -1,320 +1,61 @@ -// This file is a crime against software engineering. It breaks the -// encapsulation of proto2 in numerous ways, violates the C++ standard -// in others, and generally deserves to have comtempt and scorn heaped -// upon it. 
-// -// Its purpose is to get an accurate benchmark for how fast upb can -// parse into proto2 data structures. To add proper support for this -// functionality, proto2 would need to expose actual support for the -// operations we are trying to perform here. +// Tests speed of upb parsing into proto2 generated classes. #define __STDC_LIMIT_MACROS 1 #include "main.c" #include -#include "upb/bytestream.h" -#include "upb/def.h" -#include "upb/msg.h" -#include "upb/pb/decoder.h" +#include "upb/bytestream.hpp" +#include "upb/def.hpp" +#include "upb/msg.hpp" +#include "upb/pb/decoder.hpp" #include "upb/pb/glue.h" - -// Need to violate the encapsulation of GeneratedMessageReflection -- see below. -#define private public +#include "upb/proto2_bridge.hpp" #include MESSAGE_HFILE -#include -#undef private -static size_t len; +const char *str; +size_t len; MESSAGE_CIDENT msg[NUM_MESSAGES]; MESSAGE_CIDENT msg2; -static upb_stringsrc strsrc; -static upb_decoder d; -static const upb_msgdef *def; -static upb_decoderplan *p; -char *str; - -#define PROTO2_APPEND(type, ctype) \ - upb_flow_t proto2_append_ ## type(void *_r, upb_value fval, upb_value val) { \ - (void)fval; \ - typedef google::protobuf::RepeatedField R; \ - R *r = (R*)_r; \ - r->Add(upb_value_get ## type(val)); \ - return UPB_CONTINUE; \ - } - -PROTO2_APPEND(double, double) -PROTO2_APPEND(float, float) -PROTO2_APPEND(uint64, uint64_t) -PROTO2_APPEND(int64, int64_t) -PROTO2_APPEND(int32, int32_t) -PROTO2_APPEND(uint32, uint32_t) -PROTO2_APPEND(bool, bool) - -upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) { - assert(m != NULL); - const upb_fielddef *f = upb_value_getfielddef(fval); - std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1); - if (*str == f->default_ptr) *str = new std::string; - const upb_byteregion *reg = upb_value_getbyteregion(val); - size_t len; - (*str)->assign( - upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len), - upb_byteregion_len(reg)); - // XXX: only supports 
contiguous strings atm. - assert(len == upb_byteregion_len(reg)); - return UPB_CONTINUE; -} - -upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) { - assert(_r != NULL); - typedef google::protobuf::RepeatedPtrField R; - (void)fval; - R *r = (R*)_r; - const upb_byteregion *reg = upb_value_getbyteregion(val); - size_t len; - r->Add()->assign( - upb_byteregion_getptr(reg, upb_byteregion_startofs(reg), &len), - upb_byteregion_len(reg)); - // XXX: only supports contiguous strings atm. - assert(len == upb_byteregion_len(reg)); - return UPB_CONTINUE; -} - -upb_sflow_t proto2_startseq(void *m, upb_value fval) { - assert(m != NULL); - const upb_fielddef *f = upb_value_getfielddef(fval); - return UPB_CONTINUE_WITH(UPB_INDEX(m, f->offset, 1)); -} - -upb_sflow_t proto2_startsubmsg(void *m, upb_value fval) { - assert(m != NULL); - const upb_fielddef *f = upb_value_getfielddef(fval); - google::protobuf::Message *prototype = (google::protobuf::Message*)f->prototype; - void **subm = (void**)UPB_INDEX(m, f->offset, 1); - if (*subm == NULL || *subm == f->default_ptr) - *subm = prototype->New(); - assert(*subm != NULL); - return UPB_CONTINUE_WITH(*subm); -} - -class UpbRepeatedPtrField : public google::protobuf::internal::RepeatedPtrFieldBase { - public: - class TypeHandler { - public: - typedef void Type; - // AddAllocated() calls this, but only if other objects are sitting - // around waiting for reuse, which we will not do. - static void Delete(Type*) { assert(false); } - }; - void *Add(google::protobuf::Message *m) { - void *submsg = RepeatedPtrFieldBase::AddFromCleared(); - if (!submsg) { - submsg = m->New(); - RepeatedPtrFieldBase::AddAllocated(submsg); - } - return submsg; - } -}; - -upb_sflow_t proto2_startsubmsg_r(void *_r, upb_value fval) { - assert(_r != NULL); - // Compared to the other writers, this implementation is particularly sketchy. 
- // The object we are modifying is a RepeatedPtrField*, but we can't - // properly declare that templated pointer because we don't have access to - // that type at compile-time (and wouldn't want to create a separate callback - // for each type anyway). Instead we access the pointer as a - // RepeatedPtrFieldBase, which is indeed a superclass of RepeatedPtrField. - // But we can't properly declare a TypeHandler for the submessage's type, - // for the same reason that we can't create a RepeatedPtrField*. - // Instead we treat it as a void*, and create the submessage using - // google::protobuf::Message::New() if we need to. - class TypeHandler { - public: - typedef void Type; - }; - const upb_fielddef *f = upb_value_getfielddef(fval); - UpbRepeatedPtrField *r = (UpbRepeatedPtrField*)_r; - void *submsg = r->Add((google::protobuf::Message*)f->prototype); - assert(submsg != NULL); - return UPB_CONTINUE_WITH(submsg); -} - -#define PROTO2MSG(type, size) { static upb_accessor_vtbl vtbl = { \ - &proto2_startsubmsg, \ - &upb_stdmsg_set ## type, \ - &proto2_startseq, \ - &proto2_startsubmsg_r, \ - &proto2_append_ ## type, \ - NULL, NULL, NULL, NULL, NULL, NULL}; \ - return &vtbl; } - -static upb_accessor_vtbl *proto2_accessor(upb_fielddef *f) { - switch (f->type) { - case UPB_TYPE(DOUBLE): PROTO2MSG(double, 8) - case UPB_TYPE(FLOAT): PROTO2MSG(float, 4) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): PROTO2MSG(uint64, 8) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): PROTO2MSG(int64, 8) - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(ENUM): - case UPB_TYPE(SFIXED32): PROTO2MSG(int32, 4) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): PROTO2MSG(uint32, 4) - case UPB_TYPE(BOOL): PROTO2MSG(bool, 1) - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): { - static upb_accessor_vtbl vtbl = { - &proto2_startsubmsg, - &proto2_setstr, - &proto2_startseq, - &proto2_startsubmsg_r, - 
&proto2_append_str, - NULL, NULL, NULL, NULL, NULL, NULL}; - return &vtbl; - } - } - return NULL; -} - -static void layout_msgdef_from_proto2(upb_msgdef *upb_md, - const google::protobuf::Message *m, - const google::protobuf::Descriptor *proto2_d) { - // Hack: we break the encapsulation of GeneratedMessageReflection to get at - // the offsets we need. If/when we do this for real, we will need - // GeneratedMessageReflection to expose those offsets publicly. - const google::protobuf::internal::GeneratedMessageReflection *r = - (google::protobuf::internal::GeneratedMessageReflection*)m->GetReflection(); - for (int i = 0; i < proto2_d->field_count(); i++) { - const google::protobuf::FieldDescriptor *proto2_f = proto2_d->field(i); - upb_fielddef *upb_f = upb_msgdef_itof(upb_md, proto2_f->number()); - assert(upb_f); - - // Encapsulation violation BEGIN - uint32_t data_offset = r->offsets_[proto2_f->index()]; - uint32_t hasbit = (r->has_bits_offset_ * 8) + proto2_f->index(); - // Encapsulation violation END - - if (upb_isseq(upb_f)) { - // proto2 does not store hasbits for repeated fields. - upb_f->hasbit = -1; - } else { - upb_f->hasbit = hasbit; - } - upb_f->offset = data_offset; - upb_fielddef_setaccessor(upb_f, proto2_accessor(upb_f)); - - if (upb_isstring(upb_f) && !upb_isseq(upb_f)) { - upb_f->default_ptr = &r->GetStringReference(*m, proto2_f, NULL); - } else if (upb_issubmsg(upb_f)) { - // XXX: skip leading "." - const google::protobuf::Descriptor *subm_descriptor = - google::protobuf::DescriptorPool::generated_pool()-> - FindMessageTypeByName(upb_fielddef_typename(upb_f) + 1); - assert(subm_descriptor); - upb_f->prototype = google::protobuf::MessageFactory::generated_factory()->GetPrototype(subm_descriptor); - if (!upb_isseq(upb_f)) - upb_f->default_ptr = &r->GetMessage(*m, proto2_f); - } - } -} +upb::StringSource strsrc; +upb::Decoder d; +const upb::MessageDef *def; +upb::DecoderPlan* plan; static bool initialize() { - // Initialize upb state, decode descriptor. 
- upb_status status = UPB_STATUS_INIT; - upb_symtab *s = upb_symtab_new(); - - char *data = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &len); - if (!data) { - fprintf(stderr, "Couldn't read file: " MESSAGE_DESCRIPTOR_FILE); - return false; - } - int n; - upb_def **defs = upb_load_defs_from_descriptor(data, len, &n, &status); - free(data); - if(!upb_ok(&status)) { - fprintf(stderr, "Error reading descriptor: %s\n", - upb_status_getstr(&status)); - return false; - } - - // Setup offsets and accessors to properly write into a proto2 generated - // class. - for (int i = 0; i < n; i++) { - upb_def *def = defs[i]; - upb_msgdef *upb_md = upb_dyncast_msgdef(def); - if (!upb_md) continue; - const google::protobuf::Descriptor *proto2_md = - google::protobuf::DescriptorPool::generated_pool()-> - FindMessageTypeByName(upb_def_fqname(def)); - if (!proto2_md) abort(); - const google::protobuf::Message *proto2_m = - google::protobuf::MessageFactory::generated_factory()->GetPrototype(proto2_md); - layout_msgdef_from_proto2(upb_md, proto2_m, proto2_md); - } - - upb_symtab_add(s, defs, n, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Error reading adding to symtab: %s\n", - upb_status_getstr(&status)); - return false; - } - for(int i = 0; i < n; i++) upb_def_unref(defs[i]); - free(defs); - - def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME)); - if(!def) { - fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); - return false; - } - upb_symtab_unref(s); - // Read the message data itself. 
str = upb_readfile(MESSAGE_FILE, &len); if(str == NULL) { fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return false; } - upb_status_uninit(&status); + + def = upb::proto2_bridge::NewFinalMessageDef(msg2, &def); msg2.ParseFromArray(str, len); - upb_stringsrc_init(&strsrc); - upb_handlers *h = upb_handlers_new(); - upb_accessors_reghandlers(h, def); - p = upb_decoderplan_new(h, JIT); - upb_decoder_init(&d); - upb_decoder_resetplan(&d, p, 0); - upb_handlers_unref(h); + upb::Handlers* h = upb::Handlers::New(); + upb::RegisterWriteHandlers(h, def); + plan = upb::DecoderPlan::New(h, JIT); + d.ResetPlan(plan, 0); + h->Unref(); return true; } static void cleanup() { - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_def_unref(UPB_UPCAST(def)); - upb_decoderplan_unref(p); - free(str); + def->Unref(&def); + plan->Unref(); } -static size_t run(int i) -{ - (void)i; - upb_status status = UPB_STATUS_INIT; +static size_t run(int i) { msg[i % NUM_MESSAGES].Clear(); - upb_stringsrc_reset(&strsrc, str, len); - upb_decoder_resetinput( - &d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]); - if (upb_decoder_decode(&d) != UPB_OK) goto err; + strsrc.Reset(str, len); + d.ResetInput(strsrc.AllBytes(), &msg[i % NUM_MESSAGES]); + if (d.Decode() != UPB_OK) goto err; return len; err: - fprintf(stderr, "Decode error: %s", upb_status_getstr(&status)); + fprintf(stderr, "Decode error: %s", d.status().GetString()); return 0; } diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c deleted file mode 100644 index 9487577006..0000000000 --- a/benchmarks/parsetostruct.upb.c +++ /dev/null @@ -1,85 +0,0 @@ - -#include "main.c" - -#include "upb/bytestream.h" -#include "upb/def.h" -#include "upb/msg.h" -#include "upb/pb/decoder.h" -#include "upb/pb/glue.h" - -static const upb_msgdef *def; -static size_t len; -static void *msg[NUM_MESSAGES]; -static upb_stringsrc strsrc; -static upb_decoder d; -static upb_decoderplan *p; -char *str; - -static bool 
initialize() -{ - // Initialize upb state, decode descriptor. - upb_status status = UPB_STATUS_INIT; - upb_symtab *s = upb_symtab_new(); - upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Error reading descriptor: %s\n", - upb_status_getstr(&status)); - return false; - } - - def = upb_dyncast_msgdef_const(upb_symtab_lookup(s, MESSAGE_NAME)); - if(!def) { - fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); - return false; - } - upb_symtab_unref(s); - - // Read the message data itself. - str = upb_readfile(MESSAGE_FILE, &len); - if(str == NULL) { - fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); - return false; - } - upb_status_uninit(&status); - for (int i = 0; i < NUM_MESSAGES; i++) - msg[i] = upb_stdmsg_new(def); - - upb_stringsrc_init(&strsrc); - upb_handlers *h = upb_handlers_new(); - upb_accessors_reghandlers(h, def); - p = upb_decoderplan_new(h, JIT); - upb_decoder_init(&d); - upb_handlers_unref(h); - upb_decoder_resetplan(&d, p, 0); - - if (!BYREF) { - // TODO: use byref/byval accessors. 
- } - return true; -} - -static void cleanup() -{ - for (int i = 0; i < NUM_MESSAGES; i++) - upb_stdmsg_free(msg[i], def); - upb_def_unref(UPB_UPCAST(def)); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_decoderplan_unref(p); - free(str); -} - -static size_t run(int i) -{ - upb_status status = UPB_STATUS_INIT; - i %= NUM_MESSAGES; - upb_msg_clear(msg[i], def); - upb_stringsrc_reset(&strsrc, str, len); - upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg[i]); - if (upb_decoder_decode(&d) != UPB_OK) goto err; - return len; - -err: - fprintf(stderr, "Decode error: %s", upb_status_getstr(&status)); - return 0; -} diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp index 968d542c2a..81134b95a6 100644 --- a/bindings/cpp/upb/bytestream.hpp +++ b/bindings/cpp/upb/bytestream.hpp @@ -68,6 +68,7 @@ #include "upb/bytestream.h" #include "upb/upb.hpp" +#include namespace upb { @@ -204,6 +205,18 @@ class ByteRegion : public upb_byteregion { return upb_byteregion_strdup(this); } + template void AssignToString(T* str) { + uint64_t ofs = start_ofs(); + str->clear(); + str->reserve(Length()); + while (ofs < end_ofs()) { + size_t len; + const char *ptr = GetPtr(ofs, &len); + str->append(ptr, len); + ofs += len; + } + } + // TODO: add if/when there is a demonstrated need. 
// // // Pins this byteregion's bytes in memory, allowing it to outlive its @@ -220,12 +233,24 @@ class ByteRegion : public upb_byteregion { class StringSource : public upb_stringsrc { public: StringSource() : upb_stringsrc() { upb_stringsrc_init(this); } + template explicit StringSource(const T& str) { + upb_stringsrc_init(this); + Reset(str); + } + StringSource(const char *data, size_t len) { + upb_stringsrc_init(this); + Reset(data, len); + } ~StringSource() { upb_stringsrc_uninit(this); } void Reset(const char* data, size_t len) { upb_stringsrc_reset(this, data, len); } + template void Reset(const T& str) { + Reset(str.c_str(), str.size()); + } + ByteRegion* AllBytes() { return static_cast(upb_stringsrc_allbytes(this)); } @@ -233,6 +258,14 @@ class StringSource : public upb_stringsrc { upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); } }; +template <> inline ByteRegion* GetValue(Value v) { + return static_cast(upb_value_getbyteregion(v)); +} + +template <> inline Value MakeValue(ByteRegion* v) { + return upb_value_byteregion(v); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp index 030ba40e86..69986482c6 100644 --- a/bindings/cpp/upb/def.hpp +++ b/bindings/cpp/upb/def.hpp @@ -1,7 +1,7 @@ // // upb - a minimalist implementation of protocol buffers. // -// Copyright (c) 2011 Google Inc. See LICENSE for details. +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. // Author: Josh Haberman // // The set of upb::*Def classes and upb::SymbolTable allow for defining and @@ -15,21 +15,20 @@ // not be used for any purpose except to set its properties (it can't be // used to parse anything, create any messages in memory, etc). // -// 2. FINALIZED: after being added to a symtab (which links the defs together) -// the defs become finalized (thread-safe and immutable). 
Programs may only -// access defs through a CONST POINTER during this stage -- upb_symtab will -// help you out with this requirement by only vending const pointers, but -// you need to make sure not to use any non-const pointers you still have -// sitting around. In practice this means that you may not call any setters -// on the defs (or functions that themselves call the setters). If you want -// to modify an existing immutable def, copy it with upb_*_dup(), modify the -// copy, and add the modified def to the symtab (replacing the existing -// def). +// 2. FINALIZED: the Def::Finalize() operation finalizes a set of defs, +// which makes them thread-safe and immutable. Finalized defs may only be +// accessed through a CONST POINTER. If you want to modify an existing +// immutable def, copy it with Dup() and modify and finalize the copy. // -// You can test for which stage of life a def is in by calling -// upb::Def::IsMutable(). This is particularly useful for dynamic language -// bindings, which must properly guarantee that the dynamic language cannot -// break the rules laid out above. +// The refcounting of defs works properly no matter what state the def is in. +// Once the def is finalized it is guaranteed that any def reachable from a +// live def is also live (so a ref on the base of a message tree keeps the +// whole tree alive). +// +// You can test for which stage of life a def is in by calling IsMutable(). +// This is particularly useful for dynamic language bindings, which must +// properly guarantee that the dynamic language cannot break the rules laid out +// above. 
// // It would be possible to make the defs thread-safe during stage 1 by using // mutexes internally and changing any methods returning pointers to return @@ -48,63 +47,213 @@ namespace upb { +class Def; class MessageDef; +typedef upb_fieldtype_t FieldType; +typedef upb_label_t Label; + class FieldDef : public upb_fielddef { public: - static FieldDef* Cast(upb_fielddef *f) { return (FieldDef*)f; } - static const FieldDef* Cast(const upb_fielddef *f) { return (FieldDef*)f; } + static FieldDef* Cast(upb_fielddef *f) { return static_cast(f); } + static const FieldDef* Cast(const upb_fielddef *f) { + return static_cast(f); + } + + static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); } + FieldDef* Dup(void *owner) const { + return Cast(upb_fielddef_dup(this, owner)); + } + void Ref(void *owner) { upb_fielddef_ref(this, owner); } + void Unref(void *owner) { upb_fielddef_unref(this, owner); } - static FieldDef* New() { return Cast(upb_fielddef_new()); } - FieldDef* Dup() { return Cast(upb_fielddef_dup(this)); } + bool IsMutable() const { return upb_fielddef_ismutable(this); } + bool IsFinalized() const { return upb_fielddef_isfinalized(this); } + bool IsString() const { return upb_isstring(this); } + bool IsSequence() const { return upb_isseq(this); } + bool IsSubmessage() const { return upb_issubmsg(this); } - // Read accessors -- may be called at any time. - uint8_t type() const { return upb_fielddef_type(this); } - uint8_t label() const { return upb_fielddef_label(this); } + // Simple accessors. 
///////////////////////////////////////////////////////// + + FieldType type() const { return upb_fielddef_type(this); } + Label label() const { return upb_fielddef_label(this); } int32_t number() const { return upb_fielddef_number(this); } std::string name() const { return std::string(upb_fielddef_name(this)); } Value default_() const { return upb_fielddef_default(this); } Value bound_value() const { return upb_fielddef_fval(this); } + uint16_t offset() const { return upb_fielddef_offset(this); } + int16_t hasbit() const { return upb_fielddef_hasbit(this); } + + bool set_type(FieldType type) { return upb_fielddef_settype(this, type); } + bool set_label(Label label) { return upb_fielddef_setlabel(this, label); } + void set_offset(uint16_t offset) { upb_fielddef_setoffset(this, offset); } + void set_hasbit(int16_t hasbit) { upb_fielddef_sethasbit(this, hasbit); } + void set_fval(Value fval) { upb_fielddef_setfval(this, fval); } + void set_accessor(struct _upb_accessor_vtbl* vtbl) { + upb_fielddef_setaccessor(this, vtbl); + } + MessageDef* message(); + const MessageDef* message() const; - MessageDef* message() { return (MessageDef*)upb_fielddef_msgdef(this); } - const MessageDef* message() const { return (MessageDef*)upb_fielddef_msgdef(this); } - - // Will be added once upb::Def is defined: - // Def* subdef() { return upb_fielddef_subdef(this); } - // const Def* subdef() { return upb_fielddef_subdef(this); } - - // Returns true if this FieldDef is finalized - bool IsFinalized() const { return upb_fielddef_finalized(this); } struct _upb_accessor_vtbl *accessor() const { return upb_fielddef_accessor(this); } - std::string type_name() const { - return std::string(upb_fielddef_typename(this)); + + // "Number" and "name" must be set before the fielddef is added to a msgdef. + // For the moment we do not allow these to be set once the fielddef is added + // to a msgdef -- this could be relaxed in the future. 
+ bool set_number(int32_t number) { + return upb_fielddef_setnumber(this, number); + } + bool set_name(const char *name) { return upb_fielddef_setname(this, name); } + bool set_name(const std::string& name) { return set_name(name.c_str()); } + + // Default value. //////////////////////////////////////////////////////////// + + // Returns the default value for this fielddef, which may either be something + // the client set explicitly or the "default default" (0 for numbers, empty + // for strings). The field's type indicates the type of the returned value, + // except for enum fields that are still mutable. + // + // For enums the default can be set either numerically or symbolically -- the + // upb_fielddef_default_is_symbolic() function below will indicate which it + // is. For string defaults, the value will be a upb_byteregion which is + // invalidated by any other non-const call on this object. Once the fielddef + // is finalized, symbolic enum defaults are resolved, so finalized enum + // fielddefs always have a default of type int32. + Value defaultval() { return upb_fielddef_default(this); } + + // Sets default value for the field. For numeric types, use + // upb_fielddef_setdefault(), and "value" must match the type of the field. + // For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may + // use either, since the default may be set either numerically or + // symbolically. + // + // NOTE: May only be called for fields whose type has already been set. + // Also, will be reset to default if the field's type is set again. 
+ void set_default(Value value) { upb_fielddef_setdefault(this, value); } + void set_default(const char *str) { upb_fielddef_setdefaultcstr(this, str); } + void set_default(const char *str, size_t len) { + upb_fielddef_setdefaultstr(this, str, len); + } + void set_default(const std::string& str) { + upb_fielddef_setdefaultstr(this, str.c_str(), str.size()); + } + + // The results of this function are only meaningful for mutable enum fields, + // which can have a default specified either as an integer or as a string. + // If this returns true, the default returned from upb_fielddef_default() is + // a string, otherwise it is an integer. + bool DefaultIsSymbolic() { return upb_fielddef_default_is_symbolic(this); } + + // Subdef. /////////////////////////////////////////////////////////////////// + + // Submessage and enum fields must reference a "subdef", which is the + // MessageDef or EnumDef that defines their type. Note that when the + // FieldDef is mutable it may not have a subdef *yet*, but this still returns + // true to indicate that the field's type requires a subdef. + bool HasSubDef() { return upb_hassubdef(this); } + + // Before a FieldDef is finalized, its subdef may be set either directly + // (with a Def*) or symbolically. Symbolic refs must be resolved by the + // client before the containing msgdef can be finalized. + // + // Both methods require that HasSubDef() (so the type must be set prior to + // calling these methods). Returns false if this is not the case, or if the + // given subdef is not of the correct type. The subtype is reset if the + // field's type is changed. + bool set_subdef(Def* def); + bool set_subtype_name(const char *name) { + return upb_fielddef_setsubtypename(this, name); + } + bool set_subtype_name(const std::string& str) { + return set_subtype_name(str.c_str()); } - // Write accessors -- may not be called once the FieldDef is finalized. + // Returns the enum or submessage def or symbolic name for this field, if + // any. 
May only be called for fields where HasSubDef() is true. Returns + // NULL if the subdef has not been set or if you ask for a subtype name when + // the subtype is currently set symbolically (or vice-versa). + // + // Caller does *not* own a ref on the returned def or string. + // subtypename_name() is non-const because only mutable defs can have the + // subtype name set symbolically (symbolic references must be resolved before + // the MessageDef can be finalized). + const Def* subdef() const; + const char *subtype_name() { return upb_fielddef_subtypename(this); } private: - FieldDef(); - ~FieldDef(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldDef); +}; + +class Def : public upb_def { + public: + // Converting from C types to C++ wrapper types. + static Def* Cast(upb_def *def) { return static_cast(def); } + static const Def* Cast(const upb_def *def) { + return static_cast(def); + } + + void Ref(void *owner) const { upb_def_ref(this, owner); } + void Unref(void *owner) const { upb_def_unref(this, owner); } + + void set_full_name(const char *name) { upb_def_setfullname(this, name); } + void set_full_name(const std::string& name) { + upb_def_setfullname(this, name.c_str()); + } + + const char *full_name() const { return upb_def_fullname(this); } + + // Finalizes the given list of defs (as well as the fielddefs for the given + // msgdefs). All defs reachable from any def in this list must either be + // already finalized or elsewhere in the list. Any symbolic references to + // enums or submessages must already have been resolved. Returns true on + // success, otherwise false is returned and status contains details. In the + // error case the input defs are unmodified. See the comment at the top of + // this file for the semantics of finalized defs. + // + // n is currently limited to 64k defs, if more are required break them into + // batches of 64k (or we could raise this limit, at the cost of a bigger + // upb_def structure or complexity in upb_def_finalize()). 
+ static bool Finalize(Def*const* defs, int n, Status* status) { + return upb_finalize(reinterpret_cast(defs), n, status); + } + static bool Finalize(const std::vector& defs, Status* status) { + return Finalize(&defs[0], defs.size(), status); + } }; class MessageDef : public upb_msgdef { public: // Converting from C types to C++ wrapper types. - static MessageDef* Cast(upb_msgdef *md) { return (MessageDef*)md; } + static MessageDef* Cast(upb_msgdef *md) { + return static_cast(md); + } static const MessageDef* Cast(const upb_msgdef *md) { - return (MessageDef*)md; + return static_cast(md); + } + static MessageDef* DynamicCast(Def* def) { + return Cast(upb_dyncast_msgdef(def)); + } + static const MessageDef* DynamicCast(const Def* def) { + return Cast(upb_dyncast_msgdef_const(def)); } - static MessageDef* New() { return Cast(upb_msgdef_new()); } - MessageDef* Dup() { return Cast(upb_msgdef_dup(this)); } + Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } + const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } + + static MessageDef* New(void *owner) { return Cast(upb_msgdef_new(owner)); } + MessageDef* Dup(void *owner) const { + return Cast(upb_msgdef_dup(this, owner)); + } - void Ref() const { upb_msgdef_ref(this); } - void Unref() const { upb_msgdef_unref(this); } + void Ref(void *owner) const { upb_msgdef_ref(this, owner); } + void Unref(void *owner) const { upb_msgdef_unref(this, owner); } // Read accessors -- may be called at any time. + const char *full_name() const { return AsDef()->full_name(); } + // The total size of in-memory messages created with this MessageDef. uint16_t instance_size() const { return upb_msgdef_size(this); } @@ -116,25 +265,32 @@ class MessageDef : public upb_msgdef { // Write accessors. May only be called before the msgdef is in a symtab. 
+ void set_full_name(const char *name) { AsDef()->set_full_name(name); } + void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } + void set_instance_size(uint16_t size) { upb_msgdef_setsize(this, size); } void set_hasbit_bytes(uint16_t size) { upb_msgdef_setsize(this, size); } bool SetExtensionRange(uint32_t start, uint32_t end) { return upb_msgdef_setextrange(this, start, end); } - // Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains - // its ref on the fielddef. May only be done before the msgdef is in a - // symtab (requires upb_def_ismutable(m) for the msgdef). The fielddef's - // name and number must be set, and the message may not already contain any - // field with this name or number, and this fielddef may not be part of - // another message, otherwise false is returned and no action is performed. - bool AddFields(FieldDef*const * f, int n) { - return upb_msgdef_addfields(this, (upb_fielddef**)f, n); + // Adds a set of fields (FieldDef objects) to a MessageDef. Caller passes a + // ref on the FieldDef to the MessageDef in both success and failure cases. + // May only be done before the MessageDef is in a SymbolTable (requires + // m->IsMutable() for the MessageDef). The FieldDef's name and number must + // be set, and the message may not already contain any field with this name + // or number, and this FieldDef may not be part of another message, otherwise + // false is returned and the MessageDef is unchanged. 
+ bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); } + bool AddFields(FieldDef*const * f, int n, void *owner) { + return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner); } - bool AddFields(const std::vector& fields) { - return AddFields(&fields[0], fields.size()); + bool AddFields(const std::vector& fields, void *owner) { + return AddFields(&fields[0], fields.size(), owner); } + int field_count() const { return upb_msgdef_numfields(this); } + // Lookup fields by name or number, returning NULL if no such field exists. FieldDef* FindFieldByName(const char *name) { return FieldDef::Cast(upb_msgdef_ntof(this, name)); @@ -156,19 +312,89 @@ class MessageDef : public upb_msgdef { return FindFieldByNumber(num); } - // TODO: iteration over fields. + class Iterator : public upb_msg_iter { + public: + explicit Iterator(MessageDef* md) { upb_msg_begin(this, md); } + Iterator() {} + + FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); } + bool Done() { return upb_msg_done(this); } + void Next() { return upb_msg_next(this); } + }; + + class ConstIterator : public upb_msg_iter { + public: + explicit ConstIterator(const MessageDef* md) { upb_msg_begin(this, md); } + ConstIterator() {} + + const FieldDef* field() { return FieldDef::Cast(upb_msg_iter_field(this)); } + bool Done() { return upb_msg_done(this); } + void Next() { return upb_msg_next(this); } + }; private: - MessageDef(); - ~MessageDef(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageDef); +}; + +class EnumDef : public upb_enumdef { + public: + // Converting from C types to C++ wrapper types. 
+ static EnumDef* Cast(upb_enumdef *e) { return static_cast(e); } + static const EnumDef* Cast(const upb_enumdef *e) { + return static_cast(e); + } + + static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); } + + void Ref(void *owner) { upb_enumdef_ref(this, owner); } + void Unref(void *owner) { upb_enumdef_unref(this, owner); } + EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); } + + Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } + const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } + + int32_t default_value() const { return upb_enumdef_default(this); } + + // May only be set if IsMutable(). + void set_full_name(const char *name) { AsDef()->set_full_name(name); } + void set_full_name(const std::string& name) { AsDef()->set_full_name(name); } + void set_default_value(int32_t val) { + return upb_enumdef_setdefault(this, val); + } + + // Adds a value to the enumdef. Requires that no existing val has this + // name or number (returns false and does not add if there is). May only + // be called if IsMutable(). + bool AddValue(char *name, int32_t num) { + return upb_enumdef_addval(this, name, num); + } + bool AddValue(const std::string& name, int32_t num) { + return upb_enumdef_addval(this, name.c_str(), num); + } + + // Lookups from name to integer and vice-versa. + bool LookupName(const char *name, int32_t* num) const { + return upb_enumdef_ntoi(this, name, num); + } + + // Lookup from integer to name, returns a NULL-terminated string which + // the caller does not own, or NULL if not found. + const char *LookupNumber(int32_t num) const { + return upb_enumdef_iton(this, num); + } + + private: + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(EnumDef); }; class SymbolTable : public upb_symtab { public: // Converting from C types to C++ wrapper types. 
- static SymbolTable* Cast(upb_symtab *s) { return (SymbolTable*)s; } + static SymbolTable* Cast(upb_symtab *s) { + return static_cast(s); + } static const SymbolTable* Cast(const upb_symtab *s) { - return (SymbolTable*)s; + return static_cast(s); } static SymbolTable* New() { return Cast(upb_symtab_new()); } @@ -176,17 +402,50 @@ class SymbolTable : public upb_symtab { void Ref() const { upb_symtab_unref(this); } void Unref() const { upb_symtab_unref(this); } + // Adds the given defs to the symtab, resolving all symbols. Only one def + // per name may be in the list, but defs can replace existing defs in the + // symtab. The entire operation either succeeds or fails. If the operation + // fails, the symtab is unchanged, false is returned, and status indicates + // the error. The caller passes a ref on the defs in all cases. + bool Add(Def *const *defs, int n, void *owner, Status* status) { + return upb_symtab_add(this, (upb_def*const*)defs, n, owner, status); + } + bool Add(const std::vector& defs, void *owner, Status* status) { + return Add(&defs[0], defs.size(), owner, status); + } + // If the given name refers to a message in this symbol table, returns a new // ref to that MessageDef object, otherwise returns NULL. 
- const MessageDef* LookupMessage(const char *name) const { - return MessageDef::Cast(upb_symtab_lookupmsg(this, name)); + const MessageDef* LookupMessage(const char *name, void *owner) const { + return MessageDef::Cast(upb_symtab_lookupmsg(this, name, owner)); } private: - SymbolTable(); - ~SymbolTable(); + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(SymbolTable); }; +template <> inline const FieldDef* GetValue(Value v) { + return static_cast(upb_value_getfielddef(v)); +} + +template <> inline Value MakeValue(FieldDef* v) { + return upb_value_fielddef(v); +} + +inline MessageDef* FieldDef::message() { + return MessageDef::Cast(upb_fielddef_msgdef(this)); +} +inline const MessageDef* FieldDef::message() const { + return MessageDef::Cast(upb_fielddef_msgdef(this)); +} + +inline const Def* FieldDef::subdef() const { + return Def::Cast(upb_fielddef_subdef(this)); +} +inline bool FieldDef::set_subdef(Def* def) { + return upb_fielddef_setsubdef(this, def); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/handlers.cc b/bindings/cpp/upb/handlers.cc new file mode 100644 index 0000000000..c96a74e17f --- /dev/null +++ b/bindings/cpp/upb/handlers.cc @@ -0,0 +1,39 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman + +#include "handlers.hpp" + +#include "def.hpp" + +namespace upb { + +namespace { + +void MessageCallbackWrapper( + void* closure, upb_mhandlers* mh, const upb_msgdef* m) { + Handlers::MessageRegistrationVisitor* visitor = + static_cast<Handlers::MessageRegistrationVisitor*>(closure); + visitor->OnMessage(static_cast<MessageHandlers*>(mh), + static_cast<const MessageDef*>(m)); +} + +void FieldCallbackWrapper( + void* closure, upb_fhandlers* fh, const upb_fielddef* f) { + Handlers::MessageRegistrationVisitor* visitor = + static_cast<Handlers::MessageRegistrationVisitor*>(closure); + visitor->OnField(static_cast<FieldHandlers*>(fh), + static_cast<const FieldDef*>(f)); +} +} // namespace + +MessageHandlers* Handlers::RegisterMessageDef( + const MessageDef& m, Handlers::MessageRegistrationVisitor* visitor) { + upb_mhandlers* mh = upb_handlers_regmsgdef( + this, &m, &MessageCallbackWrapper, &FieldCallbackWrapper, visitor); + return static_cast<MessageHandlers*>(mh); +} + +} // namespace upb diff --git a/bindings/cpp/upb/handlers.hpp b/bindings/cpp/upb/handlers.hpp index d356a33de3..a366c3d640 100644 --- a/bindings/cpp/upb/handlers.hpp +++ b/bindings/cpp/upb/handlers.hpp @@ -15,11 +15,16 @@ #include "upb/handlers.h" +#include "upb/upb.hpp" + namespace upb { typedef upb_fieldtype_t FieldType; typedef upb_flow_t Flow; +typedef upb_sflow_t SubFlow; class MessageHandlers; +class MessageDef; +class FieldDef; class FieldHandlers : public upb_fhandlers { public: @@ -68,12 +73,11 @@ class FieldHandlers : public upb_fhandlers { MessageHandlers* GetSubMessageHandlers() const; // If set to >=0, the given hasbit will be set after the value callback is // called (offset relative to the current closure). - int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); } - void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); } + int32_t GetHasbit() const { return upb_fhandlers_gethasbit(this); } + void SetHasbit(int32_t bit) { upb_fhandlers_sethasbit(this, bit); } private: - FieldHandlers(); // Only created by upb::Handlers. - ~FieldHandlers(); // Only destroyed by refcounting.
+ UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(FieldHandlers); }; class MessageHandlers : public upb_mhandlers { @@ -81,6 +85,13 @@ class MessageHandlers : public upb_mhandlers { typedef upb_startmsg_handler StartMessageHandler; typedef upb_endmsg_handler EndMessageHandler; + static MessageHandlers* Cast(upb_mhandlers* mh) { + return static_cast(mh); + } + static const MessageHandlers* Cast(const upb_mhandlers* mh) { + return static_cast(mh); + } + // The MessageHandlers will live at least as long as the upb::Handlers to // which it belongs, but can be Ref'd/Unref'd to make it live longer (which // will prolong the life of the underlying upb::Handlers also). @@ -89,7 +100,7 @@ class MessageHandlers : public upb_mhandlers { // Functions to set this message's handlers. // These return "this" so they can be conveniently chained, eg. - // handlers->NewMessage() + // handlers->NewMessageHandlers() // ->SetStartMessageHandler(&StartMessage) // ->SetEndMessageHandler(&EndMessage); MessageHandlers* SetStartMessageHandler(StartMessageHandler* h) { @@ -111,13 +122,13 @@ class MessageHandlers : public upb_mhandlers { FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name, FieldType type, bool repeated, MessageHandlers* subm) { + (void)name; return static_cast( upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm)); } private: - MessageHandlers(); // Only created by upb::Handlers. - ~MessageHandlers(); // Only destroyed by refcounting. + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(MessageHandlers); }; class Handlers : public upb_handlers { @@ -134,17 +145,29 @@ class Handlers : public upb_handlers { return static_cast(upb_handlers_newmhandlers(this)); } + // Convenience function for registering handlers for all messages and fields + // in a MessageDef and all its children. For every registered message, + // OnMessage will be called on the visitor with newly-created MessageHandlers + // and MessageDef. 
Likewise with OnField will be called with newly-created + // FieldHandlers and FieldDef for each field. + class MessageRegistrationVisitor { + public: + virtual ~MessageRegistrationVisitor() {} + virtual void OnMessage(MessageHandlers* mh, const MessageDef* m) = 0; + virtual void OnField(FieldHandlers* fh, const FieldDef* f) = 0; + }; + MessageHandlers* RegisterMessageDef(const MessageDef& m, + MessageRegistrationVisitor* visitor); + private: - Handlers(); // Only created by Handlers::New(). - ~Handlers(); // Only destroyed by refcounting. + UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(Handlers); }; - -MessageHandlers* FieldHandlers::GetMessageHandlers() const { +inline MessageHandlers* FieldHandlers::GetMessageHandlers() const { return static_cast(upb_fhandlers_getmsg(this)); } -MessageHandlers* FieldHandlers::GetSubMessageHandlers() const { +inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const { return static_cast(upb_fhandlers_getsubmsg(this)); } diff --git a/bindings/cpp/upb/msg.hpp b/bindings/cpp/upb/msg.hpp new file mode 100644 index 0000000000..c7cf1f2627 --- /dev/null +++ b/bindings/cpp/upb/msg.hpp @@ -0,0 +1,62 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011 Google Inc. See LICENSE for details. +// Author: Josh Haberman +// Routines for reading and writing message data to an in-memory structure, +// similar to a C struct. +// +// upb does not define one single message object that everyone must use. +// Rather it defines an abstract interface for reading and writing members +// of a message object, and all of the parsers and serializers use this +// abstract interface. This allows upb's parsers and serializers to be used +// regardless of what memory management scheme or synchronization model the +// application is using. +// +// A standard set of accessors is provided for doing simple reads and writes at +// a known offset into the message. 
These accessors should be used when +// possible, because they are specially optimized -- for example, the JIT can +// recognize them and emit specialized code instead of having to call the +// function at all. The application can substitute its own accessors when the +// standard accessors are not suitable. + +#ifndef UPB_MSG_HPP +#define UPB_MSG_HPP + +#include "upb/msg.h" +#include "upb/handlers.hpp" + +namespace upb { + +typedef upb_accessor_vtbl AccessorVTable; + +// Registers handlers for writing into a message of the given type using +// whatever accessors it has defined. +inline MessageHandlers* RegisterWriteHandlers(upb::Handlers* handlers, + const upb::MessageDef* md) { + return MessageHandlers::Cast( + upb_accessors_reghandlers(handlers, md)); +} + +template static FieldHandlers::ValueHandler* GetValueHandler(); + +// A handy templated function that will retrieve a value handler for a given +// C++ type. +#define GET_VALUE_HANDLER(type, ctype) \ + template <> \ + FieldHandlers::ValueHandler* GetValueHandler() { \ + return &upb_stdmsg_set ## type; \ + } + +GET_VALUE_HANDLER(double, double); +GET_VALUE_HANDLER(float, float); +GET_VALUE_HANDLER(uint64, uint64_t); +GET_VALUE_HANDLER(uint32, uint32_t); +GET_VALUE_HANDLER(int64, int64_t); +GET_VALUE_HANDLER(int32, int32_t); +GET_VALUE_HANDLER(bool, bool); +#undef GET_VALUE_HANDLER + +} // namespace + +#endif diff --git a/bindings/cpp/upb/pb/glue.hpp b/bindings/cpp/upb/pb/glue.hpp index be072a7605..d43baeb933 100644 --- a/bindings/cpp/upb/pb/glue.hpp +++ b/bindings/cpp/upb/pb/glue.hpp @@ -13,11 +13,23 @@ namespace upb { +// All routines that load descriptors expect the descriptor to be a +// FileDescriptorSet. 
bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname, Status* status) { return upb_load_descriptor_file_into_symtab(s, fname, status); } +bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str, + size_t len, Status* status) { + return upb_load_descriptor_into_symtab(s, str, len, status); +} + +template +bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) { + return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status); +} + } // namespace upb #endif diff --git a/bindings/cpp/upb/proto2_bridge.cc b/bindings/cpp/upb/proto2_bridge.cc new file mode 100644 index 0000000000..6119295fa4 --- /dev/null +++ b/bindings/cpp/upb/proto2_bridge.cc @@ -0,0 +1,892 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. +// Author: Josh Haberman + +#include +#include +#include "upb/bytestream.hpp" +#include "upb/def.hpp" +#include "upb/handlers.hpp" +#include "upb/msg.hpp" +#include "upb/proto2_bridge.hpp" + +namespace { + +static void* GetFieldPointer(void *message, const upb::FieldDef* f) { + return static_cast(message) + f->offset(); +} + +} // namespace + +#ifdef UPB_GOOGLE3 + +// TODO(haberman): friend upb so that this isn't required. 
+#define protected public +#include "net/proto2/public/repeated_field.h" +#undef protected + +#define private public +#include "net/proto/proto2_reflection.h" +#undef private + +#include "net/proto2/proto/descriptor.pb.h" +#include "net/proto2/public/descriptor.h" +#include "net/proto2/public/generated_message_reflection.h" +#include "net/proto2/public/lazy_field.h" +#include "net/proto2/public/message.h" +#include "net/proto2/public/string_piece_field_support.h" +#include "net/proto/internal_layout.h" +#include "strings/cord.h" +using ::proto2::Descriptor; +using ::proto2::EnumDescriptor; +using ::proto2::EnumValueDescriptor; +using ::proto2::FieldDescriptor; +using ::proto2::FieldOptions; +using ::proto2::FileDescriptor; +using ::proto2::internal::GeneratedMessageReflection; +using ::proto2::internal::RepeatedPtrFieldBase; +using ::proto2::internal::StringPieceField; +using ::proto2::Message; +using ::proto2::MessageFactory; +using ::proto2::Reflection; +using ::proto2::RepeatedField; +using ::proto2::RepeatedPtrField; + +namespace upb { + +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f); + +namespace proto2_bridge_google3 { class FieldAccessor; } + +using ::upb::proto2_bridge_google3::FieldAccessor; + +namespace proto2_bridge_google3 { + +static void AssignToCord(const ByteRegion* r, Cord* cord) { + // TODO(haberman): ref source data if source is a cord. + cord->Clear(); + uint64_t ofs = r->start_ofs(); + while (ofs < r->end_ofs()) { + size_t len; + const char *buf = r->GetPtr(ofs, &len); + cord->Append(StringPiece(buf, len)); + ofs += len; + } +} + +#else + +// TODO(haberman): friend upb so that this isn't required.
+#define protected public +#include "google/protobuf/repeated_field.h" +#undef protected + +#define private public +#include "google/protobuf/generated_message_reflection.h" +#undef private + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/message.h" +using ::google::protobuf::Descriptor; +using ::google::protobuf::EnumDescriptor; +using ::google::protobuf::EnumValueDescriptor; +using ::google::protobuf::FieldDescriptor; +using ::google::protobuf::FieldOptions; +using ::google::protobuf::FileDescriptor; +using ::google::protobuf::internal::GeneratedMessageReflection; +using ::google::protobuf::internal::RepeatedPtrFieldBase; +using ::google::protobuf::Message; +using ::google::protobuf::MessageFactory; +using ::google::protobuf::Reflection; +using ::google::protobuf::RepeatedField; +using ::google::protobuf::RepeatedPtrField; + +namespace upb { +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f); + +namespace proto2_bridge_opensource { class FieldAccessor; } + +using ::upb::proto2_bridge_opensource::FieldAccessor; + +namespace proto2_bridge_opensource { + +#endif // ifdef UPB_GOOGLE3 + +// Have to define this manually since older versions of proto2 didn't define +// an enum value for STRING. +#define UPB_CTYPE_STRING 0 + +// The code in this class depends on the internal representation of the proto2 +// generated classes, which is an internal implementation detail of proto2 and +// is not a public interface. As a result, this class's implementation may +// need to be changed if/when proto2 changes its internal representation. It +// is intended that this class is the only code that depends on these internal, +// non-public interfaces. +// +// This class only works with messages that use GeneratedMessageReflection. +// Other reflection classes will need other accessor implementations. 
+class FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const FieldDescriptor* proto2_f, + const upb::MessageDef* md, + upb::FieldDef* upb_f) { + const Message* prototype = static_cast(md->prototype); + const Reflection* base_r = prototype->GetReflection(); + const GeneratedMessageReflection* r = + dynamic_cast(base_r); + // Old versions of the open-source protobuf release erroneously default to + // Cord even though that has never been supported in the open-source + // release. + int32_t ctype = proto2_f->options().has_ctype() ? + proto2_f->options().ctype() : UPB_CTYPE_STRING; + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + upb_f->set_accessor(GetForFieldDescriptor(proto2_f, ctype)); + upb_f->set_hasbit(GetHasbit(proto2_f, r)); + upb_f->set_offset(GetOffset(proto2_f, r)); + if (upb_f->IsSubmessage()) { + upb_f->set_subtype_name(proto2_f->message_type()->full_name()); + upb_f->prototype = GetPrototypeForField(*prototype, proto2_f); + } + + if (upb_f->IsString() && !upb_f->IsSequence() && + ctype == UPB_CTYPE_STRING) { + upb_f->prototype = &r->GetStringReference(*prototype, proto2_f, NULL); + } + return true; + } + + static MessageFactory* GetMessageFactory(const Message& m) { + const GeneratedMessageReflection* r = + dynamic_cast(m.GetReflection()); + return r ? r->message_factory_ : NULL; + } + + private: + static int64_t GetHasbit(const FieldDescriptor* f, + const GeneratedMessageReflection* r) { + if (f->is_repeated()) { + // proto2 does not store hasbits for repeated fields. 
+ return -1; + } else { + return (r->has_bits_offset_ * 8) + f->index(); + } + } + + static uint16_t GetOffset(const FieldDescriptor* f, + const GeneratedMessageReflection* r) { + return r->offsets_[f->index()]; + } + + static AccessorVTable *GetForFieldDescriptor(const FieldDescriptor* f, + int32_t ctype) { + switch (f->cpp_type()) { + case FieldDescriptor::CPPTYPE_ENUM: + // Should handlers validate enum membership to match proto2? + case FieldDescriptor::CPPTYPE_INT32: return Get(); + case FieldDescriptor::CPPTYPE_INT64: return Get(); + case FieldDescriptor::CPPTYPE_UINT32: return Get(); + case FieldDescriptor::CPPTYPE_UINT64: return Get(); + case FieldDescriptor::CPPTYPE_DOUBLE: return Get(); + case FieldDescriptor::CPPTYPE_FLOAT: return Get(); + case FieldDescriptor::CPPTYPE_BOOL: return Get(); + case FieldDescriptor::CPPTYPE_STRING: + switch (ctype) { +#ifdef UPB_GOOGLE3 + case FieldOptions::STRING: + return GetForString(); + case FieldOptions::CORD: + return GetForCord(); + case FieldOptions::STRING_PIECE: + return GetForStringPiece(); +#else + case UPB_CTYPE_STRING: + return GetForString(); +#endif + default: return NULL; + } + case FieldDescriptor::CPPTYPE_MESSAGE: +#ifdef UPB_GOOGLE3 + if (f->options().lazy()) { + return NULL; // Not yet implemented. 
+ } else { + return GetForMessage(); + } +#else + return GetForMessage(); +#endif + default: return NULL; + } + } + + // PushOffset handler (used for StartSequence and others) /////////////////// + + static SubFlow PushOffset(void *m, Value fval) { + const FieldDef *f = GetValue(fval); + return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static AccessorVTable *Get() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + GetValueHandler(), + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &Append, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + template + static Flow Append(void *_r, Value fval, Value val) { + (void)fval; + RepeatedField* r = static_cast*>(_r); + r->Add(GetValue(val)); + return UPB_CONTINUE; + } + + // String //////////////////////////////////////////////////////////////////// + + template static AccessorVTable *GetForString() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetString, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendString, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + // This needs to be templated because google3 string is not std::string. + template static Flow SetString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + T **str = static_cast(GetFieldPointer(m, f)); + // If it points to the default instance, we must create a new instance. 
+ if (*str == f->prototype) *str = new T(); + GetValue(val)->AssignToString(*str); + return UPB_CONTINUE; + } + + template + static Flow AppendString(void *_r, Value fval, Value val) { + (void)fval; + RepeatedPtrField* r = static_cast*>(_r); + GetValue(val)->AssignToString(r->Add()); + return UPB_CONTINUE; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForMessage() { + static upb_accessor_vtbl vtbl = { + &StartSubMessage, + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static SubFlow StartSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + void **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == NULL || *subm == f->prototype) { + const Message* prototype = static_cast(f->prototype); + *subm = prototype->New(); + } + return UPB_CONTINUE_WITH(*subm); + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { + const FieldDef* f = GetValue(fval); + RepeatedPtrFieldBase *r = static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + const Message* prototype = static_cast(f->prototype); + submsg = prototype->New(); + r->AddAllocated(submsg); + } + return UPB_CONTINUE_WITH(submsg); + } + + // TODO(haberman): handle Extensions, Unknown Fields. + +#ifdef UPB_GOOGLE3 + // Handlers for types/features only included in internal proto2 release: + // Cord, StringPiece, LazyField, and MessageSet. + // TODO(haberman): LazyField, MessageSet. 
+ + // Cord ////////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForCord() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetCord, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendCord, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetCord(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + Cord* field = static_cast(GetFieldPointer(m, f)); + AssignToCord(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendCord(void *_r, Value fval, Value val) { + RepeatedField* r = static_cast*>(_r); + AssignToCord(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + + // StringPiece /////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForStringPiece() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetStringPiece, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendStringPiece, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static void AssignToStringPieceField(const ByteRegion* r, + proto2::internal::StringPieceField* f) { + // TODO(haberman): alias if possible and enabled on the input stream. + // TODO(haberman): add a method to StringPieceField that lets us avoid + // this copy/malloc/free. 
+ char *data = new char[r->Length()]; + r->Copy(r->start_ofs(), r->Length(), data); + f->CopyFrom(StringPiece(data, r->Length())); + delete[] data; + } + + static Flow SetStringPiece(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + StringPieceField* field = + static_cast(GetFieldPointer(m, f)); + AssignToStringPieceField(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendStringPiece(void* _r, Value fval, Value val) { + RepeatedPtrField* r = + static_cast*>(_r); + AssignToStringPieceField(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + +#endif // UPB_GOOGLE3 +}; + +#ifdef UPB_GOOGLE3 + +// Proto1 accessor -- only needed inside Google. +class Proto1FieldAccessor { + public: + // Returns true if we were able to set an accessor and any other properties + // of the FieldDef that are necessary to read/write this field to a + // proto2::Message. + static bool TrySet(const FieldDescriptor* proto2_f, + const upb::MessageDef* md, + upb::FieldDef* upb_f) { + const Message* m = static_cast(md->prototype); + const proto2::Reflection* base_r = m->GetReflection(); + const _pi::Proto2Reflection* r = + dynamic_cast(base_r); + if (!r) return false; + // Extensions not supported yet. + if (proto2_f->is_extension()) return false; + + const _pi::Field* f = r->GetFieldLayout(proto2_f); + + if (f->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) { + // Override the BYTES type that proto2 descriptors have for weak fields. 
+ upb_f->set_type(UPB_TYPE(MESSAGE)); + } + + if (upb_f->IsSubmessage()) { + const Message* prototype = upb::GetPrototypeForField(*m, proto2_f); + upb_f->set_subtype_name(prototype->GetDescriptor()->full_name()); + upb_f->prototype = prototype; + } + + upb_f->set_accessor(GetForCrep(f->crep)); + upb_f->set_hasbit(GetHasbit(proto2_f, r)); + upb_f->set_offset(GetOffset(proto2_f, r)); + return true; + } + + private: + static int16_t GetHasbit(const FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + if (f->is_repeated()) { + // proto1 does not store hasbits for repeated fields. + return -1; + } else { + return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index; + } + } + + static uint16_t GetOffset(const FieldDescriptor* f, + const _pi::Proto2Reflection* r) { + return r->GetFieldLayout(f)->offset; + } + + static AccessorVTable *GetForCrep(int crep) { +#define PRIMITIVE(name, type_name) \ + case _pi::CREP_REQUIRED_ ## name: \ + case _pi::CREP_OPTIONAL_ ## name: \ + case _pi::CREP_REPEATED_ ## name: return Get(); + + switch (crep) { + PRIMITIVE(DOUBLE, double); + PRIMITIVE(FLOAT, float); + PRIMITIVE(INT64, int64_t); + PRIMITIVE(UINT64, uint64_t); + PRIMITIVE(INT32, int32_t); + PRIMITIVE(FIXED64, uint64_t); + PRIMITIVE(FIXED32, uint32_t); + PRIMITIVE(BOOL, bool); + case _pi::CREP_REQUIRED_STRING: + case _pi::CREP_OPTIONAL_STRING: + case _pi::CREP_REPEATED_STRING: return GetForString(); + case _pi::CREP_OPTIONAL_OUTOFLINE_STRING: return GetForOutOfLineString(); + case _pi::CREP_REQUIRED_CORD: + case _pi::CREP_OPTIONAL_CORD: + case _pi::CREP_REPEATED_CORD: return GetForCord(); + case _pi::CREP_REQUIRED_GROUP: + case _pi::CREP_REQUIRED_FOREIGN: + case _pi::CREP_REQUIRED_FOREIGN_PROTO2: return GetForRequiredMessage(); + case _pi::CREP_OPTIONAL_GROUP: + case _pi::CREP_REPEATED_GROUP: + case _pi::CREP_OPTIONAL_FOREIGN: + case _pi::CREP_REPEATED_FOREIGN: + case _pi::CREP_OPTIONAL_FOREIGN_PROTO2: + case _pi::CREP_REPEATED_FOREIGN_PROTO2: return 
GetForMessage(); + case _pi::CREP_OPTIONAL_FOREIGN_WEAK: return GetForWeakMessage(); + default: assert(false); return NULL; + } +#undef PRIMITIVE + } + + // PushOffset handler (used for StartSequence and others) /////////////////// + + // We can find a RepeatedField* or a RepeatedPtrField* at f->offset(). + static SubFlow PushOffset(void *m, Value fval) { + const FieldDef *f = GetValue(fval); + return UPB_CONTINUE_WITH(GetFieldPointer(m, f)); + } + + // Primitive Value (numeric, enum, bool) ///////////////////////////////////// + + template static AccessorVTable *Get() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + GetValueHandler(), + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &Append, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + template + static Flow Append(void *_r, Value fval, Value val) { + (void)fval; + // Proto1's ProtoArray class derives from RepeatedField. + RepeatedField* r = static_cast*>(_r); + r->Add(GetValue(val)); + return UPB_CONTINUE; + } + + // String //////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForString() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetString, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendString, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + string *str = static_cast(GetFieldPointer(m, f)); + GetValue(val)->AssignToString(str); + return UPB_CONTINUE; + } + + static Flow AppendString(void *_r, Value fval, Value val) { + (void)fval; + RepeatedPtrField* r = static_cast*>(_r); + GetValue(val)->AssignToString(r->Add()); + return UPB_CONTINUE; + } + + // Out-of-line string //////////////////////////////////////////////////////// + + static AccessorVTable *GetForOutOfLineString() { + static upb_accessor_vtbl 
vtbl = { + NULL, &SetOutOfLineString, + // This type is only used for non-repeated string fields. + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetOutOfLineString(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + string **str = static_cast(GetFieldPointer(m, f)); + if (*str == &::ProtocolMessage::___empty_internal_proto_string_) + *str = new string(); + GetValue(val)->AssignToString(*str); + return UPB_CONTINUE; + } + + // Cord ////////////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForCord() { + static upb_accessor_vtbl vtbl = { + NULL, // StartSubMessage handler + &SetCord, + &PushOffset, // StartSequence handler + NULL, // StartRepeatedSubMessage handler + &AppendCord, + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static Flow SetCord(void *m, Value fval, Value val) { + const FieldDef* f = GetValue(fval); + Cord* field = static_cast(GetFieldPointer(m, f)); + AssignToCord(GetValue(val), field); + return UPB_CONTINUE; + } + + static Flow AppendCord(void *_r, Value fval, Value val) { + RepeatedField* r = static_cast*>(_r); + AssignToCord(GetValue(val), r->Add()); + return UPB_CONTINUE; + } + + // SubMessage //////////////////////////////////////////////////////////////// + + static AccessorVTable *GetForRequiredMessage() { + static upb_accessor_vtbl vtbl = { + &PushOffset, // StartSubMessage handler + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static AccessorVTable *GetForWeakMessage() { + static upb_accessor_vtbl vtbl = { + &StartWeakSubMessage, // StartSubMessage handler + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static AccessorVTable 
*GetForMessage() { + static upb_accessor_vtbl vtbl = { + &StartSubMessage, + NULL, // Value handler + &PushOffset, // StartSequence handler + &StartRepeatedSubMessage, + NULL, // Repeated value handler + NULL, NULL, NULL, NULL, NULL, NULL}; + return &vtbl; + } + + static SubFlow StartSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + Message **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == f->prototype) *subm = (*subm)->New(); + return UPB_CONTINUE_WITH(*subm); + } + + static SubFlow StartWeakSubMessage(void *m, Value fval) { + const FieldDef* f = GetValue(fval); + Message **subm = static_cast(GetFieldPointer(m, f)); + if (*subm == NULL) { + const Message* prototype = static_cast(f->prototype); + *subm = prototype->New(); + } + return UPB_CONTINUE_WITH(*subm); + } + + class RepeatedMessageTypeHandler { + public: + typedef void Type; + // AddAllocated() calls this, but only if other objects are sitting + // around waiting for reuse, which we will not do. + static void Delete(Type* t) { + (void)t; + assert(false); + } + }; + + // Closure is a RepeatedPtrField*, but we access it through + // its base class RepeatedPtrFieldBase*. + static SubFlow StartRepeatedSubMessage(void* _r, Value fval) { + const FieldDef* f = GetValue(fval); + RepeatedPtrFieldBase *r = static_cast(_r); + void *submsg = r->AddFromCleared(); + if (!submsg) { + const Message* prototype = static_cast(f->prototype); + submsg = prototype->New(); + r->AddAllocated(submsg); + } + return UPB_CONTINUE_WITH(submsg); + } +}; + +#endif + +} // namespace proto2_bridge_{google3,opensource} + +static const Message* GetPrototypeForMessage(const Message& m) { + const Message* ret = NULL; + MessageFactory* factory = FieldAccessor::GetMessageFactory(m); + if (factory) { + // proto2 generated message or DynamicMessage. 
+ ret = factory->GetPrototype(m.GetDescriptor()); + assert(ret); + } else { + // Proto1 message; since proto1 has no dynamic message, it must be + // from the generated factory. + ret = MessageFactory::generated_factory()->GetPrototype(m.GetDescriptor()); + assert(ret); // If NULL, then wasn't a proto1 message, can't handle it. + } + assert(ret->GetReflection() == m.GetReflection()); + return ret; +} + +static const Message* GetPrototypeForField(const Message& m, + const FieldDescriptor* f) { +#ifdef UPB_GOOGLE3 + if (f->type() == FieldDescriptor::TYPE_BYTES) { + // Proto1 weak field: the proto2 descriptor says their type is BYTES. + const _pi::Proto2Reflection* r = + dynamic_cast(m.GetReflection()); + assert(r); + const _pi::Field* field = r->GetFieldLayout(f); + assert(field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK); + return GetPrototypeForMessage( + *static_cast(field->weak_layout()->default_instance)); + } else if (dynamic_cast(m.GetReflection())) { + // Proto1 message; since proto1 has no dynamic message, it must be from + // the generated factory. + const Message* ret = + MessageFactory::generated_factory()->GetPrototype(f->message_type()); + assert(ret); + return ret; + } +#endif + assert(f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE); + // We assume that all submessages (and extensions) will be constructed using + // the same MessageFactory as this message. This doesn't cover the case of + // CodedInputStream::SetExtensionRegistry(). + MessageFactory* factory = FieldAccessor::GetMessageFactory(m); + assert(factory); // If neither proto1 nor proto2 we can't handle it. 
+ const Message* ret = factory->GetPrototype(f->message_type()); + assert(ret); + return ret; +} + +namespace proto2_bridge { + +upb::FieldDef* AddFieldDef(const FieldDescriptor* f, upb::MessageDef* md) { + upb::FieldDef* upb_f = upb::FieldDef::New(&upb_f); + upb_f->set_number(f->number()); + upb_f->set_name(f->name()); + upb_f->set_label(static_cast(f->label())); + upb_f->set_type(static_cast(f->type())); + + if (!FieldAccessor::TrySet(f, md, upb_f) +#ifdef UPB_GOOGLE3 + && !proto2_bridge_google3::Proto1FieldAccessor::TrySet(f, md, upb_f) +#endif + ) { + // Unsupported reflection class. + assert(false); + } + + if (upb_f->type() == UPB_TYPE(ENUM)) { + // We set the enum default symbolically. + upb_f->set_default(f->default_value_enum()->name()); + upb_f->set_subtype_name(f->enum_type()->full_name()); + } else { + // Set field default for primitive types. Need to switch on the upb type + // rather than the proto2 type, because upb_f->type() may have been changed + // from BYTES to MESSAGE for a weak field. + switch (upb_types[upb_f->type()].inmemory_type) { + case UPB_CTYPE_INT32: + upb_f->set_default(MakeValue(f->default_value_int32())); + break; + case UPB_CTYPE_INT64: + upb_f->set_default( + MakeValue(static_cast(f->default_value_int64()))); + break; + case UPB_CTYPE_UINT32: + upb_f->set_default(MakeValue(f->default_value_uint32())); + break; + case UPB_CTYPE_UINT64: + upb_f->set_default( + MakeValue(static_cast(f->default_value_uint64()))); + break; + case UPB_CTYPE_DOUBLE: + upb_f->set_default(MakeValue(f->default_value_double())); + break; + case UPB_CTYPE_FLOAT: + upb_f->set_default(MakeValue(f->default_value_float())); + break; + case UPB_CTYPE_BOOL: + upb_f->set_default(MakeValue(f->default_value_bool())); + break; + case UPB_CTYPE_BYTEREGION: + upb_f->set_default(f->default_value_string()); + break; + } + } + return md->AddField(upb_f, &upb_f) ? 
upb_f : NULL; +} + +upb::MessageDef *NewEmptyMessageDef(const Message& m, void *owner) { + upb::MessageDef *md = upb::MessageDef::New(owner); + md->set_full_name(m.GetDescriptor()->full_name()); + md->prototype = GetPrototypeForMessage(m); + return md; +} + +upb::EnumDef* NewEnumDef(const EnumDescriptor* desc, void *owner) { + upb::EnumDef* e = upb::EnumDef::New(owner); + e->set_full_name(desc->full_name()); + for (int i = 0; i < desc->value_count(); i++) { + const EnumValueDescriptor* val = desc->value(i); + bool success = e->AddValue(val->name(), val->number()); + assert(success); + (void)success; + } + return e; +} + +void AddAllFields(upb::MessageDef* md) { + const Descriptor* d = + static_cast(md->prototype)->GetDescriptor(); + for (int i = 0; i < d->field_count(); i++) { +#ifdef UPB_GOOGLE3 + // Skip lazy fields for now since we can't properly handle them. + if (d->field(i)->options().lazy()) continue; +#endif + // Extensions not supported yet. + if (d->field(i)->is_extension()) continue; + AddFieldDef(d->field(i), md); + } +} + +upb::MessageDef *NewFullMessageDef(const Message& m, void *owner) { + upb::MessageDef* md = NewEmptyMessageDef(m, owner); + AddAllFields(md); + // TODO(haberman): add unknown field handler and extensions. + return md; +} + +typedef std::map SymbolMap; + +static upb::MessageDef* NewFinalMessageDefHelper(const Message& m, void *owner, + SymbolMap* symbols) { + upb::MessageDef* md = NewFullMessageDef(m, owner); + // Must do this before processing submessages to prevent infinite recursion. 
+ (*symbols)[std::string(md->full_name())] = md->AsDef(); + + for (upb::MessageDef::Iterator i(md); !i.Done(); i.Next()) { + upb::FieldDef* f = i.field(); + if (!f->HasSubDef()) continue; + SymbolMap::iterator iter = symbols->find(f->subtype_name()); + upb::Def* subdef; + if (iter != symbols->end()) { + subdef = iter->second; + } else { + const FieldDescriptor* proto2_f = + m.GetDescriptor()->FindFieldByNumber(f->number()); + if (f->type() == UPB_TYPE(ENUM)) { + subdef = NewEnumDef(proto2_f->enum_type(), owner)->AsDef(); + (*symbols)[std::string(subdef->full_name())] = subdef; + } else { + assert(f->IsSubmessage()); + const Message* prototype = GetPrototypeForField(m, proto2_f); + subdef = NewFinalMessageDefHelper(*prototype, owner, symbols)->AsDef(); + } + } + f->set_subdef(subdef); + } + return md; +} + +const upb::MessageDef* NewFinalMessageDef(const Message& m, void *owner) { + SymbolMap symbols; + upb::MessageDef* ret = NewFinalMessageDefHelper(m, owner, &symbols); + + // Finalize defs. + std::vector defs; + SymbolMap::iterator iter; + for (iter = symbols.begin(); iter != symbols.end(); ++iter) { + defs.push_back(iter->second); + } + Status status; + bool success = Def::Finalize(defs, &status); + assert(success); + (void)success; + + // Unref all defs except the top-level one that we are returning. + for (int i = 0; i < static_cast(defs.size()); i++) { + if (defs[i] != ret->AsDef()) defs[i]->Unref(owner); + } + + return ret; +} + +} // namespace proto2_bridge +} // namespace upb diff --git a/bindings/cpp/upb/proto2_bridge.hpp b/bindings/cpp/upb/proto2_bridge.hpp new file mode 100644 index 0000000000..ace08ce830 --- /dev/null +++ b/bindings/cpp/upb/proto2_bridge.hpp @@ -0,0 +1,170 @@ +// +// upb - a minimalist implementation of protocol buffers. +// +// Copyright (c) 2011-2012 Google Inc. See LICENSE for details. 
+// Author: Josh Haberman +// +// A bridge between upb and proto2, allows populating proto2 generated +// classes using upb's parser, translating between descriptors and defs, etc. +// +// This is designed to be able to be compiled against either the open-source +// version of protocol buffers or the Google-internal proto2. The two are +// the same in most ways, but live in different namespaces (proto2 vs +// google::protobuf) and have a few other more minor differences. +// +// The bridge gives you a lot of control over which fields will be written to +// the message (fields that are not written will just be skipped), and whether +// unknown fields are written to the UnknownFieldSet. This can save a lot of +// work if the client only cares about some subset of the fields. +// +// Example usage: +// +// // Build a def that will have all fields and parse just like proto2 would. +// const upb::MessageDef* md = upb::proto2_bridge::NewMessageDef(&MyProto()); +// +// // JIT the parser; should only be done once ahead-of-time. +// upb::Handlers* handlers = upb::NewHandlersForMessage(md); +// upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers); +// handlers->Unref(); +// +// // The actual parsing. +// MyProto proto; +// upb::Decoder decoder; +// upb::StringSource source(buf, len); +// decoder.ResetPlan(plan, 0); +// decoder.ResetInput(source.AllBytes(), &proto); +// CHECK(decoder.Decode() == UPB_OK) << decoder.status(); +// +// To parse only one field and skip all others: +// +// const upb::MessageDef* md = +// upb::proto2_bridge::NewEmptyMessageDef(MyProto().GetPrototype()); +// upb::proto2_bridge::AddFieldDef( +// MyProto::descriptor()->FindFieldByName("my_field"), md); +// upb::Finalize(md); +// +// // Now continue with "JIT the parser" from above. +// +// Note that there is currently no support for +// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate +// DescriptorPool and MessageFactory for extensions. 
Since this is a property +// of the input in proto2, it's difficult to build a plan ahead-of-time that +// can properly support this. If it's an important use case, the caller should +// probably build a upb plan explicitly. + +#ifndef UPB_PROTO2_BRIDGE +#define UPB_PROTO2_BRIDGE + +#include + +namespace google { +namespace protobuf { +class Descriptor; +class EnumDescriptor; +class FieldDescriptor; +class FileDescriptor; +class Message; +} // namespace google +} // namespace protobuf + +namespace proto2 { +class Descriptor; +class EnumDescriptor; +class FieldDescriptor; +class FileDescriptor; +class Message; +} // namespace proto2 + + +namespace upb { + +class Def; +class FieldDef; +class MessageDef; + +namespace proto2_bridge { + +// Unfinalized defs //////////////////////////////////////////////////////////// + +// Creating of UNFINALIZED defs. All of these functions return defs that are +// still mutable and have not been finalized. They must be finalized before +// using them to parse anything. This is useful if you want more control over +// the process of constructing defs, eg. to add the specific set of fields you +// care about. + +// Creates a new upb::MessageDef that corresponds to the type in the given +// prototype message. The MessageDef will not have any fields added to it. +upb::MessageDef *NewEmptyMessageDef(const proto2::Message& m, void *owner); +upb::MessageDef *NewEmptyMessageDef(const google::protobuf::Message& desc, + void *owner); + +// Adds a new upb::FieldDef to the given MessageDef corresponding to the given +// FieldDescriptor. The FieldDef will be given an accessor and offset so that +// it can be used to read and write data into the proto2::Message classes. +// The given MessageDef must have been constructed with NewEmptyDefForMessage() +// and f->containing_type() must correspond to the message that was used. 
+// +// Any submessage, group, or enum fields will be given symbolic references to +// the subtype, which must be resolved before the MessageDef can be finalized. +// +// On success, returns the FieldDef that was added (caller does not own a ref). +// If an existing field had the same name or number, returns NULL. +upb::FieldDef* AddFieldDef(const proto2::FieldDescriptor* f, + upb::MessageDef* md); +upb::FieldDef* AddFieldDef(const google::protobuf::FieldDescriptor* f, + upb::MessageDef* md); + +// Given a MessageDef that was constructed with NewEmptyDefForMessage(), adds +// FieldDefs for all fields defined in the original message, but not for any +// extensions or unknown fields. The given MessageDef must not have any fields +// that have the same name or number as any of the fields we are adding (the +// easiest way to guarantee this is to start with an empty MessageDef). +// +// Returns true on success or false if any of the fields could not be added. +void AddAllFields(upb::MessageDef* md); + +// TODO(haberman): Add: +// // Adds a handler that will store unknown fields in the UnknownFieldSet. +// void AddUnknownFieldHandler(upb::MessageDef* md); + +// Returns a new upb::MessageDef that contains handlers for all fields, unknown +// fields, and any extensions in the descriptor's pool. The resulting +// def/handlers should be equivalent to the generated code constructed by the +// protobuf compiler (or the code in DynamicMessage) for the given type. +// The subdefs for message/enum fields (if any) will be referenced symbolically, +// and will need to be resolved before being finalized. +// +// TODO(haberman): Add missing support (LazyField, MessageSet, and extensions). +// +// TODO(haberman): possibly add a similar function that lets you supply a +// separate DescriptorPool and MessageFactory for extensions, to support +// proto2's io::CodedInputStream::SetExtensionRegistry(). 
+upb::MessageDef* NewFullMessageDef(const proto2::Message& m, void *owner); +upb::MessageDef* NewFullMessageDef(const google::protobuf::Message& m, + void *owner); + +// Returns a new upb::EnumDef that corresponds to the given EnumDescriptor. +// Caller owns a ref on the returned EnumDef. +upb::EnumDef* NewEnumDef(const proto2::EnumDescriptor* desc, void *owner); +upb::EnumDef* NewEnumDef(const google::protobuf::EnumDescriptor* desc, + void *owner); + +// Finalized defs ////////////////////////////////////////////////////////////// + +// These functions return FINALIZED defs, meaning that they are immutable and +// ready for use. Since they are immutable you cannot make any further changes +// to eg. the set of fields, but these functions are more convenient if you +// simply want to parse a message exactly how the built-in proto2 parser would. + +// Creates a returns a finalized MessageDef for the give message and its entire +// type tree that will include all fields and unknown handlers (ie. it will +// parse just like proto2 would). 
+const upb::MessageDef* NewFinalMessageDef(const proto2::Message& m, + void *owner); +const upb::MessageDef* NewFinalMessageDef(const google::protobuf::Message& m, + void *owner); + +} // namespace proto2_bridge +} // namespace upb + +#endif diff --git a/bindings/cpp/upb/upb.hpp b/bindings/cpp/upb/upb.hpp index 226859c459..48c2708e8d 100644 --- a/bindings/cpp/upb/upb.hpp +++ b/bindings/cpp/upb/upb.hpp @@ -10,6 +10,16 @@ #include "upb/upb.h" #include +#if defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(UPB_NO_CXX11) +#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ + class_name() = delete; \ + ~class_name() = delete; +#else +#define UPB_DISALLOW_CONSTRUCT_AND_DESTRUCT(class_name) \ + class_name(); \ + ~class_name(); +#endif + namespace upb { typedef upb_success_t Success; @@ -31,11 +41,35 @@ class Status : public upb_status { void Clear() { upb_status_clear(this); } }; -class Value : public upb_value { - public: - Value(const upb_value& val) { *this = val; } - Value() {} -}; +typedef upb_value Value; + +template T GetValue(Value v); +template Value MakeValue(T v); + +#define UPB_VALUE_ACCESSORS(type, ctype) \ + template <> inline ctype GetValue(Value v) { \ + return upb_value_get ## type(v); \ + } \ + template <> inline Value MakeValue(ctype v) { \ + return upb_value_ ## type(v); \ + } + +UPB_VALUE_ACCESSORS(double, double); +UPB_VALUE_ACCESSORS(float, float); +UPB_VALUE_ACCESSORS(int32, int32_t); +UPB_VALUE_ACCESSORS(int64, int64_t); +UPB_VALUE_ACCESSORS(uint32, uint32_t); +UPB_VALUE_ACCESSORS(uint64, uint64_t); +UPB_VALUE_ACCESSORS(bool, bool); + +#undef UPB_VALUE_ACCESSORS + +template inline T* GetPtrValue(Value v) { + return static_cast(upb_value_getptr(v)); +} +template inline Value MakePtrValue(T* v) { + return upb_value_ptr(static_cast(v)); +} INLINE std::ostream& operator<<(std::ostream& out, const Status& status) { out << status.GetString(); diff --git a/bindings/lua/upb.c b/bindings/lua/upb.c index 56c5be96d2..4cce4b6589 100644 --- 
a/bindings/lua/upb.c +++ b/bindings/lua/upb.c @@ -37,11 +37,15 @@ static uint32_t lupb_touint32(lua_State *L, int narg, const char *name) { return n; } -static void lupb_pushstring(lua_State *L, const upb_byteregion *r) { - // TODO: could avoid a copy in the case that the string is contiguous. - char *str = upb_byteregion_strdup(r); - lua_pushlstring(L, str, upb_byteregion_len(r)); - free(str); +static void lupb_pushstring(lua_State *L, const upb_strref *ref) { + if (ref->ptr) { + lua_pushlstring(L, ref->ptr, ref->len); + } else { + // Lua requires a continguous string; must copy+allocate. + char *str = upb_strref_dup(ref); + lua_pushlstring(L, str, ref->len); + free(str); + } } static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) { @@ -73,7 +77,7 @@ static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) { // Returns a scalar value (ie. not a submessage) as a upb_value. static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f, - upb_byteregion *ref) { + upb_strref *ref) { assert(!upb_issubmsg(f)); upb_value val; if (upb_fielddef_type(f) == UPB_TYPE(BOOL)) { @@ -135,7 +139,7 @@ static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f, } static void lupb_typecheck(lua_State *L, int narg, upb_fielddef *f) { - upb_byteregion ref; + upb_strref ref; lupb_getvalue(L, narg, f, &ref); } @@ -298,8 +302,8 @@ static void lupb_fielddef_set(lua_State *L, upb_fielddef *f, } else if (streql(field, "default_value")) { if (!upb_fielddef_type(f)) luaL_error(L, "Must set type before setting default_value"); - upb_byteregion region; - upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, ®ion)); + upb_strref ref; + upb_fielddef_setdefault(f, lupb_getvalue(L, narg, f, &ref)); } else { luaL_error(L, "Cannot set fielddef member '%s'", field); } @@ -778,7 +782,7 @@ static upb_flow_t lupb_msg_string(void *m, upb_value fval, upb_value val, lua_State *L = *(lua_State**)m; int offset = array ? 
lua_rawlen(L, -1) : f->offset; if (!lua_checkstack(L, 1)) luaL_error(L, "stack full"); - lupb_pushstring(L, upb_value_getbyteregion(val)); + lupb_pushstring(L, upb_value_getstrref(val)); lua_rawseti(L, -2, offset); return UPB_CONTINUE; } diff --git a/bindings/python/upb.c b/bindings/python/upb.c index 8f36f7006b..497074b5c9 100644 --- a/bindings/python/upb.c +++ b/bindings/python/upb.c @@ -612,9 +612,8 @@ static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(void *a, upb_value fval static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value val) { PyObject **str = PyUpb_Accessor_GetPtr(m, fval); if (*str) { Py_DECREF(*str); } - upb_byteregion *r = upb_value_getbyteregion(val); - *str = PyString_FromStringAndSize(NULL, upb_byteregion_len(r)); - upb_byteregion_copyall(r, PyString_AsString(*str)); + *str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); + upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str)); upb_stdmsg_sethas(m, fval); return UPB_CONTINUE; } @@ -622,9 +621,8 @@ static upb_flow_t PyUpb_Message_StringValue(void *m, upb_value fval, upb_value v static upb_flow_t PyUpb_Message_AppendStringValue(void *a, upb_value fval, upb_value val) { (void)fval; PyObject **elem = upb_stdarray_append(a, sizeof(void*)); - upb_byteregion *r = upb_value_getbyteregion(val); - *elem = PyString_FromStringAndSize(NULL, upb_byteregion_len(r)); - upb_byteregion_copyall(r, PyString_AsString(*elem)); + *elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len); + upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem)); return UPB_CONTINUE; } diff --git a/tests/test.proto b/tests/test.proto index f3dde2437d..e634ed2983 100644 --- a/tests/test.proto +++ b/tests/test.proto @@ -1,14 +1,10 @@ // A series of messages with various kinds of cycles in them. 
-// +-+---+ +---+ -// V | | | | -// A -> B-+-> C -> D<--+ -// ^ | | -// +----------+----+ -// -// This tests the following cases: -// - B and C are together in multiple cycles -// - B and D are cycles to themselves. +// +-+---+ +---+ +---+ +// V | | V | V | +// A -> B-+-> C -> D---+--->E---+ +// ^ |`---|--------^ +// +----------+----+ F message A { optional B b = 1; @@ -23,11 +19,21 @@ message C { optional A a = 1; optional B b = 2; optional D d = 3; + optional E e = 4; } message D { optional A a = 1; optional D d = 2; + optional E e = 3; +} + +message E { + optional E e = 1; +} + +message F { + optional E e = 1; } // A proto with a bunch of simple primitives. diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 5182217725..4d70e85a76 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -15,6 +15,7 @@ #include "upb/upb.hpp" #include "upb/pb/decoder.hpp" #include "upb/pb/glue.hpp" +#include "upb_test.h" static void TestSymbolTable(const char *descriptor_file) { upb::SymbolTable *s = upb::SymbolTable::New(); @@ -23,20 +24,20 @@ static void TestSymbolTable(const char *descriptor_file) { std::cerr << "Couldn't load descriptor: " << status; exit(1); } - const upb::MessageDef *md = s->LookupMessage("A"); - assert(md); + const upb::MessageDef *md = s->LookupMessage("A", &md); + ASSERT(md); s->Unref(); - md->Unref(); + md->Unref(&md); } static void TestByteStream() { upb::StringSource stringsrc; stringsrc.Reset("testing", 7); upb::ByteRegion* byteregion = stringsrc.AllBytes(); - assert(byteregion->FetchAll() == UPB_BYTE_OK); + ASSERT(byteregion->FetchAll() == UPB_BYTE_OK); char* str = byteregion->StrDup(); - assert(strcmp(str, "testing") == 0); + ASSERT(strcmp(str, "testing") == 0); free(str); } diff --git a/tests/test_decoder.c b/tests/test_decoder.cc similarity index 57% rename from tests/test_decoder.c rename to tests/test_decoder.cc index 14d0e2db86..13403bb78b 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.cc @@ -21,6 +21,10 @@ * of 
submsg/sequences, etc. */ +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS // For PRIuS, etc. +#endif + #include #include #include @@ -32,95 +36,133 @@ #include "upb/upb.h" #include "upb_test.h" +// Copied from decoder.c, since this is not a public interface. typedef struct { - char *buf; - size_t len; -} buffer; + uint8_t native_wire_type; + bool is_numeric; +} upb_decoder_typeinfo; + +static const upb_decoder_typeinfo upb_decoder_types[] = { + {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP + {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE + {UPB_WIRE_TYPE_32BIT, true}, // FLOAT + {UPB_WIRE_TYPE_VARINT, true}, // INT64 + {UPB_WIRE_TYPE_VARINT, true}, // UINT64 + {UPB_WIRE_TYPE_VARINT, true}, // INT32 + {UPB_WIRE_TYPE_64BIT, true}, // FIXED64 + {UPB_WIRE_TYPE_32BIT, true}, // FIXED32 + {UPB_WIRE_TYPE_VARINT, true}, // BOOL + {UPB_WIRE_TYPE_DELIMITED, false}, // STRING + {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP + {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE + {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES + {UPB_WIRE_TYPE_VARINT, true}, // UINT32 + {UPB_WIRE_TYPE_VARINT, true}, // ENUM + {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32 + {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64 + {UPB_WIRE_TYPE_VARINT, true}, // SINT32 + {UPB_WIRE_TYPE_VARINT, true}, // SINT64 +}; + + +class buffer { + public: + buffer(const void *data, size_t len) : len_(0) { append(data, len); } + explicit buffer(const char *data) : len_(0) { append(data); } + explicit buffer(size_t len) : len_(len) { memset(buf_, 0, len); } + buffer(const buffer& buf) : len_(0) { append(buf); } + buffer() : len_(0) {} + + void append(const void *data, size_t len) { + ASSERT_NOCOUNT(len + len_ < sizeof(buf_)); + memcpy(buf_ + len_, data, len); + len_ += len; + buf_[len_] = NULL; + } -// Mem is initialized to NULL. 
-buffer *buffer_new(size_t len) { - buffer *buf = malloc(sizeof(*buf)); - buf->buf = malloc(len); - buf->len = len; - memset(buf->buf, 0, buf->len); - return buf; -} + void append(const buffer& buf) { + append(buf.buf_, buf.len_); + } -buffer *buffer_new2(const void *data, size_t len) { - buffer *buf = buffer_new(len); - memcpy(buf->buf, data, len); - return buf; -} + void append(const char *str) { + append(str, strlen(str)); + } -buffer *buffer_new3(const char *data) { - return buffer_new2(data, strlen(data)); -} + void vappendf(const char *fmt, va_list args) { + size_t avail = sizeof(buf_) - len_; + size_t size = vsnprintf(buf_ + len_, avail, fmt, args); + ASSERT_NOCOUNT(avail > size); + len_ += size; + } -buffer *buffer_dup(buffer *buf) { return buffer_new2(buf->buf, buf->len); } + void appendf(const char *fmt, ...) { + va_list args; + va_start(args, fmt); + vappendf(fmt, args); + va_end(args); + } -void buffer_free(buffer *buf) { - free(buf->buf); - free(buf); -} + void assign(const buffer& buf) { + clear(); + append(buf); + } -void buffer_appendf(buffer *buf, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - size_t size = buf->len; - buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args); - va_end(args); -} + bool eql(const buffer& other) const { + return len_ == other.len_ && memcmp(buf_, other.buf_, len_) == 0; + } -void buffer_cat(buffer *buf, buffer *buf2) { - size_t newlen = buf->len + buf2->len; - buf->buf = realloc(buf->buf, newlen); - memcpy(buf->buf + buf->len, buf2->buf, buf2->len); - buf->len = newlen; - buffer_free(buf2); -} + void clear() { len_ = 0; } + size_t len() const { return len_; } + const char *buf() const { return buf_; } -bool buffer_eql(buffer *buf, buffer *buf2) { - return buf->len == buf2->len && memcmp(buf->buf, buf2->buf, buf->len) == 0; -} + private: + // Has to be big enough for the largest string used in the test. 
+ char buf_[32768]; + size_t len_; +}; /* Routines for building arbitrary protos *************************************/ -buffer *cat(buffer *arg1, ...) { - va_list ap; - buffer *arg; - va_start(ap, arg1); - while ((arg = va_arg(ap, buffer*)) != NULL) { - buffer_cat(arg1, arg); - } - va_end(ap); - return arg1; +const buffer empty; + +buffer cat(const buffer& a, const buffer& b, + const buffer& c = empty, + const buffer& d = empty, + const buffer& e = empty) { + buffer ret; + ret.append(a); + ret.append(b); + ret.append(c); + ret.append(d); + ret.append(e); + return ret; } -buffer *varint(uint64_t x) { - buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1); - buf->len = upb_vencode64(x, buf->buf); - return buf; +buffer varint(uint64_t x) { + char buf[UPB_PB_VARINT_MAX_LEN]; + size_t len = upb_vencode64(x, buf); + return buffer(buf, len); } // TODO: proper byte-swapping for big-endian machines. -buffer *fixed32(void *data) { return buffer_new2(data, 4); } -buffer *fixed64(void *data) { return buffer_new2(data, 8); } - -buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); } -buffer *uint32(uint32_t u32) { return fixed32(&u32); } -buffer *uint64(uint64_t u64) { return fixed64(&u64); } -buffer *flt(float f) { return fixed32(&f); } -buffer *dbl(double d) { return fixed64(&d); } -buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); } -buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); } - -buffer *tag(uint32_t fieldnum, char wire_type) { +buffer fixed32(void *data) { return buffer(data, 4); } +buffer fixed64(void *data) { return buffer(data, 8); } + +buffer delim(const buffer& buf) { return cat(varint(buf.len()), buf); } +buffer uint32(uint32_t u32) { return fixed32(&u32); } +buffer uint64(uint64_t u64) { return fixed64(&u64); } +buffer flt(float f) { return fixed32(&f); } +buffer dbl(double d) { return fixed64(&d); } +buffer zz32(int32_t x) { return varint(upb_zzenc_32(x)); } +buffer zz64(int64_t x) { return varint(upb_zzenc_64(x)); } + 
+buffer tag(uint32_t fieldnum, char wire_type) { return varint((fieldnum << 3) | wire_type); } -buffer *submsg(uint32_t fn, buffer *buf) { - return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf), NULL ); +buffer submsg(uint32_t fn, const buffer& buf) { + return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) ); } @@ -128,11 +170,26 @@ buffer *submsg(uint32_t fn, buffer *buf) { // The handlers simply append to a string indicating what handlers were called. // This string is similar to protobuf text format but fields are referred to by -// number instead of name and sequences are explicitly delimited. +// number instead of name and sequences are explicitly delimited. We indent +// using the closure depth to test that the stack of closures is properly +// handled. + +int closures[UPB_MAX_NESTING]; +buffer output; + +void indentbuf(buffer *buf, int depth) { + for (int i = 0; i < depth; i++) + buf->append(" ", 2); +} + +void indent(void *depth) { + indentbuf(&output, *(int*)depth); +} #define VALUE_HANDLER(member, fmt) \ upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \ - buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ", \ + indent(closure); \ + output.appendf("%" PRIu32 ":%" fmt "\n", \ upb_value_getuint32(fval), upb_value_get ## member(val)); \ return UPB_CONTINUE; \ } @@ -145,7 +202,8 @@ VALUE_HANDLER(float, "g") VALUE_HANDLER(double, "g") upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) { - buffer_appendf(closure, "%" PRIu32 ":%s; ", + indent(closure); + output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), upb_value_getbool(val) ? "true" : "false"); return UPB_CONTINUE; @@ -153,34 +211,49 @@ upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) { upb_flow_t value_string(void *closure, upb_value fval, upb_value val) { // Note: won't work with strings that contain NULL. 
+ indent(closure); char *str = upb_byteregion_strdup(upb_value_getbyteregion(val)); - buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str); + output.appendf("%" PRIu32 ":%s\n", upb_value_getuint32(fval), str); free(str); return UPB_CONTINUE; } upb_sflow_t startsubmsg(void *closure, upb_value fval) { - buffer_appendf(closure, "%" PRIu32 ":{ ", upb_value_getuint32(fval)); - return UPB_CONTINUE_WITH(closure); + indent(closure); + output.appendf("%" PRIu32 ":{\n", upb_value_getuint32(fval)); + return UPB_CONTINUE_WITH(((int*)closure) + 1); } upb_flow_t endsubmsg(void *closure, upb_value fval) { - (void)fval; - buffer_appendf(closure, "} "); + indent(closure); + output.append("}\n"); return UPB_CONTINUE; } upb_sflow_t startseq(void *closure, upb_value fval) { - buffer_appendf(closure, "%" PRIu32 ":[ ", upb_value_getuint32(fval)); - return UPB_CONTINUE_WITH(closure); + indent(closure); + output.appendf("%" PRIu32 ":[\n", upb_value_getuint32(fval)); + return UPB_CONTINUE_WITH(((int*)closure) + 1); } upb_flow_t endseq(void *closure, upb_value fval) { - (void)fval; - buffer_appendf(closure, "] "); + indent(closure); + output.append("]\n"); return UPB_CONTINUE; } +upb_flow_t startmsg(void *closure) { + indent(closure); + output.append("<\n"); + return UPB_CONTINUE; +} + +void endmsg(void *closure, upb_status *status) { + (void)status; + indent(closure); + output.append(">\n"); +} + void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated, upb_value_handler *handler) { upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated); @@ -221,6 +294,9 @@ void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, } void reghandlers(upb_mhandlers *m) { + upb_mhandlers_setstartmsg(m, &startmsg); + upb_mhandlers_setendmsg(m, &endmsg); + // Register handlers for each type. 
reg(m, UPB_TYPE(DOUBLE), &value_double); reg(m, UPB_TYPE(FLOAT), &value_float); @@ -267,7 +343,7 @@ size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) { } upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) { - upb_seamsrc *src = _src; + upb_seamsrc *src = (upb_seamsrc*)_src; assert(ofs < src->len); if (ofs == src->len) { upb_status_seteof(&src->bytesrc.status); @@ -279,7 +355,7 @@ upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) { void upb_seamsrc_copy(const void *_src, uint64_t ofs, size_t len, char *dst) { - const upb_seamsrc *src = _src; + const upb_seamsrc *src = (const upb_seamsrc*)_src; assert(ofs + len <= src->len); memcpy(dst, src->str + ofs, len); } @@ -290,7 +366,7 @@ void upb_seamsrc_discard(void *src, uint64_t ofs) { } const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) { - const upb_seamsrc *src = _s; + const upb_seamsrc *src = (const upb_seamsrc*)_s; *len = upb_seamsrc_avail(src, ofs); return src->str + ofs; } @@ -314,7 +390,7 @@ void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) { } void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) { - ASSERT(seam1 <= seam2); + assert(seam1 <= seam2); s->seam1 = seam1; s->seam2 = seam2; s->byteregion.discard = 0; @@ -337,83 +413,68 @@ upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) { /* Running of test cases ******************************************************/ upb_decoderplan *plan; - -void run_decoder(buffer *proto, buffer *expected_output) { +#define LINE(x) x "\n" +void run_decoder(const buffer& proto, const buffer* expected_output) { upb_seamsrc src; - upb_seamsrc_init(&src, proto->buf, proto->len); + upb_seamsrc_init(&src, proto.buf(), proto.len()); upb_decoder d; upb_decoder_init(&d); upb_decoder_resetplan(&d, plan, 0); - for (size_t i = 0; i < proto->len; i++) { - for (size_t j = i; j < proto->len; j++) { + for (size_t i = 0; i < proto.len(); i++) { + for (size_t j = i; j < 
UPB_MIN(proto.len(), i + 5); j++) { upb_seamsrc_resetseams(&src, i, j); upb_byteregion *input = upb_seamsrc_allbytes(&src); - buffer *output = buffer_new(0); - upb_decoder_resetinput(&d, input, output); + output.clear(); + upb_decoder_resetinput(&d, input, &closures[0]); upb_success_t success = UPB_SUSPENDED; while (success == UPB_SUSPENDED) success = upb_decoder_decode(&d); ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK)); if (expected_output) { - ASSERT(success == UPB_OK); + ASSERT_STATUS(success == UPB_OK, upb_decoder_status(&d)); // The input should be fully consumed. ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input)); ASSERT(upb_byteregion_discardofs(input) == upb_byteregion_endofs(input)); - if (!buffer_eql(output, expected_output)) { + if (!output.eql(*expected_output)) { fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", - output->buf, expected_output->buf); + output.buf(), expected_output->buf()); } - ASSERT(strcmp(output->buf, expected_output->buf) == 0); + ASSERT(output.eql(*expected_output)); } else { ASSERT(success == UPB_ERROR); } - buffer_free(output); } } - upb_seamsrc_uninit(&src); upb_decoder_uninit(&d); - buffer_free(proto); -} - -void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt, - va_list args) { - buffer *expected_text = buffer_new(0); - size_t size = expected_text->len; - expected_text->len += upb_vrprintf(&expected_text->buf, &size, - expected_text->len, expected_fmt, args); - run_decoder(proto, expected_text); - buffer_free(expected_text); + upb_seamsrc_uninit(&src); } -void assert_does_not_parse_at_eof(buffer *proto) { - run_decoder(proto, NULL); -} +const static buffer thirty_byte_nop = buffer(cat( + tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer(30)) )); -void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) { - // The JIT is only used for data >=20 bytes from end-of-buffer, so - // repeat once with no-op padding data at the end of buffer. 
- va_list args, args2; +void assert_successful_parse(const buffer& proto, + const char *expected_fmt, ...) { + buffer expected_text; + va_list args; va_start(args, expected_fmt); - va_copy(args2, args); - assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args); - assert_successful_parse_at_eof( - cat( proto, - tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)), - NULL ), - expected_fmt, args2); + expected_text.vappendf(expected_fmt, args); va_end(args); - va_end(args2); + // The JIT is only used for data >=20 bytes from end-of-buffer, so + // repeat once with no-op padding data at the end of buffer. + run_decoder(proto, &expected_text); + run_decoder(cat( proto, thirty_byte_nop ), &expected_text); } -void assert_does_not_parse(buffer *proto) { +void assert_does_not_parse_at_eof(const buffer& proto) { + run_decoder(proto, NULL); +} + +void assert_does_not_parse(const buffer& proto) { // The JIT is only used for data >=20 bytes from end-of-buffer, so // repeat once with no-op padding data at the end of buffer. - assert_does_not_parse_at_eof(buffer_dup(proto)); - assert_does_not_parse_at_eof( - cat( proto, - tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)), - NULL )); + assert_does_not_parse_at_eof(proto); + assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop )); } @@ -421,19 +482,19 @@ void assert_does_not_parse(buffer *proto) { void test_premature_eof_for_type(upb_fieldtype_t type) { // Incomplete values for each wire type. 
- static const char *incompletes[] = { - "\x80", // UPB_WIRE_TYPE_VARINT - "abcdefg", // UPB_WIRE_TYPE_64BIT - "\x80", // UPB_WIRE_TYPE_DELIMITED (partial length) - NULL, // UPB_WIRE_TYPE_START_GROUP (no value required) - NULL, // UPB_WIRE_TYPE_END_GROUP (no value required) - "abc" // UPB_WIRE_TYPE_32BIT + static const buffer incompletes[6] = { + buffer("\x80"), // UPB_WIRE_TYPE_VARINT + buffer("abcdefg"), // UPB_WIRE_TYPE_64BIT + buffer("\x80"), // UPB_WIRE_TYPE_DELIMITED (partial length) + buffer(), // UPB_WIRE_TYPE_START_GROUP (no value required) + buffer(), // UPB_WIRE_TYPE_END_GROUP (no value required) + buffer("abc") // UPB_WIRE_TYPE_32BIT }; uint32_t fieldnum = type; uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_types[type].native_wire_type; - const char *incomplete = incompletes[wire_type]; + int wire_type = upb_decoder_types[type].native_wire_type; + const buffer& incomplete = incompletes[wire_type]; // EOF before a known non-repeated value. assert_does_not_parse_at_eof(tag(fieldnum, wire_type)); @@ -446,108 +507,128 @@ void test_premature_eof_for_type(upb_fieldtype_t type) { // EOF inside a known non-repeated value. assert_does_not_parse_at_eof( - cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL )); + cat( tag(fieldnum, wire_type), incomplete )); // EOF inside a known repeated value. assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL )); + cat( tag(rep_fieldnum, wire_type), incomplete )); // EOF inside an unknown value. assert_does_not_parse_at_eof( - cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL )); + cat( tag(UNKNOWN_FIELD, wire_type), incomplete )); if (wire_type == UPB_WIRE_TYPE_DELIMITED) { // EOF in the middle of delimited data for known non-repeated value. assert_does_not_parse_at_eof( - cat( tag(fieldnum, wire_type), varint(1), NULL )); + cat( tag(fieldnum, wire_type), varint(1) )); // EOF in the middle of delimited data for known repeated value. 
assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, wire_type), varint(1), NULL )); + cat( tag(rep_fieldnum, wire_type), varint(1) )); // EOF in the middle of delimited data for unknown value. assert_does_not_parse_at_eof( - cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL )); + cat( tag(UNKNOWN_FIELD, wire_type), varint(1) )); if (type == UPB_TYPE(MESSAGE)) { // Submessage ends in the middle of a value. - buffer *incomplete_submsg = + buffer incomplete_submsg = cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT), - buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL ); + incompletes[UPB_WIRE_TYPE_VARINT] ); assert_does_not_parse( cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED), - varint(incomplete_submsg->len), - incomplete_submsg, NULL )); + varint(incomplete_submsg.len()), + incomplete_submsg )); } } else { // Packed region ends in the middle of a value. assert_does_not_parse( cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - varint(strlen(incomplete)), - buffer_new3(incomplete), NULL )); + varint(incomplete.len()), + incomplete )); // EOF in the middle of packed region. assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL )); + cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) )); } } // "33" and "66" are just two random values that all numeric types can // represent. 
void test_valid_data_for_type(upb_fieldtype_t type, - buffer *enc33, buffer *enc66) { + const buffer& enc33, const buffer& enc66) { uint32_t fieldnum = type; uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_types[type].native_wire_type; + int wire_type = upb_decoder_types[type].native_wire_type; // Non-repeated assert_successful_parse( - cat( tag(fieldnum, wire_type), buffer_dup(enc33), - tag(fieldnum, wire_type), buffer_dup(enc66), NULL ), - "%u:33; %u:66; ", fieldnum, fieldnum); + cat( tag(fieldnum, wire_type), enc33, + tag(fieldnum, wire_type), enc66 ), + LINE("<") + LINE("%u:33") + LINE("%u:66") + LINE(">"), fieldnum, fieldnum); // Non-packed repeated. assert_successful_parse( - cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33), - tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ), - "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum); + cat( tag(rep_fieldnum, wire_type), enc33, + tag(rep_fieldnum, wire_type), enc66 ), + LINE("<") + LINE("%u:[") + LINE(" %u:33") + LINE(" %u:66") + LINE("]") + LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); // Packed repeated. 
assert_successful_parse( cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ), - "%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum); - - buffer_free(enc33); - buffer_free(enc66); + delim(cat( enc33, enc66 )) ), + LINE("<") + LINE("%u:[") + LINE(" %u:33") + LINE(" %u:66") + LINE("]") + LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); } void test_valid_data_for_signed_type(upb_fieldtype_t type, - buffer *enc33, buffer *enc66) { + const buffer& enc33, const buffer& enc66) { uint32_t fieldnum = type; uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_types[type].native_wire_type; + int wire_type = upb_decoder_types[type].native_wire_type; // Non-repeated assert_successful_parse( - cat( tag(fieldnum, wire_type), buffer_dup(enc33), - tag(fieldnum, wire_type), buffer_dup(enc66), NULL ), - "%u:33; %u:-66; ", fieldnum, fieldnum); + cat( tag(fieldnum, wire_type), enc33, + tag(fieldnum, wire_type), enc66 ), + LINE("<") + LINE("%u:33") + LINE("%u:-66") + LINE(">"), fieldnum, fieldnum); // Non-packed repeated. assert_successful_parse( - cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33), - tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ), - "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum); + cat( tag(rep_fieldnum, wire_type), enc33, + tag(rep_fieldnum, wire_type), enc66 ), + LINE("<") + LINE("%u:[") + LINE(" %u:33") + LINE(" %u:-66") + LINE("]") + LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); // Packed repeated. 
assert_successful_parse( cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ), - "%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum); - - buffer_free(enc33); - buffer_free(enc66); + delim(cat( enc33, enc66 )) ), + LINE("<") + LINE("%u:[") + LINE(" %u:33") + LINE(" %u:-66") + LINE("]") + LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); } // Test that invalid protobufs are properly detected (without crashing) and @@ -571,7 +652,7 @@ void test_invalid() { test_premature_eof_for_type(UPB_TYPE(SINT64)); // EOF inside a tag's varint. - assert_does_not_parse_at_eof( buffer_new3("\x80") ); + assert_does_not_parse_at_eof( buffer("\x80") ); // EOF inside a known group. assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) ); @@ -584,33 +665,19 @@ void test_invalid() { // Field number is 0. assert_does_not_parse( - cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL )); + cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) )); // Field number is too large. assert_does_not_parse( cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED), - varint(0), NULL )); + varint(0) )); // Test exceeding the resource limit of stack depth. - buffer *buf = buffer_new3(""); + buffer buf; for (int i = 0; i < UPB_MAX_NESTING; i++) { - buf = submsg(UPB_TYPE(MESSAGE), buf); + buf.assign(submsg(UPB_TYPE(MESSAGE), buf)); } assert_does_not_parse(buf); - - // Staying within the stack limit should work properly. 
- buf = buffer_new3(""); - buffer *textbuf = buffer_new3(""); - int total = UPB_MAX_NESTING - 1; - for (int i = 0; i < total; i++) { - buf = submsg(UPB_TYPE(MESSAGE), buf); - buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE)); - } - for (int i = 0; i < total; i++) { - buffer_appendf(textbuf, "} "); - } - assert_successful_parse(buf, "%s", textbuf->buf); - buffer_free(textbuf); } void test_valid() { @@ -629,16 +696,80 @@ void test_valid() { test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66)); test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66)); + // Test implicit startseq/endseq. + uint32_t repfl_fn = rep_fn(UPB_TYPE(FLOAT)); + uint32_t repdb_fn = rep_fn(UPB_TYPE(DOUBLE)); + assert_successful_parse( + cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33), + tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ), + LINE("<") + LINE("%u:[") + LINE(" %u:33") + LINE("]") + LINE("%u:[") + LINE(" %u:66") + LINE("]") + LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn); + // Submessage tests. 
uint32_t msg_fn = UPB_TYPE(MESSAGE); assert_successful_parse( - submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))), - "%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn); + submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer()))), + LINE("<") + LINE("%u:{") + LINE(" <") + LINE(" %u:{") + LINE(" <") + LINE(" %u:{") + LINE(" <") + LINE(" >") + LINE(" }") + LINE(" >") + LINE(" }") + LINE(" >") + LINE("}") + LINE(">"), msg_fn, msg_fn, msg_fn); uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE)); assert_successful_parse( - submsg(repm_fn, submsg(repm_fn, buffer_new3(""))), - "%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn); + submsg(repm_fn, submsg(repm_fn, buffer())), + LINE("<") + LINE("%u:[") + LINE(" %u:{") + LINE(" <") + LINE(" %u:[") + LINE(" %u:{") + LINE(" <") + LINE(" >") + LINE(" }") + LINE(" ]") + LINE(" >") + LINE(" }") + LINE("]") + LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn); + + // Staying within the stack limit should work properly. + buffer buf; + buffer textbuf; + int total = UPB_MAX_NESTING - 1; + for (int i = 0; i < total; i++) { + buf.assign(submsg(UPB_TYPE(MESSAGE), buf)); + indentbuf(&textbuf, i); + textbuf.append("<\n"); + indentbuf(&textbuf, i); + textbuf.appendf("%u:{\n", UPB_TYPE(MESSAGE)); + } + indentbuf(&textbuf, total); + textbuf.append("<\n"); + indentbuf(&textbuf, total); + textbuf.append(">\n"); + for (int i = 0; i < total; i++) { + indentbuf(&textbuf, total - i - 1); + textbuf.append("}\n"); + indentbuf(&textbuf, total - i - 1); + textbuf.append(">\n"); + } + assert_successful_parse(buf, "%s", textbuf.buf()); } void run_tests() { @@ -647,10 +778,17 @@ void run_tests() { } int main() { + for (int i = 0; i < UPB_MAX_NESTING; i++) { + closures[i] = i; + } // Construct decoder plan. upb_handlers *h = upb_handlers_new(); reghandlers(upb_handlers_newmhandlers(h)); + // Create an empty handlers to make sure that the decoder can handle empty + // messages. + upb_handlers_newmhandlers(h); + // Test without JIT. 
plan = upb_decoderplan_new(h, false); run_tests(); @@ -658,6 +796,11 @@ int main() { // Test JIT. plan = upb_decoderplan_new(h, true); +#ifdef UPB_USE_JIT_X64 + ASSERT(upb_decoderplan_hasjitcode(plan)); +#else + ASSERT(!upb_decoderplan_hasjitcode(plan)); +#endif run_tests(); upb_decoderplan_unref(plan); diff --git a/tests/test_def.c b/tests/test_def.c index 3ca30648dc..698532eaf1 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -1,19 +1,174 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2011 Google Inc. See LICENSE for details. + * + * Test of defs and symtab. There should be far more tests of edge conditions + * (like attempts to link defs that don't have required properties set). + */ -#undef NDEBUG /* ensure tests always assert. */ #include "upb/def.h" +#include "upb/pb/glue.h" +#include "upb_test.h" #include +#include -int main() { - upb_symtab *s = upb_symtab_new(); +const char *descriptor_file; - // Will be empty atm since we haven't added anything to the symtab. +static void test_empty_symtab() { + upb_symtab *s = upb_symtab_new(); int count; - const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); - for (int i = 0; i < count; i++) { - upb_def_unref(defs[i]); - } + const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL); + ASSERT(count == 0); free(defs); + upb_symtab_unref(s); +} +static upb_symtab *load_test_proto() { + upb_symtab *s = upb_symtab_new(); + ASSERT(s); + upb_status status = UPB_STATUS_INIT; + if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) { + fprintf(stderr, "Error loading descriptor file: %s\n", + upb_status_getstr(&status)); + exit(1); + } + upb_status_uninit(&status); + return s; +} + +static void test_cycles() { + upb_symtab *s = load_test_proto(); + + // Test cycle detection by making a cyclic def's main refcount go to zero + // and then be incremented to one again. 
+ const upb_def *def = upb_symtab_lookup(s, "A", &def); + ASSERT(def); + ASSERT(upb_def_isfinalized(def)); upb_symtab_unref(s); + + // Message A has only one subfield: "optional B b = 1". + const upb_msgdef *m = upb_downcast_msgdef_const(def); + upb_fielddef *f = upb_msgdef_itof(m, 1); + ASSERT(f); + ASSERT(upb_hassubdef(f)); + const upb_def *def2 = upb_fielddef_subdef(f); + ASSERT(upb_downcast_msgdef_const(def2)); + ASSERT(strcmp(upb_def_fullname(def2), "B") == 0); + + upb_def_ref(def2, &def2); + upb_def_unref(def, &def); + upb_def_unref(def2, &def2); +} + +static void test_fielddef_unref() { + upb_symtab *s = load_test_proto(); + const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md); + upb_fielddef *f = upb_msgdef_itof(md, 1); + upb_fielddef_ref(f, &f); + + // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef + // alive. + upb_symtab_unref(s); + upb_msgdef_unref(md, &md); + // Check that md is still alive. + ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0); + + // Check that unref of fielddef frees the whole remaining graph. 
+ upb_fielddef_unref(f, &f); +} + +static void test_fielddef_accessors() { + upb_fielddef *f1 = upb_fielddef_new(&f1); + upb_fielddef *f2 = upb_fielddef_new(&f2); + + ASSERT(upb_fielddef_ismutable(f1)); + upb_fielddef_setname(f1, "f1"); + upb_fielddef_setnumber(f1, 1937); + upb_fielddef_settype(f1, UPB_TYPE(FIXED64)); + upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED)); + ASSERT(upb_fielddef_number(f1) == 1937); + + ASSERT(upb_fielddef_ismutable(f2)); + upb_fielddef_setname(f2, "f2"); + upb_fielddef_setnumber(f2, 1572); + upb_fielddef_settype(f2, UPB_TYPE(BYTES)); + upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED)); + ASSERT(upb_fielddef_number(f2) == 1572); + + upb_fielddef_unref(f1, &f1); + upb_fielddef_unref(f2, &f2); +} + +static upb_fielddef *newfield( + const char *name, int32_t num, uint8_t type, uint8_t label, + const char *type_name, void *owner) { + upb_fielddef *f = upb_fielddef_new(owner); + upb_fielddef_setname(f, name); + upb_fielddef_setnumber(f, num); + upb_fielddef_settype(f, type); + upb_fielddef_setlabel(f, label); + upb_fielddef_setsubtypename(f, type_name); + return f; +} + +static upb_msgdef *upb_msgdef_newnamed(const char *name, void *owner) { + upb_msgdef *m = upb_msgdef_new(owner); + upb_def_setfullname(UPB_UPCAST(m), name); + return m; +} + +INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) { + upb_enumdef *e = upb_enumdef_new(owner); + upb_def_setfullname(UPB_UPCAST(e), name); + return e; +} + +void test_replacement() { + upb_symtab *s = upb_symtab_new(); + + upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s); + upb_msgdef_addfield(m, newfield( + "field1", 1, UPB_TYPE(ENUM), UPB_LABEL(OPTIONAL), ".MyEnum", &s), &s); + upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s); + upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s); + + upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)}; + upb_status status = UPB_STATUS_INIT; + ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status); + + // Try 
adding a new definition of MyEnum, MyMessage should get replaced with + // a new version. + upb_enumdef *e2 = upb_enumdef_new(&s); + upb_def_setfullname(UPB_UPCAST(e2), "MyEnum"); + upb_def *newdefs2[] = {UPB_UPCAST(e2)}; + ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status); + + const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage", &m3); + ASSERT(m3); + // Must be different because it points to MyEnum which was replaced. + ASSERT(m3 != m); + upb_msgdef_unref(m3, &m3); + + m3 = upb_symtab_lookupmsg(s, "MyMessage2", &m3); + // Should be the same because it was not replaced, nor were any defs that + // are reachable from it. + ASSERT(m3 == m2); + upb_msgdef_unref(m3, &m3); + + upb_symtab_unref(s); +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: test_def \n"); + return 1; + } + descriptor_file = argv[1]; + test_empty_symtab(); + test_cycles(); + test_fielddef_accessors(); + test_fielddef_unref(); + test_replacement(); return 0; } diff --git a/tests/test_table.cc b/tests/test_table.cc index 47e083fda7..2538e350f6 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,8 +1,11 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * + * Tests for upb_table. + */ -#undef NDEBUG /* ensure tests always assert. 
*/ -#include "upb/table.h" -#include "test_util.h" -#include #include #include #include @@ -11,55 +14,45 @@ #include #include #include +#include "tests/test_util.h" +#include "tests/upb_test.h" +#include "upb/table.h" bool benchmark = false; #define CPU_TIME_PER_TEST 0.5 using std::vector; -typedef struct { - uint32_t value; /* key*2 */ -} inttable_entry; - -typedef struct { - int32_t value; /* ASCII Value of first letter */ -} strtable_entry; - -double get_usertime() -{ +double get_usertime() { struct rusage usage; getrusage(RUSAGE_SELF, &usage); return usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec/1000000.0); } /* num_entries must be a power of 2. */ -void test_strtable(const vector& keys, uint32_t num_to_insert) -{ +void test_strtable(const vector& keys, uint32_t num_to_insert) { /* Initialize structures. */ upb_strtable table; std::map m; - upb_strtable_init(&table, 0, sizeof(strtable_entry)); + upb_strtable_init(&table); std::set all; for(size_t i = 0; i < num_to_insert; i++) { const std::string& key = keys[i]; all.insert(key); - strtable_entry e; - e.value = key[0]; - upb_strtable_insert(&table, key.c_str(), &e); + upb_strtable_insert(&table, key.c_str(), upb_value_int32(key[0])); m[key] = key[0]; } /* Test correctness. */ for(uint32_t i = 0; i < keys.size(); i++) { const std::string& key = keys[i]; - strtable_entry *e = - (strtable_entry*)upb_strtable_lookup(&table, key.c_str()); + const upb_value *v = upb_strtable_lookup(&table, key.c_str()); if(m.find(key) != m.end()) { /* Assume map implementation is correct. 
*/ - assert(e); - assert(e->value == key[0]); - assert(m[key] == key[0]); + ASSERT(v); + ASSERT(upb_value_getint32(*v) == key[0]); + ASSERT(m[key] == key[0]); } else { - assert(e == NULL); + ASSERT(v == NULL); } } @@ -69,66 +62,83 @@ void test_strtable(const vector& keys, uint32_t num_to_insert) const char *key = upb_strtable_iter_key(&iter); std::string tmp(key, strlen(key)); std::set::iterator i = all.find(tmp); - assert(i != all.end()); + ASSERT(i != all.end()); all.erase(i); } - assert(all.empty()); + ASSERT(all.empty()); - upb_strtable_free(&table); + upb_strtable_uninit(&table); } /* num_entries must be a power of 2. */ -void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) -{ +void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) { /* Initialize structures. */ upb_inttable table; uint32_t largest_key = 0; std::map m; __gnu_cxx::hash_map hm; - upb_inttable_init(&table, num_entries, sizeof(inttable_entry)); + upb_inttable_init(&table); for(size_t i = 0; i < num_entries; i++) { int32_t key = keys[i]; largest_key = UPB_MAX((int32_t)largest_key, key); - inttable_entry e; - e.value = (key*2) << 1; - upb_inttable_insert(&table, key, &e); + upb_inttable_insert(&table, key, upb_value_uint32(key * 2)); m[key] = key*2; hm[key] = key*2; } /* Test correctness. */ for(uint32_t i = 0; i <= largest_key; i++) { - inttable_entry *e = (inttable_entry*)upb_inttable_lookup( - &table, i); + const upb_value *v = upb_inttable_lookup(&table, i); + if(m.find(i) != m.end()) { /* Assume map implementation is correct. 
*/ + ASSERT(v); + ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(m[i] == i*2); + ASSERT(hm[i] == i*2); + } else { + ASSERT(v == NULL); + } + } + + for(uint16_t i = 0; i < num_entries; i += 2) { + upb_value val; + bool ret = upb_inttable_remove(&table, keys[i], &val); + ASSERT(ret == (m.erase(keys[i]) == 1)); + if (ret) ASSERT(upb_value_getuint32(val) == keys[i] * 2); + hm.erase(keys[i]); + m.erase(keys[i]); + } + + ASSERT(upb_inttable_count(&table) == hm.size()); + + /* Test correctness. */ + for(uint32_t i = 0; i <= largest_key; i++) { + const upb_value *v = upb_inttable_lookup(&table, i); if(m.find(i) != m.end()) { /* Assume map implementation is correct. */ - assert(e); - //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value); - assert(((e->value) >> 1) == i*2); - assert(m[i] == i*2); - assert(hm[i] == i*2); + ASSERT(v); + ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(m[i] == i*2); + ASSERT(hm[i] == i*2); } else { - assert(e == NULL); + ASSERT(v == NULL); } } // Compact and test correctness again. upb_inttable_compact(&table); for(uint32_t i = 0; i <= largest_key; i++) { - inttable_entry *e = (inttable_entry*)upb_inttable_lookup( - &table, i); + const upb_value *v = upb_inttable_lookup(&table, i); if(m.find(i) != m.end()) { /* Assume map implementation is correct. 
*/ - assert(e); - //printf("addr: %p, expected: %d, actual: %d\n", e, i*2, e->value); - assert(((e->value) >> 1) == i*2); - assert(m[i] == i*2); - assert(hm[i] == i*2); + ASSERT(v); + ASSERT(upb_value_getuint32(*v) == i*2); + ASSERT(m[i] == i*2); + ASSERT(hm[i] == i*2); } else { - assert(e == NULL); + ASSERT(v == NULL); } } if(!benchmark) { - upb_inttable_free(&table); + upb_inttable_uninit(&table); return; } @@ -141,7 +151,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) } for(uint16_t i = num_entries - 1; i >= 1; i--) { uint16_t rand_i = (random() / (double)RAND_MAX) * i; - assert(rand_i <= i); + ASSERT(rand_i <= i); uint16_t tmp = rand_order[rand_i]; rand_order[rand_i] = rand_order[i]; rand_order[i] = tmp; @@ -162,8 +172,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) for(i = 0; true; i++) { MAYBE_BREAK; int32_t key = keys[i & mask]; - inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key); - x += (uintptr_t)e; + const upb_value *v = upb_inttable_lookup32(&table, key); + x += (uintptr_t)v; } double total = get_usertime() - before; printf("%s/s\n", eng(i/total, 3, false)); @@ -174,8 +184,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) for(i = 0; true; i++) { MAYBE_BREAK; int32_t key = keys[rand_order[i & mask]]; - inttable_entry *e = (inttable_entry*)upb_inttable_lookup(&table, key); - x += (uintptr_t)e; + const upb_value *v = upb_inttable_lookup32(&table, key); + x += (uintptr_t)v; } total = get_usertime() - before; printf("%s/s\n", eng(i/total, 3, false)); @@ -223,20 +233,18 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) } total = get_usertime() - before; printf("%s/s\n\n", eng(i/total, 3, false)); - upb_inttable_free(&table); + upb_inttable_uninit(&table); delete rand_order; } -int32_t *get_contiguous_keys(int32_t num) -{ +int32_t *get_contiguous_keys(int32_t num) { int32_t *buf = new int32_t[num]; for(int32_t i = 0; i < num; 
i++) - buf[i] = i+1; + buf[i] = i; return buf; } -int main(int argc, char *argv[]) -{ +int main(int argc, char *argv[]) { for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "--benchmark") == 0) benchmark = true; } diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 53b2498324..020dca500f 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. * * A test that verifies that our results are identical to proto2 for a * given proto type and input protobuf. @@ -9,230 +9,87 @@ #define __STDC_LIMIT_MACROS // So we get UINT32_MAX #include +#include +#include #include #include #include #include -#include -#include #include "benchmarks/google_messages.pb.h" -#include "upb/def.h" -#include "upb/msg.h" +#include "upb/def.hpp" +#include "upb/handlers.hpp" +#include "upb/msg.hpp" +#include "upb/pb/decoder.hpp" #include "upb/pb/glue.h" #include "upb/pb/varint.h" +#include "upb/proto2_bridge.hpp" #include "upb_test.h" -size_t string_size; - -void compare(const google::protobuf::Message& proto2_msg, - void *upb_msg, const upb_msgdef *upb_md); - -void compare_arrays(const google::protobuf::Reflection *r, - const google::protobuf::Message& proto2_msg, - const google::protobuf::FieldDescriptor *proto2_f, - void *upb_msg, upb_fielddef *upb_f) -{ - ASSERT(upb_msg_has(upb_msg, upb_f)); - ASSERT(upb_isseq(upb_f)); - const void *arr = upb_value_getptr(upb_msg_getseq(upb_msg, upb_f)); - const void *iter = upb_seq_begin(arr, upb_f); - for(int i = 0; - i < r->FieldSize(proto2_msg, proto2_f); - i++, iter = upb_seq_next(arr, iter, upb_f)) { - ASSERT(!upb_seq_done(iter)); - upb_value v = upb_seq_get(iter, upb_f); - switch(upb_f->type) { - default: - ASSERT(false); - case UPB_TYPE(DOUBLE): - ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == 
upb_value_getdouble(v)); - break; - case UPB_TYPE(FLOAT): - ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v)); - break; - case UPB_TYPE(INT64): - case UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): - ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v)); - break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v)); - break; - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - case UPB_TYPE(INT32): - case UPB_TYPE(ENUM): - ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v)); - break; - case UPB_TYPE(FIXED32): - case UPB_TYPE(UINT32): - ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v)); - break; - case UPB_TYPE(BOOL): - ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v)); - break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { - std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); - upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v); - std::string str2(upbstr->ptr, upbstr->len); - string_size += upbstr->len; - ASSERT(str == str2); - break; - } - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): - ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL); - compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i), - upb_value_getptr(v), upb_downcast_msgdef(upb_f->def)); - } - } - ASSERT(upb_seq_done(iter)); -} - -void compare_values(const google::protobuf::Reflection *r, - const google::protobuf::Message& proto2_msg, - const google::protobuf::FieldDescriptor *proto2_f, - void *upb_msg, upb_fielddef *upb_f) -{ - upb_value v = upb_msg_get(upb_msg, upb_f); - switch(upb_f->type) { - default: - ASSERT(false); - case UPB_TYPE(DOUBLE): - ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v)); - break; - case UPB_TYPE(FLOAT): - ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v)); - break; - case UPB_TYPE(INT64): - case 
UPB_TYPE(SINT64): - case UPB_TYPE(SFIXED64): - ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v)); - break; - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): - ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v)); - break; - case UPB_TYPE(SFIXED32): - case UPB_TYPE(SINT32): - case UPB_TYPE(INT32): - case UPB_TYPE(ENUM): - ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v)); - break; - case UPB_TYPE(FIXED32): - case UPB_TYPE(UINT32): - ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v)); - break; - case UPB_TYPE(BOOL): - ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v)); - break; - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): { - std::string str = r->GetString(proto2_msg, proto2_f); - upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v); - std::string str2(upbstr->ptr, upbstr->len); - string_size += upbstr->len; - ASSERT(str == str2); - break; - } - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): - // XXX: getstr - compare(r->GetMessage(proto2_msg, proto2_f), - upb_value_getptr(v), upb_downcast_msgdef(upb_f->def)); - } -} - -void compare(const google::protobuf::Message& proto2_msg, - void *upb_msg, const upb_msgdef *upb_md) -{ - const google::protobuf::Reflection *r = proto2_msg.GetReflection(); - const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor(); - - ASSERT(d->field_count() == upb_msgdef_numfields(upb_md)); - upb_msg_iter i; - for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) { - upb_fielddef *upb_f = upb_msg_iter_field(i); +void compare_metadata(const google::protobuf::Descriptor* d, + const upb::MessageDef *upb_md) { + ASSERT(d->field_count() == upb_md->field_count()); + for (upb::MessageDef::ConstIterator i(upb_md); !i.Done(); i.Next()) { + const upb::FieldDef* upb_f = i.field(); const google::protobuf::FieldDescriptor *proto2_f = - d->FindFieldByNumber(upb_f->number); - // Make sure the definitions are equal. 
+ d->FindFieldByNumber(upb_f->number()); ASSERT(upb_f); ASSERT(proto2_f); - ASSERT(upb_f->number == proto2_f->number()); - ASSERT(std::string(upb_f->name) == proto2_f->name()); - ASSERT(upb_f->type == proto2_f->type()); - ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated()); - - if(!upb_msg_has(upb_msg, upb_f)) { - if(upb_isseq(upb_f)) - ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0); - else - ASSERT(r->HasField(proto2_msg, proto2_f) == false); - } else { - if(upb_isseq(upb_f)) { - compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f); - } else { - ASSERT(r->HasField(proto2_msg, proto2_f) == true); - compare_values(r, proto2_msg, proto2_f, upb_msg, upb_f); - } - } + ASSERT(upb_f->number() == proto2_f->number()); + ASSERT(std::string(upb_f->name()) == proto2_f->name()); + ASSERT(upb_f->type() == static_cast(proto2_f->type())); + ASSERT(upb_f->IsSequence() == proto2_f->is_repeated()); } } -void parse_and_compare(MESSAGE_CIDENT *proto2_msg, - void *upb_msg, const upb_msgdef *upb_md, - const char *str, size_t len, bool allow_jit) -{ +void parse_and_compare(MESSAGE_CIDENT *msg1, MESSAGE_CIDENT *msg2, + const upb::MessageDef *upb_md, + const char *str, size_t len, bool allow_jit) { // Parse to both proto2 and upb. 
- ASSERT(proto2_msg->ParseFromArray(str, len)); - upb_status status = UPB_STATUS_INIT; - upb_msg_clear(upb_msg, upb_md); - upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status); - if (!upb_ok(&status)) { - fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status)); - exit(1); - } - string_size = 0; - compare(*proto2_msg, upb_msg, upb_md); - printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len, - string_size, (double)string_size / len * 100); - upb_status_uninit(&status); + ASSERT(msg1->ParseFromArray(str, len)); + + upb::Handlers* handlers = upb::Handlers::New(); + upb::RegisterWriteHandlers(handlers, upb_md); + upb::DecoderPlan* plan = upb::DecoderPlan::New(handlers, allow_jit); + upb::StringSource src(str, len); + upb::Decoder decoder; + decoder.ResetPlan(plan, 0); + decoder.ResetInput(src.AllBytes(), msg2); + msg2->Clear(); + ASSERT(decoder.Decode() == UPB_OK); + plan->Unref(); + handlers->Unref(); + + // Would like to just compare the message objects themselves, but + // unfortunately MessageDifferencer is not part of the open-source release of + // proto2, so we compare their serialized strings, which we expect will be + // equivalent. + std::string str1; + std::string str2; + msg1->SerializeToString(&str1); + msg2->SerializeToString(&str2); + ASSERT(str1 == str2); + ASSERT(std::string(str, len) == str2); } -int main(int argc, char *argv[]) -{ - if (argc < 3) { - fprintf(stderr, "Usage: test_vs_proto2 \n"); - return 1; +void test_zig_zag() { + for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) { + ASSERT(upb_zzenc_64(num) == + google::protobuf::internal::WireFormatLite::ZigZagEncode64(num)); + if (num < UINT32_MAX) { + ASSERT(upb_zzenc_32(num) == + google::protobuf::internal::WireFormatLite::ZigZagEncode32(num)); + } } - const char *descriptor_file = argv[1]; - const char *message_file = argv[2]; - // Initialize upb state, parse descriptor. 
- upb_status status = UPB_STATUS_INIT; - upb_symtab *symtab = upb_symtab_new(); - size_t fds_len; - const char *fds = upb_readfile(descriptor_file, &fds_len); - if(fds == NULL) { - fprintf(stderr, "Couldn't read %s.\n", descriptor_file); - return 1; - } - upb_load_descriptor_into_symtab(symtab, fds, fds_len, &status); - if(!upb_ok(&status)) { - fprintf(stderr, "Error importing %s: %s", descriptor_file, - upb_status_getstr(&status)); - return 1; - } - free((void*)fds); +} - const upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME); - const upb_msgdef *msgdef; - if(!def || !(msgdef = upb_dyncast_msgdef_const(def))) { - fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); +int main(int argc, char *argv[]) +{ + if (argc < 2) { + fprintf(stderr, "Usage: test_vs_proto2 \n"); return 1; } + const char *message_file = argv[1]; // Read the message data itself. size_t len; @@ -242,32 +99,25 @@ int main(int argc, char *argv[]) return 1; } + MESSAGE_CIDENT msg1; + MESSAGE_CIDENT msg2; + + const upb::MessageDef* m = upb::proto2_bridge::NewFinalMessageDef(msg1, &m); + + compare_metadata(msg1.GetDescriptor(), m); + // Run twice to test proper object reuse. - MESSAGE_CIDENT proto2_msg; - void *upb_msg = upb_stdmsg_new(msgdef); - parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true); - parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false); - parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true); - parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false); + parse_and_compare(&msg1, &msg2, m, str, len, true); + parse_and_compare(&msg1, &msg2, m, str, len, false); + parse_and_compare(&msg1, &msg2, m, str, len, true); + parse_and_compare(&msg1, &msg2, m, str, len, false); printf("All tests passed, %d assertions.\n", num_assertions); - upb_stdmsg_free(upb_msg, msgdef); - upb_def_unref(UPB_UPCAST(msgdef)); + m->Unref(&m); free((void*)str); - upb_symtab_unref(symtab); - upb_status_uninit(&status); - // Test Zig-Zag encoding/decoding. 
- for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) { - ASSERT(upb_zzenc_64(num) == - google::protobuf::internal::WireFormatLite::ZigZagEncode64(num)); - if (num < UINT32_MAX) { - ASSERT(upb_zzenc_32(num) == - google::protobuf::internal::WireFormatLite::ZigZagEncode32(num)); - } - } + test_zig_zag(); google::protobuf::ShutdownProtobufLibrary(); - return 0; } diff --git a/tests/tests.c b/tests/tests.c deleted file mode 100644 index 12ff4bb23d..0000000000 --- a/tests/tests.c +++ /dev/null @@ -1,121 +0,0 @@ - - -#include -#include -#include -#include "upb/def.h" -#include "upb/handlers.h" -#include "upb/pb/decoder.h" -#include "upb/pb/glue.h" -#include "upb_test.h" - -const char *descriptor_file; - -static upb_symtab *load_test_proto() { - upb_symtab *s = upb_symtab_new(); - ASSERT(s); - upb_status status = UPB_STATUS_INIT; - if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) { - fprintf(stderr, "Error loading descriptor file: %s\n", - upb_status_getstr(&status)); - exit(1); - } - upb_status_uninit(&status); - return s; -} - -static upb_flow_t upb_test_onvalue(void *c, upb_value fval, upb_value val) { - (void)c; - (void)fval; - (void)val; - return UPB_CONTINUE; -} - -static void test_upb_jit() { - upb_symtab *s = load_test_proto(); - const upb_def *def = upb_symtab_lookup(s, "SimplePrimitives"); - ASSERT(def); - - upb_handlers *h = upb_handlers_new(); - upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL}; - upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset); - upb_decoderplan *p = upb_decoderplan_new(h, true); -#ifdef UPB_USE_JIT_X64 - ASSERT(upb_decoderplan_hasjitcode(p)); -#else - ASSERT(!upb_decoderplan_hasjitcode(p)); -#endif - upb_decoderplan_unref(p); - upb_symtab_unref(s); - upb_def_unref(def); - upb_handlers_unref(h); -} - -static void test_upb_symtab() { - upb_symtab *s = load_test_proto(); - - // Test cycle detection by making a cyclic def's main refcount go to zero - // and then be 
incremented to one again. - const upb_def *def = upb_symtab_lookup(s, "A"); - ASSERT(def); - upb_symtab_unref(s); - const upb_msgdef *m = upb_downcast_msgdef_const(def); - upb_msg_iter i = upb_msg_begin(m); - ASSERT(!upb_msg_done(i)); - upb_fielddef *f = upb_msg_iter_field(i); - ASSERT(upb_hassubdef(f)); - upb_def *def2 = f->def; - - i = upb_msg_next(m, i); - ASSERT(upb_msg_done(i)); // "A" should only have one field. - - ASSERT(upb_downcast_msgdef(def2)); - upb_def_ref(def2); - upb_def_unref(def); - upb_def_unref(def2); -} - -static void test_upb_two_fielddefs() { - upb_fielddef *f1 = upb_fielddef_new(); - upb_fielddef *f2 = upb_fielddef_new(); - - ASSERT(upb_fielddef_ismutable(f1)); - upb_fielddef_setname(f1, ""); - upb_fielddef_setnumber(f1, 1937); - upb_fielddef_settype(f1, UPB_TYPE(FIXED64)); - upb_fielddef_setlabel(f1, UPB_LABEL(REPEATED)); - upb_fielddef_settypename(f1, ""); - ASSERT(upb_fielddef_number(f1) == 1937); - - ASSERT(upb_fielddef_ismutable(f2)); - upb_fielddef_setname(f2, ""); - upb_fielddef_setnumber(f2, 1572); - upb_fielddef_settype(f2, UPB_TYPE(BYTES)); - upb_fielddef_setlabel(f2, UPB_LABEL(REPEATED)); - upb_fielddef_settypename(f2, ""); - ASSERT(upb_fielddef_number(f2) == 1572); - - upb_fielddef_unref(f1); - upb_fielddef_unref(f2); -} - -int main(int argc, char *argv[]) -{ - if (argc < 2) { - fprintf(stderr, "Usage: test_cpp \n"); - return 1; - } - descriptor_file = argv[1]; -#define TEST(func) do { \ - int assertions_before = num_assertions; \ - printf("Running " #func "..."); fflush(stdout); \ - func(); \ - printf("ok (%d assertions).\n", num_assertions - assertions_before); \ - } while (0) - - TEST(test_upb_symtab); - TEST(test_upb_jit); - TEST(test_upb_two_fielddefs); - printf("All tests passed (%d assertions).\n", num_assertions); - return 0; -} diff --git a/tests/upb_test.h b/tests/upb_test.h index 2bd340e213..652977b623 100644 --- a/tests/upb_test.h +++ b/tests/upb_test.h @@ -7,6 +7,7 @@ #ifndef UPB_TEST_H_ #define UPB_TEST_H_ +#include 
#include #ifdef __cplusplus @@ -18,9 +19,28 @@ int num_assertions = 0; ++num_assertions; \ if (!(expr)) { \ fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ + fprintf(stderr, "expr: %s\n", #expr); \ abort(); \ } \ -} while(0) +} while (0) + +#define ASSERT_NOCOUNT(expr) do { \ + if (!(expr)) { \ + fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ + fprintf(stderr, "expr: %s\n", #expr); \ + abort(); \ + } \ +} while (0) + +#define ASSERT_STATUS(expr, status) do { \ + ++num_assertions; \ + if (!(expr)) { \ + fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ + fprintf(stderr, "expr: %s\n", #expr); \ + fprintf(stderr, "failed status: %s\n", upb_status_getstr(status)); \ + abort(); \ + } \ +} while (0) #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/upbc.c b/tools/upbc.c index a5d8897324..4b25f3e0b6 100644 --- a/tools/upbc.c +++ b/tools/upbc.c @@ -55,7 +55,7 @@ static void write_const_h(const upb_def *defs[], int num_entries, for(int i = 0; i < num_entries; i++) { /* Foreach enum */ if(defs[i]->type != UPB_DEF_ENUM) continue; const upb_enumdef *enumdef = upb_downcast_enumdef_const(defs[i]); - char *enum_name = strdup(upb_def_fqname(UPB_UPCAST(enumdef))); + char *enum_name = strdup(upb_def_fullname(UPB_UPCAST(enumdef))); char *enum_val_prefix = strdup(enum_name); to_cident(enum_name); to_preproc(enum_val_prefix); @@ -63,11 +63,12 @@ static void write_const_h(const upb_def *defs[], int num_entries, fprintf(stream, "typedef enum %s {\n", enum_name); bool first = true; /* Foreach enum value. 
*/ - for (upb_enum_iter iter = upb_enum_begin(enumdef); - !upb_enum_done(iter); - iter = upb_enum_next(enumdef, iter)) { - char *value_name = strdup(upb_enum_iter_name(iter)); - uint32_t value = upb_enum_iter_number(iter); + upb_enum_iter iter; + for (upb_enum_begin(&iter, enumdef); + !upb_enum_done(&iter); + upb_enum_next(&iter)) { + char *value_name = strdup(upb_enum_iter_name(&iter)); + uint32_t value = upb_enum_iter_number(&iter); to_preproc(value_name); /* " GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13," */ if (!first) fputs(",\n", stream); @@ -85,20 +86,20 @@ static void write_const_h(const upb_def *defs[], int num_entries, for(int i = 0; i < num_entries; i++) { /* Foreach enum */ const upb_msgdef *m = upb_dyncast_msgdef_const(defs[i]); if(!m) continue; - char *msg_name = strdup(upb_def_fqname(UPB_UPCAST(m))); + char *msg_name = strdup(upb_def_fullname(UPB_UPCAST(m))); char *msg_val_prefix = strdup(msg_name); to_preproc(msg_val_prefix); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - char *preproc_field_name = strdup(f->name); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + char *preproc_field_name = strdup(upb_fielddef_name(f)); to_preproc(preproc_field_name); fprintf(stream, "#define %s_%s__FIELDNUM %d\n", msg_val_prefix, preproc_field_name, upb_fielddef_number(f)); fprintf(stream, "#define %s_%s__FIELDNAME \"%s\"\n", - msg_val_prefix, preproc_field_name, f->name); + msg_val_prefix, preproc_field_name, upb_fielddef_name(f)); fprintf(stream, "#define %s_%s__FIELDTYPE %d\n\n", - msg_val_prefix, preproc_field_name, f->type); + msg_val_prefix, preproc_field_name, upb_fielddef_type(f)); free(preproc_field_name); } free(msg_val_prefix); @@ -123,13 +124,13 @@ const char usage[] = " of using the input file as a basename.\n" ; -void usage_err(char *err) { +void usage_err(const char *err) { fprintf(stderr, 
"upbc: %s\n\n", err); fputs(usage, stderr); exit(1); } -void error(char *err, ...) { +void error(const char *err, ...) { va_list args; va_start(args, err); fprintf(stderr, "upbc: "); @@ -175,8 +176,8 @@ int main(int argc, char *argv[]) { upb_status_uninit(&status); /* Emit output files. */ - const int maxsize = 256; - char h_const_filename[maxsize]; + char h_const_filename[256]; + const int maxsize = sizeof(h_const_filename); if(snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize) error("File base too long.\n"); @@ -184,9 +185,9 @@ int main(int argc, char *argv[]) { if(!h_const_file) error("Failed to open _const.h output file\n"); int symcount; - const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY); + const upb_def **defs = upb_symtab_getdefs(s, &symcount, UPB_DEF_ANY, &defs); write_const_h(defs, symcount, h_const_filename, h_const_file); - for (int i = 0; i < symcount; i++) upb_def_unref(defs[i]); + for (int i = 0; i < symcount; i++) upb_def_unref(defs[i], &defs); free(defs); free(descriptor); upb_symtab_unref(s); diff --git a/upb/atomic.h b/upb/atomic.h deleted file mode 100644 index 2478fe4938..0000000000 --- a/upb/atomic.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * Only a very small part of upb is thread-safe. Notably, individual - * messages, arrays, and strings are *not* thread safe for mutating. - * However, we do make message *metadata* such as upb_msgdef and - * upb_symtab thread-safe, and their ownership is tracked via atomic - * refcounting. This header implements the small number of atomic - * primitives required to support this. The primitives we implement - * are: - * - * - a reader/writer lock (wrappers around platform-provided mutexes). - * - an atomic refcount. 
- * - * TODO: This needs some revisiting/refinement, see: - * http://code.google.com/p/upb/issues/detail?id=8 - */ - -#ifndef UPB_ATOMIC_H_ -#define UPB_ATOMIC_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* inline if possible, emit standalone code if required. */ -#ifndef INLINE -#define INLINE static inline -#endif - -// Until this stuff is actually working, make thread-unsafe the default. -#define UPB_THREAD_UNSAFE - -#ifdef UPB_THREAD_UNSAFE - -/* Non-thread-safe implementations. ******************************************/ - -typedef struct { - int v; -} upb_atomic_t; - -#define UPB_ATOMIC_INIT(x) {x} - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { a->v = val; } -INLINE bool upb_atomic_ref(upb_atomic_t *a) { return a->v++ == 0; } -INLINE bool upb_atomic_unref(upb_atomic_t *a) { assert(a->v > 0); return --a->v == 0; } -INLINE int upb_atomic_read(upb_atomic_t *a) { return a->v; } -INLINE bool upb_atomic_add(upb_atomic_t *a, int val) { - a->v += val; - return a->v == 0; -} - -#endif - -/* Atomic refcount ************************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -/* Already defined above. */ - -#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 - -/* GCC includes atomic primitives. */ - -typedef struct { - volatile int v; -} upb_atomic_t; - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { - a->v = val; - __sync_synchronize(); /* Ensure the initialized value is visible. */ -} - -INLINE bool upb_atomic_ref(upb_atomic_t *a) { - return __sync_fetch_and_add(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_add(upb_atomic_t *a, int n) { - return __sync_add_and_fetch(&a->v, n) == 0; -} - -INLINE bool upb_atomic_unref(upb_atomic_t *a) { - return __sync_sub_and_fetch(&a->v, 1) == 0; -} - -INLINE bool upb_atomic_read(upb_atomic_t *a) { - return __sync_fetch_and_add(&a->v, 0); -} - -#elif defined(WIN32) - -/* Windows defines atomic increment/decrement. 
*/ -#include - -typedef struct { - volatile LONG val; -} upb_atomic_t; - -INLINE void upb_atomic_init(upb_atomic_t *a, int val) { - InterlockedExchange(&a->val, val); -} - -INLINE bool upb_atomic_ref(upb_atomic_t *a) { - return InterlockedIncrement(&a->val) == 1; -} - -INLINE bool upb_atomic_unref(upb_atomic_t *a) { - return InterlockedDecrement(&a->val) == 0; -} - -#else -#error Atomic primitives not defined for your platform/CPU. \ - Implement them or compile with UPB_THREAD_UNSAFE. -#endif - -INLINE bool upb_atomic_only(upb_atomic_t *a) { - return upb_atomic_read(a) == 1; -} - -/* Reader/Writer lock. ********************************************************/ - -#ifdef UPB_THREAD_UNSAFE - -typedef struct { -} upb_rwlock_t; - -INLINE void upb_rwlock_init(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { (void)l; } -INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { (void)l; } - -#elif defined(UPB_USE_PTHREADS) - -#include - -typedef struct { - pthread_rwlock_t lock; -} upb_rwlock_t; - -INLINE void upb_rwlock_init(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_init(&l->lock, NULL); -} - -INLINE void upb_rwlock_destroy(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_destroy(&l->lock); -} - -INLINE void upb_rwlock_rdlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_rdlock(&l->lock); -} - -INLINE void upb_rwlock_wrlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_wrlock(&l->lock); -} - -INLINE void upb_rwlock_unlock(const upb_rwlock_t *l) { - /* TODO: check return value. */ - pthread_rwlock_unlock(&l->lock); -} - -#else -#error Reader/writer lock is not defined for your platform/CPU. \ - Implement it or compile with UPB_THREAD_UNSAFE. 
-#endif - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* UPB_ATOMIC_H_ */ diff --git a/upb/bytestream.c b/upb/bytestream.c index 812e55226d..8feb678037 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -32,8 +32,6 @@ upb_byteregion *upb_byteregion_newl(const void *str, size_t len) { memcpy(ptr, str, len); ptr[len] = '\0'; upb_stringsrc_reset(src, ptr, len); - upb_byteregion_fetch(upb_stringsrc_allbytes(src)); - assert(len == upb_byteregion_available(upb_stringsrc_allbytes(src), 0)); return upb_stringsrc_allbytes(src); } @@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) { static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { upb_stdio_buf **reuse = NULL; // XXX - uint32_t num_reused = 0, num_inuse = 0; + int num_reused = 0, num_inuse = 0; // Could sweep only a subset of bufs if this was a hotspot. - for (uint32_t i = 0; i < s->nbuf; i++) { + for (int i = 0; i < s->nbuf; i++) { upb_stdio_buf *buf = s->bufs[i]; if (buf->refcount > 0) { s->bufs[num_inuse++] = buf; @@ -243,10 +241,9 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) { upb_stringsrc *src = _src; - assert(ofs <= src->len); + assert(ofs < src->len); if (ofs == src->len) { upb_status_seteof(&src->bytesrc.status); - *read = 0; return UPB_BYTE_EOF; } *read = src->len - ofs; diff --git a/upb/bytestream.h b/upb/bytestream.h index fe049d2303..3217ee15c3 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -372,8 +372,7 @@ INLINE int upb_bytesink_putc(upb_bytesink *sink, char ch) { } INLINE int upb_bytesink_putrepeated(upb_bytesink *sink, char ch, int len) { - int i; - for (i = 0; i < len; i++) + for (int i = 0; i < len; i++) if (upb_bytesink_write(sink, &ch, 1) < 0) return -1; return len; @@ -436,7 +435,8 @@ typedef struct { FILE *file; bool should_close; upb_stdio_buf **bufs; - uint32_t nbuf, szbuf; + int nbuf; + uint32_t szbuf; } 
upb_stdio; void upb_stdio_init(upb_stdio *stdio); diff --git a/upb/def.c b/upb/def.c index 5ac3498868..5a5b0f463a 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2008-2009 Google Inc. See LICENSE for details. + * Copyright (c) 2008-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman */ @@ -11,168 +11,283 @@ #include "upb/bytestream.h" #include "upb/def.h" -#define alignof(t) offsetof(struct { char c; t x; }, x) +// isalpha() etc. from are locale-dependent, which we don't want. +static bool upb_isbetween(char c, char low, char high) { + return c >= low && c <= high; +} -void upb_deflist_init(upb_deflist *l) { - l->size = 8; - l->defs = malloc(l->size * sizeof(void*)); - l->len = 0; +static bool upb_isletter(char c) { + return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; } -void upb_deflist_uninit(upb_deflist *l) { - for(uint32_t i = 0; i < l->len; i++) upb_def_unref(l->defs[i]); - free(l->defs); +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -void upb_deflist_push(upb_deflist *l, upb_def *d) { - if(l->len == l->size) { - l->size *= 2; - l->defs = realloc(l->defs, l->size * sizeof(void*)); +static bool upb_isident(const char *str, size_t len, bool full) { + bool start = true; + for (size_t i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) return false; + start = true; + } else if (start) { + if (!upb_isletter(c)) return false; + start = false; + } else { + if (!upb_isalphanum(c)) return false; + } } - l->defs[l->len++] = d; + return !start; } /* upb_def ********************************************************************/ static void upb_msgdef_free(upb_msgdef *m); +static void upb_fielddef_free(upb_fielddef *f); static void upb_enumdef_free(upb_enumdef *e); -static void upb_unresolveddef_free(struct _upb_unresolveddef *u); -bool upb_def_ismutable(const upb_def 
*def) { return def->symtab == NULL; } +bool upb_def_ismutable(const upb_def *def) { return !def->is_finalized; } +bool upb_def_isfinalized(const upb_def *def) { return def->is_finalized; } -bool upb_def_setfqname(upb_def *def, const char *fqname) { +bool upb_def_setfullname(upb_def *def, const char *fullname) { assert(upb_def_ismutable(def)); - free(def->fqname); - def->fqname = strdup(fqname); - return true; // TODO: check for acceptable characters. -} - -static void upb_def_free(upb_def *def) { - switch (def->type) { - case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; - case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; - case UPB_DEF_UNRESOLVED: - upb_unresolveddef_free(upb_downcast_unresolveddef(def)); break; - default: - assert(false); - } + if (!upb_isident(fullname, strlen(fullname), true)) return false; + free(def->fullname); + def->fullname = strdup(fullname); + return true; } -upb_def *upb_def_dup(const upb_def *def) { +upb_def *upb_def_dup(const upb_def *def, void *o) { switch (def->type) { case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def))); + return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + case UPB_DEF_FIELD: + return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def))); + return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); default: assert(false); return NULL; } } -// Prior to being in a symtab, the def's refcount controls the lifetime of the -// def itself. If the refcount falls to zero, the def is deleted. Once the -// def belongs to a symtab, the def is owned by the symtab and its refcount -// determines whether the def owns a ref on the symtab or not. -void upb_def_ref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. 
- if (upb_atomic_ref(&def->refcount) && def->symtab) - upb_symtab_ref(def->symtab); -} - -static void upb_def_movetosymtab(upb_def *d, upb_symtab *s) { - assert(upb_atomic_read(&d->refcount) > 0); - d->symtab = s; - upb_symtab_ref(s); - upb_msgdef *m = upb_dyncast_msgdef(d); - if (m) upb_inttable_compact(&m->itof); +void upb_def_ref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; + upb_refcount_ref(&def->refcount, owner); } -void upb_def_unref(const upb_def *_def) { - upb_def *def = (upb_def*)_def; // Need to modify refcount. +void upb_def_unref(const upb_def *_def, void *owner) { + upb_def *def = (upb_def*)_def; if (!def) return; - if (upb_atomic_unref(&def->refcount)) { - if (def->symtab) { - upb_symtab_unref(def->symtab); - // Def might be deleted now. - } else { - upb_def_free(def); + if (!upb_refcount_unref(&def->refcount, owner)) return; + upb_def *base = def; + // Free all defs in the SCC. + do { + upb_def *next = (upb_def*)def->refcount.next; + switch (def->type) { + case UPB_DEF_MSG: upb_msgdef_free(upb_downcast_msgdef(def)); break; + case UPB_DEF_FIELD: upb_fielddef_free(upb_downcast_fielddef(def)); break; + case UPB_DEF_ENUM: upb_enumdef_free(upb_downcast_enumdef(def)); break; + default: + assert(false); } - } + def = next; + } while(def != base); } -static void upb_def_init(upb_def *def, upb_deftype_t type) { +static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) { def->type = type; - def->fqname = NULL; - def->symtab = NULL; - upb_atomic_init(&def->refcount, 1); + def->is_finalized = false; + def->fullname = NULL; + return upb_refcount_init(&def->refcount, owner); } static void upb_def_uninit(upb_def *def) { - free(def->fqname); + upb_refcount_uninit(&def->refcount); + free(def->fullname); } +void upb_def_donateref(const upb_def *_def, void *from, void *to) { + upb_def *def = (upb_def*)_def; + upb_refcount_donateref(&def->refcount, from, to); +} -/* upb_unresolveddef 
**********************************************************/ - -// Unresolved defs are used as temporary placeholders for a def whose name has -// not been resolved yet. During the name resolution step, all unresolved defs -// are replaced with pointers to the actual def being referenced. -typedef struct _upb_unresolveddef { - upb_def base; -} upb_unresolveddef; +static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { + upb_def *def = (upb_def*)refcount; + switch (def->type) { + case UPB_DEF_MSG: { + upb_msgdef *m = upb_downcast_msgdef(def); + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + upb_refcount_visit(refcount, &f->base.refcount, closure); + } + break; + } + case UPB_DEF_FIELD: { + upb_fielddef *f = upb_downcast_fielddef(def); + assert(f->msgdef); + upb_refcount_visit(refcount, &f->msgdef->base.refcount, closure); + upb_def *subdef = f->sub.def; + if (subdef) + upb_refcount_visit(refcount, &subdef->refcount, closure); + break; + } + case UPB_DEF_ENUM: + case UPB_DEF_SERVICE: + case UPB_DEF_ANY: + break; + } +} -// Is passed a ref on the string. 
-static upb_unresolveddef *upb_unresolveddef_new(const char *str) { - upb_unresolveddef *def = malloc(sizeof(*def)); - upb_def_init(&def->base, UPB_DEF_UNRESOLVED); - def->base.fqname = strdup(str); - return def; +static bool upb_validate_field(const upb_fielddef *f, upb_status *s) { + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == -1) { + upb_status_seterrliteral(s, "fielddef must have name and number set"); + return false; + } + if (upb_hassubdef(f)) { + if (f->subdef_is_symbolic) { + upb_status_seterrf(s, + "field %s has not been resolved", upb_fielddef_name(f)); + return false; + } else if (upb_fielddef_subdef(f) == NULL) { + upb_status_seterrf(s, + "field is %s missing required subdef", upb_fielddef_name(f)); + return false; + } else if (!upb_def_isfinalized(upb_fielddef_subdef(f))) { + upb_status_seterrf(s, + "field %s subtype is not being finalized", upb_fielddef_name(f)); + return false; + } + } + return true; } -static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { - upb_def_uninit(&def->base); - free(def); +bool upb_finalize(upb_def *const*defs, int n, upb_status *s) { + if (n >= UINT16_MAX - 1) { + upb_status_seterrliteral(s, "too many defs (max is 64k at a time)"); + return false; + } + + // First perform validation, in two passes so we can check that we have a + // transitive closure without needing to search. + for (int i = 0; i < n; i++) { + upb_def *def = defs[i]; + if (upb_def_isfinalized(def)) { + // Could relax this requirement if it's annoying. + upb_status_seterrliteral(s, "def is already finalized"); + goto err; + } else if (def->type == UPB_DEF_FIELD) { + upb_status_seterrliteral(s, "standalone fielddefs can not be finalized"); + goto err; + } else { + // Set now to detect transitive closure in the second pass. 
+ def->is_finalized = true; + } + } + + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_inttable_compact(&m->itof); + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + assert(f->msgdef == m); + if (!upb_validate_field(f, s)) goto err; + } + } + + // Validation all passed, now find strongly-connected components so that + // our refcounting works with cycles. + upb_refcount_findscc((upb_refcount**)defs, n, &upb_def_getsuccessors); + + // Now that ref cycles have been removed it is safe to have each fielddef + // take a ref on its subdef (if any), but only if it's a member of another + // SCC. + for (int i = 0; i < n; i++) { + upb_msgdef *m = upb_dyncast_msgdef(defs[i]); + if (!m) continue; + upb_msg_iter j; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + f->base.is_finalized = true; + // Release the ref taken in upb_msgdef_addfields(). + upb_fielddef_unref(f, m); + if (!upb_hassubdef(f)) continue; + assert(upb_fielddef_subdef(f)); + if (!upb_refcount_merged(&f->base.refcount, &f->sub.def->refcount)) { + // Subdef is part of a different strongly-connected component. 
+ upb_def_ref(f->sub.def, &f->sub.def); + f->subdef_is_owned = true; + } + } + } + + return true; + +err: + for (int i = 0; i < n; i++) { + defs[i]->is_finalized = false; + } + return false; } /* upb_enumdef ****************************************************************/ -upb_enumdef *upb_enumdef_new() { +upb_enumdef *upb_enumdef_new(void *owner) { upb_enumdef *e = malloc(sizeof(*e)); - upb_def_init(&e->base, UPB_DEF_ENUM); - upb_strtable_init(&e->ntoi, 0, sizeof(upb_ntoi_ent)); - upb_inttable_init(&e->iton, 0, sizeof(upb_iton_ent)); + if (!e) return NULL; + if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; + if (!upb_strtable_init(&e->ntoi)) goto err2; + if (!upb_inttable_init(&e->iton)) goto err1; return e; + +err1: + upb_strtable_uninit(&e->ntoi); +err2: + free(e); + return NULL; } static void upb_enumdef_free(upb_enumdef *e) { - upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - // Frees the ref taken when the string was parsed. - free(upb_enum_iter_name(i)); - } - upb_strtable_free(&e->ntoi); - upb_inttable_free(&e->iton); + upb_inttable_iter i; + upb_inttable_begin(&i, &e->iton); + for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { + // To clean up the strdup() from upb_enumdef_addval(). 
+ free(upb_value_getptr(upb_inttable_iter_value(&i))); + } + upb_strtable_uninit(&e->ntoi); + upb_inttable_uninit(&e->iton); upb_def_uninit(&e->base); free(e); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e) { - upb_enumdef *new_e = upb_enumdef_new(); +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) { + upb_enumdef *new_e = upb_enumdef_new(owner); + if (!new_e) return NULL; upb_enum_iter i; - for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { - assert(upb_enumdef_addval(new_e, upb_enum_iter_name(i), - upb_enum_iter_number(i))); + for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { + bool success = upb_enumdef_addval( + new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i)); + if (!success) { + upb_enumdef_unref(new_e, owner); + return NULL; + } } return new_e; } -bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) { - if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num) { + if (!upb_isident(name, strlen(name), false)) return false; + if (upb_enumdef_ntoi(e, name, NULL)) + return false; + if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) + return false; + if (!upb_inttable_lookup(&e->iton, num) && + !upb_inttable_insert(&e->iton, num, upb_value_ptr(strdup(name)))) return false; - upb_iton_ent ent = {0, strdup(name)}; - upb_strtable_insert(&e->ntoi, name, &num); - upb_inttable_insert(&e->iton, num, &ent); return true; } @@ -181,42 +296,70 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val) { e->defaultval = val; } -upb_enum_iter upb_enum_begin(const upb_enumdef *e) { - // We could iterate over either table here; the choice is arbitrary. - return upb_inttable_begin(&e->iton); +void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { + // We iterate over the ntoi table, to account for duplicate numbers. 
+ upb_strtable_begin(i, &e->ntoi); } -upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter) { - return upb_inttable_next(&e->iton, iter); -} +void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } +bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } -const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) { - upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); - return e ? e->str : NULL; -} - -bool upb_enumdef_ntoil(upb_enumdef *def, const char *name, size_t len, int32_t *num) { - upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len); - if (!e) return false; - if (num) *num = e->value; +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { + const upb_value *v = upb_strtable_lookup(&def->ntoi, name); + if (!v) return false; + if (num) *num = upb_value_getint32(*v); return true; } -bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num) { - return upb_enumdef_ntoil(e, name, strlen(name), num); +const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { + const upb_value *v = upb_inttable_lookup32(&def->iton, num); + return v ? upb_value_getptr(*v) : NULL; } /* upb_fielddef ***************************************************************/ +#define alignof(t) offsetof(struct { char c; t x; }, x) +#define TYPE_INFO(ctype, inmemory_type) \ + {alignof(ctype), sizeof(ctype), UPB_CTYPE_ ## inmemory_type} + +const upb_typeinfo upb_types[UPB_NUM_TYPES] = { + // END_GROUP is not real, but used to signify the pseudo-field that + // ends a group from within the group. 
+ TYPE_INFO(void*, PTR), // ENDGROUP + TYPE_INFO(double, DOUBLE), // DOUBLE + TYPE_INFO(float, FLOAT), // FLOAT + TYPE_INFO(int64_t, INT64), // INT64 + TYPE_INFO(uint64_t, UINT64), // UINT64 + TYPE_INFO(int32_t, INT32), // INT32 + TYPE_INFO(uint64_t, UINT64), // FIXED64 + TYPE_INFO(uint32_t, UINT32), // FIXED32 + TYPE_INFO(bool, BOOL), // BOOL + TYPE_INFO(void*, BYTEREGION), // STRING + TYPE_INFO(void*, PTR), // GROUP + TYPE_INFO(void*, PTR), // MESSAGE + TYPE_INFO(void*, BYTEREGION), // BYTES + TYPE_INFO(uint32_t, UINT32), // UINT32 + TYPE_INFO(uint32_t, INT32), // ENUM + TYPE_INFO(int32_t, INT32), // SFIXED32 + TYPE_INFO(int64_t, INT64), // SFIXED64 + TYPE_INFO(int32_t, INT32), // SINT32 + TYPE_INFO(int64_t, INT64), // SINT64 +}; + static void upb_fielddef_init_default(upb_fielddef *f); -upb_fielddef *upb_fielddef_new() { +upb_fielddef *upb_fielddef_new(void *owner) { upb_fielddef *f = malloc(sizeof(*f)); + if (!f) return NULL; + if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { + free(f); + return NULL; + } f->msgdef = NULL; - f->def = NULL; - upb_atomic_init(&f->refcount, 1); - f->finalized = false; + f->sub.def = NULL; + f->subdef_is_symbolic = false; + f->subdef_is_owned = false; f->label = UPB_LABEL(OPTIONAL); f->hasbit = -1; f->offset = 0; @@ -226,14 +369,68 @@ upb_fielddef *upb_fielddef_new() { // These are initialized to be invalid; the user must set them explicitly. // Could relax this later if it's convenient and non-confusing to have a // defaults for them. 
- f->name = NULL; - f->type = 0; + f->type = UPB_TYPE_NONE; f->number = 0; upb_fielddef_init_default(f); return f; } +static void upb_fielddef_uninit_default(upb_fielddef *f) { + if (f->default_is_string) + upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); +} + +static void upb_fielddef_free(upb_fielddef *f) { + if (f->subdef_is_owned) + upb_def_unref(f->sub.def, &f->sub.def); + upb_fielddef_uninit_default(f); + upb_def_uninit(UPB_UPCAST(f)); + free(f); +} + +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) { + upb_fielddef *newf = upb_fielddef_new(owner); + if (!newf) return NULL; + upb_fielddef_settype(newf, upb_fielddef_type(f)); + upb_fielddef_setlabel(newf, upb_fielddef_label(f)); + upb_fielddef_setnumber(newf, upb_fielddef_number(f)); + upb_fielddef_setname(newf, upb_fielddef_name(f)); + upb_fielddef_sethasbit(newf, upb_fielddef_hasbit(f)); + upb_fielddef_setoffset(newf, upb_fielddef_offset(f)); + upb_fielddef_setaccessor(newf, upb_fielddef_accessor(f)); + upb_fielddef_setfval(newf, upb_fielddef_fval(f)); + if (f->default_is_string) { + upb_byteregion *r = upb_value_getbyteregion(upb_fielddef_default(f)); + size_t len; + const char *ptr = upb_byteregion_getptr(r, 0, &len); + assert(len == upb_byteregion_len(r)); + upb_fielddef_setdefaultstr(newf, ptr, len); + } else { + upb_fielddef_setdefault(newf, upb_fielddef_default(f)); + } + + const char *srcname; + if (f->subdef_is_symbolic) { + srcname = f->sub.name; // Might be NULL. + } else { + srcname = f->sub.def ? 
upb_def_fullname(f->sub.def) : NULL; + } + if (srcname) { + char *newname = malloc(strlen(f->sub.def->fullname) + 2); + if (!newname) { + upb_fielddef_unref(newf, owner); + return NULL; + } + strcpy(newname, "."); + strcat(newname, f->sub.def->fullname); + upb_fielddef_setsubtypename(newf, newname); + free(newname); + } + + return newf; +} + static void upb_fielddef_init_default(upb_fielddef *f) { f->default_is_string = false; switch (upb_fielddef_type(f)) { @@ -253,105 +450,62 @@ static void upb_fielddef_init_default(upb_fielddef *f) { case UPB_TYPE(BOOL): upb_value_setbool(&f->defaultval, false); break; case UPB_TYPE(STRING): case UPB_TYPE(BYTES): - f->default_is_string = true; - upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); - break; + upb_value_setbyteregion(&f->defaultval, upb_byteregion_new("")); + f->default_is_string = true; + break; case UPB_TYPE(GROUP): case UPB_TYPE(MESSAGE): upb_value_setptr(&f->defaultval, NULL); break; + case UPB_TYPE_ENDGROUP: assert(false); + case UPB_TYPE_NONE: break; } } -static void upb_fielddef_uninit_default(upb_fielddef *f) { - if (f->default_is_string) { - upb_byteregion_free(upb_value_getbyteregion(f->defaultval)); - } -} - -static void upb_fielddef_free(upb_fielddef *f) { - upb_fielddef_uninit_default(f); - if (f->def) { - // We own a ref on the subdef iff we are not part of a msgdef. - if (f->msgdef == NULL) { - if (f->def) upb_downcast_unresolveddef(f->def); // assert() check. - upb_def_unref(f->def); - } - } - free(f->name); - free(f); -} - -void upb_fielddef_ref(upb_fielddef *f) { - // TODO. - (void)f; -} - -void upb_fielddef_unref(upb_fielddef *f) { - // TODO. - (void)f; - if (!f) return; - if (upb_atomic_unref(&f->refcount)) { - if (f->msgdef) { - upb_msgdef_unref(f->msgdef); - // fielddef might be deleted now. 
- } else { - upb_fielddef_free(f); - } +const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { + if (upb_hassubdef(f) && upb_fielddef_isfinalized(f)) { + assert(f->sub.def); + return f->sub.def; + } else { + return f->subdef_is_symbolic ? NULL : f->sub.def; } } -upb_fielddef *upb_fielddef_dup(upb_fielddef *f) { - upb_fielddef *newf = upb_fielddef_new(); - newf->msgdef = f->msgdef; - newf->type = f->type; - newf->label = f->label; - newf->number = f->number; - newf->name = f->name; - upb_fielddef_settypename(newf, f->def->fqname); - return f; +upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { + return (upb_def*)upb_fielddef_subdef(f); } -bool upb_fielddef_ismutable(const upb_fielddef *f) { - return !f->msgdef || upb_def_ismutable(UPB_UPCAST(f->msgdef)); +const char *upb_fielddef_subtypename(upb_fielddef *f) { + assert(upb_fielddef_ismutable(f)); + return f->subdef_is_symbolic ? f->sub.name : NULL; } -upb_def *upb_fielddef_subdef(const upb_fielddef *f) { - if (upb_hassubdef(f) && !upb_fielddef_ismutable(f)) - return f->def; - else - return NULL; -} - -static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) { - assert(upb_dyncast_unresolveddef(f->def)); - upb_def_unref(f->def); - f->def = def; - if (f->type == UPB_TYPE(ENUM) && f->default_is_string) { - // Resolve the enum's default from a string to an integer. - upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); - assert(bytes); // Points to either a real default or the empty string. - upb_enumdef *e = upb_downcast_enumdef(f->def); - int32_t val = 0; - // Could do a sanity check that the default value does not have embedded - // NULLs. - if (upb_byteregion_len(bytes) == 0) { - upb_value_setint32(&f->defaultval, e->defaultval); - } else { - size_t len; - // ptr is guaranteed to be NULL-terminated because the byteregion was - // created with upb_byteregion_newl(). 
- const char *ptr = upb_byteregion_getptr(bytes, 0, &len); - assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. - bool success = upb_enumdef_ntoi(e, ptr, &val); - if (!success) { - upb_status_seterrf( - s, "Default enum value (%s) is not a member of the enum", ptr); - return false; - } - upb_value_setint32(&f->defaultval, val); +// Could expose this to clients if a client wants to call it independently +// of upb_resolve() for whatever reason. +static bool upb_fielddef_resolvedefault(upb_fielddef *f, upb_status *s) { + if (!f->default_is_string) return true; + // Resolve the enum's default from a string to an integer. + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); // Points to either a real default or the empty string. + upb_enumdef *e = upb_downcast_enumdef(upb_fielddef_subdef_mutable(f)); + int32_t val = 0; + if (upb_byteregion_len(bytes) == 0) { + upb_value_setint32(&f->defaultval, e->defaultval); + } else { + size_t len; + // ptr is guaranteed to be NULL-terminated because the byteregion was + // created with upb_byteregion_newl(). + const char *ptr = upb_byteregion_getptr( + bytes, upb_byteregion_startofs(bytes), &len); + assert(len == upb_byteregion_len(bytes)); // Should all be in one chunk. 
+ bool success = upb_enumdef_ntoi(e, ptr, &val); + if (!success) { + upb_status_seterrf( + s, "Default enum value (%s) is not a member of the enum", ptr); + return false; } - f->default_is_string = false; - upb_byteregion_free(bytes); + upb_value_setint32(&f->defaultval, val); } + f->default_is_string = false; + upb_byteregion_free(bytes); return true; } @@ -361,42 +515,50 @@ bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number) { return true; } -bool upb_fielddef_setname(upb_fielddef *f, const char *name) { - assert(f->msgdef == NULL); - free(f->name); - f->name = strdup(name); - return true; -} - -bool upb_fielddef_settype(upb_fielddef *f, uint8_t type) { - assert(!f->finalized); +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { + assert(upb_fielddef_ismutable(f)); upb_fielddef_uninit_default(f); f->type = type; upb_fielddef_init_default(f); return true; } -bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label) { - assert(!f->finalized); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { + assert(upb_fielddef_ismutable(f)); f->label = label; return true; } void upb_fielddef_setdefault(upb_fielddef *f, upb_value value) { - assert(!f->finalized); - assert(!upb_isstring(f)); + assert(upb_fielddef_ismutable(f)); + assert(!upb_isstring(f) && !upb_issubmsg(f)); + if (f->default_is_string) { + upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); + assert(bytes); + upb_byteregion_free(bytes); + } f->defaultval = value; + f->default_is_string = false; } -void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { +bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len) { assert(upb_isstring(f) || f->type == UPB_TYPE(ENUM)); if (f->default_is_string) { upb_byteregion *bytes = upb_value_getbyteregion(f->defaultval); assert(bytes); upb_byteregion_free(bytes); - } - upb_value_setbyteregion(&f->defaultval, upb_byteregion_newl(str, len)); + } else { + assert(f->type == 
UPB_TYPE(ENUM)); + } + if (f->type == UPB_TYPE(ENUM) && !upb_isident(str, len, false)) return false; + upb_byteregion *r = upb_byteregion_newl(str, len); + upb_value_setbyteregion(&f->defaultval, r); + upb_bytesuccess_t ret = upb_byteregion_fetch(r); + (void)ret; + assert(ret == (len == 0 ? UPB_BYTE_EOF : UPB_BYTE_OK)); + assert(upb_byteregion_available(r, 0) == upb_byteregion_len(r)); f->default_is_string = true; + return true; } void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { @@ -404,82 +566,106 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str) { } void upb_fielddef_setfval(upb_fielddef *f, upb_value fval) { - assert(!f->finalized); - // TODO: string ownership? + assert(upb_fielddef_ismutable(f)); + // TODO: we need an ownership/freeing mechanism for dynamically-allocated + // fvals. One possibility is to let the user supply a free() function + // and call it when the fval is no longer referenced. Would have to + // ensure that no common use cases need cycles. + // + // For now the fval has no ownership; the caller must simply guarantee + // somehow that it outlives any handlers/plan. f->fval = fval; } -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl) { - assert(!f->finalized); - f->accessor = vtbl; +void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit) { + assert(upb_fielddef_ismutable(f)); + f->hasbit = hasbit; } -bool upb_fielddef_settypename(upb_fielddef *f, const char *name) { - upb_def_unref(f->def); - f->def = UPB_UPCAST(upb_unresolveddef_new(name)); - return true; +void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset) { + assert(upb_fielddef_ismutable(f)); + f->offset = offset; } -// Returns an ordering of fields based on: -// 1. value size (small to large). -// 2. field number. 
-static int upb_fielddef_cmpval(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t size1 = upb_types[f1->type].size; - size_t size2 = upb_types[f2->type].size; - if (size1 != size2) return size1 - size2; - // Otherwise return in number order. - return f1->number - f2->number; +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *tbl) { + assert(upb_fielddef_ismutable(f)); + f->accessor = tbl; } -// Returns an ordering of all fields based on: -// 1. required/optional (required fields first). -// 2. field number -static int upb_fielddef_cmphasbit(const void *_f1, const void *_f2) { - upb_fielddef *f1 = *(void**)_f1; - upb_fielddef *f2 = *(void**)_f2; - size_t req1 = f1->label == UPB_LABEL(REQUIRED); - size_t req2 = f2->label == UPB_LABEL(REQUIRED); - if (req1 != req2) return req1 - req2; - // Otherwise return in number order. - return f1->number - f2->number; +static bool upb_subtype_typecheck(upb_fielddef *f, const upb_def *subdef) { + if (f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP)) + return upb_dyncast_msgdef_const(subdef) != NULL; + else if (f->type == UPB_TYPE(ENUM)) + return upb_dyncast_enumdef_const(subdef) != NULL; + else { + assert(false); + return false; + } +} + +bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + assert(subdef); + if (!upb_subtype_typecheck(f, subdef)) return false; + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.def = subdef; + f->subdef_is_symbolic = false; + return true; +} + +bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { + assert(upb_fielddef_ismutable(f)); + assert(upb_hassubdef(f)); + if (f->subdef_is_symbolic) free(f->sub.name); + f->sub.name = strdup(name); + f->subdef_is_symbolic = true; + return true; } /* upb_msgdef *****************************************************************/ -upb_msgdef *upb_msgdef_new() { 
+upb_msgdef *upb_msgdef_new(void *owner) { upb_msgdef *m = malloc(sizeof(*m)); - upb_def_init(&m->base, UPB_DEF_MSG); - upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent)); - upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent)); + if (!m) return NULL; + if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto err2; + if (!upb_inttable_init(&m->itof)) goto err2; + if (!upb_strtable_init(&m->ntof)) goto err1; m->size = 0; m->hasbit_bytes = 0; m->extstart = 0; m->extend = 0; return m; + +err1: + upb_inttable_uninit(&m->itof); +err2: + free(m); + return NULL; } static void upb_msgdef_free(upb_msgdef *m) { - upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) - upb_fielddef_free(upb_msg_iter_field(i)); - upb_strtable_free(&m->ntof); - upb_inttable_free(&m->itof); + upb_strtable_uninit(&m->ntof); + upb_inttable_uninit(&m->itof); upb_def_uninit(&m->base); free(m); } -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m) { - upb_msgdef *newm = upb_msgdef_new(); - newm->size = m->size; - newm->hasbit_bytes = m->hasbit_bytes; - newm->extstart = m->extstart; - newm->extend = m->extend; +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) { + upb_msgdef *newm = upb_msgdef_new(owner); + if (!newm) return NULL; + upb_msgdef_setsize(newm, upb_msgdef_size(m)); + upb_msgdef_sethasbit_bytes(newm, upb_msgdef_hasbit_bytes(m)); + upb_msgdef_setextrange(newm, upb_msgdef_extstart(m), upb_msgdef_extend(m)); + upb_def_setfullname(UPB_UPCAST(newm), upb_def_fullname(UPB_UPCAST(m))); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_msgdef_addfield(newm, upb_fielddef_dup(upb_msg_iter_field(i))); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); + if (!f || !upb_msgdef_addfield(newm, f, &f)) { + upb_msgdef_unref(newm, owner); + return NULL; + } } return newm; } @@ -506,160 +692,69 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t 
start, uint32_t end) { return true; } -bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n) { +bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, + void *ref_donor) { // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - assert(upb_atomic_read(&f->refcount) > 0); - if (f->name == NULL || f->number == 0 || - upb_msgdef_itof(m, f->number) || upb_msgdef_ntof(m, f->name)) + if (f->msgdef != NULL || + upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0 || + upb_msgdef_itof(m, upb_fielddef_number(f)) || + upb_msgdef_ntof(m, upb_fielddef_name(f))) return false; } // Constraint checks ok, perform the action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; - upb_msgdef_ref(m); - assert(f->msgdef == NULL); f->msgdef = m; - upb_itof_ent itof_ent = {0, f}; - upb_inttable_insert(&m->itof, f->number, &itof_ent); - upb_strtable_insert(&m->ntof, f->name, &f); + upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_fielddef_ref(f, m); + if (ref_donor) upb_fielddef_unref(f, ref_donor); } return true; } -static int upb_div_round_up(int numerator, int denominator) { - /* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */ - return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; -} - -void upb_msgdef_layout(upb_msgdef *m) { - // Create an ordering over the fields, but only include fields with accessors. - upb_fielddef **sorted_fields = - malloc(sizeof(upb_fielddef*) * upb_msgdef_numfields(m)); - int n = 0; - upb_msg_iter i; - for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (f->accessor) sorted_fields[n++] = f; - } - - m->hasbit_bytes = upb_div_round_up(n, 8); - m->size = m->hasbit_bytes; // + header_size? - - // Assign hasbits. 
- qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmphasbit); - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - f->hasbit = i; - } - - // Assign value offsets. - qsort(sorted_fields, n, sizeof(*sorted_fields), upb_fielddef_cmpval); - size_t max_align = 0; - for (int i = 0; i < n; i++) { - upb_fielddef *f = sorted_fields[i]; - const upb_type_info *type_info = &upb_types[f->type]; - size_t size = type_info->size; - size_t align = type_info->align; - if (upb_isseq(f)) { - size = sizeof(void*); - align = alignof(void*); - } - - // General alignment rules are: each member must be at an address that is a - // multiple of that type's alignment. Also, the size of the structure as a - // whole must be a multiple of the greatest alignment of any member. - f->offset = upb_align_up(m->size, align); - m->size = f->offset + size; - max_align = UPB_MAX(max_align, align); - } - if (max_align > 0) m->size = upb_align_up(m->size, max_align); - - free(sorted_fields); -} - -upb_msg_iter upb_msg_begin(const upb_msgdef *m) { - return upb_inttable_begin(&m->itof); +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); } -upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter) { - return upb_inttable_next(&m->itof, iter); -} +void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } /* upb_symtab *****************************************************************/ -typedef struct { - upb_def *def; -} upb_symtab_ent; - -// Given a symbol and the base symbol inside which it is defined, find the -// symbol's definition in t. -static upb_symtab_ent *upb_resolve(const upb_strtable *t, - const char *base, const char *sym) { - if(strlen(sym) == 0) return NULL; - if(sym[0] == UPB_SYMBOL_SEPARATOR) { - // Symbols starting with '.' are absolute, so we do a single lookup. - // Slice to omit the leading '.' 
- return upb_strtable_lookup(t, sym + 1); - } else { - // Remove components from base until we find an entry or run out. - // TODO: This branch is totally broken, but currently not used. - (void)base; - assert(false); - return NULL; - } -} - -static void _upb_symtab_free(upb_strtable *t) { - upb_strtable_iter i; - upb_strtable_begin(&i, t); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - assert(upb_atomic_read(&e->def->refcount) == 0); - upb_def_free(e->def); - } - upb_strtable_free(t); -} - static void upb_symtab_free(upb_symtab *s) { - _upb_symtab_free(&s->symtab); - for (uint32_t i = 0; i < s->olddefs.len; i++) { - upb_def *d = s->olddefs.defs[i]; - assert(upb_atomic_read(&d->refcount) == 0); - upb_def_free(d); - } - upb_rwlock_destroy(&s->lock); - upb_deflist_uninit(&s->olddefs); + upb_strtable_iter i; + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) + upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); + upb_strtable_uninit(&s->symtab); free(s); } void upb_symtab_ref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - upb_atomic_ref(&s->refcount); + s->refcount++; } void upb_symtab_unref(const upb_symtab *_s) { upb_symtab *s = (upb_symtab*)_s; - if(s && upb_atomic_unref(&s->refcount)) { + if(s && --s->refcount == 0) { upb_symtab_free(s); } } upb_symtab *upb_symtab_new() { upb_symtab *s = malloc(sizeof(*s)); - upb_atomic_init(&s->refcount, 1); - upb_rwlock_init(&s->lock); - upb_strtable_init(&s->symtab, 16, sizeof(upb_symtab_ent)); - upb_deflist_init(&s->olddefs); + s->refcount = 1; + upb_strtable_init(&s->symtab); return s; } const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type) { - upb_rwlock_rdlock(&s->lock); + upb_deftype_t type, void *owner) { int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the // correct type. 
@@ -668,177 +763,252 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, upb_strtable_begin(&iter, &s->symtab); int i = 0; for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&iter); - upb_def *def = e->def; + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); assert(def); if(type == UPB_DEF_ANY || def->type == type) defs[i++] = def; } - upb_rwlock_unlock(&s->lock); *count = i; - for(i = 0; i < *count; i++) upb_def_ref(defs[i]); + if (owner) + for(i = 0; i < *count; i++) upb_def_ref(defs[i], owner); return defs; } -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); +const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *ret = v ? upb_value_getptr(*v) : NULL; + if (ret) upb_def_ref(ret, owner); return ret; } -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, + void *owner) { + const upb_value *v = upb_strtable_lookup(&s->symtab, sym); + upb_def *def = v ? upb_value_getptr(*v) : NULL; upb_msgdef *ret = NULL; - if(e && e->def->type == UPB_DEF_MSG) { - ret = upb_downcast_msgdef(e->def); - upb_def_ref(UPB_UPCAST(ret)); + if(def && def->type == UPB_DEF_MSG) { + ret = upb_downcast_msgdef(def); + upb_def_ref(def, owner); } - upb_rwlock_unlock(&s->lock); return ret; } +// Given a symbol and the base symbol inside which it is defined, find the +// symbol's definition in t. 
+static upb_def *upb_resolvename(const upb_strtable *t, + const char *base, const char *sym) { + if(strlen(sym) == 0) return NULL; + if(sym[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + const upb_value *v = upb_strtable_lookup(t, sym + 1); + return v ? upb_value_getptr(*v) : NULL; + } else { + // Remove components from base until we find an entry or run out. + // TODO: This branch is totally broken, but currently not used. + (void)base; + assert(false); + return NULL; + } +} + const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym) { - upb_rwlock_rdlock(&s->lock); - upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym); - upb_def *ret = NULL; - if(e) { - ret = e->def; - upb_def_ref(ret); - } - upb_rwlock_unlock(&s->lock); + const char *sym, void *owner) { + upb_def *ret = upb_resolvename(&s->symtab, base, sym); + if (ret) upb_def_ref(ret, owner); return ret; } -bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n, - upb_strtable *addtab) { - // This linear search makes the DFS O(n^2) in the length of the paths. - // Could make this O(n) with a hash table, but n is small. - for (int i = 0; i < n; i++) { - if (def == open_defs[i]) return false; - } - - bool needcopy = false; - upb_msgdef *m = upb_dyncast_msgdef(def); - if (m) { - upb_msg_iter i; - open_defs[n++] = def; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_hassubdef(f)) continue; - needcopy |= upb_symtab_dfs(f->def, open_defs, n, addtab); +// Adds dups of any existing def that can reach a def with the same name as one +// of "defs." This is to provide a consistent output graph as documented in +// the header file. We use a modified depth-first traversal that traverses +// each SCC (which we already computed) as if it were a single node. 
This +// allows us to traverse the possibly-cyclic graph as if it were a DAG and to +// easily dup the correct set of nodes with O(n) time. +// +// Returns true if defs that can reach "def" need to be duplicated into deftab. +static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, + void *new_owner, upb_inttable *seen, + upb_status *s) { + // Memoize results of this function for efficiency (since we're traversing a + // DAG this is not needed to limit the depth of the search). + upb_value *v = upb_inttable_lookup(seen, (uintptr_t)def); + if (v) return upb_value_getbool(*v); + + // Visit submessages for all messages in the SCC. + bool need_dup = false; + const upb_def *base = def; + do { + assert(upb_def_isfinalized(def)); + if (def->type == UPB_DEF_FIELD) continue; + upb_value *v = upb_strtable_lookup(deftab, upb_def_fullname(def)); + if (v) { + upb_def *add_def = upb_value_getptr(*v); + if (add_def->refcount.next && add_def->refcount.next != &def->refcount) { + upb_status_seterrf(s, "conflicting existing defs for name: '%s'", + upb_def_fullname(def)); + return false; + } + need_dup = true; + } + const upb_msgdef *m = upb_dyncast_msgdef_const(def); + if (m) { + upb_msg_iter i; + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + if (!upb_hassubdef(f)) continue; + // |= to avoid short-circuit; we need its side-effects. + need_dup |= upb_resolve_dfs( + upb_fielddef_subdef_mutable(f), deftab, new_owner, seen, s); + if (!upb_ok(s)) return false; + } } + } while ((def = (upb_def*)def->refcount.next) != base); + + if (need_dup) { + // Dup any defs that don't already have entries in deftab. + def = base; + do { + if (def->type == UPB_DEF_FIELD) continue; + const char *name = upb_def_fullname(def); + if (upb_strtable_lookup(deftab, name) == NULL) { + upb_def *newdef = upb_def_dup(def, new_owner); + if (!newdef) goto oom; + // We temporarily use this field to track who we were dup'd from. 
+ newdef->refcount.next = (upb_refcount*)def; + if (!upb_strtable_insert(deftab, name, upb_value_ptr(newdef))) + goto oom; + } + } while ((def = (upb_def*)def->refcount.next) != base); } - bool replacing = (upb_strtable_lookup(addtab, m->base.fqname) != NULL); - if (needcopy && !replacing) { - upb_symtab_ent e = {upb_def_dup(def)}; - upb_strtable_insert(addtab, def->fqname, &e); - replacing = true; - } - return replacing; -} + upb_inttable_insert(seen, (uintptr_t)def, upb_value_bool(need_dup)); + return need_dup; -bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status) { - upb_rwlock_wrlock(&s->lock); +oom: + upb_status_seterrliteral(s, "out of memory"); + return false; +} - // Add all defs to a table for resolution. +bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status) { + upb_def **add_defs = NULL; upb_strtable addtab; - upb_strtable_init(&addtab, n, sizeof(upb_symtab_ent)); + if (!upb_strtable_init(&addtab)) { + upb_status_seterrliteral(status, "out of memory"); + return false; + } + + // Add new defs to table. for (int i = 0; i < n; i++) { upb_def *def = defs[i]; - if (upb_strtable_lookup(&addtab, def->fqname)) { - upb_status_seterrf(status, "Conflicting defs named '%s'", def->fqname); - upb_strtable_free(&addtab); - return false; + assert(upb_def_ismutable(def)); + const char *fullname = upb_def_fullname(def); + if (!fullname) { + upb_status_seterrliteral( + status, "Anonymous defs cannot be added to a symtab"); + goto err; } - upb_strtable_insert(&addtab, def->fqname, &def); + if (upb_strtable_lookup(&addtab, fullname) != NULL) { + upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); + goto err; + } + if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) + goto oom_err; + // We temporarily use this field to indicate that we came from the user's + // list rather than being dup'd. 
+ def->refcount.next = NULL; } - // All existing defs that can reach defs that are being replaced must - // themselves be replaced with versions that will point to the new defs. - // Do a DFS -- any path that finds a new def must replace all ancestors. - upb_strtable *symtab = &s->symtab; + // Add dups of any existing def that can reach a def with the same name as + // one of "defs." + upb_inttable seen; + if (!upb_inttable_init(&seen)) goto oom_err; upb_strtable_iter i; - upb_strtable_begin(&i, symtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_symtab_dfs(e->def, open_defs, 0, &addtab); + upb_strtable_begin(&i, &s->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_resolve_dfs(def, &addtab, ref_donor, &seen, status); + if (!upb_ok(status)) goto err; } + upb_inttable_uninit(&seen); - // Resolve all refs. + // Now using the table, resolve symbolic references. upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *e = upb_strtable_iter_value(&i); - upb_msgdef *m = upb_dyncast_msgdef(e->def); - if(!m) continue; + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + upb_msgdef *m = upb_dyncast_msgdef(def); + if (!m) continue; // Type names are resolved relative to the message in which they appear. - const char *base = m->base.fqname; + const char *base = upb_def_fullname(UPB_UPCAST(m)); upb_msg_iter j; - for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) { - upb_fielddef *f = upb_msg_iter_field(j); - if (f->type == 0) { - upb_status_seterrf(status, "Field type was not set."); - return false; - } - - if (!upb_hassubdef(f)) continue; // No resolving necessary. - upb_downcast_unresolveddef(f->def); // Type check. 
- const char *name = f->def->fqname; - - // Resolve from either the addtab (pending adds) or symtab (existing - // defs). If both exist, prefer the pending add, because it will be - // overwriting the existing def. - upb_symtab_ent *found; - if(!(found = upb_resolve(&addtab, base, name)) && - !(found = upb_resolve(symtab, base, name))) { - upb_status_seterrf(status, "could not resolve symbol '%s' " - "in context '%s'", name, base); - return false; + for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_fielddef *f = upb_msg_iter_field(&j); + const char *name = upb_fielddef_subtypename(f); + if (name) { + upb_def *subdef = upb_resolvename(&addtab, base, name); + if (subdef == NULL) { + upb_status_seterrf( + status, "couldn't resolve name '%s' in message '%s'", name, base); + goto err; + } else if (!upb_fielddef_setsubdef(f, subdef)) { + upb_status_seterrf( + status, "def '%s' had the wrong type for field '%s'", + upb_def_fullname(subdef), upb_fielddef_name(f)); + goto err; + } } - // Check the type of the found def. - upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; - if(found->def->type != expected) { - upb_status_seterrliteral(status, "Unexpected type"); - return false; - } - if (!upb_fielddef_resolve(f, found->def, status)) return false; + if (upb_fielddef_type(f) == UPB_TYPE(ENUM) && upb_fielddef_subdef(f) && + !upb_fielddef_resolvedefault(f, status)) + goto err; } } - // The defs in the transaction have been vetted, and can be moved to the - // symtab without causing errors. + // We need an array of the defs in addtab, for passing to upb_finalize. 
+ add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); + if (add_defs == NULL) goto oom_err; upb_strtable_begin(&i, &addtab); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i); - upb_def_movetosymtab(tmptab_e->def, s); - upb_symtab_ent *symtab_e = - upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); - if(symtab_e) { - upb_deflist_push(&s->olddefs, symtab_e->def); - symtab_e->def = tmptab_e->def; + for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) + add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i)); + + // Restore the next pointer that we stole. + for (int i = 0; i < n; i++) + add_defs[i]->refcount.next = &add_defs[i]->refcount; + + if (!upb_finalize(add_defs, n, status)) goto err; + upb_strtable_uninit(&addtab); + + for (int i = 0; i < n; i++) { + upb_def *def = add_defs[i]; + const char *name = upb_def_fullname(def); + upb_def_donateref(def, ref_donor, s); + upb_value *v = upb_strtable_lookup(&s->symtab, name); + if(v) { + upb_def_unref(upb_value_getptr(*v), s); + upb_value_setptr(v, def); } else { - upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e); + upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); } } - - upb_strtable_free(&addtab); - upb_rwlock_unlock(&s->lock); - upb_symtab_gc(s); + free(add_defs); return true; -} -void upb_symtab_gc(upb_symtab *s) { - (void)s; - // TODO. +oom_err: + upb_status_seterrliteral(status, "out of memory"); +err: { + // Need to unref any defs we dup'd (we can distinguish them from defs that + // the user passed in by their def->refcount.next pointers). 
+ upb_strtable_iter i; + upb_strtable_begin(&i, &addtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); + if (def->refcount.next) upb_def_unref(def, s); + } + } + upb_strtable_uninit(&addtab); + free(add_defs); + return false; } diff --git a/upb/def.h b/upb/def.h index 462655a5fe..452b809438 100644 --- a/upb/def.h +++ b/upb/def.h @@ -1,17 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009-2011 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Provides a mechanism for creating and linking proto definitions. - * These form the protobuf schema, and are used extensively throughout upb: + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * * - upb_msgdef: describes a "message" construct. * - upb_fielddef: describes a message field. * - upb_enumdef: describes an enum. * (TODO: definitions of services). * - * * Defs go through two distinct phases of life: * * 1. MUTABLE: when first created, the properties of the def can be set freely @@ -20,16 +20,15 @@ * not be used for any purpose except to set its properties (it can't be * used to parse anything, create any messages in memory, etc). * - * 2. FINALIZED: after being added to a symtab (which links the defs together) - * the defs become finalized (thread-safe and immutable). Programs may only - * access defs through a CONST POINTER during this stage -- upb_symtab will - * help you out with this requirement by only vending const pointers, but - * you need to make sure not to use any non-const pointers you still have - * sitting around. In practice this means that you may not call any setters - * on the defs (or functions that themselves call the setters). 
If you want - * to modify an existing immutable def, copy it with upb_*_dup(), modify the - * copy, and add the modified def to the symtab (replacing the existing - * def). + * 2. FINALIZED: the upb_finalize() operation finalizes a set of defs, + * which makes them thread-safe and immutable. Finalized defs may only be + * accessed through a CONST POINTER. If you want to modify an existing + * immutable def, copy it with upb_*_dup() and modify and finalize the copy. + * + * The refcounting of defs works properly no matter what state the def is in. + * Once the def is finalized it is guaranteed that any def reachable from a + * live def is also live (so a ref on the base of a message tree keeps the + * whole tree alive). * * You can test for which stage of life a def is in by calling * upb_def_ismutable(). This is particularly useful for dynamic language @@ -46,181 +45,306 @@ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "upb/atomic.h" +#include "upb/refcount.h" #include "upb/table.h" #ifdef __cplusplus extern "C" { #endif -struct _upb_symtab; -typedef struct _upb_symtab upb_symtab; +/* upb_def: base class for defs **********************************************/ // All the different kind of defs we support. These correspond 1:1 with // declarations in a .proto file. typedef enum { - UPB_DEF_MSG = 1, + UPB_DEF_MSG, + UPB_DEF_FIELD, UPB_DEF_ENUM, UPB_DEF_SERVICE, // Not yet implemented. UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() - UPB_DEF_UNRESOLVED = 99, // Internal-only. } upb_deftype_t; - -/* upb_def: base class for defs **********************************************/ - -typedef struct { - char *fqname; // Fully qualified. - upb_symtab *symtab; // Def is mutable iff symtab == NULL. - upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). +typedef struct _upb_def { + upb_refcount refcount; + char *fullname; upb_deftype_t type; + bool is_finalized; } upb_def; +#define UPB_UPCAST(ptr) (&(ptr)->base) + // Call to ref/unref a def.
Can be used at any time, but is not thread-safe -// until the def is in a symtab. While a def is in a symtab, everything -// reachable from that def (the symtab and all defs in the symtab) are -// guaranteed to be alive. -void upb_def_ref(const upb_def *def); -void upb_def_unref(const upb_def *def); -upb_def *upb_def_dup(const upb_def *def); - -// A def is mutable until it has been added to a symtab. +// until the def is finalized. While a def is finalized, everything reachable +// from that def is guaranteed to be alive. +void upb_def_ref(const upb_def *def, void *owner); +void upb_def_unref(const upb_def *def, void *owner); +void upb_def_donateref(const upb_def *def, void *from, void *to); +upb_def *upb_def_dup(const upb_def *def, void *owner); + +// A def is mutable until it has been finalized. bool upb_def_ismutable(const upb_def *def); -INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; } -bool upb_def_setfqname(upb_def *def, const char *fqname); // Only if mutable. +bool upb_def_isfinalized(const upb_def *def); -#define UPB_UPCAST(ptr) (&(ptr)->base) +// "fullname" is the def's fully-qualified name (eg. foo.bar.Message). +INLINE const char *upb_def_fullname(const upb_def *d) { return d->fullname; } + +// The def must be mutable. Caller retains ownership of fullname. Defs are +// not required to have a name; if a def has no name when it is finalized, it +// will remain an anonymous def. +bool upb_def_setfullname(upb_def *def, const char *fullname); + +// Finalizes the given defs; this validates all constraints and marks the defs +// as finalized (read-only). This will also cause fielddefs to take refs on +// their subdefs so that any reachable def will be kept alive (but this is +// done in a way that correctly handles circular references). +// +// On success, a new list is returned containing the finalized defs and +// ownership of the "defs" list passes to the function. 
On failure NULL is +// returned and the caller retains ownership of "defs." +// +// Symbolic references to sub-types or enum defaults must have already been +// resolved. "defs" must contain the transitive closure of any mutable defs +// reachable from any def in the list. In other words, there may not be a +// mutable def which is reachable from one of "defs" that does not appear +// elsewhere in "defs." "defs" may not contain fielddefs, but any fielddefs +// reachable from the given msgdefs will be finalized. +// +// n is currently limited to 64k defs, if more are required break them into +// batches of 64k (or we could raise this limit, at the cost of a bigger +// upb_def structure or complexity in upb_finalize()). +bool upb_finalize(upb_def *const*defs, int n, upb_status *status); /* upb_fielddef ***************************************************************/ -// A upb_fielddef describes a single field in a message. It isn't a full def -// in the sense that it derives from upb_def. It cannot stand on its own; it -// must be part of a upb_msgdef. It is also reference-counted. +// We choose these to match descriptor.proto. Clients may use UPB_TYPE() and +// UPB_LABEL() instead of referencing these directly. +typedef enum { + UPB_TYPE_NONE = -1, // Internal-only, may be removed. + UPB_TYPE_ENDGROUP = 0, // Internal-only, may be removed. + UPB_TYPE_DOUBLE = 1, + UPB_TYPE_FLOAT = 2, + UPB_TYPE_INT64 = 3, + UPB_TYPE_UINT64 = 4, + UPB_TYPE_INT32 = 5, + UPB_TYPE_FIXED64 = 6, + UPB_TYPE_FIXED32 = 7, + UPB_TYPE_BOOL = 8, + UPB_TYPE_STRING = 9, + UPB_TYPE_GROUP = 10, + UPB_TYPE_MESSAGE = 11, + UPB_TYPE_BYTES = 12, + UPB_TYPE_UINT32 = 13, + UPB_TYPE_ENUM = 14, + UPB_TYPE_SFIXED32 = 15, + UPB_TYPE_SFIXED64 = 16, + UPB_TYPE_SINT32 = 17, + UPB_TYPE_SINT64 = 18, +} upb_fieldtype_t; + +#define UPB_NUM_TYPES 19 + +typedef enum { + UPB_LABEL_OPTIONAL = 1, + UPB_LABEL_REQUIRED = 2, + UPB_LABEL_REPEATED = 3, +} upb_label_t; + +// These macros are provided for legacy reasons.
+#define UPB_TYPE(type) UPB_TYPE_ ## type +#define UPB_LABEL(type) UPB_LABEL_ ## type + +// Info for a given field type. +typedef struct { + uint8_t align; + uint8_t size; + uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 +} upb_typeinfo; + +extern const upb_typeinfo upb_types[UPB_NUM_TYPES]; + +// A upb_fielddef describes a single field in a message. It is most often +// found as a part of a upb_msgdef, but can also stand alone to represent +// an extension. typedef struct _upb_fielddef { + upb_def base; struct _upb_msgdef *msgdef; - upb_def *def; // if upb_hasdef(f) - upb_atomic_t refcount; - bool finalized; - - // The following fields may be modified until the def is finalized. - uint8_t type; // Use UPB_TYPE() constants. - uint8_t label; // Use UPB_LABEL() constants. + union { + char *name; // If subdef_is_symbolic. + upb_def *def; // If !subdef_is_symbolic. + } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). + bool subdef_is_symbolic; + bool default_is_string; + bool subdef_is_owned; + upb_fieldtype_t type; + upb_label_t label; int16_t hasbit; uint16_t offset; - bool default_is_string; - bool active; int32_t number; - char *name; - upb_value defaultval; // Only meaningful for non-repeated scalars and strings. + upb_value defaultval; // Only for non-repeated scalars and strings. upb_value fval; struct _upb_accessor_vtbl *accessor; - const void *default_ptr; const void *prototype; } upb_fielddef; -upb_fielddef *upb_fielddef_new(void); -void upb_fielddef_ref(upb_fielddef *f); -void upb_fielddef_unref(upb_fielddef *f); -upb_fielddef *upb_fielddef_dup(upb_fielddef *f); +// Returns NULL if memory allocation failed. 
+upb_fielddef *upb_fielddef_new(void *owner); + +INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) { + upb_def_ref(UPB_UPCAST(f), owner); +} +INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) { + upb_def_unref(UPB_UPCAST(f), owner); +} + +// Duplicates the given field, returning NULL if memory allocation failed. +// When a fielddef is duplicated, the subdef (if any) is made symbolic if it +// wasn't already. If the subdef is set but has no name (which is possible +// since msgdefs are not required to have a name) the new fielddef's subdef +// will be unset. +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner); + +INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) { + return upb_def_ismutable(UPB_UPCAST(f)); +} +INLINE bool upb_fielddef_isfinalized(const upb_fielddef *f) { + return !upb_fielddef_ismutable(f); +} -// A fielddef is mutable until its msgdef has been added to a symtab. -bool upb_fielddef_ismutable(const upb_fielddef *f); +// Simple accessors. /////////////////////////////////////////////////////////// -// Read accessors. May be called any time. 
-INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; } -INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; } +INLINE upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + return f->type; +} +INLINE upb_label_t upb_fielddef_label(const upb_fielddef *f) { + return f->label; +} INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; } -INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; } +INLINE uint16_t upb_fielddef_offset(const upb_fielddef *f) { return f->offset; } +INLINE int16_t upb_fielddef_hasbit(const upb_fielddef *f) { return f->hasbit; } +INLINE const char *upb_fielddef_name(const upb_fielddef *f) { + return upb_def_fullname(UPB_UPCAST(f)); +} INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; } -INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; } INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { return f->msgdef; } INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) { return f->accessor; } -INLINE const char *upb_fielddef_typename(const upb_fielddef *f) { - return f->def ? f->def->fqname : NULL; -} -// Returns the default value for this fielddef, which may either be something -// the client set explicitly or the "default default" (0 for numbers, empty for -// strings). The field's type indicates the type of the returned value, except -// for enums. For enums the default can be set either numerically or -// symbolically -- the upb_fielddef_default_is_symbolic() function below will -// indicate which it is. For string defaults, the value will be a upb_strref -// which is invalidated by any other call on this object. 
-INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { - return f->defaultval; -} +bool upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type); +bool upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label); +void upb_fielddef_sethasbit(upb_fielddef *f, int16_t hasbit); +void upb_fielddef_setoffset(upb_fielddef *f, uint16_t offset); +// TODO(haberman): need a way of keeping the fval alive even if some handlers +// outlast the fielddef. +void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); +void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); -// The results of this function are only meaningful for enum fields, which can -// have a default specified either as an integer or as a string. If this -// returns true, the default returned from upb_fielddef_default() is a string, -// otherwise it is an integer. -INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { - return f->default_is_string; +// "Number" and "fullname" must be set before the fielddef is added to a msgdef. +// For the moment we do not allow these to be set once the fielddef is added to +// a msgdef -- this could be relaxed in the future. +bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); +INLINE bool upb_fielddef_setname(upb_fielddef *f, const char *name) { + return upb_def_setfullname(UPB_UPCAST(f), name); } -// The enum or submessage def for this field, if any. Only meaningful for -// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true). -// Since defs are not linked together until they are in a symtab, this -// will return NULL until the msgdef is in a symtab. -upb_def *upb_fielddef_subdef(const upb_fielddef *f); +// Field type tests. /////////////////////////////////////////////////////////// -// Write accessors. "Number" and "name" must be set before the fielddef is -// added to a msgdef. 
For the moment we do not allow these to be set once -// the fielddef is added to a msgdef -- this could be relaxed in the future. -bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); -bool upb_fielddef_setname(upb_fielddef *f, const char *name); +INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { + return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); +} +INLINE bool upb_isstringtype(upb_fieldtype_t type) { + return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); +} +INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { + return !upb_issubmsgtype(type) && !upb_isstringtype(type); +} +INLINE bool upb_issubmsg(const upb_fielddef *f) { + return upb_issubmsgtype(f->type); +} +INLINE bool upb_isstring(const upb_fielddef *f) { + return upb_isstringtype(f->type); +} +INLINE bool upb_isseq(const upb_fielddef *f) { + return f->label == UPB_LABEL(REPEATED); +} -// These writers may be called at any time prior to being put in a symtab. -bool upb_fielddef_settype(upb_fielddef *f, uint8_t type); -bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label); -void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); -void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); +// Default value. ////////////////////////////////////////////////////////////// -// The name of the message or enum this field is referring to. Must be found -// at name resolution time (when upb_symtab_add() is called). +// Returns the default value for this fielddef, which may either be something +// the client set explicitly or the "default default" (0 for numbers, empty for +// strings). The field's type indicates the type of the returned value, except +// for enum fields that are still mutable. // -// NOTE: May only be called for fields whose type has already been set to -// be a submessage, group, or enum! Also, will be reset to empty if the -// field's type is set again. 
-bool upb_fielddef_settypename(upb_fielddef *f, const char *name); - -// The default value for the field. For numeric types, use +// For enums the default can be set either numerically or symbolically -- the +// upb_fielddef_default_is_symbolic() function below will indicate which it is. +// For string defaults, the value will be a upb_byteregion which is invalidated +// by any other non-const call on this object. Once the fielddef is finalized, +// symbolic enum defaults are resolved, so finalized enum fielddefs always have +// a default of type int32. +INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { + return f->defaultval; +} +// Sets default value for the field. For numeric types, use // upb_fielddef_setdefault(), and "value" must match the type of the field. -// For string/bytes types, use upb_fielddef_setdefaultstr(). -// Enum types may use either, since the default may be set either numerically -// or symbolically. +// For string/bytes types, use upb_fielddef_setdefaultstr(). Enum types may +// use either, since the default may be set either numerically or symbolically. // // NOTE: May only be called for fields whose type has already been set. // Also, will be reset to default if the field's type is set again. void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); -void upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); +bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len); void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str); -// A variety of tests about the type of a field. 
-INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { - return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); -} -INLINE bool upb_isstringtype(upb_fieldtype_t type) { - return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); -} -INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { - return !upb_issubmsgtype(type) && !upb_isstringtype(type); +// The results of this function are only meaningful for mutable enum fields, +// which can have a default specified either as an integer or as a string. If +// this returns true, the default returned from upb_fielddef_default() is a +// string, otherwise it is an integer. +INLINE bool upb_fielddef_default_is_symbolic(const upb_fielddef *f) { + assert(f->type == UPB_TYPE(ENUM)); + return f->default_is_string; } -INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); } -INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); } -INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); } -// Does the type of this field imply that it should contain an associated def? +// Subdef. ///////////////////////////////////////////////////////////////////// + +// Submessage and enum fields must reference a "subdef", which is the +// upb_msgdef or upb_enumdef that defines their type. Note that when the +// fielddef is mutable it may not have a subdef *yet*, but this function still +// returns true to indicate that the field's type requires a subdef. INLINE bool upb_hassubdef(const upb_fielddef *f) { return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); } +// Before a fielddef is finalized, its subdef may be set either directly (with +// a upb_def*) or symbolically. Symbolic refs must be resolved before the +// containing msgdef can be finalized (see upb_resolve() above). The client is +// responsible for making sure that "subdef" lives until this fielddef is +// finalized or deleted. 
+// +// Both methods require that upb_hassubdef(f) (so the type must be set prior +// to calling these methods). Returns false if this is not the case, or if +// the given subdef is not of the correct type. The subtype is reset if the +// field's type is changed. +bool upb_fielddef_setsubdef(upb_fielddef *f, upb_def *subdef); +bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name); + +// Returns the enum or submessage def or symbolic name for this field, if any. +// Requires that upb_hassubdef(f). Returns NULL if the subdef has not been set +// or if you ask for a subtype name when the subtype is currently set +// symbolically (or vice-versa). To access the subtype's name for a linked +// fielddef, use upb_def_fullname(upb_fielddef_subdef(f)). +// +// Caller does *not* own a ref on the returned def or string. +// upb_fielddef_subtypename() is non-const because finalized defs will never +// have a symbolic reference (they must be resolved before the msgdef can be +// finalized). +upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f); +const upb_def *upb_fielddef_subdef(const upb_fielddef *f); +const char *upb_fielddef_subtypename(upb_fielddef *f); + /* upb_msgdef *****************************************************************/ @@ -232,31 +356,31 @@ typedef struct _upb_msgdef { upb_inttable itof; // int to field upb_strtable ntof; // name to field - // The following fields may be modified until finalized. + // The following fields may be modified while mutable. uint16_t size; uint8_t hasbit_bytes; // The range of tag numbers used to store extensions. uint32_t extstart, extend; + // Used for proto2 integration. + const void *prototype; } upb_msgdef; -// Hash table entries for looking up fields by name or number. -typedef struct { - bool junk; - upb_fielddef *f; -} upb_itof_ent; -typedef struct { - upb_fielddef *f; -} upb_ntof_ent; +// Returns NULL if memory allocation failed. 
+upb_msgdef *upb_msgdef_new(void *owner); -upb_msgdef *upb_msgdef_new(void); -INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); } -INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); } +INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) { + upb_def_unref(UPB_UPCAST(md), owner); +} +INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) { + upb_def_ref(UPB_UPCAST(md), owner); +} // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced with -// just the name of the submessage. This can be put back into another symtab -// and the names will be re-resolved in the new context. -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m); +// just the name of the submessage. Returns NULL if memory allocation failed. +// This can be put back into another symtab and the names will be re-resolved +// in the new context. +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner); // Read accessors. May be called at any time. INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } @@ -271,38 +395,35 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); -// Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains its -// ref on the fielddef. May only be done before the msgdef is in a symtab -// (requires upb_def_ismutable(m) for the msgdef). The fielddef's name and -// number must be set, and the message may not already contain any field with -// this name or number, and this fielddef may not be part of another message, -// otherwise false is returned and no action is performed. 
-bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *f, int n); -INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { - return upb_msgdef_addfields(m, &f, 1); -} - -// Sets the layout of all fields according to default rules: -// 1. Hasbits for required fields come first, then optional fields. -// 2. Values are laid out in a way that respects alignment rules. -// 3. The order is chosen to minimize memory usage. -// This should only be called once all fielddefs have been added. -// TODO: will likely want the ability to exclude strings/submessages/arrays. -// TODO: will likely want the ability to define a header size. -void upb_msgdef_layout(upb_msgdef *m); +// Adds a set of fields (upb_fielddef objects) to a msgdef. Requires that the +// msgdef and all the fielddefs are mutable. The fielddef's name and number +// must be set, and the message may not already contain any field with this +// name or number, and this fielddef may not be part of another message. In +// error cases false is returned and the msgdef is unchanged. +// +// On success, the msgdef takes a ref on the fielddef so the caller needn't +// worry about continuing to keep it alive (however the reverse is not true; +// refs on the fielddef will *not* keep the msgdef alive). If ref_donor is +// non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef, +// otherwise caller retains its reference(s) on the defs in f. +bool upb_msgdef_addfields( + upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor); +INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, + void *ref_donor) { + return upb_msgdef_addfields(m, &f, 1, ref_donor); +} // Looks up a field by name or number. While these are written to be as fast // as possible, it will still be faster to cache the results of this lookup if // possible. These return NULL if no such field is found. 
INLINE upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - upb_itof_ent *e = (upb_itof_ent*) - upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent)); - return e ? e->f : NULL; + const upb_value *val = upb_inttable_lookup32(&m->itof, i); + return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; } INLINE upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name) { - upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); - return e ? e->f : NULL; + const upb_value *val = upb_strtable_lookup(&m->ntof, name); + return val ? (upb_fielddef*)upb_value_getptr(*val) : NULL; } INLINE int upb_msgdef_numfields(const upb_msgdef *m) { @@ -313,20 +434,19 @@ INLINE int upb_msgdef_numfields(const upb_msgdef *m) { // TODO: the iteration should be in field order. // Iterators are invalidated when a field is added or removed. // upb_msg_iter i; -// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { -// upb_fielddef *f = upb_msg_iter_field(i); +// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +// upb_fielddef *f = upb_msg_iter_field(&i); // // ... // } typedef upb_inttable_iter upb_msg_iter; -upb_msg_iter upb_msg_begin(const upb_msgdef *m); -upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter); -INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); } +void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); +void upb_msg_next(upb_msg_iter *iter); +INLINE bool upb_msg_done(upb_msg_iter *iter) { return upb_inttable_done(iter); } // Iterator accessor. 
-INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { - upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter); - return ent->f; +INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } @@ -339,84 +459,75 @@ typedef struct _upb_enumdef { int32_t defaultval; } upb_enumdef; -typedef struct { - uint32_t value; -} upb_ntoi_ent; - -typedef struct { - bool junk; - char *str; -} upb_iton_ent; - -upb_enumdef *upb_enumdef_new(void); -INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); } -INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e); +// Returns NULL if memory allocation failed. +upb_enumdef *upb_enumdef_new(void *owner); +INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) { + upb_def_ref(&e->base, owner); +} +INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) { + upb_def_unref(&e->base, owner); +} +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner); -INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; } +INLINE int32_t upb_enumdef_default(const upb_enumdef *e) { + return e->defaultval; +} // May only be set if upb_def_ismutable(e). void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); -// Adds a value to the enumdef. Requires that no existing val has this -// name or number (returns false and does not add if there is). May only -// be called before the enumdef is in a symtab. -bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num); +// Returns the number of values currently defined in the enum. Note that +// multiple names can refer to the same number, so this may be greater than the +// total number of unique numbers. +INLINE int upb_enumdef_numvals(const upb_enumdef *e) { + return upb_strtable_count(&e->ntoi); +} + +// Adds a value to the enumdef. 
Requires that no existing val has this name, +// but duplicate numbers are allowed. May only be called if the enumdef is +// mutable. Returns false if the name is already in use, or if "name" is not a +// valid label, or on memory allocation failure (we may want to distinguish +// these failure cases in the future). +bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num); -// Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoil(upb_enumdef *e, const char *name, size_t len, int32_t *num); -bool upb_enumdef_ntoi(upb_enumdef *e, const char *name, int32_t *num); -// Caller does not own the returned string. -const char *upb_enumdef_iton(upb_enumdef *e, int32_t num); +// Looks up the integer for a name, returning true if found. +bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); + +// Finds the name corresponding to the given number, or NULL if none was found. +// If more than one name corresponds to this number, returns the first one that +// was added. +const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); // Iteration over name/value pairs. The order is undefined. // Adding an enum val invalidates any iterators. // upb_enum_iter i; -// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { +// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { // // ... // } -typedef upb_inttable_iter upb_enum_iter; +typedef upb_strtable_iter upb_enum_iter; -upb_enum_iter upb_enum_begin(const upb_enumdef *e); -upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter); -INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } +void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e); +void upb_enum_next(upb_enum_iter *iter); +bool upb_enum_done(upb_enum_iter *iter); // Iterator accessors.
-INLINE char *upb_enum_iter_name(upb_enum_iter iter) { - upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); - return e->str; +INLINE const char *upb_enum_iter_name(upb_enum_iter *iter) { + return upb_strtable_iter_key(iter); } -INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { - return upb_inttable_iter_key(iter); +INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) { + return upb_value_getint32(upb_strtable_iter_value(iter)); } -/* upb_deflist ****************************************************************/ - -// upb_deflist is an internal-only dynamic array for storing a growing list of -// upb_defs. -typedef struct { - upb_def **defs; - uint32_t len; - uint32_t size; -} upb_deflist; - -void upb_deflist_init(upb_deflist *l); -void upb_deflist_uninit(upb_deflist *l); -void upb_deflist_push(upb_deflist *l, upb_def *d); - - /* upb_symtab *****************************************************************/ -// A symtab (symbol table) is where upb_defs live. It is empty when first -// constructed. Clients add definitions to the symtab (or replace existing -// definitions) by calling upb_symtab_add(). -struct _upb_symtab { - upb_atomic_t refcount; - upb_rwlock_t lock; // Protects all members except the refcount. - upb_strtable symtab; // The symbol table. - upb_deflist olddefs; -}; +// A symtab (symbol table) stores a name->def map of upb_defs. Clients could +// always create such tables themselves, but upb_symtab has logic for resolving +// symbolic references, which is nontrivial. +typedef struct { + uint32_t refcount; + upb_strtable symtab; +} upb_symtab; upb_symtab *upb_symtab_new(void); void upb_symtab_ref(const upb_symtab *s); @@ -430,33 +541,47 @@ void upb_symtab_unref(const upb_symtab *s); // within this message are searched, then within the parent, on up to the // root namespace). // -// If a def is found, the caller owns one ref on the returned def. Otherwise -// returns NULL. 
+// If a def is found, the caller owns one ref on the returned def, owned by +// owner. Otherwise returns NULL. const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym); + const char *sym, void *owner); -// Find an entry in the symbol table with this exact name. If a def is found, -// the caller owns one ref on the returned def. Otherwise returns NULL. -const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym); -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym); +// Finds an entry in the symbol table with this exact name. If a def is found, +// the caller owns one ref on the returned def, owned by owner. Otherwise +// returns NULL. +const upb_def *upb_symtab_lookup( + const upb_symtab *s, const char *sym, void *owner); +const upb_msgdef *upb_symtab_lookupmsg( + const upb_symtab *s, const char *sym, void *owner); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref -// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are -// returned, otherwise only defs of the required type are returned. -const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type); - -// Adds the given defs to the symtab, resolving all symbols. Only one def per -// name may be in the list, but defs can replace existing defs in the symtab. +// to each symbol inside (owned by owner). If type is UPB_DEF_ANY then defs of +// all types are returned, otherwise only defs of the required type are +// returned. +const upb_def **upb_symtab_getdefs( + const upb_symtab *s, int *n, upb_deftype_t type, void *owner); + +// Adds the given defs to the symtab, resolving all symbols (including enum +// default values) and finalizing the defs. Only one def per name may be in +// the list, but defs can replace existing defs in the symtab. 
All defs must +// have a name -- anonymous defs are not allowed. Anonymous defs can still be +// finalized by calling upb_def_finalize() directly. +// +// Any existing defs that can reach defs that are being replaced will +// themselves be replaced also, so that the resulting set of defs is fully +// consistent. +// +// The logic implemented in this method is a convenience; ultimately it calls +// some combination of upb_fielddef_setsubdef(), upb_def_dup(), and +// upb_finalize(), any of which the client could call themselves. However, since +// the logic for doing so is nontrivial, we provide it here. +// // The entire operation either succeeds or fails. If the operation fails, the // symtab is unchanged, false is returned, and status indicates the error. The -// caller retains its ref on all defs in all cases. -bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status); - -// Frees defs that are no longer active in the symtab and are no longer -// reachable. Such defs are not freed when they are replaced in the symtab -// if they are still reachable from defs that are still referenced. -void upb_symtab_gc(upb_symtab *s); +// caller passes a ref on all defs to the symtab (even if the operation fails).
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, + upb_status *status); /* upb_def casts **************************************************************/ @@ -483,9 +608,9 @@ void upb_symtab_gc(upb_symtab *s); return (const struct _upb_ ## lower*)def; \ } UPB_DEF_CASTS(msgdef, MSG); +UPB_DEF_CASTS(fielddef, FIELD); UPB_DEF_CASTS(enumdef, ENUM); UPB_DEF_CASTS(svcdef, SERVICE); -UPB_DEF_CASTS(unresolveddef, UNRESOLVED); #undef UPB_DEF_CASTS #ifdef __cplusplus diff --git a/upb/descriptor_const.h b/upb/descriptor/descriptor_const.h similarity index 98% rename from upb/descriptor_const.h rename to upb/descriptor/descriptor_const.h index 20058e44bc..52ca803ee5 100644 --- a/upb/descriptor_const.h +++ b/upb/descriptor/descriptor_const.h @@ -9,79 +9,47 @@ extern "C" { /* Enums. */ -typedef enum google_protobuf_FieldOptions_CType { - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, - GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 -} google_protobuf_FieldOptions_CType; - typedef enum google_protobuf_FieldDescriptorProto_Type { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FLOAT = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_DOUBLE = 1, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT32 = 5, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED64 = 6, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_FIXED32 = 7, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_STRING = 9, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, 
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_MESSAGE = 11, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_INT64 = 3, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ENUM = 14, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED32 = 15, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT32 = 13, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_UINT64 = 4, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SFIXED64 = 16, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18 + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BYTES = 12, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT64 = 18, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_BOOL = 8, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_GROUP = 10, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_SINT32 = 17 } google_protobuf_FieldDescriptorProto_Type; typedef enum google_protobuf_FieldDescriptorProto_Label { - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1, GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REQUIRED = 2, - GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3 + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_REPEATED = 3, + GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_OPTIONAL = 1 } google_protobuf_FieldDescriptorProto_Label; +typedef enum google_protobuf_FieldOptions_CType { + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_CORD = 1, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING = 0, + GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_STRING_PIECE = 2 +} google_protobuf_FieldOptions_CType; + typedef enum google_protobuf_FileOptions_OptimizeMode { - GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_CODE_SIZE = 2, + GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_SPEED = 1, GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZEMODE_LITE_RUNTIME = 3 } 
google_protobuf_FileOptions_OptimizeMode; /* Constants for field names and numbers. */ -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNUM 1 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDNAME "path" #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH__FIELDTYPE 5 @@ -106,6 +74,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define 
GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDNAME "negative_int_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE__FIELDTYPE 3 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 + #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNUM 6 #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDNAME "double_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE__FIELDTYPE 1 @@ -114,10 +86,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDNAME "string_value" #define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE__FIELDTYPE 12 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNUM 8 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDNAME "aggregate_value" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE__FIELDTYPE 9 - #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNUM 1 #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDNAME "name" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME__FIELDTYPE 9 @@ -138,14 +106,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 
#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 @@ -154,6 +114,14 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDNAME "source_code_info" #define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNUM 6 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDNAME "service" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNUM 7 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNUM 1 #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDNAME "name" #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME__FIELDTYPE 9 @@ -170,53 +138,13 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" #define GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" 
#define GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 - -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDNAME "file" +#define 
GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE__FIELDTYPE 11 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNUM 1 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION__FIELDNAME "location" @@ -230,6 +158,18 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDNAME "end" #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END__FIELDTYPE 5 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDNAME "number" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER__FIELDTYPE 5 + +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNUM 1 #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDNAME "ctype" #define GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE__FIELDTYPE 14 @@ -254,18 +194,6 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDNAME "java_package" #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" -#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 - -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 -#define 
GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" -#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 - #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNUM 16 #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDNAME "cc_generic_services" #define GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES__FIELDTYPE 8 @@ -286,17 +214,69 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNUM 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDNAME "java_outer_classname" +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNUM 9 +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDNAME "optimize_for" +#define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR__FIELDTYPE 14 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" -#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNUM 10 +#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDNAME "java_multiple_files" 
+#define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNUM 2 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDNAME "value" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNUM 2 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDNAME "method" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNUM 3 +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNUM 1 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDNAME "name" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNUM 2 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDNAME "field" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNUM 3 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDNAME "nested_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNUM 4 +#define 
GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDNAME "enum_type" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNUM 5 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDNAME "extension_range" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNUM 6 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDNAME "extension" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION__FIELDTYPE 11 + +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNUM 7 +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" @@ -322,6 +302,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDNAME "type" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE__FIELDTYPE 14 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" +#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNUM 6 #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDNAME "type_name" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME__FIELDTYPE 9 @@ -330,18 +314,34 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDNAME "default_value" #define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE__FIELDTYPE 9 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNUM 8 -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDNAME "options" -#define GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS__FIELDTYPE 11 - 
#define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNUM 1 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDNAME "message_set_wire_format" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNUM 2 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDNAME "no_standard_descriptor_accessor" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR__FIELDTYPE 8 + +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" +#define GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 + #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNUM 999 #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION__FIELDTYPE 11 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNUM 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDNAME "name_part" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART__FIELDTYPE 9 + +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNUM 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDNAME "is_extension" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION__FIELDTYPE 8 + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/descriptor.c b/upb/descriptor/reader.c similarity index 89% rename from upb/descriptor.c rename to upb/descriptor/reader.c index 0c589f231a..81775605eb 100644 --- 
a/upb/descriptor.c +++ b/upb/descriptor/reader.c @@ -8,13 +8,14 @@ #include #include #include "upb/def.h" -#include "upb/descriptor.h" +#include "upb/descriptor/descriptor_const.h" +#include "upb/descriptor/reader.h" // Returns a newly allocated string that joins input strings together, for example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("", "Baz") -> "Baz" // Caller owns a ref on the returned string. */ -static char *upb_join(char *base, char *name) { +static char *upb_join(const char *base, const char *name) { if (!base || strlen(base) == 0) { return strdup(name); } else { @@ -27,6 +28,36 @@ static char *upb_join(char *base, char *name) { } } +void upb_deflist_init(upb_deflist *l) { + l->size = 8; + l->defs = malloc(l->size * sizeof(void*)); + l->len = 0; + l->owned = true; +} + +void upb_deflist_uninit(upb_deflist *l) { + if (l->owned) + for(size_t i = 0; i < l->len; i++) + upb_def_unref(l->defs[i], &l->defs); + free(l->defs); +} + +void upb_deflist_push(upb_deflist *l, upb_def *d) { + if(l->len == l->size) { + l->size *= 2; + l->defs = realloc(l->defs, l->size * sizeof(void*)); + } + l->defs[l->len++] = d; +} + +void upb_deflist_donaterefs(upb_deflist *l, void *owner) { + assert(l->owned); + for (size_t i = 0; i < l->len; i++) + upb_def_donateref(l->defs[i], &l->defs, owner); + l->owned = false; +} + + /* upb_descreader ************************************************************/ static upb_def *upb_deflist_last(upb_deflist *l) { @@ -37,8 +68,8 @@ static upb_def *upb_deflist_last(upb_deflist *l) { static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { for(uint32_t i = start; i < l->len; i++) { upb_def *def = l->defs[i]; - char *name = def->fqname; - def->fqname = upb_join(str, name); + char *name = upb_join(str, upb_def_fullname(def)); + upb_def_setfullname(def, name); free(name); } } @@ -66,9 +97,9 @@ void upb_descreader_uninit(upb_descreader *r) { } } -upb_def **upb_descreader_getdefs(upb_descreader *r, int *n) { +upb_def 
**upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { *n = r->defs.len; - r->defs.len = 0; + upb_deflist_donaterefs(&r->defs, owner); return r->defs.defs; } @@ -204,7 +235,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r, return; } upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - if (upb_inttable_count(&e->iton) == 0) { + if (upb_enumdef_numvals(e) == 0) { // The default value of an enum (in the absence of an explicit default) is // its first listed value. upb_enumdef_setdefault(e, r->number); @@ -236,18 +267,18 @@ static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto( // google.protobuf.EnumDescriptorProto. static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new())); + upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs))); return UPB_CONTINUE; } static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) { + if (upb_def_fullname(upb_descreader_last((upb_descreader*)_r)) == NULL) { upb_status_seterrliteral(status, "Enum had no name."); return; } - if (upb_inttable_count(&e->iton) == 0) { + if (upb_enumdef_numvals(e) == 0) { upb_status_seterrliteral(status, "Enum had no values."); return; } @@ -258,9 +289,9 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r, upb_value val) { (void)fval; upb_descreader *r = _r; - upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); - free(e->base.fqname); - e->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + char *fullname = upb_byteregion_strdup(upb_value_getbyteregion(val)); + upb_def_setfullname(upb_descreader_last(r), fullname); + free(fullname); return UPB_CONTINUE; } @@ -284,7 +315,7 @@ static upb_mhandlers 
*upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) static upb_flow_t upb_fielddef_startmsg(void *_r) { upb_descreader *r = _r; - r->f = upb_fielddef_new(); + r->f = upb_fielddef_new(&r->defs); free(r->default_string); r->default_string = NULL; return UPB_CONTINUE; @@ -370,13 +401,12 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_fielddef *f = r->f; // TODO: verify that all required fields were present. - assert(f->number != -1 && f->name != NULL); - assert((f->def != NULL) == upb_hassubdef(f)); + assert(f->number != -1 && upb_fielddef_name(f) != NULL); + assert((upb_fielddef_subtypename(f) != NULL) == upb_hassubdef(f)); // Field was successfully read, add it as a field of the msgdef. upb_msgdef *m = upb_descreader_top(r); - upb_msgdef_addfield(m, f); - upb_fielddef_unref(f); + upb_msgdef_addfield(m, f, &r->defs); r->f = NULL; if (r->default_string) { @@ -435,7 +465,7 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval, (void)fval; upb_descreader *r = _r; char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_fielddef_settypename(r->f, name); + upb_fielddef_setsubtypename(r->f, name); free(name); return UPB_CONTINUE; } @@ -479,7 +509,7 @@ static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto( // google.protobuf.DescriptorProto. 
static upb_flow_t upb_msgdef_startmsg(void *_r) { upb_descreader *r = _r; - upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new())); + upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new(&r->defs))); upb_descreader_startcontainer(r); return UPB_CONTINUE; } @@ -487,7 +517,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_r) { static void upb_msgdef_endmsg(void *_r, upb_status *status) { upb_descreader *r = _r; upb_msgdef *m = upb_descreader_top(r); - if(!m->base.fqname) { + if(!upb_def_fullname(UPB_UPCAST(m))) { upb_status_seterrliteral(status, "Encountered message with no name."); return; } @@ -497,11 +527,10 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) { static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) { (void)fval; upb_descreader *r = _r; - assert(val.type == UPB_TYPE(STRING)); upb_msgdef *m = upb_descreader_top(r); - free(m->base.fqname); - m->base.fqname = upb_byteregion_strdup(upb_value_getbyteregion(val)); - upb_descreader_setscopename(r, strdup(m->base.fqname)); + char *name = upb_byteregion_strdup(upb_value_getbyteregion(val)); + upb_def_setfullname(UPB_UPCAST(m), name); + upb_descreader_setscopename(r, name); // Passes ownership of name. return UPB_CONTINUE; } @@ -530,4 +559,3 @@ static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) { } #undef FNUM #undef FTYPE - diff --git a/upb/descriptor.h b/upb/descriptor/reader.h similarity index 61% rename from upb/descriptor.h rename to upb/descriptor/reader.h index 21099b3c74..0e1bfa0fe3 100644 --- a/upb/descriptor.h +++ b/upb/descriptor/reader.h @@ -4,9 +4,9 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Routines for building defs by parsing descriptors in descriptor.proto format. - * This only needs to use the public API of upb_symtab. Later we may also - * add routines for dumping a symtab to a descriptor. 
+ * upb_descreader provides a set of sink handlers that will build defs from a + * data source that uses the descriptor.proto schema (like a protobuf binary + * descriptor). */ #ifndef UPB_DESCRIPTOR_H @@ -18,6 +18,20 @@ extern "C" { #endif +/* upb_deflist ****************************************************************/ + +// upb_deflist is an internal-only dynamic array for storing a growing list of +// upb_defs. +typedef struct { + upb_def **defs; + size_t len; + size_t size; + bool owned; +} upb_deflist; + +void upb_deflist_init(upb_deflist *l); +void upb_deflist_uninit(upb_deflist *l); +void upb_deflist_push(upb_deflist *l, upb_def *d); /* upb_descreader ************************************************************/ @@ -56,11 +70,11 @@ void upb_descreader_uninit(upb_descreader *r); upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h); // Gets the array of defs that have been parsed and removes them from the -// descreader. Ownership of the defs is passed to the caller, but the -// ownership of the returned array is retained and is invalidated by any other -// call into the descreader. The defs will not have been resolved, and are -// ready to be added to a symtab. -upb_def **upb_descreader_getdefs(upb_descreader *r, int *n); +// descreader. Ownership of the defs is passed to the caller using the given +// owner), but the ownership of the returned array is retained and is +// invalidated by any other call into the descreader. The defs will not have +// been resolved, and are ready to be added to a symtab. 
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n); #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/handlers.c b/upb/handlers.c index 1ccaf8d5e5..ea5a054a33 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -13,7 +13,7 @@ static upb_mhandlers *upb_mhandlers_new() { upb_mhandlers *m = malloc(sizeof(*m)); - upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent)); + upb_inttable_init(&m->fieldtab); m->startmsg = NULL; m->endmsg = NULL; m->is_group = false; @@ -26,20 +26,19 @@ static upb_mhandlers *upb_mhandlers_new() { static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, upb_fieldtype_t type, bool repeated) { - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n); + const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); // TODO: design/refine the API for changing the set of fields or modifying // existing handlers. - if (e) return NULL; - upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0), + if (v) return NULL; + upb_fhandlers new_f = {type, repeated, 0, n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, #ifdef UPB_USE_JIT_X64 0, 0, 0, #endif - NULL}; + }; upb_fhandlers *ptr = malloc(sizeof(*ptr)); memcpy(ptr, &new_f, sizeof(upb_fhandlers)); - upb_itofhandlers_ent ent = {false, ptr}; - upb_inttable_insert(&m->fieldtab, n, &ent); + upb_inttable_insert(&m->fieldtab, n, upb_value_ptr(ptr)); return ptr; } @@ -64,12 +63,17 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, return f; } +upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n) { + const upb_value *v = upb_inttable_lookup(&m->fieldtab, n); + return v ? 
upb_value_getptr(*v) : NULL; +} + /* upb_handlers ***************************************************************/ upb_handlers *upb_handlers_new() { upb_handlers *h = malloc(sizeof(*h)); - upb_atomic_init(&h->refcount, 1); + h->refcount = 1; h->msgs_len = 0; h->msgs_size = 4; h->msgs = malloc(h->msgs_size * sizeof(*h->msgs)); @@ -77,19 +81,18 @@ upb_handlers *upb_handlers_new() { return h; } -void upb_handlers_ref(upb_handlers *h) { upb_atomic_ref(&h->refcount); } +void upb_handlers_ref(upb_handlers *h) { h->refcount++; } void upb_handlers_unref(upb_handlers *h) { - if (upb_atomic_unref(&h->refcount)) { + if (--h->refcount == 0) { for (int i = 0; i < h->msgs_len; i++) { upb_mhandlers *mh = h->msgs[i]; - for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab); - !upb_inttable_done(j); - j = upb_inttable_next(&mh->fieldtab, j)) { - upb_itofhandlers_ent *e = upb_inttable_iter_value(j); - free(e->f); + upb_inttable_iter j; + upb_inttable_begin(&j, &mh->fieldtab); + for(; !upb_inttable_done(&j); upb_inttable_next(&j)) { + free(upb_value_getptr(upb_inttable_iter_value(&j))); } - upb_inttable_free(&mh->fieldtab); + upb_inttable_uninit(&mh->fieldtab); #ifdef UPB_USE_JIT_X64 free(mh->tablearray); #endif @@ -110,31 +113,28 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) { return mh; } -typedef struct { - upb_mhandlers *mh; -} upb_mtab_ent; - static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, const upb_msgdef *m, upb_onmsgreg *msgreg_cb, upb_onfieldreg *fieldreg_cb, void *closure, upb_strtable *mtab) { upb_mhandlers *mh = upb_handlers_newmhandlers(h); - upb_mtab_ent e = {mh}; - upb_strtable_insert(mtab, m->base.fqname, &e); + upb_strtable_insert(mtab, upb_def_fullname(UPB_UPCAST(m)), upb_value_ptr(mh)); if (msgreg_cb) msgreg_cb(closure, mh, m); upb_msg_iter i; - for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { - upb_fielddef *f = upb_msg_iter_field(i); + for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_fielddef 
*f = upb_msg_iter_field(&i); upb_fhandlers *fh; if (upb_issubmsg(f)) { upb_mhandlers *sub_mh; - upb_mtab_ent *subm_ent; + const upb_value *subm_ent; // The table lookup is necessary to break the DFS for type cycles. - if ((subm_ent = upb_strtable_lookup(mtab, f->def->fqname)) != NULL) { - sub_mh = subm_ent->mh; + const char *subname = upb_def_fullname(upb_fielddef_subdef(f)); + if ((subm_ent = upb_strtable_lookup(mtab, subname)) != NULL) { + sub_mh = upb_value_getptr(*subm_ent); } else { - sub_mh = upb_regmsg_dfs(h, upb_downcast_msgdef(f->def), msgreg_cb, - fieldreg_cb, closure, mtab); + sub_mh = upb_regmsg_dfs( + h, upb_downcast_msgdef_const(upb_fielddef_subdef(f)), + msgreg_cb, fieldreg_cb, closure, mtab); } fh = upb_mhandlers_newfhandlers_subm( mh, f->number, f->type, upb_isseq(f), sub_mh); @@ -151,10 +151,10 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, upb_onfieldreg *fieldreg_cb, void *closure) { upb_strtable mtab; - upb_strtable_init(&mtab, 8, sizeof(upb_mtab_ent)); + upb_strtable_init(&mtab); upb_mhandlers *ret = upb_regmsg_dfs(h, m, msgreg_cb, fieldreg_cb, closure, &mtab); - upb_strtable_free(&mtab); + upb_strtable_uninit(&mtab); return ret; } @@ -212,6 +212,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); if (f->startseq) sflow = f->startseq(d->top->closure, f->fval); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (sflow.flow != UPB_CONTINUE) { _upb_dispatcher_abortjmp(d); } @@ -247,6 +248,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure); if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (sflow.flow != UPB_CONTINUE) { _upb_dispatcher_abortjmp(d); } diff --git a/upb/handlers.h b/upb/handlers.h index 9ed02c114f..9083a2ef09 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -9,6 +9,10 @@ * 
for each message and/or field as the data is being parsed or iterated over, * without having to know the source format that we are parsing from. This * decouples the parsing logic from the processing logic. + * + * TODO: should we allow handlers to longjmp()? Would be necessary to eg. let + * a Lua handler "yield" from the current coroutine. I *think* everything + * would "just work" with our current decoder. */ #ifndef UPB_HANDLERS_H @@ -141,9 +145,9 @@ struct _upb_mhandlers; typedef struct _upb_fieldent { upb_fieldtype_t type; bool repeated; - upb_atomic_t refcount; + uint32_t refcount; uint32_t number; - int32_t valuehasbit; + int32_t hasbit; struct _upb_mhandlers *msg; struct _upb_mhandlers *submsg; // Set iff upb_issubmsgtype(type) == true. upb_value fval; @@ -157,14 +161,8 @@ typedef struct _upb_fieldent { uint32_t jit_pclabel_notypecheck; uint32_t jit_submsg_done_pclabel; #endif - void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); } upb_fhandlers; -typedef struct { - bool junk; // Stolen by table impl; see table.h for details. - upb_fhandlers *f; -} upb_itofhandlers_ent; - // fhandlers are created as part of a upb_handlers instance, but can be ref'd // and unref'd to prolong the life of the handlers. void upb_fhandlers_ref(upb_fhandlers *m); @@ -174,6 +172,8 @@ void upb_fhandlers_unref(upb_fhandlers *m); #define UPB_FHANDLERS_ACCESSORS(name, type) \ INLINE void upb_fhandlers_set ## name(upb_fhandlers *f, type v){f->name = v;} \ INLINE type upb_fhandlers_get ## name(const upb_fhandlers *f) { return f->name; } +// TODO(haberman): need a way of keeping the fval alive even if a plan outlasts +// the handlers. 
UPB_FHANDLERS_ACCESSORS(fval, upb_value) UPB_FHANDLERS_ACCESSORS(value, upb_value_handler*) UPB_FHANDLERS_ACCESSORS(startsubmsg, upb_startfield_handler*) @@ -182,11 +182,13 @@ UPB_FHANDLERS_ACCESSORS(startseq, upb_startfield_handler*) UPB_FHANDLERS_ACCESSORS(endseq, upb_endfield_handler*) UPB_FHANDLERS_ACCESSORS(msg, struct _upb_mhandlers*) UPB_FHANDLERS_ACCESSORS(submsg, struct _upb_mhandlers*) -// If set to >= 0, the hasbit will automatically be set after the corresponding -// callback is called (when a JIT is enabled, this can be significantly more -// efficient than setting the hasbit yourself inside the callback). Could add -// this for seq and submsg also, but doesn't look like a win at the moment. -UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t) +// If set to >= 0, the hasbit will automatically be set when the corresponding +// field is parsed (when a JIT is enabled, this can be significantly more +// efficient than setting the hasbit yourself inside the callback). For values +// it is undefined whether the hasbit is set before or after the callback is +// called. For seq and submsg, the hasbit is set *after* the start handler is +// called, but before any of the handlers for the submsg or sequence. +UPB_FHANDLERS_ACCESSORS(hasbit, int32_t) /* upb_mhandlers **************************************************************/ @@ -195,7 +197,7 @@ UPB_FHANDLERS_ACCESSORS(valuehasbit, int32_t) // message in the graph of messages. typedef struct _upb_mhandlers { - upb_atomic_t refcount; + uint32_t refcount; upb_startmsg_handler *startmsg; upb_endmsg_handler *endmsg; upb_inttable fieldtab; // Maps field number -> upb_fhandlers. @@ -203,6 +205,7 @@ typedef struct _upb_mhandlers { #ifdef UPB_USE_JIT_X64 // Used inside the JIT to track labels (jmp targets) in the generated code. uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message. + uint32_t jit_afterstartmsg_pclabel; // After calling the startmsg handler. 
uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end? uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message. uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup. @@ -240,11 +243,14 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n, UPB_MHANDLERS_ACCESSORS(startmsg, upb_startmsg_handler*); UPB_MHANDLERS_ACCESSORS(endmsg, upb_endmsg_handler*); +// Returns fhandlers for the given field, or NULL if none. +upb_fhandlers *upb_mhandlers_lookup(const upb_mhandlers *m, uint32_t n); + /* upb_handlers ***************************************************************/ struct _upb_handlers { - upb_atomic_t refcount; + uint32_t refcount; upb_mhandlers **msgs; // Array of msgdefs, [0]=toplevel. int msgs_len, msgs_size; bool should_jit; @@ -267,8 +273,10 @@ upb_mhandlers *upb_handlers_getmhandlers(upb_handlers *h, int index); // with "fieldreg_cb" // // See upb_handlers_reghandlerset() below for an example. -typedef void upb_onmsgreg(void *closure, upb_mhandlers *mh, const upb_msgdef *m); -typedef void upb_onfieldreg(void *closure, upb_fhandlers *mh, const upb_fielddef *m); +typedef void upb_onmsgreg( + void *closure, upb_mhandlers *mh, const upb_msgdef *m); +typedef void upb_onfieldreg( + void *closure, upb_fhandlers *fh, const upb_fielddef *f); upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m, upb_onmsgreg *msgreg_cb, upb_onfieldreg *fieldreg_cb, @@ -305,8 +313,8 @@ INLINE void upb_onfreg_hset(void *c, upb_fhandlers *fh, const upb_fielddef *f) { upb_value_setfielddef(&val, f); upb_fhandlers_setfval(fh, val); } -INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgdef *m, - upb_handlerset *hs) { +INLINE upb_mhandlers *upb_handlers_reghandlerset( + upb_handlers *h, const upb_msgdef *m, upb_handlerset *hs) { return upb_handlers_regmsgdef(h, m, &upb_onmreg_hset, &upb_onfreg_hset, hs); } @@ -373,7 +381,7 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, 
upb_fhandlers *f, upb_value val) { upb_flow_t flow = UPB_CONTINUE; if (f->value) flow = f->value(d->top->closure, f->fval, val); - _upb_dispatcher_sethas(d->top->closure, f->valuehasbit); + _upb_dispatcher_sethas(d->top->closure, f->hasbit); if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d); } void upb_dispatch_startmsg(upb_dispatcher *d); @@ -381,7 +389,8 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f); upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d); -upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f); +upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, + upb_fhandlers *f); upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d); #ifdef __cplusplus diff --git a/upb/msg.c b/upb/msg.c index 77521e5f0a..c671b7b6c2 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -4,101 +4,12 @@ * Copyright (c) 2010 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * Data structure for storing a message of protobuf data. */ #include "upb/upb.h" #include "upb/msg.h" -void upb_msg_clear(void *msg, const upb_msgdef *md) { - assert(msg != NULL); - memset(msg, 0, md->hasbit_bytes); - // TODO: set primitive fields to defaults? -} - -void *upb_stdarray_append(upb_stdarray *a, size_t type_size) { - assert(a != NULL); - assert(a->len <= a->size); - if (a->len == a->size) { - size_t old_size = a->size; - a->size = old_size == 0 ? 
8 : (old_size * 2); - a->ptr = realloc(a->ptr, a->size * type_size); - memset(&a->ptr[old_size * type_size], 0, (a->size - old_size) * type_size); - } - return &a->ptr[a->len++ * type_size]; -} - -#if 0 -static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, - upb_dispatcher *d); - -static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f, - upb_dispatcher *d, upb_fhandlers *hf) { - if (upb_issubmsg(f)) { - upb_msg *msg = upb_value_getmsg(val); - upb_dispatch_startsubmsg(d, hf); - upb_msg_dispatch(msg, upb_downcast_msgdef(f->def), d); - upb_dispatch_endsubmsg(d); - } else { - upb_dispatch_value(d, hf, val); - } - return UPB_CONTINUE; -} - -static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md, - upb_dispatcher *d) { - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_msg_has(msg, f)) continue; - upb_fhandlers *hf = upb_dispatcher_lookup(d, f->number); - if (!hf) continue; - upb_value val = upb_msg_get(msg, f); - if (upb_isarray(f)) { - upb_array *arr = upb_value_getarr(val); - for (uint32_t j = 0; j < upb_array_len(arr); ++j) { - upb_msg_pushval(upb_array_get(arr, f, j), f, d, hf); - } - } else { - upb_msg_pushval(val, f, d, hf); - } - } - return UPB_CONTINUE; -} - -void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h, - void *closure, upb_status *status) { - upb_dispatcher d; - upb_dispatcher_init(&d, h, NULL, NULL, NULL); - upb_dispatcher_reset(&d, closure); - - upb_dispatch_startmsg(&d); - upb_msg_dispatch(msg, md, &d); - upb_dispatch_endmsg(&d, status); - - upb_dispatcher_uninit(&d); -} -#endif - -/* Standard writers. 
**********************************************************/ - -void upb_stdmsg_sethas(void *_m, upb_value fval) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - if (f->hasbit >= 0) - m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8)); -} - -bool upb_stdmsg_has(const void *_m, upb_value fval) { - assert(_m != NULL); - const char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - return f->hasbit < 0 || - (m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8))); -} - -#define UPB_ACCESSORS(type, ctype) \ +#define UPB_ACCESSOR(type, ctype) \ upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \ upb_value val) { \ assert(_m != NULL); \ @@ -108,230 +19,17 @@ bool upb_stdmsg_has(const void *_m, upb_value fval) { *(ctype*)&m[f->offset] = upb_value_get ## type(val); \ return UPB_CONTINUE; \ } \ - \ - upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \ - upb_value val) { \ - (void)_fval; \ - assert(a != NULL); \ - ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \ - *p = upb_value_get ## type(val); \ - return UPB_CONTINUE; \ - } \ - \ - upb_value upb_stdmsg_get ## type(const void *_m, upb_value fval) { \ - assert(_m != NULL); \ - const uint8_t *m = _m; \ - const upb_fielddef *f = upb_value_getfielddef(fval); \ - upb_value ret; \ - upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \ - return ret; \ - } \ - upb_value upb_stdmsg_seqget ## type(const void *i) { \ - assert(i != NULL); \ - upb_value val; \ - upb_value_set ## type(&val, *(ctype*)i); \ - return val; \ - } -UPB_ACCESSORS(double, double) -UPB_ACCESSORS(float, float) -UPB_ACCESSORS(int32, int32_t) -UPB_ACCESSORS(int64, int64_t) -UPB_ACCESSORS(uint32, uint32_t) -UPB_ACCESSORS(uint64, uint64_t) -UPB_ACCESSORS(bool, bool) -UPB_ACCESSORS(ptr, void*) +UPB_ACCESSOR(double, double) +UPB_ACCESSOR(float, float) +UPB_ACCESSOR(int32, int32_t) +UPB_ACCESSOR(int64, int64_t) +UPB_ACCESSOR(uint32, uint32_t) 
+UPB_ACCESSOR(uint64, uint64_t) +UPB_ACCESSOR(bool, bool) +UPB_ACCESSOR(ptr, void*) #undef UPB_ACCESSORS -static void _upb_stdmsg_setstr(void *_dst, upb_value src) { - upb_stdarray **dstp = _dst; - upb_stdarray *dst = *dstp; - if (!dst) { - dst = malloc(sizeof(*dst)); - dst->size = 0; - dst->ptr = NULL; - *dstp = dst; - } - dst->len = 0; - const upb_byteregion *bytes = upb_value_getbyteregion(src); - uint32_t len = upb_byteregion_len(bytes); - if (len > dst->size) { - dst->size = len; - dst->ptr = realloc(dst->ptr, dst->size); - } - dst->len = len; - upb_byteregion_copyall(bytes, dst->ptr); -} - -upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - // Hasbit automatically set by the handlers. - _upb_stdmsg_setstr(&m[f->offset], val); - return UPB_CONTINUE; -} - -upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) { - assert(a != NULL); - (void)fval; - _upb_stdmsg_setstr(upb_stdarray_append((upb_stdarray*)a, sizeof(void*)), val); - return UPB_CONTINUE; -} - -upb_value upb_stdmsg_getstr(const void *m, upb_value fval) { - assert(m != NULL); - return upb_stdmsg_getptr(m, fval); -} - -upb_value upb_stdmsg_seqgetstr(const void *i) { - assert(i != NULL); - return upb_stdmsg_seqgetptr(i); -} - -void *upb_stdmsg_new(const upb_msgdef *md) { - void *m = malloc(md->size); - memset(m, 0, md->size); - upb_msg_clear(m, md); - return m; -} - -void upb_stdseq_free(void *s, upb_fielddef *f) { - upb_stdarray *a = s; - if (upb_issubmsg(f) || upb_isstring(f)) { - void **p = (void**)a->ptr; - for (uint32_t i = 0; i < a->size; i++) { - if (upb_issubmsg(f)) { - upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def)); - } else { - upb_stdarray *str = p[i]; - free(str->ptr); - free(str); - } - } - } - free(a->ptr); - free(a); -} - -void upb_stdmsg_free(void *m, const upb_msgdef *md) { - if (m == NULL) return; - upb_msg_iter i; - for(i = upb_msg_begin(md); 
!upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - if (!upb_isseq(f) && !upb_issubmsg(f) && !upb_isstring(f)) continue; - void *subp = upb_value_getptr(upb_stdmsg_getptr(m, f->fval)); - if (subp == NULL) continue; - if (upb_isseq(f)) { - upb_stdseq_free(subp, f); - } else if (upb_issubmsg(f)) { - upb_stdmsg_free(subp, upb_downcast_msgdef(f->def)); - } else { - upb_stdarray *str = subp; - free(str->ptr); - free(str); - } - } - free(m); -} - -upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) { - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - upb_stdarray **arr = (void*)&m[f->offset]; - if (!upb_stdmsg_has(_m, fval)) { - if (!*arr) { - *arr = malloc(sizeof(**arr)); - (*arr)->size = 0; - (*arr)->ptr = NULL; - } - (*arr)->len = 0; - upb_stdmsg_sethas(m, fval); - } - return UPB_CONTINUE_WITH(*arr); -} - -void upb_stdmsg_recycle(void **m, const upb_msgdef *md) { - if (*m) - upb_msg_clear(*m, md); - else - *m = upb_stdmsg_new(md); -} - -upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) { - assert(_m != NULL); - char *m = _m; - const upb_fielddef *f = upb_value_getfielddef(fval); - void **subm = (void*)&m[f->offset]; - if (!upb_stdmsg_has(m, fval)) { - upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); - upb_stdmsg_sethas(m, fval); - } - return UPB_CONTINUE_WITH(*subm); -} - -upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) { - assert(a != NULL); - const upb_fielddef *f = upb_value_getfielddef(fval); - void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*)); - upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def)); - return UPB_CONTINUE_WITH(*subm); -} - -const void *upb_stdmsg_seqbegin(const void *_a) { - const upb_stdarray *a = _a; - return a->len > 0 ? 
a->ptr : NULL; -} - -#define NEXTFUNC(size) \ - const void *upb_stdmsg_ ## size ## byte_seqnext(const void *_a, const void *iter) {\ - const upb_stdarray *a = _a; \ - const void *next = (char*)iter + size; \ - return (char*)next < (char*)a->ptr + (a->len * size) ? next : NULL; \ - } - -NEXTFUNC(8) -NEXTFUNC(4) -NEXTFUNC(1) - -#define STDMSG(type, size) { static upb_accessor_vtbl vtbl = { \ - &upb_stdmsg_startsubmsg, \ - &upb_stdmsg_set ## type, \ - &upb_stdmsg_startseq, \ - &upb_stdmsg_startsubmsg_r, \ - &upb_stdmsg_set ## type ## _r, \ - &upb_stdmsg_has, \ - &upb_stdmsg_getptr, \ - &upb_stdmsg_get ## type, \ - &upb_stdmsg_seqbegin, \ - &upb_stdmsg_ ## size ## byte_seqnext, \ - &upb_stdmsg_seqget ## type}; \ - return &vtbl; } - -upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) { - switch (f->type) { - case UPB_TYPE(DOUBLE): STDMSG(double, 8) - case UPB_TYPE(FLOAT): STDMSG(float, 4) - case UPB_TYPE(UINT64): - case UPB_TYPE(FIXED64): STDMSG(uint64, 8) - case UPB_TYPE(INT64): - case UPB_TYPE(SFIXED64): - case UPB_TYPE(SINT64): STDMSG(int64, 8) - case UPB_TYPE(INT32): - case UPB_TYPE(SINT32): - case UPB_TYPE(ENUM): - case UPB_TYPE(SFIXED32): STDMSG(int32, 4) - case UPB_TYPE(UINT32): - case UPB_TYPE(FIXED32): STDMSG(uint32, 4) - case UPB_TYPE(BOOL): STDMSG(bool, 1) - case UPB_TYPE(STRING): - case UPB_TYPE(BYTES): - case UPB_TYPE(GROUP): - case UPB_TYPE(MESSAGE): STDMSG(str, 8) // TODO: 32-bit - } - return NULL; -} - static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, const upb_fielddef *f) { (void)c; @@ -344,7 +42,7 @@ static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, } else { upb_fhandlers_setvalue(fh, f->accessor->set); upb_fhandlers_setstartsubmsg(fh, f->accessor->startsubmsg); - upb_fhandlers_setvaluehasbit(fh, f->hasbit); + upb_fhandlers_sethasbit(fh, f->hasbit); } } } diff --git a/upb/msg.h b/upb/msg.h index 67903d0425..7aaaf2a40c 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -68,34 +68,18 @@ typedef struct _upb_accessor_vtbl { 
upb_seqget_handler *seqget; } upb_accessor_vtbl; -// Registers handlers for writing into a message of the given type. +// Registers handlers for writing into a message of the given type using +// whatever accessors it has defined. upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, const upb_msgdef *m); -// Returns an stdmsg accessor for the given fielddef. -upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f); - - -/* upb_msg/upb_seq ************************************************************/ - -// upb_msg and upb_seq allow for generic access to a message through its -// accessor vtable. Note that these do *not* allow you to create, destroy, or -// take references on the objects -- these operations are specifically outside -// the scope of what the accessors define. - -// Clears all hasbits. -// TODO: Add a separate function for setting primitive values back to their -// defaults (but not strings, submessages, or arrays). -void upb_msg_clear(void *msg, const upb_msgdef *md); - INLINE void upb_msg_clearbit(void *msg, const upb_fielddef *f) { ((char*)msg)[f->hasbit / 8] &= ~(1 << (f->hasbit % 8)); } -// Could add a method that recursively clears submessages, strings, and -// arrays if desired. This could be a win if you wanted to merge without -// needing hasbits, because during parsing you would never clear submessages -// or arrays. Also this could be desired to provide proto2 operations on -// generated messages. +/* upb_msg/upb_seq ************************************************************/ + +// These accessor functions are simply convenience methods for reading or +// writing to a message through its accessors. 
INLINE bool upb_msg_has(const void *m, const upb_fielddef *f) { return f->accessor && f->accessor->has(m, f->fval); @@ -148,65 +132,11 @@ INLINE bool upb_msg_get_named(const void *m, const upb_msgdef *md, return true; } - -/* upb_msgvisitor *************************************************************/ - -// A upb_msgvisitor reads data from an in-memory structure using its accessors, -// pushing the results to a given set of upb_handlers. -// TODO: not yet implemented. - -typedef struct { - upb_fhandlers *fh; - upb_fielddef *f; - uint16_t msgindex; // Only when upb_issubmsg(f). -} upb_msgvisitor_field; - -typedef struct { - upb_msgvisitor_field *fields; - int fields_len; -} upb_msgvisitor_msg; - -typedef struct { - uint16_t msgindex; - uint16_t fieldindex; - uint32_t arrayindex; // UINT32_MAX if not an array frame. -} upb_msgvisitor_frame; - -typedef struct { - upb_msgvisitor_msg *messages; - int messages_len; - upb_dispatcher dispatcher; -} upb_msgvisitor; - -// Initializes a msgvisitor that will push data from messages of the given -// msgdef to the given set of handlers. -void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h); -void upb_msgvisitor_uninit(upb_msgvisitor *v); - -void upb_msgvisitor_reset(upb_msgvisitor *v, void *m); -void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); - - -/* Standard writers. **********************************************************/ - -// Allocates a new stdmsg. -void *upb_stdmsg_new(const upb_msgdef *md); - -// Recursively frees any strings or submessages that the message refers to. -void upb_stdmsg_free(void *m, const upb_msgdef *md); - -void upb_stdmsg_sethas(void *_m, upb_value fval); - -// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit. 
-upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset); -upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset, - uint16_t subm_size, uint8_t subm_setbytes); - // Value writers for every in-memory type: write the data to a known offset -// from the closure "c" and set the hasbit (if any). -// TODO: can we get away with having only one for int64, uint64, double, etc? -// The main thing in the way atm is that the upb_value is strongly typed. -// in debug mode. +// from the closure "c." +// +// TODO(haberman): instead of having standard writer functions, should we have +// a bool in the accessor that says "write raw value to the field's offset"? upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val); @@ -216,94 +146,6 @@ upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setptr(void *c, upb_value fval, upb_value val); -// Value writers for repeated fields: the closure points to a standard array -// struct, appends the value to the end of the array, resizing with realloc() -// if necessary. -typedef struct { - char *ptr; - uint32_t len; // Number of elements present. - uint32_t size; // Number of elements allocated. 
-} upb_stdarray; - -void *upb_stdarray_append(upb_stdarray *a, size_t type_size); - -upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setptr_r(void *c, upb_value fval, upb_value val); - -// Writers for C strings (NULL-terminated): we can find a char* at a known -// offset from the closure "c". Calls realloc() on the pointer to allocate -// the memory (TODO: investigate whether checking malloc_usable_size() would -// be cheaper than realloc()). Also sets the hasbit, if any. -// -// Since the string is NULL terminated and does not store an explicit length, -// these are not suitable for binary data that can contain NULLs. -upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val); - -// Writers for length-delimited strings: we explicitly store the length, so -// the data can contain NULLs. Stores the data using upb_stdarray -// which is located at a known offset from the closure "c" (note that it -// is included inline rather than pointed to). Also sets the hasbit, if any. -upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val); -upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val); - -// Writers for startseq and startmsg which allocate (or reuse, if possible) -// a sub data structure (upb_stdarray or a submessage, respectively), -// setting the hasbit. If the hasbit is already set, the existing data -// structure is used verbatim. 
If the hasbit is not already set, the pointer -// is checked for NULL. If it is NULL, a new substructure is allocated, -// cleared, and used. If it is not NULL, the existing substructure is -// cleared and reused. -// -// If there is no hasbit, we always behave as if the hasbit was not set, -// so any existing data for this array or submessage is cleared. In most -// cases this will be fine since each array or non-repeated submessage should -// occur at most once in the stream. But if the client is using "concatenation -// as merging", it will want to make sure hasbits are allocated so merges can -// happen appropriately. -// -// If there was a demand for the behavior that absence of a hasbit acts as if -// the bit was always set, we could provide that also. But Clear() would need -// to act recursively, which is less efficient since it requires an extra pass -// over the tree. -upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval); -upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval); -upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval); - - -/* Standard readers. 
**********************************************************/ - -bool upb_stdmsg_has(const void *c, upb_value fval); -const void *upb_stdmsg_seqbegin(const void *c); - -upb_value upb_stdmsg_getint64(const void *c, upb_value fval); -upb_value upb_stdmsg_getint32(const void *c, upb_value fval); -upb_value upb_stdmsg_getuint64(const void *c, upb_value fval); -upb_value upb_stdmsg_getuint32(const void *c, upb_value fval); -upb_value upb_stdmsg_getdouble(const void *c, upb_value fval); -upb_value upb_stdmsg_getfloat(const void *c, upb_value fval); -upb_value upb_stdmsg_getbool(const void *c, upb_value fval); -upb_value upb_stdmsg_getptr(const void *c, upb_value fval); - -const void *upb_stdmsg_8byte_seqnext(const void *c, const void *iter); -const void *upb_stdmsg_4byte_seqnext(const void *c, const void *iter); -const void *upb_stdmsg_1byte_seqnext(const void *c, const void *iter); - -upb_value upb_stdmsg_seqgetint64(const void *c); -upb_value upb_stdmsg_seqgetint32(const void *c); -upb_value upb_stdmsg_seqgetuint64(const void *c); -upb_value upb_stdmsg_seqgetuint32(const void *c); -upb_value upb_stdmsg_seqgetdouble(const void *c); -upb_value upb_stdmsg_seqgetfloat(const void *c); -upb_value upb_stdmsg_seqgetbool(const void *c); -upb_value upb_stdmsg_seqgetptr(const void *c); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 06125ddf3d..b0e2392ab5 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -13,6 +13,33 @@ #include "upb/pb/decoder.h" #include "upb/pb/varint.h" +typedef struct { + uint8_t native_wire_type; + bool is_numeric; +} upb_decoder_typeinfo; + +static const upb_decoder_typeinfo upb_decoder_types[] = { + {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP + {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE + {UPB_WIRE_TYPE_32BIT, true}, // FLOAT + {UPB_WIRE_TYPE_VARINT, true}, // INT64 + {UPB_WIRE_TYPE_VARINT, true}, // UINT64 + {UPB_WIRE_TYPE_VARINT, true}, // INT32 + {UPB_WIRE_TYPE_64BIT, true}, // FIXED64 + 
{UPB_WIRE_TYPE_32BIT, true}, // FIXED32 + {UPB_WIRE_TYPE_VARINT, true}, // BOOL + {UPB_WIRE_TYPE_DELIMITED, false}, // STRING + {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP + {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE + {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES + {UPB_WIRE_TYPE_VARINT, true}, // UINT32 + {UPB_WIRE_TYPE_VARINT, true}, // ENUM + {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32 + {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64 + {UPB_WIRE_TYPE_VARINT, true}, // SINT32 + {UPB_WIRE_TYPE_VARINT, true}, // SINT64 +}; + /* upb_decoderplan ************************************************************/ #ifdef UPB_USE_JIT_X64 @@ -32,37 +59,6 @@ #include "upb/pb/decoder_x64.h" #endif -typedef struct { - upb_fhandlers base; - void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); -#ifdef UPB_USE_JIT_X64 - uint32_t jit_pclabel; - uint32_t jit_pclabel_notypecheck; -#endif -} upb_dplanfield; - -typedef struct { - upb_mhandlers base; -#ifdef UPB_USE_JIT_X64 - uint32_t jit_startmsg_pclabel; - uint32_t jit_endofbuf_pclabel; - uint32_t jit_endofmsg_pclabel; - uint32_t jit_dyndispatch_pclabel; - uint32_t jit_unknownfield_pclabel; - int32_t jit_parent_field_done_pclabel; - uint32_t max_field_number; - // Currently keyed on field number. Could also try keying it - // on encoded or decoded tag, or on encoded field number. - void **tablearray; -#endif -} upb_dplanmsg; - -static void *upb_decoderplan_fptrs[]; - -void upb_decoderplan_initfhandlers(upb_fhandlers *f) { - f->decode = upb_decoderplan_fptrs[f->type]; -} - upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { upb_decoderplan *p = malloc(sizeof(*p)); p->handlers = h; @@ -72,17 +68,6 @@ upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) { p->jit_code = NULL; if (allowjit) upb_decoderplan_makejit(p); #endif - // Set function pointers for each field's decode function. 
- for (int i = 0; i < h->msgs_len; i++) { - upb_mhandlers *m = h->msgs[i]; - for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); - !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - upb_itofhandlers_ent *e = upb_inttable_iter_value(i); - upb_fhandlers *f = e->f; - upb_decoderplan_initfhandlers(f); - } - } return p; } @@ -396,14 +381,6 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { upb_push_msg(d, f, upb_decoder_offset(d) + len); } -#define F(type) &upb_decode_ ## type -static void *upb_decoderplan_fptrs[] = { - &upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64), - F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING), - F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32), - F(SFIXED64), F(SINT32), F(SINT64)}; -#undef F - /* The main decoding loop *****************************************************/ @@ -431,16 +408,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { if (!upb_trydecode_varint32(d, &tag)) return NULL; uint8_t wire_type = tag & 0x7; uint32_t fieldnum = tag >> 3; - upb_itofhandlers_ent *e = upb_inttable_fastlookup( - d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent)); - upb_fhandlers *f = e ? e->f : NULL; + const upb_value *val = upb_inttable_lookup32(d->dispatch_table, fieldnum); + upb_fhandlers *f = val ? upb_value_getptr(*val) : NULL; + bool is_packed = false; if (f) { // Wire type check. - if (wire_type == upb_types[f->type].native_wire_type || - (wire_type == UPB_WIRE_TYPE_DELIMITED && - upb_types[f->type].is_numeric)) { + if (wire_type == upb_decoder_types[f->type].native_wire_type) { // Wire type is ok. + } else if ((wire_type == UPB_WIRE_TYPE_DELIMITED && + upb_decoder_types[f->type].is_numeric)) { + // Wire type is ok (and packed). 
+ is_packed = true; } else { f = NULL; } @@ -453,19 +432,18 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { if (fr->is_sequence && fr->f != f) { upb_dispatch_endseq(&d->dispatcher); upb_decoder_setmsgend(d); + fr = d->dispatcher.top; } - if (f && f->repeated && (!fr->is_sequence || fr->f != f)) { - uint64_t old_end = d->dispatcher.top->end_ofs; - upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f); - if (wire_type != UPB_WIRE_TYPE_DELIMITED || - upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) { - // Non-packed field -- this tag pertains to only a single message. - fr->end_ofs = old_end; - } else { + if (f && f->repeated && !fr->is_sequence) { + upb_dispatcher_frame *fr2 = upb_dispatch_startseq(&d->dispatcher, f); + if (is_packed) { // Packed primitive field. uint32_t len = upb_decode_varint32(d); - fr->end_ofs = upb_decoder_offset(d) + len; - fr->is_packed = true; + fr2->end_ofs = upb_decoder_offset(d) + len; + fr2->is_packed = true; + } else { + // Non-packed field -- this tag pertains to only a single message. + fr2->end_ofs = fr->end_ofs; } upb_decoder_setmsgend(d); } @@ -513,13 +491,37 @@ upb_success_t upb_decoder_decode(upb_decoder *d) { if (!d->top_is_packed) f = upb_decode_tag(d); if (!f) { // Sucessful EOF. We may need to dispatch a top-level implicit frame. 
- if (d->dispatcher.top == d->dispatcher.stack + 1) { - assert(d->dispatcher.top->is_sequence); + if (d->dispatcher.top->is_sequence) { + assert(d->dispatcher.top == d->dispatcher.stack + 1); upb_dispatch_endseq(&d->dispatcher); } + assert(d->dispatcher.top == d->dispatcher.stack); + upb_dispatch_endmsg(&d->dispatcher, &d->status); return UPB_OK; } - f->decode(d, f); + + switch (f->type) { + case UPB_TYPE_ENDGROUP: upb_endgroup(d, f); break; + case UPB_TYPE(DOUBLE): upb_decode_DOUBLE(d, f); break; + case UPB_TYPE(FLOAT): upb_decode_FLOAT(d, f); break; + case UPB_TYPE(INT64): upb_decode_INT64(d, f); break; + case UPB_TYPE(UINT64): upb_decode_UINT64(d, f); break; + case UPB_TYPE(INT32): upb_decode_INT32(d, f); break; + case UPB_TYPE(FIXED64): upb_decode_FIXED64(d, f); break; + case UPB_TYPE(FIXED32): upb_decode_FIXED32(d, f); break; + case UPB_TYPE(BOOL): upb_decode_BOOL(d, f); break; + case UPB_TYPE(STRING): + case UPB_TYPE(BYTES): upb_decode_STRING(d, f); break; + case UPB_TYPE(GROUP): upb_decode_GROUP(d, f); break; + case UPB_TYPE(MESSAGE): upb_decode_MESSAGE(d, f); break; + case UPB_TYPE(UINT32): upb_decode_UINT32(d, f); break; + case UPB_TYPE(ENUM): upb_decode_ENUM(d, f); break; + case UPB_TYPE(SFIXED32): upb_decode_SFIXED32(d, f); break; + case UPB_TYPE(SFIXED64): upb_decode_SFIXED64(d, f); break; + case UPB_TYPE(SINT32): upb_decode_SINT32(d, f); break; + case UPB_TYPE(SINT64): upb_decode_SINT64(d, f); break; + case UPB_TYPE_NONE: assert(false); break; + } upb_decoder_checkpoint(d); } } @@ -542,7 +544,6 @@ void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) { void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *closure) { assert(d->plan); - assert(upb_byteregion_discardofs(input) == upb_byteregion_startofs(input)); upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]); upb_status_clear(&d->status); diff --git a/upb/pb/decoder_x64.dasc b/upb/pb/decoder_x64.dasc index 
fa984ef8d2..f58e4033cd 100644 --- a/upb/pb/decoder_x64.dasc +++ b/upb/pb/decoder_x64.dasc @@ -9,8 +9,8 @@ |// parsing the specific message and calling specific handlers. |// |// Since the JIT can call other functions (the JIT'ted code is not a leaf -|// function) we must respect alignment rules. On OS X, this means aligning -|// the stack to 16 bytes. +|// function) we must respect alignment rules. All x86-64 systems require +|// 16-byte stack alignment. #include #include "dynasm/dasm_x86.h" @@ -103,7 +103,7 @@ void upb_reg_jit_gdb(upb_decoderplan *plan) { // Has to be a separate function, otherwise GCC will complain about // expressions like (&foo != NULL) because they will never evaluate // to false. -static void upb_assert_notnull(void *addr) { assert(addr != NULL); } +static void upb_assert_notnull(void *addr) { assert(addr != NULL); (void)addr; } |.arch x64 |.actionlist upb_jit_actionlist @@ -401,45 +401,10 @@ static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan, } } -#if 0 -// These appear not to speed things up, but keeping around for -// further experimentation. -static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size, - upb_fhandlers *f) { - | mov eax, STDARRAY:ARG1_64->len - | cmp eax, STDARRAY:ARG1_64->size - | jne >2 - // If array is full, fall back to actual function. - | loadfval f - | callp f->value - | jmp >3 - |2: - | mov rcx, STDARRAY:ARG1_64->ptr - | mov esi, eax - | add eax, 1 - - switch (size) { - case 8: - | mov [rcx + rsi * 8], ARG3_64 - break; - - case 4: - | mov [rcx + rsi * 4], ARG3_32 - break; - - case 1: - | mov [rcx + rsi * 4], ARG3_8 - break; - } - - | mov STDARRAY:ARG1_64->len, eax - |3: -} -#endif - static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, upb_fhandlers *f) { - // Call callbacks. + // Call callbacks. Specializing the append accessors didn't yield a speed + // increase in benchmarks. 
if (upb_issubmsgtype(f->type)) { if (f->type == UPB_TYPE(MESSAGE)) { | mov rsi, PTR @@ -457,7 +422,10 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | mov ARG1_64, CLOSURE | loadfval f | callp f->startsubmsg + | sethas CLOSURE, f->hasbit | mov CLOSURE, rdx + } else { + | sethas CLOSURE, f->hasbit } | mov qword FRAME->closure, CLOSURE // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK @@ -465,6 +433,7 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f); | call =>sub_m->jit_startmsg_pclabel; + | popframe upb_fhandlers_getmsg(f) // Call endsubmsg handler (if any). if (f->endsubmsg) { @@ -473,7 +442,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | loadfval f | callp f->endsubmsg } - | popframe upb_fhandlers_getmsg(f) // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK | mov DECODER->ptr, PTR } else { @@ -494,21 +462,6 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, } else if (f->value == &upb_stdmsg_setbool) { const upb_fielddef *fd = upb_value_getfielddef(f->fval); | mov [ARG1_64 + fd->offset], ARG3_8 -#if 0 - // These appear not to speed things up, but keeping around for - // further experimentation. - } else if (f->value == &upb_stdmsg_setint64_r || - f->value == &upb_stdmsg_setuint64_r || - f->value == &upb_stdmsg_setptr_r || - f->value == &upb_stdmsg_setdouble_r) { - upb_decoderplan_jit_doappend(plan, 8, f); - } else if (f->value == &upb_stdmsg_setint32_r || - f->value == &upb_stdmsg_setuint32_r || - f->value == &upb_stdmsg_setfloat_r) { - upb_decoderplan_jit_doappend(plan, 4, f); - } else if (f->value == &upb_stdmsg_setbool_r) { - upb_decoderplan_jit_doappend(plan, 1, f); -#endif } else if (f->value) { // Load closure and fval into arg registers. 
||#ifndef NDEBUG @@ -520,16 +473,26 @@ static void upb_decoderplan_jit_callcb(upb_decoderplan *plan, | loadfval f | callp f->value } - | sethas CLOSURE, f->valuehasbit + | sethas CLOSURE, f->hasbit // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK | mov DECODER->ptr, PTR } } +static uint64_t upb_get_encoded_tag(upb_fhandlers *f) { + uint32_t tag = (f->number << 3) | upb_decoder_types[f->type].native_wire_type; + uint64_t encoded_tag = upb_vencode32(tag); + // No tag should be greater than 5 bytes. + assert(encoded_tag <= 0xffffffffff); + return encoded_tag; +} + // PTR should point to the beginning of the tag. -static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag, - uint64_t next_tag, upb_mhandlers *m, +static void upb_decoderplan_jit_field(upb_decoderplan *plan, upb_mhandlers *m, upb_fhandlers *f, upb_fhandlers *next_f) { + uint64_t tag = upb_get_encoded_tag(f); + uint64_t next_tag = next_f ? upb_get_encoded_tag(next_f) : 0; + // PC-label for the dispatch table. // We check the wire type (which must be loaded in edx) because the // table is keyed on field number, not type. @@ -541,10 +504,13 @@ static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag, | mov rsi, FRAME->end_ofs | pushframe f, rsi, true if (f->startseq) { - | mov ARG1_64, CLOSURE + | mov ARG1_64, CLOSURE | loadfval f - | callp f->startseq - | mov CLOSURE, rdx + | callp f->startseq + | sethas CLOSURE, f->hasbit + | mov CLOSURE, rdx + } else { + | sethas CLOSURE, f->hasbit } | mov qword FRAME->closure, CLOSURE } @@ -590,6 +556,11 @@ static int upb_compare_uint32(const void *a, const void *b) { } static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { + |=>m->jit_afterstartmsg_pclabel: + // There was a call to get here, so we need to align the stack. + | sub rsp, 8 + | jmp >1 + |=>m->jit_startmsg_pclabel: // There was a call to get here, so we need to align the stack. 
| sub rsp, 8 @@ -602,6 +573,7 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { // TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK } + |1: | setmsgend m | check_eob m | mov ecx, dword [PTR] @@ -616,30 +588,19 @@ static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) { int num_keys = upb_inttable_count(&m->fieldtab); uint32_t *keys = malloc(num_keys * sizeof(*keys)); int idx = 0; - for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); - !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - keys[idx++] = upb_inttable_iter_key(i); + upb_inttable_iter i; + upb_inttable_begin(&i, &m->fieldtab); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + keys[idx++] = upb_inttable_iter_key(&i); } qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32); - upb_fhandlers *last_f = NULL; - uint64_t last_encoded_tag = 0; for(int i = 0; i < num_keys; i++) { - uint32_t fieldnum = keys[i]; - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum); - upb_fhandlers *f = e->f; - assert(f->number == fieldnum); - uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type; - uint64_t encoded_tag = upb_vencode32(tag); - // No tag should be greater than 5 bytes. - assert(encoded_tag <= 0xffffffffff); - if (last_f) upb_decoderplan_jit_field( - plan, last_encoded_tag, encoded_tag, m, last_f, f); - last_encoded_tag = encoded_tag; - last_f = f; + upb_fhandlers *f = upb_mhandlers_lookup(m, keys[i]); + upb_fhandlers *next_f = + (i + 1 < num_keys) ? 
upb_mhandlers_lookup(m, keys[i + 1]) : NULL; + upb_decoderplan_jit_field(plan, m, f, next_f); } - upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL); free(keys); @@ -733,18 +694,19 @@ static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f, static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) { m->jit_startmsg_pclabel = (*pclabel_count)++; + m->jit_afterstartmsg_pclabel = (*pclabel_count)++; m->jit_endofbuf_pclabel = (*pclabel_count)++; m->jit_endofmsg_pclabel = (*pclabel_count)++; m->jit_dyndispatch_pclabel = (*pclabel_count)++; m->jit_unknownfield_pclabel = (*pclabel_count)++; m->max_field_number = 0; upb_inttable_iter i; - for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); - i = upb_inttable_next(&m->fieldtab, i)) { - uint32_t key = upb_inttable_iter_key(i); + upb_inttable_begin(&i, &m->fieldtab); + for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uint32_t key = upb_inttable_iter_key(&i); m->max_field_number = UPB_MAX(m->max_field_number, key); - upb_itofhandlers_ent *e = upb_inttable_iter_value(i); - upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count); + upb_fhandlers *f = upb_value_getptr(upb_inttable_iter_value(&i)); + upb_decoderplan_jit_assignfieldlabs(f, pclabel_count); } // TODO: support large field numbers by either using a hash table or // generating code for a binary search. For now large field numbers @@ -784,11 +746,12 @@ static void upb_decoderplan_makejit(upb_decoderplan *plan) { // Create dispatch tables. for (int i = 0; i < h->msgs_len; i++) { upb_mhandlers *m = h->msgs[i]; + // We jump to after the startmsg handler since it is called before entering + // the JIT (either by upb_decoder or by a previous call to the JIT). 
m->jit_func = - plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel); + plan->jit_code + dasm_getpclabel(plan, m->jit_afterstartmsg_pclabel); for (uint32_t j = 0; j <= m->max_field_number; j++) { - upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j); - upb_fhandlers *f = e ? e->f : NULL; + upb_fhandlers *f = upb_mhandlers_lookup(m, j); if (f) { m->tablearray[j] = plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel); diff --git a/upb/pb/glue.c b/upb/pb/glue.c index 4949fe3e24..40b901dce1 100644 --- a/upb/pb/glue.c +++ b/upb/pb/glue.c @@ -1,84 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2010 Google Inc. See LICENSE for details. + * Copyright (c) 2010-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman */ #include "upb/bytestream.h" -#include "upb/descriptor.h" -#include "upb/msg.h" +#include "upb/descriptor/reader.h" #include "upb/pb/decoder.h" #include "upb/pb/glue.h" -#include "upb/pb/textprinter.h" - -bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md, - bool allow_jit, upb_status *status) { - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, str, len); - - upb_decoder d; - upb_handlers *h = upb_handlers_new(); - upb_accessors_reghandlers(h, md); - upb_decoderplan *p = upb_decoderplan_new(h, allow_jit); - upb_decoder_init(&d); - upb_handlers_unref(h); - upb_decoder_resetplan(&d, p, 0); - upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg); - upb_success_t ret = upb_decoder_decode(&d); - // stringsrc and the handlers registered by upb_accessors_reghandlers() - // should not suspend. 
- assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d))); - if (status) upb_status_copy(status, upb_decoder_status(&d)); - - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_decoderplan_unref(p); - return ret == UPB_OK; -} - -void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) { - void *msg = upb_stdmsg_new(md); - size_t len; - char *data = upb_readfile(fname, &len); - if (!data) goto err; - upb_strtomsg(data, len, msg, md, false, s); - if (!upb_ok(s)) goto err; - return msg; - -err: - upb_stdmsg_free(msg, md); - return NULL; -} - -#if 0 -void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md, - bool single_line) { - upb_stringsink strsink; - upb_stringsink_init(&strsink); - upb_stringsink_reset(&strsink, str); - - upb_textprinter *p = upb_textprinter_new(); - upb_handlers *h = upb_handlers_new(); - upb_textprinter_reghandlers(h, md); - upb_textprinter_reset(p, upb_stringsink_bytesink(&strsink), single_line); - - upb_status status = UPB_STATUS_INIT; - upb_msg_runhandlers(msg, md, h, p, &status); - // None of {upb_msg_runhandlers, upb_textprinter, upb_stringsink} should be - // capable of returning an error. - assert(upb_ok(&status)); - upb_status_uninit(&status); - - upb_stringsink_uninit(&strsink); - upb_textprinter_free(p); - upb_handlers_unref(h); -} -#endif upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, - upb_status *status) { + void *owner, upb_status *status) { upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str, len); @@ -104,35 +37,20 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, upb_descreader_uninit(&r); return NULL; } - upb_def **defs = upb_descreader_getdefs(&r, n); + upb_def **defs = upb_descreader_getdefs(&r, owner, n); upb_def **defscopy = malloc(sizeof(upb_def*) * (*n)); memcpy(defscopy, defs, sizeof(upb_def*) * (*n)); upb_descreader_uninit(&r); - // Set default accessors and layouts on all messages. 
- for(int i = 0; i < *n; i++) { - upb_def *def = defscopy[i]; - upb_msgdef *md = upb_dyncast_msgdef(def); - if (!md) continue; - // For field in msgdef: - upb_msg_iter i; - for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) { - upb_fielddef *f = upb_msg_iter_field(i); - upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f)); - } - upb_msgdef_layout(md); - } - return defscopy; } bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len, upb_status *status) { int n; - upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, status); + upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status); if (!defs) return false; - bool success = upb_symtab_add(s, defs, n, status); - for(int i = 0; i < n; i++) upb_def_unref(defs[i]); + bool success = upb_symtab_add(s, defs, n, &defs, status); free(defs); return success; } diff --git a/upb/pb/glue.h b/upb/pb/glue.h index ff8c85e535..6179d8dcac 100644 --- a/upb/pb/glue.h +++ b/upb/pb/glue.h @@ -1,7 +1,7 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2011 Google Inc. See LICENSE for details. + * Copyright (c) 2011-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman * * upb's core components like upb_decoder and upb_msg are carefully designed to @@ -34,25 +34,12 @@ extern "C" { #endif -// Decodes the given string, which must be in protobuf binary format, to the -// given upb_msg with msgdef "md", storing the status of the operation in "s". -bool upb_strtomsg(const char *str, size_t len, void *msg, - const upb_msgdef *md, bool allow_jit, upb_status *s); - -// Parses the given file into a new message of the given type. Caller owns -// the returned message (or NULL if an error occurred). 
-void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s); - -//void upb_msgtotext(struct _upb_string *str, void *msg, -// struct _upb_msgdef *md, bool single_line); - - // Loads all defs from the given protobuf binary descriptor, setting default // accessors and a default layout on all messages. The caller owns the // returned array of defs, which will be of length *n. On error NULL is // returned and status is set (if non-NULL). upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, - upb_status *status); + void *owner, upb_status *status); // Like the previous but also adds the loaded defs to the given symtab. bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str, diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c index 3f68f90e9c..0d9c96745c 100644 --- a/upb/pb/textprinter.c +++ b/upb/pb/textprinter.c @@ -96,7 +96,7 @@ err: const upb_fielddef *f = upb_value_getfielddef(fval); \ uint64_t start_ofs = upb_bytesink_getoffset(p->sink); \ CHECK(upb_textprinter_indent(p)); \ - CHECK(upb_bytesink_writestr(p->sink, f->name)); \ + CHECK(upb_bytesink_writestr(p->sink, upb_fielddef_name(f))); \ CHECK(upb_bytesink_writestr(p->sink, ": ")); \ CHECK(upb_bytesink_printf(p->sink, fmt, upb_value_get ## member(val))); \ CHECK(upb_textprinter_endfield(p)); \ @@ -124,7 +124,8 @@ static upb_flow_t upb_textprinter_putenum(void *_p, upb_value fval, upb_textprinter *p = _p; uint64_t start_ofs = upb_bytesink_getoffset(p->sink); const upb_fielddef *f = upb_value_getfielddef(fval); - upb_enumdef *enum_def = upb_downcast_enumdef(f->def); + const upb_enumdef *enum_def = + upb_downcast_enumdef_const(upb_fielddef_subdef(f)); const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val)); if (label) { CHECK(upb_bytesink_writestr(p->sink, label)); @@ -157,7 +158,7 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) { uint64_t start_ofs = upb_bytesink_getoffset(p->sink); const upb_fielddef *f = 
upb_value_getfielddef(fval); CHECK(upb_textprinter_indent(p)); - CHECK(upb_bytesink_printf(p->sink, "%s {", f->name)); + CHECK(upb_bytesink_printf(p->sink, "%s {", upb_fielddef_name(f))); if (!p->single_line) CHECK(upb_bytesink_putc(p->sink, '\n')); p->indent_depth++; diff --git a/upb/pb/varint.h b/upb/pb/varint.h index 815a7a1ea2..c0e01348fd 100644 --- a/upb/pb/varint.h +++ b/upb/pb/varint.h @@ -19,6 +19,16 @@ extern "C" { #endif +// A list of types as they are encoded on-the-wire. +typedef enum { + UPB_WIRE_TYPE_VARINT = 0, + UPB_WIRE_TYPE_64BIT = 1, + UPB_WIRE_TYPE_DELIMITED = 2, + UPB_WIRE_TYPE_START_GROUP = 3, + UPB_WIRE_TYPE_END_GROUP = 4, + UPB_WIRE_TYPE_32BIT = 5, +} upb_wiretype_t; + // The maximum number of bytes that it takes to encode a 64-bit varint. // Note that with a better encoding this could be 9 (TODO: write up a // wiki document about this). diff --git a/upb/refcount.c b/upb/refcount.c new file mode 100644 index 0000000000..a15547a5d2 --- /dev/null +++ b/upb/refcount.c @@ -0,0 +1,224 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include +#include +#include "upb/refcount.h" + +// TODO(haberman): require client to define these if ref debugging is on. 
+#ifndef UPB_LOCK +#define UPB_LOCK +#endif + +#ifndef UPB_UNLOCK +#define UPB_UNLOCK +#endif + +/* arch-specific atomic primitives *******************************************/ + +#ifdef UPB_THREAD_UNSAFE ////////////////////////////////////////////////////// + +INLINE void upb_atomic_inc(uint32_t *a) { (*a)++; } +INLINE bool upb_atomic_dec(uint32_t *a) { return --(*a) == 0; } + +#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) || __GNUC__ > 4 /////////////////// + +INLINE void upb_atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } +INLINE bool upb_atomic_dec(uint32_t *a) { + return __sync_sub_and_fetch(a, 1) == 0; +} + +#elif defined(WIN32) /////////////////////////////////////////////////////////// + +#include <windows.h> // Declares InterlockedIncrement/InterlockedDecrement. + +INLINE void upb_atomic_inc(uint32_t *a) { InterlockedIncrement((volatile LONG*)a); } // Same uint32_t* signature as the other branches; callers pass r->count. +INLINE bool upb_atomic_dec(uint32_t *a) { + return InterlockedDecrement((volatile LONG*)a) == 0; // True when the count reached zero. +} + +#else +#error Atomic primitives not defined for your platform/CPU. \ + Implement them or compile with UPB_THREAD_UNSAFE. +#endif + +// Reserved index values. +#define UPB_INDEX_UNDEFINED UINT16_MAX +#define UPB_INDEX_NOT_IN_STACK (UINT16_MAX - 1) + +static void upb_refcount_merge(upb_refcount *r, upb_refcount *from) { + if (upb_refcount_merged(r, from)) return; + *r->count += *from->count; + free(from->count); + upb_refcount *base = from; + + // Set all refcount pointers in the "from" chain to the merged refcount. + do { from->count = r->count; } while ((from = from->next) != base); + + // Merge the two circularly linked lists by swapping their next pointers.
+ upb_refcount *tmp = r->next; + r->next = base->next; + base->next = tmp; +} + +// Tarjan's algorithm, see: +// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm + +typedef struct { + int index; + upb_refcount **stack; + int stack_len; + upb_getsuccessors *func; +} upb_tarjan_state; + +static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state); + +void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *_state) { + upb_tarjan_state *state = _state; + if (subobj->index == UPB_INDEX_UNDEFINED) { + // Subdef has not yet been visited; recurse on it. + upb_refcount_dofindscc(subobj, state); + obj->lowlink = UPB_MIN(obj->lowlink, subobj->lowlink); + } else if (subobj->index != UPB_INDEX_NOT_IN_STACK) { + // Subdef is in the stack and hence in the current SCC. + obj->lowlink = UPB_MIN(obj->lowlink, subobj->index); + } +} + +static void upb_refcount_dofindscc(upb_refcount *obj, upb_tarjan_state *state) { + obj->index = state->index; + obj->lowlink = state->index; + state->index++; + state->stack[state->stack_len++] = obj; + + state->func(obj, state); // Visit successors. + + if (obj->lowlink == obj->index) { + upb_refcount *scc_obj; + while ((scc_obj = state->stack[--state->stack_len]) != obj) { + upb_refcount_merge(obj, scc_obj); + scc_obj->index = UPB_INDEX_NOT_IN_STACK; + } + obj->index = UPB_INDEX_NOT_IN_STACK; + } +} + +bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) { + // TODO(haberman): allocate less memory. We can't use n as a bound because + // it doesn't include fielddefs. Could either use a dynamically-resizing + // array or think of some other way. 
+ upb_tarjan_state state = {0, malloc(UINT16_MAX * sizeof(void*)), 0, func}; + if (state.stack == NULL) return false; + for (int i = 0; i < n; i++) + if (refs[i]->index == UPB_INDEX_UNDEFINED) + upb_refcount_dofindscc(refs[i], &state); + free(state.stack); + return true; +} + + +/* upb_refcount **************************************************************/ + +bool upb_refcount_init(upb_refcount *r, void *owner) { + r->count = malloc(sizeof(uint32_t)); + if (!r->count) return false; + // Initializing this here means upb_refcount_findscc() can only run once for + // each refcount; may need to revise this to be more flexible. + r->index = UPB_INDEX_UNDEFINED; + r->next = r; +#ifdef UPB_DEBUG_REFS + // We don't detect malloc() failures for UPB_DEBUG_REFS. + upb_inttable_init(&r->refs); + *r->count = 0; + upb_refcount_ref(r, owner); +#else + *r->count = 1; +#endif + return true; +} + +void upb_refcount_uninit(upb_refcount *r) { + (void)r; +#ifdef UPB_DEBUG_REFS + assert(upb_inttable_count(&r->refs) == 0); + upb_inttable_uninit(&r->refs); +#endif +} + +// Moves an existing ref from owner "from" to owner "to", without changing the +// overall ref count. +void upb_refcount_donateref(upb_refcount *r, void *from, void *to) { + (void)r; (void)from; (void)to; + assert(from != to); +#ifdef UPB_DEBUG_REFS + upb_refcount_ref(r, to); + upb_refcount_unref(r, from); +#endif +} + +// Thread-safe operations ////////////////////////////////////////////////////// + +// Ref and unref are thread-safe. +void upb_refcount_ref(upb_refcount *r, void *owner) { + (void)owner; + upb_atomic_inc(r->count); +#ifdef UPB_DEBUG_REFS + UPB_LOCK; + // Caller must not already own a ref. + assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL); + + // If a ref is leaked we want to blame the leak on whoever leaked the + // ref, not on who originally allocated the refcounted object. We accomplish + // this as follows.
When a ref is taken in DEBUG_REFS mode, we malloc() some + // memory and set up pointers like so: + // + // upb_refcount + // +----------+ +---------+ + // | count |<-+ | + // +----------+ +----------+ + // | table |---X-->| malloc'd | + // +----------+ | memory | + // +----------+ + // + // Since the "malloc'd memory" is allocated inside of "ref" and free'd in + // unref, it will cause a leak if not unref'd. And since the leaked memory + // points to the object itself, the object will be considered "indirectly + // lost" by tools like Valgrind and not shown unless requested (which is good + // because the object's creator may not be responsible for the leak). But we + // have to hide the pointer marked "X" above from Valgrind, otherwise the + // malloc'd memory will appear to be indirectly leaked and the object itself + // will still be considered the primary leak. We hide this pointer from + // Valgrind (et al.) by doing a bitwise NOT on it. + upb_refcount **target = malloc(sizeof(void*)); + uintptr_t obfuscated = ~(uintptr_t)target; + *target = r; + upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); + UPB_UNLOCK; +#endif +} + +bool upb_refcount_unref(upb_refcount *r, void *owner) { + (void)owner; + bool ret = upb_atomic_dec(r->count); +#ifdef UPB_DEBUG_REFS + UPB_LOCK; + upb_value v; + bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v); + assert(success); + if (success) { + // Must un-obfuscate the pointer (see above). + free((void*)(~upb_value_getuint64(v))); + } + UPB_UNLOCK; +#endif + if (ret) free(r->count); + return ret; +} + +bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) { + return r->count == r2->count; +} diff --git a/upb/refcount.h b/upb/refcount.h new file mode 100644 index 0000000000..cb2bda9aa5 --- /dev/null +++ b/upb/refcount.h @@ -0,0 +1,70 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman + * + * A thread-safe refcount that can optionally track references for debugging + * purposes. It helps avoid circular references by allowing a + * strongly-connected component in the graph to share a refcount. + * + * This interface is internal to upb. + */ + +#ifndef UPB_REFCOUNT_H_ +#define UPB_REFCOUNT_H_ + +#include +#include +#include "upb/table.h" + +#ifndef NDEBUG +#define UPB_DEBUG_REFS +#endif + +typedef struct _upb_refcount { + uint32_t *count; + struct _upb_refcount *next; // Circularly-linked list of this SCC. + uint16_t index; // For SCC algorithm. + uint16_t lowlink; // For SCC algorithm. +#ifdef UPB_DEBUG_REFS + upb_inttable refs; +#endif +} upb_refcount; + +// NON THREAD SAFE operations ////////////////////////////////////////////////// + +// Initializes the refcount with a single ref for the given owner. Returns +// false if memory could not be allocated. +bool upb_refcount_init(upb_refcount *r, void *owner); + +// Uninitializes the refcount. May only be called after unref() returns true. +void upb_refcount_uninit(upb_refcount *r); + +// Moves an existing ref from owner "from" to owner "to", without changing the +// overall ref count. +void upb_refcount_donateref(upb_refcount *r, void *from, void *to); + +// Finds strongly-connected components among some set of objects and merges all +// refcounts that share a SCC. The given function will be called when the +// algorithm needs to visit children of a particular object; the function +// should call upb_refcount_visit() once for each child obj. +// +// Returns false if memory allocation failed.
+typedef void upb_getsuccessors(upb_refcount *obj, void*); +bool upb_refcount_findscc(upb_refcount **objs, int n, upb_getsuccessors *func); +void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure); + +// Thread-safe operations ////////////////////////////////////////////////////// + +// Increases the ref count, the new ref is owned by "owner" which must not +// already own a ref. Circular reference chains are not allowed. +void upb_refcount_ref(upb_refcount *r, void *owner); + +// Release a ref owned by owner, returns true if that was the last ref. +bool upb_refcount_unref(upb_refcount *r, void *owner); + +// Returns true if these two objects share a refcount. +bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2); + +#endif // UPB_REFCOUNT_H_ diff --git a/upb/table.c b/upb/table.c index 31c91b145a..4e3544eea6 100644 --- a/upb/table.c +++ b/upb/table.c @@ -4,8 +4,10 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * There are a few printf's strewn throughout this file, uncommenting them - * can be useful for debugging. + * Implementation is heavily inspired by Lua's ltable.c. + * + * TODO: for table iteration we use (array - 1) in several places; is this + * undefined behavior? If so find a better solution. */ #include "upb/table.h" @@ -14,6 +16,8 @@ #include #include +#define UPB_MAXARRSIZE 16 // 64k. + static const double MAX_LOAD = 0.85; // The minimum percentage of an array part that we will allow. This is a @@ -21,385 +25,319 @@ static const double MAX_LOAD = 0.85; // cache effects). The lower this is, the more memory we'll use. 
static const double MIN_DENSITY = 0.1; +int upb_log2(uint64_t v) { // floor(log2(v)), clamped to UPB_MAXARRSIZE; 0 for v == 0. +#ifdef __GNUC__ + int ret = v ? 63 - __builtin_clzll(v) : 0; // 64-bit builtin (v is uint64_t); the builtin is undefined for 0. +#else + int ret = 0; + while (v >>= 1) ret++; +#endif + return UPB_MIN(UPB_MAXARRSIZE, ret); +} + +static upb_tabkey upb_strkey(const char *str) { + upb_tabkey k; + k.str = (char*)str; + return k; +} + static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed); +typedef upb_tabent *upb_hashfunc_t(const upb_table *t, upb_tabkey key); +typedef bool upb_eqlfunc_t(upb_tabkey k1, upb_tabkey k2); /* Base table (shared code) ***************************************************/ -static uint32_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } -static size_t upb_table_entrysize(const upb_table *t) { return t->entry_size; } -static size_t upb_table_valuesize(const upb_table *t) { return t->value_size; } +static size_t upb_table_size(const upb_table *t) { return 1 << t->size_lg2; } + +static bool upb_table_isfull(upb_table *t) { + return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; +} -void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size) { +static bool upb_table_init(upb_table *t, uint8_t size_lg2) { t->count = 0; - t->entry_size = entry_size; - t->size_lg2 = 1; - while(upb_table_size(t) < size) t->size_lg2++; - size_t bytes = upb_table_size(t) * t->entry_size; + t->size_lg2 = size_lg2; + size_t bytes = upb_table_size(t) * sizeof(upb_tabent); t->mask = upb_table_size(t) - 1; t->entries = malloc(bytes); + if (!t->entries) return false; + memset(t->entries, 0, bytes); + return true; } -void upb_table_free(upb_table *t) { free(t->entries); } +static void upb_table_uninit(upb_table *t) { free(t->entries); } -/* upb_inttable ***************************************************************/ +static bool upb_tabent_isempty(const upb_tabent *e) { return e->key.num == 0; } -static upb_inttable_entry *intent(const upb_inttable *t, int32_t i) { - //printf("looking up int entry %d, size of entry: %d\n", i,
t->t.entry_size); - return UPB_INDEX(t->t.entries, i, t->t.entry_size); +static upb_tabent *upb_table_emptyent(const upb_table *t) { + upb_tabent *e = t->entries + upb_table_size(t); + while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } } -static uint32_t upb_inttable_hashtablesize(const upb_inttable *t) { - return upb_table_size(&t->t); +static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + upb_tabent *e = hash(t, key); + if (upb_tabent_isempty(e)) return NULL; + while (1) { + if (eql(e->key, key)) return &e->val; + if ((e = e->next) == NULL) return NULL; + } } -void upb_inttable_sizedinit(upb_inttable *t, uint32_t arrsize, uint32_t hashsize, - uint16_t value_size) { - size_t entsize = _upb_inttable_entrysize(value_size); - upb_table_init(&t->t, hashsize, entsize); - for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { - upb_inttable_entry *e = intent(t, i); - e->hdr.key = 0; - e->hdr.next = UPB_END_OF_CHAIN; - e->val.has_entry = 0; +// The given key must not already exist in the table. +static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + assert(upb_table_lookup(t, key, hash, eql) == NULL); + t->count++; + upb_tabent *mainpos_e = hash(t, key); + upb_tabent *our_e = mainpos_e; + if (upb_tabent_isempty(mainpos_e)) { our_e->next = NULL; /* slot may keep a stale link from upb_table_remove() */ } else { // Collision. + upb_tabent *new_e = upb_table_emptyent(t); + upb_tabent *chain = hash(t, mainpos_e->key); // Head of collider's chain. + if (chain == mainpos_e) { + // Existing ent is in its main position (it has the same hash as us, and + // is the head of our chain). Insert to new ent and append to this chain. + new_e->next = mainpos_e->next; + mainpos_e->next = new_e; + our_e = new_e; + } else { + // Existing ent is not in its main position (it is a node in some other + // chain). This implies that no existing ent in the table has our hash.
+ // Evict it (updating its chain) and use its ent for head of our chain. + *new_e = *mainpos_e; // copies next. + while (chain->next != mainpos_e) chain = chain->next; + chain->next = new_e; + our_e = mainpos_e; + our_e->next = NULL; + } } - t->t.value_size = value_size; - // Always make the array part at least 1 long, so that we know key 0 - // won't be in the hash part (which lets us speed up that code path). - t->array_size = UPB_MAX(1, arrsize); - t->array = malloc(upb_table_valuesize(&t->t) * t->array_size); - t->array_count = 0; - for (uint32_t i = 0; i < t->array_size; i++) { - upb_inttable_value *val = UPB_INDEX(t->array, i, upb_table_valuesize(&t->t)); - val->has_entry = false; + our_e->key = key; + our_e->val = val; + assert(upb_table_lookup(t, key, hash, eql) == &our_e->val); +} + +static bool upb_table_remove(upb_table *t, upb_tabkey key, upb_value *val, + upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + upb_tabent *chain = hash(t, key); + if (eql(chain->key, key)) { + t->count--; + if (val) *val = chain->val; + if (chain->next) { + upb_tabent *move = chain->next; + *chain = *move; + move->key.num = 0; // Make the slot empty. + } else { + chain->key.num = 0; // Make the slot empty. + } + return true; + } else { + while (chain->next && !eql(chain->next->key, key)) + chain = chain->next; + if (chain->next) { + // Found element to remove. 
+ if (val) *val = chain->next->val; + chain->next->key.num = 0; + chain->next = chain->next->next; + t->count--; + return true; + } else { + return false; + } } } -void upb_inttable_init(upb_inttable *t, uint32_t hashsize, uint16_t value_size) { - upb_inttable_sizedinit(t, 0, hashsize, value_size); +static upb_tabent *upb_table_next(const upb_table *t, upb_tabent *e) { + upb_tabent *end = t->entries + upb_table_size(t); + do { if (++e == end) return NULL; } while(e->key.num == 0); + return e; } -void upb_inttable_free(upb_inttable *t) { - upb_table_free(&t->t); - free(t->array); +static upb_tabent *upb_table_begin(const upb_table *t) { + return upb_table_next(t, t->entries - 1); } -static uint32_t empty_intbucket(upb_inttable *table) -{ - // TODO: does it matter that this is biased towards the front of the table? - for(uint32_t i = 0; i < upb_inttable_hashtablesize(table); i++) { - upb_inttable_entry *e = intent(table, i); - if(!e->val.has_entry) return i; - } - assert(false); - return 0; + +/* upb_strtable ***************************************************************/ + +// A simple "subclass" of upb_table that only adds a hash function for strings. + +static upb_tabent *upb_strhash(const upb_table *t, upb_tabkey key) { + // Could avoid the strlen() by using a hash function that terminates on NULL. + return t->entries + (MurmurHash2(key.str, strlen(key.str), 0) & t->mask); } -// The insert routines have a lot more code duplication between int/string -// variants than I would like, but there's just a bit too much that varies to -// parameterize them. -static void intinsert(upb_inttable *t, uint32_t key, const void *val) { - assert(upb_inttable_lookup(t, key) == NULL); - upb_inttable_value *table_val; - if (_upb_inttable_isarrkey(t, key)) { - table_val = UPB_INDEX(t->array, key, upb_table_valuesize(&t->t)); - t->array_count++; - //printf("Inserting key %d to Array part! 
%p\n", key, table_val); - } else { - t->t.count++; - uint32_t bucket = _upb_inttable_bucket(t, key); - upb_inttable_entry *table_e = intent(t, bucket); - //printf("Hash part! Inserting into bucket %d?\n", bucket); - if(table_e->val.has_entry) { /* Collision. */ - //printf("Collision!\n"); - if(bucket == _upb_inttable_bucket(t, table_e->hdr.key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_intbucket(t); - while (table_e->hdr.next != UPB_END_OF_CHAIN) - table_e = intent(t, table_e->hdr.next); - table_e->hdr.next = empty_bucket; - table_e = intent(t, empty_bucket); - } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. */ - uint32_t empty_bucket = empty_intbucket(t); - uint32_t evictee_bucket = _upb_inttable_bucket(t, table_e->hdr.key); - memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_inttable_entry *evictee_e = intent(t, evictee_bucket); - while(1) { - assert(evictee_e->val.has_entry); - assert(evictee_e->hdr.next != UPB_END_OF_CHAIN); - if(evictee_e->hdr.next == bucket) { - evictee_e->hdr.next = empty_bucket; - break; - } - evictee_e = intent(t, evictee_e->hdr.next); - } - /* table_e remains set to our mainpos. */ - } - } - //printf("Inserting! to:%p, copying to: %p\n", table_e, &table_e->val); - table_val = &table_e->val; - table_e->hdr.key = key; - table_e->hdr.next = UPB_END_OF_CHAIN; - } - memcpy(table_val, val, upb_table_valuesize(&t->t)); - table_val->has_entry = true; - assert(upb_inttable_lookup(t, key) == table_val); +static bool upb_streql(upb_tabkey k1, upb_tabkey k2) { + return strcmp(k1.str, k2.str) == 0; } -// Insert all elements from src into dest. Caller ensures that a resize will -// not be necessary. 
-static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) { - for(upb_inttable_iter i = upb_inttable_begin(src); !upb_inttable_done(i); - i = upb_inttable_next(src, i)) { - //printf("load check: %d %d\n", upb_table_count(&dst->t), upb_inttable_hashtablesize(dst)); - assert((double)(upb_table_count(&dst->t)) / - upb_inttable_hashtablesize(dst) <= MAX_LOAD); - intinsert(dst, upb_inttable_iter_key(i), upb_inttable_iter_value(i)); - } +bool upb_strtable_init(upb_strtable *t) { return upb_table_init(&t->t, 4); } + +void upb_strtable_uninit(upb_strtable *t) { + for (size_t i = 0; i < upb_table_size(&t->t); i++) + free(t->t.entries[i].key.str); + upb_table_uninit(&t->t); } -void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) { - if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) { - //printf("RESIZE!\n"); - // Need to resize. Allocate new table with double the size of however many - // elements we have now, add old elements to it. We create the new hash - // table without an array part, even if the old table had an array part. - // If/when the user calls upb_inttable_compact() again, we'll create an - // array part then. - upb_inttable new_table; - //printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t)); - upb_inttable_init(&new_table, upb_inttable_count(t)*2, upb_table_valuesize(&t->t)); - upb_inttable_insertall(&new_table, t); - upb_inttable_free(t); +bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) { + if (upb_table_isfull(&t->t)) { + // Need to resize. New table of double the size, add old elements to it. 
+ upb_strtable new_table; + if (!upb_table_init(&new_table.t, t->t.size_lg2 + 1)) return false; + upb_strtable_iter i; + upb_strtable_begin(&i, t); + for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strtable_insert( + &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i)); + } + upb_strtable_uninit(t); *t = new_table; } - intinsert(t, key, val); + if ((k = strdup(k)) == NULL) return false; + upb_table_insert(&t->t, upb_strkey(k), v, &upb_strhash, &upb_streql); + return true; } -void upb_inttable_compact(upb_inttable *t) { - // Find the largest array part we can that satisfies the MIN_DENSITY - // definition. For now we just count down powers of two. - uint32_t largest_key = 0; - for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); - i = upb_inttable_next(t, i)) { - largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i)); - } - int lg2_array = 0; - while ((1UL << lg2_array) < largest_key) ++lg2_array; - ++lg2_array; // Undo the first iteration. - size_t array_size = 0; - int array_count = 0; - while (lg2_array > 0) { - array_size = (1 << --lg2_array); - //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t)); - if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) { - // Even if 100% of the keys were in the array pary, an array of this - // size would not be dense enough. 
- continue; - } - array_count = 0; - for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); - i = upb_inttable_next(t, i)) { - if (upb_inttable_iter_key(i) < array_size) - array_count++; - } - //printf("There would be %d things in that array\n", array_count); - if ((double)array_count / array_size >= MIN_DENSITY) break; - } - upb_inttable new_table; - int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD; - //printf("array_count: %d, array_size: %d, hash_size: %d, table size: %d\n", array_count, array_size, hash_size, upb_inttable_count(t)); - upb_inttable_sizedinit(&new_table, array_size, hash_size, - upb_table_valuesize(&t->t)); - //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size); - upb_inttable_insertall(&new_table, t); - upb_inttable_free(t); - *t = new_table; +upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key) { + return upb_table_lookup(&t->t, upb_strkey(key), &upb_strhash, &upb_streql); } -upb_inttable_iter upb_inttable_begin(const upb_inttable *t) { - upb_inttable_iter iter = {-1, NULL, true}; // -1 will overflow to 0 on the first iteration. - return upb_inttable_next(t, iter); +void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { + i->t = t; + i->e = upb_table_begin(&t->t); } -upb_inttable_iter upb_inttable_next(const upb_inttable *t, - upb_inttable_iter iter) { - const size_t hdrsize = sizeof(upb_inttable_header); - const size_t entsize = upb_table_entrysize(&t->t); - if (iter.array_part) { - while (++iter.key < t->array_size) { - //printf("considering value %d\n", iter.key); - iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size); - if (iter.value->has_entry) return iter; - } - //printf("Done with array part!\n"); - iter.array_part = false; - // Point to the value of the table[-1] entry. 
- iter.value = UPB_INDEX(intent(t, -1), 1, hdrsize); - } - void *end = intent(t, upb_inttable_hashtablesize(t)); - // Point to the entry for the value that was previously in iter. - upb_inttable_entry *e = UPB_INDEX(iter.value, -1, hdrsize); - do { - e = UPB_INDEX(e, 1, entsize); - //printf("considering value %p (val: %p)\n", e, &e->val); - if(e == end) { - //printf("No values.\n"); - iter.value = NULL; - return iter; - } - } while(!e->val.has_entry); - //printf("USING VALUE! %p\n", e); - iter.key = e->hdr.key; - iter.value = &e->val; - return iter; +void upb_strtable_next(upb_strtable_iter *i) { + i->e = upb_table_next(&i->t->t, i->e); } -/* upb_strtable ***************************************************************/ +/* upb_inttable ***************************************************************/ -static upb_strtable_entry *strent(const upb_strtable *t, int32_t i) { - //fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t)); - assert(i <= (int32_t)upb_table_size(&t->t)); - return UPB_INDEX(t->t.entries, i, t->t.entry_size); -} +// For inttables we use a hybrid structure where small keys are kept in an +// array and large keys are put in the hash table. -static uint32_t upb_strtable_size(const upb_strtable *t) { - return upb_table_size(&t->t); +static bool upb_inteql(upb_tabkey k1, upb_tabkey k2) { + return k1.num == k2.num; } -void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) { - t->t.value_size = valuesize; - size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8); - upb_table_init(&t->t, size, entsize); - for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { - upb_strtable_entry *e = strent(t, i); - e->hdr.key = NULL; - e->hdr.next = UPB_END_OF_CHAIN; - } +size_t upb_inttable_count(const upb_inttable *t) { + return t->t.count + t->array_count; } -void upb_strtable_free(upb_strtable *t) { - // Free keys from the strtable. 
- upb_strtable_iter i; - for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) - free((char*)upb_strtable_iter_key(&i)); - upb_table_free(&t->t); +bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2) { + if (!upb_table_init(&t->t, hsize_lg2)) return false; + // Always make the array part at least 1 long, so that we know key 0 + // won't be in the hash part, which simplifies things. + t->array_size = UPB_MAX(1, asize); + t->array_count = 0; + size_t array_bytes = t->array_size * sizeof(upb_value); + t->array = malloc(array_bytes); + if (!t->array) { + upb_table_uninit(&t->t); + return false; + } + memset(t->array, 0xff, array_bytes); + return true; } -static uint32_t strtable_bucket(const upb_strtable *t, const char *key) { - uint32_t hash = MurmurHash2(key, strlen(key), 0); - return (hash & t->t.mask); +bool upb_inttable_init(upb_inttable *t) { + return upb_inttable_sizedinit(t, 0, 4); } -void *upb_strtable_lookup(const upb_strtable *t, const char *key) { - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *e; - do { - e = strent(t, bucket); - if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val; - } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN); - return NULL; +void upb_inttable_uninit(upb_inttable *t) { + upb_table_uninit(&t->t); + free(t->array); } -void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len) { - // TODO: improve. - char *key2 = malloc(len+1); - memcpy(key2, key, len); - key2[len] = '\0'; - void *ret = upb_strtable_lookup(t, key2); - free(key2); - return ret; +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { + assert(upb_arrhas(val)); + if (key < t->array_size) { + assert(!upb_arrhas(t->array[key])); + t->array_count++; + t->array[key] = val; + } else { + if (upb_table_isfull(&t->t)) { + // Need to resize the hash part, but we re-use the array part. 
+ upb_table new_table; + if (!upb_table_init(&new_table, t->t.size_lg2 + 1)) return false; + upb_tabent *e; + for (e = upb_table_begin(&t->t); e; e = upb_table_next(&t->t, e)) + upb_table_insert(&new_table, e->key, e->val, &upb_inthash, &upb_inteql); + upb_table_uninit(&t->t); + t->t = new_table; + } + upb_table_insert(&t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); + } + return true; } -static uint32_t empty_strbucket(upb_strtable *table) { - // TODO: does it matter that this is biased towards the front of the table? - for(uint32_t i = 0; i < upb_strtable_size(table); i++) { - upb_strtable_entry *e = strent(table, i); - if(!e->hdr.key) return i; +upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key) { + if (key < t->array_size) { + upb_value *v = &t->array[key]; + return upb_arrhas(*v) ? v : NULL; } - assert(false); - return 0; + return upb_table_lookup(&t->t, upb_intkey(key), &upb_inthash, &upb_inteql); } -static void strinsert(upb_strtable *t, const char *key, const void *val) { - assert(upb_strtable_lookup(t, key) == NULL); - t->t.count++; - uint32_t bucket = strtable_bucket(t, key); - upb_strtable_entry *table_e = strent(t, bucket); - if(table_e->hdr.key) { /* Collision. */ - if(bucket == strtable_bucket(t, table_e->hdr.key)) { - /* Existing element is in its main posisiton. Find an empty slot to - * place our new element and append it to this key's chain. */ - uint32_t empty_bucket = empty_strbucket(t); - while (table_e->hdr.next != UPB_END_OF_CHAIN) - table_e = strent(t, table_e->hdr.next); - table_e->hdr.next = empty_bucket; - table_e = strent(t, empty_bucket); +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { + if (key < t->array_size) { + if (upb_arrhas(t->array[key])) { + t->array_count--; + if (val) *val = t->array[key]; + t->array[key] = upb_value_uint64(-1); + return true; } else { - /* Existing element is not in its main position. Move it to an empty - * slot and put our element in its main position. 
*/ - uint32_t empty_bucket = empty_strbucket(t); - uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key); - memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */ - upb_strtable_entry *evictee_e = strent(t, evictee_bucket); - while(1) { - assert(evictee_e->hdr.key); - assert(evictee_e->hdr.next != UPB_END_OF_CHAIN); - if(evictee_e->hdr.next == bucket) { - evictee_e->hdr.next = empty_bucket; - break; - } - evictee_e = strent(t, evictee_e->hdr.next); - } - /* table_e remains set to our mainpos. */ + return false; } + } else { + return upb_table_remove( + &t->t, upb_intkey(key), val, &upb_inthash, &upb_inteql); } - //fprintf(stderr, "val: %p\n", val); - //fprintf(stderr, "val size: %d\n", t->t.value_size); - memcpy(&table_e->val, val, t->t.value_size); - table_e->hdr.key = strdup(key); - table_e->hdr.next = UPB_END_OF_CHAIN; - //fprintf(stderr, "Looking up, string=%s...\n", key); - assert(upb_strtable_lookup(t, key) == &table_e->val); - //printf("Yay!\n"); } -void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) { - if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) { - // Need to resize. New table of double the size, add old elements to it. - //printf("RESIZE!!\n"); - upb_strtable new_table; - upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size); - upb_strtable_iter i; - upb_strtable_begin(&i, t); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - strinsert(&new_table, - upb_strtable_iter_key(&i), - upb_strtable_iter_value(&i)); - } - upb_strtable_free(t); - *t = new_table; +void upb_inttable_compact(upb_inttable *t) { + // Find the largest power of two that satisfies the MIN_DENSITY definition. 
+ int counts[UPB_MAXARRSIZE + 1] = {0}; + upb_inttable_iter i; + for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) + counts[upb_log2(upb_inttable_iter_key(&i))]++; + int count = upb_inttable_count(t); + int size; + for (size = UPB_MAXARRSIZE; size > 1; size--) { + count -= counts[size]; + if (count >= (1 << size) * MIN_DENSITY) break; } - strinsert(t, key, val); + + // Insert all elements into new, perfectly-sized table. + upb_inttable new_table; + int hashsize = (upb_inttable_count(t) - count + 1) / MAX_LOAD; + upb_inttable_sizedinit(&new_table, size, upb_log2(hashsize) + 1); + for (upb_inttable_begin(&i, t); !upb_inttable_done(&i); upb_inttable_next(&i)) + upb_inttable_insert( + &new_table, upb_inttable_iter_key(&i), upb_inttable_iter_value(&i)); + upb_inttable_uninit(t); + *t = new_table; } -void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { - i->e = strent(t, -1); +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { i->t = t; - upb_strtable_next(i); + i->arrkey = -1; + i->array_part = true; + upb_inttable_next(i); } -void upb_strtable_next(upb_strtable_iter *i) { - upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t)); - upb_strtable_entry *cur = i->e; - do { - cur = (void*)((char*)cur + i->t->t.entry_size); - if(cur == end) { i->e = NULL; return; } - } while(cur->hdr.key == NULL); - i->e = cur; +void upb_inttable_next(upb_inttable_iter *iter) { + const upb_inttable *t = iter->t; + if (iter->array_part) { + for (size_t i = iter->arrkey; ++i < t->array_size; ) + if (upb_arrhas(t->array[i])) { + iter->ptr.val = &t->array[i]; + iter->arrkey = i; + return; + } + iter->array_part = false; + iter->ptr.ent = t->t.entries - 1; + } + iter->ptr.ent = upb_table_next(&t->t, iter->ptr.ent); } #ifdef UPB_UNALIGNED_READS_OK @@ -413,8 +351,7 @@ void upb_strtable_next(upb_strtable_iter *i) { // 1. It will not work incrementally. // 2. 
It will not produce the same results on little-endian and big-endian // machines. -static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) -{ +static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. const uint32_t m = 0x5bd1e995; @@ -465,8 +402,7 @@ static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } -static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) -{ +static uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { const uint32_t m = 0x5bd1e995; const int32_t r = 24; const uint8_t * data = (const uint8_t *)key; diff --git a/upb/table.h b/upb/table.h index 0c0a7854c0..f6bff66693 100644 --- a/upb/table.h +++ b/upb/table.h @@ -4,13 +4,16 @@ * Copyright (c) 2009 Google Inc. See LICENSE for details. * Author: Josh Haberman * - * This file defines very fast int->struct (inttable) and string->struct - * (strtable) hash tables. The struct can be of any size, and it is stored - * in the table itself, for cache-friendly performance. + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. * - * The table uses internal chaining with Brent's variation (inspired by the - * Lua implementation of hash tables). The hash function for strings is - * Austin Appleby's "MurmurHash." + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). * * This header is internal to upb; its interface should not be considered * public or stable. 
@@ -19,52 +22,30 @@ #ifndef UPB_TABLE_H_ #define UPB_TABLE_H_ -#include #include +#include #include "upb.h" #ifdef __cplusplus extern "C" { #endif -#define UPB_END_OF_CHAIN (uint32_t)-1 - -typedef struct { - bool has_entry:1; - // The rest of the bits are the user's. -} upb_inttable_value; - -typedef struct { - uint32_t key; - uint32_t next; // Internal chaining. -} upb_inttable_header; - -typedef struct { - upb_inttable_header hdr; - upb_inttable_value val; -} upb_inttable_entry; - -// TODO: consider storing the hash in the entry. This would avoid the need to -// rehash on table resizes, but more importantly could possibly improve lookup -// performance by letting us compare hashes before comparing lengths or the -// strings themselves. -typedef struct { - char *key; // We own, nullz. TODO: store explicit len? - uint32_t next; // Internal chaining. -} upb_strtable_header; +typedef union { + uintptr_t num; + char *str; // We own, nullz. +} upb_tabkey; -typedef struct { - upb_strtable_header hdr; - uint32_t val; // Val is at least 32 bits. -} upb_strtable_entry; +typedef struct _upb_tabent { + upb_tabkey key; + upb_value val; + struct _upb_tabent *next; // Internal chaining. +} upb_tabent; typedef struct { - void *entries; // Hash table. - uint32_t count; // Number of entries in the hash part. - uint32_t mask; // Mask to turn hash value -> bucket. - uint16_t entry_size; // Size of each entry. - uint16_t value_size; // Size of each value. - uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. + upb_tabent *entries; // Hash table. + size_t count; // Number of entries in the hash part. + size_t mask; // Mask to turn hash value -> bucket. + uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries. } upb_table; typedef struct { @@ -72,149 +53,124 @@ typedef struct { } upb_strtable; typedef struct { - upb_table t; - void *array; // Array part of the table. - uint32_t array_size; // Array part size. 
- uint32_t array_count; // Array part number of elements. + upb_table t; // For entries that don't fit in the array part. + upb_value *array; // Array part of the table. + size_t array_size; // Array part size. + size_t array_count; // Array part number of elements. } upb_inttable; -// Initialize and free a table, respectively. Specify the initial size -// with 'size' (the size will be increased as necessary). Value size -// specifies how many bytes each value in the table is. -// -// WARNING! The lowest bit of every entry is reserved by the hash table. -// It will always be overwritten when you insert, and must not be modified -// when looked up! -void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size); -void upb_inttable_free(upb_inttable *table); -void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size); -void upb_strtable_free(upb_strtable *table); - -// Number of values in the hash table. -INLINE uint32_t upb_table_count(const upb_table *t) { return t->count; } -INLINE uint32_t upb_inttable_count(const upb_inttable *t) { - return t->array_count + upb_table_count(&t->t); -} -INLINE uint32_t upb_strtable_count(const upb_strtable *t) { - return upb_table_count(&t->t); +INLINE upb_tabkey upb_intkey(uintptr_t key) { upb_tabkey k = {key}; return k; } + +INLINE upb_tabent *upb_inthash(const upb_table *t, upb_tabkey key) { + return t->entries + ((uint32_t)key.num & t->mask); } -// Inserts the given key into the hashtable with the given value. The key must -// not already exist in the hash table. The data will be copied from val into -// the hashtable (the amount of data copied comes from value_size when the -// table was constructed). Therefore the data at val may be freed once the -// call returns. For string tables, the table takes ownership of the string. -// -// WARNING: the lowest bit of val is reserved and will be overwritten! 
-void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val); -// TODO: may want to allow for more complex keys with custom hash/comparison -// functions. -void upb_strtable_insert(upb_strtable *t, const char *key, const void *val); -void upb_inttable_compact(upb_inttable *t); +INLINE bool upb_arrhas(upb_value v) { return v.val.uint64 != (uint64_t)-1; } -INLINE uint32_t _upb_inttable_bucket(const upb_inttable *t, uint32_t k) { - uint32_t bucket = k & t->t.mask; // Identity hash for ints. - assert(bucket != UPB_END_OF_CHAIN); - return bucket; -} +// Initialize and uninitialize a table, respectively. If memory allocation +// failed, false is returned that the table is uninitialized. +bool upb_inttable_init(upb_inttable *table); +bool upb_strtable_init(upb_strtable *table); +void upb_inttable_uninit(upb_inttable *table); +void upb_strtable_uninit(upb_strtable *table); -// Returns true if this key belongs in the array part of the table. -INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) { - return (k < t->array_size); -} +// Returns the number of values in the table. +size_t upb_inttable_count(const upb_inttable *t); +INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -// Looks up key in this table, returning a pointer to the user's inserted data. -// We have the caller specify the entry_size because fixing this as a literal -// (instead of reading table->entry_size) gives the compiler more ability to -// optimize. +// Inserts the given key into the hashtable with the given value. The key must +// not already exist in the hash table. For string tables, the key must be +// NULL-terminated, and the table will make an internal copy of the key. +// Inttables must not insert a value of UINTPTR_MAX. // -// Note: All returned pointers are invalidated by inserts! 
-INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - size_t entry_size, size_t value_size) { - upb_inttable_value *arrval = - (upb_inttable_value*)UPB_INDEX(t->array, key, value_size); - if (_upb_inttable_isarrkey(t, key)) { - return (arrval->has_entry) ? arrval : NULL; +// If a table resize was required but memory allocation failed, false is +// returned and the table is unchanged. +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val); +bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val); + +// Looks up key in this table, returning a pointer to the table's internal copy +// of the user's inserted data, or NULL if this key is not in the table. The +// user is free to modify the given upb_value, which will be reflected in any +// future lookups of this key. The returned pointer is invalidated by inserts. +upb_value *upb_inttable_lookup(const upb_inttable *t, uintptr_t key); +upb_value *upb_strtable_lookup(const upb_strtable *t, const char *key); + +// Removes an item from the table. Returns true if the remove was successful, +// and stores the removed item in *val if non-NULL. +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); + +// Optimizes the table for the current set of entries, for both memory use and +// lookup time. Client should call this after all entries have been inserted; +// inserting more entries is legal, but will likely require a table resize. +void upb_inttable_compact(upb_inttable *t); + +// A special-case inlinable version of the lookup routine for 32-bit integers. +INLINE upb_value *upb_inttable_lookup32(const upb_inttable *t, uint32_t key) { + if (key < t->array_size) { + upb_value *v = &t->array[key]; + return upb_arrhas(*v) ? 
v : NULL; } - uint32_t bucket = _upb_inttable_bucket(t, key); - upb_inttable_entry *e = - (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); - while (1) { - if (e->hdr.key == key) { - return &e->val; - } - if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL; - e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size); + for (upb_tabent *e = upb_inthash(&t->t, upb_intkey(key)); true; e = e->next) { + if ((uint32_t)e->key.num == key) return &e->val; + if (e->next == NULL) return NULL; } } -INLINE size_t _upb_inttable_entrysize(size_t value_size) { - return upb_align_up(sizeof(upb_inttable_header) + value_size, 8); -} - -INLINE void *upb_inttable_fastlookup(const upb_inttable *t, uint32_t key, - uint32_t value_size) { - return _upb_inttable_fastlookup( - t, key, _upb_inttable_entrysize(value_size), value_size); -} - -INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) { - return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size); -} - -void *upb_strtable_lookupl(const upb_strtable *t, const char *key, size_t len); -void *upb_strtable_lookup(const upb_strtable *t, const char *key); - /* upb_strtable_iter **********************************************************/ // Strtable iteration. Order is undefined. Insertions invalidate iterators. // upb_strtable_iter i; -// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) { +// upb_strtable_begin(&i, t); +// for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { // const char *key = upb_strtable_iter_key(&i); // const myval *val = upb_strtable_iter_value(&i); // // ... 
// } typedef struct { const upb_strtable *t; - upb_strtable_entry *e; + upb_tabent *e; } upb_strtable_iter; void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t); void upb_strtable_next(upb_strtable_iter *i); INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; } INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) { - return i->e->hdr.key; + return i->e->key.str; } -INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) { - return &i->e->val; +INLINE upb_value upb_strtable_iter_value(upb_strtable_iter *i) { + return i->e->val; } /* upb_inttable_iter **********************************************************/ // Inttable iteration. Order is undefined. Insertions invalidate iterators. -// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); -// i = upb_inttable_next(t, i)) { +// upb_inttable_iter i; +// upb_inttable_begin(&i, t); +// for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { // // ... // } typedef struct { - uint32_t key; - upb_inttable_value *value; + const upb_inttable *t; + union { + upb_tabent *ent; // For hash iteration. + upb_value *val; // For array iteration. + } ptr; + uintptr_t arrkey; bool array_part; } upb_inttable_iter; -upb_inttable_iter upb_inttable_begin(const upb_inttable *t); -upb_inttable_iter upb_inttable_next(const upb_inttable *t, - upb_inttable_iter iter); -INLINE bool upb_inttable_done(upb_inttable_iter iter) { - return iter.value == NULL; +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t); +void upb_inttable_next(upb_inttable_iter *i); +INLINE bool upb_inttable_done(upb_inttable_iter *i) { + return i->ptr.ent == NULL; } -INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) { - return iter.key; +INLINE uintptr_t upb_inttable_iter_key(upb_inttable_iter *i) { + return i->array_part ? 
i->arrkey : i->ptr.ent->key.num; } -INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) { - return iter.value; +INLINE upb_value upb_inttable_iter_value(upb_inttable_iter *i) { + return i->array_part ? *i->ptr.val : i->ptr.ent->val; } #ifdef __cplusplus diff --git a/upb/upb.c b/upb/upb.c index 3af9b752d8..c172bd3c80 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -1,47 +1,17 @@ /* * upb - a minimalist implementation of protocol buffers. * - * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Copyright (c) 2009-2012 Google Inc. See LICENSE for details. * Author: Josh Haberman */ #include #include #include +#include #include #include -#include "upb/descriptor_const.h" #include "upb/upb.h" -#include "upb/bytestream.h" - -#define alignof(t) offsetof(struct { char c; t x; }, x) -#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \ - {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \ - #ctype, is_numeric}, - -const upb_type_info upb_types[] = { - // END_GROUP is not real, but used to signify the pseudo-field that - // ends a group from within the group. 
- TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP - TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE - TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64 - TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING - TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE - TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM - TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32 - TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32 - TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64 -}; #ifdef NDEBUG upb_value UPB_NO_VALUE = {{0}}; @@ -142,8 +112,9 @@ bool upb_errno_is_wouldblock() { bool upb_posix_codetostr(int code, char *buf, size_t len) { if (strerror_r(code, buf, len) == -1) { if (errno == EINVAL) { - int n = snprintf(buf, len, "Invalid POSIX error number %d\n", code); - return n >= (int)len; + size_t actual_len = + snprintf(buf, len, "Invalid POSIX error number %d\n", code); + return actual_len >= len; } else if (errno == ERANGE) { return false; } diff --git a/upb/upb.h b/upb/upb.h index 01970ca8bc..ef440fb733 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -15,9 +15,6 @@ #include #include #include -#include -#include "descriptor_const.h" -#include "atomic.h" #ifdef __cplusplus extern "C" { @@ 
-36,20 +33,6 @@ extern "C" { #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) -#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) - -INLINE void nop_printf(const char *fmt, ...) { (void)fmt; } - -#ifdef NDEBUG -#define DEBUGPRINTF nop_printf -#else -#define DEBUGPRINTF printf -#endif - -// Rounds val up to the next multiple of align. -INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) { - return val % align == 0 ? val : val + align - (val % align); -} // The maximum that any submessages can be nested. Matches proto2's limit. // At the moment this specifies the size of several statically-sized arrays @@ -94,73 +77,46 @@ INLINE uint32_t upb_align_up(uint32_t val, uint32_t align) { #define UPB_MAX_TYPE_DEPTH 64 -/* Fundamental types and type constants. **************************************/ - -// A list of types as they are encoded on-the-wire. -enum upb_wire_type { - UPB_WIRE_TYPE_VARINT = 0, - UPB_WIRE_TYPE_64BIT = 1, - UPB_WIRE_TYPE_DELIMITED = 2, - UPB_WIRE_TYPE_START_GROUP = 3, - UPB_WIRE_TYPE_END_GROUP = 4, - UPB_WIRE_TYPE_32BIT = 5, -}; - -// Type of a field as defined in a .proto file. eg. string, int32, etc. The -// integers that represent this are defined by descriptor.proto. Note that -// descriptor.proto reserves "0" for errors, and we use it to represent -// exceptional circumstances. -typedef uint8_t upb_fieldtype_t; - -// For referencing the type constants tersely. -#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_TYPE_ ## type -#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_LABEL_ ## type - -// Info for a given field type. -typedef struct { - uint8_t align; - uint8_t size; - uint8_t native_wire_type; - uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32 - const char *ctype; - bool is_numeric; // Only numeric types can be packed. -} upb_type_info; - -// A static array of info about all of the field types, indexed by type number. 
-extern const upb_type_info upb_types[]; - - /* upb_value ******************************************************************/ +// Clients should not need to access these enum values; they are used internally +// to do typechecks of upb_value accesses. +typedef enum { + UPB_CTYPE_INT32 = 1, + UPB_CTYPE_INT64 = 2, + UPB_CTYPE_UINT32 = 3, + UPB_CTYPE_UINT64 = 4, + UPB_CTYPE_DOUBLE = 5, + UPB_CTYPE_FLOAT = 6, + UPB_CTYPE_BOOL = 7, + UPB_CTYPE_PTR = 8, + UPB_CTYPE_BYTEREGION = 9, + UPB_CTYPE_FIELDDEF = 10, +} upb_ctype_t; + struct _upb_byteregion; struct _upb_fielddef; -// Special constants for the upb_value.type field. These must not conflict -// with any members of FieldDescriptorProto.Type. -#define UPB_TYPE_ENDGROUP 0 -#define UPB_VALUETYPE_FIELDDEF 32 -#define UPB_VALUETYPE_PTR 33 - // A single .proto value. The owner must have an out-of-band way of knowing // the type, so that it knows which union member to use. typedef struct { union { uint64_t uint64; - double _double; - float _float; int32_t int32; int64_t int64; uint32_t uint32; + double _double; + float _float; bool _bool; + void *_void; struct _upb_byteregion *byteregion; const struct _upb_fielddef *fielddef; - void *_void; } val; #ifndef NDEBUG // In debug mode we carry the value type around also so we can check accesses // to be sure the right member is being read. 
- char type; + upb_ctype_t type; #endif } upb_value; @@ -185,7 +141,7 @@ typedef struct { return val.val.membername; \ } \ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ - memset(val, 0, sizeof(*val)); \ + val->val.uint64 = 0; \ SET_TYPE(val->type, proto_type); \ val->val.membername = cval; \ } \ @@ -195,21 +151,23 @@ typedef struct { return ret; \ } -UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE)); -UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT)); -UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32)); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64)); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); -UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); -UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); +UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_CTYPE_INT32); +UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_CTYPE_INT64); +UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); +UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); +UPB_VALUE_ACCESSORS(double, _double, double, UPB_CTYPE_DOUBLE); +UPB_VALUE_ACCESSORS(float, _float, float, UPB_CTYPE_FLOAT); +UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_CTYPE_BOOL); +UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_CTYPE_PTR); UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*, - UPB_TYPE(STRING)); + UPB_CTYPE_BYTEREGION); // upb_fielddef should never be modified from a callback // (ie. when they're getting passed through a upb_value). 
UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*, - UPB_VALUETYPE_FIELDDEF); + UPB_CTYPE_FIELDDEF); + +#undef UPB_VALUE_ACCESSORS extern upb_value UPB_NO_VALUE; @@ -262,7 +220,7 @@ void upb_status_copy(upb_status *to, const upb_status *from); extern upb_errorspace upb_posix_errorspace; void upb_status_fromerrno(upb_status *status); -bool upb_errno_is_wouldblock(void); +bool upb_errno_is_wouldblock(); // Like vasprintf (which allocates a string large enough for the result), but // uses *buf (which can be NULL) as a starting point and reallocates it only if