Ported upb to C89, for greater portability.

A large part of this change is surface-level porting,
such as moving variable declarations to the top of
each block.

However, there are a few more substantial changes too:

- moved internal-only struct definitions to a separate
  file (structdefs.int.h), for greater encapsulation
  and ABI compatibility.

- removed the UPB_UPCAST macro, since it requires access
  to the internal-only struct definitions.  Replaced uses
  with calls to inline, type-safe casting functions.

- removed the UPB_DEFINE_CLASS/UPB_DEFINE_STRUCT macros.
  Class and struct definitions are now more explicit -- you
  get to see the actual class/struct keywords in the source.
  The casting convenience functions have been moved into
  UPB_DECLARE_DERIVED_TYPE() and UPB_DECLARE_DERIVED_TYPE2().

- the new way that we duplicate base methods in derived types
  is also more convenient and requires less duplication.
  It is also less greppable, but hopefully that is not
  too big a problem.

Compiler flags (-std=c89 -pedantic) should help to rigorously
enforce that the code is free of C99-isms.

A few functions (e.g. strtoll) are not available in C89.
Temporary, hacky workarounds are in place for them.
pull/13171/head
Josh Haberman 10 years ago
parent 6650b3c652
commit 919fea438a
  1. 58
      Makefile
  2. 4
      tests/pb/test_decoder.cc
  3. 49
      tests/pb/test_varint.c
  4. 204
      tests/test_def.c
  5. 6
      tests/test_handlers.c
  6. 24
      tests/test_util.h
  7. 78
      tools/dump_cinit.lua
  8. 42
      travis.sh
  9. 5
      upb/bindings/googlepb/proto2.cc
  10. 523
      upb/bindings/lua/upb.c
  11. 82
      upb/bindings/lua/upb.h
  12. 44
      upb/bindings/lua/upb/pb.c
  13. 62
      upb/bindings/lua/upb/table.c
  14. 486
      upb/def.c
  15. 973
      upb/def.h
  16. 1041
      upb/descriptor/descriptor.upb.c
  17. 218
      upb/descriptor/descriptor.upb.h
  18. 212
      upb/descriptor/reader.c
  19. 56
      upb/descriptor/reader.h
  20. 67
      upb/env.c
  21. 153
      upb/env.h
  22. 391
      upb/handlers-inl.h
  23. 251
      upb/handlers.c
  24. 687
      upb/handlers.h
  25. 562
      upb/json/parser.c
  26. 25
      upb/json/parser.h
  27. 490
      upb/json/parser.rl
  28. 226
      upb/json/printer.c
  29. 22
      upb/json/printer.h
  30. 320
      upb/pb/compile_decoder.c
  31. 336
      upb/pb/compile_decoder_x64.c
  32. 180
      upb/pb/compile_decoder_x64.dasc
  33. 1850
      upb/pb/compile_decoder_x64.h
  34. 339
      upb/pb/decoder.c
  35. 266
      upb/pb/decoder.h
  36. 308
      upb/pb/decoder.int.h
  37. 199
      upb/pb/encoder.c
  38. 26
      upb/pb/encoder.h
  39. 39
      upb/pb/glue.c
  40. 26
      upb/pb/glue.h
  41. 64
      upb/pb/textprinter.c
  42. 20
      upb/pb/textprinter.h
  43. 81
      upb/pb/varint.c
  44. 86
      upb/pb/varint.int.h
  45. 399
      upb/refcounted.c
  46. 206
      upb/refcounted.h
  47. 8
      upb/shim/shim.c
  48. 24
      upb/shim/shim.h
  49. 362
      upb/sink.h
  50. 176
      upb/structdefs.int.h
  51. 214
      upb/symtab.c
  52. 213
      upb/symtab.h
  53. 305
      upb/table.c
  54. 391
      upb/table.int.h
  55. 10
      upb/upb.c
  56. 320
      upb/upb.h

@ -43,19 +43,23 @@ WITH_JIT=no
UPB_FAIL_WARNINGS?=no
# Basic compiler/flag setup.
# We are C89/C++98, with the one exception that we need stdint and "long long."
CC?=cc
CXX?=c++
CFLAGS=-std=c99
CXXFLAGS=-Wno-unused-private-field
CFLAGS=
CXXFLAGS=
INCLUDE=-I.
CSTD=-std=c89 -pedantic -Wno-long-long
CXXSTD=-std=c++98 -pedantic -Wno-long-long
WARNFLAGS=-Wall -Wextra -Wpointer-arith
WARNFLAGS_CXX=-Wall -Wextra -Wpointer-arith
WARNFLAGS_CXX=$(WARNFLAGS) -Wno-unused-private-field
CPPFLAGS=$(INCLUDE) -DNDEBUG $(USER_CPPFLAGS)
LUA=lua # 5.1 and 5.2 should both be supported
ifneq ($(WITH_JIT), no)
USE_JIT=true
CPPFLAGS += -DUPB_USE_JIT_X64
EXTRA_LIBS += -ldl
endif
ifeq ($(CC), clang)
@ -168,11 +172,16 @@ upb_pb_SRCS = \
# If Lua is present we can use DynASM to regenerate the .h file.
ifdef USE_JIT
upb_pb_SRCS += upb/pb/compile_decoder_x64.c
obj/upb/pb/compile_decoder_x64.o obj/upb/pb/compile_decoder_x64.lo: upb/pb/compile_decoder_x64.h
# The JIT can't compile with -Wpedantic, since it does some inherently
# platform-specific things like casting between data pointers and function
# pointers. Also DynASM emits some GNU extensions.
obj/upb/pb/compile_decoder_x64.o : CSTD = -std=gnu89
obj/upb/pb/compile_decoder_x64.lo : CSTD = -std=gnu89
upb/pb/compile_decoder_x64.h: upb/pb/compile_decoder_x64.dasc
$(E) DYNASM $<
$(Q) $(LUA) dynasm/dynasm.lua upb/pb/compile_decoder_x64.dasc > upb/pb/compile_decoder_x64.h || (rm upb/pb/compile_decoder_x64.h ; false)
$(Q) $(LUA) dynasm/dynasm.lua -c upb/pb/compile_decoder_x64.dasc > upb/pb/compile_decoder_x64.h || (rm upb/pb/compile_decoder_x64.h ; false)
endif
upb_json_SRCS = \
@ -221,19 +230,19 @@ $(UPB_LIBS): lib/lib%.a: $(call make_objs,o)
obj/upb/%.o: upb/%.c | $$(@D)/.
$(E) CC $<
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
obj/upb/%.o: upb/%.cc | $$(@D)/.
$(E) CXX $<
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
$(Q) $(CXX) $(OPT) $(CXXSTD) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
obj/upb/%.lo: upb/%.c | $$(@D)/.
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -fPIC
obj/upb/%.lo: upb/%.cc | $$(@D)/.
$(E) CXX -fPIC $<
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC
$(Q) $(CXX) $(OPT) $(CXXSTD) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -fPIC
# Note: mkdir -p is technically susceptible to races when used with make -j.
%/.:
@ -246,7 +255,7 @@ upb/descriptor/descriptor.pb: upb/descriptor/descriptor.proto
genfiles: upb/descriptor/descriptor.pb tools/upbc
./tools/upbc upb/descriptor/descriptor.pb upb/descriptor/descriptor google_protobuf_descriptor
lua dynasm/dynasm.lua upb/pb/compile_decoder_x64.dasc > upb/pb/compile_decoder_x64.h || (rm upb/pb/compile_decoder_x64.h ; false)
$(LUA) dynasm/dynasm.lua -c upb/pb/compile_decoder_x64.dasc > upb/pb/compile_decoder_x64.h || (rm upb/pb/compile_decoder_x64.h ; false)
# upbc depends on these Lua extensions.
UPBC_LUA_EXTS = \
@ -288,28 +297,28 @@ tests: $(TESTS)
tests/testmain.o: tests/testmain.cc
$(E) CXX $<
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
$(Q) $(CXX) $(OPT) $(CXXSTD) $(WARNFLAGS_CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
$(C_TESTS): % : %.c tests/testmain.o $$(LIBS)
$(E) CC $<
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS)
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -o $@ tests/testmain.o $< $(LIBS)
$(CC_TESTS): % : %.cc tests/testmain.o $$(LIBS)
$(E) CXX $<
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS)
$(Q) $(CXX) $(OPT) $(CXXSTD) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -Wno-deprecated -o $@ tests/testmain.o $< $(LIBS)
# Several of these tests don't actually test these libs, but use them
# incidentally to load a descriptor
LOAD_DESCRIPTOR_LIBS = lib/libupb.pb.a lib/libupb.descriptor.a
# Specify which libs each test depends on.
tests/pb/test_varint: LIBS = lib/libupb.pb.a lib/libupb.a
tests/test_def: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a
tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a
tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a
tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a
tests/test_table: LIBS = lib/libupb.a
tests/json/test_json: LIBS = lib/libupb.a lib/libupb.json.a
tests/pb/test_varint: LIBS = lib/libupb.pb.a lib/libupb.a $(EXTRA_LIBS)
tests/test_def: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a $(EXTRA_LIBS)
tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a $(EXTRA_LIBS)
tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a $(EXTRA_LIBS)
tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a $(EXTRA_LIBS)
tests/test_table: LIBS = lib/libupb.a $(EXTRA_LIBS)
tests/json/test_json: LIBS = lib/libupb.a lib/libupb.json.a $(EXTRA_LIBS)
tests/test_def: tests/test.proto.pb
@ -382,7 +391,8 @@ GOOGLEPB_TEST_LIBS = \
lib/libupb.bindings.googlepb.a \
lib/libupb.pb.a \
lib/libupb.descriptor.a \
lib/libupb.a
lib/libupb.a \
$(EXTRA_LIBS)
GOOGLEPB_TEST_DEPS = \
tests/bindings/googlepb/test_vs_proto2.cc \
@ -456,15 +466,15 @@ LUA_LIB_DEPS = \
upb/bindings/lua/upb_c.so: upb/bindings/lua/upb.c $(LUA_LIB_DEPS)
$(E) CC upb/bindings/lua/upb.c
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS)
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS) $(LUA_LIB_DEPS)
upb/bindings/lua/upb/table_c.so: upb/bindings/lua/upb/table.c lib/libupb_pic.a
$(E) CC upb/bindings/lua/upb/table.c
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS)
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS)
upb/bindings/lua/upb/pb_c.so: upb/bindings/lua/upb/pb.c $(LUA_LIB_DEPS)
$(E) CC upb/bindings/lua/upb/pb.c
$(Q) $(CC) $(OPT) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS)
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $< $(LUA_LDFLAGS)
# Python extension #############################################################

@ -111,7 +111,7 @@ using std::string;
void vappendf(string* str, const char *format, va_list args) {
va_list copy;
va_copy(copy, args);
__va_copy(copy, args);
int count = vsnprintf(NULL, 0, format, args);
if (count >= 0)
@ -577,7 +577,7 @@ void run_decoder(const string& proto, const string* expected_output) {
fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
fprintf(stderr, "JIT on: %s\n",
global_method->is_native() ? "true" : "false");
fprintf(stderr, "Input (len=%zu): ", proto.size());
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
if (expected_output) {

@ -8,25 +8,30 @@
#include "upb/pb/varint.int.h"
#include "tests/upb_test.h"
// Test that we can round-trip from int->varint->int.
/* Test that we can round-trip from int->varint->int. */
static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
uint64_t num) {
char buf[16];
size_t bytes;
upb_decoderet r;
memset(buf, 0xff, sizeof(buf));
size_t bytes = upb_vencode64(num, buf);
bytes = upb_vencode64(num, buf);
if (num <= UINT32_MAX) {
uint64_t encoded = upb_vencode32(num);
char buf2[16];
upb_decoderet r;
memset(buf2, 0, sizeof(buf2));
uint64_t encoded = upb_vencode32(num);
memcpy(&buf2, &encoded, 8);
upb_decoderet r = decoder(buf2);
r = decoder(buf2);
ASSERT(r.val == num);
ASSERT(r.p == buf2 + upb_value_size(encoded));
ASSERT(upb_zzenc_32(upb_zzdec_32(num)) == num);
}
upb_decoderet r = decoder(buf);
r = decoder(buf);
ASSERT(r.val == num);
ASSERT(r.p == buf + bytes);
ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
@ -36,13 +41,23 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
#define TEST(bytes, expected_val) {\
size_t n = sizeof(bytes) - 1; /* for NULL */ \
char buf[UPB_PB_VARINT_MAX_LEN]; \
upb_decoderet r; \
memset(buf, 0xff, sizeof(buf)); \
memcpy(buf, bytes, n); \
upb_decoderet r = decoder(buf); \
r = decoder(buf); \
ASSERT(r.val == expected_val); \
ASSERT(r.p == buf + n); \
}
uint64_t num;
char twelvebyte[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1};
const char *twelvebyte_buf = twelvebyte;
/* A varint that terminates before hitting the end of the provided buffer,
* but in too many bytes (11 instead of 10). */
upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
TEST("\x00", 0ULL);
TEST("\x01", 1ULL);
TEST("\x81\x14", 0xa01ULL);
@ -57,16 +72,7 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x01, 0x01};
const char *twelvebyte_buf = twelvebyte;
// A varint that terminates before hitting the end of the provided buffer,
// but in too many bytes (11 instead of 10).
upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
for (uint64_t num = 5; num * 1.5 < UINT64_MAX; num *= 1.5) {
for (num = 5; num * 1.5 < UINT64_MAX; num *= 1.5) {
test_varint_for_num(decoder, num);
}
test_varint_for_num(decoder, 0);
@ -80,16 +86,13 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
return upb_vdecode_ ## decoder(p); \
} \
void test_ ## decoder() { \
printf("Testing varint decoder: " #decoder "..."); \
fflush(stdout); \
test_varint_decoder(&_upb_vdecode_ ## decoder); \
printf("ok.\n"); \
} \
TEST_VARINT_DECODER(check2_branch32);
TEST_VARINT_DECODER(check2_branch64);
TEST_VARINT_DECODER(check2_wright);
TEST_VARINT_DECODER(check2_massimino);
TEST_VARINT_DECODER(check2_branch32)
TEST_VARINT_DECODER(check2_branch64)
TEST_VARINT_DECODER(check2_wright)
TEST_VARINT_DECODER(check2_massimino)
int run_tests(int argc, char *argv[]) {
UPB_UNUSED(argc);

@ -20,23 +20,23 @@ static void test_empty_symtab() {
upb_symtab_iter i;
for (upb_symtab_begin(&i, s, UPB_DEF_ANY); !upb_symtab_done(&i);
upb_symtab_next(&i)) {
ASSERT(false); // Should not get here.
ASSERT(false); /* Should not get here. */
}
upb_symtab_unref(s, &s);
}
static void test_noreftracking() {
// Reftracking is not required; clients can pass UPB_UNTRACKED_REF for owner.
/* Reftracking is not required; clients can pass UPB_UNTRACKED_REF for owner. */
upb_msgdef *md = upb_msgdef_new(UPB_UNTRACKED_REF);
upb_msgdef_ref(md, UPB_UNTRACKED_REF);
// Clients can mix tracked and untracked refs.
/* Clients can mix tracked and untracked refs. */
upb_msgdef_ref(md, &md);
upb_msgdef_unref(md, UPB_UNTRACKED_REF);
upb_msgdef_unref(md, UPB_UNTRACKED_REF);
// Call some random function on the messagedef to test that it is alive.
/* Call some random function on the messagedef to test that it is alive. */
ASSERT(!upb_msgdef_isfrozen(md));
upb_msgdef_unref(md, &md);
@ -44,8 +44,8 @@ static void test_noreftracking() {
static upb_symtab *load_test_proto(void *owner) {
upb_symtab *s = upb_symtab_new(owner);
ASSERT(s);
upb_status status = UPB_STATUS_INIT;
ASSERT(s);
if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) {
fprintf(stderr, "Error loading descriptor file: %s\n",
upb_status_errmsg(&status));
@ -58,60 +58,73 @@ static upb_symtab *load_test_proto(void *owner) {
}
static void test_cycles() {
bool ok;
upb_symtab *s = load_test_proto(&s);
// Test cycle detection by making a cyclic def's main refcount go to zero
// and then be incremented to one again.
const upb_def *def = upb_symtab_lookup(s, "A");
const upb_msgdef *m;
const upb_fielddef *f;
const upb_def *def;
const upb_def *def2;
/* Test cycle detection by making a cyclic def's main refcount go to zero
* and then be incremented to one again. */
def = upb_symtab_lookup(s, "A");
upb_def_ref(def, &def);
ASSERT(def);
ASSERT(upb_def_isfrozen(def));
upb_symtab_unref(s, &s);
// Message A has only one subfield: "optional B b = 1".
const upb_msgdef *m = upb_downcast_msgdef(def);
const upb_fielddef *f = upb_msgdef_itof(m, 1);
/* Message A has only one subfield: "optional B b = 1". */
m = upb_downcast_msgdef(def);
f = upb_msgdef_itof(m, 1);
ASSERT(f);
ASSERT(upb_fielddef_hassubdef(f));
ASSERT(upb_msgdef_ntofz(m, "b") == f);
ASSERT(upb_msgdef_ntof(m, "b", 1) == f);
const upb_def *def2 = upb_fielddef_subdef(f);
def2 = upb_fielddef_subdef(f);
ASSERT(upb_downcast_msgdef(def2));
ASSERT(strcmp(upb_def_fullname(def2), "B") == 0);
ok = strcmp(upb_def_fullname(def2), "B") == 0;
ASSERT(ok);
upb_def_ref(def2, &def2);
upb_def_unref(def, &def);
// We know "def" is still alive because it's reachable from def2.
ASSERT(strcmp(upb_def_fullname(def), "A") == 0);
/* We know "def" is still alive because it's reachable from def2. */
ok = strcmp(upb_def_fullname(def), "A") == 0;
ASSERT(ok);
upb_def_unref(def2, &def2);
}
static void test_symbol_resolution() {
upb_status s = UPB_STATUS_INIT;
upb_def *defs[2];
upb_msgdef *m1;
upb_msgdef *m2;
upb_msgdef *m3;
upb_fielddef *m3_field1;
upb_fielddef *m3_field2;
upb_symtab *symtab = upb_symtab_new(&symtab);
ASSERT(symtab);
// m1 has name "A.B.C" and no fields. We'll add it to the symtab now.
upb_msgdef *m1 = upb_msgdef_new(&m1);
/* m1 has name "A.B.C" and no fields. We'll add it to the symtab now. */
m1 = upb_msgdef_new(&m1);
ASSERT(m1);
ASSERT_STATUS(upb_msgdef_setfullname(m1, "A.B.C", &s), &s);
ASSERT_STATUS(upb_symtab_add(symtab, (upb_def**)&m1, 1,
NULL, &s), &s);
// m2 has name "D.E" and no fields. We'll add it in the same batch as m3
// below.
upb_msgdef *m2 = upb_msgdef_new(&m2);
/* m2 has name "D.E" and no fields. We'll add it in the same batch as m3
* below. */
m2 = upb_msgdef_new(&m2);
ASSERT(m2);
ASSERT_STATUS(upb_msgdef_setfullname(m2, "D.E", &s), &s);
// m3 has name "F.G" and two fields, of type A.B.C and D.E respectively. We'll
// add it in the same batch as m2 above.
upb_msgdef *m3 = upb_msgdef_new(&m3);
/* m3 has name "F.G" and two fields, of type A.B.C and D.E respectively. We'll
* add it in the same batch as m2 above. */
m3 = upb_msgdef_new(&m3);
ASSERT(m3);
ASSERT_STATUS(upb_msgdef_setfullname(m3, "F.G", &s), &s);
upb_fielddef *m3_field1 = upb_fielddef_new(&m3_field1);
m3_field1 = upb_fielddef_new(&m3_field1);
ASSERT_STATUS(upb_fielddef_setname(m3_field1, "field1", &s), &s);
ASSERT_STATUS(upb_fielddef_setnumber(m3_field1, 1, &s), &s);
upb_fielddef_setlabel(m3_field1, UPB_LABEL_OPTIONAL);
@ -119,7 +132,7 @@ static void test_symbol_resolution() {
ASSERT_STATUS(upb_fielddef_setsubdefname(m3_field1, ".A.B.C", &s), &s);
ASSERT_STATUS(upb_msgdef_addfield(m3, m3_field1, NULL, &s), &s);
upb_fielddef *m3_field2 = upb_fielddef_new(&m3_field2);
m3_field2 = upb_fielddef_new(&m3_field2);
ASSERT_STATUS(upb_fielddef_setname(m3_field2, "field2", &s), &s);
ASSERT_STATUS(upb_fielddef_setnumber(m3_field2, 2, &s), &s);
upb_fielddef_setlabel(m3_field2, UPB_LABEL_OPTIONAL);
@ -127,7 +140,8 @@ static void test_symbol_resolution() {
ASSERT_STATUS(upb_fielddef_setsubdefname(m3_field2, ".D.E", &s), &s);
ASSERT_STATUS(upb_msgdef_addfield(m3, m3_field2, NULL, &s), &s);
upb_def *defs[2] = { (upb_def*)m2, (upb_def*)m3 };
defs[0] = upb_msgdef_upcast_mutable(m2);
defs[1] = upb_msgdef_upcast_mutable(m3);
ASSERT_STATUS(upb_symtab_add(symtab, defs, 2, NULL, &s), &s);
upb_fielddef_unref(m3_field2, &m3_field2);
@ -139,22 +153,24 @@ static void test_symbol_resolution() {
}
static void test_fielddef_unref() {
bool ok;
upb_symtab *s = load_test_proto(&s);
const upb_msgdef *md = upb_symtab_lookupmsg(s, "A");
const upb_fielddef *f = upb_msgdef_itof(md, 1);
upb_fielddef_ref(f, &f);
// Unref symtab; now fielddef is the only thing keeping the msgdef alive.
/* Unref symtab; now fielddef is the only thing keeping the msgdef alive. */
upb_symtab_unref(s, &s);
// Check that md is still alive.
ASSERT(strcmp(upb_msgdef_fullname(md), "A") == 0);
/* Check that md is still alive. */
ok = strcmp(upb_msgdef_fullname(md), "A") == 0;
ASSERT(ok);
// Check that unref of fielddef frees the whole remaining graph.
/* Check that unref of fielddef frees the whole remaining graph. */
upb_fielddef_unref(f, &f);
}
static void test_fielddef() {
// Test that we don't leak an unresolved subdef name.
/* Test that we don't leak an unresolved subdef name. */
upb_fielddef *f1 = upb_fielddef_new(&f1);
upb_fielddef_settype(f1, UPB_TYPE_MESSAGE);
ASSERT(upb_fielddef_setsubdefname(f1, "YO", NULL));
@ -189,51 +205,61 @@ static upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) {
static void test_replacement() {
upb_symtab *s = upb_symtab_new(&s);
upb_enumdef *e2;
upb_msgdef *m2;
upb_enumdef *e;
upb_status status = UPB_STATUS_INIT;
upb_def *newdefs[3];
upb_def *newdefs2[1];
const upb_msgdef *m3;
upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s);
upb_msgdef_addfield(m, newfield("field1", 1, UPB_TYPE_ENUM,
UPB_LABEL_OPTIONAL, ".MyEnum", &s),
&s, NULL);
upb_msgdef *m2 = upb_msgdef_newnamed("MyMessage2", &s);
upb_enumdef *e = upb_enumdef_newnamed("MyEnum", &s);
upb_status status = UPB_STATUS_INIT;
m2 = upb_msgdef_newnamed("MyMessage2", &s);
e = upb_enumdef_newnamed("MyEnum", &s);
ASSERT_STATUS(upb_enumdef_addval(e, "VAL1", 1, &status), &status);
upb_def *newdefs[] = {UPB_UPCAST(m), UPB_UPCAST(m2), UPB_UPCAST(e)};
newdefs[0] = upb_msgdef_upcast_mutable(m);
newdefs[1] = upb_msgdef_upcast_mutable(m2);
newdefs[2] = upb_enumdef_upcast_mutable(e);
ASSERT_STATUS(upb_symtab_add(s, newdefs, 3, &s, &status), &status);
// Try adding a new definition of MyEnum, MyMessage should get replaced with
// a new version.
upb_enumdef *e2 = upb_enumdef_newnamed("MyEnum", &s);
/* Try adding a new definition of MyEnum, MyMessage should get replaced with
* a new version. */
e2 = upb_enumdef_newnamed("MyEnum", &s);
ASSERT_STATUS(upb_enumdef_addval(e2, "VAL1", 1, &status), &status);
upb_def *newdefs2[] = {UPB_UPCAST(e2)};
newdefs2[0] = upb_enumdef_upcast_mutable(e2);
ASSERT_STATUS(upb_symtab_add(s, newdefs2, 1, &s, &status), &status);
const upb_msgdef *m3 = upb_symtab_lookupmsg(s, "MyMessage");
m3 = upb_symtab_lookupmsg(s, "MyMessage");
ASSERT(m3);
// Must be different because it points to MyEnum which was replaced.
/* Must be different because it points to MyEnum which was replaced. */
ASSERT(m3 != m);
m3 = upb_symtab_lookupmsg(s, "MyMessage2");
// Should be the same because it was not replaced, nor were any defs that
// are reachable from it.
/* Should be the same because it was not replaced, nor were any defs that
* are reachable from it. */
ASSERT(m3 == m2);
upb_symtab_unref(s, &s);
}
static void test_freeze_free() {
// Test that freeze frees defs that were only being kept alive by virtue of
// sharing a group with other defs that are being frozen.
bool ok;
/* Test that freeze frees defs that were only being kept alive by virtue of
* sharing a group with other defs that are being frozen. */
upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1);
upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2);
upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3);
upb_msgdef *m4 = upb_msgdef_newnamed("M4", &m4);
upb_fielddef *f = upb_fielddef_new(&f);
// Freeze M4 and make M1 point to it.
/* Freeze M4 and make M1 point to it. */
upb_def_freeze((upb_def*const*)&m4, 1, NULL);
upb_fielddef *f = upb_fielddef_new(&f);
upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
ASSERT(upb_fielddef_setnumber(f, 1, NULL));
ASSERT(upb_fielddef_setname(f, "foo", NULL));
@ -241,10 +267,10 @@ static void test_freeze_free() {
ASSERT(upb_msgdef_addfield(m1, f, &f, NULL));
// After this unref, M1 is the only thing keeping M4 alive.
/* After this unref, M1 is the only thing keeping M4 alive. */
upb_msgdef_unref(m4, &m4);
// Force M1/M2/M3 into a single mutable refcounting group.
/* Force M1/M2/M3 into a single mutable refcounting group. */
f = upb_fielddef_new(&f);
upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
ASSERT(upb_fielddef_setnumber(f, 1, NULL));
@ -254,40 +280,45 @@ static void test_freeze_free() {
ASSERT(upb_fielddef_setmsgsubdef(f, m2, NULL));
ASSERT(upb_fielddef_setmsgsubdef(f, m3, NULL));
// Make M3 cyclic with itself.
/* Make M3 cyclic with itself. */
ASSERT(upb_msgdef_addfield(m3, f, &f, NULL));
// These will be kept alive since they are in the same refcounting group as
// M3, which still has a ref. Note: this behavior is not guaranteed by the
// API, but true in practice with its current implementation.
/* These will be kept alive since they are in the same refcounting group as
* M3, which still has a ref. Note: this behavior is not guaranteed by the
* API, but true in practice with its current implementation. */
upb_msgdef_unref(m1, &m1);
upb_msgdef_unref(m2, &m2);
// Test that they are still alive (NOT allowed by the API).
ASSERT(strcmp("M1", upb_msgdef_fullname(m1)) == 0);
ASSERT(strcmp("M2", upb_msgdef_fullname(m2)) == 0);
/* Test that they are still alive (NOT allowed by the API). */
ok = strcmp("M1", upb_msgdef_fullname(m1)) == 0;
ASSERT(ok);
ok = strcmp("M2", upb_msgdef_fullname(m2)) == 0;
ASSERT(ok);
// Freeze M3. If the test leaked no memory, then freeing m1 and m2 was
// successful.
/* Freeze M3. If the test leaked no memory, then freeing m1 and m2 was
* successful. */
ASSERT(upb_def_freeze((upb_def*const*)&m3, 1, NULL));
upb_msgdef_unref(m3, &m3);
}
static void test_partial_freeze() {
// Test that freeze of only part of the graph correctly adjusts objects that
// point to the newly-frozen objects.
/* Test that freeze of only part of the graph correctly adjusts objects that
* point to the newly-frozen objects. */
upb_msgdef *m1 = upb_msgdef_newnamed("M1", &m1);
upb_msgdef *m2 = upb_msgdef_newnamed("M2", &m2);
upb_msgdef *m3 = upb_msgdef_newnamed("M3", &m3);
upb_fielddef *f1 = upb_fielddef_new(&f1);
upb_fielddef *f2 = upb_fielddef_new(&f2);
upb_def *defs[2];
defs[0] = upb_msgdef_upcast_mutable(m1);
defs[1] = upb_msgdef_upcast_mutable(m2);
upb_fielddef_settype(f1, UPB_TYPE_MESSAGE);
ASSERT(upb_fielddef_setnumber(f1, 1, NULL));
ASSERT(upb_fielddef_setname(f1, "f1", NULL));
ASSERT(upb_fielddef_setmsgsubdef(f1, m1, NULL));
upb_fielddef *f2 = upb_fielddef_new(&f2);
upb_fielddef_settype(f2, UPB_TYPE_MESSAGE);
ASSERT(upb_fielddef_setnumber(f2, 2, NULL));
ASSERT(upb_fielddef_setname(f2, "f2", NULL));
@ -296,9 +327,8 @@ static void test_partial_freeze() {
ASSERT(upb_msgdef_addfield(m3, f1, &f1, NULL));
ASSERT(upb_msgdef_addfield(m3, f2, &f2, NULL));
// Freeze M1 and M2, which should cause the group to be split
// and m3 (left mutable) to take references on m1 and m2.
upb_def *defs[] = {UPB_UPCAST(m1), UPB_UPCAST(m2)};
/* Freeze M1 and M2, which should cause the group to be split
* and m3 (left mutable) to take references on m1 and m2. */
ASSERT(upb_def_freeze(defs, 2, NULL));
ASSERT(upb_msgdef_isfrozen(m1));
@ -312,13 +342,15 @@ static void test_partial_freeze() {
static void test_descriptor_flags() {
upb_msgdef *m = upb_msgdef_new(&m);
ASSERT(upb_msgdef_mapentry(m) == false);
upb_msgdef *m2;
upb_status s = UPB_STATUS_INIT;
ASSERT(upb_msgdef_mapentry(m) == false);
upb_msgdef_setfullname(m, "TestMessage", &s);
ASSERT(upb_ok(&s));
upb_msgdef_setmapentry(m, true);
ASSERT(upb_msgdef_mapentry(m) == true);
upb_msgdef *m2 = upb_msgdef_dup(m, &m2);
m2 = upb_msgdef_dup(m, &m2);
ASSERT(upb_msgdef_mapentry(m2) == true);
upb_msgdef_unref(m, &m);
upb_msgdef_unref(m2, &m2);
@ -326,10 +358,13 @@ static void test_descriptor_flags() {
static void test_mapentry_check() {
upb_status s = UPB_STATUS_INIT;
upb_msgdef *m = upb_msgdef_new(&m);
upb_msgdef_setfullname(m, "TestMessage", &s);
upb_fielddef *f = upb_fielddef_new(&f);
upb_symtab *symtab = upb_symtab_new(&symtab);
upb_msgdef *subm = upb_msgdef_new(&subm);
upb_def *defs[2];
upb_msgdef_setfullname(m, "TestMessage", &s);
upb_fielddef_setname(f, "field1", &s);
upb_fielddef_setnumber(f, 1, &s);
upb_fielddef_setlabel(f, UPB_LABEL_OPTIONAL);
@ -338,14 +373,13 @@ static void test_mapentry_check() {
upb_msgdef_addfield(m, f, &f, &s);
ASSERT(upb_ok(&s));
upb_msgdef *subm = upb_msgdef_new(&subm);
upb_msgdef_setfullname(subm, "MapEntry", &s);
upb_msgdef_setmapentry(subm, true);
upb_symtab *symtab = upb_symtab_new(&symtab);
upb_def *defs[] = {UPB_UPCAST(m), UPB_UPCAST(subm)};
defs[0] = upb_msgdef_upcast_mutable(m);
defs[1] = upb_msgdef_upcast_mutable(subm);
upb_symtab_add(symtab, defs, 2, NULL, &s);
// Should not have succeeded: non-repeated field pointing to a MapEntry.
/* Should not have succeeded: non-repeated field pointing to a MapEntry. */
ASSERT(!upb_ok(&s));
upb_fielddef_setlabel(f, UPB_LABEL_REPEATED);
@ -360,22 +394,26 @@ static void test_mapentry_check() {
static void test_oneofs() {
upb_status s = UPB_STATUS_INIT;
bool ok = true;
upb_def *subm_defs[1];
upb_symtab *symtab = upb_symtab_new(&symtab);
upb_msgdef *subm = upb_msgdef_newnamed("SubMessage", &symtab);
upb_msgdef *m = upb_msgdef_newnamed("TestMessage", &symtab);
upb_oneofdef *o = upb_oneofdef_new(&o);
const upb_oneofdef *lookup_o;
const upb_fielddef *lookup_field;
upb_def *defs[1];
ASSERT(symtab != NULL);
// Create a test message for fields to refer to.
upb_msgdef *subm = upb_msgdef_newnamed("SubMessage", &symtab);
/* Create a test message for fields to refer to. */
upb_msgdef_addfield(subm, newfield("field1", 1, UPB_TYPE_INT32,
UPB_LABEL_OPTIONAL, NULL, &symtab),
&symtab, NULL);
upb_def *subm_defs[] = {UPB_UPCAST(subm)};
subm_defs[0] = upb_msgdef_upcast_mutable(subm);
ASSERT_STATUS(upb_symtab_add(symtab, subm_defs, 1, &symtab, &s), &s);
upb_msgdef *m = upb_msgdef_newnamed("TestMessage", &symtab);
ASSERT(upb_msgdef_numoneofs(m) == 0);
upb_oneofdef *o = upb_oneofdef_new(&o);
ASSERT(upb_oneofdef_numfields(o) == 0);
ASSERT(upb_oneofdef_name(o) == NULL);
@ -395,14 +433,14 @@ static void test_oneofs() {
ok = upb_msgdef_addoneof(m, o, NULL, &s);
ASSERT_STATUS(ok, &s);
upb_def *defs[] = {UPB_UPCAST(m)};
defs[0] = upb_msgdef_upcast_mutable(m);
ASSERT_STATUS(upb_symtab_add(symtab, defs, 1, &symtab, &s), &s);
ASSERT(upb_msgdef_numoneofs(m) == 1);
const upb_oneofdef *lookup_o = upb_msgdef_ntooz(m, "test_oneof");
lookup_o = upb_msgdef_ntooz(m, "test_oneof");
ASSERT(lookup_o == o);
const upb_fielddef *lookup_field = upb_oneofdef_ntofz(o, "field1");
lookup_field = upb_oneofdef_ntofz(o, "field1");
ASSERT(lookup_field != NULL && upb_fielddef_number(lookup_field) == 1);
upb_symtab_unref(symtab, &symtab);

@ -18,13 +18,13 @@ static bool startmsg(void *c, const void *hd) {
}
static void test_error() {
// Test creating handlers of a static msgdef.
/* Test creating handlers of a static msgdef. */
const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
upb_handlers *h =
upb_handlers_new(upbdefs_google_protobuf_DescriptorProto(s), &h);
upb_symtab_unref(s, &s);
// Attempt to set the same handler twice causes error.
/* Attempt to set the same handler twice causes error. */
ASSERT(upb_ok(upb_handlers_status(h)));
upb_handlers_setstartmsg(h, &startmsg, NULL);
ASSERT(upb_ok(upb_handlers_status(h)));
@ -32,7 +32,7 @@ static void test_error() {
ASSERT(!upb_ok(upb_handlers_status(h)));
ASSERT(!upb_handlers_freeze(&h, 1, NULL));
// Clearing the error will let us proceed.
/* Clearing the error will let us proceed. */
upb_handlers_clearerr(h);
ASSERT(upb_handlers_freeze(&h, 1, NULL));
ASSERT(upb_handlers_isfrozen(h));

@ -38,8 +38,8 @@ bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
memcpy(buf2, buf + start, len);
if (verbose) {
fprintf(stderr, "Calling parse(%zu) for bytes %zu-%zu of the input\n",
len, start, end);
fprintf(stderr, "Calling parse(%u) for bytes %u-%u of the input\n",
(unsigned)len, (unsigned)start, (unsigned)end);
}
size_t parsed = sink->PutBuffer(subc, buf2, len, &global_handle);
@ -48,18 +48,18 @@ bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
if (verbose) {
if (parsed == len) {
fprintf(stderr,
"parse(%zu) = %zu, complete byte count indicates success\n",
len, len);
"parse(%u) = %u, complete byte count indicates success\n",
(unsigned)len, (unsigned)len);
} else if (parsed > len) {
fprintf(stderr,
"parse(%zu) = %zu, long byte count indicates success and skip"
"of the next %zu bytes\n",
len, parsed, parsed - len);
"parse(%u) = %u, long byte count indicates success and skip"
"of the next %u bytes\n",
(unsigned)len, (unsigned)parsed, (unsigned)(parsed - len));
} else {
fprintf(stderr,
"parse(%zu) = %zu, short byte count indicates failure; "
"last %zu bytes were not consumed\n",
len, parsed, len - parsed);
"parse(%u) = %u, short byte count indicates failure; "
"last %u bytes were not consumed\n",
(unsigned)len, (unsigned)parsed, (unsigned)(len - parsed));
}
}
@ -73,8 +73,8 @@ bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
"Error: decode function returned complete byte count but set "
"error status\n");
}
fprintf(stderr, "Status: %s, parsed=%zu, len=%zu\n",
status->error_message(), parsed, len);
fprintf(stderr, "Status: %s, parsed=%u, len=%u\n",
status->error_message(), (unsigned)parsed, (unsigned)len);
ASSERT(false);
}

@ -205,25 +205,25 @@ function Dumper:new(linktab)
return obj
end
-- Dumps a _upb_value, eg:
-- UPB_VALUE_INIT_INT32(5)
-- Dumps a upb_tabval, eg:
-- UPB_TABVALUE_INIT(5)
function Dumper:_value(val, upbtype)
if type(val) == "nil" then
return "UPB__VALUE_INIT_NONE"
return "UPB_TABVALUE_EMPTY_INIT"
elseif type(val) == "number" then
-- Use upbtype to disambiguate what kind of number it is.
if upbtype == upbtable.CTYPE_INT32 then
return string.format("UPB_VALUE_INIT_INT32(%d)", val)
return string.format("UPB_TABVALUE_INT_INIT(%d)", val)
else
-- TODO(haberman): add support for these so we can properly support
-- default values.
error("Unsupported number type " .. upbtype)
end
elseif type(val) == "string" then
return string.format('UPB_VALUE_INIT_CONSTPTR("%s")', val)
return string.format('UPB_TABVALUE_PTR_INIT("%s")', val)
else
-- We take this as an object reference that has an entry in the link table.
return string.format("UPB_VALUE_INIT_CONSTPTR(%s)", self.linktab:addr(val))
return string.format("UPB_TABVALUE_PTR_INIT(%s)", self.linktab:addr(val))
end
end
@ -258,7 +258,7 @@ function Dumper:arrayval(val)
if val.val then
return string.format(" %s,\n", self:_value(val.val, val.valtype))
else
return " UPB_ARRAY_EMPTYENT,\n"
return " UPB_TABVALUE_EMPTY_INIT,\n"
end
end
@ -296,9 +296,9 @@ local function gettables(def)
end
local function emit_file_warning(append)
append('// This file was generated by upbc (the upb compiler).\n')
append('// Do not edit -- your changes will be discarded when the file is\n')
append('// regenerated.\n\n')
append('/* This file was generated by upbc (the upb compiler).\n')
append(' * Do not edit -- your changes will be discarded when the file is\n')
append(' * regenerated. */\n\n')
end
local function join(...)
@ -356,7 +356,7 @@ end
local function end_namespace(package, append)
local package_components = split(package)
for i=#package_components,1,-1 do
append("} // namespace %s\n", package_components[i])
append("} /* namespace %s */\n", package_components[i])
end
end
@ -426,6 +426,7 @@ local function dump_defs_c(symtab, basename, namespace, append)
-- Emit forward declarations.
emit_file_warning(append)
append('#include "upb/def.h"\n')
append('#include "upb/structdefs.int.h"\n')
append('#include "upb/symtab.h"\n\n')
append("static const upb_msgdef %s;\n", linktab:cdecl(upb.DEF_MSG))
append("static const upb_fielddef %s;\n", linktab:cdecl(upb.DEF_FIELD))
@ -436,7 +437,7 @@ local function dump_defs_c(symtab, basename, namespace, append)
if not linktab:empty("intentries") then
append("static const upb_tabent %s;\n", linktab:cdecl("intentries"))
end
append("static const _upb_value %s;\n", linktab:cdecl("arrays"))
append("static const upb_tabval %s;\n", linktab:cdecl("arrays"))
append("\n")
append("#ifdef UPB_DEBUG_REFS\n")
append("static upb_inttable reftables[%d];\n", reftable_count)
@ -456,8 +457,8 @@ local function dump_defs_c(symtab, basename, namespace, append)
append(' UPB_MSGDEF_INIT("%s", %d, %d, %s, %s,' ..
'&reftables[%d], &reftables[%d]),\n',
m:full_name(),
m:_selector_count(),
m:_submsg_field_count(),
upbtable.msgdef_selector_count(m),
upbtable.msgdef_submsg_field_count(m),
dumper:inttable(tables.int),
dumper:strtable(tables.str),
reftable, reftable + 1)
@ -469,7 +470,7 @@ local function dump_defs_c(symtab, basename, namespace, append)
for f in linktab:objs(upb.DEF_FIELD) do
local subdef = "NULL"
if f:has_subdef() then
subdef = string.format("UPB_UPCAST(%s)", linktab:addr(f:subdef()))
subdef = string.format("(const upb_def*)(%s)", linktab:addr(f:subdef()))
end
local intfmt
if f:type() == upb.TYPE_UINT32 or
@ -490,7 +491,7 @@ local function dump_defs_c(symtab, basename, namespace, append)
boolstr(f:istagdelim()), boolstr(f:is_extension()),
boolstr(f:lazy()), boolstr(f:packed()), f:name(), f:number(),
linktab:addr(f:containing_type()), subdef,
f:_selector_base(), f:index(),
upbtable.fielddef_selector_base(f), f:index(),
reftable, reftable + 1
)
reftable = reftable + 2
@ -529,7 +530,7 @@ local function dump_defs_c(symtab, basename, namespace, append)
append("};\n\n");
end
append("static const _upb_value %s = {\n", linktab:cdecl("arrays"))
append("static const upb_tabval %s = {\n", linktab:cdecl("arrays"))
for ent in linktab:objs("arrays") do
append(dumper:arrayval(ent))
end
@ -602,9 +603,14 @@ local function dump_enum_vals(enumdef, append)
--
-- (notice the duplicated "TYPE").
local cident = to_cident(getpackage(enumdef:full_name()))
for _, pair in ipairs(enum_vals) do
for i, pair in ipairs(enum_vals) do
k, v = pair[1], pair[2]
append(' %s = %d,\n', to_preproc(cident, k), v)
append(' %s = %d', to_preproc(cident, k), v)
if i == #enum_vals then
append('\n')
else
append(',\n')
end
end
end
@ -644,7 +650,7 @@ local function dump_selectors(msgdef, append, base)
--
-- // So instead we make the latter the very beautiful:
-- SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32
append("// %s\n", msgdef:full_name())
append("/* %s */\n", msgdef:full_name())
local cident = to_cident(msgdef:full_name())
for _, pair in ipairs(selectors) do
k, v = pair[1], pair[2]
@ -655,19 +661,19 @@ end
local function dump_defs_h(symtab, basename, namespace, append, linktab)
local basename_preproc = to_preproc(basename)
append("// This file contains accessors for a set of compiled-in defs.\n")
append("// Note that unlike Google's protobuf, it does *not* define\n")
append("// generated classes or any other kind of data structure for\n")
append("// actually storing protobufs. It only contains *defs* which\n")
append("// let you reflect over a protobuf *schema*.\n")
append("//\n")
append("/* This file contains accessors for a set of compiled-in defs.\n")
append(" * Note that unlike Google's protobuf, it does *not* define\n")
append(" * generated classes or any other kind of data structure for\n")
append(" * actually storing protobufs. It only contains *defs* which\n")
append(" * let you reflect over a protobuf *schema*.\n")
append(" */\n")
emit_file_warning(append)
append('#ifndef %s_UPB_H_\n', basename_preproc)
append('#define %s_UPB_H_\n\n', basename_preproc)
append('#include "upb/def.h"\n')
append('#include "upb/symtab.h"\n\n')
append('#ifdef __cplusplus\n')
append('extern "C" {\n')
append('UPB_BEGIN_EXTERN_C\n')
append('#endif\n\n')
local packages = {}
@ -681,7 +687,7 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
-- Dump C enums for proto enums.
append("// Enums\n\n")
append("/* Enums */\n\n")
for _, def in ipairs(sorted_defs(symtab:defs(upb.DEF_ENUM))) do
local cident = to_cident(def:full_name())
append('typedef enum {\n')
@ -690,7 +696,7 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
end
-- selectors
append("// Selectors\n\n")
append("/* Selectors */\n\n")
for _, def in ipairs(sorted_defs(symtab:defs(upb.DEF_MSG))) do
dump_selectors(def, append, upb)
end
@ -698,7 +704,7 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
append("const upb_symtab *%s_%s(const void *owner);" ..
"\n\n", namespace, to_cident(basename))
append("// MessageDefs\n")
append("/* MessageDefs */\n")
dump_defs_for_type(
"UPB_INLINE const upb_msgdef *%s_%s(const upb_symtab *s) {\n" ..
" const upb_msgdef *m = upb_symtab_lookupmsg(s, \"%s\");\n" ..
@ -710,7 +716,7 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
append("\n")
append("// EnumDefs\n")
append("/* EnumDefs */\n")
dump_defs_for_type(
"UPB_INLINE const upb_enumdef *%s_%s(const upb_symtab *s) {\n" ..
" const upb_enumdef *e = upb_symtab_lookupenum(s, \"%s\");\n" ..
@ -739,9 +745,7 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
end
append("\n")
append('#ifdef __cplusplus\n')
append('}; // extern "C"\n')
append('#endif\n\n')
append('UPB_END_EXTERN_C\n\n')
append("#ifdef __cplusplus\n\n")
append("namespace %s {\n", namespace)
@ -794,12 +798,12 @@ local function dump_defs_h(symtab, basename, namespace, append, linktab)
append("\n")
end
append("} // namespace %s\n\n\n", namespace)
append("} /* namespace %s */\n\n\n", namespace)
append("#undef RETURN_REFFED\n")
append("#endif // __cplusplus\n\n")
append("#endif /* __cplusplus */\n\n")
append('#endif // %s_UPB_H_\n', basename_preproc)
append('#endif /* %s_UPB_H_ */\n', basename_preproc)
end
function export.dump_defs(symtab, basename, append_h, append_c)

@ -46,7 +46,7 @@ core32_install() {
sudo apt-get install libc6-dev-i386 g++-multilib
}
core32_script() {
make -j12 tests USER_CPPFLAGS=-m32
make -j12 tests USER_CPPFLAGS="$USER_CPPFLAGS -m32"
make test
}
@ -56,7 +56,7 @@ lua_install() {
sudo apt-get install lua5.2 liblua5.2-dev
}
lua_script() {
make -j12 testlua USER_CPPFLAGS=`pkg-config lua5.2 --cflags`
make -j12 testlua USER_CPPFLAGS="$USER_CPPFLAGS `pkg-config lua5.2 --cflags`"
}
# Test that generated files don't need to be regenerated.
@ -69,7 +69,7 @@ genfiles_install() {
sudo apt-get install lua5.2 liblua5.2-dev protobuf-compiler
}
genfiles_script() {
make -j12 genfiles USER_CPPFLAGS=`pkg-config lua5.2 --cflags`
make -j12 genfiles USER_CPPFLAGS="$USER_CPPFLAGS `pkg-config lua5.2 --cflags`"
# Will fail if any differences were observed.
git diff --exit-code
}
@ -104,10 +104,35 @@ coverage_after_success() {
set -e
set -x
if [ "$1" == "local" ]; then
# Runs one named build configuration from a clean tree.
#   $1: configuration name; it is printed in the banner and passed to the
#       child ./travis.sh invocation through UPB_TRAVIS_BUILD.
run_config() {
# Start from a clean tree before running this configuration.
make clean
echo
echo "travis.sh: TESTING CONFIGURATION $1 ==============================="
echo
# Re-invoke this script with the "script" argument for the chosen config.
UPB_TRAVIS_BUILD=$1 ./travis.sh script
}
# Run all configurations serially locally to test before pushing a pull
# request.
export CC=gcc
export CXX=g++
run_config "bare"
run_config "barejit"
run_config "core32"
run_config "withprotobuf"
run_config "lua"
run_config "ndebug"
run_config "genfiles"
exit
fi
$CC --version
$CXX --version
if [ "$1" == "after_failure" ]; then
# Uncomment to enable uploading failure logs to S3.
# UPLOAD_TO_S3=true
if [ "$1" == "after_failure" ] && [ "$UPLOAD_TO_S3" == "true" ]; then
# Upload failing tree to S3.
curl -sL https://raw.githubusercontent.com/travis-ci/artifacts/master/install | bash
PATH="$PATH:$HOME/bin"
@ -131,6 +156,15 @@ fi
# Enable asserts and ref debugging (though some configurations override this).
export USER_CPPFLAGS="-UNDEBUG -DUPB_DEBUG_REFS -DUPB_THREAD_UNSAFE -g"
if [ "$CC" == "gcc" ]; then
# For the GCC build, test loading JIT code via SO. For the Clang build, test
# loading it in the normal way.
export USER_CPPFLAGS="$USER_CPPFLAGS -DUPB_JIT_LOAD_SO"
fi
# TODO(haberman): Test UPB_DUMP_BYTECODE? We don't right now because it is so
# noisy.
# Enable verbose build.
export Q=

@ -449,12 +449,13 @@ case goog::FieldDescriptor::cpptype: \
enum OneofType {
ONEOF_TYPE_NONE,
ONEOF_TYPE_STRING,
ONEOF_TYPE_MESSAGE,
ONEOF_TYPE_MESSAGE
#ifdef UPB_GOOGLE3
,
ONEOF_TYPE_GLOBALSTRING,
ONEOF_TYPE_CORD,
ONEOF_TYPE_STRINGPIECE,
ONEOF_TYPE_LAZYFIELD,
ONEOF_TYPE_LAZYFIELD
#endif
};

File diff suppressed because it is too large Load Diff

@ -15,14 +15,14 @@
#include "upb/handlers.h"
#include "upb/symtab.h"
// Lua 5.1/5.2 compatibility code.
/* Lua 5.1/5.2 compatibility code. */
#if LUA_VERSION_NUM == 501
#define lua_rawlen lua_objlen
// Lua >= 5.2's getuservalue/setuservalue functions do not exist in prior
// versions but the older function lua_getfenv() can provide 100% of its
// capabilities (the reverse is not true).
/* Lua >= 5.2's getuservalue/setuservalue functions do not exist in prior
* versions but the older function lua_getfenv() can provide 100% of its
* capabilities (the reverse is not true). */
#define lua_getuservalue(L, index) lua_getfenv(L, index)
#define lua_setuservalue(L, index) lua_setfenv(L, index)
@ -40,33 +40,33 @@ int luaL_typerror(lua_State *L, int narg, const char *tname);
if (!(predicate)) \
luaL_error(L, "internal error: %s, %s:%d ", #predicate, __FILE__, __LINE__);
// Function for initializing the core library. This function is idempotent,
// and should be called at least once before calling any of the functions that
// construct core upb types.
/* Function for initializing the core library. This function is idempotent,
* and should be called at least once before calling any of the functions that
* construct core upb types. */
int luaopen_upb(lua_State *L);
// Gets or creates a package table for a C module that is uniquely identified by
// "ptr". The easiest way to supply a unique "ptr" is to pass the address of a
// static variable private in the module's .c file.
//
// If this module has already been registered in this lua_State, pushes it and
// returns true.
//
// Otherwise, creates a new module table for this module with the given name,
// pushes it, and registers the given top-level functions in it. It also sets
// it as a global variable, but only if the current version of Lua expects that
// (ie Lua 5.1/LuaJIT).
//
// If "false" is returned, the caller is guaranteed that this lib has not been
// registered in this Lua state before (regardless of any funny business the
// user might have done to the global state), so the caller can safely perform
// one-time initialization.
/* Gets or creates a package table for a C module that is uniquely identified by
* "ptr". The easiest way to supply a unique "ptr" is to pass the address of a
* static variable private in the module's .c file.
*
* If this module has already been registered in this lua_State, pushes it and
* returns true.
*
* Otherwise, creates a new module table for this module with the given name,
* pushes it, and registers the given top-level functions in it. It also sets
* it as a global variable, but only if the current version of Lua expects that
* (ie Lua 5.1/LuaJIT).
*
* If "false" is returned, the caller is guaranteed that this lib has not been
* registered in this Lua state before (regardless of any funny business the
* user might have done to the global state), so the caller can safely perform
* one-time initialization. */
bool lupb_openlib(lua_State *L, void *ptr, const char *name,
const luaL_Reg *funcs);
// Custom check/push functions. Unlike the Lua equivalents, they are pinned to
// specific types (instead of lua_Number, etc), and do not allow any implicit
// conversion or data loss.
/* Custom check/push functions. Unlike the Lua equivalents, they are pinned to
* specific types (instead of lua_Number, etc), and do not allow any implicit
* conversion or data loss. */
int64_t lupb_checkint64(lua_State *L, int narg);
int32_t lupb_checkint32(lua_State *L, int narg);
uint64_t lupb_checkuint64(lua_State *L, int narg);
@ -84,8 +84,8 @@ void lupb_pushdouble(lua_State *L, double val);
void lupb_pushfloat(lua_State *L, float val);
void lupb_pushbool(lua_State *L, bool val);
// Functions for getting/pushing wrappers to various types defined in the
// core library.
/* Functions for getting/pushing wrappers to various types defined in the
* core library. */
void *lupb_refcounted_check(lua_State *L, int narg, const char *type);
const upb_msgdef *lupb_msg_checkdef(lua_State *L, int narg);
const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg);
@ -104,26 +104,26 @@ void lupb_symtab_pushwrapper(lua_State *L, const upb_symtab *s,
void lupb_symtab_pushnewrapper(lua_State *L, const upb_symtab *s,
const void *ref_donor);
// For constructing a new message. narg is the Lua value for the MessageDef
// object.
/* For constructing a new message. narg is the Lua value for the MessageDef
* object. */
void lupb_msg_pushnew(lua_State *L, int narg);
// Builds and returns a handlers object for populating a lupb_msg described by
// the MessageDef at "narg".
//
// TODO(haberman): factor this so it doesn't have to take a lua_State. We
// should be able to generate message handlers for a upb_msgdef that can be used
// across many Lua states, so we can share JIT code across lua_States.
/* Builds and returns a handlers object for populating a lupb_msg described by
* the MessageDef at "narg".
*
* TODO(haberman): factor this so it doesn't have to take a lua_State. We
* should be able to generate message handlers for a upb_msgdef that can be used
* across many Lua states, so we can share JIT code across lua_States. */
const upb_handlers *lupb_msg_newwritehandlers(lua_State *L, int narg,
const void *owner);
// Registers a type with the given name, methods, and metamethods.
// If "refcount_gc" is true, adds a __gc metamethod that does an unref.
// Refcounted types must be allocated with lupb_refcounted_push[new]wrapper.
/* Registers a type with the given name, methods, and metamethods.
* If "refcount_gc" is true, adds a __gc metamethod that does an unref.
* Refcounted types must be allocated with lupb_refcounted_push[new]wrapper. */
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm, bool refcount_gc);
// Checks the given upb_status and throws a Lua error if it is not ok.
/* Checks the given upb_status and throws a Lua error if it is not ok. */
void lupb_checkstatus(lua_State *L, upb_status *s);
#endif // UPB_LUA_UPB_H_
#endif /* UPB_LUA_UPB_H_ */

@ -23,61 +23,65 @@ static upb_pbdecodermethod *lupb_pbdecodermethod_check(lua_State *L, int narg) {
static int lupb_pbdecodermethod_new(lua_State *L) {
const upb_handlers *handlers = lupb_msg_newwritehandlers(L, 1, &handlers);
const upb_pbdecodermethod *m;
upb_pbdecodermethodopts opts;
upb_pbdecodermethodopts_init(&opts, handlers);
const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m);
m = upb_pbdecodermethod_new(&opts, &m);
upb_handlers_unref(handlers, &handlers);
lupb_refcounted_pushnewrapper(L, UPB_UPCAST(m), LUPB_PBDECODERMETHOD, &m);
lupb_refcounted_pushnewrapper(
L, upb_pbdecodermethod_upcast(m), LUPB_PBDECODERMETHOD, &m);
// We need to keep a pointer to the MessageDef (in Lua space) so we can
// construct new messages in parse().
/* We need to keep a pointer to the MessageDef (in Lua space) so we can
* construct new messages in parse(). */
lua_newtable(L);
lua_pushvalue(L, 1);
lua_rawseti(L, -2, MSGDEF_INDEX);
lua_setuservalue(L, -2);
return 1; // The DecoderMethod wrapper.
return 1; /* The DecoderMethod wrapper. */
}
// Unlike most of our exposed Lua functions, this does not correspond to an
// actual method on the underlying DecoderMethod. But it's convenient, and
// important to implement in C because we can do stack allocation and
// initialization of our runtime structures like the Decoder and Sink.
/* Unlike most of our exposed Lua functions, this does not correspond to an
* actual method on the underlying DecoderMethod. But it's convenient, and
* important to implement in C because we can do stack allocation and
* initialization of our runtime structures like the Decoder and Sink. */
static int lupb_pbdecodermethod_parse(lua_State *L) {
size_t len;
const upb_pbdecodermethod *method = lupb_pbdecodermethod_check(L, 1);
const char *pb = lua_tolstring(L, 2, &len);
void *msg;
upb_status status = UPB_STATUS_INIT;
upb_env env;
upb_sink sink;
upb_pbdecoder *decoder;
const upb_handlers *handlers = upb_pbdecodermethod_desthandlers(method);
lua_getuservalue(L, 1);
lua_rawgeti(L, -1, MSGDEF_INDEX);
lupb_assert(L, !lua_isnil(L, -1));
lupb_msg_pushnew(L, -1); // Push new message.
void *msg = lua_touserdata(L, -1);
lupb_msg_pushnew(L, -1); /* Push new message. */
msg = lua_touserdata(L, -1);
// Handlers need this.
/* Handlers need this. */
lua_getuservalue(L, -1);
upb_status status = UPB_STATUS_INIT;
upb_env env;
upb_env_init(&env);
upb_env_reporterrorsto(&env, &status);
upb_sink sink;
upb_sink_reset(&sink, handlers, msg);
upb_pbdecoder *decoder = upb_pbdecoder_create(&env, method, &sink);
decoder = upb_pbdecoder_create(&env, method, &sink);
upb_bufsrc_putbuf(pb, len, upb_pbdecoder_input(decoder));
// TODO: This won't get called in the error case, which longjmp's across us.
// This will cause the memory to leak. To remedy this, we should make the
// upb_env wrapped in a userdata that guarantees this will get called.
/* TODO: This won't get called in the error case, which longjmp's across us.
* This will cause the memory to leak. To remedy this, we should make the
* upb_env wrapped in a userdata that guarantees this will get called. */
upb_env_uninit(&env);
lupb_checkstatus(L, &status);
lua_pop(L, 1); // Uservalue.
lua_pop(L, 1); /* Uservalue. */
return 1;
}

@ -25,7 +25,9 @@
#include "lauxlib.h"
#include "upb/bindings/lua/upb.h"
#include "upb/def.h"
#include "upb/structdefs.int.h"
#include "upb/symtab.h"
#include "upb/table.int.h"
static void lupbtable_setnum(lua_State *L, int tab, const char *key,
lua_Number val) {
@ -33,30 +35,30 @@ static void lupbtable_setnum(lua_State *L, int tab, const char *key,
lua_setfield(L, tab - 1, key);
}
static void lupbtable_pushval(lua_State *L, _upb_value val, upb_ctype_t ctype) {
static void lupbtable_pushval(lua_State *L, upb_tabval val, upb_ctype_t ctype) {
switch (ctype) {
case UPB_CTYPE_INT32:
lua_pushnumber(L, val.int32);
lua_pushnumber(L, val.val);
break;
case UPB_CTYPE_PTR:
lupb_def_pushwrapper(L, val.ptr, NULL);
lupb_def_pushwrapper(L, (void*)val.val, NULL);
break;
case UPB_CTYPE_CSTR:
lua_pushstring(L, val.cstr);
lua_pushstring(L, (const char*)val.val);
break;
default:
luaL_error(L, "Unexpected type: %d", ctype);
}
}
/* Sets a few fields common to both hash table entries and arrays.
 *
 * Expects the Lua table being populated to be on top of the stack.  Each
 * lua_setfield(L, -2, ...) call pops the value pushed just before it and
 * stores it in that table, so the stack height is unchanged on return.
 *
 *   ctype: stored as the number field "valtype".
 *   ptr:   stored as the light userdata field "ptr". */
static void lupbtable_setmetafields(lua_State *L, int ctype, const void *ptr) {
/* We tack this onto every entry so we know it even if the entries
 * don't stay with the table. */
lua_pushnumber(L, ctype);
lua_setfield(L, -2, "valtype");
/* Set this to facilitate linking.  The cast drops const because
 * lua_pushlightuserdata() takes a plain void*. */
lua_pushlightuserdata(L, (void*)ptr);
lua_setfield(L, -2, "ptr");
}
@ -81,8 +83,10 @@ static void lupbtable_pushent(lua_State *L, const upb_tabent *e,
lupbtable_setmetafields(L, ctype, e);
}
// Dumps the shared part of upb_table into a Lua table.
/* Dumps the shared part of upb_table into a Lua table. */
static void lupbtable_pushtable(lua_State *L, const upb_table *t, bool inttab) {
size_t i;
lua_newtable(L);
lupbtable_setnum(L, -1, "count", t->count);
lupbtable_setnum(L, -1, "mask", t->mask);
@ -90,21 +94,23 @@ static void lupbtable_pushtable(lua_State *L, const upb_table *t, bool inttab) {
lupbtable_setnum(L, -1, "size_lg2", t->size_lg2);
lua_newtable(L);
for (size_t i = 0; i < upb_table_size(t); i++) {
for (i = 0; i < upb_table_size(t); i++) {
lupbtable_pushent(L, &t->entries[i], inttab, t->ctype);
lua_rawseti(L, -2, i + 1);
}
lua_setfield(L, -2, "entries");
}
// Dumps a upb_inttable to a Lua table.
/* Dumps a upb_inttable to a Lua table. */
static void lupbtable_pushinttable(lua_State *L, const upb_inttable *t) {
size_t i;
lupbtable_pushtable(L, &t->t, true);
lupbtable_setnum(L, -1, "array_size", t->array_size);
lupbtable_setnum(L, -1, "array_count", t->array_count);
lua_newtable(L);
for (size_t i = 0; i < t->array_size; i++) {
for (i = 0; i < t->array_size; i++) {
lua_newtable(L);
if (upb_arrhas(t->array[i])) {
lupbtable_pushval(L, t->array[i], t->t.ctype);
@ -155,12 +161,40 @@ static void lupbtable_setfieldi(lua_State *L, const char *field, int i) {
lua_setfield(L, -2, field);
}
/* These aren't from the table, but they access other internal-only
* definitions. */
/* Lua-callable: pushes the selector_base member of the fielddef passed as
 * argument 1.  Raises a Lua error if that fielddef is not frozen.
 * Returns the number of Lua results (1). */
static int lupb_fielddef_selectorbase(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);

  if (!upb_fielddef_isfrozen(field)) {
    /* luaL_error() never returns; "return luaL_error(...)" is the
     * conventional way to make that explicit. */
    return luaL_error(L, "_selectorbase is only defined for frozen fielddefs");
  }

  lua_pushinteger(L, field->selector_base);
  return 1;
}
/* Lua-callable: pushes the selector_count member of the msgdef passed as
 * argument 1 and returns the number of Lua results (1).
 *
 * NOTE(review): unlike lupb_fielddef_selectorbase(), this does not check
 * that the def is frozen -- confirm that is intended. */
static int lupb_msgdef_selectorcount(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);

  lua_pushinteger(L, msgdef->selector_count);
  return 1;
}
/* Lua-callable: pushes the submsg_field_count member of the msgdef passed
 * as argument 1 and returns the number of Lua results (1).
 *
 * NOTE(review): unlike lupb_fielddef_selectorbase(), this does not check
 * that the def is frozen -- confirm that is intended. */
static int lupb_msgdef_submsgfieldcount(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);

  lua_pushinteger(L, msgdef->submsg_field_count);
  return 1;
}
/* Name -> C function table (luaL_Reg entries), terminated by the
 * {NULL, NULL} sentinel.
 * NOTE(review): presumably registered as this module's top-level functions
 * when the library is opened; the registration call is not visible here --
 * confirm. */
static const struct luaL_Reg lupbtable_toplevel_m[] = {
{"msgdef_itof", lupbtable_msgdef_itof},
{"msgdef_ntof", lupbtable_msgdef_ntof},
{"enumdef_iton", lupbtable_enumdef_iton},
{"enumdef_ntoi", lupbtable_enumdef_ntoi},
{"symtab_symtab", lupbtable_symtab_symtab},
/* Accessors for internal-only def members (not table dumps). */
{"msgdef_selector_count", lupb_msgdef_selectorcount},
{"msgdef_submsg_field_count", lupb_msgdef_submsgfieldcount},
{"fielddef_selector_base", lupb_fielddef_selectorbase},
/* Sentinel marking the end of the list. */
{NULL, NULL}
};
@ -170,7 +204,7 @@ int luaopen_upb_table_c(lua_State *L) {
return 1;
}
// We define these here because they are not public.
/* We define these here because they are not public. */
lupbtable_setfieldi(L, "CTYPE_PTR", UPB_CTYPE_PTR);
lupbtable_setfieldi(L, "CTYPE_CSTR", UPB_CTYPE_CSTR);
lupbtable_setfieldi(L, "CTYPE_INT32", UPB_CTYPE_INT32);
@ -178,5 +212,5 @@ int luaopen_upb_table_c(lua_State *L) {
lua_pushlightuserdata(L, NULL);
lua_setfield(L, -2, "NULL");
return 1; // Return a single Lua value, the package table created above.
return 1; /* Return a single Lua value, the package table created above. */
}

@ -9,12 +9,12 @@
#include <stdlib.h>
#include <string.h>
#include "upb/descriptor/descriptor.upb.h"
#include "upb/structdefs.int.h"
#include "upb/handlers.h"
typedef struct {
size_t len;
char str[1]; // Null-terminated string data follows.
char str[1]; /* Null-terminated string data follows. */
} str_t;
static str_t *newstr(const char *data, size_t len) {
@ -28,7 +28,7 @@ static str_t *newstr(const char *data, size_t len) {
/* Releases the storage of s by handing it to free(). */
static void freestr(str_t *s) {
  free(s);
}
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
 *
 * Returns true iff c lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}
@ -43,7 +43,8 @@ static bool upb_isalphanum(char c) {
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
bool start = true;
for (size_t i = 0; i < len; i++) {
size_t i;
for (i = 0; i < len; i++) {
char c = str[i];
if (c == '.') {
if (start || !full) {
@ -88,39 +89,22 @@ bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
upb_def *upb_def_dup(const upb_def *def, const void *o) {
switch (def->type) {
case UPB_DEF_MSG:
return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef(def), o));
return upb_msgdef_upcast_mutable(
upb_msgdef_dup(upb_downcast_msgdef(def), o));
case UPB_DEF_FIELD:
return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef(def), o));
return upb_fielddef_upcast_mutable(
upb_fielddef_dup(upb_downcast_fielddef(def), o));
case UPB_DEF_ENUM:
return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef(def), o));
return upb_enumdef_upcast_mutable(
upb_enumdef_dup(upb_downcast_enumdef(def), o));
default: assert(false); return NULL;
}
}
/* Returns the result of upb_refcounted_isfrozen() applied to def after
 * conversion with UPB_UPCAST. */
bool upb_def_isfrozen(const upb_def *def) {
return upb_refcounted_isfrozen(UPB_UPCAST(def));
}
/* Forwards to upb_refcounted_ref() with def (converted with UPB_UPCAST)
 * and the given owner. */
void upb_def_ref(const upb_def *def, const void *owner) {
upb_refcounted_ref(UPB_UPCAST(def), owner);
}
/* Forwards to upb_refcounted_unref() with def (converted with UPB_UPCAST)
 * and the given owner. */
void upb_def_unref(const upb_def *def, const void *owner) {
upb_refcounted_unref(UPB_UPCAST(def), owner);
}
/* Forwards to upb_refcounted_donateref() with def (converted with
 * UPB_UPCAST), passing "from" and "to" through unchanged. */
void upb_def_donateref(const upb_def *def, const void *from, const void *to) {
upb_refcounted_donateref(UPB_UPCAST(def), from, to);
}
/* Forwards to upb_refcounted_checkref() with def (converted with
 * UPB_UPCAST) and the given owner. */
void upb_def_checkref(const upb_def *def, const void *owner) {
upb_refcounted_checkref(UPB_UPCAST(def), owner);
}
static bool upb_def_init(upb_def *def, upb_deftype_t type,
const struct upb_refcounted_vtbl *vtbl,
const void *owner) {
if (!upb_refcounted_init(UPB_UPCAST(def), vtbl, owner)) return false;
if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
def->type = type;
def->fullname = NULL;
def->came_from_user = false;
@ -132,7 +116,7 @@ static void upb_def_uninit(upb_def *def) {
}
static const char *msgdef_name(const upb_msgdef *m) {
const char *name = upb_def_fullname(UPB_UPCAST(m));
const char *name = upb_def_fullname(upb_msgdef_upcast(m));
return name ? name : "(anonymous)";
}
@ -155,13 +139,15 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
}
if (upb_fielddef_hassubdef(f)) {
const upb_def *subdef;
if (f->subdef_is_symbolic) {
upb_status_seterrf(s, "field '%s.%s' has not been resolved",
msgdef_name(f->msg.def), upb_fielddef_name(f));
return false;
}
const upb_def *subdef = upb_fielddef_subdef(f);
subdef = upb_fielddef_subdef(f);
if (subdef == NULL) {
upb_status_seterrf(s, "field %s.%s is missing required subdef",
msgdef_name(f->msg.def), upb_fielddef_name(f));
@ -180,14 +166,14 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
// Previously verified by upb_validate_enumdef().
/* Previously verified by upb_validate_enumdef(). */
assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
// We've already validated that we have an associated enumdef and that it
// has at least one member, so at least one of these should be true.
// Because if the user didn't set anything, we'll pick up the enum's
// default, but if the user *did* set something we should at least pick up
// the one they set (int32 or string).
/* We've already validated that we have an associated enumdef and that it
* has at least one member, so at least one of these should be true.
* Because if the user didn't set anything, we'll pick up the enum's
* default, but if the user *did* set something we should at least pick up
* the one they set (int32 or string). */
assert(has_default_name || has_default_number);
if (!has_default_name) {
@ -206,13 +192,13 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
return false;
}
// Lift the effective numeric default into the field's default slot, in case
// we were only getting it "by reference" from the enumdef.
/* Lift the effective numeric default into the field's default slot, in case
* we were only getting it "by reference" from the enumdef. */
upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
}
// Ensure that MapEntry submessages only appear as repeated fields, not
// optional/required (singular) fields.
/* Ensure that MapEntry submessages only appear as repeated fields, not
* optional/required (singular) fields. */
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
upb_fielddef_msgsubdef(f) != NULL) {
const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
@ -239,8 +225,8 @@ static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
return true;
}
// All submessage fields are lower than all other fields.
// Secondly, fields are increasing in order.
/* All submessage fields are lower than all other fields.
* Secondly, fields are increasing in order. */
uint32_t field_rank(const upb_fielddef *f) {
uint32_t ret = upb_fielddef_number(f);
const uint32_t high_bit = 1 << 30;
@ -257,14 +243,15 @@ int cmp_fields(const void *p1, const void *p2) {
}
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
// Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
// lowest indexes, but we do not publicly guarantee this.
/* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
* lowest indexes, but we do not publicly guarantee this. */
upb_msg_field_iter j;
int i;
uint32_t selector;
int n = upb_msgdef_numfields(m);
upb_fielddef **fields = malloc(n * sizeof(*fields));
if (!fields) return false;
upb_msg_field_iter j;
int i;
m->submsg_field_count = 0;
for(i = 0, upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
@ -283,7 +270,7 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
qsort(fields, n, sizeof(*fields), cmp_fields);
uint32_t selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
for (i = 0; i < n; i++) {
upb_fielddef *f = fields[i];
f->index_ = i;
@ -293,38 +280,42 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
m->selector_count = selector;
#ifndef NDEBUG
// Verify that all selectors for the message are distinct.
//
{
/* Verify that all selectors for the message are distinct. */
#define TRY(type) \
if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
upb_inttable t;
upb_inttable_init(&t, UPB_CTYPE_BOOL);
upb_value v = upb_value_bool(true);
upb_selector_t sel;
upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
for(upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(&j);
// These calls will assert-fail in upb_table if the value already exists.
TRY(UPB_HANDLER_INT32);
TRY(UPB_HANDLER_INT64)
TRY(UPB_HANDLER_UINT32)
TRY(UPB_HANDLER_UINT64)
TRY(UPB_HANDLER_FLOAT)
TRY(UPB_HANDLER_DOUBLE)
TRY(UPB_HANDLER_BOOL)
TRY(UPB_HANDLER_STARTSTR)
TRY(UPB_HANDLER_STRING)
TRY(UPB_HANDLER_ENDSTR)
TRY(UPB_HANDLER_STARTSUBMSG)
TRY(UPB_HANDLER_ENDSUBMSG)
TRY(UPB_HANDLER_STARTSEQ)
TRY(UPB_HANDLER_ENDSEQ)
}
upb_inttable_uninit(&t);
if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
upb_inttable t;
upb_value v;
upb_selector_t sel;
upb_inttable_init(&t, UPB_CTYPE_BOOL);
v = upb_value_bool(true);
upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
for(upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(&j);
/* These calls will assert-fail in upb_table if the value already
* exists. */
TRY(UPB_HANDLER_INT32);
TRY(UPB_HANDLER_INT64)
TRY(UPB_HANDLER_UINT32)
TRY(UPB_HANDLER_UINT64)
TRY(UPB_HANDLER_FLOAT)
TRY(UPB_HANDLER_DOUBLE)
TRY(UPB_HANDLER_BOOL)
TRY(UPB_HANDLER_STARTSTR)
TRY(UPB_HANDLER_STRING)
TRY(UPB_HANDLER_ENDSTR)
TRY(UPB_HANDLER_STARTSUBMSG)
TRY(UPB_HANDLER_ENDSUBMSG)
TRY(UPB_HANDLER_STARTSEQ)
TRY(UPB_HANDLER_ENDSEQ)
}
upb_inttable_uninit(&t);
}
#undef TRY
#endif
@ -333,14 +324,17 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
}
bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
int i;
int maxdepth;
bool ret;
upb_status_clear(s);
// First perform validation, in two passes so we can check that we have a
// transitive closure without needing to search.
for (int i = 0; i < n; i++) {
/* First perform validation, in two passes so we can check that we have a
* transitive closure without needing to search. */
for (i = 0; i < n; i++) {
upb_def *def = defs[i];
if (upb_def_isfrozen(def)) {
// Could relax this requirement if it's annoying.
/* Could relax this requirement if it's annoying. */
upb_status_seterrmsg(s, "def is already frozen");
goto err;
} else if (def->type == UPB_DEF_FIELD) {
@ -351,14 +345,14 @@ bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
goto err;
}
} else {
// Set now to detect transitive closure in the second pass.
/* Set now to detect transitive closure in the second pass. */
def->came_from_user = true;
}
}
// Second pass of validation. Also assign selector bases and indexes, and
// compact tables.
for (int i = 0; i < n; i++) {
/* Second pass of validation. Also assign selector bases and indexes, and
* compact tables. */
for (i = 0; i < n; i++) {
upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
if (m) {
@ -371,17 +365,17 @@ bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
}
}
// Def graph contains FieldDefs between each MessageDef, so double the limit.
int maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
/* Def graph contains FieldDefs between each MessageDef, so double the
* limit. */
maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
// Validation all passed; freeze the defs.
bool ret =
upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
/* Validation all passed; freeze the defs. */
ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
assert(!(s && ret != upb_ok(s)));
return ret;
err:
for (int i = 0; i < n; i++) {
for (i = 0; i < n; i++) {
defs[i]->came_from_user = false;
}
assert(!(s && upb_ok(s)));
@ -396,12 +390,12 @@ static void upb_enumdef_free(upb_refcounted *r) {
upb_inttable_iter i;
upb_inttable_begin(&i, &e->iton);
for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
// To clean up the upb_strdup() from upb_enumdef_addval().
/* To clean up the upb_strdup() from upb_enumdef_addval(). */
free(upb_value_getcstr(upb_inttable_iter_value(&i)));
}
upb_strtable_uninit(&e->ntoi);
upb_inttable_uninit(&e->iton);
upb_def_uninit(UPB_UPCAST(e));
upb_def_uninit(upb_enumdef_upcast_mutable(e));
free(e);
}
@ -409,7 +403,8 @@ upb_enumdef *upb_enumdef_new(const void *owner) {
static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
upb_enumdef *e = malloc(sizeof(*e));
if (!e) return NULL;
if (!upb_def_init(UPB_UPCAST(e), UPB_DEF_ENUM, &vtbl, owner)) goto err2;
if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
goto err2;
if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
return e;
@ -422,9 +417,9 @@ err2:
}
upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
upb_enum_iter i;
upb_enumdef *new_e = upb_enumdef_new(owner);
if (!new_e) return NULL;
upb_enum_iter i;
for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
bool success = upb_enumdef_addval(
new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
@ -436,39 +431,18 @@ upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
return new_e;
}
bool upb_enumdef_isfrozen(const upb_enumdef *e) {
return upb_def_isfrozen(UPB_UPCAST(e));
}
void upb_enumdef_ref(const upb_enumdef *e, const void *owner) {
upb_def_ref(UPB_UPCAST(e), owner);
}
void upb_enumdef_unref(const upb_enumdef *e, const void *owner) {
upb_def_unref(UPB_UPCAST(e), owner);
}
void upb_enumdef_donateref(
const upb_enumdef *e, const void *from, const void *to) {
upb_def_donateref(UPB_UPCAST(e), from, to);
}
void upb_enumdef_checkref(const upb_enumdef *e, const void *owner) {
upb_def_checkref(UPB_UPCAST(e), owner);
}
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
upb_def *d = UPB_UPCAST(e);
upb_def *d = upb_enumdef_upcast_mutable(e);
return upb_def_freeze(&d, 1, status);
}
const char *upb_enumdef_fullname(const upb_enumdef *e) {
return upb_def_fullname(UPB_UPCAST(e));
return upb_def_fullname(upb_enumdef_upcast(e));
}
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
upb_status *s) {
return upb_def_setfullname(UPB_UPCAST(e), fullname, s);
return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
}
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
@ -517,7 +491,7 @@ int upb_enumdef_numvals(const upb_enumdef *e) {
}
/* Positions the iterator |i| at the start of the values of enumdef |e|. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
/* We iterate over the ntoi (name-keyed) table rather than iton, to account
 * for duplicate numbers. */
upb_strtable_begin(i, &e->ntoi);
}
@ -562,13 +536,13 @@ static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
void *closure) {
const upb_fielddef *f = (const upb_fielddef*)r;
if (upb_fielddef_containingtype(f)) {
visit(r, UPB_UPCAST2(upb_fielddef_containingtype(f)), closure);
visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
}
if (upb_fielddef_containingoneof(f)) {
visit(r, UPB_UPCAST2(upb_fielddef_containingoneof(f)), closure);
visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
}
if (upb_fielddef_subdef(f)) {
visit(r, UPB_UPCAST(upb_fielddef_subdef(f)), closure);
visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
}
}
@ -577,26 +551,27 @@ static void freefield(upb_refcounted *r) {
upb_fielddef_uninit_default(f);
if (f->subdef_is_symbolic)
free(f->sub.name);
upb_def_uninit(UPB_UPCAST(f));
upb_def_uninit(upb_fielddef_upcast_mutable(f));
free(f);
}
static const char *enumdefaultstr(const upb_fielddef *f) {
const upb_enumdef *e;
assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
const upb_enumdef *e = upb_fielddef_enumsubdef(f);
e = upb_fielddef_enumsubdef(f);
if (f->default_is_string && f->defaultval.bytes) {
// Default was explicitly set as a string.
/* Default was explicitly set as a string. */
str_t *s = f->defaultval.bytes;
return s->str;
} else if (e) {
if (!f->default_is_string) {
// Default was explicitly set as an integer; look it up in enumdef.
/* Default was explicitly set as an integer; look it up in enumdef. */
const char *name = upb_enumdef_iton(e, f->defaultval.sint);
if (name) {
return name;
}
} else {
// Default is completely unset; pull enumdef default.
/* Default is completely unset; pull enumdef default. */
if (upb_enumdef_numvals(e) > 0) {
const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
assert(name);
@ -608,21 +583,22 @@ static const char *enumdefaultstr(const upb_fielddef *f) {
}
static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
const upb_enumdef *e;
assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
const upb_enumdef *e = upb_fielddef_enumsubdef(f);
e = upb_fielddef_enumsubdef(f);
if (!f->default_is_string) {
// Default was explicitly set as an integer.
/* Default was explicitly set as an integer. */
*val = f->defaultval.sint;
return true;
} else if (e) {
if (f->defaultval.bytes) {
// Default was explicitly set as a str; try to lookup corresponding int.
/* Default was explicitly set as a str; try to lookup corresponding int. */
str_t *s = f->defaultval.bytes;
if (upb_enumdef_ntoiz(e, s->str, val)) {
return true;
}
} else {
// Default is unset; try to pull in enumdef default.
/* Default is unset; try to pull in enumdef default. */
if (upb_enumdef_numvals(e) > 0) {
*val = upb_enumdef_default(e);
return true;
@ -632,11 +608,11 @@ static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
return false;
}
upb_fielddef *upb_fielddef_new(const void *owner) {
upb_fielddef *upb_fielddef_new(const void *o) {
static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
upb_fielddef *f = malloc(sizeof(*f));
if (!f) return NULL;
if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, &vtbl, owner)) {
if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
free(f);
return NULL;
}
@ -654,19 +630,20 @@ upb_fielddef *upb_fielddef_new(const void *owner) {
f->lazy_ = false;
f->packed_ = true;
// For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
// with all integer types and is in some since more "default" since the most
// normal-looking proto2 types int32/int64/uint32/uint64 use variable.
//
// Other options to consider:
// - there is no default; users must set this manually (like type).
// - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
// be an optimal default for signed integers.
/* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
* with all integer types and is in some sense more "default" since the most
* normal-looking proto2 types int32/int64/uint32/uint64 use variable.
*
* Other options to consider:
* - there is no default; users must set this manually (like type).
* - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
* be an optimal default for signed integers. */
f->intfmt = UPB_INTFMT_VARIABLE;
return f;
}
upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
const char *srcname;
upb_fielddef *newf = upb_fielddef_new(owner);
if (!newf) return NULL;
upb_fielddef_settype(newf, upb_fielddef_type(f));
@ -681,9 +658,8 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
newf->defaultval = f->defaultval;
}
const char *srcname;
if (f->subdef_is_symbolic) {
srcname = f->sub.name; // Might be NULL.
srcname = f->sub.name; /* Might be NULL. */
} else {
srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
}
@ -702,27 +678,6 @@ upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
return newf;
}
bool upb_fielddef_isfrozen(const upb_fielddef *f) {
return upb_def_isfrozen(UPB_UPCAST(f));
}
void upb_fielddef_ref(const upb_fielddef *f, const void *owner) {
upb_def_ref(UPB_UPCAST(f), owner);
}
void upb_fielddef_unref(const upb_fielddef *f, const void *owner) {
upb_def_unref(UPB_UPCAST(f), owner);
}
void upb_fielddef_donateref(
const upb_fielddef *f, const void *from, const void *to) {
upb_def_donateref(UPB_UPCAST(f), from, to);
}
void upb_fielddef_checkref(const upb_fielddef *f, const void *owner) {
upb_def_checkref(UPB_UPCAST(f), owner);
}
/* Returns the field's type_is_set_ flag, i.e. whether a type has been set on
 * |f|. */
bool upb_fielddef_typeisset(const upb_fielddef *f) {
return f->type_is_set_;
}
@ -765,7 +720,7 @@ bool upb_fielddef_packed(const upb_fielddef *f) {
}
const char *upb_fielddef_name(const upb_fielddef *f) {
return upb_def_fullname(UPB_UPCAST(f));
return upb_def_fullname(upb_fielddef_upcast(f));
}
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
@ -795,8 +750,8 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
upb_status_seterrmsg(s, "field has already been added to a message.");
return false;
}
// TODO: validate name (upb_isident() doesn't quite work atm because this name
// may have a leading ".").
/* TODO: validate name (upb_isident() doesn't quite work atm because this name
* may have a leading "."). */
release_containingtype(f);
f->msg.name = upb_strdup(name);
f->msg_is_symbolic = true;
@ -808,7 +763,7 @@ bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
upb_status_seterrmsg(s, "Already added to message or oneof");
return false;
}
return upb_def_setfullname(UPB_UPCAST(f), name, s);
return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
}
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
@ -868,7 +823,7 @@ const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
const char *ret = enumdefaultstr(f);
assert(ret);
// Enum defaults can't have embedded NULLs.
/* Enum defaults can't have embedded NUL bytes. */
if (len) *len = strlen(ret);
return ret;
}
@ -899,7 +854,7 @@ static void upb_fielddef_init_default(upb_fielddef *f) {
break;
case UPB_TYPE_MESSAGE: break;
case UPB_TYPE_ENUM:
// This is our special sentinel that indicates "not set" for an enum.
/* This is our special sentinel that indicates "not set" for an enum. */
f->default_is_string = true;
f->defaultval.bytes = NULL;
break;
@ -1145,6 +1100,7 @@ void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
upb_status *s) {
str_t *str2;
assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
return false;
@ -1157,7 +1113,7 @@ bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
assert(f->type_ == UPB_TYPE_ENUM);
}
str_t *str2 = newstr(str, len);
str2 = newstr(str, len);
f->defaultval.bytes = str2;
f->default_is_string = true;
return true;
@ -1170,8 +1126,8 @@ void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
}
bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
int32_t val;
assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
return enumdefaultint32(f, &val);
}
@ -1218,12 +1174,12 @@ bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
upb_status *s) {
return upb_fielddef_setsubdef(f, UPB_UPCAST(subdef), s);
return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
}
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
upb_status *s) {
return upb_fielddef_setsubdef(f, UPB_UPCAST(subdef), s);
return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
}
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
@ -1233,8 +1189,8 @@ bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
upb_status_seterrmsg(s, "field type does not accept a subdef");
return false;
}
// TODO: validate name (upb_isident() doesn't quite work atm because this name
// may have a leading ".").
/* TODO: validate name (upb_isident() doesn't quite work atm because this name
* may have a leading "."). */
release_subdef(f);
f->sub.name = upb_strdup(name);
f->subdef_is_symbolic = true;
@ -1283,20 +1239,20 @@ bool upb_fielddef_checkdescriptortype(int32_t type) {
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
void *closure) {
upb_msg_oneof_iter o;
const upb_msgdef *m = (const upb_msgdef*)r;
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
visit(r, UPB_UPCAST2(f), closure);
visit(r, upb_fielddef_upcast2(f), closure);
}
upb_msg_oneof_iter o;
for(upb_msg_oneof_begin(&o, m);
!upb_msg_oneof_done(&o);
upb_msg_oneof_next(&o)) {
upb_oneofdef *f = upb_msg_iter_oneof(&o);
visit(r, UPB_UPCAST2(f), closure);
visit(r, upb_oneofdef_upcast2(f), closure);
}
}
@ -1305,7 +1261,7 @@ static void freemsg(upb_refcounted *r) {
upb_strtable_uninit(&m->ntoo);
upb_strtable_uninit(&m->ntof);
upb_inttable_uninit(&m->itof);
upb_def_uninit(UPB_UPCAST(m));
upb_def_uninit(upb_msgdef_upcast_mutable(m));
free(m);
}
@ -1313,7 +1269,8 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
upb_msgdef *m = malloc(sizeof(*m));
if (!m) return NULL;
if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
goto err2;
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
@ -1330,25 +1287,28 @@ err3:
}
upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
bool ok;
upb_msg_field_iter i;
upb_msg_oneof_iter o;
upb_msgdef *newm = upb_msgdef_new(owner);
if (!newm) return NULL;
bool ok = upb_def_setfullname(UPB_UPCAST(newm),
upb_def_fullname(UPB_UPCAST(m)), NULL);
ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
upb_def_fullname(upb_msgdef_upcast(m)),
NULL);
newm->map_entry = m->map_entry;
UPB_ASSERT_VAR(ok, ok);
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
// Fields in oneofs are dup'd below.
/* Fields in oneofs are dup'd below. */
if (upb_fielddef_containingoneof(f)) continue;
if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
upb_msgdef_unref(newm, owner);
return NULL;
}
}
upb_msg_oneof_iter o;
for(upb_msg_oneof_begin(&o, m);
!upb_msg_oneof_done(&o);
upb_msg_oneof_next(&o)) {
@ -1361,43 +1321,22 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
return newm;
}
bool upb_msgdef_isfrozen(const upb_msgdef *m) {
return upb_def_isfrozen(UPB_UPCAST(m));
}
void upb_msgdef_ref(const upb_msgdef *m, const void *owner) {
upb_def_ref(UPB_UPCAST(m), owner);
}
void upb_msgdef_unref(const upb_msgdef *m, const void *owner) {
upb_def_unref(UPB_UPCAST(m), owner);
}
void upb_msgdef_donateref(
const upb_msgdef *m, const void *from, const void *to) {
upb_def_donateref(UPB_UPCAST(m), from, to);
}
void upb_msgdef_checkref(const upb_msgdef *m, const void *owner) {
upb_def_checkref(UPB_UPCAST(m), owner);
}
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
upb_def *d = UPB_UPCAST(m);
upb_def *d = upb_msgdef_upcast_mutable(m);
return upb_def_freeze(&d, 1, status);
}
const char *upb_msgdef_fullname(const upb_msgdef *m) {
return upb_def_fullname(UPB_UPCAST(m));
return upb_def_fullname(upb_msgdef_upcast(m));
}
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
upb_status *s) {
return upb_def_setfullname(UPB_UPCAST(m), fullname, s);
return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
}
// Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
// on status |s| and return false if not.
/* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
* on status |s| and return false if not. */
static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
upb_status *s) {
if (upb_fielddef_containingtype(f) != NULL) {
@ -1427,40 +1366,42 @@ static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
/* Adds field |f| to message |m|.
 *
 * Returns true if the field was added, or if it already belonged to |m|.
 * Returns false, with an error message set on |s|, if the field fails the
 * check_field_add() constraints or is a member of a oneof.  |ref_donor| is
 * forwarded unchanged to add_field().
 *
 * TODO: extensions need to have a separate namespace, because proto2 allows a
 * top-level extension (ie. one not in any package) to have the same name as a
 * field from the message.
 *
 * This also implies that there needs to be a separate lookup-by-name method
 * for extensions.  It seems desirable for iteration to return both extensions
 * and non-extensions though.
 *
 * We also need to validate that the field number is in an extension range iff
 * it is an extension. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* This method is idempotent: a field that is already part of this msgdef
   * is reported as success without doing anything. */
  if (upb_fielddef_containingtype(f) == m) return true;

  /* All constraints are checked before any state is modified. */
  if (!check_field_add(m, f, s)) return false;

  if (upb_fielddef_containingoneof(f)) {
    /* Fields in a oneof can only be added by adding the oneof to the
     * msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  /* Every check passed; commit the change. */
  add_field(m, f, ref_donor);
  return true;
}
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
upb_status *s) {
// Check various conditions that would prevent this oneof from being added.
upb_oneof_iter it;
/* Check various conditions that would prevent this oneof from being added. */
if (upb_oneofdef_containingtype(o)) {
upb_status_seterrmsg(s, "oneofdef already belongs to a message");
return false;
@ -1472,9 +1413,8 @@ bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
return false;
}
// Check that all of the oneof's fields do not conflict with names or numbers
// of fields already in the message.
upb_oneof_iter it;
/* Check that all of the oneof's fields do not conflict with names or numbers
* of fields already in the message. */
for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
const upb_fielddef *f = upb_oneof_iter_field(&it);
if (!check_field_add(m, f, s)) {
@ -1482,15 +1422,15 @@ bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
}
}
// Everything checks out -- commit now.
/* Everything checks out -- commit now. */
// Add oneof itself first.
/* Add oneof itself first. */
o->parent = m;
upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
upb_ref2(o, m);
upb_ref2(m, o);
// Add each field of the oneof directly to the msgdef.
/* Add each field of the oneof directly to the msgdef. */
for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
upb_fielddef *f = upb_oneof_iter_field(&it);
add_field(m, f, NULL);
@ -1582,10 +1522,10 @@ static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
upb_oneof_iter i;
for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
const upb_fielddef *f = upb_oneof_iter_field(&i);
visit(r, UPB_UPCAST2(f), closure);
visit(r, upb_fielddef_upcast2(f), closure);
}
if (o->parent) {
visit(r, UPB_UPCAST2(o->parent), closure);
visit(r, upb_msgdef_upcast2(o->parent), closure);
}
}
@ -1593,7 +1533,7 @@ static void freeoneof(upb_refcounted *r) {
upb_oneofdef *o = (upb_oneofdef*)r;
upb_strtable_uninit(&o->ntof);
upb_inttable_uninit(&o->itof);
upb_def_uninit(UPB_UPCAST(o));
upb_def_uninit(upb_oneofdef_upcast_mutable(o));
free(o);
}
@ -1602,7 +1542,9 @@ upb_oneofdef *upb_oneofdef_new(const void *owner) {
upb_oneofdef *o = malloc(sizeof(*o));
o->parent = NULL;
if (!o) return NULL;
if (!upb_def_init(UPB_UPCAST(o), UPB_DEF_ONEOF, &vtbl, owner)) goto err2;
if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
owner))
goto err2;
if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
return o;
@ -1615,12 +1557,13 @@ err2:
}
upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
bool ok;
upb_oneof_iter i;
upb_oneofdef *newo = upb_oneofdef_new(owner);
if (!newo) return NULL;
bool ok = upb_def_setfullname(UPB_UPCAST(newo),
upb_def_fullname(UPB_UPCAST(o)), NULL);
ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
UPB_ASSERT_VAR(ok, ok);
upb_oneof_iter i;
for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
@ -1631,29 +1574,8 @@ upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
return newo;
}
bool upb_oneofdef_isfrozen(const upb_oneofdef *o) {
return upb_def_isfrozen(UPB_UPCAST(o));
}
void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner) {
upb_def_ref(UPB_UPCAST(o), owner);
}
void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner) {
upb_def_unref(UPB_UPCAST(o), owner);
}
void upb_oneofdef_donateref(const upb_oneofdef *o, const void *from,
const void *to) {
upb_def_donateref(UPB_UPCAST(o), from, to);
}
void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner) {
upb_def_checkref(UPB_UPCAST(o), owner);
}
const char *upb_oneofdef_name(const upb_oneofdef *o) {
return upb_def_fullname(UPB_UPCAST(o));
return upb_def_fullname(upb_oneofdef_upcast(o));
}
bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
@ -1662,7 +1584,7 @@ bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
upb_status_seterrmsg(s, "oneof already added to a message");
return false;
}
return upb_def_setfullname(UPB_UPCAST(o), fullname, s);
return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
}
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
@ -1679,20 +1601,20 @@ bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
assert(!upb_oneofdef_isfrozen(o));
assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
// This method is idempotent. Check if |f| is already part of this oneofdef
// and return immediately if so.
/* This method is idempotent. Check if |f| is already part of this oneofdef
* and return immediately if so. */
if (upb_fielddef_containingoneof(f) == o) {
return true;
}
// The field must have an OPTIONAL label.
/* The field must have an OPTIONAL label. */
if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
return false;
}
// Check that no field with this name or number exists already in the oneof.
// Also check that the field is not already part of a oneof.
/* Check that no field with this name or number exists already in the oneof.
* Also check that the field is not already part of a oneof. */
if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
upb_status_seterrmsg(s, "field name or number were not set");
return false;
@ -1705,21 +1627,21 @@ bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
return false;
}
// We allow adding a field to the oneof either if the field is not part of a
// msgdef, or if it is and we are also part of the same msgdef.
/* We allow adding a field to the oneof either if the field is not part of a
* msgdef, or if it is and we are also part of the same msgdef. */
if (o->parent == NULL) {
// If we're not in a msgdef, the field cannot be either. Otherwise we would
// need to magically add this oneof to a msgdef to remain consistent, which
// is surprising behavior.
/* If we're not in a msgdef, the field cannot be either. Otherwise we would
* need to magically add this oneof to a msgdef to remain consistent, which
* is surprising behavior. */
if (upb_fielddef_containingtype(f) != NULL) {
upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
"oneof does not");
return false;
}
} else {
// If we're in a msgdef, the user can add fields that either aren't in any
// msgdef (in which case they're added to our msgdef) or already a part of
// our msgdef.
/* If we're in a msgdef, the user can add fields that either aren't in any
* msgdef (in which case they're added to our msgdef) or already a part of
* our msgdef. */
if (upb_fielddef_containingtype(f) != NULL &&
upb_fielddef_containingtype(f) != o->parent) {
upb_status_seterrmsg(s, "fielddef belongs to a different message "
@ -1728,8 +1650,8 @@ bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
}
}
// Commit phase. First add the field to our parent msgdef, if any, because
// that may fail; then add the field to our own tables.
/* Commit phase. First add the field to our parent msgdef, if any, because
* that may fail; then add the field to our own tables. */
if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,12 +1,12 @@
// This file contains accessors for a set of compiled-in defs.
// Note that unlike Google's protobuf, it does *not* define
// generated classes or any other kind of data structure for
// actually storing protobufs. It only contains *defs* which
// let you reflect over a protobuf *schema*.
//
// This file was generated by upbc (the upb compiler).
// Do not edit -- your changes will be discarded when the file is
// regenerated.
/* This file contains accessors for a set of compiled-in defs.
* Note that unlike Google's protobuf, it does *not* define
* generated classes or any other kind of data structure for
* actually storing protobufs. It only contains *defs* which
* let you reflect over a protobuf *schema*.
*/
/* This file was generated by upbc (the upb compiler).
* Do not edit -- your changes will be discarded when the file is
* regenerated. */
#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_
#define GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_
@ -15,15 +15,15 @@
#include "upb/symtab.h"
#ifdef __cplusplus
extern "C" {
UPB_BEGIN_EXTERN_C
#endif
// Enums
/* Enums */
typedef enum {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
} google_protobuf_FieldDescriptorProto_Label;
typedef enum {
@ -44,24 +44,24 @@ typedef enum {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
} google_protobuf_FieldDescriptorProto_Type;
typedef enum {
GOOGLE_PROTOBUF_FIELDOPTIONS_STRING = 0,
GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2,
GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
} google_protobuf_FieldOptions_CType;
typedef enum {
GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2,
GOOGLE_PROTOBUF_FILEOPTIONS_LITE_RUNTIME = 3,
GOOGLE_PROTOBUF_FILEOPTIONS_LITE_RUNTIME = 3
} google_protobuf_FileOptions_OptimizeMode;
// Selectors
/* Selectors */
// google.protobuf.DescriptorProto
/* google.protobuf.DescriptorProto */
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_STARTSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 4
@ -88,11 +88,11 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_STARTSTR 25
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_ENDSTR 26
// google.protobuf.DescriptorProto.ExtensionRange
/* google.protobuf.DescriptorProto.ExtensionRange */
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_INT32 2
#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END_INT32 3
// google.protobuf.EnumDescriptorProto
/* google.protobuf.EnumDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSEQ 4
@ -103,14 +103,14 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_STARTSTR 9
#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_ENDSTR 10
// google.protobuf.EnumOptions
/* google.protobuf.EnumOptions */
#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_ALLOW_ALIAS_BOOL 6
// google.protobuf.EnumValueDescriptorProto
/* google.protobuf.EnumValueDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_STRING 4
@ -118,13 +118,13 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_ENDSTR 6
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_INT32 7
// google.protobuf.EnumValueOptions
/* google.protobuf.EnumValueOptions */
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
// google.protobuf.FieldDescriptorProto
/* google.protobuf.FieldDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_STRING 4
@ -143,7 +143,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_STARTSTR 17
#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_ENDSTR 18
// google.protobuf.FieldOptions
/* google.protobuf.FieldOptions */
#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
@ -157,7 +157,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_ENDSTR 12
#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_WEAK_BOOL 13
// google.protobuf.FileDescriptorProto
/* google.protobuf.FileDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_STARTSUBMSG 4
@ -196,13 +196,13 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_WEAK_DEPENDENCY_ENDSEQ 37
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_WEAK_DEPENDENCY_INT32 38
// google.protobuf.FileDescriptorSet
/* google.protobuf.FileDescriptorSet */
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSUBMSG 5
// google.protobuf.FileOptions
/* google.protobuf.FileOptions */
#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
@ -223,7 +223,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES_BOOL 19
#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH_BOOL 20
// google.protobuf.MessageOptions
/* google.protobuf.MessageOptions */
#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
@ -231,7 +231,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_BOOL 6
#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR_BOOL 7
// google.protobuf.MethodDescriptorProto
/* google.protobuf.MethodDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_STRING 4
@ -244,13 +244,13 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_STARTSTR 11
#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_ENDSTR 12
// google.protobuf.MethodOptions
/* google.protobuf.MethodOptions */
#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
// google.protobuf.ServiceDescriptorProto
/* google.protobuf.ServiceDescriptorProto */
#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 3
#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSEQ 4
@ -261,19 +261,19 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_STARTSTR 9
#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_ENDSTR 10
// google.protobuf.ServiceOptions
/* google.protobuf.ServiceOptions */
#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
// google.protobuf.SourceCodeInfo
/* google.protobuf.SourceCodeInfo */
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSEQ 4
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSUBMSG 5
// google.protobuf.SourceCodeInfo.Location
/* google.protobuf.SourceCodeInfo.Location */
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_STARTSEQ 2
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_ENDSEQ 3
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_INT32 4
@ -287,7 +287,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_TRAILING_COMMENTS_STARTSTR 12
#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_TRAILING_COMMENTS_ENDSTR 13
// google.protobuf.UninterpretedOption
/* google.protobuf.UninterpretedOption */
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSUBMSG 2
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSEQ 3
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_ENDSEQ 4
@ -305,7 +305,7 @@ typedef enum {
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_STARTSTR 16
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_ENDSTR 17
// google.protobuf.UninterpretedOption.NamePart
/* google.protobuf.UninterpretedOption.NamePart */
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STRING 2
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STARTSTR 3
#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_ENDSTR 4
@ -313,7 +313,7 @@ typedef enum {
const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner);
// MessageDefs
/* MessageDefs */
UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_DescriptorProto(const upb_symtab *s) {
const upb_msgdef *m = upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto");
assert(m);
@ -416,7 +416,7 @@ UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePar
}
// EnumDefs
/* EnumDefs */
UPB_INLINE const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label(const upb_symtab *s) {
const upb_enumdef *e = upb_symtab_lookupenum(s, "google.protobuf.FieldDescriptorProto.Label");
assert(e);
@ -520,9 +520,7 @@ UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_negat
UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_positive_int_value(const upb_symtab *s) { return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 4); }
UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_string_value(const upb_symtab *s) { return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 7); }
#ifdef __cplusplus
}; // extern "C"
#endif
UPB_END_EXTERN_C
#ifdef __cplusplus
@ -534,9 +532,9 @@ inline upb::reffed_ptr<const upb::SymbolTable> SymbolTable() {
const upb::SymbolTable* s = upbdefs_google_protobuf_descriptor(&s);
return upb::reffed_ptr<const upb::SymbolTable>(s, &s);
}
} // namespace descriptor
} // namespace protobuf
} // namespace google
} /* namespace descriptor */
} /* namespace protobuf */
} /* namespace google */
#define RETURN_REFFED(type, func) \
const type* obj = func(upbdefs::google::protobuf::descriptor::SymbolTable().get()); \
@ -553,9 +551,9 @@ inline upb::reffed_ptr<const upb::FieldDef> field() { RETURN_REFFED(upb::FieldDe
inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_name) }
inline upb::reffed_ptr<const upb::FieldDef> nested_type() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_nested_type) }
inline upb::reffed_ptr<const upb::FieldDef> options() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_options) }
} // namespace DescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace DescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -564,10 +562,10 @@ namespace ExtensionRange {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange) }
inline upb::reffed_ptr<const upb::FieldDef> end() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange_end) }
inline upb::reffed_ptr<const upb::FieldDef> start() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange_start) }
} // namespace ExtensionRange
} // namespace DescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace ExtensionRange */
} /* namespace DescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -576,9 +574,9 @@ inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::
inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_name) }
inline upb::reffed_ptr<const upb::FieldDef> options() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_options) }
inline upb::reffed_ptr<const upb::FieldDef> value() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_value) }
} // namespace EnumDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace EnumDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -586,9 +584,9 @@ namespace EnumOptions {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumOptions) }
inline upb::reffed_ptr<const upb::FieldDef> allow_alias() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumOptions_allow_alias) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumOptions_uninterpreted_option) }
} // namespace EnumOptions
} // namespace protobuf
} // namespace google
} /* namespace EnumOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -597,18 +595,18 @@ inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::
inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_name) }
inline upb::reffed_ptr<const upb::FieldDef> number() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_number) }
inline upb::reffed_ptr<const upb::FieldDef> options() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_options) }
} // namespace EnumValueDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace EnumValueDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
namespace EnumValueOptions {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumValueOptions) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueOptions_uninterpreted_option) }
} // namespace EnumValueOptions
} // namespace protobuf
} // namespace google
} /* namespace EnumValueOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -624,9 +622,9 @@ inline upb::reffed_ptr<const upb::FieldDef> type() { RETURN_REFFED(upb::FieldDef
inline upb::reffed_ptr<const upb::FieldDef> type_name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_type_name) }
inline upb::reffed_ptr<const upb::EnumDef> Label() { RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldDescriptorProto_Label) }
inline upb::reffed_ptr<const upb::EnumDef> Type() { RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldDescriptorProto_Type) }
} // namespace FieldDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace FieldDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -640,9 +638,9 @@ inline upb::reffed_ptr<const upb::FieldDef> packed() { RETURN_REFFED(upb::FieldD
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_uninterpreted_option) }
inline upb::reffed_ptr<const upb::FieldDef> weak() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_weak) }
inline upb::reffed_ptr<const upb::EnumDef> CType() { RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldOptions_CType) }
} // namespace FieldOptions
} // namespace protobuf
} // namespace google
} /* namespace FieldOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -659,18 +657,18 @@ inline upb::reffed_ptr<const upb::FieldDef> public_dependency() { RETURN_REFFED(
inline upb::reffed_ptr<const upb::FieldDef> service() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_service) }
inline upb::reffed_ptr<const upb::FieldDef> source_code_info() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_source_code_info) }
inline upb::reffed_ptr<const upb::FieldDef> weak_dependency() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_weak_dependency) }
} // namespace FileDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace FileDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
namespace FileDescriptorSet {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FileDescriptorSet) }
inline upb::reffed_ptr<const upb::FieldDef> file() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorSet_file) }
} // namespace FileDescriptorSet
} // namespace protobuf
} // namespace google
} /* namespace FileDescriptorSet */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -687,9 +685,9 @@ inline upb::reffed_ptr<const upb::FieldDef> optimize_for() { RETURN_REFFED(upb::
inline upb::reffed_ptr<const upb::FieldDef> py_generic_services() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_py_generic_services) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_uninterpreted_option) }
inline upb::reffed_ptr<const upb::EnumDef> OptimizeMode() { RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FileOptions_OptimizeMode) }
} // namespace FileOptions
} // namespace protobuf
} // namespace google
} /* namespace FileOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -698,9 +696,9 @@ inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::
inline upb::reffed_ptr<const upb::FieldDef> message_set_wire_format() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_message_set_wire_format) }
inline upb::reffed_ptr<const upb::FieldDef> no_standard_descriptor_accessor() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_no_standard_descriptor_accessor) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_uninterpreted_option) }
} // namespace MessageOptions
} // namespace protobuf
} // namespace google
} /* namespace MessageOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -710,18 +708,18 @@ inline upb::reffed_ptr<const upb::FieldDef> input_type() { RETURN_REFFED(upb::Fi
inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_name) }
inline upb::reffed_ptr<const upb::FieldDef> options() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_options) }
inline upb::reffed_ptr<const upb::FieldDef> output_type() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_output_type) }
} // namespace MethodDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace MethodDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
namespace MethodOptions {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_MethodOptions) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodOptions_uninterpreted_option) }
} // namespace MethodOptions
} // namespace protobuf
} // namespace google
} /* namespace MethodOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -730,27 +728,27 @@ inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::
inline upb::reffed_ptr<const upb::FieldDef> method() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_method) }
inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_name) }
inline upb::reffed_ptr<const upb::FieldDef> options() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_options) }
} // namespace ServiceDescriptorProto
} // namespace protobuf
} // namespace google
} /* namespace ServiceDescriptorProto */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
namespace ServiceOptions {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_ServiceOptions) }
inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceOptions_uninterpreted_option) }
} // namespace ServiceOptions
} // namespace protobuf
} // namespace google
} /* namespace ServiceOptions */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
namespace SourceCodeInfo {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_SourceCodeInfo) }
inline upb::reffed_ptr<const upb::FieldDef> location() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_location) }
} // namespace SourceCodeInfo
} // namespace protobuf
} // namespace google
} /* namespace SourceCodeInfo */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -761,10 +759,10 @@ inline upb::reffed_ptr<const upb::FieldDef> leading_comments() { RETURN_REFFED(u
inline upb::reffed_ptr<const upb::FieldDef> path() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_path) }
inline upb::reffed_ptr<const upb::FieldDef> span() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_span) }
inline upb::reffed_ptr<const upb::FieldDef> trailing_comments() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_trailing_comments) }
} // namespace Location
} // namespace SourceCodeInfo
} // namespace protobuf
} // namespace google
} /* namespace Location */
} /* namespace SourceCodeInfo */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -777,9 +775,9 @@ inline upb::reffed_ptr<const upb::FieldDef> name() { RETURN_REFFED(upb::FieldDef
inline upb::reffed_ptr<const upb::FieldDef> negative_int_value() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_negative_int_value) }
inline upb::reffed_ptr<const upb::FieldDef> positive_int_value() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_positive_int_value) }
inline upb::reffed_ptr<const upb::FieldDef> string_value() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_string_value) }
} // namespace UninterpretedOption
} // namespace protobuf
} // namespace google
} /* namespace UninterpretedOption */
} /* namespace protobuf */
} /* namespace google */
namespace google {
namespace protobuf {
@ -788,15 +786,15 @@ namespace NamePart {
inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_UninterpretedOption_NamePart) }
inline upb::reffed_ptr<const upb::FieldDef> is_extension() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_NamePart_is_extension) }
inline upb::reffed_ptr<const upb::FieldDef> name_part() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_NamePart_name_part) }
} // namespace NamePart
} // namespace UninterpretedOption
} // namespace protobuf
} // namespace google
} /* namespace NamePart */
} /* namespace UninterpretedOption */
} /* namespace protobuf */
} /* namespace google */
} // namespace upbdefs
} /* namespace upbdefs */
#undef RETURN_REFFED
#endif // __cplusplus
#endif /* __cplusplus */
#endif // GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_
#endif /* GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_ */

@ -20,8 +20,8 @@
#include "upb/sink.h"
#include "upb/descriptor/descriptor.upb.h"
// upb_deflist is an internal-only dynamic array for storing a growing list of
// upb_defs.
/* upb_deflist is an internal-only dynamic array for storing a growing list of
* upb_defs. */
typedef struct {
upb_def **defs;
size_t len;
@ -29,27 +29,27 @@ typedef struct {
bool owned;
} upb_deflist;
// We keep a stack of all the messages scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
/* We keep a stack of all the message scopes we are currently in, as well as
* the top-level file scope. This is necessary to correctly qualify the
* definitions that are contained inside. "name" tracks the name of the
* message or package (a bare name -- not qualified by any enclosing scopes). */
typedef struct {
char *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
/* Index of the first def that is under this scope. For msgdefs, the
* msgdef itself is at start-1. */
int start;
} upb_descreader_frame;
// The maximum number of nested declarations that are allowed, ie.
// message Foo {
// message Bar {
// message Baz {
// }
// }
// }
//
// This is a resource limit that affects how big our runtime stack can grow.
// TODO: make this a runtime-settable property of the Reader instance.
/* The maximum number of nested declarations that are allowed, i.e.
* message Foo {
* message Bar {
* message Baz {
* }
* }
* }
*
* This is a resource limit that affects how big our runtime stack can grow.
* TODO: make this a runtime-settable property of the Reader instance. */
#define UPB_MAX_MESSAGE_NESTING 64
struct upb_descreader {
@ -76,11 +76,11 @@ static char *upb_strndup(const char *buf, size_t n) {
return ret;
}
// Returns a newly allocated string that joins input strings together, for
// example:
// join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
// join("", "Baz") -> "Baz"
// Caller owns a ref on the returned string.
/* Returns a newly allocated string that joins input strings together, for
* example:
* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
* join("", "Baz") -> "Baz"
* Caller owns a ref on the returned string. */
static char *upb_join(const char *base, const char *name) {
if (!base || strlen(base) == 0) {
return upb_strdup(name);
@ -105,8 +105,9 @@ void upb_deflist_init(upb_deflist *l) {
}
/* Tears down a deflist.  If the list still owns its refs (l->owned), each
* def is unref'd with "l" as the owner argument; the defs array itself is
* then freed unconditionally. */
void upb_deflist_uninit(upb_deflist *l) {
size_t i;
if (l->owned)
for(size_t i = 0; i < l->len; i++)
for(i = 0; i < l->len; i++)
upb_def_unref(l->defs[i], l);
free(l->defs);
}
@ -124,8 +125,9 @@ bool upb_deflist_push(upb_deflist *l, upb_def *d) {
}
/* Hands the list's ref on every def over from "l" to "owner" (via
* upb_def_donateref) and then clears l->owned, so that upb_deflist_uninit
* will no longer unref the defs.  Asserts that the list currently owns its
* refs. */
void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
size_t i;
assert(l->owned);
for (size_t i = 0; i < l->len; i++)
for (i = 0; i < l->len; i++)
upb_def_donateref(l->defs[i], l, owner);
l->owned = false;
}
@ -134,9 +136,10 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
return l->defs[l->len-1];
}
// Qualify the defname for all defs starting with offset "start" with "str".
/* Qualify the defname for all defs starting with offset "start" with "str". */
static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
for (uint32_t i = start; i < l->len; i++) {
uint32_t i;
for (i = start; i < l->len; i++) {
upb_def *def = l->defs[i];
char *name = upb_join(str, upb_def_fullname(def));
upb_def_setfullname(def, name, NULL);
@ -148,8 +151,9 @@ static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
/* upb_descreader ************************************************************/
/* Returns the msgdef belonging to the top frame of the scope stack, i.e. the
* def stored at index (start - 1) of that frame.  Asserts that the stack
* holds more than one frame and that the computed index is non-negative. */
static upb_msgdef *upb_descreader_top(upb_descreader *r) {
int index;
assert(r->stack_len > 1);
int index = r->stack[r->stack_len-1].start - 1;
index = r->stack[r->stack_len-1].start - 1;
assert(index >= 0);
return upb_downcast_msgdef_mutable(r->defs.defs[index]);
}
@ -158,8 +162,8 @@ static upb_def *upb_descreader_last(upb_descreader *r) {
return upb_deflist_last(&r->defs);
}
// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
// entities that have names and can contain sub-definitions.
/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
* entities that have names and can contain sub-definitions). */
void upb_descreader_startcontainer(upb_descreader *r) {
upb_descreader_frame *f = &r->stack[r->stack_len++];
f->start = r->defs.len;
@ -179,7 +183,7 @@ void upb_descreader_setscopename(upb_descreader *r, char *str) {
f->name = str;
}
// Handlers for google.protobuf.FileDescriptorProto.
/* Handlers for google.protobuf.FileDescriptorProto. */
static bool file_startmsg(void *r, const void *hd) {
UPB_UNUSED(hd);
upb_descreader_startcontainer(r);
@ -187,27 +191,27 @@ static bool file_startmsg(void *r, const void *hd) {
}
/* End-of-message handler: closes the current container scope by calling
* upb_descreader_endcontainer on the reader passed as "closure".  "hd" and
* "status" are unused; always returns true. */
static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
upb_descreader *r = closure;
UPB_UNUSED(hd);
UPB_UNUSED(status);
upb_descreader *r = closure;
upb_descreader_endcontainer(r);
return true;
}
/* String handler (per its name, for the file's package): makes a copy of the
* n bytes at "buf" with upb_strndup and passes that copy to
* upb_descreader_setscopename.  The copy is not freed here.  "hd" and
* "handle" are unused.  Returns n. */
static size_t file_onpackage(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
upb_descreader *r = closure;
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
/* XXX: see comment at the top of the file. */
upb_descreader_setscopename(r, upb_strndup(buf, n));
return n;
}
// Handlers for google.protobuf.EnumValueDescriptorProto.
/* Handlers for google.protobuf.EnumValueDescriptorProto. */
static bool enumval_startmsg(void *closure, const void *hd) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
UPB_UNUSED(hd);
r->saw_number = false;
r->saw_name = false;
return true;
@ -215,10 +219,10 @@ static bool enumval_startmsg(void *closure, const void *hd) {
static size_t enumval_onname(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
upb_descreader *r = closure;
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
/* XXX: see comment at the top of the file. */
free(r->name);
r->name = upb_strndup(buf, n);
r->saw_name = true;
@ -226,21 +230,23 @@ static size_t enumval_onname(void *closure, const void *hd, const char *buf,
}
/* Value handler for an enum value's number: stores "val" in r->number and
* sets r->saw_number, which enumval_endmsg checks before adding the value.
* "hd" is unused; always returns true. */
static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
UPB_UNUSED(hd);
r->number = val;
r->saw_number = true;
return true;
}
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
upb_enumdef *e;
UPB_UNUSED(hd);
if(!r->saw_number || !r->saw_name) {
upb_status_seterrmsg(status, "Enum value missing name or number.");
return false;
}
upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
upb_enumdef_addval(e, r->name, r->number, status);
free(r->name);
r->name = NULL;
@ -248,18 +254,21 @@ static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
}
// Handlers for google.protobuf.EnumDescriptorProto.
/* Handlers for google.protobuf.EnumDescriptorProto. */
/* Start-of-message handler: creates a new enumdef, passing &r->defs to
* upb_enumdef_new (presumably as the ref owner -- confirm), upcasts it to a
* upb_def and pushes it onto the reader's def list.  "hd" is unused.
* NOTE(review): the bool result of upb_deflist_push is ignored and the
* handler always returns true. */
static bool enum_startmsg(void *closure, const void *hd) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new(&r->defs)));
UPB_UNUSED(hd);
upb_deflist_push(&r->defs,
upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
return true;
}
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
upb_enumdef *e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
upb_enumdef *e;
UPB_UNUSED(hd);
e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
upb_status_seterrmsg(status, "Enum had no name.");
return false;
@ -273,31 +282,31 @@ static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
/* String handler for an enum's name: copies the n bytes at "buf" with
* upb_strndup, sets that copy as the full name of the last def in the
* reader's list (upb_descreader_last), then frees the temporary copy.
* NULL is passed as the third argument of upb_def_setfullname and its result
* is not checked.  "hd" and "handle" are unused.  Returns n. */
static size_t enum_onname(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
char *fullname = upb_strndup(buf, n);
UPB_UNUSED(hd);
UPB_UNUSED(handle);
/* XXX: see comment at the top of the file. */
upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
free(fullname);
return n;
}
/* Handlers for google.protobuf.FieldDescriptorProto */

/* Start-of-message handler for a field descriptor.
 *
 * Creates a fresh fielddef in r->f (passing &r->defs to upb_fielddef_new) and
 * discards any default-value string left over in r->default_string.
 *
 * closure: the upb_descreader being driven.
 * hd:      handler data; unused.
 * Returns true unconditionally. */
static bool field_startmsg(void *closure, const void *hd) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  r->f = upb_fielddef_new(&r->defs);
  free(r->default_string);
  r->default_string = NULL;

  /* fielddefs default to packed, but descriptors default to non-packed. */
  upb_fielddef_setpacked(r->f, false);
  return true;
}
// Converts the default value in string "str" into "d". Passes a ref on str.
// Returns true on success.
/* Converts the default value in string "str" into "d". Passes a ref on str.
* Returns true on success. */
static bool parse_default(char *str, upb_fielddef *f) {
bool success = true;
char *end;
@ -311,7 +320,8 @@ static bool parse_default(char *str, upb_fielddef *f) {
break;
}
case UPB_TYPE_INT64: {
long long val = strtoll(str, &end, 0);
/* XXX: Need to write our own strtoll, since it's not available in c89. */
long long val = strtol(str, &end, 0);
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
success = false;
else
@ -327,7 +337,8 @@ static bool parse_default(char *str, upb_fielddef *f) {
break;
}
case UPB_TYPE_UINT64: {
unsigned long long val = strtoull(str, &end, 0);
/* XXX: Need to write our own strtoull, since it's not available in c89. */
unsigned long long val = strtoul(str, &end, 0);
if (val > UINT64_MAX || errno == ERANGE || *end)
success = false;
else
@ -343,7 +354,8 @@ static bool parse_default(char *str, upb_fielddef *f) {
break;
}
case UPB_TYPE_FLOAT: {
float val = strtof(str, &end);
/* XXX: Need to write our own strtof, since it's not available in c89. */
float val = strtod(str, &end);
if (errno == ERANGE || *end)
success = false;
else
@ -365,10 +377,11 @@ static bool parse_default(char *str, upb_fielddef *f) {
}
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
upb_fielddef *f = r->f;
// TODO: verify that all required fields were present.
UPB_UNUSED(hd);
/* TODO: verify that all required fields were present. */
assert(upb_fielddef_number(f) != 0);
assert(upb_fielddef_name(f) != NULL);
assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
@ -382,8 +395,8 @@ static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
} else {
if (r->default_string && !parse_default(r->default_string, f)) {
// We don't worry too much about giving a great error message since the
// compiler should have ensured this was correct.
/* We don't worry too much about giving a great error message since the
* compiler should have ensured this was correct. */
upb_status_seterrmsg(status, "Error converting default value.");
return false;
}
@ -393,48 +406,54 @@ static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
}
/* Bool handler: forwards val to upb_fielddef_setlazy() on the reader's
 * current field (r->f).  hd is unused.  Always returns true. */
static bool field_onlazy(void *closure, const void *hd, bool val) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  upb_fielddef_setlazy(r->f, val);
  return true;
}
/* Bool handler: forwards val to upb_fielddef_setpacked() on the reader's
 * current field (r->f).  hd is unused.  Always returns true. */
static bool field_onpacked(void *closure, const void *hd, bool val) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  upb_fielddef_setpacked(r->f, val);
  return true;
}
/* int32 handler: forwards val to upb_fielddef_setdescriptortype() on the
 * reader's current field (r->f).  hd is unused.  Always returns true. */
static bool field_ontype(void *closure, const void *hd, int32_t val) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  upb_fielddef_setdescriptortype(r->f, val);
  return true;
}
/* int32 handler: forwards val to upb_fielddef_setlabel() on the reader's
 * current field (r->f).  hd is unused.  Always returns true. */
static bool field_onlabel(void *closure, const void *hd, int32_t val) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  upb_fielddef_setlabel(r->f, val);
  return true;
}
/* int32 handler: sets the field number of the reader's current field (r->f).
 *
 * The setter is called with a NULL status and its result is only passed to
 * UPB_ASSERT_VAR (presumably an assertion in debug builds -- confirm against
 * the macro's definition); the handler itself returns true regardless.
 * hd is unused. */
static bool field_onnumber(void *closure, const void *hd, int32_t val) {
  upb_descreader *r = closure;
  bool ok = upb_fielddef_setnumber(r->f, val, NULL);
  UPB_UNUSED(hd);
  UPB_ASSERT_VAR(ok, ok);
  return true;
}
static size_t field_onname(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
char *name = upb_strndup(buf, n);
UPB_UNUSED(hd);
UPB_UNUSED(handle);
/* XXX: see comment at the top of the file. */
upb_fielddef_setname(r->f, name, NULL);
free(name);
return n;
@ -442,11 +461,12 @@ static size_t field_onname(void *closure, const void *hd, const char *buf,
static size_t field_ontypename(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
char *name = upb_strndup(buf, n);
UPB_UNUSED(hd);
UPB_UNUSED(handle);
/* XXX: see comment at the top of the file. */
upb_fielddef_setsubdefname(r->f, name, NULL);
free(name);
return n;
@ -454,11 +474,12 @@ static size_t field_ontypename(void *closure, const void *hd, const char *buf,
static size_t field_onextendee(void *closure, const void *hd, const char *buf,
size_t n, const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_descreader *r = closure;
// XXX: see comment at the top of the file.
char *name = upb_strndup(buf, n);
UPB_UNUSED(hd);
UPB_UNUSED(handle);
/* XXX: see comment at the top of the file. */
upb_fielddef_setcontainingtypename(r->f, name, NULL);
free(name);
return n;
@ -466,31 +487,35 @@ static size_t field_onextendee(void *closure, const void *hd, const char *buf,
/* String handler for the field's default value.
 *
 * Replaces r->default_string with a copy of the n bytes at buf; any previous
 * string is freed first.  hd and handle are unused.
 * Returns n (all bytes consumed). */
static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
                                 size_t n, const upb_bufhandle *handle) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  UPB_UNUSED(handle);

  /* Have to convert from string to the correct type, but we might not know the
   * type yet, so we save it as a string until the end of the field.
   * XXX: see comment at the top of the file. */
  free(r->default_string);
  r->default_string = upb_strndup(buf, n);
  return n;
}
/* Handlers for google.protobuf.DescriptorProto (representing a message). */

/* Start-of-message handler for a message descriptor.
 *
 * Creates one new msgdef (passing &r->defs to upb_msgdef_new), pushes it onto
 * the reader's def list exactly once, then calls
 * upb_descreader_startcontainer().
 *
 * closure: the upb_descreader being driven.
 * hd:      handler data; unused.
 * Returns true unconditionally. */
static bool msg_startmsg(void *closure, const void *hd) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  upb_deflist_push(&r->defs,
                   upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
  upb_descreader_startcontainer(r);
  return true;
}
static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
UPB_UNUSED(hd);
upb_descreader *r = closure;
upb_msgdef *m = upb_descreader_top(r);
if(!upb_def_fullname(UPB_UPCAST(m))) {
UPB_UNUSED(hd);
if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
upb_status_seterrmsg(status, "Encountered message with no name.");
return false;
}
@ -500,32 +525,35 @@ static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
/* String handler for the message's name.
 *
 * Copies the n bytes at buf into a new string, sets it as the full name of
 * the msgdef returned by upb_descreader_top(), and then hands the same string
 * to upb_descreader_setscopename().  That call takes ownership of the string,
 * so it must be made exactly once and the string is not freed here.
 *
 * hd and handle are unused.  Returns n (all bytes consumed). */
static size_t msg_onname(void *closure, const void *hd, const char *buf,
                         size_t n, const upb_bufhandle *handle) {
  upb_descreader *r = closure;
  upb_msgdef *m = upb_descreader_top(r);
  /* XXX: see comment at the top of the file. */
  char *name = upb_strndup(buf, n);
  UPB_UNUSED(hd);
  UPB_UNUSED(handle);
  upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
  upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
  return n;
}
/* End-of-field handler inside a message.
 *
 * Adds the reader's current field (r->f) to the msgdef returned by
 * upb_descreader_top(), passing &r->defs and a NULL status, then clears r->f.
 * The result of upb_msgdef_addfield() is not checked.
 * hd is unused.  Always returns true. */
static bool msg_onendfield(void *closure, const void *hd) {
  upb_descreader *r = closure;
  upb_msgdef *m = upb_descreader_top(r);
  UPB_UNUSED(hd);
  upb_msgdef_addfield(m, r->f, &r->defs, NULL);
  r->f = NULL;
  return true;
}
/* Handler that records the reader's current field (r->f) as an extension.
 *
 * Asserts that the field has a containing type name, marks it as an
 * extension, pushes it (upcast to a def) onto the reader's def list exactly
 * once, and clears r->f.
 * hd is unused.  Always returns true. */
static bool pushextension(void *closure, const void *hd) {
  upb_descreader *r = closure;
  UPB_UNUSED(hd);
  assert(upb_fielddef_containingtypename(r->f));
  upb_fielddef_setisextension(r->f, true);
  upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
  r->f = NULL;
  return true;
}

@ -18,52 +18,52 @@
namespace upb {
namespace descriptor {
class Reader;
} // namespace descriptor
} // namespace upb
} /* namespace descriptor */
} /* namespace upb */
#endif
/* Declares the C name upb_descreader alongside the C++ name
 * upb::descriptor::Reader.  No trailing semicolon after the macro. */
UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader)
#ifdef __cplusplus
/* Class that receives descriptor data according to the descriptor.proto schema
 * and uses it to build upb::Defs corresponding to that schema. */
class upb::descriptor::Reader {
 public:
  /* These handlers must have come from NewHandlers() and must outlive the
   * Reader.
   *
   * TODO: generate the handlers statically (like we do with the
   * descriptor.proto defs) so that there is no need to pass this parameter (or
   * to build/memory-manage the handlers at runtime at all).  Unfortunately this
   * is a bit tricky to implement for Handlers, but necessary to simplify this
   * interface. */
  static Reader* Create(Environment* env, const Handlers* handlers);

  /* The reader's input; this is where descriptor.proto data should be sent. */
  Sink* input();

  /* Returns an array of all defs that have been parsed, and transfers ownership
   * of them to "owner".  The number of defs is stored in *n.  Ownership of the
   * returned array itself is retained by the Reader, and the array is
   * invalidated by any other call into Reader.
   *
   * These defs are not frozen or resolved; they are ready to be added to a
   * symtab. */
  upb::Def** GetDefs(void* owner, int* n);

  /* Builds and returns handlers for the reader, owned by "owner." */
  static Handlers* NewHandlers(const void* owner);

 private:
  UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader)
};
#endif
UPB_BEGIN_EXTERN_C
// C API.
/* C API. */
upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h);
upb_sink *upb_descreader_input(upb_descreader *r);
upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
@ -72,7 +72,7 @@ const upb_handlers *upb_descreader_newhandlers(const void *owner);
UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ implementation details. /////////////////////////////////////////////////
/* C++ implementation details. ************************************************/
namespace upb {
namespace descriptor {
inline Reader* Reader::Create(Environment* e, const Handlers *h) {
@ -82,8 +82,8 @@ inline Sink* Reader::input() { return upb_descreader_input(this); }
inline upb::Def** Reader::GetDefs(void* owner, int* n) {
return upb_descreader_getdefs(this, owner, n);
}
} // namespace descriptor
} // namespace upb
} /* namespace descriptor */
} /* namespace upb */
#endif
#endif // UPB_DESCRIPTOR_H
#endif /* UPB_DESCRIPTOR_H */

@ -21,18 +21,17 @@ static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
/* Default allocator **********************************************************/
/* Just use realloc, keeping all allocated blocks in a linked list to destroy at
 * the end. */
typedef struct mem_block {
  /* List is doubly-linked, because in cases where realloc() moves an existing
   * block, we need to be able to remove the old pointer from the list
   * efficiently. */
  struct mem_block *prev, *next;
#ifndef NDEBUG
  size_t size;  /* Doesn't include mem_block structure. */
#endif
  /* NOTE(review): a flexible array member is C99, not C89.  It is kept here
   * because code in this file still refers to block->data; a strict
   * -std=c89 -pedantic build needs this member and that reference removed
   * together. */
  char data[];
} mem_block;
typedef struct {
@ -40,10 +39,12 @@ typedef struct {
} default_alloc_ud;
static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
UPB_UNUSED(oldsize);
default_alloc_ud *ud = _ud;
mem_block *from, *block;
void *ret;
UPB_UNUSED(oldsize);
mem_block *from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
#ifndef NDEBUG
if (from) {
@ -51,8 +52,11 @@ static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
}
#endif
mem_block *block = realloc(from, size + sizeof(mem_block));
/* TODO(haberman): we probably need to provide even better alignment here,
* like 16-byte alignment of the returned data pointer. */
block = realloc(from, size + sizeof(mem_block));
if (!block) return NULL;
ret = (char*)block + sizeof(*block);
#ifndef NDEBUG
block->size = size;
@ -60,20 +64,20 @@ static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
if (from) {
if (block != from) {
// The block was moved, so pointers in next and prev blocks must be
// updated to its new location.
/* The block was moved, so pointers in next and prev blocks must be
* updated to its new location. */
if (block->next) block->next->prev = block;
if (block->prev) block->prev->next = block;
}
} else {
// Insert at head of linked list.
/* Insert at head of linked list. */
block->prev = NULL;
block->next = ud->head;
if (block->next) block->next->prev = block;
ud->head = block;
}
return &block->data;
return ret;
}
static void default_alloc_cleanup(void *_ud) {
@ -106,14 +110,14 @@ static bool write_err_to(void *ud, const upb_status *status) {
/* upb_env ********************************************************************/
/* Initializes an environment in place.
 *
 * Marks the env as ok, zeroes the allocation counter, empties the cleanup
 * list, and installs default_alloc / default_err as the allocation and error
 * functions.  The env's default_alloc_ud field is used in place (via a
 * pointer cast) as the default allocator's userdata, with an empty block
 * list. */
void upb_env_init(upb_env *e) {
  default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
  e->ok_ = true;
  e->bytes_allocated = 0;
  e->cleanup_head = NULL;

  ud->head = NULL;

  /* Set default functions. */
  upb_env_setallocfunc(e, default_alloc, ud);
  upb_env_seterrorfunc(e, default_err, NULL);
}
@ -126,8 +130,8 @@ void upb_env_uninit(upb_env *e) {
ent = ent->next;
}
// Must do this after running cleanup functions, because this will delete
// the memory we store our cleanup entries in!
/* Must do this after running cleanup functions, because this will delete
the memory we store our cleanup entries in! */
if (e->alloc == default_alloc) {
default_alloc_cleanup(e->alloc_ud);
}
@ -174,8 +178,8 @@ bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
void *upb_env_malloc(upb_env *e, size_t size) {
e->bytes_allocated += size;
if (e->alloc == seeded_alloc) {
// This is equivalent to the next branch, but allows inlining for a
// measurable perf benefit.
/* This is equivalent to the next branch, but allows inlining for a
* measurable perf benefit. */
return seeded_alloc(e->alloc_ud, NULL, 0, size);
} else {
return e->alloc(e->alloc_ud, NULL, 0, size);
@ -183,12 +187,13 @@ void *upb_env_malloc(upb_env *e, size_t size) {
}
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
char *ret;
assert(oldsize <= size);
char *ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
#ifndef NDEBUG
// Overwrite non-preserved memory to ensure callers are passing the oldsize
// that they truly require.
/* Overwrite non-preserved memory to ensure callers are passing the oldsize
* that they truly require. */
memset(ret + oldsize, 0xff, size - oldsize);
#endif
@ -202,7 +207,7 @@ size_t upb_env_bytesallocated(const upb_env *e) {
/* upb_seededalloc ************************************************************/
// Be conservative and choose 16 in case anyone is using SSE.
/* Be conservative and choose 16 in case anyone is using SSE. */
static const size_t maxalign = 16;
static size_t align_up(size_t size) {
@ -211,24 +216,24 @@ static size_t align_up(size_t size) {
UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
size_t size) {
upb_seededalloc *a = ud;
UPB_UNUSED(ptr);
upb_seededalloc *a = ud;
size = align_up(size);
assert(a->mem_limit >= a->mem_ptr);
if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
// Fast path: we can satisfy from the initial allocation.
/* Fast path: we can satisfy from the initial allocation. */
void *ret = a->mem_ptr;
a->mem_ptr += size;
return ret;
} else {
// Slow path: fallback to other allocator.
a->need_cleanup = true;
// Is `ptr` part of the user-provided initial block? Don't pass it to the
// default allocator if so; otherwise, it may try to realloc() the block.
char *chptr = ptr;
/* Slow path: fallback to other allocator. */
a->need_cleanup = true;
/* Is `ptr` part of the user-provided initial block? Don't pass it to the
* default allocator if so; otherwise, it may try to realloc() the block. */
if (chptr >= a->mem_base && chptr < a->mem_limit) {
return a->alloc(a->alloc_ud, NULL, 0, size);
} else {
@ -238,13 +243,13 @@ UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
}
void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
a->mem_base = mem;
a->mem_ptr = mem;
a->mem_limit = (char*)mem + len;
a->need_cleanup = false;
a->returned_allocfunc = false;
default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
ud->head = NULL;
upb_seededalloc_setfallbackalloc(a, default_alloc, ud);

@ -27,32 +27,34 @@ class SeededAllocator;
}
#endif
/* Declare the C names upb_env / upb_seededalloc alongside their C++ class
 * names.  No trailing semicolons after the macros. */
UPB_DECLARE_TYPE(upb::Environment, upb_env)
UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc)
typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size);
typedef void upb_cleanup_func(void *ud);
typedef bool upb_error_func(void *ud, const upb_status *status);
// An environment is *not* thread-safe.
UPB_DEFINE_CLASS0(upb::Environment,
#ifdef __cplusplus
/* An environment is *not* thread-safe. */
class upb::Environment {
public:
Environment();
~Environment();
// Set a custom memory allocation function for the environment. May ONLY
// be called before any calls to Malloc()/Realloc()/AddCleanup() below.
// If this is not called, the system realloc() function will be used.
// The given user pointer "ud" will be passed to the allocation function.
//
// The allocation function will not receive corresponding "free" calls. it
// must ensure that the memory is valid for the lifetime of the Environment,
// but it may be reclaimed any time thereafter. The likely usage is that
// "ud" points to a stateful allocator, and that the allocator frees all
// memory, arena-style, when it is destroyed. In this case the allocator must
// outlive the Environment. Another possibility is that the allocation
// function returns GC-able memory that is guaranteed to be GC-rooted for the
// life of the Environment.
/* Set a custom memory allocation function for the environment. May ONLY
* be called before any calls to Malloc()/Realloc()/AddCleanup() below.
* If this is not called, the system realloc() function will be used.
* The given user pointer "ud" will be passed to the allocation function.
*
* The allocation function will not receive corresponding "free" calls. It
* must ensure that the memory is valid for the lifetime of the Environment,
* but it may be reclaimed any time thereafter. The likely usage is that
* "ud" points to a stateful allocator, and that the allocator frees all
* memory, arena-style, when it is destroyed. In this case the allocator must
* outlive the Environment. Another possibility is that the allocation
* function returns GC-able memory that is guaranteed to be GC-rooted for the
* life of the Environment. */
void SetAllocationFunction(upb_alloc_func* alloc, void* ud);
template<class T>
@ -60,74 +62,76 @@ UPB_DEFINE_CLASS0(upb::Environment,
SetAllocationFunction(allocator->GetAllocationFunction(), allocator);
}
// Set a custom error reporting function.
/* Set a custom error reporting function. */
void SetErrorFunction(upb_error_func* func, void* ud);
// Set the error reporting function to simply copy the status to the given
// status and abort.
/* Set the error reporting function to simply copy the status to the given
* status and abort. */
void ReportErrorsTo(Status* status);
// Returns true if all allocations and AddCleanup() calls have succeeded,
// and no errors were reported with ReportError() (except ones that recovered
// successfully).
/* Returns true if all allocations and AddCleanup() calls have succeeded,
* and no errors were reported with ReportError() (except ones that recovered
* successfully). */
bool ok() const;
//////////////////////////////////////////////////////////////////////////////
// Functions for use by encoders/decoders.
/* Functions for use by encoders/decoders. **********************************/
// Reports an error to this environment's callback, returning true if
// the caller should try to recover.
/* Reports an error to this environment's callback, returning true if
* the caller should try to recover. */
bool ReportError(const Status* status);
// Allocate memory. Uses the environment's allocation function.
//
// There is no need to free(). All memory will be freed automatically, but is
// guaranteed to outlive the Environment.
/* Allocate memory. Uses the environment's allocation function.
*
* There is no need to free(). All memory will be freed automatically, but is
* guaranteed to outlive the Environment. */
void* Malloc(size_t size);
// Reallocate memory. Preserves "oldsize" bytes from the existing buffer
// Requires: oldsize <= existing_size.
//
// TODO(haberman): should we also enforce that oldsize <= size?
/* Reallocate memory. Preserves "oldsize" bytes from the existing buffer.
* Requires: oldsize <= existing_size.
*
* TODO(haberman): should we also enforce that oldsize <= size? */
void* Realloc(void* ptr, size_t oldsize, size_t size);
// Add a cleanup function to run when the environment is destroyed.
// Returns false on out-of-memory.
//
// The first call to AddCleanup() after SetAllocationFunction() is guaranteed
// to return true -- this makes it possible to robustly set a cleanup handler
// for a custom allocation function.
/* Add a cleanup function to run when the environment is destroyed.
* Returns false on out-of-memory.
*
* The first call to AddCleanup() after SetAllocationFunction() is guaranteed
* to return true -- this makes it possible to robustly set a cleanup handler
* for a custom allocation function. */
bool AddCleanup(upb_cleanup_func* func, void* ud);
// Total number of bytes that have been allocated. It is undefined what
// Realloc() does to this counter.
/* Total number of bytes that have been allocated. It is undefined what
* Realloc() does to this counter. */
size_t BytesAllocated() const;
private:
UPB_DISALLOW_COPY_AND_ASSIGN(Environment);
,
UPB_DEFINE_STRUCT0(upb_env,
UPB_DISALLOW_COPY_AND_ASSIGN(Environment)
#else
struct upb_env {
#endif /* __cplusplus */
bool ok_;
size_t bytes_allocated;
// Alloc function.
/* Alloc function. */
upb_alloc_func *alloc;
void *alloc_ud;
// Error-reporting function.
/* Error-reporting function. */
upb_error_func *err;
void *err_ud;
// Userdata for default alloc func.
/* Userdata for default alloc func. */
void *default_alloc_ud;
// Cleanup entries. Pointer to a cleanup_ent, defined in env.c
/* Cleanup entries. Pointer to a cleanup_ent, defined in env.c */
void *cleanup_head;
// For future expansion, since the size of this struct is exposed to users.
/* For future expansion, since the size of this struct is exposed to users. */
void *future1;
void *future2;
));
};
UPB_BEGIN_EXTERN_C
@ -145,46 +149,51 @@ size_t upb_env_bytesallocated(const upb_env *e);
UPB_END_EXTERN_C
// An allocator that allocates from an initial memory region (likely the stack)
// before falling back to another allocator.
UPB_DEFINE_CLASS0(upb::SeededAllocator,
#ifdef __cplusplus
/* An allocator that allocates from an initial memory region (likely the stack)
* before falling back to another allocator. */
class upb::SeededAllocator {
public:
SeededAllocator(void *mem, size_t len);
~SeededAllocator();
// Set a custom fallback memory allocation function for the allocator, to use
// once the initial region runs out.
//
// May ONLY be called before GetAllocationFunction(). If this is not
// called, the system realloc() will be the fallback allocator.
/* Set a custom fallback memory allocation function for the allocator, to use
* once the initial region runs out.
*
* May ONLY be called before GetAllocationFunction(). If this is not
* called, the system realloc() will be the fallback allocator. */
void SetFallbackAllocator(upb_alloc_func *alloc, void *ud);
// Gets the allocation function for this allocator.
/* Gets the allocation function for this allocator. */
upb_alloc_func* GetAllocationFunction();
private:
UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator);
,
UPB_DEFINE_STRUCT0(upb_seededalloc,
// Fallback alloc function.
UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator)
#else
struct upb_seededalloc {
#endif /* __cplusplus */
/* Fallback alloc function. */
upb_alloc_func *alloc;
upb_cleanup_func *alloc_cleanup;
void *alloc_ud;
bool need_cleanup;
bool returned_allocfunc;
// Userdata for default alloc func.
/* Userdata for default alloc func. */
void *default_alloc_ud;
// Pointers for the initial memory region.
/* Pointers for the initial memory region. */
char *mem_base;
char *mem_ptr;
char *mem_limit;
// For future expansion, since the size of this struct is exposed to users.
/* For future expansion, since the size of this struct is exposed to users. */
void *future1;
void *future2;
));
};
UPB_BEGIN_EXTERN_C
@ -249,8 +258,8 @@ inline upb_alloc_func *SeededAllocator::GetAllocationFunction() {
return upb_seededalloc_getallocfunc(this);
}
} // namespace upb
} /* namespace upb */
#endif // __cplusplus
#endif /* __cplusplus */
#endif // UPB_ENV_H_
#endif /* UPB_ENV_H_ */

@ -13,21 +13,56 @@
#include <limits.h>
// Type detection and typedefs for integer types.
// For platforms where there are multiple 32-bit or 64-bit types, we need to be
// able to enumerate them so we can properly create overloads for all variants.
//
// If any platform existed where there were three integer types with the same
// size, this would have to become more complicated. For example, short, int,
// and long could all be 32-bits. Even more diabolically, short, int, long,
// and long long could all be 64 bits and still be standard-compliant.
// However, few platforms are this strange, and it's unlikely that upb will be
// used on the strangest ones.
// Can't count on stdint.h limits like INT32_MAX, because in C++ these are
// only defined when __STDC_LIMIT_MACROS are defined before the *first* include
// of stdint.h. We can't guarantee that someone else didn't include these first
// without defining __STDC_LIMIT_MACROS.
/* C inline methods. */
/* upb_bufhandle */
/* Puts a buffer handle into its empty state: no buffer, zero object offset,
 * and no attached object or object type. */
UPB_INLINE void upb_bufhandle_init(upb_bufhandle *h) {
  h->buf_ = NULL;
  h->objofs_ = 0;
  h->obj_ = NULL;
  h->objtype_ = NULL;
}
/* Counterpart to upb_bufhandle_init().  The body only references h through
 * UPB_UNUSED (presumably to silence an unused-parameter warning -- confirm
 * against the macro's definition); no field of the handle is touched. */
UPB_INLINE void upb_bufhandle_uninit(upb_bufhandle *h) {
UPB_UNUSED(h);
}
/* Attaches an object and its type tag to the handle.  Both pointers are
 * stored as given; nothing is copied. */
UPB_INLINE void upb_bufhandle_setobj(upb_bufhandle *h, const void *obj,
                                     const void *type) {
  h->objtype_ = type;
  h->obj_ = obj;
}
/* Records the buffer pointer and the object offset in the handle.  The
 * pointer is stored as given; the bytes are not copied. */
UPB_INLINE void upb_bufhandle_setbuf(upb_bufhandle *h, const char *buf,
                                     size_t ofs) {
  h->objofs_ = ofs;
  h->buf_ = buf;
}
/* Returns the object pointer stored in the handle (the obj_ field). */
UPB_INLINE const void *upb_bufhandle_obj(const upb_bufhandle *h) {
return h->obj_;
}
/* Returns the object-type pointer stored in the handle (the objtype_
 * field). */
UPB_INLINE const void *upb_bufhandle_objtype(const upb_bufhandle *h) {
return h->objtype_;
}
/* Returns the buffer pointer stored in the handle (the buf_ field). */
UPB_INLINE const char *upb_bufhandle_buf(const upb_bufhandle *h) {
return h->buf_;
}
#ifdef __cplusplus
/* Type detection and typedefs for integer types.
* For platforms where there are multiple 32-bit or 64-bit types, we need to be
* able to enumerate them so we can properly create overloads for all variants.
*
* If any platform existed where there were three integer types with the same
* size, this would have to become more complicated. For example, short, int,
* and long could all be 32-bits. Even more diabolically, short, int, long,
* and long long could all be 64 bits and still be standard-compliant.
* However, few platforms are this strange, and it's unlikely that upb will be
* used on the strangest ones. */
/* Can't count on stdint.h limits like INT32_MAX, because in C++ these are
* only defined when __STDC_LIMIT_MACROS are defined before the *first* include
* of stdint.h. We can't guarantee that someone else didn't include these first
* without defining __STDC_LIMIT_MACROS. */
#define UPB_INT32_MAX 0x7fffffffLL
#define UPB_INT32_MIN (-UPB_INT32_MAX - 1)
#define UPB_INT64_MAX 0x7fffffffffffffffLL
@ -49,8 +84,8 @@
#define UPB_LLONG_IS_64BITS 1
#endif
// We use macros instead of typedefs so we can undefine them later and avoid
// leaking them outside this header file.
/* We use macros instead of typedefs so we can undefine them later and avoid
* leaking them outside this header file. */
#if UPB_INT_IS_32BITS
#define UPB_INT32_T int
#define UPB_UINT32_T unsigned int
@ -59,12 +94,12 @@
#define UPB_TWO_32BIT_TYPES 1
#define UPB_INT32ALT_T long
#define UPB_UINT32ALT_T unsigned long
#endif // UPB_LONG_IS_32BITS
#endif /* UPB_LONG_IS_32BITS */
#elif UPB_LONG_IS_32BITS // && !UPB_INT_IS_32BITS
#elif UPB_LONG_IS_32BITS /* && !UPB_INT_IS_32BITS */
#define UPB_INT32_T long
#define UPB_UINT32_T unsigned long
#endif // UPB_INT_IS_32BITS
#endif /* UPB_INT_IS_32BITS */
#if UPB_LONG_IS_64BITS
@ -75,12 +110,12 @@
#define UPB_TWO_64BIT_TYPES 1
#define UPB_INT64ALT_T long long
#define UPB_UINT64ALT_T unsigned long long
#endif // UPB_LLONG_IS_64BITS
#endif /* UPB_LLONG_IS_64BITS */
#elif UPB_LLONG_IS_64BITS // && !UPB_LONG_IS_64BITS
#elif UPB_LLONG_IS_64BITS /* && !UPB_LONG_IS_64BITS */
#define UPB_INT64_T long long
#define UPB_UINT64_T unsigned long long
#endif // UPB_LONG_IS_64BITS
#endif /* UPB_LONG_IS_64BITS */
#undef UPB_INT32_MAX
#undef UPB_INT32_MIN
@ -91,56 +126,22 @@
#undef UPB_LONG_IS_64BITS
#undef UPB_LLONG_IS_64BITS
// C inline methods.
// upb_bufhandle
UPB_INLINE void upb_bufhandle_init(upb_bufhandle *h) {
h->obj_ = NULL;
h->objtype_ = NULL;
h->buf_ = NULL;
h->objofs_ = 0;
}
UPB_INLINE void upb_bufhandle_uninit(upb_bufhandle *h) {
UPB_UNUSED(h);
}
UPB_INLINE void upb_bufhandle_setobj(upb_bufhandle *h, const void *obj,
const void *type) {
h->obj_ = obj;
h->objtype_ = type;
}
UPB_INLINE void upb_bufhandle_setbuf(upb_bufhandle *h, const char *buf,
size_t ofs) {
h->buf_ = buf;
h->objofs_ = ofs;
}
UPB_INLINE const void *upb_bufhandle_obj(const upb_bufhandle *h) {
return h->obj_;
}
UPB_INLINE const void *upb_bufhandle_objtype(const upb_bufhandle *h) {
return h->objtype_;
}
UPB_INLINE const char *upb_bufhandle_buf(const upb_bufhandle *h) {
return h->buf_;
}
#ifdef __cplusplus
namespace upb {
typedef void CleanupFunc(void *ptr);
// Template to remove "const" from "const T*" and just return "T*".
//
// We define a nonsense default because otherwise it will fail to instantiate as
// a function parameter type even in cases where we don't expect any caller to
// actually match the overload.
/* Template to remove "const" from "const T*" and just return "T*".
*
* We define a nonsense default because otherwise it will fail to instantiate as
* a function parameter type even in cases where we don't expect any caller to
* actually match the overload. */
class CouldntRemoveConst {};
template <class T> struct remove_constptr { typedef CouldntRemoveConst type; };
template <class T> struct remove_constptr<const T *> { typedef T *type; };
// Template that we use below to remove a template specialization from
// consideration if it matches a specific type.
/* Template that we use below to remove a template specialization from
* consideration if it matches a specific type. */
template <class T, class U> struct disable_if_same { typedef void Type; };
template <class T> struct disable_if_same<T, T> {};
@ -177,27 +178,27 @@ bool is_same<T, U>::value = false;
template<class T>
bool is_same<T, T>::value = true;
// FuncInfo ////////////////////////////////////////////////////////////////////
/* FuncInfo *******************************************************************/
// Info about the user's original, pre-wrapped function.
/* Info about the user's original, pre-wrapped function. */
template <class C, class R = void>
struct FuncInfo {
// The type of the closure that the function takes (its first param).
/* The type of the closure that the function takes (its first param). */
typedef C Closure;
// The return type.
/* The return type. */
typedef R Return;
};
// Func ////////////////////////////////////////////////////////////////////////
/* Func ***********************************************************************/
// Func1, Func2, Func3: Template classes representing a function and its
// signature.
//
// Since the function is a template parameter, calling the function can be
// inlined at compile-time and does not require a function pointer at runtime.
// These functions are not bound to a handler data so have no data or cleanup
// handler.
/* Func1, Func2, Func3: Template classes representing a function and its
* signature.
*
* Since the function is a template parameter, calling the function can be
* inlined at compile-time and does not require a function pointer at runtime.
* These functions are not bound to a handler data so have no data or cleanup
* handler. */
struct UnboundFunc {
CleanupFunc *GetCleanup() { return NULL; }
void *GetData() { return NULL; }
@ -242,13 +243,13 @@ struct Func5 : public UnboundFunc {
}
};
// BoundFunc ///////////////////////////////////////////////////////////////////
/* BoundFunc ******************************************************************/
// BoundFunc2, BoundFunc3: Like Func2/Func3 except also contains a value that
// shall be bound to the function's second parameter.
//
// Note that the second parameter is a const pointer, but our stored bound value
// is non-const so we can free it when the handlers are destroyed.
/* BoundFunc2, BoundFunc3: Like Func2/Func3 except also contains a value that
* shall be bound to the function's second parameter.
*
* Note that the second parameter is a const pointer, but our stored bound value
* is non-const so we can free it when the handlers are destroyed. */
template <class T>
struct BoundFunc {
typedef typename remove_constptr<T>::type MutableP2;
@ -288,13 +289,13 @@ struct BoundFunc5 : public BoundFunc<P2> {
explicit BoundFunc5(typename Base::MutableP2 arg) : Base(arg) {}
};
// FuncSig /////////////////////////////////////////////////////////////////////
/* FuncSig ********************************************************************/
// FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function
// *signature*, but without a specific function attached.
//
// These classes contain member functions that can be invoked with a
// specific function to return a Func/BoundFunc class.
/* FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function
* *signature*, but without a specific function attached.
*
* These classes contain member functions that can be invoked with a
* specific function to return a Func/BoundFunc class. */
template <class R, class P1>
struct FuncSig1 {
template <R F(P1)>
@ -359,41 +360,41 @@ struct FuncSig5 {
}
};
// Overloaded template function that can construct the appropriate FuncSig*
// class given a function pointer by deducing the template parameters.
/* Overloaded template function that can construct the appropriate FuncSig*
* class given a function pointer by deducing the template parameters. */
template <class R, class P1>
inline FuncSig1<R, P1> MatchFunc(R (*f)(P1)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return FuncSig1<R, P1>();
}
template <class R, class P1, class P2>
inline FuncSig2<R, P1, P2> MatchFunc(R (*f)(P1, P2)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return FuncSig2<R, P1, P2>();
}
template <class R, class P1, class P2, class P3>
inline FuncSig3<R, P1, P2, P3> MatchFunc(R (*f)(P1, P2, P3)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return FuncSig3<R, P1, P2, P3>();
}
template <class R, class P1, class P2, class P3, class P4>
inline FuncSig4<R, P1, P2, P3, P4> MatchFunc(R (*f)(P1, P2, P3, P4)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return FuncSig4<R, P1, P2, P3, P4>();
}
template <class R, class P1, class P2, class P3, class P4, class P5>
inline FuncSig5<R, P1, P2, P3, P4, P5> MatchFunc(R (*f)(P1, P2, P3, P4, P5)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return FuncSig5<R, P1, P2, P3, P4, P5>();
}
// MethodSig ///////////////////////////////////////////////////////////////////
/* MethodSig ******************************************************************/
// CallMethod*: a function template that calls a given method.
/* CallMethod*: a function template that calls a given method. */
template <class R, class C, R (C::*F)()>
R CallMethod0(C *obj) {
return ((*obj).*F)();
@ -420,10 +421,10 @@ R CallMethod4(C *obj, P1 arg1, P2 arg2, P3 arg3, P4 arg4) {
return ((*obj).*F)(arg1, arg2, arg3, arg4);
}
// MethodSig: like FuncSig, but for member functions.
//
// GetFunc() returns a normal FuncN object, so after calling GetFunc() no
// more logic is required to special-case methods.
/* MethodSig: like FuncSig, but for member functions.
*
* GetFunc() returns a normal FuncN object, so after calling GetFunc() no
* more logic is required to special-case methods. */
template <class R, class C>
struct MethodSig0 {
template <R (C::*F)()>
@ -504,61 +505,61 @@ struct MethodSig4 {
template <class R, class C>
inline MethodSig0<R, C> MatchFunc(R (C::*f)()) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return MethodSig0<R, C>();
}
template <class R, class C, class P1>
inline MethodSig1<R, C, P1> MatchFunc(R (C::*f)(P1)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return MethodSig1<R, C, P1>();
}
template <class R, class C, class P1, class P2>
inline MethodSig2<R, C, P1, P2> MatchFunc(R (C::*f)(P1, P2)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return MethodSig2<R, C, P1, P2>();
}
template <class R, class C, class P1, class P2, class P3>
inline MethodSig3<R, C, P1, P2, P3> MatchFunc(R (C::*f)(P1, P2, P3)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return MethodSig3<R, C, P1, P2, P3>();
}
template <class R, class C, class P1, class P2, class P3, class P4>
inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*f)(P1, P2, P3, P4)) {
UPB_UNUSED(f); // Only used for template parameter deduction.
UPB_UNUSED(f); /* Only used for template parameter deduction. */
return MethodSig4<R, C, P1, P2, P3, P4>();
}
// MaybeWrapReturn /////////////////////////////////////////////////////////////
/* MaybeWrapReturn ************************************************************/
// Template class that attempts to wrap the return value of the function so it
// matches the expected type. There are two main adjustments it may make:
//
// 1. If the function returns void, make it return the expected type with
// a value that always indicates success.
// 2. If the function returns bool, make it return the expected type with a
// value that indicates success or failure.
//
// The "expected type" for return is:
// 1. void* for start handlers. If the closure parameter has a different type
// we will cast it to void* for the return in the success case.
// 2. size_t for string buffer handlers.
// 3. bool for everything else.
/* Template class that attempts to wrap the return value of the function so it
* matches the expected type. There are two main adjustments it may make:
*
* 1. If the function returns void, make it return the expected type with
* a value that always indicates success.
* 2. If the function returns bool, make it return the expected type with a
* value that indicates success or failure.
*
* The "expected type" for return is:
* 1. void* for start handlers. If the closure parameter has a different type
* we will cast it to void* for the return in the success case.
* 2. size_t for string buffer handlers.
* 3. bool for everything else. */
// Template parameters are FuncN type and desired return type.
/* Template parameters are FuncN type and desired return type. */
template <class F, class R, class Enable = void>
struct MaybeWrapReturn;
// If the return type matches, return the given function unwrapped.
/* If the return type matches, return the given function unwrapped. */
template <class F>
struct MaybeWrapReturn<F, typename F::Return> {
typedef F Func;
};
// Function wrapper that munges the return value from void to (bool)true.
/* Function wrapper that munges the return value from void to (bool)true. */
template <class P1, class P2, void F(P1, P2)>
bool ReturnTrue2(P1 p1, P2 p2) {
F(p1, p2);
@ -571,7 +572,7 @@ bool ReturnTrue3(P1 p1, P2 p2, P3 p3) {
return true;
}
// Function wrapper that munges the return value from void to (void*)arg1
/* Function wrapper that munges the return value from void to (void*)arg1 */
template <class P1, class P2, void F(P1, P2)>
void *ReturnClosure2(P1 p1, P2 p2) {
F(p1, p2);
@ -584,7 +585,7 @@ void *ReturnClosure3(P1 p1, P2 p2, P3 p3) {
return p1;
}
// Function wrapper that munges the return value from R to void*.
/* Function wrapper that munges the return value from R to void*. */
template <class R, class P1, class P2, R F(P1, P2)>
void *CastReturnToVoidPtr2(P1 p1, P2 p2) {
return F(p1, p2);
@ -595,7 +596,7 @@ void *CastReturnToVoidPtr3(P1 p1, P2 p2, P3 p3) {
return F(p1, p2, p3);
}
// Function wrapper that munges the return value from bool to void*.
/* Function wrapper that munges the return value from bool to void*. */
template <class P1, class P2, bool F(P1, P2)>
void *ReturnClosureOrBreak2(P1 p1, P2 p2) {
return F(p1, p2) ? p1 : UPB_BREAK;
@ -606,7 +607,7 @@ void *ReturnClosureOrBreak3(P1 p1, P2 p2, P3 p3) {
return F(p1, p2, p3) ? p1 : UPB_BREAK;
}
// For the string callback, which takes five params, returns the size param.
/* For the string callback, which takes five params, returns the size param. */
template <class P1, class P2,
void F(P1, P2, const char *, size_t, const BufferHandle *)>
size_t ReturnStringLen(P1 p1, P2 p2, const char *p3, size_t p4,
@ -615,8 +616,8 @@ size_t ReturnStringLen(P1 p1, P2 p2, const char *p3, size_t p4,
return p4;
}
// For the string callback, which takes five params, returns the size param or
// zero.
/* For the string callback, which takes five params, returns the size param or
* zero. */
template <class P1, class P2,
bool F(P1, P2, const char *, size_t, const BufferHandle *)>
size_t ReturnNOr0(P1 p1, P2 p2, const char *p3, size_t p4,
@ -624,8 +625,8 @@ size_t ReturnNOr0(P1 p1, P2 p2, const char *p3, size_t p4,
return F(p1, p2, p3, p4, p5) ? p4 : 0;
}
// If we have a function returning void but want a function returning bool, wrap
// it in a function that returns true.
/* If we have a function returning void but want a function returning bool, wrap
* it in a function that returns true. */
template <class P1, class P2, void F(P1, P2), class I>
struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, bool> {
typedef Func2<bool, P1, P2, ReturnTrue2<P1, P2, F>, I> Func;
@ -636,8 +637,8 @@ struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, bool> {
typedef Func3<bool, P1, P2, P3, ReturnTrue3<P1, P2, P3, F>, I> Func;
};
// If our function returns void but we want one returning void*, wrap it in a
// function that returns the first argument.
/* If our function returns void but we want one returning void*, wrap it in a
* function that returns the first argument. */
template <class P1, class P2, void F(P1, P2), class I>
struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, void *> {
typedef Func2<void *, P1, P2, ReturnClosure2<P1, P2, F>, I> Func;
@ -648,8 +649,8 @@ struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, void *> {
typedef Func3<void *, P1, P2, P3, ReturnClosure3<P1, P2, P3, F>, I> Func;
};
// If our function returns R* but we want one returning void*, wrap it in a
// function that casts to void*.
/* If our function returns R* but we want one returning void*, wrap it in a
* function that casts to void*. */
template <class R, class P1, class P2, R *F(P1, P2), class I>
struct MaybeWrapReturn<Func2<R *, P1, P2, F, I>, void *,
typename disable_if_same<R *, void *>::Type> {
@ -663,8 +664,8 @@ struct MaybeWrapReturn<Func3<R *, P1, P2, P3, F, I>, void *,
Func;
};
// If our function returns bool but we want one returning void*, wrap it in a
// function that returns either the first param or UPB_BREAK.
/* If our function returns bool but we want one returning void*, wrap it in a
* function that returns either the first param or UPB_BREAK. */
template <class P1, class P2, bool F(P1, P2), class I>
struct MaybeWrapReturn<Func2<bool, P1, P2, F, I>, void *> {
typedef Func2<void *, P1, P2, ReturnClosureOrBreak2<P1, P2, F>, I> Func;
@ -676,8 +677,8 @@ struct MaybeWrapReturn<Func3<bool, P1, P2, P3, F, I>, void *> {
Func;
};
// If our function returns void but we want one returning size_t, wrap it in a
// function that returns the size argument.
/* If our function returns void but we want one returning size_t, wrap it in a
* function that returns the size argument. */
template <class P1, class P2,
void F(P1, P2, const char *, size_t, const BufferHandle *), class I>
struct MaybeWrapReturn<
@ -687,8 +688,8 @@ struct MaybeWrapReturn<
ReturnStringLen<P1, P2, F>, I> Func;
};
// If our function returns bool but we want one returning size_t, wrap it in a
// function that returns either 0 or the buf size.
/* If our function returns bool but we want one returning size_t, wrap it in a
* function that returns either 0 or the buf size. */
template <class P1, class P2,
bool F(P1, P2, const char *, size_t, const BufferHandle *), class I>
struct MaybeWrapReturn<
@ -698,16 +699,16 @@ struct MaybeWrapReturn<
ReturnNOr0<P1, P2, F>, I> Func;
};
// ConvertParams ///////////////////////////////////////////////////////////////
/* ConvertParams **************************************************************/
// Template class that converts the function parameters if necessary, and
// ignores the HandlerData parameter if appropriate.
//
// Template parameter is the FuncN function type.
/* Template class that converts the function parameters if necessary, and
* ignores the HandlerData parameter if appropriate.
*
* Template parameter is the FuncN function type. */
template <class F, class T>
struct ConvertParams;
// Function that discards the handler data parameter.
/* Function that discards the handler data parameter. */
template <class R, class P1, R F(P1)>
R IgnoreHandlerData2(void *p1, const void *hd) {
UPB_UNUSED(hd);
@ -741,7 +742,7 @@ R IgnoreHandlerDataIgnoreHandle(void *p1, const void *hd, const char *p2,
return F(static_cast<P1>(p1), p2, p3);
}
// Function that casts the handler data parameter.
/* Function that casts the handler data parameter. */
template <class R, class P1, class P2, R F(P1, P2)>
R CastHandlerData2(void *c, const void *hd) {
return F(static_cast<P1>(c), static_cast<P2>(hd));
@ -766,7 +767,7 @@ R CastHandlerDataIgnoreHandle(void *c, const void *hd, const char *p3,
return F(static_cast<P1>(c), static_cast<P2>(hd), p3, p4);
}
// For unbound functions, ignore the handler data.
/* For unbound functions, ignore the handler data. */
template <class R, class P1, R F(P1), class I, class T>
struct ConvertParams<Func1<R, P1, F, I>, T> {
typedef Func2<R, void *, const void *, IgnoreHandlerData2<R, P1, F>, I> Func;
@ -780,8 +781,8 @@ struct ConvertParams<Func2<R, P1, P2, F, I>,
IgnoreHandlerData3<R, P1, P3_2, P2, F>, I> Func;
};
// For StringBuffer only; this ignores both the handler data and the
// BufferHandle.
/* For StringBuffer only; this ignores both the handler data and the
* BufferHandle. */
template <class R, class P1, R F(P1, const char *, size_t), class I, class T>
struct ConvertParams<Func3<R, P1, const char *, size_t, F, I>, T> {
typedef Func5<R, void *, const void *, const char *, size_t,
@ -796,7 +797,7 @@ struct ConvertParams<Func4<R, P1, P2, P3, P4, F, I>, T> {
IgnoreHandlerData5<R, P1, P2, P3, P4, F>, I> Func;
};
// For bound functions, cast the handler data.
/* For bound functions, cast the handler data. */
template <class R, class P1, class P2, R F(P1, P2), class I, class T>
struct ConvertParams<BoundFunc2<R, P1, P2, F, I>, T> {
typedef Func2<R, void *, const void *, CastHandlerData2<R, P1, P2, F>, I>
@ -811,7 +812,7 @@ struct ConvertParams<BoundFunc3<R, P1, P2, P3, F, I>,
CastHandlerData3<R, P1, P2, P3_2, P3, F>, I> Func;
};
// For StringBuffer only; this ignores the BufferHandle.
/* For StringBuffer only; this ignores the BufferHandle. */
template <class R, class P1, class P2, R F(P1, P2, const char *, size_t),
class I, class T>
struct ConvertParams<BoundFunc4<R, P1, P2, const char *, size_t, F, I>, T> {
@ -827,8 +828,8 @@ struct ConvertParams<BoundFunc5<R, P1, P2, P3, P4, P5, F, I>, T> {
CastHandlerData5<R, P1, P2, P3, P4, P5, F>, I> Func;
};
// utype/ltype are upper/lower-case, ctype is canonical C type, vtype is
// variant C type.
/* utype/ltype are upper/lower-case, ctype is canonical C type, vtype is
* variant C type. */
#define TYPE_METHODS(utype, ltype, ctype, vtype) \
template <> struct CanonicalType<vtype> { \
typedef ctype Type; \
@ -843,22 +844,22 @@ struct ConvertParams<BoundFunc5<R, P1, P2, P3, P4, P5, F, I>, T> {
return upb_handlers_set##ltype(this, f, handler.handler_, &handler.attr_); \
} \
TYPE_METHODS(Double, double, double, double);
TYPE_METHODS(Float, float, float, float);
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T);
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T);
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64_T);
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32_T);
TYPE_METHODS(Bool, bool, bool, bool);
TYPE_METHODS(Double, double, double, double)
TYPE_METHODS(Float, float, float, float)
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T)
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T)
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64_T)
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32_T)
TYPE_METHODS(Bool, bool, bool, bool)
#ifdef UPB_TWO_32BIT_TYPES
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32ALT_T);
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T);
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32ALT_T)
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T)
#endif
#ifdef UPB_TWO_64BIT_TYPES
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64ALT_T);
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T);
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64ALT_T)
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T)
#endif
#undef TYPE_METHODS
@ -866,7 +867,8 @@ template <> struct CanonicalType<Status*> {
typedef Status* Type;
};
// Type methods that are only one-per-canonical-type and not one-per-cvariant.
/* Type methods that are only one-per-canonical-type and not
* one-per-cvariant. */
#define TYPE_METHODS(utype, ctype) \
inline bool Handlers::Set##utype##Handler(const FieldDef *f, \
@ -874,13 +876,13 @@ template <> struct CanonicalType<Status*> {
return SetValueHandler<ctype>(f, h); \
} \
TYPE_METHODS(Double, double);
TYPE_METHODS(Float, float);
TYPE_METHODS(UInt64, uint64_t);
TYPE_METHODS(UInt32, uint32_t);
TYPE_METHODS(Int64, int64_t);
TYPE_METHODS(Int32, int32_t);
TYPE_METHODS(Bool, bool);
TYPE_METHODS(Double, double)
TYPE_METHODS(Float, float)
TYPE_METHODS(UInt64, uint64_t)
TYPE_METHODS(UInt32, uint32_t)
TYPE_METHODS(Int64, int64_t)
TYPE_METHODS(Int32, int32_t)
TYPE_METHODS(Bool, bool)
#undef TYPE_METHODS
template <class F> struct ReturnOf;
@ -923,23 +925,23 @@ inline Handler<T>::Handler(F func)
ReturnWrappedFunc;
handler_ = ReturnWrappedFunc().Call;
// Set attributes based on what templates can statically tell us about the
// user's function.
/* Set attributes based on what templates can statically tell us about the
* user's function. */
// If the original function returns void, then we know that we wrapped it to
// always return ok.
/* If the original function returns void, then we know that we wrapped it to
* always return ok. */
bool always_ok = is_same<typename F::FuncInfo::Return, void>::value;
attr_.SetAlwaysOk(always_ok);
// Closure parameter and return type.
/* Closure parameter and return type. */
attr_.SetClosureType(UniquePtrForType<typename F::FuncInfo::Closure>());
// We use the closure type (from the first parameter) if the return type is
// void or bool, since these are the two cases we wrap to return the closure's
// type anyway.
//
// This is all nonsense for non START* handlers, but it doesn't matter because
// in that case the value will be ignored.
/* We use the closure type (from the first parameter) if the return type is
* void or bool, since these are the two cases we wrap to return the closure's
* type anyway.
*
* This is all nonsense for non START* handlers, but it doesn't matter because
* in that case the value will be ignored. */
typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
typename F::FuncInfo::Closure>::value
EffectiveReturn;
@ -1010,19 +1012,6 @@ inline reffed_ptr<const Handlers> Handlers::NewFrozen(
const upb_handlers *h = upb_handlers_newfrozen(m, &h, callback, closure);
return reffed_ptr<const Handlers>(h, &h);
}
/* C++ wrapper: returns the result of upb_handlers_isfrozen() for this object. */
inline bool Handlers::IsFrozen() const { return upb_handlers_isfrozen(this); }
/* C++ wrapper: forwards this object and owner to upb_handlers_ref(). */
inline void Handlers::Ref(const void *owner) const {
upb_handlers_ref(this, owner);
}
/* C++ wrapper: forwards this object and owner to upb_handlers_unref(). */
inline void Handlers::Unref(const void *owner) const {
upb_handlers_unref(this, owner);
}
/* C++ wrapper: forwards this object, from and to to
 * upb_handlers_donateref(). */
inline void Handlers::DonateRef(const void *from, const void *to) const {
upb_handlers_donateref(this, from, to);
}
/* C++ wrapper: forwards this object and owner to upb_handlers_checkref(). */
inline void Handlers::CheckRef(const void *owner) const {
upb_handlers_checkref(this, owner);
}
inline const Status* Handlers::status() {
return upb_handlers_status(this);
}
@ -1138,9 +1127,9 @@ inline BytesHandler::BytesHandler() {
inline BytesHandler::~BytesHandler() {}
} // namespace upb
} /* namespace upb */
#endif // __cplusplus
#endif /* __cplusplus */
#undef UPB_TWO_32BIT_TYPES
@ -1154,4 +1143,4 @@ inline BytesHandler::~BytesHandler() {}
#undef UPB_INT64ALT_T
#undef UPB_UINT64ALT_T
#endif // UPB_HANDLERS_INL_H_
#endif /* UPB_HANDLERS_INL_H_ */

@ -9,14 +9,16 @@
*/
#include "upb/handlers.h"
#include "upb/structdefs.int.h"
#include <stdlib.h>
#include <string.h>
#include "upb/sink.h"
// Defined for the sole purpose of having a unique pointer value for
// UPB_NO_CLOSURE.
/* Defined for the sole purpose of having a unique pointer value for
* UPB_NO_CLOSURE. */
char _upb_noclosure;
static void freehandlers(upb_refcounted *r) {
@ -45,42 +47,45 @@ static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
const upb_handlers *sub;
if (!upb_fielddef_issubmsg(f)) continue;
const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
if (sub) visit(r, UPB_UPCAST(sub), closure);
sub = upb_handlers_getsubhandlers(h, f);
if (sub) visit(r, upb_handlers_upcast(sub), closure);
}
}
static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
typedef struct {
upb_inttable tab; // maps upb_msgdef* -> upb_handlers*.
upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
upb_handlers_callback *callback;
const void *closure;
} dfs_state;
// TODO(haberman): discard upb_handlers* objects that do not actually have any
// handlers set and cannot reach any upb_handlers* object that does. This is
// slightly tricky to do correctly.
/* TODO(haberman): discard upb_handlers* objects that do not actually have any
* handlers set and cannot reach any upb_handlers* object that does. This is
* slightly tricky to do correctly. */
static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
dfs_state *s) {
upb_msg_field_iter i;
upb_handlers *h = upb_handlers_new(m, owner);
if (!h) return NULL;
if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
s->callback(s->closure, h);
// For each submessage field, get or create a handlers object and set it as
// the subhandlers.
upb_msg_field_iter i;
/* For each submessage field, get or create a handlers object and set it as
* the subhandlers. */
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
const upb_msgdef *subdef;
upb_value subm_ent;
if (!upb_fielddef_issubmsg(f)) continue;
const upb_msgdef *subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
upb_value subm_ent;
subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
} else {
@ -97,11 +102,11 @@ oom:
return NULL;
}
// Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
// subhandlers for this submessage field.
/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
* subhandlers for this submessage field. */
#define SUBH(h, selector) (h->sub[selector])
// The selector for a submessage field is the field index.
/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, f->index_)
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
@ -139,6 +144,10 @@ static const void **returntype(upb_handlers *h, const upb_fielddef *f,
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
upb_handlertype_t type, upb_func *func,
upb_handlerattr *attr) {
upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
const void *closure_type;
const void **context_closure_type;
assert(!upb_handlers_isfrozen(h));
if (sel < 0) {
@ -153,15 +162,13 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
return false;
}
upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
if (attr) {
set_attr = *attr;
}
// Check that the given closure type matches the closure type that has been
// established for this context (if any).
const void *closure_type = upb_handlerattr_closuretype(&set_attr);
const void **context_closure_type;
/* Check that the given closure type matches the closure type that has been
* established for this context (if any). */
closure_type = upb_handlerattr_closuretype(&set_attr);
if (type == UPB_HANDLER_STRING) {
context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
@ -175,7 +182,7 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
if (closure_type && *context_closure_type &&
closure_type != *context_closure_type) {
// TODO(haberman): better message for debugging.
/* TODO(haberman): better message for debugging. */
if (f) {
upb_status_seterrf(&h->status_,
"closure type does not match for field %s",
@ -190,8 +197,8 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
if (closure_type)
*context_closure_type = closure_type;
// If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
// matches any pre-existing expectations about what type is expected.
/* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
* matches any pre-existing expectations about what type is expected. */
if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
const void *table_return_type =
@ -210,17 +217,20 @@ static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
return true;
}
// Returns the effective closure type for this handler (which will propagate
// from outer frames if this frame has no START* handler). Not implemented for
// UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL if
// the effective closure type is unspecified (either no handler was registered
// to specify it or the handler that was registered did not specify the closure
// type).
/* Returns the effective closure type for this handler (which will propagate
* from outer frames if this frame has no START* handler). Not implemented for
* UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL if
* the effective closure type is unspecified (either no handler was registered
* to specify it or the handler that was registered did not specify the closure
* type). */
const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
upb_handlertype_t type) {
assert(type != UPB_HANDLER_STRING);
const void *ret = h->top_closure_type;
const void *ret;
upb_selector_t sel;
assert(type != UPB_HANDLER_STRING);
ret = h->top_closure_type;
if (upb_fielddef_isseq(f) &&
type != UPB_HANDLER_STARTSEQ &&
type != UPB_HANDLER_ENDSEQ &&
@ -233,26 +243,30 @@ const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
}
// The effective type of the submessage; not used yet.
// if (type == SUBMESSAGE &&
// h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
// ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
// }
/* The effective type of the submessage; not used yet.
* if (type == SUBMESSAGE &&
* h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
* ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
* } */
return ret;
}
// Checks whether the START* handler specified by f & type is missing even
// though it is required to convert the established type of an outer frame
// ("closure_type") into the established type of an inner frame (represented in
// the return closure type of this handler's attr).
/* Checks whether the START* handler specified by f & type is missing even
* though it is required to convert the established type of an outer frame
* ("closure_type") into the established type of an inner frame (represented in
* the return closure type of this handler's attr). */
bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
upb_status *status) {
const void *closure_type;
const upb_handlerattr *attr;
const void *return_closure_type;
upb_selector_t sel = handlers_getsel(h, f, type);
if (h->table[sel].func) return true;
const void *closure_type = effective_closure_type(h, f, type);
const upb_handlerattr *attr = &h->table[sel].attr;
const void *return_closure_type = upb_handlerattr_returnclosuretype(attr);
closure_type = effective_closure_type(h, f, type);
attr = &h->table[sel].attr;
return_closure_type = upb_handlerattr_returnclosuretype(attr);
if (closure_type && return_closure_type &&
closure_type != return_closure_type) {
upb_status_seterrf(status,
@ -265,32 +279,14 @@ bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
/* Public interface ***********************************************************/
bool upb_handlers_isfrozen(const upb_handlers *h) {
return upb_refcounted_isfrozen(UPB_UPCAST(h));
}
void upb_handlers_ref(const upb_handlers *h, const void *owner) {
upb_refcounted_ref(UPB_UPCAST(h), owner);
}
void upb_handlers_unref(const upb_handlers *h, const void *owner) {
upb_refcounted_unref(UPB_UPCAST(h), owner);
}
void upb_handlers_donateref(
const upb_handlers *h, const void *from, const void *to) {
upb_refcounted_donateref(UPB_UPCAST(h), from, to);
}
void upb_handlers_checkref(const upb_handlers *h, const void *owner) {
upb_refcounted_checkref(UPB_UPCAST(h), owner);
}
upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
int extra;
upb_handlers *h;
assert(upb_msgdef_isfrozen(md));
int extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
upb_handlers *h = calloc(sizeof(*h) + extra, 1);
extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
h = calloc(sizeof(*h) + extra, 1);
if (!h) return NULL;
h->msg = md;
@ -298,14 +294,15 @@ upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
upb_status_clear(&h->status_);
h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
if (!h->sub) goto oom;
if (!upb_refcounted_init(UPB_UPCAST(h), &vtbl, owner)) goto oom;
if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
goto oom;
if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
// calloc() above initialized all handlers to NULL.
/* calloc() above initialized all handlers to NULL. */
return h;
oom:
freehandlers(UPB_UPCAST(h));
freehandlers(upb_handlers_upcast_mutable(h));
return NULL;
}
@ -314,17 +311,21 @@ const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
upb_handlers_callback *callback,
const void *closure) {
dfs_state state;
upb_handlers *ret;
bool ok;
upb_refcounted *r;
state.callback = callback;
state.closure = closure;
if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
upb_handlers *ret = newformsg(m, owner, &state);
ret = newformsg(m, owner, &state);
upb_inttable_uninit(&state.tab);
if (!ret) return NULL;
upb_refcounted *r = UPB_UPCAST(ret);
bool ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
r = upb_handlers_upcast_mutable(ret);
ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
UPB_ASSERT_VAR(ok, ok);
return ret;
@ -347,20 +348,20 @@ void upb_handlers_clearerr(upb_handlers *h) {
return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
}
SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32);
SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64);
SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32);
SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64);
SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT);
SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE);
SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL);
SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR);
SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING);
SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR);
SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ);
SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG);
SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG);
SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ);
SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
#undef SETTER
@ -382,8 +383,8 @@ bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
assert(sub);
assert(!upb_handlers_isfrozen(h));
assert(upb_fielddef_issubmsg(f));
if (SUBH_F(h, f)) return false; // Can't reset.
if (UPB_UPCAST(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
if (SUBH_F(h, f)) return false; /* Can't reset. */
if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
return false;
}
SUBH_F(h, f) = sub;
@ -407,17 +408,18 @@ bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
upb_selector_t sel) {
// STARTSUBMSG selector in sel is the field's selector base.
/* STARTSUBMSG selector in sel is the field's selector base. */
return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
}
/* Returns the msgdef associated with this handlers object, i.e. the value
 * held in its "msg" member. */
const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) {
  const upb_msgdef *md = h->msg;
  return md;
}
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
bool ok;
if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
return false;
}
bool ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
UPB_ASSERT_VAR(ok, ok);
return true;
}
@ -426,8 +428,10 @@ bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
/* "Static" methods ***********************************************************/
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
// TODO: verify we have a transitive closure.
for (int i = 0; i < n; i++) {
/* TODO: verify we have a transitive closure. */
int i;
for (i = 0; i < n; i++) {
upb_msg_field_iter j;
upb_handlers *h = handlers[i];
if (!upb_ok(&h->status_)) {
@ -437,9 +441,8 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
return false;
}
// Check that there are no closure mismatches due to missing Start* handlers
// or subhandlers with different type-level types.
upb_msg_field_iter j;
/* Check that there are no closure mismatches due to missing Start* handlers
* or subhandlers with different type-level types. */
for(upb_msg_field_begin(&j, h->msg);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
@ -473,29 +476,29 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
}
if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
// For now we add an empty subhandlers in this case. It makes the
// decoder code generator simpler, because it only has to handle two
// cases (submessage has handlers or not) as opposed to three
// (submessage has handlers in enclosing message but no subhandlers).
//
// This makes parsing less efficient in the case that we want to
// notice a submessage but skip its contents (like if we're testing
// for submessage presence or counting the number of repeated
// submessages). In this case we will end up parsing the submessage
// field by field and throwing away the results for each, instead of
// skipping the whole delimited thing at once. If this is an issue we
// can revisit it, but do remember that this only arises when you have
// handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
// submessage but no subhandlers. The uses cases for this are
// limited.
/* For now we add an empty subhandlers in this case. It makes the
* decoder code generator simpler, because it only has to handle two
* cases (submessage has handlers or not) as opposed to three
* (submessage has handlers in enclosing message but no subhandlers).
*
* This makes parsing less efficient in the case that we want to
* notice a submessage but skip its contents (like if we're testing
* for submessage presence or counting the number of repeated
* submessages). In this case we will end up parsing the submessage
* field by field and throwing away the results for each, instead of
* skipping the whole delimited thing at once. If this is an issue we
* can revisit it, but do remember that this only arises when you have
* handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
* submessage but no subhandlers. The use cases for this are
* limited. */
upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
upb_handlers_setsubhandlers(h, f, sub);
upb_handlers_unref(sub, &sub);
}
// TODO(haberman): check type of submessage.
// This is slightly tricky; also consider whether we should check that
// they match at setsubhandlers time.
/* TODO(haberman): check type of submessage.
* This is slightly tricky; also consider whether we should check that
* they match at setsubhandlers time. */
}
}
}
@ -518,7 +521,7 @@ upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
default: assert(false); return -1; // Invalid input.
default: assert(false); return -1; /* Invalid input. */
}
}
@ -570,10 +573,10 @@ bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
break;
case UPB_HANDLER_STARTSUBMSG:
if (!upb_fielddef_issubmsg(f)) return false;
// Selectors for STARTSUBMSG are at the beginning of the table so that the
// selector can also be used as an index into the "sub" array of
// subhandlers. The indexes for the two into these two tables are the
// same, except that in the handler table the static selectors come first.
/* Selectors for STARTSUBMSG are at the beginning of the table so that the
* selector can also be used as an index into the "sub" array of
* subhandlers. The indexes into these two tables are the
* same, except that in the handler table the static selectors come first. */
*s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
break;
case UPB_HANDLER_ENDSUBMSG:
@ -591,13 +594,13 @@ uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
uint32_t ret = 1;
if (upb_fielddef_isseq(f)) ret += 2; // STARTSEQ/ENDSEQ
if (upb_fielddef_isstring(f)) ret += 2; // [STRING]/STARTSTR/ENDSTR
if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
if (upb_fielddef_issubmsg(f)) {
// ENDSUBMSG (STARTSUBMSG is at table beginning)
/* ENDSUBMSG (STARTSUBMSG is at table beginning) */
ret += 0;
if (upb_fielddef_lazy(f)) {
// STARTSTR/ENDSTR/STRING (for lazy)
/* STARTSTR/ENDSTR/STRING (for lazy) */
ret += 3;
}
}
@ -661,7 +664,7 @@ void upb_byteshandler_init(upb_byteshandler* h) {
memset(h, 0, sizeof(*h));
}
// For when we support handlerfree callbacks.
/* For when we support handlerfree callbacks. */
void upb_byteshandler_uninit(upb_byteshandler* h) {
/* No teardown work is performed here: the only statement references h
 * through UPB_UNUSED (presumably to keep unused-parameter warnings quiet --
 * TODO confirm against the macro's definition). */
UPB_UNUSED(h);
}

@ -33,25 +33,26 @@ class HandlerAttributes;
class Handlers;
template <class T> class Handler;
template <class T> struct CanonicalType;
} // namespace upb
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::BufferHandle, upb_bufhandle);
UPB_DECLARE_TYPE(upb::BytesHandler, upb_byteshandler);
UPB_DECLARE_TYPE(upb::HandlerAttributes, upb_handlerattr);
UPB_DECLARE_TYPE(upb::Handlers, upb_handlers);
// The maximum depth that the handler graph can have. This is a resource limit
// for the C stack since we sometimes need to recursively traverse the graph.
// Cycles are ok; the traversal will stop when it detects a cycle, but we must
// hit the cycle before the maximum depth is reached.
//
// If having a single static limit is too inflexible, we can add another variant
// of Handlers::Freeze that allows specifying this as a parameter.
UPB_DECLARE_TYPE(upb::BufferHandle, upb_bufhandle)
UPB_DECLARE_TYPE(upb::BytesHandler, upb_byteshandler)
UPB_DECLARE_TYPE(upb::HandlerAttributes, upb_handlerattr)
UPB_DECLARE_DERIVED_TYPE(upb::Handlers, upb::RefCounted,
upb_handlers, upb_refcounted)
/* The maximum depth that the handler graph can have. This is a resource limit
* for the C stack since we sometimes need to recursively traverse the graph.
* Cycles are ok; the traversal will stop when it detects a cycle, but we must
* hit the cycle before the maximum depth is reached.
*
* If having a single static limit is too inflexible, we can add another variant
* of Handlers::Freeze that allows specifying this as a parameter. */
#define UPB_MAX_HANDLER_DEPTH 64
// All the different types of handlers that can be registered.
// Only needed for the advanced functions in upb::Handlers.
/* All the different types of handlers that can be registered.
* Only needed for the advanced functions in upb::Handlers. */
typedef enum {
UPB_HANDLER_INT32,
UPB_HANDLER_INT64,
@ -66,25 +67,25 @@ typedef enum {
UPB_HANDLER_STARTSUBMSG,
UPB_HANDLER_ENDSUBMSG,
UPB_HANDLER_STARTSEQ,
UPB_HANDLER_ENDSEQ,
UPB_HANDLER_ENDSEQ
} upb_handlertype_t;
#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1)
#define UPB_BREAK NULL
// A convenient definition for when no closure is needed.
/* A convenient definition for when no closure is needed. */
extern char _upb_noclosure;
#define UPB_NO_CLOSURE &_upb_noclosure
// A selector refers to a specific field handler in the Handlers object
// (for example: the STARTSUBMSG handler for field "field15").
/* A selector refers to a specific field handler in the Handlers object
* (for example: the STARTSUBMSG handler for field "field15"). */
typedef int32_t upb_selector_t;
UPB_BEGIN_EXTERN_C
// Forward-declares for C inline accessors. We need to declare these here
// so we can "friend" them in the class declarations in C++.
/* Forward-declares for C inline accessors. We need to declare these here
* so we can "friend" them in the class declarations in C++. */
UPB_INLINE upb_func *upb_handlers_gethandler(const upb_handlers *h,
upb_selector_t s);
UPB_INLINE const void *upb_handlerattr_handlerdata(const upb_handlerattr *attr);
@ -103,104 +104,111 @@ UPB_INLINE const char *upb_bufhandle_buf(const upb_bufhandle *h);
UPB_END_EXTERN_C
// Static selectors for upb::Handlers.
/* Static selectors for upb::Handlers. */
#define UPB_STARTMSG_SELECTOR 0
#define UPB_ENDMSG_SELECTOR 1
#define UPB_STATIC_SELECTOR_COUNT 2
// Static selectors for upb::BytesHandler.
/* Static selectors for upb::BytesHandler. */
#define UPB_STARTSTR_SELECTOR 0
#define UPB_STRING_SELECTOR 1
#define UPB_ENDSTR_SELECTOR 2
typedef void upb_handlerfree(void *d);
// A set of attributes that accompanies a handler's function pointer.
UPB_DEFINE_CLASS0(upb::HandlerAttributes,
#ifdef __cplusplus
/* A set of attributes that accompanies a handler's function pointer. */
class upb::HandlerAttributes {
public:
HandlerAttributes();
~HandlerAttributes();
// Sets the handler data that will be passed as the second parameter of the
// handler. To free this pointer when the handlers are freed, call
// Handlers::AddCleanup().
/* Sets the handler data that will be passed as the second parameter of the
* handler. To free this pointer when the handlers are freed, call
* Handlers::AddCleanup(). */
bool SetHandlerData(const void *handler_data);
const void* handler_data() const;
// Use this to specify the type of the closure. This will be checked against
// all other closure types for handler that use the same closure.
// Registration will fail if this does not match all other non-NULL closure
// types.
/* Use this to specify the type of the closure. This will be checked against
* all other closure types for handlers that use the same closure.
* Registration will fail if this does not match all other non-NULL closure
* types. */
bool SetClosureType(const void *closure_type);
const void* closure_type() const;
// Use this to specify the type of the returned closure. Only used for
// Start*{String,SubMessage,Sequence} handlers. This must match the closure
// type of any handlers that use it (for example, the StringBuf handler must
// match the closure returned from StartString).
/* Use this to specify the type of the returned closure. Only used for
* Start*{String,SubMessage,Sequence} handlers. This must match the closure
* type of any handlers that use it (for example, the StringBuf handler must
* match the closure returned from StartString). */
bool SetReturnClosureType(const void *return_closure_type);
const void* return_closure_type() const;
// Set to indicate that the handler always returns "ok" (either "true" or a
// non-NULL closure). This is a hint that can allow code generators to
// generate more efficient code.
/* Set to indicate that the handler always returns "ok" (either "true" or a
* non-NULL closure). This is a hint that can allow code generators to
* generate more efficient code. */
bool SetAlwaysOk(bool always_ok);
bool always_ok() const;
private:
friend UPB_INLINE const void * ::upb_handlerattr_handlerdata(
const upb_handlerattr *attr);
,
UPB_DEFINE_STRUCT0(upb_handlerattr,
#else
struct upb_handlerattr {
#endif
const void *handler_data_;
const void *closure_type_;
const void *return_closure_type_;
bool alwaysok_;
));
};
#define UPB_HANDLERATTR_INITIALIZER {NULL, NULL, NULL, false}
typedef struct {
upb_func *func;
// It is wasteful to include the entire attributes here:
//
// * Some of the information is redundant (like storing the closure type
// separately for each handler that must match).
// * Some of the info is only needed prior to freeze() (like closure types).
// * alignment padding wastes a lot of space for alwaysok_.
//
// If/when the size and locality of handlers is an issue, we can optimize this
// not to store the entire attr like this. We do not expose the table's
// layout to allow this optimization in the future.
/* It is wasteful to include the entire attributes here:
*
* * Some of the information is redundant (like storing the closure type
* separately for each handler that must match).
* * Some of the info is only needed prior to freeze() (like closure types).
* * alignment padding wastes a lot of space for alwaysok_.
*
* If/when the size and locality of handlers is an issue, we can optimize this
* not to store the entire attr like this. We do not expose the table's
* layout to allow this optimization in the future. */
upb_handlerattr attr;
} upb_handlers_tabent;
// Extra information about a buffer that is passed to a StringBuf handler.
// TODO(haberman): allow the handle to be pinned so that it will outlive
// the handler invocation.
UPB_DEFINE_CLASS0(upb::BufferHandle,
#ifdef __cplusplus
/* Extra information about a buffer that is passed to a StringBuf handler.
* TODO(haberman): allow the handle to be pinned so that it will outlive
* the handler invocation. */
class upb::BufferHandle {
public:
BufferHandle();
~BufferHandle();
// The beginning of the buffer. This may be different than the pointer
// passed to a StringBuf handler because the handler may receive data
// that is from the middle or end of a larger buffer.
/* The beginning of the buffer. This may be different than the pointer
* passed to a StringBuf handler because the handler may receive data
* that is from the middle or end of a larger buffer. */
const char* buffer() const;
// The offset within the attached object where this buffer begins. Only
// meaningful if there is an attached object.
/* The offset within the attached object where this buffer begins. Only
* meaningful if there is an attached object. */
size_t object_offset() const;
// Note that object_offset is the offset of "buf" within the attached object.
/* Note that object_offset is the offset of "buf" within the attached
* object. */
void SetBuffer(const char* buf, size_t object_offset);
// The BufferHandle can have an "attached object", which can be used to
// tunnel through a pointer to the buffer's underlying representation.
/* The BufferHandle can have an "attached object", which can be used to
* tunnel through a pointer to the buffer's underlying representation. */
template <class T>
void SetAttachedObject(const T* obj);
// Returns NULL if the attached object is not of this type.
/* Returns NULL if the attached object is not of this type. */
template <class T>
const T* GetAttachedObject() const;
@ -215,26 +223,29 @@ UPB_DEFINE_CLASS0(upb::BufferHandle,
friend UPB_INLINE const void* ::upb_bufhandle_objtype(
const upb_bufhandle *h);
friend UPB_INLINE const char* ::upb_bufhandle_buf(const upb_bufhandle *h);
,
UPB_DEFINE_STRUCT0(upb_bufhandle,
#else
struct upb_bufhandle {
#endif
const char *buf_;
const void *obj_;
const void *objtype_;
size_t objofs_;
));
// A upb::Handlers object represents the set of handlers associated with a
// message in the graph of messages. You can think of it as a big virtual
// table with functions corresponding to all the events that can fire while
// parsing or visiting a message of a specific type.
//
// Any handlers that are not set behave as if they had successfully consumed
// the value. Any unset Start* handlers will propagate their closure to the
// inner frame.
//
// The easiest way to create the *Handler objects needed by the Set* methods is
// with the UpbBind() and UpbMakeHandler() macros; see below.
UPB_DEFINE_CLASS1(upb::Handlers, upb::RefCounted,
};
#ifdef __cplusplus
/* A upb::Handlers object represents the set of handlers associated with a
* message in the graph of messages. You can think of it as a big virtual
* table with functions corresponding to all the events that can fire while
* parsing or visiting a message of a specific type.
*
* Any handlers that are not set behave as if they had successfully consumed
* the value. Any unset Start* handlers will propagate their closure to the
* inner frame.
*
* The easiest way to create the *Handler objects needed by the Set* methods is
* with the UpbBind() and UpbMakeHandler() macros; see below. */
class upb::Handlers {
public:
typedef upb_selector_t Selector;
typedef upb_handlertype_t Type;
@ -259,95 +270,94 @@ UPB_DEFINE_CLASS1(upb::Handlers, upb::RefCounted,
typedef ValueHandler<double>::H DoubleHandler;
typedef ValueHandler<bool>::H BoolHandler;
// Any function pointer can be converted to this and converted back to its
// correct type.
/* Any function pointer can be converted to this and converted back to its
* correct type. */
typedef void GenericFunction();
typedef void HandlersCallback(const void *closure, upb_handlers *h);
// Returns a new handlers object for the given frozen msgdef.
// Returns NULL if memory allocation failed.
/* Returns a new handlers object for the given frozen msgdef.
* Returns NULL if memory allocation failed. */
static reffed_ptr<Handlers> New(const MessageDef *m);
// Convenience function for registering a graph of handlers that mirrors the
// graph of msgdefs for some message. For "m" and all its children a new set
// of handlers will be created and the given callback will be invoked,
// allowing the client to register handlers for this message. Note that any
// subhandlers set by the callback will be overwritten.
/* Convenience function for registering a graph of handlers that mirrors the
* graph of msgdefs for some message. For "m" and all its children a new set
* of handlers will be created and the given callback will be invoked,
* allowing the client to register handlers for this message. Note that any
* subhandlers set by the callback will be overwritten. */
static reffed_ptr<const Handlers> NewFrozen(const MessageDef *m,
HandlersCallback *callback,
const void *closure);
// Functionality from upb::RefCounted.
bool IsFrozen() const;
void Ref(const void* owner) const;
void Unref(const void* owner) const;
void DonateRef(const void *from, const void *to) const;
void CheckRef(const void *owner) const;
// All handler registration functions return bool to indicate success or
// failure; details about failures are stored in this status object. If a
// failure does occur, it must be cleared before the Handlers are frozen,
// otherwise the freeze() operation will fail. The functions may *only* be
// used while the Handlers are mutable.
/* Functionality from upb::RefCounted. */
UPB_REFCOUNTED_CPPMETHODS
/* All handler registration functions return bool to indicate success or
* failure; details about failures are stored in this status object. If a
* failure does occur, it must be cleared before the Handlers are frozen,
* otherwise the freeze() operation will fail. The functions may *only* be
* used while the Handlers are mutable. */
const Status* status();
void ClearError();
// Call to freeze these Handlers. Requires that any SubHandlers are already
// frozen. For cycles, you must use the static version below and freeze the
// whole graph at once.
/* Call to freeze these Handlers. Requires that any SubHandlers are already
* frozen. For cycles, you must use the static version below and freeze the
* whole graph at once. */
bool Freeze(Status* s);
// Freezes the given set of handlers. You may not freeze a handler without
// also freezing any handlers they point to.
/* Freezes the given set of handlers. You may not freeze a handler without
* also freezing any handlers they point to. */
static bool Freeze(Handlers*const* handlers, int n, Status* s);
static bool Freeze(const std::vector<Handlers*>& handlers, Status* s);
// Returns the msgdef associated with this handlers object.
/* Returns the msgdef associated with this handlers object. */
const MessageDef* message_def() const;
// Adds the given pointer and function to the list of cleanup functions that
// will be run when these handlers are freed. If this pointer has previously
// been registered, the function returns false and does nothing.
/* Adds the given pointer and function to the list of cleanup functions that
* will be run when these handlers are freed. If this pointer has previously
* been registered, the function returns false and does nothing. */
bool AddCleanup(void *ptr, upb_handlerfree *cleanup);
// Sets the startmsg handler for the message, which is defined as follows:
//
// bool startmsg(MyType* closure) {
// // Called when the message begins. Returns true if processing should
// // continue.
// return true;
// }
/* Sets the startmsg handler for the message, which is defined as follows:
*
* bool startmsg(MyType* closure) {
* // Called when the message begins. Returns true if processing should
* // continue.
* return true;
* }
*/
bool SetStartMessageHandler(const StartMessageHandler& handler);
// Sets the endmsg handler for the message, which is defined as follows:
//
// bool endmsg(MyType* closure, upb_status *status) {
// // Called when processing of this message ends, whether in success or
// // failure. "status" indicates the final status of processing, and
// // can also be modified in-place to update the final status.
// }
/* Sets the endmsg handler for the message, which is defined as follows:
*
* bool endmsg(MyType* closure, upb_status *status) {
* // Called when processing of this message ends, whether in success or
* // failure. "status" indicates the final status of processing, and
* // can also be modified in-place to update the final status.
* }
*/
bool SetEndMessageHandler(const EndMessageHandler& handler);
// Sets the value handler for the given field, which is defined as follows
// (this is for an int32 field; other field types will pass their native
// C/C++ type for "val"):
//
// bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) {
// // Called when the field's value is encountered. "d" contains
// // whatever data was bound to this field when it was registered.
// // Returns true if processing should continue.
// return true;
// }
//
// handers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...)));
//
// The value type must exactly match f->type().
// For example, a handler that takes an int32_t parameter may only be used for
// fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM.
//
// Returns false if the handler failed to register; in this case the cleanup
// handler (if any) will be called immediately.
/* Sets the value handler for the given field, which is defined as follows
* (this is for an int32 field; other field types will pass their native
* C/C++ type for "val"):
*
* bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) {
* // Called when the field's value is encountered. "d" contains
* // whatever data was bound to this field when it was registered.
* // Returns true if processing should continue.
* return true;
* }
*
* handlers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...)));
*
* The value type must exactly match f->type().
* For example, a handler that takes an int32_t parameter may only be used for
* fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM.
*
* Returns false if the handler failed to register; in this case the cleanup
* handler (if any) will be called immediately.
*/
bool SetInt32Handler (const FieldDef* f, const Int32Handler& h);
bool SetInt64Handler (const FieldDef* f, const Int64Handler& h);
bool SetUInt32Handler(const FieldDef* f, const UInt32Handler& h);
@ -356,240 +366,247 @@ UPB_DEFINE_CLASS1(upb::Handlers, upb::RefCounted,
bool SetDoubleHandler(const FieldDef* f, const DoubleHandler& h);
bool SetBoolHandler (const FieldDef* f, const BoolHandler& h);
// Like the previous, but templated on the type on the value (ie. int32).
// This is mostly useful to call from other templates. To call this you must
// specify the template parameter explicitly, ie:
// h->SetValueHandler<T>(f, UpbBind(MyHandler<T>, MyData));
/* Like the previous, but templated on the type of the value (i.e. int32).
* This is mostly useful to call from other templates. To call this you must
* specify the template parameter explicitly, i.e.:
* h->SetValueHandler<T>(f, UpbBind(MyHandler<T>, MyData)); */
template <class T>
bool SetValueHandler(
const FieldDef *f,
const typename ValueHandler<typename CanonicalType<T>::Type>::H& handler);
// Sets handlers for a string field, which are defined as follows:
//
// MySubClosure* startstr(MyClosure* c, const MyHandlerData* d,
// size_t size_hint) {
// // Called when a string value begins. The return value indicates the
// // closure for the string. "size_hint" indicates the size of the
// // string if it is known, however if the string is length-delimited
// // and the end-of-string is not available size_hint will be zero.
// // This case is indistinguishable from the case where the size is
// // known to be zero.
// //
// // TODO(haberman): is it important to distinguish these cases?
// // If we had ssize_t as a type we could make -1 "unknown", but
// // ssize_t is POSIX (not ANSI) and therefore less portable.
// // In practice I suspect it won't be important to distinguish.
// return closure;
// }
//
// size_t str(MyClosure* closure, const MyHandlerData* d,
// const char *str, size_t len) {
// // Called for each buffer of string data; the multiple physical buffers
// // are all part of the same logical string. The return value indicates
// // how many bytes were consumed. If this number is less than "len",
// // this will also indicate that processing should be halted for now,
// // like returning false or UPB_BREAK from any other callback. If
// // number is greater than "len", the excess bytes will be skipped over
// // and not passed to the callback.
// return len;
// }
//
// bool endstr(MyClosure* c, const MyHandlerData* d) {
// // Called when a string value ends. Return value indicates whether
// // processing should continue.
// return true;
// }
/* Sets handlers for a string field, which are defined as follows:
*
* MySubClosure* startstr(MyClosure* c, const MyHandlerData* d,
* size_t size_hint) {
* // Called when a string value begins. The return value indicates the
* // closure for the string. "size_hint" indicates the size of the
* // string if it is known, however if the string is length-delimited
* // and the end-of-string is not available size_hint will be zero.
* // This case is indistinguishable from the case where the size is
* // known to be zero.
* //
* // TODO(haberman): is it important to distinguish these cases?
* // If we had ssize_t as a type we could make -1 "unknown", but
* // ssize_t is POSIX (not ANSI) and therefore less portable.
* // In practice I suspect it won't be important to distinguish.
* return closure;
* }
*
* size_t str(MyClosure* closure, const MyHandlerData* d,
* const char *str, size_t len) {
* // Called for each buffer of string data; the multiple physical buffers
* // are all part of the same logical string. The return value indicates
* // how many bytes were consumed. If this number is less than "len",
* // this will also indicate that processing should be halted for now,
* // like returning false or UPB_BREAK from any other callback. If the
* // number is greater than "len", the excess bytes will be skipped over
* // and not passed to the callback.
* return len;
* }
*
* bool endstr(MyClosure* c, const MyHandlerData* d) {
* // Called when a string value ends. Return value indicates whether
* // processing should continue.
* return true;
* }
*/
bool SetStartStringHandler(const FieldDef* f, const StartStringHandler& h);
bool SetStringHandler(const FieldDef* f, const StringHandler& h);
bool SetEndStringHandler(const FieldDef* f, const EndFieldHandler& h);
// Sets the startseq handler, which is defined as follows:
//
// MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) {
// // Called when a sequence (repeated field) begins. The returned
// // pointer indicates the closure for the sequence (or UPB_BREAK
// // to interrupt processing).
// return closure;
// }
//
// h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...)));
//
// Returns "false" if "f" does not belong to this message or is not a
// repeated field.
/* Sets the startseq handler, which is defined as follows:
*
* MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) {
* // Called when a sequence (repeated field) begins. The returned
* // pointer indicates the closure for the sequence (or UPB_BREAK
* // to interrupt processing).
* return closure;
* }
*
* h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...)));
*
* Returns "false" if "f" does not belong to this message or is not a
* repeated field.
*/
bool SetStartSequenceHandler(const FieldDef* f, const StartFieldHandler& h);
// Sets the startsubmsg handler for the given field, which is defined as
// follows:
//
// MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) {
// // Called when a submessage begins. The returned pointer indicates the
// // closure for the sequence (or UPB_BREAK to interrupt processing).
// return closure;
// }
//
// h->SetStartSubMessageHandler(f, UpbBind(startsubmsg,
// new MyHandlerData(...)));
//
// Returns "false" if "f" does not belong to this message or is not a
// submessage/group field.
/* Sets the startsubmsg handler for the given field, which is defined as
* follows:
*
* MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) {
* // Called when a submessage begins. The returned pointer indicates the
* // closure for the submessage (or UPB_BREAK to interrupt processing).
* return closure;
* }
*
* h->SetStartSubMessageHandler(f, UpbBind(startsubmsg,
* new MyHandlerData(...)));
*
* Returns "false" if "f" does not belong to this message or is not a
* submessage/group field.
*/
bool SetStartSubMessageHandler(const FieldDef* f, const StartFieldHandler& h);
// Sets the endsubmsg handler for the given field, which is defined as
// follows:
//
// bool endsubmsg(MyClosure* c, const MyHandlerData* d) {
// // Called when a submessage ends. Returns true to continue processing.
// return true;
// }
//
// Returns "false" if "f" does not belong to this message or is not a
// submessage/group field.
/* Sets the endsubmsg handler for the given field, which is defined as
* follows:
*
* bool endsubmsg(MyClosure* c, const MyHandlerData* d) {
* // Called when a submessage ends. Returns true to continue processing.
* return true;
* }
*
* Returns "false" if "f" does not belong to this message or is not a
* submessage/group field.
*/
bool SetEndSubMessageHandler(const FieldDef *f, const EndFieldHandler &h);
// Starts the endsubseq handler for the given field, which is defined as
// follows:
//
// bool endseq(MyClosure* c, const MyHandlerData* d) {
// // Called when a sequence ends. Returns true continue processing.
// return true;
// }
//
// Returns "false" if "f" does not belong to this message or is not a
// repeated field.
/* Sets the endseq handler for the given field, which is defined as
* follows:
*
* bool endseq(MyClosure* c, const MyHandlerData* d) {
* // Called when a sequence ends. Returns true to continue processing.
* return true;
* }
*
* Returns "false" if "f" does not belong to this message or is not a
* repeated field.
*/
bool SetEndSequenceHandler(const FieldDef* f, const EndFieldHandler& h);
// Sets or gets the object that specifies handlers for the given field, which
// must be a submessage or group. Returns NULL if no handlers are set.
/* Sets or gets the object that specifies handlers for the given field, which
* must be a submessage or group. Returns NULL if no handlers are set. */
bool SetSubHandlers(const FieldDef* f, const Handlers* sub);
const Handlers* GetSubHandlers(const FieldDef* f) const;
// Equivalent to GetSubHandlers, but takes the STARTSUBMSG selector for the
// field.
/* Equivalent to GetSubHandlers, but takes the STARTSUBMSG selector for the
* field. */
const Handlers* GetSubHandlers(Selector startsubmsg) const;
// A selector refers to a specific field handler in the Handlers object
// (for example: the STARTSUBMSG handler for field "field15").
// On success, returns true and stores the selector in "s".
// If the FieldDef or Type are invalid, returns false.
// The returned selector is ONLY valid for Handlers whose MessageDef
// contains this FieldDef.
/* A selector refers to a specific field handler in the Handlers object
* (for example: the STARTSUBMSG handler for field "field15").
* On success, returns true and stores the selector in "s".
* If the FieldDef or Type are invalid, returns false.
* The returned selector is ONLY valid for Handlers whose MessageDef
* contains this FieldDef. */
static bool GetSelector(const FieldDef* f, Type type, Selector* s);
// Given a START selector of any kind, returns the corresponding END selector.
/* Given a START selector of any kind, returns the corresponding END selector. */
static Selector GetEndSelector(Selector start_selector);
// Returns the function pointer for this handler. It is the client's
// responsibility to cast to the correct function type before calling it.
/* Returns the function pointer for this handler. It is the client's
* responsibility to cast to the correct function type before calling it. */
GenericFunction* GetHandler(Selector selector);
// Sets the given attributes to the attributes for this selector.
/* Sets the given attributes to the attributes for this selector. */
bool GetAttributes(Selector selector, HandlerAttributes* attr);
// Returns the handler data that was registered with this handler.
/* Returns the handler data that was registered with this handler. */
const void* GetHandlerData(Selector selector);
// Could add any of the following functions as-needed, with some minor
// implementation changes:
//
// const FieldDef* GetFieldDef(Selector selector);
// static bool IsSequence(Selector selector);
/* Could add any of the following functions as-needed, with some minor
* implementation changes:
*
* const FieldDef* GetFieldDef(Selector selector);
* static bool IsSequence(Selector selector); */
private:
UPB_DISALLOW_POD_OPS(Handlers, upb::Handlers);
UPB_DISALLOW_POD_OPS(Handlers, upb::Handlers)
friend UPB_INLINE GenericFunction *::upb_handlers_gethandler(
const upb_handlers *h, upb_selector_t s);
friend UPB_INLINE const void *::upb_handlers_gethandlerdata(
const upb_handlers *h, upb_selector_t s);
#else
struct upb_handlers {
#endif
upb_refcounted base;
,
UPB_DEFINE_STRUCT(upb_handlers, upb_refcounted,
const upb_msgdef *msg;
const upb_handlers **sub;
const void *top_closure_type;
upb_inttable cleanup_;
upb_status status_; // Used only when mutable.
upb_handlers_tabent table[1]; // Dynamically-sized field handler array.
));
upb_status status_; /* Used only when mutable. */
upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */
};
#ifdef __cplusplus
namespace upb {
// Convenience macros for creating a Handler object that is wrapped with a
// type-safe wrapper function that converts the "void*" parameters/returns
// of the underlying C API into nice C++ function.
//
// Sample usage:
// void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) {
// // do stuff ...
// }
//
// // Handler that doesn't need any data bound to it.
// void OnValue2(MyClosure* c, int32_t val) {
// // do stuff ...
// }
//
// // Handler that returns bool so it can return failure if necessary.
// bool OnValue3(MyClosure* c, int32_t val) {
// // do stuff ...
// return ok;
// }
//
// // Member function handler.
// class MyClosure {
// public:
// void OnValue(int32_t val) {
// // do stuff ...
// }
// };
//
// // Takes ownership of the MyHandlerData.
// handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...)));
// handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2));
// handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3));
// handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue));
/* Convenience macros for creating a Handler object that is wrapped with a
* type-safe wrapper function that converts the "void*" parameters/returns
* of the underlying C API into a nice C++ function.
*
* Sample usage:
* void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) {
* // do stuff ...
* }
*
* // Handler that doesn't need any data bound to it.
* void OnValue2(MyClosure* c, int32_t val) {
* // do stuff ...
* }
*
* // Handler that returns bool so it can return failure if necessary.
* bool OnValue3(MyClosure* c, int32_t val) {
* // do stuff ...
* return ok;
* }
*
* // Member function handler.
* class MyClosure {
* public:
* void OnValue(int32_t val) {
* // do stuff ...
* }
* };
*
* // Takes ownership of the MyHandlerData.
* handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...)));
* handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2));
* handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3));
* handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue));
*/
#ifdef UPB_CXX11
// In C++11, the "template" disambiguator can appear even outside templates,
// so all calls can safely use this pair of macros.
/* In C++11, the "template" disambiguator can appear even outside templates,
* so all calls can safely use this pair of macros. */
#define UpbMakeHandler(f) upb::MatchFunc(f).template GetFunc<f>()
// We have to be careful to only evaluate "d" once.
/* We have to be careful to only evaluate "d" once. */
#define UpbBind(f, d) upb::MatchFunc(f).template GetFunc<f>((d))
#else
// Prior to C++11, the "template" disambiguator may only appear inside a
// template, so the regular macro must not use "template"
/* Prior to C++11, the "template" disambiguator may only appear inside a
* template, so the regular macro must not use "template" */
#define UpbMakeHandler(f) upb::MatchFunc(f).GetFunc<f>()
#define UpbBind(f, d) upb::MatchFunc(f).GetFunc<f>((d))
#endif // UPB_CXX11
#endif /* UPB_CXX11 */
// This macro must be used in C++98 for calls from inside a template. But we
// define this variant in all cases; code that wants to be compatible with both
// C++98 and C++11 should always use this macro when calling from a template.
/* This macro must be used in C++98 for calls from inside a template. But we
* define this variant in all cases; code that wants to be compatible with both
* C++98 and C++11 should always use this macro when calling from a template. */
#define UpbMakeHandlerT(f) upb::MatchFunc(f).template GetFunc<f>()
// We have to be careful to only evaluate "d" once.
/* We have to be careful to only evaluate "d" once. */
#define UpbBindT(f, d) upb::MatchFunc(f).template GetFunc<f>((d))
// Handler: a struct that contains the (handler, data, deleter) tuple that is
// used to register all handlers. Users can Make() these directly but it's
// more convenient to use the UpbMakeHandler/UpbBind macros above.
/* Handler: a struct that contains the (handler, data, deleter) tuple that is
* used to register all handlers. Users can Make() these directly but it's
* more convenient to use the UpbMakeHandler/UpbBind macros above. */
template <class T> class Handler {
public:
// The underlying, handler function signature that upb uses internally.
/* The underlying, handler function signature that upb uses internally. */
typedef T FuncPtr;
// Intentionally implicit.
/* Intentionally implicit. */
template <class F> Handler(F func);
~Handler();
@ -601,7 +618,7 @@ template <class T> class Handler {
}
}
UPB_DISALLOW_COPY_AND_ASSIGN(Handler);
UPB_DISALLOW_COPY_AND_ASSIGN(Handler)
friend class Handlers;
FuncPtr handler_;
mutable HandlerAttributes attr_;
@ -610,15 +627,15 @@ template <class T> class Handler {
upb_handlerfree *cleanup_func_;
};
} // namespace upb
} /* namespace upb */
#endif // __cplusplus
#endif /* __cplusplus */
UPB_BEGIN_EXTERN_C
// Native C API.
/* Native C API. */
// Handler function typedefs.
/* Handler function typedefs. */
typedef bool upb_startmsg_handlerfunc(void *c, const void*);
typedef bool upb_endmsg_handlerfunc(void *c, const void *, upb_status *status);
typedef void* upb_startfield_handlerfunc(void *c, const void *hd);
@ -635,10 +652,10 @@ typedef void *upb_startstr_handlerfunc(void *c, const void *hd,
typedef size_t upb_string_handlerfunc(void *c, const void *hd, const char *buf,
size_t n, const upb_bufhandle* handle);
// upb_bufhandle
/* upb_bufhandle */
size_t upb_bufhandle_objofs(const upb_bufhandle *h);
// upb_handlerattr
/* upb_handlerattr */
void upb_handlerattr_init(upb_handlerattr *attr);
void upb_handlerattr_uninit(upb_handlerattr *attr);
@ -656,7 +673,7 @@ UPB_INLINE const void *upb_handlerattr_handlerdata(
return attr->handler_data_;
}
// upb_handlers
/* upb_handlers */
typedef void upb_handlers_callback(const void *closure, upb_handlers *h);
upb_handlers *upb_handlers_new(const upb_msgdef *m,
const void *owner);
@ -664,12 +681,9 @@ const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
const void *owner,
upb_handlers_callback *callback,
const void *closure);
bool upb_handlers_isfrozen(const upb_handlers *h);
void upb_handlers_ref(const upb_handlers *h, const void *owner);
void upb_handlers_unref(const upb_handlers *h, const void *owner);
void upb_handlers_donateref(const upb_handlers *h, const void *from,
const void *to);
void upb_handlers_checkref(const upb_handlers *h, const void *owner);
/* Include refcounted methods like upb_handlers_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_handlers, upb_handlers_upcast)
const upb_status *upb_handlers_status(upb_handlers *h);
void upb_handlers_clearerr(upb_handlers *h);
@ -740,26 +754,29 @@ UPB_INLINE const void *upb_handlers_gethandlerdata(const upb_handlers *h,
return upb_handlerattr_handlerdata(&h->table[s].attr);
}
// Handler types for single fields.
// Right now we only have one for TYPE_BYTES but ones for other types
// should follow.
//
// These follow the same handlers protocol for fields of a message.
UPB_DEFINE_CLASS0(upb::BytesHandler,
#ifdef __cplusplus
/* Handler types for single fields.
* Right now we only have one for TYPE_BYTES but ones for other types
* should follow.
*
* These follow the same handlers protocol for fields of a message. */
class upb::BytesHandler {
public:
BytesHandler();
~BytesHandler();
,
UPB_DEFINE_STRUCT0(upb_byteshandler,
#else
struct upb_byteshandler {
#endif
upb_handlers_tabent table[3];
));
};
void upb_byteshandler_init(upb_byteshandler *h);
// Caller must ensure that "d" outlives the handlers.
// TODO(haberman): should this have a "freeze" operation? It's not necessary
// for memory management, but could be useful to force immutability and provide
// a convenient moment to verify that all registration succeeded.
/* Caller must ensure that "d" outlives the handlers.
* TODO(haberman): should this have a "freeze" operation? It's not necessary
* for memory management, but could be useful to force immutability and provide
* a convenient moment to verify that all registration succeeded. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
upb_startstr_handlerfunc *func, void *d);
bool upb_byteshandler_setstring(upb_byteshandler *h,
@ -767,7 +784,7 @@ bool upb_byteshandler_setstring(upb_byteshandler *h,
bool upb_byteshandler_setendstr(upb_byteshandler *h,
upb_endfield_handlerfunc *func, void *d);
// "Static" methods
/* "Static" methods */
bool upb_handlers_freeze(upb_handlers *const *handlers, int n, upb_status *s);
upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f);
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
@ -776,7 +793,7 @@ UPB_INLINE upb_selector_t upb_handlers_getendselector(upb_selector_t start) {
return start + 1;
}
// Internal-only.
/* Internal-only. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f);
uint32_t upb_handlers_selectorcount(const upb_fielddef *f);
@ -784,4 +801,4 @@ UPB_END_EXTERN_C
#include "upb/handlers-inl.h"
#endif // UPB_HANDLERS_H
#endif /* UPB_HANDLERS_H */

File diff suppressed because it is too large Load Diff

@ -18,23 +18,24 @@
namespace upb {
namespace json {
class Parser;
} // namespace json
} // namespace upb
} /* namespace json */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser)
/* upb::json::Parser **********************************************************/
// Preallocation hint: parser won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the parser library is upgraded without recompiling the application,
// it may be an underestimate.
/* Preallocation hint: parser won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the parser library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_JSON_PARSER_SIZE 3568
#ifdef __cplusplus
// Parses an incoming BytesStream, pushing the results to the destination sink.
/* Parses an incoming BytesStream, pushing the results to the destination
* sink. */
class upb::json::Parser {
public:
static Parser* Create(Environment* env, Sink* output);
@ -42,7 +43,7 @@ class upb::json::Parser {
BytesSink* input();
private:
UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser);
UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser)
};
#endif
@ -64,10 +65,10 @@ inline Parser* Parser::Create(Environment* env, Sink* output) {
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);
}
} // namespace json
} // namespace upb
} /* namespace json */
} /* namespace upb */
#endif
#endif // UPB_JSON_PARSER_H_
#endif /* UPB_JSON_PARSER_H_ */

@ -36,26 +36,26 @@
typedef struct {
upb_sink sink;
// The current message in which we're parsing, and the field whose value we're
// expecting next.
/* The current message in which we're parsing, and the field whose value we're
* expecting next. */
const upb_msgdef *m;
const upb_fielddef *f;
// We are in a repeated-field context, ready to emit mapentries as
// submessages. This flag alters the start-of-object (open-brace) behavior to
// begin a sequence of mapentry messages rather than a single submessage.
/* We are in a repeated-field context, ready to emit mapentries as
* submessages. This flag alters the start-of-object (open-brace) behavior to
* begin a sequence of mapentry messages rather than a single submessage. */
bool is_map;
// We are in a map-entry message context. This flag is set when parsing the
// value field of a single map entry and indicates to all value-field parsers
// (subobjects, strings, numbers, and bools) that the map-entry submessage
// should end as soon as the value is parsed.
/* We are in a map-entry message context. This flag is set when parsing the
* value field of a single map entry and indicates to all value-field parsers
* (subobjects, strings, numbers, and bools) that the map-entry submessage
* should end as soon as the value is parsed. */
bool is_mapentry;
// If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
// message's map field that we're currently parsing. This differs from |f|
// because |f| is the field in the *current* message (i.e., the map-entry
// message itself), not the parent's field that leads to this map.
/* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
* message's map field that we're currently parsing. This differs from |f|
* because |f| is the field in the *current* message (i.e., the map-entry
* message itself), not the parent's field that leads to this map. */
const upb_fielddef *mapfield;
} upb_jsonparser_frame;
@ -64,41 +64,41 @@ struct upb_json_parser {
upb_byteshandler input_handler_;
upb_bytessink input_;
// Stack to track the JSON scopes we are in.
/* Stack to track the JSON scopes we are in. */
upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
upb_jsonparser_frame *top;
upb_jsonparser_frame *limit;
upb_status *status;
// Ragel's internal parsing stack for the parsing state machine.
/* Ragel's internal parsing stack for the parsing state machine. */
int current_state;
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// The handle for the current buffer.
/* The handle for the current buffer. */
const upb_bufhandle *handle;
// Accumulate buffer. See details in parser.rl.
/* Accumulate buffer. See details in parser.rl. */
const char *accumulated;
size_t accumulated_len;
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
/* Multi-part text data. See details in parser.rl. */
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
/* Input capture. See details in parser.rl. */
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
/* Intermediate result of parsing a unicode escape sequence. */
uint32_t digit;
};
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
/* Used to signal that a capture has been suspended. */
static char suspend_capture;
static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
@ -123,8 +123,8 @@ static bool check_stack(upb_json_parser *p) {
return true;
}
// There are GCC/Clang built-ins for overflow checking which we could start
// using if there was any performance benefit to it.
/* There are GCC/Clang built-ins for overflow checking which we could start
* using if there was any performance benefit to it. */
static bool checked_add(size_t a, size_t b, size_t *c) {
if (SIZE_MAX - a < b) return false;
@ -133,7 +133,7 @@ static bool checked_add(size_t a, size_t b, size_t *c) {
}
static size_t saturating_multiply(size_t a, size_t b) {
// size_t is unsigned, so this is defined behavior even on overflow.
/* size_t is unsigned, so this is defined behavior even on overflow. */
size_t ret = a * b;
if (b != 0 && ret / b != a) {
ret = SIZE_MAX;
@ -144,7 +144,7 @@ static size_t saturating_multiply(size_t a, size_t b) {
/* Base64 decoding ************************************************************/
// TODO(haberman): make this streaming.
/* TODO(haberman): make this streaming. */
static const signed char b64table[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
@ -181,19 +181,22 @@ static const signed char b64table[] = {
-1, -1, -1, -1, -1, -1, -1, -1
};
// Returns the table value sign-extended to 32 bits. Knowing that the upper
// bits will be 1 for unrecognized characters makes it easier to check for
// this error condition later (see below).
/* Looks up the base64 table entry for "ch" and widens it to 32 bits.  The
 * table holds signed chars, so the -1 stored for an unrecognized character
 * comes back with all of its upper bits set; callers rely on that to detect
 * a bad character after combining several lookups (see below). */
int32_t b64lookup(unsigned char ch) {
  const signed char entry = b64table[ch];
  return entry;
}
// Returns true if the given character is not a valid base64 character or
// padding.
/* Reports whether "ch" can NOT appear in base64 input: that is, it is neither
 * the padding character '=' nor a character that b64lookup() recognizes. */
bool nonbase64(unsigned char ch) {
  /* Padding is always acceptable, whatever the table says about it. */
  if (ch == '=') return false;
  return b64lookup(ch) == -1;
}
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
size_t len) {
const char *limit = ptr + len;
for (; ptr < limit; ptr += 4) {
uint32_t val;
char output[3];
if (limit - ptr < 4) {
upb_status_seterrf(p->status,
"Base64 input for bytes field not a multiple of 4: %s",
@ -201,17 +204,16 @@ static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
return false;
}
uint32_t val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12 |
b64lookup(ptr[2]) << 6 |
b64lookup(ptr[3]);
val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12 |
b64lookup(ptr[2]) << 6 |
b64lookup(ptr[3]);
// Test the upper bit; returns true if any of the characters returned -1.
/* Test the upper bit; returns true if any of the characters returned -1. */
if (val & 0x80000000) {
goto otherchar;
}
char output[3];
output[0] = val >> 16;
output[1] = (val >> 8) & 0xff;
output[2] = val & 0xff;
@ -227,29 +229,34 @@ otherchar:
upb_fielddef_name(p->top->f));
return false;
} if (ptr[2] == '=') {
// Last group contains only two input bytes, one output byte.
uint32_t val;
char output;
/* Last group contains only two input bytes, one output byte. */
if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
goto badpadding;
}
uint32_t val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12;
val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12;
assert(!(val & 0x80000000));
char output = val >> 16;
output = val >> 16;
upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
return true;
} else {
// Last group contains only three input bytes, two output bytes.
uint32_t val;
char output[2];
/* Last group contains only three input bytes, two output bytes. */
if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
goto badpadding;
}
uint32_t val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12 |
b64lookup(ptr[2]) << 6;
val = b64lookup(ptr[0]) << 18 |
b64lookup(ptr[1]) << 12 |
b64lookup(ptr[2]) << 6;
char output[2];
output[0] = val >> 16;
output[1] = (val >> 8) & 0xff;
upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
@ -267,23 +274,23 @@ badpadding:
/* Accumulate buffer **********************************************************/
// Functionality for accumulating a buffer.
//
// Some parts of the parser need an entire value as a contiguous string. For
// example, to look up a member name in a hash table, or to turn a string into
// a number, the relevant library routines need the input string to be in
// contiguous memory, even if the value spanned two or more buffers in the
// input. These routines handle that.
//
// In the common case we can just point to the input buffer to get this
// contiguous string and avoid any actual copy. So we optimistically begin
// this way. But there are a few cases where we must instead copy into a
// separate buffer:
//
// 1. The string was not contiguous in the input (it spanned buffers).
//
// 2. The string included escape sequences that need to be interpreted to get
// the true value in a contiguous buffer.
/* Functionality for accumulating a buffer.
*
* Some parts of the parser need an entire value as a contiguous string. For
* example, to look up a member name in a hash table, or to turn a string into
* a number, the relevant library routines need the input string to be in
* contiguous memory, even if the value spanned two or more buffers in the
* input. These routines handle that.
*
* In the common case we can just point to the input buffer to get this
* contiguous string and avoid any actual copy. So we optimistically begin
* this way. But there are a few cases where we must instead copy into a
* separate buffer:
*
* 1. The string was not contiguous in the input (it spanned buffers).
*
* 2. The string included escape sequences that need to be interpreted to get
* the true value in a contiguous buffer. */
static void assert_accumulate_empty(upb_json_parser *p) {
UPB_UNUSED(p);
@ -296,15 +303,16 @@ static void accumulate_clear(upb_json_parser *p) {
p->accumulated_len = 0;
}
// Used internally by accumulate_append().
/* Used internally by accumulate_append(). */
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
void *mem;
size_t old_size = p->accumulate_buf_size;
size_t new_size = UPB_MAX(old_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
void *mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
if (!mem) {
upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
return false;
@ -315,18 +323,19 @@ static bool accumulate_realloc(upb_json_parser *p, size_t need) {
return true;
}
// Logically appends the given data to the append buffer.
// If "can_alias" is true, we will try to avoid actually copying, but the buffer
// must be valid until the next accumulate_append() call (if any).
/* Logically appends the given data to the append buffer.
* If "can_alias" is true, we will try to avoid actually copying, but the buffer
* must be valid until the next accumulate_append() call (if any). */
static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
bool can_alias) {
size_t need;
if (!p->accumulated && can_alias) {
p->accumulated = buf;
p->accumulated_len = len;
return true;
}
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
@ -346,9 +355,9 @@ static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
return true;
}
// Returns a pointer to the data accumulated since the last accumulate_clear()
// call, and writes the length to *len. This with point either to the input
// buffer or a temporary accumulate buffer.
/* Returns a pointer to the data accumulated since the last accumulate_clear()
* call, and writes the length to *len. This will point either to the input
* buffer or a temporary accumulate buffer. */
static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
assert(p->accumulated);
*len = p->accumulated_len;
@ -358,42 +367,42 @@ static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
/* Multi-part text data *******************************************************/
// When we have text data in the input, it can often come in multiple segments.
// For example, there may be some raw string data followed by an escape
// sequence. The two segments are processed with different logic. Also buffer
// seams in the input can cause multiple segments.
//
// As we see segments, there are two main cases for how we want to process them:
//
// 1. we want to push the captured input directly to string handlers.
//
// 2. we need to accumulate all the parts into a contiguous buffer for further
// processing (field name lookup, string->number conversion, etc).
// This is the set of states for p->multipart_state.
/* When we have text data in the input, it can often come in multiple segments.
* For example, there may be some raw string data followed by an escape
* sequence. The two segments are processed with different logic. Also buffer
* seams in the input can cause multiple segments.
*
* As we see segments, there are two main cases for how we want to process them:
*
* 1. we want to push the captured input directly to string handlers.
*
* 2. we need to accumulate all the parts into a contiguous buffer for further
* processing (field name lookup, string->number conversion, etc). */
/* This is the set of states for p->multipart_state. */
enum {
/* We are not currently processing multipart data. This is the state that
 * multipart_startaccum() and multipart_start() assert before they begin, and
 * the state that multipart_end() restores. */
MULTIPART_INACTIVE = 0,
/* We are processing multipart data by accumulating it into a contiguous
 * buffer. Entered via multipart_startaccum(). */
MULTIPART_ACCUMULATE = 1,
/* We are processing multipart data by pushing each part directly to the
 * current string handlers. */
MULTIPART_PUSHEAGERLY = 2
};
/* Start a multi-part text value where we accumulate the data for processing at
 * the end.
 *
 * No other multipart value may be in progress: the current state is asserted
 * to be MULTIPART_INACTIVE, and assert_accumulate_empty() is run first
 * (presumably verifying that nothing is left in the accumulate buffer --
 * confirm against its definition). On return, p->multipart_state is
 * MULTIPART_ACCUMULATE. */
static void multipart_startaccum(upb_json_parser *p) {
assert_accumulate_empty(p);
assert(p->multipart_state == MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_ACCUMULATE;
}
// Start a multi-part text value where we immediately push text data to a string
// value with the given selector.
/* Start a multi-part text value where we immediately push text data to a string
* value with the given selector. */
static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
assert_accumulate_empty(p);
assert(p->multipart_state == MULTIPART_INACTIVE);
@ -425,8 +434,8 @@ static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
return true;
}
// Note: this invalidates the accumulate buffer! Call only after reading its
// contents.
/* Note: this invalidates the accumulate buffer! Call only after reading its
* contents. */
static void multipart_end(upb_json_parser *p) {
assert(p->multipart_state != MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_INACTIVE;
@ -436,9 +445,9 @@ static void multipart_end(upb_json_parser *p) {
/* Input capture **************************************************************/
// Functionality for capturing a region of the input as text. Gracefully
// handles the case where a buffer seam occurs in the middle of the captured
// region.
/* Functionality for capturing a region of the input as text. Gracefully
* handles the case where a buffer seam occurs in the middle of the captured
* region. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
assert(p->multipart_state != MULTIPART_INACTIVE);
@ -456,24 +465,24 @@ static bool capture_end(upb_json_parser *p, const char *ptr) {
}
}
/* This is called at the end of each input buffer (i.e. when we have hit a
 * buffer seam). If we are in the middle of capturing the input, this
 * processes the unprocessed capture region by handing [p->capture, *ptr) to
 * multipart_text().
 *
 * p:   the parser; nothing happens unless p->capture is set.
 * ptr: in/out pointer to the current input position. It is rewound to the
 *      start of the capture if the captured text could not be processed. */
static void capture_suspend(upb_json_parser *p, const char **ptr) {
  if (p->capture == NULL) {
    return;
  }

  if (!multipart_text(p, p->capture, *ptr - p->capture, false)) {
    /* Need to back up the pointer to the beginning of the capture, since
     * we were not able to actually preserve it. */
    *ptr = p->capture;
    return;
  }

  /* We use this as a signal that we were in the middle of capturing, and
   * that capturing should resume at the beginning of the next buffer.
   *
   * We can't use *ptr here, because we have no guarantee that this pointer
   * will be valid when we resume (if the underlying memory is freed, then
   * using the pointer at all, even to compare to NULL, is likely undefined
   * behavior). */
  p->capture = &suspend_capture;
}
@ -488,8 +497,8 @@ static void capture_resume(upb_json_parser *p, const char *ptr) {
/* Callbacks from the parser **************************************************/
// These are the functions called directly from the parser itself.
// We define these in the same order as their declarations in the parser.
/* These are the functions called directly from the parser itself.
* We define these in the same order as their declarations in the parser. */
static char escape_char(char in) {
switch (in) {
@ -534,8 +543,8 @@ static void hexdigit(upb_json_parser *p, const char *ptr) {
static bool end_hex(upb_json_parser *p) {
uint32_t codepoint = p->digit;
// emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
/* emit the codepoint as UTF-8. */
char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
int length = 0;
if (codepoint <= 0x7F) {
utf8[0] = codepoint;
@ -553,8 +562,8 @@ static bool end_hex(upb_json_parser *p) {
utf8[0] = (codepoint & 0x0F) | 0xE0;
length = 3;
}
// TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
// we have to wait for the next escape to get the full code point).
/* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
* we have to wait for the next escape to get the full code point). */
return multipart_text(p, utf8, length, false);
}
@ -583,17 +592,29 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
static bool parse_number(upb_json_parser *p) {
// strtol() and friends unfortunately do not support specifying the length of
// the input string, so we need to force a copy into a NULL-terminated buffer.
size_t len;
const char *buf;
const char *myend;
char *end;
/* strtol() and friends unfortunately do not support specifying the length of
* the input string, so we need to force a copy into a NULL-terminated buffer. */
if (!multipart_text(p, "\0", 1, false)) {
return false;
}
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len - 1; // One for NULL.
char *end;
buf = accumulate_getptr(p, &len);
myend = buf + len - 1; /* One for NULL. */
/* XXX: We are using strtol to parse integers, but this is wrong as even
* integers can be represented as 1e6 (for example), which strtol can't
* handle correctly.
*
* XXX: Also, we can't handle large integers properly because strto[u]ll
* isn't in C89.
*
* XXX: Also, we don't properly check floats for overflow, since strtof
* isn't in C89. */
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@ -605,7 +626,7 @@ static bool parse_number(upb_json_parser *p) {
break;
}
case UPB_TYPE_INT64: {
long long val = strtoll(p->accumulated, &end, 0);
long long val = strtol(p->accumulated, &end, 0);
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
goto err;
else
@ -621,7 +642,7 @@ static bool parse_number(upb_json_parser *p) {
break;
}
case UPB_TYPE_UINT64: {
unsigned long long val = strtoull(p->accumulated, &end, 0);
unsigned long long val = strtoul(p->accumulated, &end, 0);
if (val > UINT64_MAX || errno == ERANGE || end != myend)
goto err;
else
@ -637,7 +658,7 @@ static bool parse_number(upb_json_parser *p) {
break;
}
case UPB_TYPE_FLOAT: {
float val = strtof(p->accumulated, &end);
float val = strtod(p->accumulated, &end);
if (errno == ERANGE || end != myend)
goto err;
else
@ -659,6 +680,8 @@ err:
}
static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok;
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
upb_status_seterrf(p->status,
"Boolean value specified for non-bool field: %s",
@ -666,7 +689,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
return false;
}
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
@ -676,12 +699,15 @@ static bool start_stringval(upb_json_parser *p) {
assert(p->top->f);
if (upb_fielddef_isstring(p->top->f)) {
upb_jsonparser_frame *inner;
upb_selector_t sel;
if (!check_stack(p)) return false;
// Start a new parser frame: parser frames correspond one-to-one with
// handler frames, and string events occur in a sub-frame.
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
/* Start a new parser frame: parser frames correspond one-to-one with
* handler frames, and string events occur in a sub-frame. */
inner = p->top + 1;
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
@ -690,11 +716,11 @@ static bool start_stringval(upb_json_parser *p) {
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
// For STRING fields we push data directly to the handlers as it is
// parsed. We don't do this yet for BYTES fields, because our base64
// decoder is not streaming.
//
// TODO(haberman): make base64 decoding streaming also.
/* For STRING fields we push data directly to the handlers as it is
* parsed. We don't do this yet for BYTES fields, because our base64
* decoder is not streaming.
*
* TODO(haberman): make base64 decoding streaming also. */
multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
return true;
} else {
@ -702,11 +728,11 @@ static bool start_stringval(upb_json_parser *p) {
return true;
}
} else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
// No need to push a frame -- symbolic enum names in quotes remain in the
// current parser frame.
//
// Enum string values must accumulate so we can look up the value in a table
// once it is complete.
/* No need to push a frame -- symbolic enum names in quotes remain in the
* current parser frame.
*
* Enum string values must accumulate so we can look up the value in a table
* once it is complete. */
multipart_startaccum(p);
return true;
} else {
@ -726,7 +752,7 @@ static bool end_stringval(upb_json_parser *p) {
p->accumulated, p->accumulated_len)) {
return false;
}
// Fall through.
/* Fall through. */
case UPB_TYPE_STRING: {
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
@ -736,7 +762,7 @@ static bool end_stringval(upb_json_parser *p) {
}
case UPB_TYPE_ENUM: {
// Resolve enum symbolic name to integer value.
/* Resolve enum symbolic name to integer value. */
const upb_enumdef *enumdef =
(const upb_enumdef*)upb_fielddef_subdef(p->top->f);
@ -773,18 +799,18 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
// Helper: invoked during parse_mapentry() to emit the mapentry message's key
// field based on the current contents of the accumulate buffer.
/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
* field based on the current contents of the accumulate buffer. */
static bool parse_mapentry_key(upb_json_parser *p) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
// Emit the key field. We do a bit of ad-hoc parsing here because the
// parser state machine has already decided that this is a string field
// name, and we are reinterpreting it as some arbitrary key type. In
// particular, integer and bool keys are quoted, so we need to parse the
// quoted string contents here.
/* Emit the key field. We do a bit of ad-hoc parsing here because the
* parser state machine has already decided that this is a string field
* name, and we are reinterpreting it as some arbitrary key type. In
* particular, integer and bool keys are quoted, so we need to parse the
* quoted string contents here. */
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
if (p->top->f == NULL) {
@ -796,7 +822,7 @@ static bool parse_mapentry_key(upb_json_parser *p) {
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke end_number. The accum buffer has the number's text already.
/* Invoke end_number. The accum buffer has the number's text already. */
if (!parse_number(p)) {
return false;
}
@ -837,47 +863,52 @@ static bool parse_mapentry_key(upb_json_parser *p) {
return true;
}
// Helper: emit one map entry (as a submessage in the map field sequence). This
// is invoked from end_membername(), at the end of the map entry's key string,
// with the map key in the accumulate buffer. It parses the key from that
// buffer, emits the handler calls to start the mapentry submessage (setting up
// its subframe in the process), and sets up state in the subframe so that the
// value parser (invoked next) will emit the mapentry's value field and then
// end the mapentry message.
/* Helper: emit one map entry (as a submessage in the map field sequence). This
* is invoked from end_membername(), at the end of the map entry's key string,
* with the map key in the accumulate buffer. It parses the key from that
* buffer, emits the handler calls to start the mapentry submessage (setting up
* its subframe in the process), and sets up state in the subframe so that the
* value parser (invoked next) will emit the mapentry's value field and then
* end the mapentry message. */
static bool handle_mapentry(upb_json_parser *p) {
// Map entry: p->top->sink is the seq frame, so we need to start a frame
// for the mapentry itself, and then set |f| in that frame so that the map
// value field is parsed, and also set a flag to end the frame after the
// map-entry value is parsed.
const upb_fielddef *mapfield;
const upb_msgdef *mapentrymsg;
upb_jsonparser_frame *inner;
upb_selector_t sel;
/* Map entry: p->top->sink is the seq frame, so we need to start a frame
* for the mapentry itself, and then set |f| in that frame so that the map
* value field is parsed, and also set a flag to end the frame after the
* map-entry value is parsed. */
if (!check_stack(p)) return false;
const upb_fielddef *mapfield = p->top->mapfield;
const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);
mapfield = p->top->mapfield;
mapentrymsg = upb_fielddef_msgsubdef(mapfield);
upb_jsonparser_frame *inner = p->top + 1;
inner = p->top + 1;
p->top->f = mapfield;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = mapentrymsg;
inner->mapfield = mapfield;
inner->is_map = false;
// Don't set this to true *yet* -- we reuse parsing handlers below to push
// the key field value to the sink, and these handlers will pop the frame
// if they see is_mapentry (when invoked by the parser state machine, they
// would have just seen the map-entry value, not key).
/* Don't set this to true *yet* -- we reuse parsing handlers below to push
* the key field value to the sink, and these handlers will pop the frame
* if they see is_mapentry (when invoked by the parser state machine, they
* would have just seen the map-entry value, not key). */
inner->is_mapentry = false;
p->top = inner;
// send STARTMSG in submsg frame.
/* send STARTMSG in submsg frame. */
upb_sink_startmsg(&p->top->sink);
parse_mapentry_key(p);
// Set up the value field to receive the map-entry value.
/* Set up the value field to receive the map-entry value. */
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
p->top->is_mapentry = true; // set up to pop frame after value is parsed.
p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
p->top->mapfield = mapfield;
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no value");
@ -898,7 +929,8 @@ static bool end_membername(upb_json_parser *p) {
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
/* TODO(haberman): Ignore unknown fields if requested/configured to do
* so. */
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
return false;
}
@ -911,19 +943,21 @@ static bool end_membername(upb_json_parser *p) {
}
static void end_member(upb_json_parser *p) {
// If we just parsed a map-entry value, end that frame too.
/* If we just parsed a map-entry value, end that frame too. */
if (p->top->is_mapentry) {
assert(p->top > p->stack);
// send ENDMSG on submsg.
upb_status s = UPB_STATUS_INIT;
upb_selector_t sel;
bool ok;
const upb_fielddef *mapfield;
assert(p->top > p->stack);
/* send ENDMSG on submsg. */
upb_sink_endmsg(&p->top->sink, &s);
const upb_fielddef* mapfield = p->top->mapfield;
mapfield = p->top->mapfield;
// send ENDSUBMSG in repeated-field-of-mapentries frame.
/* send ENDSUBMSG in repeated-field-of-mapentries frame. */
p->top--;
upb_selector_t sel;
bool ok = upb_handlers_getselector(mapfield,
UPB_HANDLER_ENDSUBMSG, &sel);
ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
UPB_ASSERT_VAR(ok, ok);
upb_sink_endsubmsg(&p->top->sink, sel);
}
@ -935,12 +969,15 @@ static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (upb_fielddef_ismap(p->top->f)) {
// Beginning of a map. Start a new parser frame in a repeated-field
// context.
upb_jsonparser_frame *inner;
upb_selector_t sel;
/* Beginning of a map. Start a new parser frame in a repeated-field
* context. */
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
inner = p->top + 1;
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->mapfield = p->top->f;
@ -951,13 +988,16 @@ static bool start_subobject(upb_json_parser *p) {
return true;
} else if (upb_fielddef_issubmsg(p->top->f)) {
// Beginning of a subobject. Start a new parser frame in the submsg
// context.
upb_jsonparser_frame *inner;
upb_selector_t sel;
/* Beginning of a subobject. Start a new parser frame in the submsg
* context. */
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
@ -976,17 +1016,22 @@ static bool start_subobject(upb_json_parser *p) {
/* Closes the frame opened by start_subobject().
 *
 * Pops the current parser frame and then, on the parent frame's sink, emits
 * ENDSEQ if the popped frame was a map (is_map), or ENDSUBMSG otherwise.
 *
 * The selector is looked up only after the pop, so getsel_for_handlertype()
 * sees the parent frame. */
static void end_subobject(upb_json_parser *p) {
  /* C89: declarations precede statements, so "sel" is declared once here. */
  upb_selector_t sel;

  /* There must be a frame to pop (mirrors the check in end_array()). */
  assert(p->top > p->stack);

  if (p->top->is_map) {
    p->top--;
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
  } else {
    p->top--;
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(&p->top->sink, sel);
  }
}
static bool start_array(upb_json_parser *p) {
upb_jsonparser_frame *inner;
upb_selector_t sel;
assert(p->top->f);
if (!upb_fielddef_isseq(p->top->f)) {
@ -998,8 +1043,8 @@ static bool start_array(upb_json_parser *p) {
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
inner = p->top + 1;
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
@ -1011,10 +1056,12 @@ static bool start_array(upb_json_parser *p) {
}
/* Closes the frame opened by start_array(): pops the current parser frame and
 * emits ENDSEQ on the parent frame's sink.
 *
 * The selector is looked up only after the pop, so getsel_for_handlertype()
 * sees the parent frame. */
static void end_array(upb_json_parser *p) {
  /* C89: declared once, ahead of any statement. */
  upb_selector_t sel;

  /* There must be a frame to pop. */
  assert(p->top > p->stack);

  p->top--;
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, sel);
}
@ -1037,20 +1084,20 @@ static void end_object(upb_json_parser *p) {
/* The actual parser **********************************************************/
// What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above.
//
// Ragel has an extensive set of functionality, and we use only a small part of
// it. There are many action types but we only use a few:
//
// ">" -- transition into a machine
// "%" -- transition out of a machine
// "@" -- transition into a final state of a machine.
//
// "@" transitions are tricky because a machine can transition into a final
// state repeatedly. But in some cases we know this can't happen, for example
// a string which is delimited by a final '"' can only transition into its
// final state once, when the closing '"' is seen.
/* What follows is the Ragel parser itself. The language is specified in Ragel
* and the actions call our C functions above.
*
* Ragel has an extensive set of functionality, and we use only a small part of
* it. There are many action types but we only use a few:
*
* ">" -- transition into a machine
* "%" -- transition out of a machine
* "@" -- transition into a final state of a machine.
*
* "@" transitions are tricky because a machine can transition into a final
* state repeatedly. But in some cases we know this can't happen, for example
* a string which is delimited by a final '"' can only transition into its
* final state once, when the closing '"' is seen. */
%%{
machine json;
@ -1154,12 +1201,9 @@ static void end_object(upb_json_parser *p) {
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_json_parser *parser = closure;
parser->handle = handle;
// Variables used by Ragel's generated code.
/* Variables used by Ragel's generated code. */
int cs = parser->current_state;
int *stack = parser->parser_stack;
int top = parser->parser_top;
@ -1167,6 +1211,11 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *p = buf;
const char *pe = buf + size;
parser->handle = handle;
UPB_UNUSED(hd);
UPB_UNUSED(handle);
capture_resume(parser, buf);
%% write exec;
@ -1178,7 +1227,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
}
error:
// Save parsing state back to parser.
/* Save parsing state back to parser. */
parser->current_state = cs;
parser->parser_top = top;
@ -1189,7 +1238,7 @@ bool end(void *closure, const void *hd) {
UPB_UNUSED(closure);
UPB_UNUSED(hd);
// Prevent compile warning on unused static constants.
/* Prevent compile warning on unused static constants. */
UPB_UNUSED(json_start);
UPB_UNUSED(json_en_number_machine);
UPB_UNUSED(json_en_string_machine);
@ -1199,14 +1248,15 @@ bool end(void *closure, const void *hd) {
}
static void json_parser_reset(upb_json_parser *p) {
int cs;
int top;
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
int cs;
int top;
// Emit Ragel initialization of the parser.
/* Emit Ragel initialization of the parser. */
%% write init;
p->current_state = cs;
p->parser_top = top;
@ -1239,8 +1289,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
upb_sink_reset(&p->top->sink, output->handlers, output->closure);
p->top->m = upb_handlers_msgdef(output->handlers);
// If this fails, uncomment and increase the value in parser.h.
// fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before);
/* If this fails, uncomment and increase the value in parser.h.
* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
return p;
}

@ -17,26 +17,26 @@
struct upb_json_printer {
upb_sink input_;
/* BytesSink closure. Filled in by upb_bytessink_start() and passed back on
 * every upb_bytessink_putbuf() call to output_. */
void *subc_;
upb_bytessink *output_;

/* We track the depth so that we know when to emit startstr/endstr on the
 * output. Also used as the index into first_elem_. */
int depth_;

/* Have we emitted the first element? This state is necessary to emit commas
 * without leaving a trailing comma in arrays/maps. We keep this state per
 * frame depth.
 *
 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
 * We count frames (contexts in which we separate elements by commas) as both
 * repeated fields and messages (maps), and the worst case is a
 * message->repeated field->submessage->repeated field->... nesting. */
bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
// StringPiece; a pointer plus a length.
/* StringPiece; a pointer plus a length. */
typedef struct {
const char *ptr;
size_t len;
@ -50,11 +50,11 @@ strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
return ret;
}
// ------------ JSON string printing: values, maps, arrays --------------------
/* ------------ JSON string printing: values, maps, arrays ------------------ */
/* Writes len bytes starting at buf to the printer's output bytes sink.
 *
 * The sink is expected to consume everything; a short write is only caught
 * through UPB_ASSERT_VAR. */
static void print_data(
    upb_json_printer *p, const char *buf, unsigned int len) {
  size_t written;

  /* TODO: Will need to change if we support pushback from the sink. */
  written = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT_VAR(written, written == len);
}
@ -66,18 +66,18 @@ static void print_comma(upb_json_printer *p) {
p->first_elem_[p->depth_] = false;
}
// Helpers that print properly formatted elements to the JSON output stream.
/* Helpers that print properly formatted elements to the JSON output stream. */
/* Used for escaping control chars in strings: every byte below this value is
 * treated as a control character by is_json_escaped(). */
static const char kControlCharLimit = 0x20;

/* Returns true if the byte c has to be written as an escape sequence inside a
 * JSON string: control characters below kControlCharLimit, the double quote
 * and the backslash. See RFC 4627. */
UPB_INLINE bool is_json_escaped(char c) {
  /* Compare as unsigned char so that bytes >= 0x80 are not seen as negative
   * (and therefore "below the limit") where plain char is signed. */
  unsigned char uc = (unsigned char)c;
  return uc < kControlCharLimit || uc == '"' || uc == '\\';
}
static inline char* json_nice_escape(char c) {
UPB_INLINE char* json_nice_escape(char c) {
switch (c) {
case '"': return "\\\"";
case '\\': return "\\\\";
@ -90,46 +90,47 @@ static inline char* json_nice_escape(char c) {
}
}
// Write a properly escaped string chunk. The surrounding quotes are *not*
// printed; this is so that the caller has the option of emitting the string
// content in chunks.
/* Write a properly escaped string chunk. The surrounding quotes are *not*
* printed; this is so that the caller has the option of emitting the string
* content in chunks. */
static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
const char* unescaped_run = NULL;
for (unsigned int i = 0; i < len; i++) {
unsigned int i;
for (i = 0; i < len; i++) {
char c = buf[i];
// Handle escaping.
/* Handle escaping. */
if (is_json_escaped(c)) {
// Use a "nice" escape, like \n, if one exists for this character.
/* Use a "nice" escape, like \n, if one exists for this character. */
const char* escape = json_nice_escape(c);
// If we don't have a specific 'nice' escape code, use a \uXXXX-style
// escape.
/* If we don't have a specific 'nice' escape code, use a \uXXXX-style
* escape. */
char escape_buf[8];
if (!escape) {
unsigned char byte = (unsigned char)c;
snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
_upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
escape = escape_buf;
}
// N.B. that we assume that the input encoding is equal to the output
// encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
// can simply pass the bytes through.
/* N.B. that we assume that the input encoding is equal to the output
* encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
* can simply pass the bytes through. */
// If there's a current run of unescaped chars, print that run first.
/* If there's a current run of unescaped chars, print that run first. */
if (unescaped_run) {
print_data(p, unescaped_run, &buf[i] - unescaped_run);
unescaped_run = NULL;
}
// Then print the escape code.
/* Then print the escape code. */
print_data(p, escape, strlen(escape));
} else {
// Add to the current unescaped run of characters.
/* Add to the current unescaped run of characters. */
if (unescaped_run == NULL) {
unescaped_run = &buf[i];
}
}
}
// If the string ended in a run of unescaped characters, print that last run.
/* If the string ended in a run of unescaped characters, print that last run. */
if (unescaped_run) {
print_data(p, unescaped_run, &buf[len] - unescaped_run);
}
@ -137,42 +138,42 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
#define CHKLENGTH(x) if (!(x)) return -1;
// Helpers that format floating point values according to our custom formats.
// Right now we use %.8g and %.17g for float/double, respectively, to match
// proto2::util::JsonFormat's defaults. May want to change this later.
/* Helpers that format floating point values according to our custom formats.
* Right now we use %.8g and %.17g for float/double, respectively, to match
* proto2::util::JsonFormat's defaults. May want to change this later. */
/* Formats val into buf (capacity length bytes) using "%.17g".
 *
 * Returns the number of characters written. If formatting produced nothing
 * or did not fit in length bytes, CHKLENGTH returns -1, which is
 * (size_t)-1 for this return type. */
static size_t fmt_double(double val, char* buf, size_t length) {
  /* _upb_snprintf rather than snprintf: the latter is not part of C89. */
  size_t n = _upb_snprintf(buf, length, "%.17g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
/* Formats val into buf (capacity length bytes) using "%.8g".
 *
 * Returns the number of characters written. If formatting produced nothing
 * or did not fit in length bytes, CHKLENGTH returns -1, which is
 * (size_t)-1 for this return type. */
static size_t fmt_float(float val, char* buf, size_t length) {
  /* _upb_snprintf rather than snprintf: the latter is not part of C89. */
  size_t n = _upb_snprintf(buf, length, "%.8g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
/* Writes the JSON literal "true" or "false" for val into buf (capacity
 * length bytes).
 *
 * Returns the number of characters written. If formatting produced nothing
 * or did not fit in length bytes, CHKLENGTH returns -1, which is
 * (size_t)-1 for this return type. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  /* _upb_snprintf rather than snprintf: the latter is not part of C89. */
  size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
  CHKLENGTH(n > 0 && n < length);
  return n;
}
/* Formats a signed integer as decimal text into buf (capacity length bytes).
 *
 * This is the formatter behind the int32_t, uint32_t and int64_t handlers, so
 * the parameter must be able to hold every value of all three types. "long"
 * is only guaranteed to be 32 bits wide, which would truncate int64_t values
 * and turn large uint32_t values negative on such platforms; "long long" with
 * "%lld" is used instead, matching fmt_uint64's use of "unsigned long long".
 *
 * Returns the number of characters written. If formatting produced nothing
 * or did not fit in length bytes, CHKLENGTH returns -1, which is
 * (size_t)-1 for this return type. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  /* _upb_snprintf rather than snprintf: the latter is not part of C89. */
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
/* Formats an unsigned 64-bit integer as decimal text into buf (capacity
 * length bytes).
 *
 * Returns the number of characters written. If formatting produced nothing
 * or did not fit in length bytes, CHKLENGTH returns -1, which is
 * (size_t)-1 for this return type. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  /* _upb_snprintf rather than snprintf: the latter is not part of C89. */
  size_t n = _upb_snprintf(buf, length, "%llu", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
// Print a map key given a field name. Called by scalar field handlers and by
// startseq for repeated fields.
/* Print a map key given a field name. Called by scalar field handlers and by
* startseq for repeated fields. */
static bool putkey(void *closure, const void *handler_data) {
upb_json_printer *p = closure;
const strpc *key = handler_data;
@ -189,9 +190,9 @@ static bool putkey(void *closure, const void *handler_data) {
#define TYPE_HANDLERS(type, fmt_func) \
static bool put##type(void *closure, const void *handler_data, type val) { \
upb_json_printer *p = closure; \
UPB_UNUSED(handler_data); \
char data[64]; \
size_t length = fmt_func(val, data, sizeof(data)); \
UPB_UNUSED(handler_data); \
CHKFMT(length); \
print_data(p, data, length); \
return true; \
@ -220,20 +221,20 @@ static bool putkey(void *closure, const void *handler_data) {
return true; \
}
/* Instantiate the scalar value handlers (put<type>() and friends), pairing
 * each C value type with the formatter that renders it. The 32-bit integer
 * types share fmt_int64.
 *
 * Each handler set is instantiated exactly once, and there is deliberately no
 * ';' after an invocation: the macros expand to complete function
 * definitions, and a stray ';' at file scope is not valid C89. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64)
TYPE_HANDLERS(uint32_t, fmt_int64)
TYPE_HANDLERS(int64_t,  fmt_int64)
TYPE_HANDLERS(uint64_t, fmt_uint64)

/* double and float are not allowed to be map keys. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
@ -247,9 +248,11 @@ static bool scalar_enum(void *closure, const void *handler_data,
int32_t val) {
const EnumHandlerData *hd = handler_data;
upb_json_printer *p = closure;
const char *symbolic_name;
CHK(putkey(closure, hd->keyname));
const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
symbolic_name = upb_enumdef_iton(hd->enumdef, val);
if (symbolic_name) {
print_data(p, "\"", 1);
putstring(p, symbolic_name, strlen(symbolic_name));
@ -300,8 +303,8 @@ static void *scalar_startsubmsg(void *closure, const void *handler_data) {
}
static void *repeated_startsubmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_comma(p);
return closure;
}
@ -318,8 +321,8 @@ static void end_frame(upb_json_printer *p) {
}
static bool printer_startmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
if (p->depth_ == 0) {
upb_bytessink_start(p->output_, 0, &p->subc_);
}
@ -328,9 +331,9 @@ static bool printer_startmsg(void *closure, const void *handler_data) {
}
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
UPB_UNUSED(s);
upb_json_printer *p = closure;
end_frame(p);
if (p->depth_ == 0) {
upb_bytessink_end(p->output_);
@ -348,8 +351,8 @@ static void *startseq(void *closure, const void *handler_data) {
}
static bool endseq(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "]", 1);
p->depth_--;
return true;
@ -365,8 +368,8 @@ static void *startmap(void *closure, const void *handler_data) {
}
static bool endmap(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "}", 1);
p->depth_--;
return true;
@ -374,32 +377,35 @@ static bool endmap(void *closure, const void *handler_data) {
static size_t putstr(void *closure, const void *handler_data, const char *str,
size_t len, const upb_bufhandle *handle) {
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
UPB_UNUSED(handle);
upb_json_printer *p = closure;
putstring(p, str, len);
return len;
}
// This has to Base64 encode the bytes, because JSON has no "bytes" type.
/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
static size_t putbytes(void *closure, const void *handler_data, const char *str,
size_t len, const upb_bufhandle *handle) {
UPB_UNUSED(handler_data);
UPB_UNUSED(handle);
upb_json_printer *p = closure;
// This is the regular base64, not the "web-safe" version.
/* This is the regular base64, not the "web-safe" version. */
static const char base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Base64-encode.
/* Base64-encode. */
char data[16000];
const char *limit = data + sizeof(data);
const unsigned char *from = (const unsigned char*)str;
char *to = data;
size_t remaining = len;
size_t bytes;
UPB_UNUSED(handler_data);
UPB_UNUSED(handle);
while (remaining > 2) {
// TODO(haberman): handle encoded lengths > sizeof(data)
/* TODO(haberman): handle encoded lengths > sizeof(data) */
UPB_ASSERT_VAR(limit, (limit - to) >= 4);
to[0] = base64[from[0] >> 2];
@ -431,7 +437,7 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str,
break;
}
size_t bytes = to - data;
bytes = to - data;
print_data(p, "\"", 1);
putstring(p, data, bytes);
print_data(p, "\"", 1);
@ -440,9 +446,9 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str,
static void *scalar_startstr(void *closure, const void *handler_data,
size_t size_hint) {
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
UPB_UNUSED(size_hint);
upb_json_printer *p = closure;
CHK(putkey(closure, handler_data));
print_data(p, "\"", 1);
return p;
@ -456,17 +462,17 @@ static size_t scalar_str(void *closure, const void *handler_data,
}
static bool scalar_endstr(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "\"", 1);
return true;
}
static void *repeated_startstr(void *closure, const void *handler_data,
size_t size_hint) {
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
UPB_UNUSED(size_hint);
upb_json_printer *p = closure;
print_comma(p);
print_data(p, "\"", 1);
return p;
@ -480,17 +486,17 @@ static size_t repeated_str(void *closure, const void *handler_data,
}
static bool repeated_endstr(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "\"", 1);
return true;
}
static void *mapkeyval_startstr(void *closure, const void *handler_data,
size_t size_hint) {
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
UPB_UNUSED(size_hint);
upb_json_printer *p = closure;
print_data(p, "\"", 1);
return p;
}
@ -503,15 +509,15 @@ static size_t mapkey_str(void *closure, const void *handler_data,
}
static bool mapkey_endstr(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "\":", 2);
return true;
}
static bool mapvalue_endstr(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
UPB_UNUSED(handler_data);
print_data(p, "\"", 1);
return true;
}
@ -552,30 +558,31 @@ static void set_enum_hd(upb_handlers *h,
upb_handlerattr_sethandlerdata(attr, hd);
}
// Set up handlers for a mapentry submessage (i.e., an individual key/value pair
// in a map).
//
// TODO: Handle missing key, missing value, out-of-order key/value, or repeated
// key or value cases properly. The right way to do this is to allocate a
// temporary structure at the start of a mapentry submessage, store key and
// value data in it as key and value handlers are called, and then print the
// key/value pair once at the end of the submessage. If we don't do this, we
// should at least detect the case and throw an error. However, so far all of
// our sources that emit mapentry messages do so canonically (with one key
// field, and then one value field), so this is not a pressing concern at the
// moment.
/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
* in a map).
*
* TODO: Handle missing key, missing value, out-of-order key/value, or repeated
* key or value cases properly. The right way to do this is to allocate a
* temporary structure at the start of a mapentry submessage, store key and
* value data in it as key and value handlers are called, and then print the
* key/value pair once at the end of the submessage. If we don't do this, we
* should at least detect the case and throw an error. However, so far all of
* our sources that emit mapentry messages do so canonically (with one key
* field, and then one value field), so this is not a pressing concern at the
* moment. */
void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
// A mapentry message is printed simply as '"key": value'. Rather than
// special-case key and value for every type below, we just handle both
// fields explicitly here.
/* A mapentry message is printed simply as '"key": value'. Rather than
* special-case key and value for every type below, we just handle both
* fields explicitly here. */
const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
UPB_UNUSED(closure);
switch (upb_fielddef_type(key_field)) {
case UPB_TYPE_INT32:
upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
@ -643,8 +650,8 @@ void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
break;
}
case UPB_TYPE_MESSAGE:
// No handler necessary -- the submsg handlers will print the message
// as appropriate.
/* No handler necessary -- the submsg handlers will print the message
* as appropriate. */
break;
}
@ -652,14 +659,16 @@ void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
}
void printer_sethandlers(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
bool is_mapentry = upb_msgdef_mapentry(md);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
upb_msg_field_iter i;
UPB_UNUSED(closure);
if (is_mapentry) {
// mapentry messages are sufficiently different that we handle them
// separately.
/* mapentry messages are sufficiently different that we handle them
* separately. */
printer_sethandlers_mapentry(closure, h);
return;
}
@ -676,7 +685,6 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
} \
break;
upb_msg_field_iter i;
upb_msg_field_begin(&i, md);
for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
@ -701,9 +709,9 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
TYPE(UPB_TYPE_INT64, int64, int64_t);
TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
case UPB_TYPE_ENUM: {
// For now, we always emit symbolic names for enums. We may want an
// option later to control this behavior, but we will wait for a real
// need first.
/* For now, we always emit symbolic names for enums. We may want an
* option later to control this behavior, but we will wait for a real
* need first. */
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
set_enum_hd(h, f, &enum_attr);
@ -728,8 +736,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
}
break;
case UPB_TYPE_BYTES:
// XXX: this doesn't support strings that span buffers yet. The base64
// encoder will need to be made resumable for this to work properly.
/* XXX: this doesn't support strings that span buffers yet. The base64
* encoder will need to be made resumable for this to work properly. */
if (upb_fielddef_isseq(f)) {
upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
} else {
@ -772,7 +780,7 @@ upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
json_printer_reset(p);
upb_sink_reset(&p->input_, h, p);
// If this fails, increase the value in printer.h.
/* If this fails, increase the value in printer.h. */
assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
return p;
}

@ -18,11 +18,11 @@
namespace upb {
namespace json {
class Printer;
} // namespace json
} // namespace upb
} /* namespace json */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer)
/* upb::json::Printer *********************************************************/
@ -31,29 +31,29 @@ UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer);
#ifdef __cplusplus
// Prints an incoming stream of data to a BytesSink in JSON format.
/* Prints an incoming stream of data to a BytesSink in JSON format. */
class upb::json::Printer {
public:
static Printer* Create(Environment* env, const upb::Handlers* handlers,
BytesSink* output);
// The input to the printer.
/* The input to the printer. */
Sink* input();
// Returns handlers for printing according to the specified schema.
/* Returns handlers for printing according to the specified schema. */
static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
static const size_t kSize = UPB_JSON_PRINTER_SIZE;
private:
UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer);
UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer)
};
#endif
UPB_BEGIN_EXTERN_C
// Native C API.
/* Native C API. */
upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
upb_bytessink *output);
upb_sink *upb_json_printer_input(upb_json_printer *p);
@ -76,9 +76,9 @@ inline reffed_ptr<const Handlers> Printer::NewHandlers(
const Handlers* h = upb_json_printer_newhandlers(md, &h);
return reffed_ptr<const Handlers>(h, &h);
}
} // namespace json
} // namespace upb
} /* namespace json */
} /* namespace upb */
#endif
#endif // UPB_JSON_TYPED_PRINTER_H_
#endif /* UPB_JSON_TYPED_PRINTER_H_ */

@ -45,14 +45,14 @@ static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
upb_inttable_begin(&i, &g->methods);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
visit(r, UPB_UPCAST(method), closure);
visit(r, upb_pbdecodermethod_upcast(method), closure);
}
}
mgroup *newgroup(const void *owner) {
mgroup *g = malloc(sizeof(*g));
static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
upb_refcounted_init(UPB_UPCAST(g), &vtbl, owner);
upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
g->bytecode = NULL;
g->bytecode_end = NULL;
@ -83,18 +83,18 @@ static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
mgroup *group) {
static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
upb_pbdecodermethod *ret = malloc(sizeof(*ret));
upb_refcounted_init(UPB_UPCAST(ret), &vtbl, &ret);
upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
upb_byteshandler_init(&ret->input_handler_);
// The method references the group and vice-versa, in a circular reference.
/* The method references the group and vice-versa, in a circular reference. */
upb_ref2(ret, group);
upb_ref2(group, ret);
upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
upb_refcounted_unref(UPB_UPCAST(ret), &ret);
upb_pbdecodermethod_unref(ret, &ret);
ret->group = UPB_UPCAST(group);
ret->group = mgroup_upcast_mutable(group);
ret->dest_handlers_ = dest_handlers;
ret->is_native_ = false; // If we JIT, it will update this later.
ret->is_native_ = false; /* If we JIT, it will update this later. */
upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
if (ret->dest_handlers_) {
@ -103,25 +103,6 @@ static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
return ret;
}
void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner) {
upb_refcounted_ref(UPB_UPCAST(m), owner);
}
void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m,
const void *owner) {
upb_refcounted_unref(UPB_UPCAST(m), owner);
}
void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
const void *from, const void *to) {
upb_refcounted_donateref(UPB_UPCAST(m), from, to);
}
void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
const void *owner) {
upb_refcounted_checkref(UPB_UPCAST(m), owner);
}
const upb_handlers *upb_pbdecodermethod_desthandlers(
const upb_pbdecodermethod *m) {
return m->dest_handlers_;
@ -138,10 +119,11 @@ bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
const upb_pbdecodermethod *upb_pbdecodermethod_new(
const upb_pbdecodermethodopts *opts, const void *owner) {
const upb_pbdecodermethod *ret;
upb_pbcodecache cache;
upb_pbcodecache_init(&cache);
const upb_pbdecodermethod *ret =
upb_pbcodecache_getdecodermethod(&cache, opts);
ret = upb_pbcodecache_getdecodermethod(&cache, opts);
upb_pbdecodermethod_ref(ret, owner);
upb_pbcodecache_uninit(&cache);
return ret;
@ -150,7 +132,7 @@ const upb_pbdecodermethod *upb_pbdecodermethod_new(
/* bytecode compiler **********************************************************/
// Data used only at compilation time.
/* Data used only at compilation time. */
typedef struct {
mgroup *group;
@ -158,15 +140,17 @@ typedef struct {
int fwd_labels[MAXLABEL];
int back_labels[MAXLABEL];
// For fields marked "lazy", parse them lazily or eagerly?
/* For fields marked "lazy", parse them lazily or eagerly? */
bool lazy;
} compiler;
static compiler *newcompiler(mgroup *group, bool lazy) {
compiler *ret = malloc(sizeof(*ret));
int i;
ret->group = group;
ret->lazy = lazy;
for (int i = 0; i < MAXLABEL; i++) {
for (i = 0; i < MAXLABEL; i++) {
ret->fwd_labels[i] = EMPTYLABEL;
ret->back_labels[i] = EMPTYLABEL;
}
@ -179,7 +163,7 @@ static void freecompiler(compiler *c) {
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
// How many words an instruction is.
/* How many words an instruction is. */
static int instruction_len(uint32_t instr) {
switch (getop(instr)) {
case OP_SETDISPATCH: return 1 + ptr_words;
@ -195,8 +179,8 @@ bool op_has_longofs(int32_t instruction) {
case OP_BRANCH:
case OP_CHECKDELIM:
return true;
// The "tag" instructions only have 8 bytes available for the jump target,
// but that is ok because these opcodes only require short jumps.
/* The "tag" instructions only have 8 bytes available for the jump target,
* but that is ok because these opcodes only require short jumps. */
case OP_TAG1:
case OP_TAG2:
case OP_TAGN:
@ -221,18 +205,21 @@ static void setofs(uint32_t *instruction, int32_t ofs) {
} else {
*instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
}
assert(getofs(*instruction) == ofs); // Would fail in cases of overflow.
assert(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */
}
static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
// Defines a local label at the current PC location. All previous forward
// references are updated to point to this location. The location is noted
// for any future backward references.
/* Defines a local label at the current PC location. All previous forward
* references are updated to point to this location. The location is noted
* for any future backward references. */
static void label(compiler *c, unsigned int label) {
int val;
uint32_t *codep;
assert(label < MAXLABEL);
int val = c->fwd_labels[label];
uint32_t *codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
val = c->fwd_labels[label];
codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
while (codep) {
int ofs = getofs(*codep);
setofs(codep, c->pc - codep - instruction_len(*codep));
@ -242,24 +229,25 @@ static void label(compiler *c, unsigned int label) {
c->back_labels[label] = pcofs(c);
}
// Creates a reference to a numbered label; either a forward reference
// (positive arg) or backward reference (negative arg). For forward references
// the value returned now is actually a "next" pointer into a linked list of all
// instructions that use this label and will be patched later when the label is
// defined with label().
//
// The returned value is the offset that should be written into the instruction.
/* Creates a reference to a numbered label; either a forward reference
* (positive arg) or backward reference (negative arg). For forward references
* the value returned now is actually a "next" pointer into a linked list of all
* instructions that use this label and will be patched later when the label is
* defined with label().
*
* The returned value is the offset that should be written into the instruction.
*/
static int32_t labelref(compiler *c, int label) {
assert(label < MAXLABEL);
if (label == LABEL_DISPATCH) {
// No resolving required.
/* No resolving required. */
return 0;
} else if (label < 0) {
// Backward local label. Relative to the next instruction.
/* Backward local label. Relative to the next instruction. */
uint32_t from = (c->pc + 1) - c->group->bytecode;
return c->back_labels[-label] - from;
} else {
// Forward local label: prepend to (possibly-empty) linked list.
/* Forward local label: prepend to (possibly-empty) linked list. */
int *lptr = &c->fwd_labels[label];
int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
*lptr = pcofs(c);
@ -273,7 +261,7 @@ static void put32(compiler *c, uint32_t v) {
int ofs = pcofs(c);
size_t oldsize = g->bytecode_end - g->bytecode;
size_t newsize = UPB_MAX(oldsize * 2, 64);
// TODO(haberman): handle OOM.
/* TODO(haberman): handle OOM. */
g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
g->bytecode_end = g->bytecode + newsize;
c->pc = g->bytecode + ofs;
@ -372,19 +360,22 @@ static void putop(compiler *c, opcode op, ...) {
#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
const char *upb_pbdecoder_getopname(unsigned int op) {
#define OP(op) [OP_ ## op] = "OP_" #op
#define T(op) OP(PARSE_##op)
static const char *names[] = {
"<no opcode>",
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
OP(STARTMSG), OP(ENDMSG), OP(STARTSEQ), OP(ENDSEQ), OP(STARTSUBMSG),
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
};
return op > OP_HALT ? names[0] : names[op];
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
/* Keep in sync with list in decoder.int.h. */
switch ((opcode)op) {
T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
}
return "<unknown op>";
#undef OP
#undef T
}
@ -482,7 +473,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
uint64_t encoded_tag = upb_vencode32(tag);
// No tag should be greater than 5 bytes.
/* No tag should be greater than 5 bytes. */
assert(encoded_tag <= 0xffffffffff);
return encoded_tag;
}
@ -510,29 +501,29 @@ static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
return selector;
}
// Takes an existing, primary dispatch table entry and repacks it with a
// different alternate wire type. Called when we are inserting a secondary
// dispatch table entry for an alternate wire type.
/* Takes an existing, primary dispatch table entry and repacks it with a
* different alternate wire type. Called when we are inserting a secondary
* dispatch table entry for an alternate wire type. */
static uint64_t repack(uint64_t dispatch, int new_wt2) {
uint64_t ofs;
uint8_t wt1;
uint8_t old_wt2;
upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
assert(old_wt2 == NO_WIRE_TYPE); // wt2 should not be set yet.
assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
}
// Marks the current bytecode position as the dispatch target for this message,
// field, and wire type.
/* Marks the current bytecode position as the dispatch target for this message,
* field, and wire type. */
static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
const upb_fielddef *f, int wire_type) {
// Offset is relative to msg base.
/* Offset is relative to msg base. */
uint64_t ofs = pcofs(c) - method->code_base.ofs;
uint32_t fn = upb_fielddef_number(f);
upb_inttable *d = &method->dispatch;
upb_value v;
if (upb_inttable_remove(d, fn, &v)) {
// TODO: prioritize based on packed setting in .proto file.
/* TODO: prioritize based on packed setting in .proto file. */
uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
upb_inttable_insert(d, fn, upb_value_uint64(repacked));
upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
@ -574,8 +565,8 @@ static void putsel(compiler *c, opcode op, upb_selector_t sel,
}
}
// Puts an opcode to call a callback, but only if a callback actually exists for
// this field and handler type.
/* Puts an opcode to call a callback, but only if a callback actually exists for
* this field and handler type. */
static void maybeput(compiler *c, opcode op, const upb_handlers *h,
const upb_fielddef *f, upb_handlertype_t type) {
putsel(c, op, getsel(f, type), h);
@ -593,27 +584,28 @@ static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
/* bytecode compiler code generation ******************************************/
// Symbolic names for our local labels.
#define LABEL_LOOPSTART 1 // Top of a repeated field loop.
#define LABEL_LOOPBREAK 2 // To jump out of a repeated loop
#define LABEL_FIELD 3 // Jump backward to find the most recent field.
#define LABEL_ENDMSG 4 // To reach the OP_ENDMSG instr for this msg.
/* Symbolic names for our local labels. */
#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
// Generates bytecode to parse a single non-lazy message field.
/* Generates bytecode to parse a single non-lazy message field. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
int wire_type;
if (!sub_m) {
// Don't emit any code for this field at all; it will be parsed as an
// unknown field.
/* Don't emit any code for this field at all; it will be parsed as an
* unknown field. */
return;
}
label(c, LABEL_FIELD);
int wire_type =
wire_type =
(upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
? UPB_WIRE_TYPE_DELIMITED
: UPB_WIRE_TYPE_START_GROUP;
@ -654,7 +646,7 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,
}
}
// Generates bytecode to parse a single string or lazy submessage field.
/* Generates bytecode to parse a single string or lazy submessage field. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
@ -669,7 +661,7 @@ static void generate_delimfield(compiler *c, const upb_fielddef *f,
label(c, LABEL_LOOPSTART);
putop(c, OP_PUSHLENDELIM);
putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
// Need to emit even if no handler to skip past the string.
/* Need to emit even if no handler to skip past the string. */
putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
putop(c, OP_POP);
maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
@ -693,49 +685,52 @@ static void generate_delimfield(compiler *c, const upb_fielddef *f,
}
}
// Generates bytecode to parse a single primitive field.
/* Generates bytecode to parse a single primitive field. */
static void generate_primitivefield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
label(c, LABEL_FIELD);
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
opcode parse_type;
upb_selector_t sel;
int wire_type;
label(c, LABEL_FIELD);
// From a decoding perspective, ENUM is the same as INT32.
/* From a decoding perspective, ENUM is the same as INT32. */
if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
opcode parse_type = (opcode)descriptor_type;
parse_type = (opcode)descriptor_type;
// TODO(haberman): generate packed or non-packed first depending on "packed"
// setting in the fielddef. This will favor (in speed) whichever was
// specified.
/* TODO(haberman): generate packed or non-packed first depending on "packed"
* setting in the fielddef. This will favor (in speed) whichever was
* specified. */
assert((int)parse_type >= 0 && parse_type <= OP_MAX);
upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
int wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
if (upb_fielddef_isseq(f)) {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
putop(c, OP_PUSHLENDELIM);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Packed
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
label(c, LABEL_LOOPSTART);
putop(c, parse_type, sel);
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
dispatchtarget(c, method, f, wire_type);
putop(c, OP_PUSHTAGDELIM, 0);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); // Non-packed
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
label(c, LABEL_LOOPSTART);
putop(c, parse_type, sel);
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
label(c, LABEL_LOOPBREAK);
putop(c, OP_POP); // Packed and non-packed join.
putop(c, OP_POP); /* Packed and non-packed join. */
maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
putop(c, OP_SETDELIM); // Could remove for non-packed by dup ENDSEQ.
putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
} else {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, wire_type, LABEL_DISPATCH);
@ -744,24 +739,29 @@ static void generate_primitivefield(compiler *c, const upb_fielddef *f,
}
}
// Adds bytecode for parsing the given message to the given decoderplan,
// while adding all dispatch targets to this message's dispatch table.
/* Adds bytecode for parsing the given message to the given decoderplan,
* while adding all dispatch targets to this message's dispatch table. */
static void compile_method(compiler *c, upb_pbdecodermethod *method) {
const upb_handlers *h;
const upb_msgdef *md;
uint32_t* start_pc;
upb_msg_field_iter i;
upb_value val;
assert(method);
// Clear all entries in the dispatch table.
/* Clear all entries in the dispatch table. */
upb_inttable_uninit(&method->dispatch);
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
const upb_msgdef *md = upb_handlers_msgdef(h);
h = upb_pbdecodermethod_desthandlers(method);
md = upb_handlers_msgdef(h);
method->code_base.ofs = pcofs(c);
putop(c, OP_SETDISPATCH, &method->dispatch);
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
uint32_t* start_pc = c->pc;
upb_msg_field_iter i;
start_pc = c->pc;
for(upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
@ -778,23 +778,23 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
}
}
// If there were no fields, or if no handlers were defined, we need to
// generate a non-empty loop body so that we can at least dispatch for unknown
// fields and check for the end of the message.
/* If there were no fields, or if no handlers were defined, we need to
* generate a non-empty loop body so that we can at least dispatch for unknown
* fields and check for the end of the message. */
if (c->pc == start_pc) {
// Check for end-of-message.
/* Check for end-of-message. */
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
// Unconditionally dispatch.
/* Unconditionally dispatch. */
putop(c, OP_DISPATCH, 0);
}
// For now we just loop back to the last field of the message (or if none,
// the DISPATCH opcode for the message).
/* For now we just loop back to the last field of the message (or if none,
* the DISPATCH opcode for the message). */
putop(c, OP_BRANCH, -LABEL_FIELD);
// Insert both a label and a dispatch table entry for this end-of-msg.
/* Insert both a label and a dispatch table entry for this end-of-msg. */
label(c, LABEL_ENDMSG);
upb_value val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
@ -803,19 +803,21 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
upb_inttable_compact(&method->dispatch);
}
// Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
// Returns the method for these handlers.
//
// Generates a new method for every destination handlers reachable from "h".
/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
* Returns the method for these handlers.
*
* Generates a new method for every destination handlers reachable from "h". */
static void find_methods(compiler *c, const upb_handlers *h) {
upb_value v;
upb_msg_field_iter i;
const upb_msgdef *md;
if (upb_inttable_lookupptr(&c->group->methods, h, &v))
return;
newmethod(h, c->group);
// Find submethods.
upb_msg_field_iter i;
const upb_msgdef *md = upb_handlers_msgdef(h);
/* Find submethods. */
md = upb_handlers_msgdef(h);
for(upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
@ -823,20 +825,21 @@ static void find_methods(compiler *c, const upb_handlers *h) {
const upb_handlers *sub_h;
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
(sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
// We only generate a decoder method for submessages with handlers.
// Others will be parsed as unknown fields.
/* We only generate a decoder method for submessages with handlers.
* Others will be parsed as unknown fields. */
find_methods(c, sub_h);
}
}
}
// (Re-)compile bytecode for all messages in "msgs."
// Overwrites any existing bytecode in "c".
/* (Re-)compile bytecode for all messages in "msgs."
* Overwrites any existing bytecode in "c". */
static void compile_methods(compiler *c) {
// Start over at the beginning of the bytecode.
upb_inttable_iter i;
/* Start over at the beginning of the bytecode. */
c->pc = c->group->bytecode;
upb_inttable_iter i;
upb_inttable_begin(&i, &c->group->methods);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
@ -849,10 +852,10 @@ static void set_bytecode_handlers(mgroup *g) {
upb_inttable_begin(&i, &g->methods);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
upb_byteshandler *h = &m->input_handler_;
m->code_base.ptr = g->bytecode + m->code_base.ofs;
upb_byteshandler *h = &m->input_handler_;
upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
@ -867,53 +870,58 @@ static void set_bytecode_handlers(mgroup *g) {
/* Installs the input handlers for every method in group "g".
 *
 * g->jit_code is always reset to NULL first.  When "allowjit" is false the
 * bytecode handlers are installed; otherwise the group is handed to the JIT
 * compiler. */
static void sethandlers(mgroup *g, bool allowjit) {
  g->jit_code = NULL;

  if (!allowjit) {
    set_bytecode_handlers(g);
    return;
  }

  /* Compile byte-code into machine code, create handlers. */
  upb_pbdecoder_jit(g);
}
#else // UPB_USE_JIT_X64
#else /* UPB_USE_JIT_X64 */
/* Variant used when no JIT is compiled in: the bytecode handlers are
 * installed unconditionally and "allowjit" is ignored. */
static void sethandlers(mgroup *g, bool allowjit) {
  set_bytecode_handlers(g);
  UPB_UNUSED(allowjit);  /* Only meaningful when the JIT is available. */
}
#endif // UPB_USE_JIT_X64
#endif /* UPB_USE_JIT_X64 */
// TODO(haberman): allow this to be constructed for an arbitrary set of dest
// handlers and other mgroups (but verify we have a transitive closure).
/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
* handlers and other mgroups (but verify we have a transitive closure). */
const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
const void *owner) {
mgroup *g;
compiler *c;
UPB_UNUSED(allowjit);
assert(upb_handlers_isfrozen(dest));
mgroup *g = newgroup(owner);
compiler *c = newcompiler(g, lazy);
g = newgroup(owner);
c = newcompiler(g, lazy);
find_methods(c, dest);
// We compile in two passes:
// 1. all messages are assigned relative offsets from the beginning of the
// bytecode (saved in method->code_base).
// 2. forwards OP_CALL instructions can be correctly linked since message
// offsets have been previously assigned.
//
// Could avoid the second pass by linking OP_CALL instructions somehow.
/* We compile in two passes:
* 1. all messages are assigned relative offsets from the beginning of the
* bytecode (saved in method->code_base).
* 2. forwards OP_CALL instructions can be correctly linked since message
* offsets have been previously assigned.
*
* Could avoid the second pass by linking OP_CALL instructions somehow. */
compile_methods(c);
compile_methods(c);
g->bytecode_end = c->pc;
freecompiler(c);
#ifdef UPB_DUMP_BYTECODE
FILE *f = fopen("/tmp/upb-bytecode", "wb");
assert(f);
dumpbc(g->bytecode, g->bytecode_end, stderr);
dumpbc(g->bytecode, g->bytecode_end, f);
fclose(f);
{
FILE *f = fopen("/tmp/upb-bytecode", "wb");
assert(f);
dumpbc(g->bytecode, g->bytecode_end, stderr);
dumpbc(g->bytecode, g->bytecode_end, f);
fclose(f);
}
#endif
sethandlers(g, allowjit);
@ -933,7 +941,7 @@ void upb_pbcodecache_uninit(upb_pbcodecache *c) {
upb_inttable_begin(&i, &c->groups);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
upb_refcounted_unref(UPB_UPCAST(group), c);
mgroup_unref(group, c);
}
upb_inttable_uninit(&c->groups);
}
@ -951,13 +959,15 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
/* Builds a new method group for opts->handlers, records it in the cache, and
 * returns the decoder method that the group holds for those handlers.
 *
 * Right now we build a new DecoderMethod every time.
 * TODO(haberman): properly cache methods by their true key. */
const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
    upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
  upb_value v;
  bool ok;
  /* The cache itself is passed as the owner of the new group. */
  const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);

  /* Remember the group so that upb_pbcodecache_uninit() can unref it. */
  upb_inttable_push(&c->groups, upb_value_constptr(g));

  /* The group is expected to contain a method for the handlers it was built
   * from; this is only checked in debug builds. */
  ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
  UPB_ASSERT_VAR(ok, ok);
  return upb_value_getptr(v);
}

@ -7,7 +7,7 @@
* Driver code for the x64 JIT compiler.
*/
// Needed to ensure we get defines like MAP_ANON.
/* Needed to ensure we get defines like MAP_ANON. */
#define _GNU_SOURCE
#include <dlfcn.h>
@ -19,50 +19,50 @@
#include "upb/pb/varint.int.h"
#include "upb/shim/shim.h"
// To debug the JIT:
//
// 1. Uncomment:
// #define UPB_JIT_LOAD_SO
//
// Note: this mode requires that we can shell out to gcc.
//
// 2. Run the test locally. This will load the JIT code by building a
// .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
// work properly (like GDB).
//
// IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
//
// 3. Run: upb/pb/make-gdb-script.rb > script.gdb. This reads
// /tmp/upb-jit-code.so as input and generates a GDB script that is specific
// to this jit code.
//
// 4. Run: gdb --command=script.gdb --args path/to/test
// This will drop you to a GDB prompt which you can now use normally.
// But when you run the test it will print a message to stdout every time
// the JIT executes assembly for a particular bytecode. Sample output:
//
// X.enterjit bytes=18
// buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
// buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
// buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
// X.0x3.dispatch.DecoderTest
// X.parse_unknown
// X.0x3.dispatch.DecoderTest
// X.decode_unknown_tag_fallback
// X.exitjit
//
// This output should roughly correspond to the output that the bytecode
// interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
// extra JIT-specific output).
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
/* To debug the JIT:
*
* 1. Uncomment:
* #define UPB_JIT_LOAD_SO
*
* Note: this mode requires that we can shell out to gcc.
*
* 2. Run the test locally. This will load the JIT code by building a
* .so (/tmp/upb-jit-code.so) and using dlopen, so more of the tooling will
* work properly (like GDB).
*
* IF YOU ALSO WANT AUTOMATIC JIT DEBUG OUTPUT:
*
* 3. Run: upb/pb/make-gdb-script.rb > script.gdb. This reads
* /tmp/upb-jit-code.so as input and generates a GDB script that is specific
* to this jit code.
*
* 4. Run: gdb --command=script.gdb --args path/to/test
* This will drop you to a GDB prompt which you can now use normally.
* But when you run the test it will print a message to stdout every time
* the JIT executes assembly for a particular bytecode. Sample output:
*
* X.enterjit bytes=18
* buf_ofs=1 data_rem=17 delim_rem=-2 X.0x6.OP_PARSE_DOUBLE
* buf_ofs=9 data_rem=9 delim_rem=-10 X.0x7.OP_CHECKDELIM
* buf_ofs=9 data_rem=9 delim_rem=-10 X.0x8.OP_TAG1
* X.0x3.dispatch.DecoderTest
* X.parse_unknown
* X.0x3.dispatch.DecoderTest
* X.decode_unknown_tag_fallback
* X.exitjit
*
* This output should roughly correspond to the output that the bytecode
* interpreter emits when compiled with UPB_DUMP_BYTECODE (modulo some
* extra JIT-specific output). */
/* These defines are necessary for DynASM codegen.
* See dynasm/dasm_proto.h for more info. */
#define Dst_DECL jitcompiler *jc
#define Dst_REF (jc->dynasm)
#define Dst (jc)
// In debug mode, make DynASM do internal checks (must be defined before any
// dasm header is included.
/* In debug mode, make DynASM do internal checks (must be defined before any
* dasm header is included). */
#ifndef NDEBUG
#define DASM_CHECKS
#endif
@ -75,49 +75,49 @@ typedef struct {
mgroup *group;
uint32_t *pc;
// This pointer is allocated by dasm_init() and freed by dasm_free().
/* This pointer is allocated by dasm_init() and freed by dasm_free(). */
struct dasm_State *dynasm;
// Maps some key (an arbitrary void*) to a pclabel.
//
// The pclabel represents a location in the generated code -- DynASM exposes
// a pclabel -> (machine code offset) lookup function.
//
// The key can be anything. There are two main kinds of keys:
// - bytecode location -- the void* points to the bytecode instruction
// itself. We can then use this to generate jumps to this instruction.
// - other object (like dispatch table). We use these to represent parts
// of the generated code that do not exactly correspond to a bytecode
// instruction.
upb_inttable jmptargets;
/* Maps some key (an arbitrary void*) to a pclabel.
*
* The pclabel represents a location in the generated code -- DynASM exposes
* a pclabel -> (machine code offset) lookup function.
*
* The key can be anything. There are two main kinds of keys:
* - bytecode location -- the void* points to the bytecode instruction
* itself. We can then use this to generate jumps to this instruction.
* - other object (like dispatch table). We use these to represent parts
* of the generated code that do not exactly correspond to a bytecode
* instruction. */
upb_inttable jmptargets;
#ifndef NDEBUG
// Like jmptargets, but members are present in the table when they have had
// define_jmptarget() (as opposed to jmptarget) called. Used to verify that
// define_jmptarget() is called exactly once for every target.
// The value is ignored.
/* Like jmptargets, but members are present in the table when they have had
* define_jmptarget() (as opposed to jmptarget) called. Used to verify that
* define_jmptarget() is called exactly once for every target.
* The value is ignored. */
upb_inttable jmpdefined;
// For checking that two asmlabels aren't defined for the same byte.
/* For checking that two asmlabels aren't defined for the same byte. */
int lastlabelofs;
#endif
#ifdef UPB_JIT_LOAD_SO
// For marking labels that should go into the generated code.
// Maps pclabel -> char* label (string is owned by the table).
/* For marking labels that should go into the generated code.
* Maps pclabel -> char* label (string is owned by the table). */
upb_inttable asmlabels;
#endif
// The total number of pclabels currently defined.
// Note that this contains both jmptargets and asmlabels, which both use
// pclabels but for different purposes.
/* The total number of pclabels currently defined.
* Note that this contains both jmptargets and asmlabels, which both use
* pclabels but for different purposes. */
uint32_t pclabel_count;
// Used by DynASM to store globals.
/* Used by DynASM to store globals. */
void **globals;
} jitcompiler;
// Functions called by codegen.
/* Functions called by codegen. */
static int jmptarget(jitcompiler *jc, const void *key);
static int define_jmptarget(jitcompiler *jc, const void *key);
static void asmlabel(jitcompiler *jc, const char *fmt, ...);
@ -174,21 +174,21 @@ static void freejitcompiler(jitcompiler *jc) {
#ifdef UPB_JIT_LOAD_SO
// Like sprintf except allocates the string, which is returned and owned by the
// caller.
//
// Like the GNU extension asprintf(), except we abort on error (since this is
// only for debugging).
/* Like sprintf except allocates the string, which is returned and owned by the
* caller.
*
* Like the GNU extension asprintf(), except we abort on error (since this is
* only for debugging). */
static char *upb_vasprintf(const char *fmt, va_list args) {
// Run once to get the length of the string.
/* Run once to get the length of the string. */
va_list args_copy;
va_copy(args_copy, args);
int len = vsnprintf(NULL, 0, fmt, args_copy);
int len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
va_end(args_copy);
char *ret = malloc(len + 1); // + 1 for NULL terminator.
char *ret = malloc(len + 1); /* + 1 for NULL terminator. */
if (!ret) abort();
int written = vsnprintf(ret, len + 1, fmt, args);
int written = _upb_vsnprintf(ret, len + 1, fmt, args);
UPB_ASSERT_VAR(written, written == len);
return ret;
@ -220,23 +220,26 @@ static bool try_getjmptarget(jitcompiler *jc, const void *key, int *pclabel) {
}
}
/* Gets the pclabel for this bytecode location's jmptarget.  Requires that the
 * jmptarget() has been previously defined.
 *
 * Returns the pclabel stored for "key". */
static int getjmptarget(jitcompiler *jc, const void *key) {
  int pclabel = 0;
  bool ok;

  /* Debug-only check that define_jmptarget() has recorded this key. */
  assert(upb_inttable_lookupptr(&jc->jmpdefined, key, NULL));
  ok = try_getjmptarget(jc, key, &pclabel);
  UPB_ASSERT_VAR(ok, ok);
  return pclabel;
}
// Returns a pclabel that serves as a jmp target for the given bytecode pointer.
// This should only be called for code that is jumping to the target; code
// defining the target should use define_jmptarget().
//
// Creates/allocates a pclabel for this target if one does not exist already.
/* Returns a pclabel that serves as a jmp target for the given bytecode pointer.
* This should only be called for code that is jumping to the target; code
* defining the target should use define_jmptarget().
*
* Creates/allocates a pclabel for this target if one does not exist already. */
static int jmptarget(jitcompiler *jc, const void *key) {
int pclabel;
// Optimizer sometimes can't figure out that initializing this is unnecessary.
int pclabel = 0;
if (!try_getjmptarget(jc, key, &pclabel)) {
pclabel = alloc_pclabel(jc);
upb_inttable_insertptr(&jc->jmptargets, key, upb_value_uint32(pclabel));
@ -244,12 +247,12 @@ static int jmptarget(jitcompiler *jc, const void *key) {
return pclabel;
}
// Defines a pclabel associated with the given bytecode location.
// Must be called exactly once by the code that is generating the code for this
// bytecode.
//
// Must be called exactly once before bytecode generation is complete (this is a
// sanity check to make sure the label is defined exactly once).
/* Defines a pclabel associated with the given bytecode location.
* Must be called exactly once by the code that is generating the code for this
* bytecode.
*
* Must be called exactly once before bytecode generation is complete (this is a
* sanity check to make sure the label is defined exactly once). */
static int define_jmptarget(jitcompiler *jc, const void *key) {
#ifndef NDEBUG
upb_inttable_insertptr(&jc->jmpdefined, key, upb_value_bool(true));
@ -257,115 +260,121 @@ static int define_jmptarget(jitcompiler *jc, const void *key) {
return jmptarget(jc, key);
}
/* Returns the position of the current bytecode pc (jc->pc) relative to the
 * beginning of the group's bytecode. */
static int pcofs(jitcompiler *jc) {
  const mgroup *group = jc->group;
  return jc->pc - group->bytecode;
}
/* Returns a machine code offset corresponding to the given key.
 * Requires that this key was defined with define_jmptarget. */
static int machine_code_ofs(jitcompiler *jc, const void *key) {
  /* Despite its name, dasm_getpclabel() takes a pclabel and returns the
   * corresponding machine code offset. */
  return dasm_getpclabel(jc, getjmptarget(jc, key));
}
/* Returns a machine code offset corresponding to the given method-relative
 * bytecode offset.  Note that the bytecode offset is relative to the given
 * method, but the returned machine code offset is relative to the beginning of
 * *all* the machine code. */
static int machine_code_ofs2(jitcompiler *jc, const upb_pbdecodermethod *method,
                             int pcofs) {
  /* The lookup key is the address of the target bytecode instruction: group
   * bytecode base + the method's base offset + the offset within the method. */
  return machine_code_ofs(
      jc, jc->group->bytecode + method->code_base.ofs + pcofs);
}
/* Given a pcofs relative to this method's base, returns a machine code offset
 * relative to jmptarget(dispatch->array) (which is used in jitdispatch as the
 * machine code base for dispatch table lookups).
 *
 * All three offsets involved are asserted to be positive in debug builds. */
uint32_t dispatchofs(jitcompiler *jc, const upb_pbdecodermethod *method,
                     int pcofs) {
  int mc_base = machine_code_ofs(jc, method->dispatch.array);
  int mc_target = machine_code_ofs2(jc, method, pcofs);
  int ret;

  assert(mc_base > 0);
  assert(mc_target > 0);
  ret = mc_target - mc_base;
  assert(ret > 0);
  return ret;
}
/* Rewrites the dispatch tables into machine code offsets.
 *
 * For every method in the group this marks the method as native, rewrites
 * each dispatch table entry from a bytecode offset to a machine code
 * offset/address, repoints the method's entry point at the generated machine
 * code, and installs the JIT input handlers. */
static void patchdispatch(jitcompiler *jc) {
  upb_inttable_iter i;
  upb_inttable_begin(&i, &jc->group->methods);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
    upb_inttable *dispatch = &method->dispatch;
    upb_inttable_iter i2;

    method->is_native_ = true;

    /* Remove DISPATCH_ENDMSG -- only the bytecode interpreter needs it.
     * And leaving it around will cause us to find field 0 improperly. */
    upb_inttable_remove(dispatch, DISPATCH_ENDMSG, NULL);

    upb_inttable_begin(&i2, dispatch);
    for (; !upb_inttable_done(&i2); upb_inttable_next(&i2)) {
      uintptr_t key = upb_inttable_iter_key(&i2);
      uint64_t val = upb_value_getuint64(upb_inttable_iter_value(&i2));
      uint64_t newval;
      bool ok;

      if (key <= UPB_MAX_FIELDNUMBER) {
        /* Primary slot. */
        uint64_t ofs;
        uint8_t wt1;
        uint8_t wt2;
        upb_pbdecoder_unpackdispatch(val, &ofs, &wt1, &wt2);

        /* Update offset and repack. */
        ofs = dispatchofs(jc, method, ofs);
        newval = upb_pbdecoder_packdispatch(ofs, wt1, wt2);
        assert((int64_t)newval > 0);
      } else {
        /* Secondary slot.  Since we have 64 bits for the value, we use an
         * absolute offset. */
        int mcofs = machine_code_ofs2(jc, method, val);
        newval = (uint64_t)((char*)jc->group->jit_code + mcofs);
      }

      ok = upb_inttable_replace(dispatch, key, upb_value_uint64(newval));
      UPB_ASSERT_VAR(ok, ok);
    }

    /* Update entry point for this method to point at mc base instead of bc
     * base.  Set this only *after* we have patched the offsets
     * (machine_code_ofs2() uses this). */
    method->code_base.ptr = (char*)jc->group->jit_code + machine_code_ofs(jc, method);

    /* Install the JIT entry points exactly once.  The nested block keeps the
     * declaration of "h" at the start of a scope, as C89 requires. */
    {
      upb_byteshandler *h = &method->input_handler_;
      upb_byteshandler_setstartstr(h, upb_pbdecoder_startjit, NULL);
      upb_byteshandler_setstring(h, jc->group->jit_code, method->code_base.ptr);
      upb_byteshandler_setendstr(h, upb_pbdecoder_end, method);
    }
  }
}
#ifdef UPB_JIT_LOAD_SO
static void load_so(jitcompiler *jc) {
// Dump to a .so file in /tmp and load that, so all the tooling works right
// (for example, debuggers and profilers will see symbol names for the JIT-ted
// code). This is the same goal of the GDB JIT code below, but the GDB JIT
// interface is only used/understood by GDB. Hopefully a standard will
// develop for registering JIT-ted code that all tools will recognize,
// rendering this obsolete.
// jc->asmlabels maps:
// pclabel -> char* label
//
// Use this to build mclabels, which maps:
// machine code offset -> char* label
//
// Then we can use mclabels to emit the labels as we iterate over the bytes we
// are outputting.
/* Dump to a .so file in /tmp and load that, so all the tooling works right
* (for example, debuggers and profilers will see symbol names for the JIT-ted
* code). This is the same goal of the GDB JIT code below, but the GDB JIT
* interface is only used/understood by GDB. Hopefully a standard will
* develop for registering JIT-ted code that all tools will recognize,
* rendering this obsolete.
*
* jc->asmlabels maps:
* pclabel -> char* label
*
* Use this to build mclabels, which maps:
* machine code offset -> char* label
*
* Then we can use mclabels to emit the labels as we iterate over the bytes we
* are outputting. */
upb_inttable_iter i;
upb_inttable mclabels;
upb_inttable_init(&mclabels, UPB_CTYPE_PTR);
@ -376,25 +385,26 @@ static void load_so(jitcompiler *jc) {
upb_inttable_iter_value(&i));
}
// We write a .s file in text format, as input to the assembler.
// Then we run gcc to turn it into a .so file.
//
// The last "XXXXXX" will be replaced with something randomly generated by
// mkstmemp(). We don't add ".s" to this filename because it makes the string
// processing for mkstemp() and system() more complicated.
/* We write a .s file in text format, as input to the assembler.
* Then we run gcc to turn it into a .so file.
*
* The last "XXXXXX" will be replaced with something randomly generated by
* mkstemp(). We don't add ".s" to this filename because it makes the string
* processing for mkstemp() and system() more complicated. */
char s_filename[] = "/tmp/upb-jit-codeXXXXXX";
int fd = mkstemp(s_filename);
FILE *f;
if (fd >= 0 && (f = fdopen(fd, "wb")) != NULL) {
uint8_t *jit_code = (uint8_t*)jc->group->jit_code;
fputs(" .text\n\n", f);
size_t linelen = 0;
for (size_t i = 0; i < jc->group->jit_size; i++) {
size_t i;
fputs(" .text\n\n", f);
for (i = 0; i < jc->group->jit_size; i++) {
upb_value v;
if (upb_inttable_lookup(&mclabels, i, &v)) {
const char *label = upb_value_getptr(v);
// "X." makes our JIT syms recognizable as such, which we build into
// other tooling.
/* "X." makes our JIT syms recognizable as such, which we build into
* other tooling. */
fprintf(f, "\n\nX.%s:\n", label);
fprintf(f, " .globl X.%s", label);
linelen = 1000;
@ -412,10 +422,10 @@ static void load_so(jitcompiler *jc) {
abort();
}
// This is exploitable if you have an adversary on your machine who can write
// to this tmp directory. But this is just for debugging so we don't worry
// too much about that. It shouldn't be prone to races against concurrent
// (non-adversarial) upb JIT's because we used mkstemp().
/* This is exploitable if you have an adversary on your machine who can write
* to this tmp directory. But this is just for debugging so we don't worry
* too much about that. It shouldn't be prone to races against concurrent
* (non-adversarial) upb JIT's because we used mkstemp(). */
char *cmd = upb_asprintf("gcc -shared -o %s.so -x assembler %s", s_filename,
s_filename);
if (system(cmd) != 0) {
@ -426,12 +436,14 @@ static void load_so(jitcompiler *jc) {
char *so_filename = upb_asprintf("%s.so", s_filename);
// Some convenience symlinks.
// This is racy, but just for convenience.
/* Some convenience symlinks.
* This is racy, but just for convenience. */
int ret;
unlink("/tmp/upb-jit-code.so");
unlink("/tmp/upb-jit-code.s");
symlink(s_filename, "/tmp/upb-jit-code.s");
symlink(so_filename, "/tmp/upb-jit-code.so");
ret = symlink(s_filename, "/tmp/upb-jit-code.s");
ret = symlink(so_filename, "/tmp/upb-jit-code.so");
UPB_UNUSED(ret); // We don't care if this fails.
jc->group->dl = dlopen(so_filename, RTLD_LAZY);
free(so_filename);
@ -453,22 +465,26 @@ static void load_so(jitcompiler *jc) {
#endif
void upb_pbdecoder_jit(mgroup *group) {
jitcompiler *jc;
char *jit_code;
int dasm_status;
group->debug_info = NULL;
group->dl = NULL;
assert(group->bytecode);
jitcompiler *jc = newjitcompiler(group);
jc = newjitcompiler(group);
emit_static_asm(jc);
jitbytecode(jc);
int dasm_status = dasm_link(jc, &jc->group->jit_size);
dasm_status = dasm_link(jc, &jc->group->jit_size);
if (dasm_status != DASM_S_OK) {
fprintf(stderr, "DynASM error; returned status: 0x%08x\n", dasm_status);
abort();
}
char *jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
jit_code = mmap(NULL, jc->group->jit_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
dasm_encode(jc, jit_code);
mprotect(jit_code, jc->group->jit_size, PROT_EXEC | PROT_READ);
jc->group->jit_code = (upb_string_handlerfunc *)jit_code;
@ -481,7 +497,7 @@ void upb_pbdecoder_jit(mgroup *group) {
freejitcompiler(jc);
// Now the bytecode is no longer needed.
/* Now the bytecode is no longer needed. */
free(group->bytecode);
group->bytecode = NULL;
}
@ -493,7 +509,7 @@ void upb_pbdecoder_freejit(mgroup *group) {
dlclose(group->dl);
#endif
} else {
munmap(group->jit_code, group->jit_size);
munmap((void*)group->jit_code, group->jit_size);
}
free(group->debug_info);
}

@ -143,13 +143,13 @@ static upb_func *gethandler(const upb_handlers *h, upb_selector_t sel) {
return h ? upb_handlers_gethandler(h, sel) : NULL;
}
// Defines an "assembly label" for the current code generation offset.
// This label exists *purely* for debugging purposes: it is emitted into
// the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
// defined.
//
// We would define this in the .c file except that it conditionally defines a
// pclabel.
/* Defines an "assembly label" for the current code generation offset.
* This label exists *purely* for debugging purposes: it is emitted into
* the .so, and printed as part of JIT debugging output when UPB_JIT_LOAD_SO is
* defined.
*
* We would define this in the .c file except that it conditionally defines a
* pclabel. */
static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
#ifndef NDEBUG
int ofs = jc->dynasm->section->ofs;
@ -167,37 +167,39 @@ static void asmlabel(jitcompiler *jc, const char *fmt, ...) {
va_end(args);
int pclabel = alloc_pclabel(jc);
// Normally we would prefer to allocate this inline with the codegen,
// ie.
// |=>asmlabel(...)
// But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
// we do it here instead.
/* Normally we would prefer to allocate this inline with the codegen,
* ie.
* |=>asmlabel(...)
* But since we do this conditionally, only when UPB_JIT_LOAD_SO is defined,
* we do it here instead. */
|=>pclabel:
upb_inttable_insert(&jc->asmlabels, pclabel, upb_value_ptr(str));
#endif
}
/* Returns the "alwaysok" attribute of the handler registered in "h" for
 * selector "sel".
 *
 * Should only be called when the associated handler is known to exist. */
static bool alwaysok(const upb_handlers *h, upb_selector_t sel) {
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  bool ok = upb_handlers_getattr(h, sel, &attr);
  bool ret;

  UPB_ASSERT_VAR(ok, ok);
  ret = upb_handlerattr_alwaysok(&attr);
  /* Read the flag before releasing the attribute object. */
  upb_handlerattr_uninit(&attr);
  return ret;
}
// Emit static assembly routines; code that does not vary based on the message
// schema. Since it's not input-dependent, we only need one single copy of it.
// For the moment we generate a single copy per generated handlers. Eventually
// we should generate this code at compile time and link it into the binary so
// we have one copy total. To do that we'll want to be sure that it is within
// 2GB of our JIT code, so that branches between the two are near (rel32).
//
// We'd put this assembly in a .s file directly, but DynASM's ability to
// calculate structure offsets automatically is too useful to pass up (it's way
// more convenient to write DECODER->sink than [rbx + 0x96], especially since
// the latter would have to be changed whenever the structure is updated).
/* Emit static assembly routines; code that does not vary based on the message
* schema. Since it's not input-dependent, we only need one single copy of it.
* For the moment we generate a single copy per generated handlers. Eventually
* we should generate this code at compile time and link it into the binary so
* we have one copy total. To do that we'll want to be sure that it is within
* 2GB of our JIT code, so that branches between the two are near (rel32).
*
* We'd put this assembly in a .s file directly, but DynASM's ability to
* calculate structure offsets automatically is too useful to pass up (it's way
* more convenient to write DECODER->sink than [rbx + 0x96], especially since
* the latter would have to be changed whenever the structure is updated). */
static void emit_static_asm(jitcompiler *jc) {
| // Trampolines for entering/exiting the JIT. These are a bit tricky to
| // support full resuming; when we suspend we copy the JIT's portion of
@ -526,15 +528,17 @@ static void jitprimitive(jitcompiler *jc, opcode op,
X, F64, F32, V64, V64, V32, F64, F32, V64, X, X, X, X, V32, V32, F32, F64,
V32, V64 };
static char fastpath_bytes[] = { 1, 1, 4, 8 };
const valtype_t type = types[op];
const int fastbytes = fastpath_bytes[type];
const valtype_t vtype = types[op];
const int fastbytes = fastpath_bytes[vtype];
upb_func *handler = gethandler(h, sel);
upb_fieldtype_t ftype;
const upb_shim_data *data;
if (handler) {
|1:
| chkneob fastbytes, >3
|2:
switch (type) {
switch (vtype) {
case V32:
| call ->decodev32_fallback
break;
@ -551,7 +555,7 @@ static void jitprimitive(jitcompiler *jc, opcode op,
}
| jmp >4
// Fast path decode; for when check_bytes bytes are available.
/* Fast path decode; for when check_bytes bytes are available. */
|3:
switch (op) {
case OP_PARSE_SFIXED32:
@ -569,19 +573,19 @@ static void jitprimitive(jitcompiler *jc, opcode op,
| movsd xmm0, qword [PTR]
break;
default:
// Inline one byte of varint decoding.
/* Inline one byte of varint decoding. */
| movzx edx, byte [PTR]
| test dl, dl
| js <2 // Fallback to slow path for >1 byte varint.
break;
}
// Second-stage decode; used for both fast and slow paths
// (only needed for a few types).
/* Second-stage decode; used for both fast and slow paths */
/* (only needed for a few types). */
|4:
switch (op) {
case OP_PARSE_SINT32:
// 32-bit zig-zag decode.
/* 32-bit zig-zag decode. */
| mov eax, edx
| shr edx, 1
| and eax, 1
@ -589,7 +593,7 @@ static void jitprimitive(jitcompiler *jc, opcode op,
| xor edx, eax
break;
case OP_PARSE_SINT64:
// 64-bit zig-zag decode.
/* 64-bit zig-zag decode. */
| mov rax, rdx
| shr rdx, 1
| and rax, 1
@ -603,11 +607,10 @@ static void jitprimitive(jitcompiler *jc, opcode op,
default: break;
}
// Call callback (or specialize if we can).
upb_fieldtype_t type;
const upb_shim_data *data = upb_shim_getdata(h, sel, &type);
/* Call callback (or specialize if we can). */
data = upb_shim_getdata(h, sel, &ftype);
if (data) {
switch (type) {
switch (ftype) {
case UPB_TYPE_INT64:
case UPB_TYPE_UINT64:
| mov [CLOSURE + data->offset], rdx
@ -645,14 +648,14 @@ static void jitprimitive(jitcompiler *jc, opcode op,
}
}
// We do this last so that the checkpoint is not advanced past the user's
// data until the callback has returned success.
/* We do this last so that the checkpoint is not advanced past the user's
* data until the callback has returned success. */
| add PTR, fastbytes
} else {
// No handler registered for this value, just skip it.
/* No handler registered for this value, just skip it. */
| chkneob fastbytes, >3
|2:
switch (type) {
switch (vtype) {
case V32:
| call ->skipv32_fallback
break;
@ -668,9 +671,9 @@ static void jitprimitive(jitcompiler *jc, opcode op,
case X: break;
}
// Fast-path skip.
/* Fast-path skip. */
|3:
if (type == V32 || type == V64) {
if (vtype == V32 || vtype == V64) {
| test byte [PTR], 0x80
| jnz <2
}
@ -680,21 +683,21 @@ static void jitprimitive(jitcompiler *jc, opcode op,
static void jitdispatch(jitcompiler *jc,
const upb_pbdecodermethod *method) {
// Lots of room for tweaking/optimization here.
/* Lots of room for tweaking/optimization here. */
const upb_inttable *dispatch = &method->dispatch;
bool has_hash_entries = (dispatch->t.count > 0);
// Whether any of the fields for this message can have two wire types which
// are both valid (packed & non-packed).
//
// OPT: populate this more precisely; not all messages with hash entries have
// this characteristic.
/* Whether any of the fields for this message can have two wire types which
* are both valid (packed & non-packed).
*
* OPT: populate this more precisely; not all messages with hash entries have
* this characteristic. */
bool has_multi_wiretype = has_hash_entries;
|=>define_jmptarget(jc, &method->dispatch):
|1:
// Decode the field tag.
/* Decode the field tag. */
| mov aword DECODER->checkpoint, PTR
| chkeob 2, >6
| movzx edx, byte [PTR]
@ -721,8 +724,8 @@ static void jitdispatch(jitcompiler *jc,
| shr edx, 3
| and cl, 7
// See comment attached to upb_pbdecodermethod.dispatch for layout of the
// dispatch table.
/* See comment attached to upb_pbdecodermethod.dispatch for layout of the
* dispatch table. */
|2:
| cmp edx, dispatch->array_size
if (has_hash_entries) {
@ -794,16 +797,17 @@ static void jitdispatch(jitcompiler *jc,
static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
const upb_pbdecodermethod *method) {
// Internally we parse unknown fields; if this runs us into DELIMEND we jump
// to the corresponding DELIMEND target (either msg end or repeated field
// end), which we find from the OP_CHECKDELIM which must have necessarily
// preceded us.
/* Internally we parse unknown fields; if this runs us into DELIMEND we jump
* to the corresponding DELIMEND target (either msg end or repeated field
* end), which we find from the OP_CHECKDELIM which must have necessarily
* preceded us. */
uint32_t last_instruction = *(jc->pc - 2);
int last_arg = (int32_t)last_instruction >> 8;
assert((last_instruction & 0xff) == OP_CHECKDELIM);
uint32_t *delimend = (jc->pc - 1) + last_arg;
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
assert((last_instruction & 0xff) == OP_CHECKDELIM);
if (getop(*(jc->pc - 1)) == OP_TAGN) {
jc->pc += ptr_words;
}
@ -861,7 +865,7 @@ static void jittag(jitcompiler *jc, uint64_t tag, int n, int ofs,
|5:
}
// Compile the bytecode to x64.
/* Compile the bytecode to x64. */
static void jitbytecode(jitcompiler *jc) {
upb_pbdecodermethod *method = NULL;
const upb_handlers *h = NULL;
@ -872,13 +876,13 @@ static void jitbytecode(jitcompiler *jc) {
int32_t longofs = arg;
if (op != OP_SETDISPATCH) {
// Skipped for SETDISPATCH because it defines its own asmlabel for the
// dispatch code it emits.
/* Skipped for SETDISPATCH because it defines its own asmlabel for the
* dispatch code it emits. */
asmlabel(jc, "0x%lx.%s", pcofs(jc), upb_pbdecoder_getopname(op));
// Skipped for SETDISPATCH because it should point at the function
// prologue, not the dispatch function that is emitted first.
// TODO: optimize this to only define pclabels that are actually used.
/* Skipped for SETDISPATCH because it should point at the function
* prologue, not the dispatch function that is emitted first.
* TODO: optimize this to only define pclabels that are actually used. */
|=>define_jmptarget(jc, jc->pc):
}
@ -888,7 +892,7 @@ static void jitbytecode(jitcompiler *jc) {
case OP_STARTMSG: {
upb_func *startmsg = gethandler(h, UPB_STARTMSG_SELECTOR);
if (startmsg) {
// bool startmsg(void *closure, const void *hd)
/* bool startmsg(void *closure, const void *hd) */
|1:
| mov ARG1_64, CLOSURE
| load_handler_data h, UPB_STARTMSG_SELECTOR
@ -909,7 +913,7 @@ static void jitbytecode(jitcompiler *jc) {
upb_func *endmsg = gethandler(h, UPB_ENDMSG_SELECTOR);
|9:
if (endmsg) {
// bool endmsg(void *closure, const void *hd, upb_status *status)
/* bool endmsg(void *closure, const void *hd, upb_status *status) */
| mov ARG1_64, CLOSURE
| load_handler_data h, UPB_ENDMSG_SELECTOR
| mov ARG3_64, DECODER->status
@ -919,27 +923,28 @@ static void jitbytecode(jitcompiler *jc) {
}
case OP_SETDISPATCH: {
uint32_t *op_pc = jc->pc - 1;
// Load info for new method.
const char *msgname;
upb_inttable *dispatch;
/* Load info for new method. */
memcpy(&dispatch, jc->pc, sizeof(void*));
jc->pc += sizeof(void*) / sizeof(uint32_t);
// The OP_SETDISPATCH bytecode contains a pointer that is
// &method->dispatch; we want to go backwards and recover method.
/* The OP_SETDISPATCH bytecode contains a pointer that is
* &method->dispatch; we want to go backwards and recover method. */
method =
(void*)((char*)dispatch - offsetof(upb_pbdecodermethod, dispatch));
// May be NULL, in which case no handlers for this message will be found.
// OPT: we should do better by completely skipping the message in this
// case instead of parsing it field by field. We should also do the skip
// in the containing message's code.
/* May be NULL, in which case no handlers for this message will be found.
* OPT: we should do better by completely skipping the message in this
* case instead of parsing it field by field. We should also do the skip
* in the containing message's code. */
h = method->dest_handlers_;
const char *msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
msgname = upb_msgdef_fullname(upb_handlers_msgdef(h));
// Emit dispatch code for new method.
/* Emit dispatch code for new method. */
asmlabel(jc, "0x%lx.dispatch.%s", pcofs(jc), msgname);
jitdispatch(jc, method);
// Emit function prologue for new method.
/* Emit function prologue for new method. */
asmlabel(jc, "0x%lx.parse.%s", pcofs(jc), msgname);
|=>define_jmptarget(jc, op_pc):
|=>define_jmptarget(jc, method):
@ -967,9 +972,9 @@ static void jitbytecode(jitcompiler *jc) {
case OP_STARTSTR: {
upb_func *start = gethandler(h, arg);
if (start) {
// void *startseq(void *closure, const void *hd)
// void *startsubmsg(void *closure, const void *hd)
// void *startstr(void *closure, const void *hd, size_t size_hint)
/* void *startseq(void *closure, const void *hd)
* void *startsubmsg(void *closure, const void *hd)
* void *startstr(void *closure, const void *hd, size_t size_hint) */
|1:
| mov ARG1_64, CLOSURE
| load_handler_data h, arg
@ -987,7 +992,7 @@ static void jitbytecode(jitcompiler *jc) {
}
| mov CLOSURE, rax
} else {
// TODO: nop is only required because of asmlabel().
/* TODO: nop is only required because of asmlabel(). */
| nop
}
break;
@ -997,9 +1002,9 @@ static void jitbytecode(jitcompiler *jc) {
case OP_ENDSTR: {
upb_func *end = gethandler(h, arg);
if (end) {
// bool endseq(void *closure, const void *hd)
// bool endsubmsg(void *closure, const void *hd)
// bool endstr(void *closure, const void *hd)
/* bool endseq(void *closure, const void *hd)
* bool endsubmsg(void *closure, const void *hd)
* bool endstr(void *closure, const void *hd) */
|1:
| mov ARG1_64, CLOSURE
| load_handler_data h, arg
@ -1012,7 +1017,7 @@ static void jitbytecode(jitcompiler *jc) {
|2:
}
} else {
// TODO: nop is only required because of asmlabel().
/* TODO: nop is only required because of asmlabel(). */
| nop
}
break;
@ -1028,7 +1033,8 @@ static void jitbytecode(jitcompiler *jc) {
| jmp <1
|2:
if (str) {
// size_t str(void *closure, const void *hd, const char *str, size_t n)
/* size_t str(void *closure, const void *hd, const char *str,
* size_t n) */
| mov ARG1_64, CLOSURE
| load_handler_data h, arg
| mov ARG3_64, PTR
@ -1072,7 +1078,7 @@ static void jitbytecode(jitcompiler *jc) {
| mov CLOSURE, FRAME->sink.closure
break;
case OP_SETDELIM:
// OPT: experiment with testing vs old offset to optimize away.
/* OPT: experiment with testing vs old offset to optimize away. */
| mov DATAEND, DECODER->end
| add DELIMEND, FRAME->end_ofs
| cmp DELIMEND, DECODER->buf

File diff suppressed because it is too large Load Diff

@ -29,17 +29,17 @@
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
// Error messages that are shared between the bytecode and JIT decoders.
/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
// Error messages shared within this file.
/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";
/* upb_pbdecoder **************************************************************/
static opcode halt = OP_HALT;
// Whether an op consumes any of the input buffer.
/* Whether an op consumes any of the input buffer. */
static bool consumes_input(opcode op) {
switch (op) {
case OP_SETDISPATCH:
@ -67,12 +67,12 @@ static bool consumes_input(opcode op) {
static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
// It's unfortunate that we have to micro-manage the compiler with
// UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
// specific to one hardware configuration. But empirically on a Core i7,
// performance increases 30-50% with these annotations. Every instance where
// these appear, gcc 4.2.1 made the wrong decision and degraded performance in
// benchmarks.
/* It's unfortunate that we have to micro-manage the compiler with
* UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
* specific to one hardware configuration. But empirically on a Core i7,
* performance increases 30-50% with these annotations. Every instance where
* these appear, gcc 4.2.1 made the wrong decision and degraded performance in
* benchmarks. */
static void seterr(upb_pbdecoder *d, const char *msg) {
upb_status status = UPB_STATUS_INIT;
@ -87,22 +87,22 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
/* Buffering ******************************************************************/
// We operate on one buffer at a time, which is either the user's buffer passed
// to our "decode" callback or some residual bytes from the previous buffer.
/* We operate on one buffer at a time, which is either the user's buffer passed
* to our "decode" callback or some residual bytes from the previous buffer. */
// How many bytes can be safely read from d->ptr without reading past end-of-buf
// or past the current delimited end.
/* How many bytes can be safely read from d->ptr without reading past end-of-buf
* or past the current delimited end. */
static size_t curbufleft(const upb_pbdecoder *d) {
  size_t remaining;
  /* The read position must never be beyond data_end. */
  assert(d->ptr <= d->data_end);
  /* Distance from the read position to data_end, in bytes. */
  remaining = d->data_end - d->ptr;
  return remaining;
}
// Overall stream offset of d->ptr.
/* Overall stream offset of d->ptr. */
uint64_t offset(const upb_pbdecoder *d) {
  /* Stream offset at which the current buffer starts, plus how far d->ptr
   * has moved into that buffer. */
  const uint64_t stream_ofs = d->bufstart_ofs + (d->ptr - d->buf);
  return stream_ofs;
}
// Advances d->ptr.
/* Advances d->ptr. */
static void advance(upb_pbdecoder *d, size_t len) {
assert(curbufleft(d) >= len);
d->ptr += len;
@ -116,8 +116,8 @@ static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
return in_buf(p, d->residual, d->residual_end);
}
// Calculates the delim_end value, which is affected by both the current buffer
// and the parsing stack, so must be called whenever either is updated.
/* Calculates the delim_end value, which is affected by both the current buffer
* and the parsing stack, so must be called whenever either is updated. */
static void set_delim_end(upb_pbdecoder *d) {
size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
if (delim_ofs <= (size_t)(d->end - d->buf)) {
@ -143,22 +143,22 @@ static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
}
static void checkpoint(upb_pbdecoder *d) {
  /* Records the current read position (d->ptr) as the checkpoint.
   *
   * The assertion is about efficiency, not correctness: it flags callers
   * that checkpoint() again without the position having changed, i.e. more
   * often than necessary. */
  assert(d->ptr != d->checkpoint);
  d->checkpoint = d->ptr;
}
// Resumes the decoder from an initial state or from a previous suspend.
/* Resumes the decoder from an initial state or from a previous suspend. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle) {
UPB_UNUSED(p); // Useless; just for the benefit of the JIT.
UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
d->buf_param = buf;
d->size_param = size;
d->handle = handle;
if (d->residual_end > d->residual) {
// We have residual bytes from the last buffer.
/* We have residual bytes from the last buffer. */
assert(d->ptr == d->residual);
} else {
switchtobuf(d, buf, buf + size);
@ -171,18 +171,20 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
return DECODE_OK;
}
// Suspends the decoder at the last checkpoint, without saving any residual
// bytes. If there are any unconsumed bytes, returns a short byte count.
/* Suspends the decoder at the last checkpoint, without saving any residual
* bytes. If there are any unconsumed bytes, returns a short byte count. */
size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
d->pc = d->last;
if (d->checkpoint == d->residual) {
// Checkpoint was in residual buf; no user bytes were consumed.
/* Checkpoint was in residual buf; no user bytes were consumed. */
d->ptr = d->residual;
return 0;
} else {
size_t consumed;
assert(!in_residual_buf(d, d->checkpoint));
assert(d->buf == d->buf_param);
size_t consumed = d->checkpoint - d->buf;
consumed = d->checkpoint - d->buf;
d->bufstart_ofs += consumed;
d->residual_end = d->residual;
switchtobuf(d, d->residual, d->residual_end);
@ -190,17 +192,17 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
}
}
// Suspends the decoder at the last checkpoint, and saves any unconsumed
// bytes in our residual buffer. This is necessary if we need more user
// bytes to form a complete value, which might not be contiguous in the
// user's buffers. Always consumes all user bytes.
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
* bytes in our residual buffer. This is necessary if we need more user
* bytes to form a complete value, which might not be contiguous in the
* user's buffers. Always consumes all user bytes. */
static size_t suspend_save(upb_pbdecoder *d) {
// We hit end-of-buffer before we could parse a full value.
// Save any unconsumed bytes (if any) to the residual buffer.
/* We hit end-of-buffer before we could parse a full value.
* Save any unconsumed bytes (if any) to the residual buffer. */
d->pc = d->last;
if (d->checkpoint == d->residual) {
// Checkpoint was in residual buf; append user byte(s) to residual buf.
/* Checkpoint was in residual buf; append user byte(s) to residual buf. */
assert((d->residual_end - d->residual) + d->size_param <=
sizeof(d->residual));
if (!in_residual_buf(d, d->ptr)) {
@ -209,10 +211,12 @@ static size_t suspend_save(upb_pbdecoder *d) {
memcpy(d->residual_end, d->buf_param, d->size_param);
d->residual_end += d->size_param;
} else {
// Checkpoint was in user buf; old residual bytes not needed.
/* Checkpoint was in user buf; old residual bytes not needed. */
size_t save;
assert(!in_residual_buf(d, d->checkpoint));
d->ptr = d->checkpoint;
size_t save = curbufleft(d);
save = curbufleft(d);
assert(save <= sizeof(d->residual));
memcpy(d->residual, d->ptr, save);
d->residual_end = d->residual + save;
@ -223,19 +227,21 @@ static size_t suspend_save(upb_pbdecoder *d) {
return d->size_param;
}
// Skips "bytes" bytes in the stream, which may be more than available. If we
// skip more bytes than are available, we return a long read count to the caller
// indicating how many bytes the caller should skip before passing a new buffer.
/* Skips "bytes" bytes in the stream, which may be more than available. If we
* skip more bytes than are available, we return a long read count to the caller
* indicating how many bytes the caller should skip before passing a new buffer.
*/
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
if (curbufleft(d) >= bytes) {
// Skipped data is all in current buffer.
/* Skipped data is all in current buffer. */
advance(d, bytes);
return DECODE_OK;
} else {
// Skipped data extends beyond currently available buffers.
/* Skipped data extends beyond currently available buffers. */
size_t skip;
d->pc = d->last;
size_t skip = bytes - curbufleft(d);
skip = bytes - curbufleft(d);
d->bufstart_ofs += (d->end - d->buf) + skip;
d->residual_end = d->residual;
switchtobuf(d, d->residual, d->residual_end);
@ -243,8 +249,8 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
}
}
// Copies the next "bytes" bytes into "buf" and advances the stream.
// Requires that this many bytes are available in the current buffer.
/* Copies the next "bytes" bytes into "buf" and advances the stream.
* Requires that this many bytes are available in the current buffer. */
UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
size_t bytes) {
assert(bytes <= curbufleft(d));
@ -252,9 +258,9 @@ UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
advance(d, bytes);
}
// Slow path for getting the next "bytes" bytes, regardless of whether they are
// available in the current buffer or not. Returns a status code as described
// in decoder.int.h.
/* Slow path for getting the next "bytes" bytes, regardless of whether they are
* available in the current buffer or not. Returns a status code as described
* in decoder.int.h. */
UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
size_t bytes) {
const size_t avail = curbufleft(d);
@ -275,12 +281,13 @@ UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
}
}
// Gets the next "bytes" bytes, regardless of whether they are available in the
// current buffer or not. Returns a status code as described in decoder.int.h.
/* Gets the next "bytes" bytes, regardless of whether they are available in the
* current buffer or not. Returns a status code as described in decoder.int.h.
*/
UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
size_t bytes) {
if (curbufleft(d) >= bytes) {
// Buffer has enough data to satisfy.
/* Buffer has enough data to satisfy. */
consumebytes(d, buf, bytes);
return DECODE_OK;
} else {
@ -313,13 +320,13 @@ UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
/* Decoding of wire types *****************************************************/
// Slow path for decoding a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
/* Slow path for decoding a varint from the current buffer position.
* Returns a status code as described in decoder.int.h. */
UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
uint64_t *u64) {
*u64 = 0;
uint8_t byte = 0x80;
int bitpos;
*u64 = 0;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
int32_t ret = getbytes(d, &byte, 1);
if (ret >= 0) return ret;
@ -332,15 +339,15 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
return DECODE_OK;
}
// Decodes a varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
/* Decodes a varint from the current buffer position.
* Returns a status code as described in decoder.int.h. */
UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
*u64 = *d->ptr;
advance(d, 1);
return DECODE_OK;
} else if (curbufleft(d) >= 10) {
// Fast case.
/* Fast case. */
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
seterr(d, kUnterminatedVarint);
@ -350,22 +357,23 @@ UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
*u64 = r.val;
return DECODE_OK;
} else {
// Slow case -- varint spans buffer seam.
/* Slow case -- varint spans buffer seam. */
return upb_pbdecoder_decode_varint_slow(d, u64);
}
}
// Decodes a 32-bit varint from the current buffer position.
// Returns a status code as described in decoder.int.h.
/* Decodes a 32-bit varint from the current buffer position.
* Returns a status code as described in decoder.int.h. */
UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
uint64_t u64;
int32_t ret = decode_varint(d, &u64);
if (ret >= 0) return ret;
if (u64 > UINT32_MAX) {
seterr(d, "Unterminated 32-bit varint");
// TODO(haberman) guarantee that this function return is >= 0 somehow,
// so we know this path will always be treated as error by our caller.
// Right now the size_t -> int32_t can overflow and produce negative values.
/* TODO(haberman) guarantee that this function return is >= 0 somehow,
* so we know this path will always be treated as error by our caller.
* Right now the size_t -> int32_t can overflow and produce negative values.
*/
*u32 = 0;
return upb_pbdecoder_suspend(d);
}
@ -373,22 +381,22 @@ UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
return DECODE_OK;
}
// Decodes a fixed32 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
/* Decodes a fixed32 from the current buffer position.
* Returns a status code as described in decoder.int.h.
* TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
  /* A fixed32 is read as exactly four bytes, copied into *u32 as-is (no byte
   * swapping is done here).  The status from getbytes() is passed through. */
  const size_t width = 4;
  return getbytes(d, u32, width);
}
// Decodes a fixed64 from the current buffer position.
// Returns a status code as described in decoder.int.h.
// TODO: proper byte swapping for big-endian machines.
/* Decodes a fixed64 from the current buffer position.
* Returns a status code as described in decoder.int.h.
* TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
  /* A fixed64 is read as exactly eight bytes, copied into *u64 as-is (no byte
   * swapping is done here).  The status from getbytes() is passed through. */
  const size_t width = 8;
  return getbytes(d, u64, width);
}
// Non-static versions of the above functions.
// These are called by the JIT for fallback paths.
/* Non-static versions of the above functions.
* These are called by the JIT for fallback paths. */
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
  /* Non-static wrapper around decode_fixed32(); returns its status unchanged. */
  int32_t status = decode_fixed32(d, u32);
  return status;
}
@ -400,7 +408,7 @@ int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
static double as_double(uint64_t n) {
  /* Reinterprets the 8 raw bytes of n as a double.  The bytes are copied with
   * memcpy() rather than read through a cast pointer. */
  double result;
  memcpy(&result, &n, 8);
  return result;
}
static float as_float(uint32_t n) {
  /* Reinterprets the 4 raw bytes of n as a float.  The bytes are copied with
   * memcpy() rather than read through a cast pointer. */
  float result;
  memcpy(&result, &n, 4);
  return result;
}
// Pushes a frame onto the decoder stack.
/* Pushes a frame onto the decoder stack. */
static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
upb_pbdecoder_frame *fr = d->top;
@ -421,17 +429,17 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
}
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* Pushes a frame that reuses the current frame's end offset, then stores
   * arg as the group number of the resulting top frame.  Returns false if
   * the push fails.
   *
   * We expect an "end" tag (either ENDGROUP or a non-sequence field number)
   * to show up before any enclosing submessage ends.  Carrying the existing
   * delimited end forward still stops us from continuing to parse values out
   * of a corrupt proto that never supplies that END tag in time. */
  if (decoder_push(d, d->top->end_ofs)) {
    d->top->groupnum = arg;
    return true;
  }
  return false;
}
// Pops a frame from the decoder stack.
/* Pops a frame from the decoder stack. */
static void decoder_pop(upb_pbdecoder *d) {
  /* Popping only moves the top-of-stack pointer back by one frame. */
  d->top -= 1;
}
UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
@ -440,7 +448,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
size_t bytes = upb_value_size(expected);
size_t read = peekbytes(d, &data, bytes);
if (read == bytes && data == expected) {
// Advance past matched bytes.
/* Advance past matched bytes. */
int32_t ok = getbytes(d, &data, read);
UPB_ASSERT_VAR(ok, ok < 0);
return DECODE_OK;
@ -468,7 +476,7 @@ have_tag:
return upb_pbdecoder_suspend(d);
}
// TODO: deliver to unknown field callback.
/* TODO: deliver to unknown field callback. */
switch (wire_type) {
case UPB_WIRE_TYPE_32BIT:
CHECK_RETURN(skip(d, 4));
@ -511,29 +519,29 @@ have_tag:
if (d->ptr == d->delim_end) {
seterr(d, "Enclosing submessage ended in the middle of value or group");
// Unlike most errors we notice during parsing, right now we have consumed
// all of the user's input.
//
// There are three different options for how to handle this case:
//
// 1. decode() = short count, error = set
// 2. decode() = full count, error = set
// 3. decode() = full count, error NOT set, short count and error will
// be reported on next call to decode() (or end())
//
// (1) and (3) have the advantage that they preserve the invariant that an
// error occurs iff decode() returns a short count.
//
// (2) and (3) have the advantage of reflecting the fact that all of the
// bytes were in fact parsed (and possibly delivered to the unknown field
// handler, in the future when that is supported).
//
// (3) requires extra state in the decode (a place to store the "permanent
// error" that we should return for all subsequent attempts to decode).
// But we likely want this anyway.
//
// Right now we do (1), thanks to the fact that we checkpoint *after* this
// check. (3) may be a better choice long term; unclear at the moment.
/* Unlike most errors we notice during parsing, right now we have consumed
* all of the user's input.
*
* There are three different options for how to handle this case:
*
* 1. decode() = short count, error = set
* 2. decode() = full count, error = set
* 3. decode() = full count, error NOT set, short count and error will
* be reported on next call to decode() (or end())
*
* (1) and (3) have the advantage that they preserve the invariant that an
* error occurs iff decode() returns a short count.
*
* (2) and (3) have the advantage of reflecting the fact that all of the
* bytes were in fact parsed (and possibly delivered to the unknown field
* handler, in the future when that is supported).
*
* (3) requires extra state in the decode (a place to store the "permanent
* error" that we should return for all subsequent attempts to decode).
* But we likely want this anyway.
*
* Right now we do (1), thanks to the fact that we checkpoint *after* this
* check. (3) may be a better choice long term; unclear at the moment. */
return upb_pbdecoder_suspend(d);
}
@ -548,24 +556,27 @@ static void goto_endmsg(upb_pbdecoder *d) {
d->pc = d->top->base + upb_value_getuint64(v);
}
// Parses a tag and jumps to the corresponding bytecode instruction for this
// field.
//
// If the tag is unknown (or the wire type doesn't match), parses the field as
// unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
// instruction for the end of message.
/* Parses a tag and jumps to the corresponding bytecode instruction for this
* field.
*
* If the tag is unknown (or the wire type doesn't match), parses the field as
* unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
* instruction for the end of message. */
static int32_t dispatch(upb_pbdecoder *d) {
upb_inttable *dispatch = d->top->dispatch;
// Decode tag.
uint32_t tag;
uint8_t wire_type;
uint32_t fieldnum;
upb_value val;
int32_t ret;
/* Decode tag. */
CHECK_RETURN(decode_v32(d, &tag));
uint8_t wire_type = tag & 0x7;
uint32_t fieldnum = tag >> 3;
wire_type = tag & 0x7;
fieldnum = tag >> 3;
// Lookup tag. Because of packed/non-packed compatibility, we have to
// check the wire type against two possibilities.
upb_value val;
/* Lookup tag. Because of packed/non-packed compatibility, we have to
* check the wire type against two possibilities. */
if (fieldnum != DISPATCH_ENDMSG &&
upb_inttable_lookup32(dispatch, fieldnum, &val)) {
uint64_t v = upb_value_getuint64(val);
@ -581,17 +592,17 @@ static int32_t dispatch(upb_pbdecoder *d) {
}
}
// Unknown field or ENDGROUP.
int32_t ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
/* Unknown field or ENDGROUP. */
ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
if (ret == DECODE_ENDGROUP) {
goto_endmsg(d);
return DECODE_OK;
} else if (ret == DECODE_OK) {
// We just consumed some input, so we might now have consumed all the data
// in the delmited region. Since every opcode that can trigger dispatch is
// directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
// delimited end.
/* We just consumed some input, so we might now have consumed all the data
* in the delimited region. Since every opcode that can trigger dispatch is
* directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
* delimited end. */
d->pc = d->last - 1;
assert(getop(*d->pc) == OP_CHECKDELIM);
return DECODE_OK;
@ -600,8 +611,8 @@ static int32_t dispatch(upb_pbdecoder *d) {
return ret;
}
// Callers know that the stack is more than one deep because the opcodes that
// call this only occur after PUSH operations.
/* Callers know that the stack is more than one deep because the opcodes that
* call this only occur after PUSH operations. */
upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
assert(d->top != d->stack);
return d->top - 1;
@ -610,14 +621,15 @@ upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
/* The main decoding loop *****************************************************/
// The main decoder VM function. Uses traditional bytecode dispatch loop with a
// switch() statement.
/* The main decoder VM function. Uses traditional bytecode dispatch loop with a
* switch() statement. */
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
size_t size, const upb_bufhandle *handle) {
upb_pbdecoder *d = closure;
const mgroup *group = hd;
int32_t result;
assert(buf);
int32_t result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
result = upb_pbdecoder_resume(d, NULL, buf, size, handle);
if (result == DECODE_ENDGROUP) {
goto_endmsg(d);
}
@ -634,11 +646,16 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
})
while(1) {
int32_t instruction;
opcode op;
uint32_t arg;
int32_t longofs;
d->last = d->pc;
int32_t instruction = *d->pc++;
opcode op = getop(instruction);
uint32_t arg = instruction >> 8;
int32_t longofs = arg;
instruction = *d->pc++;
op = getop(instruction);
arg = instruction >> 8;
longofs = arg;
assert(d->ptr != d->residual_end);
#ifdef UPB_DUMP_BYTECODE
fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
@ -653,9 +670,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
arg);
#endif
switch (op) {
// Technically, we are losing data if we see a 32-bit varint that is not
// properly sign-extended. We could detect this and error about the data
// loss, but proto2 does not do this, so we pass.
/* Technically, we are losing data if we see a 32-bit varint that is not
* properly sign-extended. We could detect this and error about the data
* loss, but proto2 does not do this, so we pass. */
PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
@ -700,7 +717,7 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
upb_pbdecoder_frame *outer = outer_frame(d);
CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
if (len == 0) {
d->pc++; // Skip OP_STRING.
d->pc++; /* Skip OP_STRING. */
}
)
VMCASE(OP_STRING,
@ -712,15 +729,15 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
return upb_pbdecoder_suspend(d);
} else {
int32_t ret = skip(d, n);
// This shouldn't return DECODE_OK, because n > len.
/* This shouldn't return DECODE_OK, because n > len. */
assert(ret >= 0);
return ret;
}
}
advance(d, n);
if (n < len || d->delim_end == NULL) {
// We aren't finished with this string yet.
d->pc--; // Repeat OP_STRING.
/* We aren't finished with this string yet. */
d->pc--; /* Repeat OP_STRING. */
if (n > 0) checkpoint(d);
return upb_pbdecoder_suspend(d);
}
@ -748,8 +765,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
set_delim_end(d);
)
VMCASE(OP_CHECKDELIM,
// We are guaranteed of this assert because we never allow ourselves to
// consume bytes beyond data_end, which covers delim_end when non-NULL.
/* We are guaranteed of this assert because we never allow ourselves to
* consume bytes beyond data_end, which covers delim_end when non-NULL.
*/
assert(!(d->delim_end && d->ptr > d->delim_end));
if (d->ptr == d->delim_end)
d->pc += longofs;
@ -766,8 +784,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
d->pc += longofs;
)
VMCASE(OP_TAG1,
uint8_t expected;
CHECK_SUSPEND(curbufleft(d) > 0);
uint8_t expected = (arg >> 8) & 0xff;
expected = (arg >> 8) & 0xff;
if (*d->ptr == expected) {
advance(d, 1);
} else {
@ -778,13 +797,14 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
CHECK_RETURN(dispatch(d));
} else {
d->pc += shortofs;
break; // Avoid checkpoint().
break; /* Avoid checkpoint(). */
}
}
)
VMCASE(OP_TAG2,
uint16_t expected;
CHECK_SUSPEND(curbufleft(d) > 0);
uint16_t expected = (arg >> 8) & 0xffff;
expected = (arg >> 8) & 0xffff;
if (curbufleft(d) >= 2) {
uint16_t actual;
memcpy(&actual, d->ptr, 2);
@ -801,9 +821,10 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
)
VMCASE(OP_TAGN, {
uint64_t expected;
int32_t result;
memcpy(&expected, d->pc, 8);
d->pc += 2;
int32_t result = upb_pbdecoder_checktag_slow(d, expected);
result = upb_pbdecoder_checktag_slow(d, expected);
if (result == DECODE_MISMATCH) goto badtag;
if (result >= 0) return result;
})
@ -829,9 +850,9 @@ void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
}
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
upb_pbdecoder *d = closure;
UPB_UNUSED(hd);
UPB_UNUSED(size_hint);
upb_pbdecoder *d = closure;
d->top->end_ofs = UINT64_MAX;
d->bufstart_ofs = 0;
d->call_len = 0;
@ -841,6 +862,11 @@ void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
upb_pbdecoder *d = closure;
const upb_pbdecodermethod *method = handler_data;
uint64_t end;
char dummy;
#ifdef UPB_USE_JIT_X64
const mgroup *group = (const mgroup*)method->group;
#endif
if (d->residual_end > d->residual) {
seterr(d, "Unexpected EOF");
@ -852,25 +878,24 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
return false;
}
// Message ends here.
uint64_t end = offset(d);
/* Message ends here. */
end = offset(d);
d->top->end_ofs = end;
char dummy;
#ifdef UPB_USE_JIT_X64
const mgroup *group = (const mgroup*)method->group;
if (group->jit_code) {
if (d->top != d->stack)
d->stack->end_ofs = 0;
group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
} else {
} else
#endif
d->stack->end_ofs = end;
{
const uint32_t *p = d->pc;
// Check the previous bytecode, but guard against beginning.
d->stack->end_ofs = end;
/* Check the previous bytecode, but guard against beginning. */
if (p != method->code_base.ptr) p--;
if (getop(*p) == OP_CHECKDELIM) {
// Rewind from OP_TAG* to OP_CHECKDELIM.
/* Rewind from OP_TAG* to OP_CHECKDELIM. */
assert(getop(*d->pc) == OP_TAG1 ||
getop(*d->pc) == OP_TAG2 ||
getop(*d->pc) == OP_TAGN ||
@ -878,9 +903,7 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
d->pc = p;
}
upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
#ifdef UPB_USE_JIT_X64
}
#endif
if (d->call_len != 0) {
seterr(d, "Unexpected EOF");
@ -909,8 +932,8 @@ static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
#ifdef UPB_USE_JIT_X64
if (d->method_->is_native_) {
// Each native stack frame needs two pointers, plus we need a few frames for
// the enter/exit trampolines.
/* Each native stack frame needs two pointers, plus we need a few frames for
* the enter/exit trampolines. */
size_t ret = entries * sizeof(void*) * 2;
ret += sizeof(void*) * 10;
return ret;
@ -951,7 +974,7 @@ upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
}
upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
// If this fails, increase the value in decoder.h.
/* If this fails, increase the value in decoder.h. */
assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
return d;
}
@ -976,12 +999,12 @@ bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
assert(d->top >= d->stack);
if (max < (size_t)(d->top - d->stack)) {
// Can't set a limit smaller than what we are currently at.
/* Can't set a limit smaller than what we are currently at. */
return false;
}
if (max > d->stack_size) {
// Need to reallocate stack and callstack to accommodate.
/* Need to reallocate stack and callstack to accommodate. */
size_t old_size = stacksize(d, d->stack_size);
size_t new_size = stacksize(d, max);
void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);

@ -28,134 +28,111 @@ class CodeCache;
class Decoder;
class DecoderMethod;
class DecoderMethodOptions;
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::pb::CodeCache, upb_pbcodecache);
UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder);
UPB_DECLARE_TYPE(upb::pb::DecoderMethod, upb_pbdecodermethod);
UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts);
UPB_DECLARE_TYPE(upb::pb::CodeCache, upb_pbcodecache)
UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder)
UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts)
// The parameters one uses to construct a DecoderMethod.
// TODO(haberman): move allowjit here? Seems more convenient for users.
UPB_DEFINE_CLASS0(upb::pb::DecoderMethodOptions,
UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted,
upb_pbdecodermethod, upb_refcounted)
#ifdef __cplusplus
/* The parameters one uses to construct a DecoderMethod.
* TODO(haberman): move allowjit here? Seems more convenient for users.
* TODO(haberman): move this to be heap allocated for ABI stability. */
class upb::pb::DecoderMethodOptions {
public:
// Parameter represents the destination handlers that this method will push
// to.
/* Parameter represents the destination handlers that this method will push
* to. */
explicit DecoderMethodOptions(const Handlers* dest_handlers);
// Should the decoder push submessages to lazy handlers for fields that have
// them? The caller should set this iff the lazy handlers expect data that is
// in protobuf binary format and the caller wishes to lazy parse it.
/* Should the decoder push submessages to lazy handlers for fields that have
* them? The caller should set this iff the lazy handlers expect data that is
* in protobuf binary format and the caller wishes to lazy parse it. */
void set_lazy(bool lazy);
,
UPB_DEFINE_STRUCT0(upb_pbdecodermethodopts,
#else
struct upb_pbdecodermethodopts {
#endif
const upb_handlers *handlers;
bool lazy;
));
};
#ifdef __cplusplus
// Represents the code to parse a protobuf according to a destination Handlers.
UPB_DEFINE_CLASS1(upb::pb::DecoderMethod, upb::RefCounted,
/* Represents the code to parse a protobuf according to a destination
* Handlers. */
class upb::pb::DecoderMethod {
public:
// From upb::ReferenceCounted.
void Ref(const void* owner) const;
void Unref(const void* owner) const;
void DonateRef(const void* from, const void* to) const;
void CheckRef(const void* owner) const;
// The destination handlers that are statically bound to this method.
// This method is only capable of outputting to a sink that uses these
// handlers.
/* Include base methods from upb::RefCounted. */
UPB_REFCOUNTED_CPPMETHODS
/* The destination handlers that are statically bound to this method.
* This method is only capable of outputting to a sink that uses these
* handlers. */
const Handlers* dest_handlers() const;
// The input handlers for this decoder method.
/* The input handlers for this decoder method. */
const BytesHandler* input_handler() const;
// Whether this method is native.
/* Whether this method is native. */
bool is_native() const;
// Convenience method for generating a DecoderMethod without explicitly
// creating a CodeCache.
/* Convenience method for generating a DecoderMethod without explicitly
* creating a CodeCache. */
static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts);
private:
UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod);
,
UPB_DEFINE_STRUCT(upb_pbdecodermethod, upb_refcounted,
// While compiling, the base is relative in "ofs", after compiling it is
// absolute in "ptr".
union {
uint32_t ofs; // PC offset of method.
void *ptr; // Pointer to bytecode or machine code for this method.
} code_base;
// The decoder method group to which this method belongs. We own a ref.
// Owning a ref on the entire group is more coarse-grained than is strictly
// necessary; all we truly require is that methods we directly reference
// outlive us, while the group could contain many other messages we don't
// require. But the group represents the messages that were
// allocated+compiled together, so it makes the most sense to free them
// together also.
const upb_refcounted *group;
// Whether this method is native code or bytecode.
bool is_native_;
// The handler one calls to invoke this method.
upb_byteshandler input_handler_;
// The destination handlers this method is bound to. We own a ref.
const upb_handlers *dest_handlers_;
// Dispatch table -- used by both bytecode decoder and JIT when encountering a
// field number that wasn't the one we were expecting to see. See
// decoder.int.h for the layout of this table.
upb_inttable dispatch;
));
// Preallocation hint: decoder won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the decoder library is upgraded without recompiling the application,
// it may be an underestimate.
UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod)
};
#endif
/* Preallocation hint: decoder won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the decoder library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_PB_DECODER_SIZE 4400
#ifdef __cplusplus
// A Decoder receives binary protobuf data on its input sink and pushes the
// decoded data to its output sink.
/* A Decoder receives binary protobuf data on its input sink and pushes the
* decoded data to its output sink. */
class upb::pb::Decoder {
public:
// Constructs a decoder instance for the given method, which must outlive this
// decoder. Any errors during parsing will be set on the given status, which
// must also outlive this decoder.
//
// The sink must match the given method.
/* Constructs a decoder instance for the given method, which must outlive this
* decoder. Any errors during parsing will be set on the given status, which
* must also outlive this decoder.
*
* The sink must match the given method. */
static Decoder* Create(Environment* env, const DecoderMethod* method,
Sink* output);
// Returns the DecoderMethod this decoder is parsing from.
/* Returns the DecoderMethod this decoder is parsing from. */
const DecoderMethod* method() const;
// The sink on which this decoder receives input.
/* The sink on which this decoder receives input. */
BytesSink* input();
// Returns number of bytes successfully parsed.
//
// This can be useful for determining the stream position where an error
// occurred.
//
// This value may not be up-to-date when called from inside a parsing
// callback.
/* Returns number of bytes successfully parsed.
*
* This can be useful for determining the stream position where an error
* occurred.
*
* This value may not be up-to-date when called from inside a parsing
* callback. */
uint64_t BytesParsed() const;
// Gets/sets the parsing nexting limit. If the total number of nested
// submessages and repeated fields hits this limit, parsing will fail. This
// is a resource limit that controls the amount of memory used by the parsing
// stack.
//
// Setting the limit will fail if the parser is currently suspended at a depth
// greater than this, or if memory allocation of the stack fails.
/* Gets/sets the parsing nesting limit. If the total number of nested
* submessages and repeated fields hits this limit, parsing will fail. This
* is a resource limit that controls the amount of memory used by the parsing
* stack.
*
* Setting the limit will fail if the parser is currently suspended at a depth
* greater than this, or if memory allocation of the stack fails. */
size_t max_nesting() const;
bool set_max_nesting(size_t max);
@ -164,57 +141,62 @@ class upb::pb::Decoder {
static const size_t kSize = UPB_PB_DECODER_SIZE;
private:
UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder);
UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder)
};
#endif // __cplusplus
#endif /* __cplusplus */
// A class for caching protobuf processing code, whether bytecode for the
// interpreted decoder or machine code for the JIT.
//
// This class is not thread-safe.
UPB_DEFINE_CLASS0(upb::pb::CodeCache,
#ifdef __cplusplus
/* A class for caching protobuf processing code, whether bytecode for the
* interpreted decoder or machine code for the JIT.
*
* This class is not thread-safe.
*
* TODO(haberman): move this to be heap allocated for ABI stability. */
class upb::pb::CodeCache {
public:
CodeCache();
~CodeCache();
// Whether the cache is allowed to generate machine code. Defaults to true.
// There is no real reason to turn it off except for testing or if you are
// having a specific problem with the JIT.
//
// Note that allow_jit = true does not *guarantee* that the code will be JIT
// compiled. If this platform is not supported or the JIT was not compiled
// in, the code may still be interpreted.
/* Whether the cache is allowed to generate machine code. Defaults to true.
* There is no real reason to turn it off except for testing or if you are
* having a specific problem with the JIT.
*
* Note that allow_jit = true does not *guarantee* that the code will be JIT
* compiled. If this platform is not supported or the JIT was not compiled
* in, the code may still be interpreted. */
bool allow_jit() const;
// This may only be called when the object is first constructed, and prior to
// any code generation, otherwise returns false and does nothing.
/* This may only be called when the object is first constructed, and prior to
* any code generation, otherwise returns false and does nothing. */
bool set_allow_jit(bool allow);
// Returns a DecoderMethod that can push data to the given handlers.
// If a suitable method already exists, it will be returned from the cache.
//
// Specifying the destination handlers here allows the DecoderMethod to be
// statically bound to the destination handlers if possible, which can allow
// more efficient decoding. However the returned method may or may not
// actually be statically bound. But in all cases, the returned method can
// push data to the given handlers.
/* Returns a DecoderMethod that can push data to the given handlers.
* If a suitable method already exists, it will be returned from the cache.
*
* Specifying the destination handlers here allows the DecoderMethod to be
* statically bound to the destination handlers if possible, which can allow
* more efficient decoding. However the returned method may or may not
* actually be statically bound. But in all cases, the returned method can
* push data to the given handlers. */
const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts);
// If/when someone needs to explicitly create a dynamically-bound
// DecoderMethod*, we can add a method to get it here.
/* If/when someone needs to explicitly create a dynamically-bound
* DecoderMethod*, we can add a method to get it here. */
private:
UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache);
,
UPB_DEFINE_STRUCT0(upb_pbcodecache,
UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache)
#else
struct upb_pbcodecache {
#endif
bool allow_jit_;
// Array of mgroups.
/* Array of mgroups. */
upb_inttable groups;
));
};
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
const upb_pbdecodermethod *method,
@ -230,12 +212,10 @@ void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
const upb_handlers *h);
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy);
void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner);
void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner);
void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m,
const void *from, const void *to);
void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m,
const void *owner);
/* Include refcounted methods like upb_pbdecodermethod_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_pbdecodermethod, upb_pbdecodermethod_upcast)
const upb_handlers *upb_pbdecodermethod_desthandlers(
const upb_pbdecodermethod *m);
const upb_byteshandler *upb_pbdecodermethod_inputhandler(
@ -251,7 +231,7 @@ bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
upb_pbcodecache *c, const upb_pbdecodermethodopts *opts);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
@ -259,7 +239,7 @@ namespace upb {
namespace pb {
// static
/* static */
inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
Sink* sink) {
return upb_pbdecoder_create(env, m, sink);
@ -288,18 +268,6 @@ inline void DecoderMethodOptions::set_lazy(bool lazy) {
upb_pbdecodermethodopts_setlazy(this, lazy);
}
inline void DecoderMethod::Ref(const void *owner) const {
upb_pbdecodermethod_ref(this, owner);
}
inline void DecoderMethod::Unref(const void *owner) const {
upb_pbdecodermethod_unref(this, owner);
}
inline void DecoderMethod::DonateRef(const void *from, const void *to) const {
upb_pbdecodermethod_donateref(this, from, to);
}
inline void DecoderMethod::CheckRef(const void *owner) const {
upb_pbdecodermethod_checkref(this, owner);
}
inline const Handlers* DecoderMethod::dest_handlers() const {
return upb_pbdecodermethod_desthandlers(this);
}
@ -309,7 +277,7 @@ inline const BytesHandler* DecoderMethod::input_handler() const {
inline bool DecoderMethod::is_native() const {
return upb_pbdecodermethod_isnative(this);
}
// static
/* static */
inline reffed_ptr<const DecoderMethod> DecoderMethod::New(
const DecoderMethodOptions &opts) {
const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m);
@ -333,9 +301,9 @@ inline const DecoderMethod *CodeCache::GetDecoderMethod(
return upb_pbcodecache_getdecodermethod(this, &opts);
}
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif // __cplusplus
#endif /* __cplusplus */
#endif /* UPB_DECODER_H_ */

@ -15,28 +15,40 @@
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/sink.h"
#include "upb/structdefs.int.h"
#include "upb/table.int.h"
// Opcode definitions. The canonical meaning of each opcode is its
// implementation in the interpreter (the JIT is written to match this).
//
// All instructions have the opcode in the low byte.
// Instruction format for most instructions is:
//
// +-------------------+--------+
// | arg (24) | op (8) |
// +-------------------+--------+
//
// Exceptions are indicated below. A few opcodes are multi-word.
/* C++ names are not actually used since this type isn't exposed to users. */
#ifdef __cplusplus
namespace upb {
namespace pb {
class MessageGroup;
} /* namespace pb */
} /* namespace upb */
#endif
UPB_DECLARE_DERIVED_TYPE(upb::pb::MessageGroup, upb::RefCounted,
mgroup, upb_refcounted)
/* Opcode definitions. The canonical meaning of each opcode is its
* implementation in the interpreter (the JIT is written to match this).
*
* All instructions have the opcode in the low byte.
* Instruction format for most instructions is:
*
* +-------------------+--------+
* | arg (24) | op (8) |
* +-------------------+--------+
*
* Exceptions are indicated below. A few opcodes are multi-word. */
typedef enum {
// Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
// Arg for all of these is the upb selector for this field.
/* Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
* Arg for all of these is the upb selector for this field. */
#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
#undef T
OP_STARTMSG = 9, // No arg.
OP_ENDMSG = 10, // No arg.
OP_STARTMSG = 9, /* No arg. */
OP_ENDMSG = 10, /* No arg. */
OP_STARTSEQ = 11,
OP_ENDSEQ = 12,
OP_STARTSUBMSG = 14,
@ -45,148 +57,185 @@ typedef enum {
OP_STRING = 21,
OP_ENDSTR = 22,
OP_PUSHTAGDELIM = 23, // No arg.
OP_PUSHLENDELIM = 24, // No arg.
OP_POP = 25, // No arg.
OP_SETDELIM = 26, // No arg.
OP_SETBIGGROUPNUM = 27, // two words: | unused (24) | opc || groupnum (32) |
OP_PUSHTAGDELIM = 23, /* No arg. */
OP_PUSHLENDELIM = 24, /* No arg. */
OP_POP = 25, /* No arg. */
OP_SETDELIM = 26, /* No arg. */
OP_SETBIGGROUPNUM = 27, /* two words:
* | unused (24) | opc (8) |
* | groupnum (32) | */
OP_CHECKDELIM = 28,
OP_CALL = 29,
OP_RET = 30,
OP_BRANCH = 31,
// Different opcodes depending on how many bytes expected.
OP_TAG1 = 32, // | expected tag (16) | jump target (8) | opc (8) |
OP_TAG2 = 33, // | expected tag (16) | jump target (8) | opc (8) |
OP_TAGN = 34, // three words:
// | unused (16) | jump target(8) | opc (8) |
// | expected tag 1 (32) |
// | expected tag 2 (32) |
/* Different opcodes depending on how many bytes expected. */
OP_TAG1 = 32, /* | match tag (16) | jump target (8) | opc (8) | */
OP_TAG2 = 33, /* | match tag (16) | jump target (8) | opc (8) | */
OP_TAGN = 34, /* three words: */
/* | unused (16) | jump target(8) | opc (8) | */
/* | match tag 1 (32) | */
/* | match tag 2 (32) | */
OP_SETDISPATCH = 35, // N words:
// | unused (24) | opc |
// | upb_inttable* (32 or 64) |
OP_SETDISPATCH = 35, /* N words: */
/* | unused (24) | opc | */
/* | upb_inttable* (32 or 64) | */
OP_DISPATCH = 36, // No arg.
OP_DISPATCH = 36, /* No arg. */
OP_HALT = 37, // No arg.
OP_HALT = 37 /* No arg. */
} opcode;
#define OP_MAX OP_HALT
/* Extracts the opcode from an instruction word.  All instructions carry the
 * opcode in their low byte, so mask off everything above it. */
UPB_INLINE opcode getop(uint32_t instr) {
  return (opcode)(instr & 0xff);
}
// Method group; represents a set of decoder methods that had their code
// emitted together, and must therefore be freed together. Immutable once
// created. It is possible we may want to expose this to users at some point.
//
// Overall ownership of Decoder objects looks like this:
//
// +----------+
// | | <---> DecoderMethod
// | method |
// CodeCache ---> | group | <---> DecoderMethod
// | |
// | (mgroup) | <---> DecoderMethod
// +----------+
typedef struct {
/* Method group; represents a set of decoder methods that had their code
* emitted together, and must therefore be freed together. Immutable once
* created. It is possible we may want to expose this to users at some point.
*
* Overall ownership of Decoder objects looks like this:
*
* +----------+
* | | <---> DecoderMethod
* | method |
* CodeCache ---> | group | <---> DecoderMethod
* | |
* | (mgroup) | <---> DecoderMethod
* +----------+
*/
struct mgroup {
upb_refcounted base;
// Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. We own refs on the
// methods.
/* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. We own refs on the
* methods. */
upb_inttable methods;
// When we add the ability to link to previously existing mgroups, we'll
// need an array of mgroups we reference here, and own refs on them.
/* When we add the ability to link to previously existing mgroups, we'll
* need an array of mgroups we reference here, and own refs on them. */
// The bytecode for our methods, if any exists. Owned by us.
/* The bytecode for our methods, if any exists. Owned by us. */
uint32_t *bytecode;
uint32_t *bytecode_end;
#ifdef UPB_USE_JIT_X64
// JIT-generated machine code, if any.
/* JIT-generated machine code, if any. */
upb_string_handlerfunc *jit_code;
// The size of the jit_code (required to munmap()).
/* The size of the jit_code (required to munmap()). */
size_t jit_size;
char *debug_info;
void *dl;
#endif
} mgroup;
// The maximum that any submessages can be nested. Matches proto2's limit.
// This specifies the size of the decoder's statically-sized array and therefore
// setting it high will cause the upb::pb::Decoder object to be larger.
//
// If necessary we can add a runtime-settable property to Decoder that allow
// this to be larger than the compile-time setting, but this would add
// complexity, particularly since we would have to decide how/if to give users
// the ability to set a custom memory allocation function.
};
/* The maximum that any submessages can be nested. Matches proto2's limit.
* This specifies the size of the decoder's statically-sized array and therefore
* setting it high will cause the upb::pb::Decoder object to be larger.
*
* If necessary we can add a runtime-settable property to Decoder that allows
* this to be larger than the compile-time setting, but this would add
* complexity, particularly since we would have to decide how/if to give users
* the ability to set a custom memory allocation function. */
#define UPB_DECODER_MAX_NESTING 64
// Internal-only struct used by the decoder.
/* Internal-only struct used by the decoder. */
typedef struct {
// Space optimization note: we store two pointers here that the JIT
// doesn't need at all; the upb_handlers* inside the sink and
// the dispatch table pointer. We can optimze so that the JIT uses
// smaller stack frames than the interpreter. The only thing we need
// to guarantee is that the fallback routines can find end_ofs.
/* Space optimization note: we store two pointers here that the JIT
* doesn't need at all; the upb_handlers* inside the sink and
* the dispatch table pointer. We can optimize so that the JIT uses
* smaller stack frames than the interpreter. The only thing we need
* to guarantee is that the fallback routines can find end_ofs. */
upb_sink sink;
// The absolute stream offset of the end-of-frame delimiter.
// Non-delimited frames (groups and non-packed repeated fields) reuse the
// delimiter of their parent, even though the frame may not end there.
//
// NOTE: the JIT stores a slightly different value here for non-top frames.
// It stores the value relative to the end of the enclosed message. But the
// top frame is still stored the same way, which is important for ensuring
// that calls from the JIT into C work correctly.
/* The absolute stream offset of the end-of-frame delimiter.
* Non-delimited frames (groups and non-packed repeated fields) reuse the
* delimiter of their parent, even though the frame may not end there.
*
* NOTE: the JIT stores a slightly different value here for non-top frames.
* It stores the value relative to the end of the enclosed message. But the
* top frame is still stored the same way, which is important for ensuring
* that calls from the JIT into C work correctly. */
uint64_t end_ofs;
const uint32_t *base;
// 0 indicates a length-delimited field.
// A positive number indicates a known group.
// A negative number indicates an unknown group.
/* 0 indicates a length-delimited field.
* A positive number indicates a known group.
* A negative number indicates an unknown group. */
int32_t groupnum;
upb_inttable *dispatch; // Not used by the JIT.
upb_inttable *dispatch; /* Not used by the JIT. */
} upb_pbdecoder_frame;
/* Internal definition of upb_pbdecodermethod: the code to parse a protobuf
 * according to a destination set of handlers.  The struct is declared as a
 * type derived from upb_refcounted.
 *
 * NOTE(review): "base" presumably has to stay the first member so that the
 * upcast to upb_refcounted remains valid -- confirm before reordering. */
struct upb_pbdecodermethod {
/* Refcounting state for this method. */
upb_refcounted base;
/* While compiling, the base is relative in "ofs", after compiling it is
* absolute in "ptr". */
union {
uint32_t ofs; /* PC offset of method. */
void *ptr; /* Pointer to bytecode or machine code for this method. */
} code_base;
/* The decoder method group to which this method belongs. We own a ref.
* Owning a ref on the entire group is more coarse-grained than is strictly
* necessary; all we truly require is that methods we directly reference
* outlive us, while the group could contain many other messages we don't
* require. But the group represents the messages that were
* allocated+compiled together, so it makes the most sense to free them
* together also. */
const upb_refcounted *group;
/* Whether this method is native code or bytecode. */
bool is_native_;
/* The handler one calls to invoke this method. */
upb_byteshandler input_handler_;
/* The destination handlers this method is bound to. We own a ref. */
const upb_handlers *dest_handlers_;
/* Dispatch table -- used by both bytecode decoder and JIT when encountering a
* field number that wasn't the one we were expecting to see. See
* decoder.int.h for the layout of this table. */
upb_inttable dispatch;
};
struct upb_pbdecoder {
upb_env *env;
// Our input sink.
/* Our input sink. */
upb_bytessink input_;
// The decoder method we are parsing with (owned).
/* The decoder method we are parsing with (owned). */
const upb_pbdecodermethod *method_;
size_t call_len;
const uint32_t *pc, *last;
// Current input buffer and its stream offset.
/* Current input buffer and its stream offset. */
const char *buf, *ptr, *end, *checkpoint;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
/* End of the delimited region, relative to ptr, NULL if not in this buf. */
const char *delim_end;
// End of the delimited region, relative to ptr, or end if not in this buf.
/* End of the delimited region, relative to ptr, end if not in this buf. */
const char *data_end;
// Overall stream offset of "buf."
/* Overall stream offset of "buf." */
uint64_t bufstart_ofs;
// Buffer for residual bytes not parsed from the previous buffer.
// The maximum number of residual bytes we require is 12; a five-byte
// unknown tag plus an eight-byte value, less one because the value
// is only a partial value.
/* Buffer for residual bytes not parsed from the previous buffer.
* The maximum number of residual bytes we require is 12; a five-byte
* unknown tag plus an eight-byte value, less one because the value
* is only a partial value. */
char residual[12];
char *residual_end;
// Stores the user buffer passed to our decode function.
/* Stores the user buffer passed to our decode function. */
const char *buf_param;
size_t size_param;
const upb_bufhandle *handle;
// Our internal stack.
/* Our internal stack. */
upb_pbdecoder_frame *stack, *top, *limit;
const uint32_t **callstack;
size_t stack_size;
@ -194,22 +243,22 @@ struct upb_pbdecoder {
upb_status *status;
#ifdef UPB_USE_JIT_X64
// Used momentarily by the generated code to store a value while a user
// function is called.
/* Used momentarily by the generated code to store a value while a user
* function is called. */
uint32_t tmp_len;
const void *saved_rsp;
#endif
};
// Decoder entry points; used as handlers.
/* Decoder entry points; used as handlers. */
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
size_t size, const upb_bufhandle *handle);
bool upb_pbdecoder_end(void *closure, const void *handler_data);
// Decoder-internal functions that the JIT calls to handle fallback paths.
/* Decoder-internal functions that the JIT calls to handle fallback paths. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle);
size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
@ -221,41 +270,42 @@ int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
// Error messages that are shared between the bytecode and JIT decoders.
/* Error messages that are shared between the bytecode and JIT decoders. */
extern const char *kPbDecoderStackOverflow;
// Access to decoderplan members needed by the decoder.
/* Access to decoderplan members needed by the decoder. */
const char *upb_pbdecoder_getopname(unsigned int op);
// JIT codegen entry point.
/* JIT codegen entry point. */
void upb_pbdecoder_jit(mgroup *group);
void upb_pbdecoder_freejit(mgroup *group);
UPB_REFCOUNTED_CMETHODS(mgroup, mgroup_upcast)
// A special label that means "do field dispatch for this message and branch to
// wherever that takes you."
/* A special label that means "do field dispatch for this message and branch to
* wherever that takes you." */
#define LABEL_DISPATCH 0
// A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
// RET) for branching to when we find an appropriate ENDGROUP tag.
/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
* RET) for branching to when we find an appropriate ENDGROUP tag. */
#define DISPATCH_ENDMSG 0
// It's important to use this invalid wire type instead of 0 (which is a valid
// wire type).
/* It's important to use this invalid wire type instead of 0 (which is a valid
* wire type). */
#define NO_WIRE_TYPE 0xff
// The dispatch table layout is:
// [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
//
// If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
//
// We need two wire types because of packed/non-packed compatibility. A
// primitive repeated field can use either wire type and be valid. While we
// could key the table on fieldnum+wiretype, the table would be 8x sparser.
//
// Storing two wire types in the primary value allows us to quickly rule out
// the second wire type without needing to do a separate lookup (this case is
// less common than an unknown field).
/* The dispatch table layout is:
* [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
*
* If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
* (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
*
* We need two wire types because of packed/non-packed compatibility. A
* primitive repeated field can use either wire type and be valid. While we
* could key the table on fieldnum+wiretype, the table would be 8x sparser.
*
* Storing two wire types in the primary value allows us to quickly rule out
* the second wire type without needing to do a separate lookup (this case is
* less common than an unknown field). */
UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
uint8_t wt2) {
return (ofs << 16) | (wt2 << 8) | wt1;
@ -268,17 +318,17 @@ UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
*ofs = dispatch >> 16;
}
// All of the functions in decoder.c that return int32_t return values according
// to the following scheme:
// 1. negative values indicate a return code from the following list.
// 2. positive values indicate that error or end of buffer was hit, and
// that the decode function should immediately return the given value
// (the decoder state has already been suspended and is ready to be
// resumed).
/* All of the functions in decoder.c that return int32_t return values according
* to the following scheme:
* 1. negative values indicate a return code from the following list.
* 2. positive values indicate that error or end of buffer was hit, and
* that the decode function should immediately return the given value
* (the decoder state has already been suspended and is ready to be
* resumed). */
#define DECODE_OK -1
#define DECODE_MISMATCH -2 // Used only from checktag_slow().
#define DECODE_ENDGROUP -3 // Used only from checkunknown().
#define DECODE_MISMATCH -2 /* Used only from checktag_slow(). */
#define DECODE_ENDGROUP -3 /* Used only from checkunknown(). */
#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
#endif // UPB_DECODER_INT_H_
#endif /* UPB_DECODER_INT_H_ */

@ -62,73 +62,74 @@
#include <stdlib.h>
// The output buffer is divided into segments; a segment is a string of data
// that is "ready to go" -- it does not need any varint lengths inserted into
// the middle. The seams between segments are where varints will be inserted
// once they are known.
//
// We also use the concept of a "run", which is a range of encoded bytes that
// occur at a single submessage level. Every segment contains one or more runs.
//
// A segment can span messages. Consider:
//
// .--Submessage lengths---------.
// | | |
// | V V
// V | |--------------- | |-----------------
// Submessages: | |-----------------------------------------------
// Top-level msg: ------------------------------------------------------------
//
// Segments: ----- ------------------- -----------------
// Runs: *---- *--------------*--- *----------------
// (* marks the start)
//
// Note that the top-level message is not in any segment because it does not
// have any length preceding it.
//
// A segment is only interrupted when another length needs to be inserted. So
// observe how the second segment spans both the inner submessage and part of
// the next enclosing message.
/* The output buffer is divided into segments; a segment is a string of data
* that is "ready to go" -- it does not need any varint lengths inserted into
* the middle. The seams between segments are where varints will be inserted
* once they are known.
*
* We also use the concept of a "run", which is a range of encoded bytes that
* occur at a single submessage level. Every segment contains one or more runs.
*
* A segment can span messages. Consider:
*
* .--Submessage lengths---------.
* | | |
* | V V
* V | |--------------- | |-----------------
* Submessages: | |-----------------------------------------------
* Top-level msg: ------------------------------------------------------------
*
* Segments: ----- ------------------- -----------------
* Runs: *---- *--------------*--- *----------------
* (* marks the start)
*
* Note that the top-level message is not in any segment because it does not
* have any length preceding it.
*
* A segment is only interrupted when another length needs to be inserted. So
* observe how the second segment spans both the inner submessage and part of
* the next enclosing message. */
/* Bookkeeping for one output segment: a stretch of already-encoded bytes that
 * is preceded by a varint length once that length is known. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
/* Encoder state: where input arrives, where output goes, the byte buffer
 * being filled, and the segment/stack bookkeeping used while submessage
 * lengths are still unknown. */
struct upb_pb_encoder {
upb_env *env;
/* Our input and output. */
upb_sink input_;
upb_bytessink *output_;
/* The "subclosure" -- used as the inner closure as part of the bytessink
 * protocol. */
void *subc;
/* The output buffer and limit, and our current write position. "buf"
 * initially points to "initbuf", but is dynamically allocated if we need to
 * grow beyond the initial size. */
char *buf, *ptr, *limit;
/* The beginning of the current run, or undefined if we are at the top
 * level. */
char *runbegin;
/* The list of segments we are accumulating. */
upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
/* The stack of enclosing submessages. Each entry in the stack points to the
 * segment where this submessage's length is being accumulated.
 * (Entries are ints used as indexes into segbuf.) */
int *stack, *top, *stacklimit;
/* Depth of startmsg/endmsg calls. */
int depth;
};
/* low-level buffering ********************************************************/
// Low-level functions for interacting with the output buffer.
/* Low-level functions for interacting with the output buffer. */
// TODO(haberman): handle pushback
/* TODO(haberman): handle pushback */
static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
UPB_ASSERT_VAR(n, n == len);
@ -138,11 +139,12 @@ static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
return &e->segbuf[*e->top];
}
// Call to ensure that at least "bytes" bytes are available for writing at
// e->ptr. Returns false if the bytes could not be allocated.
/* Call to ensure that at least "bytes" bytes are available for writing at
* e->ptr. Returns false if the bytes could not be allocated. */
static bool reserve(upb_pb_encoder *e, size_t bytes) {
if ((size_t)(e->limit - e->ptr) < bytes) {
// Grow buffer.
/* Grow buffer. */
char *new_buf;
size_t needed = bytes + (e->ptr - e->buf);
size_t old_size = e->limit - e->buf;
@ -152,7 +154,7 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
new_size *= 2;
}
char *new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
if (new_buf == NULL) {
return false;
@ -167,22 +169,22 @@ static bool reserve(upb_pb_encoder *e, size_t bytes) {
return true;
}
// Call when "bytes" bytes have been written at e->ptr. The caller *must* have
// previously called reserve() with at least this many bytes.
/* Call when "bytes" bytes have been written at e->ptr. The caller *must* have
* previously called reserve() with at least this many bytes. */
static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
  /* The room between the write position and the limit must already cover
   * the bytes being committed. */
  assert(bytes <= (size_t)(e->limit - e->ptr));
  e->ptr = &e->ptr[bytes];
}
// Call when all of the bytes for a handler have been written. Flushes the
// bytes if possible and necessary, returning false if this failed.
/* Call when all of the bytes for a handler have been written. Flushes the
* bytes if possible and necessary, returning false if this failed. */
static bool commit(upb_pb_encoder *e) {
if (!e->top) {
// We aren't inside a delimited region. Flush our accumulated bytes to
// the output.
//
// TODO(haberman): in the future we may want to delay flushing for
// efficiency reasons.
/* We aren't inside a delimited region. Flush our accumulated bytes to
* the output.
*
* TODO(haberman): in the future we may want to delay flushing for
* efficiency reasons. */
putbuf(e, e->buf, e->ptr - e->buf);
e->ptr = e->buf;
}
@ -190,7 +192,7 @@ static bool commit(upb_pb_encoder *e) {
return true;
}
// Writes the given bytes to the buffer, handling reserve/advance.
/* Writes the given bytes to the buffer, handling reserve/advance. */
static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
if (!reserve(e, len)) {
return false;
@ -201,32 +203,33 @@ static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
return true;
}
// Finish the current run by adding the run totals to the segment and message
// length.
/* Finish the current run by adding the run totals to the segment and message
* length. */
static void accumulate(upb_pb_encoder *e) {
  size_t run_len;

  /* A run cannot end before the position where it began. */
  assert(e->ptr >= e->runbegin);
  run_len = (size_t)(e->ptr - e->runbegin);

  /* Credit the run to the segment being filled and to the message on top of
   * the stack. */
  e->segptr->seglen += run_len;
  top(e)->msglen += run_len;

  /* The next run starts where this one stopped. */
  e->runbegin = e->ptr;
}
// Call to indicate the start of delimited region for which the full length is
// not yet known. All data will be buffered until the length is known.
// Delimited regions may be nested; their lengths will all be tracked properly.
/* Call to indicate the start of delimited region for which the full length is
* not yet known. All data will be buffered until the length is known.
* Delimited regions may be nested; their lengths will all be tracked properly. */
static bool start_delim(upb_pb_encoder *e) {
if (e->top) {
// We are already buffering, advance to the next segment and push it on the
// stack.
/* We are already buffering, advance to the next segment and push it on the
* stack. */
accumulate(e);
if (++e->top == e->stacklimit) {
// TODO(haberman): grow stack?
/* TODO(haberman): grow stack? */
return false;
}
if (++e->segptr == e->seglimit) {
// Grow segment buffer.
/* Grow segment buffer. */
size_t old_size =
(e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
size_t new_size = old_size * 2;
@ -242,7 +245,7 @@ static bool start_delim(upb_pb_encoder *e) {
e->segbuf = new_buf;
}
} else {
// We were previously at the top level, start buffering.
/* We were previously at the top level, start buffering. */
e->segptr = e->segbuf;
e->top = e->stack;
e->runbegin = e->ptr;
@ -255,15 +258,16 @@ static bool start_delim(upb_pb_encoder *e) {
return true;
}
// Call to indicate the end of a delimited region. We now know the length of
// the delimited region. If we are not nested inside any other delimited
// regions, we can now emit all of the buffered data we accumulated.
/* Call to indicate the end of a delimited region. We now know the length of
* the delimited region. If we are not nested inside any other delimited
* regions, we can now emit all of the buffered data we accumulated. */
static bool end_delim(upb_pb_encoder *e) {
size_t msglen;
accumulate(e);
size_t msglen = top(e)->msglen;
msglen = top(e)->msglen;
if (e->top == e->stack) {
// All lengths are now available, emit all buffered data.
/* All lengths are now available, emit all buffered data. */
char buf[UPB_PB_VARINT_MAX_LEN];
upb_pb_encoder_segment *s;
const char *ptr = e->buf;
@ -277,7 +281,8 @@ static bool end_delim(upb_pb_encoder *e) {
e->ptr = e->buf;
e->top = NULL;
} else {
// Need to keep buffering; propagate length info into enclosing submessages.
/* Need to keep buffering; propagate length info into enclosing
* submessages. */
--e->top;
top(e)->msglen += msglen + upb_varint_size(msglen);
}
@ -288,14 +293,14 @@ static bool end_delim(upb_pb_encoder *e) {
/* tag_t **********************************************************************/
// A precomputed (pre-encoded) tag and length.
/* A precomputed (pre-encoded) tag and length. */
typedef struct {
/* NOTE(review): presumably the number of bytes of tag[] that are in use --
 * confirm against new_tag()/encode_tag(). */
uint8_t bytes;
/* Fixed 7-byte storage for the pre-encoded data. */
char tag[7];
} tag_t;
// Allocates a new tag for this field, and sets it in these handlerattr.
/* Allocates a new tag for this field, and sets it in these handlerattr. */
static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
upb_handlerattr *attr) {
uint32_t n = upb_fielddef_number(f);
@ -316,12 +321,12 @@ static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
/* encoding of wire types *****************************************************/
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* Emits the eight bytes of val exactly as they sit in memory.
   * TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* Emits the four bytes of val exactly as they sit in memory.
   * TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
@ -408,19 +413,19 @@ static size_t encode_strbuf(void *c, const void *hd, const char *buf,
}
T(double, double, dbl2uint64, encode_fixed64)
T(float, float, flt2uint32, encode_fixed32);
T(int64, int64_t, uint64_t, encode_varint);
T(int32, int32_t, uint32_t, encode_varint);
T(fixed64, uint64_t, uint64_t, encode_fixed64);
T(fixed32, uint32_t, uint32_t, encode_fixed32);
T(bool, bool, bool, encode_varint);
T(uint32, uint32_t, uint32_t, encode_varint);
T(uint64, uint64_t, uint64_t, encode_varint);
T(enum, int32_t, uint32_t, encode_varint);
T(sfixed32, int32_t, uint32_t, encode_fixed32);
T(sfixed64, int64_t, uint64_t, encode_fixed64);
T(sint32, int32_t, upb_zzenc_32, encode_varint);
T(sint64, int64_t, upb_zzenc_64, encode_varint);
T(float, float, flt2uint32, encode_fixed32)
T(int64, int64_t, uint64_t, encode_varint)
T(int32, int32_t, uint32_t, encode_varint)
T(fixed64, uint64_t, uint64_t, encode_fixed64)
T(fixed32, uint32_t, uint32_t, encode_fixed32)
T(bool, bool, bool, encode_varint)
T(uint32, uint32_t, uint32_t, encode_varint)
T(uint64, uint64_t, uint64_t, encode_varint)
T(enum, int32_t, uint32_t, encode_varint)
T(sfixed32, int32_t, uint32_t, encode_fixed32)
T(sfixed64, int64_t, uint64_t, encode_fixed64)
T(sint32, int32_t, upb_zzenc_32, encode_varint)
T(sint64, int64_t, upb_zzenc_64, encode_varint)
#undef T
@ -428,13 +433,15 @@ T(sint64, int64_t, upb_zzenc_64, encode_varint);
/* code to build the handlers *************************************************/
static void newhandlers_callback(const void *closure, upb_handlers *h) {
const upb_msgdef *m;
upb_msg_field_iter i;
UPB_UNUSED(closure);
upb_handlers_setstartmsg(h, startmsg, NULL);
upb_handlers_setendmsg(h, endmsg, NULL);
const upb_msgdef *m = upb_handlers_msgdef(h);
upb_msg_field_iter i;
m = upb_handlers_msgdef(h);
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
@ -446,7 +453,7 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
packed ? UPB_WIRE_TYPE_DELIMITED
: upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
// Pre-encode the tag for this field.
/* Pre-encode the tag for this field. */
new_tag(h, f, wt, &attr);
if (packed) {
@ -489,7 +496,7 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) {
upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
break;
case UPB_DESCRIPTOR_TYPE_GROUP: {
// Endgroup takes a different tag (wire_type = END_GROUP).
/* Endgroup takes a different tag (wire_type = END_GROUP). */
upb_handlerattr attr2;
new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
@ -525,7 +532,7 @@ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
upb_bytessink *output) {
const size_t initial_bufsize = 256;
const size_t initial_segbufsize = 16;
// TODO(haberman): make this configurable.
/* TODO(haberman): make this configurable. */
const size_t stack_size = 64;
#ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env);
@ -554,7 +561,7 @@ upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
e->subc = output->closure;
e->ptr = e->buf;
// If this fails, increase the value in encoder.h.
/* If this fails, increase the value in encoder.h. */
assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
return e;
}

@ -22,35 +22,35 @@
namespace upb {
namespace pb {
class Encoder;
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder);
UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder)
#define UPB_PBENCODER_MAX_NESTING 100
/* upb::pb::Encoder ***********************************************************/
// Preallocation hint: encoder won't allocate more bytes than this when first
// constructed. This hint may be an overestimate for some build configurations.
// But if the encoder library is upgraded without recompiling the application,
// it may be an underestimate.
/* Preallocation hint: encoder won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the encoder library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_PB_ENCODER_SIZE 768
#ifdef __cplusplus
class upb::pb::Encoder {
public:
// Creates a new encoder in the given environment. The Handlers must have
// come from NewHandlers() below.
/* Creates a new encoder in the given environment. The Handlers must have
* come from NewHandlers() below. */
static Encoder* Create(Environment* env, const Handlers* handlers,
BytesSink* output);
// The input to the encoder.
/* The input to the encoder. */
Sink* input();
// Creates a new set of handlers for this MessageDef.
/* Creates a new set of handlers for this MessageDef. */
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
static const size_t kSize = UPB_PB_ENCODER_SIZE;
@ -87,8 +87,8 @@ inline reffed_ptr<const Handlers> Encoder::NewHandlers(
const Handlers* h = upb_pb_encoder_newhandlers(md, &h);
return reffed_ptr<const Handlers>(h, &h);
}
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif

@ -15,28 +15,31 @@
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
void *owner, upb_status *status) {
// Create handlers.
/* Create handlers. */
const upb_pbdecodermethod *decoder_m;
const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
upb_env env;
upb_pbdecodermethodopts opts;
upb_pbdecoder *decoder;
upb_descreader *reader;
bool ok;
upb_def **ret = NULL;
upb_def **defs;
upb_pbdecodermethodopts_init(&opts, reader_h);
const upb_pbdecodermethod *decoder_m =
upb_pbdecodermethod_new(&opts, &decoder_m);
decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
upb_env env;
upb_env_init(&env);
upb_env_reporterrorsto(&env, status);
upb_descreader *reader = upb_descreader_create(&env, reader_h);
upb_pbdecoder *decoder =
upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
// Push input data.
bool ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
reader = upb_descreader_create(&env, reader_h);
decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
upb_def **ret = NULL;
/* Push input data. */
ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
if (!ok) goto cleanup;
upb_def **defs = upb_descreader_getdefs(reader, owner, n);
defs = upb_descreader_getdefs(reader, owner, n);
ret = malloc(sizeof(upb_def*) * (*n));
memcpy(ret, defs, sizeof(upb_def*) * (*n));
@ -50,21 +53,24 @@ cleanup:
/* Parses the descriptor in str[0..len) and adds the resulting defs to s.
 * Returns false if loading fails, otherwise the result of upb_symtab_add(). */
bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                     upb_status *status) {
  int n;
  bool success;
  /* The address of the local array pointer is passed as the "owner"
   * argument for the loaded defs. */
  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
  if (!defs) return false;

  success = upb_symtab_add(s, defs, n, &defs, status);
  /* The array itself came from malloc() and is ours to release either way. */
  free(defs);
  return success;
}
char *upb_readfile(const char *filename, size_t *len) {
long size;
char *buf;
FILE *f = fopen(filename, "rb");
if(!f) return NULL;
if(fseek(f, 0, SEEK_END) != 0) goto error;
long size = ftell(f);
size = ftell(f);
if(size < 0) goto error;
if(fseek(f, 0, SEEK_SET) != 0) goto error;
char *buf = malloc(size + 1);
buf = malloc(size + 1);
if(size && fread(buf, size, 1, f) != 1) goto error;
fclose(f);
if (len) *len = size;
@ -78,12 +84,13 @@ error:
/* Reads the whole file "fname" and loads it as a descriptor into symtab.
 * Returns false (setting status, if non-NULL) when the file cannot be read;
 * otherwise returns the result of upb_load_descriptor_into_symtab(). */
bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
                                          upb_status *status) {
  size_t len;
  bool success;
  char *data = upb_readfile(fname, &len);

  if (!data) {
    if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
    return false;
  }

  success = upb_load_descriptor_into_symtab(symtab, data, len, status);
  /* The file contents are no longer needed once loading has finished. */
  free(data);
  return success;
}

@ -33,23 +33,23 @@
extern "C" {
#endif
// Loads all defs from the given protobuf binary descriptor, setting default
// accessors and a default layout on all messages. The caller owns the
// returned array of defs, which will be of length *n. On error NULL is
// returned and status is set (if non-NULL).
/* Loads all defs from the given protobuf binary descriptor, setting default
* accessors and a default layout on all messages. The caller owns the
* returned array of defs, which will be of length *n. On error NULL is
* returned and status is set (if non-NULL). */
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
void *owner, upb_status *status);
// Like the previous but also adds the loaded defs to the given symtab.
/* Like the previous but also adds the loaded defs to the given symtab. */
bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
size_t len, upb_status *status);
// Like the previous but also reads the descriptor from the given filename.
/* Like the previous but also reads the descriptor from the given filename. */
bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
upb_status *status);
// Reads the given filename into a character string, returning NULL if there
// was an error.
/* Reads the given filename into a character string, returning NULL if there
* was an error. */
char *upb_readfile(const char *filename, size_t *len);
#ifdef __cplusplus
@ -57,8 +57,8 @@ char *upb_readfile(const char *filename, size_t *len);
namespace upb {
// All routines that load descriptors expect the descriptor to be a
// FileDescriptorSet.
/* All routines that load descriptors expect the descriptor to be a
* FileDescriptorSet. */
inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
Status* status) {
return upb_load_descriptor_file_into_symtab(s, fname, status);
@ -69,14 +69,14 @@ inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
return upb_load_descriptor_into_symtab(s, str, len, status);
}
// Templated so it can accept both string and std::string.
/* Templated so it can accept both string and std::string. */
template <typename T>
bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
  /* T only has to provide c_str() and size(). */
  const char* data = desc.c_str();
  return upb_load_descriptor_into_symtab(s, data, desc.size(), status);
}
} // namespace upb
} /* namespace upb */
#endif
#endif
#endif /* UPB_GLUE_H */

@ -13,6 +13,7 @@
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -50,22 +51,24 @@ static int endfield(upb_textprinter *p) {
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release.
/* Based on CEscapeInternal() from Google's protobuf release. */
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
const char *end = buf + len;
// I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also.
/* I think hex is prettier and more useful, but proto2 uses octal; should
* investigate whether it can parse hex also. */
const bool use_hex = false;
bool last_hex_escape = false; // true if last output char was \xNN
bool last_hex_escape = false; /* true if last output char was \xNN */
for (; buf < end; buf++) {
bool is_hex_escape;
if (dstend - dst < 4) {
upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
dst = dstbuf;
}
bool is_hex_escape = false;
is_hex_escape = false;
switch (*buf) {
case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
@ -74,9 +77,9 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
default:
// Note that if we emit \xNN and the buf character after that is a hex
// digit then that digit must be escaped too to prevent it being
// interpreted as part of the character code by C.
/* Note that if we emit \xNN and the buf character after that is a hex
* digit then that digit must be escaped too to prevent it being
* interpreted as part of the character code by C. */
if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
(!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
@ -88,29 +91,38 @@ static int putescaped(upb_textprinter *p, const char *buf, size_t len,
}
last_hex_escape = is_hex_escape;
}
// Flush remaining data.
/* Flush remaining data. */
upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
return 0;
}
#ifdef __GNUC__
#define va_copy(a, b) __va_copy(a, b)
#endif
/* vsnprintf() is not part of C89.  GCC-compatible compilers expose it as a
 * builtin regardless of -std, so use that when available and fall back to the
 * library declaration elsewhere. */
#ifdef __GNUC__
#define putf_vsnprintf __builtin_vsnprintf
#else
#define putf_vsnprintf vsnprintf
#endif

/* Formats printf-style arguments and writes the result to the printer's
 * output sink.  Returns false if formatting or allocation fails; otherwise
 * returns the result of upb_bytessink_putbuf() converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once with a zero-sized buffer to get the length of the string.
   * vsprintf() cannot be used for this: it has no size argument and would
   * write the formatted output through the NULL pointer. */
  va_copy(args_copy, args);
  len = putf_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  if (len < 0) {
    /* Formatting error: there is no meaningful length to allocate. */
    va_end(args);
    return false;
  }

  /* + 1 for the NUL terminator (vsnprintf() requires it even if we don't). */
  str = malloc((size_t)len + 1);
  if (!str) {
    va_end(args);
    return false;
  }

  written = putf_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT_VAR(written, written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  free(str);
  return ok;
}
@ -119,8 +131,8 @@ bool putf(upb_textprinter *p, const char *fmt, ...) {
/* handlers *******************************************************************/
static bool textprinter_startmsg(void *c, const void *hd) {
UPB_UNUSED(hd);
upb_textprinter *p = c;
UPB_UNUSED(hd);
if (p->indent_depth_ == 0) {
upb_bytessink_start(p->output_, 0, &p->subc);
}
@ -128,9 +140,9 @@ static bool textprinter_startmsg(void *c, const void *hd) {
}
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
upb_textprinter *p = c;
UPB_UNUSED(hd);
UPB_UNUSED(s);
upb_textprinter *p = c;
if (p->indent_depth_ == 0) {
upb_bytessink_end(p->output_);
}
@ -167,14 +179,14 @@ err:
TYPE(int32, int32_t, "%" PRId32)
TYPE(int64, int64_t, "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32);
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
#undef TYPE
// Output a symbolic value from the enum if found, else just print as int32.
/* Output a symbolic value from the enum if found, else just print as int32. */
static bool textprinter_putenum(void *closure, const void *handler_data,
int32_t val) {
upb_textprinter *p = closure;
@ -194,17 +206,17 @@ static bool textprinter_putenum(void *closure, const void *handler_data,
static void *textprinter_startstr(void *closure, const void *handler_data,
size_t size_hint) {
upb_textprinter *p = closure;
const upb_fielddef *f = handler_data;
UPB_UNUSED(size_hint);
upb_textprinter *p = closure;
indent(p);
putf(p, "%s: \"", upb_fielddef_name(f));
return p;
}
static bool textprinter_endstr(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_textprinter *p = closure;
UPB_UNUSED(handler_data);
putf(p, "\"");
endfield(p);
return true;
@ -212,9 +224,9 @@ static bool textprinter_endstr(void *closure, const void *handler_data) {
static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
size_t len, const upb_bufhandle *handle) {
UPB_UNUSED(handle);
upb_textprinter *p = closure;
const upb_fielddef *f = hd;
UPB_UNUSED(handle);
CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
return len;
err:
@ -233,8 +245,8 @@ err:
}
static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_textprinter *p = closure;
UPB_UNUSED(handler_data);
p->indent_depth_--;
CHECK(indent(p));
upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
@ -245,13 +257,13 @@ err:
}
static void onmreg(const void *c, upb_handlers *h) {
UPB_UNUSED(c);
const upb_msgdef *m = upb_handlers_msgdef(h);
upb_msg_field_iter i;
UPB_UNUSED(c);
upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {

@ -15,18 +15,18 @@
namespace upb {
namespace pb {
class TextPrinter;
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter);
UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter)
#ifdef __cplusplus
class upb::pb::TextPrinter {
public:
// The given handlers must have come from NewHandlers(). It must outlive the
// TextPrinter.
/* The given handlers must have come from NewHandlers(). It must outlive the
* TextPrinter. */
static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
BytesSink *output);
@ -34,8 +34,8 @@ class upb::pb::TextPrinter {
Sink* input();
// If handler caching becomes a requirement we can add a code cache as in
// decoder.h
/* If handler caching becomes a requirement we can add a code cache as in
* decoder.h */
static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
};
@ -43,7 +43,7 @@ class upb::pb::TextPrinter {
UPB_BEGIN_EXTERN_C
// C API.
/* C API. */
upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
upb_bytessink *output);
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
@ -74,8 +74,8 @@ inline reffed_ptr<const Handlers> TextPrinter::NewHandlers(
const Handlers* h = upb_textprinter_newhandlers(md, &h);
return reffed_ptr<const Handlers>(h, &h);
}
} // namespace pb
} // namespace upb
} /* namespace pb */
} /* namespace upb */
#endif

@ -7,32 +7,33 @@
#include "upb/pb/varint.int.h"
// Index is descriptor type.
/* Index is descriptor type. */
const uint8_t upb_pb_native_wire_types[] = {
UPB_WIRE_TYPE_END_GROUP, // ENDGROUP
UPB_WIRE_TYPE_64BIT, // DOUBLE
UPB_WIRE_TYPE_32BIT, // FLOAT
UPB_WIRE_TYPE_VARINT, // INT64
UPB_WIRE_TYPE_VARINT, // UINT64
UPB_WIRE_TYPE_VARINT, // INT32
UPB_WIRE_TYPE_64BIT, // FIXED64
UPB_WIRE_TYPE_32BIT, // FIXED32
UPB_WIRE_TYPE_VARINT, // BOOL
UPB_WIRE_TYPE_DELIMITED, // STRING
UPB_WIRE_TYPE_START_GROUP, // GROUP
UPB_WIRE_TYPE_DELIMITED, // MESSAGE
UPB_WIRE_TYPE_DELIMITED, // BYTES
UPB_WIRE_TYPE_VARINT, // UINT32
UPB_WIRE_TYPE_VARINT, // ENUM
UPB_WIRE_TYPE_32BIT, // SFIXED32
UPB_WIRE_TYPE_64BIT, // SFIXED64
UPB_WIRE_TYPE_VARINT, // SINT32
UPB_WIRE_TYPE_VARINT, // SINT64
UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_WIRE_TYPE_64BIT, /* DOUBLE */
UPB_WIRE_TYPE_32BIT, /* FLOAT */
UPB_WIRE_TYPE_VARINT, /* INT64 */
UPB_WIRE_TYPE_VARINT, /* UINT64 */
UPB_WIRE_TYPE_VARINT, /* INT32 */
UPB_WIRE_TYPE_64BIT, /* FIXED64 */
UPB_WIRE_TYPE_32BIT, /* FIXED32 */
UPB_WIRE_TYPE_VARINT, /* BOOL */
UPB_WIRE_TYPE_DELIMITED, /* STRING */
UPB_WIRE_TYPE_START_GROUP, /* GROUP */
UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
UPB_WIRE_TYPE_DELIMITED, /* BYTES */
UPB_WIRE_TYPE_VARINT, /* UINT32 */
UPB_WIRE_TYPE_VARINT, /* ENUM */
UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
UPB_WIRE_TYPE_VARINT, /* SINT32 */
UPB_WIRE_TYPE_VARINT, /* SINT64 */
};
// A basic branch-based decoder, uses 32-bit values to get good performance
// on 32-bit architectures (but performs well on 64-bits also).
// This scheme comes from the original Google Protobuf implementation (proto2).
/* A basic branch-based decoder, uses 32-bit values to get good performance
* on 32-bit architectures (but performs well on 64-bits also).
* This scheme comes from the original Google Protobuf implementation
* (proto2). */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
upb_decoderet err = {NULL, 0};
const char *p = r.p;
@ -56,7 +57,7 @@ done:
return r;
}
// Like the previous, but uses 64-bit values.
/* Like the previous, but uses 64-bit values. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
const char *p = r.p;
uint64_t val = r.val;
@ -78,49 +79,53 @@ done:
return r;
}
// Given an encoded varint v, returns an integer with a single bit set that
// indicates the end of the varint. Subtracting one from this value will
// yield a mask that leaves only bits that are part of the varint. Returns
// 0 if the varint is unterminated.
/* Given an encoded varint v, returns an integer with a single bit set that
* indicates the end of the varint. Subtracting one from this value will
* yield a mask that leaves only bits that are part of the varint. Returns
* 0 if the varint is unterminated. */
static uint64_t upb_get_vstopbit(uint64_t v) {
  /* Saturate the seven payload bits of every byte so that only the
   * continuation (high) bit of each byte of v stays significant. */
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  /* Adding one carries through every byte whose continuation bit is set and
   * stops at the first byte where it is clear; masking with ~filled keeps
   * exactly that bit.  When all eight continuation bits are set the sum
   * wraps around to zero, so the result is zero as well. */
  return (filled + 1) & ~filled;
}
// A branchless decoder. Credit to Pascal Massimino for the bit-twiddling.
/* A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. */
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
uint64_t b;
uint64_t stop_bit;
upb_decoderet my_r;
memcpy(&b, r.p, sizeof(b));
uint64_t stop_bit = upb_get_vstopbit(b);
stop_bit = upb_get_vstopbit(b);
b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL);
b += 15 * (b & 0x00000000ffffffffULL);
if (stop_bit == 0) {
// Error: unterminated varint.
/* Error: unterminated varint. */
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 7)};
my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 7));
return my_r;
}
// A branchless decoder. Credit to Daniel Wright for the bit-twiddling.
/* A branchless decoder. Credit to Daniel Wright for the bit-twiddling. */
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
uint64_t b;
uint64_t stop_bit;
upb_decoderet my_r;
memcpy(&b, r.p, sizeof(b));
uint64_t stop_bit = upb_get_vstopbit(b);
stop_bit = upb_get_vstopbit(b);
b &= (stop_bit - 1);
b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
if (stop_bit == 0) {
// Error: unterminated varint.
/* Error: unterminated varint. */
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 14)};
my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 14));
return my_r;
}

@ -20,25 +20,25 @@
extern "C" {
#endif
// A list of types as they are encoded on-the-wire.
/* A list of types as they are encoded on-the-wire. */
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
#define UPB_MAX_WIRE_TYPE 5
// The maximum number of bytes that it takes to encode a 64-bit varint.
// Note that with a better encoding this could be 9 (TODO: write up a
// wiki document about this).
/* The maximum number of bytes that it takes to encode a 64-bit varint.
* Note that with a better encoding this could be 9 (TODO: write up a
* wiki document about this). */
#define UPB_PB_VARINT_MAX_LEN 10
// Array of the "native" (i.e. non-packed-repeated) wire type for a given
// descriptor type (upb_descriptortype_t).
/* Array of the "native" (i.e. non-packed-repeated) wire type for a given
* descriptor type (upb_descriptortype_t). */
extern const uint8_t upb_pb_native_wire_types[];
/* Zig-zag encoding/decoding **************************************************/
@ -54,44 +54,59 @@ UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
/* Decoding *******************************************************************/
// All decoding functions return this struct by value.
/* All decoding functions return this struct by value. */
typedef struct {
const char *p; // NULL if the varint was unterminated.
const char *p; /* NULL if the varint was unterminated. */
uint64_t val;
} upb_decoderet;
// Four functions for decoding a varint of at most eight bytes. They are all
// functionally identical, but are implemented in different ways and likely have
// different performance profiles. We keep them around for performance testing.
//
// Note that these functions may not read byte-by-byte, so they must not be used
// unless there are at least eight bytes left in the buffer!
UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) {
  /* Builds a upb_decoderet from its two members.  The struct is filled in
   * member by member and then returned by value. */
  upb_decoderet result;
  result.val = val;
  result.p = p;
  return result;
}
/* Four functions for decoding a varint of at most eight bytes. They are all
* functionally identical, but are implemented in different ways and likely have
* different performance profiles. We keep them around for performance testing.
*
* Note that these functions may not read byte-by-byte, so they must not be used
* unless there are at least eight bytes left in the buffer! */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function. Note that this may read
// up to 10 bytes, so it must not be used unless there are at least ten bytes
// left in the buffer!
/* Template for a function that checks the first two bytes with branching
* and dispatches 2-10 bytes with a separate function. Note that this may read
* up to 10 bytes, so it must not be used unless there are at least ten bytes
* left in the buffer! */
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
uint8_t *p = (uint8_t*)_p; \
if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7fU}; return r; } \
upb_decoderet r = {_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)}; \
if ((*(p + 1) & 0x80) == 0) return r; \
upb_decoderet r; \
if ((*p & 0x80) == 0) { \
/* Common case: one-byte varint. */ \
return upb_decoderet_make(_p + 1, *p & 0x7fU); \
} \
r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)); \
if ((*(p + 1) & 0x80) == 0) { \
/* Two-byte varint. */ \
return r; \
} \
/* Longer varint, fallback to out-of-line function. */ \
return decode_max8_function(r); \
}
UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32);
UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64);
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);
UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino);
UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32)
UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64)
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright)
UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino)
#undef UPB_VARINT_DECODER_CHECK2
// Our canonical functions for decoding varints, based on the currently
// favored best-performing implementations.
/* Our canonical functions for decoding varints, based on the currently
* favored best-performing implementations. */
UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
if (sizeof(long) == 8)
return upb_vdecode_check2_branch64(p);
@ -108,7 +123,7 @@ UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
UPB_INLINE int upb_value_size(uint64_t val) {
#ifdef __GNUC__
int high_bit = 63 - __builtin_clzll(val); // 0-based, undef if val == 0.
int high_bit = 63 - __builtin_clzll(val); /* 0-based, undef if val == 0. */
#else
int high_bit = 0;
uint64_t tmp = val;
@ -117,13 +132,14 @@ UPB_INLINE int upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
// Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
// bytes long), returning how many bytes were used.
//
// TODO: benchmark and optimize if necessary.
/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
* bytes long), returning how many bytes were used.
*
* TODO: benchmark and optimize if necessary. */
UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
size_t i;
if (val == 0) { buf[0] = 0; return 1; }
size_t i = 0;
i = 0;
while (val) {
uint8_t byte = val & 0x7fU;
val >>= 7;
@ -138,7 +154,7 @@ UPB_INLINE size_t upb_varint_size(uint64_t val) {
return upb_vencode64(val, buf);
}
// Encodes a 32-bit varint, *not* sign-extended.
/* Encodes a 32-bit varint, *not* sign-extended. */
UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t bytes = upb_vencode64(val, buf);

@ -30,17 +30,17 @@ const void *UPB_UNTRACKED_REF = &untracked_val;
/* arch-specific atomic primitives *******************************************/
#ifdef UPB_THREAD_UNSAFE //////////////////////////////////////////////////////
#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/
/* Plain (non-atomic) increment of the counter that a points to. */
static void atomic_inc(uint32_t *a) {
  *a += 1;
}
/* Plain (non-atomic) decrement of the counter that a points to.  Returns true
 * when the counter reaches zero as a result of this decrement. */
static bool atomic_dec(uint32_t *a) {
  *a -= 1;
  return *a == 0;
}
#elif defined(__GNUC__) || defined(__clang__) //////////////////////////////////
#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/
/* Increments the counter that a points to using the compiler's
 * __sync_fetch_and_add builtin; the fetched value is not needed. */
static void atomic_inc(uint32_t *a) {
  (void)__sync_fetch_and_add(a, 1);
}
/* Decrements the counter that a points to using the compiler's
 * __sync_sub_and_fetch builtin.  Returns true when the value after the
 * decrement is zero. */
static bool atomic_dec(uint32_t *a) {
  uint32_t remaining = __sync_sub_and_fetch(a, 1);
  return remaining == 0;
}
#elif defined(WIN32) ///////////////////////////////////////////////////////////
#elif defined(WIN32) /*-------------------------------------------------------*/
#include <Windows.h>
@ -54,13 +54,13 @@ static bool atomic_dec(upb_atomic_t *a) {
Implement them or compile with UPB_THREAD_UNSAFE.
#endif
// All static objects point to this refcount.
// It is special-cased in ref/unref below.
/* All static objects point to this refcount.
* It is special-cased in ref/unref below. */
uint32_t static_refcount = -1;
// We can avoid atomic ops for statically-declared objects.
// This is a minor optimization but nice since we can avoid degrading under
// contention in this case.
/* We can avoid atomic ops for statically-declared objects.
* This is a minor optimization but nice since we can avoid degrading under
* contention in this case. */
static void refgroup(uint32_t *group) {
if (group != &static_refcount)
@ -87,21 +87,21 @@ static void upb_unlock() {}
#else
// User must define functions that lock/unlock a global mutex and link this
// file against them.
/* User must define functions that lock/unlock a global mutex and link this
* file against them. */
void upb_lock();
void upb_unlock();
#endif
// UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
// code-paths that can normally never fail, like upb_refcounted_ref(). Since
// we have no way to propagate out-of-memory errors back to the user, and since
// these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
* code-paths that can normally never fail, like upb_refcounted_ref(). Since
* we have no way to propagate out-of-memory errors back to the user, and since
* these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. */
/* Fails hard when predicate is false: trips assert() in debug builds and
 * otherwise exits with status 1.
 *
 * Wrapped in do/while(0) so that the macro expands to exactly one statement
 * and can be used safely as the unbraced body of an if/else; invoke it with a
 * trailing semicolon.  Note that predicate is evaluated a second time inside
 * assert() on the failure path, so it must be free of side effects. */
#define CHECK_OOM(predicate) \
  do { \
    if (!(predicate)) { \
      assert(predicate); \
      exit(1); \
    } \
  } while (0)
typedef struct {
int count; // How many refs there are (duplicates only allowed for ref2).
int count; /* How many refs there are (duplicates only allowed for ref2). */
bool is_ref2;
} trackedref;
@ -114,18 +114,19 @@ static trackedref *trackedref_new(bool is_ref2) {
}
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
upb_value v;
assert(owner);
if (owner == UPB_UNTRACKED_REF) return;
upb_lock();
upb_value v;
if (upb_inttable_lookupptr(r->refs, owner, &v)) {
trackedref *ref = upb_value_getptr(v);
// Since we allow multiple ref2's for the same to/from pair without
// allocating separate memory for each one, we lose the fine-grained
// tracking behavior we get with regular refs. Since ref2s only happen
// inside upb, we'll accept this limitation until/unless there is a really
// difficult upb-internal bug that can't be figured out without it.
/* Since we allow multiple ref2's for the same to/from pair without
* allocating separate memory for each one, we lose the fine-grained
* tracking behavior we get with regular refs. Since ref2s only happen
* inside upb, we'll accept this limitation until/unless there is a really
* difficult upb-internal bug that can't be figured out without it. */
assert(ref2);
assert(ref->is_ref2);
ref->count++;
@ -134,8 +135,8 @@ static void track(const upb_refcounted *r, const void *owner, bool ref2) {
bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
CHECK_OOM(ok);
if (ref2) {
// We know this cast is safe when it is a ref2, because it's coming from
// another refcounted object.
/* We know this cast is safe when it is a ref2, because it's coming from
* another refcounted object. */
const upb_refcounted *from = owner;
assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
@ -146,22 +147,25 @@ static void track(const upb_refcounted *r, const void *owner, bool ref2) {
}
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
upb_value v;
bool found;
trackedref *ref;
assert(owner);
if (owner == UPB_UNTRACKED_REF) return;
upb_lock();
upb_value v;
bool found = upb_inttable_lookupptr(r->refs, owner, &v);
// This assert will fail if an owner attempts to release a ref it didn't have.
found = upb_inttable_lookupptr(r->refs, owner, &v);
/* This assert will fail if an owner attempts to release a ref it didn't have. */
UPB_ASSERT_VAR(found, found);
trackedref *ref = upb_value_getptr(v);
ref = upb_value_getptr(v);
assert(ref->is_ref2 == ref2);
if (--ref->count == 0) {
free(ref);
upb_inttable_removeptr(r->refs, owner, NULL);
if (ref2) {
// We know this cast is safe when it is a ref2, because it's coming from
// another refcounted object.
/* We know this cast is safe when it is a ref2, because it's coming from
* another refcounted object. */
const upb_refcounted *from = owner;
bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
assert(removed);
@ -171,32 +175,41 @@ static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
}
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
upb_lock();
upb_value v;
bool found = upb_inttable_lookupptr(r->refs, owner, &v);
bool found;
trackedref *ref;
upb_lock();
found = upb_inttable_lookupptr(r->refs, owner, &v);
UPB_ASSERT_VAR(found, found);
trackedref *ref = upb_value_getptr(v);
ref = upb_value_getptr(v);
assert(ref->is_ref2 == ref2);
upb_unlock();
}
// Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
// originate from the given owner.
/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
* originate from the given owner. */
static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
upb_lock();
upb_inttable_iter i;
upb_lock();
upb_inttable_begin(&i, owner->ref2s);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_value v;
upb_value count;
trackedref *ref;
bool ok;
bool found;
upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
// To get the count we need to look in the target's table.
upb_value v;
bool found = upb_inttable_lookupptr(to->refs, owner, &v);
/* To get the count we need to look in the target's table. */
found = upb_inttable_lookupptr(to->refs, owner, &v);
assert(found);
trackedref *ref = upb_value_getptr(v);
upb_value count = upb_value_int32(ref->count);
ref = upb_value_getptr(v);
count = upb_value_int32(ref->count);
bool ok = upb_inttable_insertptr(tab, to, count);
ok = upb_inttable_insertptr(tab, to, count);
CHECK_OOM(ok);
}
upb_unlock();
@ -210,15 +223,18 @@ typedef struct {
static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
void *closure) {
check_state *s = closure;
assert(obj == s->obj);
assert(subobj);
upb_inttable *ref2 = &s->ref2;
upb_value v;
bool removed = upb_inttable_removeptr(ref2, subobj, &v);
// The following assertion will fail if the visit() function visits a subobj
// that it did not have a ref2 on, or visits the same subobj too many times.
bool removed;
int32_t newcount;
assert(obj == s->obj);
assert(subobj);
removed = upb_inttable_removeptr(ref2, subobj, &v);
/* The following assertion will fail if the visit() function visits a subobj
* that it did not have a ref2 on, or visits the same subobj too many times. */
assert(removed);
int32_t newcount = upb_value_getint32(v) - 1;
newcount = upb_value_getint32(v) - 1;
if (newcount > 0) {
upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
}
@ -226,19 +242,21 @@ static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
void *closure) {
// In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
// exactly the set of nodes that visit() should visit. So we verify visit()'s
// correctness here.
bool ok;
/* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
* exactly the set of nodes that visit() should visit. So we verify visit()'s
* correctness here. */
check_state state;
state.obj = r;
bool ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
CHECK_OOM(ok);
getref2s(r, &state.ref2);
// This should visit any children in the ref2 table.
/* This should visit any children in the ref2 table. */
if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
// This assertion will fail if the visit() function missed any children.
/* This assertion will fail if the visit() function missed any children. */
assert(upb_inttable_count(&state.ref2) == 0);
upb_inttable_uninit(&state.ref2);
if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
@ -302,27 +320,27 @@ static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
}
#endif // UPB_DEBUG_REFS
#endif /* UPB_DEBUG_REFS */
/* freeze() *******************************************************************/
// The freeze() operation is by far the most complicated part of this scheme.
// We compute strongly-connected components and then mutate the graph such that
// we preserve the invariants documented at the top of this file. And we must
// handle out-of-memory errors gracefully (without leaving the graph
// inconsistent), which adds to the fun.
/* The freeze() operation is by far the most complicated part of this scheme.
* We compute strongly-connected components and then mutate the graph such that
* we preserve the invariants documented at the top of this file. And we must
* handle out-of-memory errors gracefully (without leaving the graph
* inconsistent), which adds to the fun. */
// The state used by the freeze operation (shared across many functions).
/* The state used by the freeze operation (shared across many functions). */
typedef struct {
int depth;
int maxdepth;
uint64_t index;
// Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
// color.
/* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
* color. */
upb_inttable objattr;
upb_inttable stack; // stack of upb_refcounted* for Tarjan's algorithm.
upb_inttable groups; // array of uint32_t*, malloc'd refcounts for new groups
upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */
upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */
upb_status *status;
jmp_buf err;
} tarjan;
@ -331,15 +349,15 @@ static void release_ref2(const upb_refcounted *obj,
const upb_refcounted *subobj,
void *closure);
// Node attributes /////////////////////////////////////////////////////////////
/* Node attributes -----------------------------------------------------------*/
// After our analysis phase all nodes will be either GRAY or WHITE.
/* After our analysis phase all nodes will be either GRAY or WHITE. */
typedef enum {
BLACK = 0, // Object has not been seen.
GRAY, // Object has been found via a refgroup but may not be reachable.
GREEN, // Object is reachable and is currently on the Tarjan stack.
WHITE, // Object is reachable and has been assigned a group (SCC).
BLACK = 0, /* Object has not been seen. */
GRAY, /* Object has been found via a refgroup but may not be reachable. */
GREEN, /* Object is reachable and is currently on the Tarjan stack. */
WHITE /* Object is reachable and has been assigned a group (SCC). */
} color_t;
/* Abandons the operation in progress by jumping to the jmp_buf stored in
 * t->err with value 1.  Does not return to the caller. */
UPB_NORETURN static void err(tarjan *t) {
  longjmp(t->err, 1);
}
@ -367,7 +385,7 @@ static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
}
static color_t color(tarjan *t, const upb_refcounted *r) {
return trygetattr(t, r) & 0x3; // Color is always stored in the low 2 bits.
return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
}
static void set_gray(tarjan *t, const upb_refcounted *r) {
@ -375,11 +393,11 @@ static void set_gray(tarjan *t, const upb_refcounted *r) {
setattr(t, r, GRAY);
}
// Pushes an obj onto the Tarjan stack and sets it to GREEN.
/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
static void push(tarjan *t, const upb_refcounted *r) {
assert(color(t, r) == BLACK || color(t, r) == GRAY);
// This defines the attr layout for the GREEN state. "index" and "lowlink"
// get 31 bits, which is plenty (limit of 2B objects frozen at a time).
/* This defines the attr layout for the GREEN state. "index" and "lowlink"
* get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
if (++t->index == 0x80000000) {
upb_status_seterrmsg(t->status, "too many objects to freeze");
@ -388,13 +406,13 @@ static void push(tarjan *t, const upb_refcounted *r) {
upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
}
// Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
// SCC group.
/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
* SCC group. */
static upb_refcounted *pop(tarjan *t) {
  upb_value top = upb_inttable_pop(&t->stack);
  upb_refcounted *obj = upb_value_getptr(top);
  /* Only objects currently on the Tarjan stack (GREEN) may be popped. */
  assert(color(t, obj) == GREEN);
  /* This defines the attr layout for nodes in the WHITE state: the color in
   * the low bits and a slot number in t->groups from bit 8 upward.
   * Top of group stack is [group, NULL]; we point at group, which sits two
   * entries below the current count. */
  setattr(t, obj, WHITE | ((upb_inttable_count(&t->groups) - 2) << 8));
  return obj;
}
@ -402,7 +420,7 @@ static upb_refcounted *pop(tarjan *t) {
static void tarjan_newgroup(tarjan *t) {
uint32_t *group = malloc(sizeof(*group));
if (!group) oom(t);
// Push group and empty group leader (we'll fill in leader later).
/* Push group and empty group leader (we'll fill in leader later). */
if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
free(group);
@ -430,21 +448,27 @@ static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
}
static uint32_t *group(tarjan *t, upb_refcounted *r) {
assert(color(t, r) == WHITE);
uint64_t groupnum = getattr(t, r) >> 8;
uint64_t groupnum;
upb_value v;
bool found = upb_inttable_lookup(&t->groups, groupnum, &v);
bool found;
assert(color(t, r) == WHITE);
groupnum = getattr(t, r) >> 8;
found = upb_inttable_lookup(&t->groups, groupnum, &v);
UPB_ASSERT_VAR(found, found);
return upb_value_getptr(v);
}
// If the group leader for this object's group has not previously been set,
// the given object is assigned to be its leader.
/* If the group leader for this object's group has not previously been set,
* the given object is assigned to be its leader. */
static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
assert(color(t, r) == WHITE);
uint64_t leader_slot = (getattr(t, r) >> 8) + 1;
uint64_t leader_slot;
upb_value v;
bool found = upb_inttable_lookup(&t->groups, leader_slot, &v);
bool found;
assert(color(t, r) == WHITE);
leader_slot = (getattr(t, r) >> 8) + 1;
found = upb_inttable_lookup(&t->groups, leader_slot, &v);
UPB_ASSERT_VAR(found, found);
if (upb_value_getptr(v)) {
return upb_value_getptr(v);
@ -456,10 +480,10 @@ static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
}
// Tarjan's algorithm //////////////////////////////////////////////////////////
/* Tarjan's algorithm --------------------------------------------------------*/
// See:
// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
/* See:
* http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
static void do_tarjan(const upb_refcounted *obj, tarjan *t);
static void tarjan_visit(const upb_refcounted *obj,
@ -470,14 +494,14 @@ static void tarjan_visit(const upb_refcounted *obj,
upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
err(t);
} else if (subobj->is_frozen || color(t, subobj) == WHITE) {
// Do nothing: we don't want to visit or color already-frozen nodes,
// and WHITE nodes have already been assigned a SCC.
/* Do nothing: we don't want to visit or color already-frozen nodes,
* and WHITE nodes have already been assigned a SCC. */
} else if (color(t, subobj) < GREEN) {
// Subdef has not yet been visited; recurse on it.
/* Subdef has not yet been visited; recurse on it. */
do_tarjan(subobj, t);
set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
} else if (color(t, subobj) == GREEN) {
// Subdef is in the stack and hence in the current SCC.
/* Subdef is in the stack and hence in the current SCC. */
set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
}
--t->depth;
@ -485,7 +509,7 @@ static void tarjan_visit(const upb_refcounted *obj,
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
if (color(t, obj) == BLACK) {
// We haven't seen this object's group; mark the whole group GRAY.
/* We haven't seen this object's group; mark the whole group GRAY. */
const upb_refcounted *o = obj;
do { set_gray(t, o); } while ((o = o->next) != obj);
}
@ -500,15 +524,15 @@ static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
}
// freeze() ////////////////////////////////////////////////////////////////////
/* freeze() ------------------------------------------------------------------*/
static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
                     void *_t) {
  tarjan *state = _t;
  assert(color(state, r) > BLACK);

  /* Subobjects that are still BLACK need no adjustment here. */
  if (color(state, subobj) <= BLACK) return;

  /* Still sharing a group: the ref stays implicit, nothing to do. */
  if (r->group == subobj->group) return;

  /* Previously this ref was not reflected in subobj->group because they
   * were in the same group; now that they are split a ref must be taken. */
  refgroup(subobj->group);
}
@ -516,10 +540,12 @@ static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
int maxdepth) {
volatile bool ret = false;
int i;
upb_inttable_iter iter;
// We run in two passes so that we can allocate all memory before performing
// any mutation of the input -- this allows us to leave the input unchanged
// in the case of memory allocation failure.
/* We run in two passes so that we can allocate all memory before performing
* any mutation of the input -- this allows us to leave the input unchanged
* in the case of memory allocation failure. */
tarjan t;
t.index = 0;
t.depth = 0;
@ -531,64 +557,65 @@ static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
if (setjmp(t.err) != 0) goto err4;
for (int i = 0; i < n; i++) {
for (i = 0; i < n; i++) {
if (color(&t, roots[i]) < GREEN) {
do_tarjan(roots[i], &t);
}
}
// If we've made it this far, no further errors are possible so it's safe to
// mutate the objects without risk of leaving them in an inconsistent state.
/* If we've made it this far, no further errors are possible so it's safe to
* mutate the objects without risk of leaving them in an inconsistent state. */
ret = true;
// The transformation that follows requires care. The preconditions are:
// - all objects in attr map are WHITE or GRAY, and are in mutable groups
// (groups of all mutable objs)
// - no ref2(to, from) refs have incremented count(to) if both "to" and
// "from" are in our attr map (this follows from invariants (2) and (3))
// Pass 1: we remove WHITE objects from their mutable groups, and add them to
// new groups according to the SCC's we computed. These new groups will
// consist of only frozen objects. None will be immediately collectible,
// because WHITE objects are by definition reachable from one of "roots",
// which the caller must own refs on.
upb_inttable_iter i;
upb_inttable_begin(&i, &t.objattr);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
// Since removal from a singly-linked list requires access to the object's
// predecessor, we consider obj->next instead of obj for moving. With the
// while() loop we guarantee that we will visit every node's predecessor.
// Proof:
// 1. every node's predecessor is in our attr map.
// 2. though the loop body may change a node's predecessor, it will only
// change it to be the node we are currently operating on, so with a
// while() loop we guarantee ourselves the chance to remove each node.
/* The transformation that follows requires care. The preconditions are:
* - all objects in attr map are WHITE or GRAY, and are in mutable groups
* (groups of all mutable objs)
* - no ref2(to, from) refs have incremented count(to) if both "to" and
* "from" are in our attr map (this follows from invariants (2) and (3)) */
/* Pass 1: we remove WHITE objects from their mutable groups, and add them to
* new groups according to the SCC's we computed. These new groups will
* consist of only frozen objects. None will be immediately collectible,
* because WHITE objects are by definition reachable from one of "roots",
* which the caller must own refs on. */
upb_inttable_begin(&iter, &t.objattr);
for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
/* Since removal from a singly-linked list requires access to the object's
* predecessor, we consider obj->next instead of obj for moving. With the
* while() loop we guarantee that we will visit every node's predecessor.
* Proof:
* 1. every node's predecessor is in our attr map.
* 2. though the loop body may change a node's predecessor, it will only
* change it to be the node we are currently operating on, so with a
* while() loop we guarantee ourselves the chance to remove each node. */
while (color(&t, obj->next) == WHITE &&
group(&t, obj->next) != obj->next->group) {
// Remove from old group.
upb_refcounted *leader;
/* Remove from old group. */
upb_refcounted *move = obj->next;
if (obj == move) {
// Removing the last object from a group.
/* Removing the last object from a group. */
assert(*obj->group == obj->individual_count);
free(obj->group);
} else {
obj->next = move->next;
// This may decrease to zero; we'll collect GRAY objects (if any) that
// remain in the group in the third pass.
/* This may decrease to zero; we'll collect GRAY objects (if any) that
* remain in the group in the third pass. */
assert(*move->group >= move->individual_count);
*move->group -= move->individual_count;
}
// Add to new group.
upb_refcounted *leader = groupleader(&t, move);
/* Add to new group. */
leader = groupleader(&t, move);
if (move == leader) {
// First object added to new group is its leader.
/* First object added to new group is its leader. */
move->group = group(&t, move);
move->next = move;
*move->group = move->individual_count;
} else {
// Group already has at least one object in it.
/* Group already has at least one object in it. */
assert(leader->group == group(&t, move));
move->group = group(&t, move);
move->next = leader->next;
@ -600,40 +627,42 @@ static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
}
}
// Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
// increment count(to) if group(obj) != group(to) (which could now be the
// case if "to" was just frozen).
upb_inttable_begin(&i, &t.objattr);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
/* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
* increment count(to) if group(obj) != group(to) (which could now be the
* case if "to" was just frozen). */
upb_inttable_begin(&iter, &t.objattr);
for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
visit(obj, crossref, &t);
}
// Pass 3: GRAY objects are collected if their group's refcount dropped to
// zero when we removed its white nodes. This can happen if they had only
// been kept alive by virtue of sharing a group with an object that was just
// frozen.
//
// It is important that we do this last, since the GRAY object's free()
// function could call unref2() on just-frozen objects, which will decrement
// refs that were added in pass 2.
upb_inttable_begin(&i, &t.objattr);
for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&i);
/* Pass 3: GRAY objects are collected if their group's refcount dropped to
* zero when we removed its white nodes. This can happen if they had only
* been kept alive by virtue of sharing a group with an object that was just
* frozen.
*
* It is important that we do this last, since the GRAY object's free()
* function could call unref2() on just-frozen objects, which will decrement
* refs that were added in pass 2. */
upb_inttable_begin(&iter, &t.objattr);
for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
if (obj->group == NULL || *obj->group == 0) {
if (obj->group) {
// We eagerly free() the group's count (since we can't easily determine
// the group's remaining size it's the easiest way to ensure it gets
// done).
upb_refcounted *o;
/* We eagerly free() the group's count (since we can't easily determine
* the group's remaining size it's the easiest way to ensure it gets
* done). */
free(obj->group);
// Visit to release ref2's (done in a separate pass since release_ref2
// depends on o->group being unmodified so it can test merged()).
upb_refcounted *o = obj;
/* Visit to release ref2's (done in a separate pass since release_ref2
* depends on o->group being unmodified so it can test merged()). */
o = obj;
do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
// Mark "group" fields as NULL so we know to free the objects later in
// this loop, but also don't try to delete the group twice.
/* Mark "group" fields as NULL so we know to free the objects later in
* this loop, but also don't try to delete the group twice. */
o = obj;
do { o->group = NULL; } while ((o = o->next) != obj);
}
@ -643,9 +672,9 @@ static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
err4:
if (!ret) {
upb_inttable_begin(&i, &t.groups);
for(; !upb_inttable_done(&i); upb_inttable_next(&i))
free(upb_value_getptr(upb_inttable_iter_value(&i)));
upb_inttable_begin(&iter, &t.groups);
for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
free(upb_value_getptr(upb_inttable_iter_value(&iter)));
}
upb_inttable_uninit(&t.groups);
err3:
@ -664,21 +693,24 @@ static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
}
/* Folds the refcounting group of "from" into the group of "r".
 *
 * The count of from's group is added to r's group count, from's counter is
 * freed, every object in from's circular list is repointed at r's counter,
 * and the two circular lists are spliced into one.  Does nothing if the two
 * objects are already merged. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *base;
  upb_refcounted *tmp;

  if (merged(r, from)) return;
  *r->group += *from->group;
  free(from->group);
  base = from;

  /* Set all refcount pointers in the "from" chain to the merged refcount.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
  do { from->group = r->group; } while ((from = from->next) != base);

  /* Merge the two circularly linked lists by swapping their next pointers. */
  tmp = r->next;
  r->next = base->next;
  base->next = tmp;
}
@ -698,11 +730,13 @@ static void release_ref2(const upb_refcounted *obj,
static void unref(const upb_refcounted *r) {
if (unrefgroup(r->group)) {
const upb_refcounted *o;
free(r->group);
// In two passes, since release_ref2 needs a guarantee that any subobjs
// are alive.
const upb_refcounted *o = r;
/* In two passes, since release_ref2 needs a guarantee that any subobjs
* are alive. */
o = r;
do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
o = r;
@ -727,9 +761,9 @@ bool upb_refcounted_init(upb_refcounted *r,
const struct upb_refcounted_vtbl *vtbl,
const void *owner) {
#ifndef NDEBUG
// Endianness check. This is unrelated to upb_refcounted, it's just a
// convenient place to put the check that we can be assured will run for
// basically every program using upb.
/* Endianness check. This is unrelated to upb_refcounted, it's just a
* convenient place to put the check that we can be assured will run for
* basically every program using upb. */
const int x = 1;
#ifdef UPB_BIG_ENDIAN
assert(*(char*)&x != 1);
@ -772,7 +806,7 @@ void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
}
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
assert(!from->is_frozen); // Non-const pointer implies this.
assert(!from->is_frozen); /* Non-const pointer implies this. */
track(r, from, true);
if (r->is_frozen) {
refgroup(r->group);
@ -782,7 +816,7 @@ void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
}
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
assert(!from->is_frozen); // Non-const pointer implies this.
assert(!from->is_frozen); /* Non-const pointer implies this. */
untrack(r, from, true);
if (r->is_frozen) {
unref(r);
@ -806,7 +840,8 @@ void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
int maxdepth) {
for (int i = 0; i < n; i++) {
int i;
for (i = 0; i < n; i++) {
assert(!roots[i]->is_frozen);
}
return freeze(roots, n, s, maxdepth);

@ -21,84 +21,102 @@
#include "upb/table.int.h"
// Reference tracking will check ref()/unref() operations to make sure the
// ref ownership is correct. Where possible it will also make tools like
// Valgrind attribute ref leaks to the code that took the leaked ref, not
// the code that originally created the object.
//
// Enabling this requires the application to define upb_lock()/upb_unlock()
// functions that acquire/release a global mutex (or #define UPB_THREAD_UNSAFE).
// For this reason we don't enable it by default, even in debug builds.
/* Reference tracking will check ref()/unref() operations to make sure the
* ref ownership is correct. Where possible it will also make tools like
* Valgrind attribute ref leaks to the code that took the leaked ref, not
* the code that originally created the object.
*
* Enabling this requires the application to define upb_lock()/upb_unlock()
* functions that acquire/release a global mutex (or #define UPB_THREAD_UNSAFE).
* For this reason we don't enable it by default, even in debug builds.
*/
// #define UPB_DEBUG_REFS
/* #define UPB_DEBUG_REFS */
#ifdef __cplusplus
namespace upb { class RefCounted; }
#endif
UPB_DECLARE_TYPE(upb::RefCounted, upb_refcounted);
UPB_DECLARE_TYPE(upb::RefCounted, upb_refcounted)
struct upb_refcounted_vtbl;
UPB_DEFINE_CLASS0(upb::RefCounted,
#ifdef __cplusplus
class upb::RefCounted {
public:
// Returns true if the given object is frozen.
/* Returns true if the given object is frozen. */
bool IsFrozen() const;
// Increases the ref count, the new ref is owned by "owner" which must not
// already own a ref (and should not itself be a refcounted object if the ref
// could possibly be circular; see below).
// Thread-safe iff "this" is frozen.
/* Increases the ref count, the new ref is owned by "owner" which must not
* already own a ref (and should not itself be a refcounted object if the ref
* could possibly be circular; see below).
* Thread-safe iff "this" is frozen. */
void Ref(const void *owner) const;
// Release a ref that was acquired from upb_refcounted_ref() and collects any
// objects it can.
/* Release a ref that was acquired from upb_refcounted_ref() and collects any
* objects it can. */
void Unref(const void *owner) const;
// Moves an existing ref from "from" to "to", without changing the overall
// ref count. DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
// but "to" may not be NULL.
/* Moves an existing ref from "from" to "to", without changing the overall
* ref count. DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
* but "to" may not be NULL. */
void DonateRef(const void *from, const void *to) const;
// Verifies that a ref to the given object is currently held by the given
// owner. Only effective in UPB_DEBUG_REFS builds.
/* Verifies that a ref to the given object is currently held by the given
* owner. Only effective in UPB_DEBUG_REFS builds. */
void CheckRef(const void *owner) const;
private:
UPB_DISALLOW_POD_OPS(RefCounted, upb::RefCounted);
,
UPB_DEFINE_STRUCT0(upb_refcounted,
// A single reference count shared by all objects in the group.
UPB_DISALLOW_POD_OPS(RefCounted, upb::RefCounted)
#else
struct upb_refcounted {
#endif
/* TODO(haberman): move the actual structure definition to structdefs.int.h.
* The only reason they are here is because inline functions need to see the
* definition of upb_handlers, which needs to see this definition. But we
* can change the upb_handlers inline functions to deal in raw offsets
* instead.
*/
/* A single reference count shared by all objects in the group. */
uint32_t *group;
// A singly-linked list of all objects in the group.
/* A singly-linked list of all objects in the group. */
upb_refcounted *next;
// Table of function pointers for this type.
/* Table of function pointers for this type. */
const struct upb_refcounted_vtbl *vtbl;
// Maintained only when mutable, this tracks the number of refs (but not
// ref2's) to this object. *group should be the sum of all individual_count
// in the group.
/* Maintained only when mutable, this tracks the number of refs (but not
* ref2's) to this object. *group should be the sum of all individual_count
* in the group. */
uint32_t individual_count;
bool is_frozen;
#ifdef UPB_DEBUG_REFS
upb_inttable *refs; // Maps owner -> trackedref for incoming refs.
upb_inttable *ref2s; // Set of targets for outgoing ref2s.
upb_inttable *refs; /* Maps owner -> trackedref for incoming refs. */
upb_inttable *ref2s; /* Set of targets for outgoing ref2s. */
#endif
};
#ifdef UPB_DEBUG_REFS
#define UPB_REFCOUNT_INIT(refs, ref2s) \
{&static_refcount, NULL, NULL, 0, true, refs, ref2s}
#else
#define UPB_REFCOUNT_INIT(refs, ref2s) {&static_refcount, NULL, NULL, 0, true}
#endif
));
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
// It is better to use tracked refs when possible, for the extra debugging
// capability. But if this is not possible (because you don't have easy access
// to a stable pointer value that is associated with the ref), you can pass
// UPB_UNTRACKED_REF instead.
/* It is better to use tracked refs when possible, for the extra debugging
* capability. But if this is not possible (because you don't have easy access
* to a stable pointer value that is associated with the ref), you can pass
* UPB_UNTRACKED_REF instead. */
extern const void *UPB_UNTRACKED_REF;
// Native C API.
/* Native C API. */
bool upb_refcounted_isfrozen(const upb_refcounted *r);
void upb_refcounted_ref(const upb_refcounted *r, const void *owner);
void upb_refcounted_unref(const upb_refcounted *r, const void *owner);
@ -106,37 +124,70 @@ void upb_refcounted_donateref(
const upb_refcounted *r, const void *from, const void *to);
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner);
// Internal-to-upb Interface ///////////////////////////////////////////////////
/* Generates the C convenience functions type_isfrozen(), type_ref(),
 * type_unref(), type_donateref() and type_checkref() for a derived type.
 * Each one converts its "const type *" argument with upcastfunc() and
 * forwards to the corresponding upb_refcounted_*() function.
 *
 *   type:       name of the derived type (also used as the function prefix).
 *   upcastfunc: function or macro applied to the "const type *" argument; its
 *               result is passed as the upb_refcounted argument. */
#define UPB_REFCOUNTED_CMETHODS(type, upcastfunc) \
UPB_INLINE bool type ## _isfrozen(const type *v) { \
return upb_refcounted_isfrozen(upcastfunc(v)); \
} \
UPB_INLINE void type ## _ref(const type *v, const void *owner) { \
upb_refcounted_ref(upcastfunc(v), owner); \
} \
UPB_INLINE void type ## _unref(const type *v, const void *owner) { \
upb_refcounted_unref(upcastfunc(v), owner); \
} \
UPB_INLINE void type ## _donateref(const type *v, const void *from, const void *to) { \
upb_refcounted_donateref(upcastfunc(v), from, to); \
} \
UPB_INLINE void type ## _checkref(const type *v, const void *owner) { \
upb_refcounted_checkref(upcastfunc(v), owner); \
}
/* Expands to the member functions IsFrozen(), Ref(), Unref(), DonateRef() and
 * CheckRef().  Each one upcasts "this" with
 * upb::upcast_to<const upb::RefCounted>() and forwards to the RefCounted
 * method of the same name.  It uses "this", so it is meant to be expanded
 * inside a class body. */
#define UPB_REFCOUNTED_CPPMETHODS \
bool IsFrozen() const { \
return upb::upcast_to<const upb::RefCounted>(this)->IsFrozen(); \
} \
void Ref(const void *owner) const { \
return upb::upcast_to<const upb::RefCounted>(this)->Ref(owner); \
} \
void Unref(const void *owner) const { \
return upb::upcast_to<const upb::RefCounted>(this)->Unref(owner); \
} \
void DonateRef(const void *from, const void *to) const { \
return upb::upcast_to<const upb::RefCounted>(this)->DonateRef(from, to); \
} \
void CheckRef(const void *owner) const { \
return upb::upcast_to<const upb::RefCounted>(this)->CheckRef(owner); \
}
/* Internal-to-upb Interface **************************************************/
/* Signature of the callback handed to a vtbl "visit" function.  It receives
 * an object "r", one of its subobjects "subobj", and the opaque "closure"
 * pointer that was supplied along with the callback. */
typedef void upb_refcounted_visit(const upb_refcounted *r,
const upb_refcounted *subobj,
void *closure);
/* Table of type-specific operations; every upb_refcounted points at one of
 * these through its "vtbl" field. */
struct upb_refcounted_vtbl {
/* Must visit all subobjects that are currently ref'd via upb_refcounted_ref2.
 * Must be longjmp()-safe. */
void (*visit)(const upb_refcounted *r, upb_refcounted_visit *visit, void *c);
/* Must free the object and release all references to other objects. */
void (*free)(upb_refcounted *r);
};
// Initializes the refcounted with a single ref for the given owner. Returns
// false if memory could not be allocated.
/* Initializes the refcounted with a single ref for the given owner. Returns
* false if memory could not be allocated. */
bool upb_refcounted_init(upb_refcounted *r,
const struct upb_refcounted_vtbl *vtbl,
const void *owner);
// Adds a ref from one refcounted object to another ("from" must not already
// own a ref). These refs may be circular; cycles will be collected correctly
// (if conservatively). These refs do not need to be freed in from's free()
// function.
/* Adds a ref from one refcounted object to another ("from" must not already
* own a ref). These refs may be circular; cycles will be collected correctly
* (if conservatively). These refs do not need to be freed in from's free()
* function. */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from);
// Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
// object it can. This is only necessary when "from" no longer points to "r",
// and not from from's "free" function.
/* Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
* object it can. This is only necessary when "from" no longer points to "r",
* and not from from's "free" function. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
#define upb_ref2(r, from) \
@ -144,37 +195,30 @@ void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
#define upb_unref2(r, from) \
upb_refcounted_unref2((const upb_refcounted*)r, (upb_refcounted*)from)
// Freezes all mutable objects reachable by ref2() refs from the given roots.
// This will split refcounting groups into precise SCC groups, so that
// refcounting of frozen objects can be more aggressive. If memory allocation
// fails, or if more than 2**31 mutable objects are reachable from "roots", or
// if the maximum depth of the graph exceeds "maxdepth", false is returned and
// the objects are unchanged.
//
// After this operation succeeds, the objects are frozen/const, and may not be
// used through non-const pointers. In particular, they may not be passed as
// the second parameter of upb_refcounted_{ref,unref}2(). On the upside, all
// operations on frozen refcounteds are threadsafe, and objects will be freed
// at the precise moment that they become unreachable.
//
// Caller must own refs on each object in the "roots" list.
/* Freezes all mutable objects reachable by ref2() refs from the given roots.
* This will split refcounting groups into precise SCC groups, so that
* refcounting of frozen objects can be more aggressive. If memory allocation
* fails, or if more than 2**31 mutable objects are reachable from "roots", or
* if the maximum depth of the graph exceeds "maxdepth", false is returned and
* the objects are unchanged.
*
* After this operation succeeds, the objects are frozen/const, and may not be
* used through non-const pointers. In particular, they may not be passed as
* the second parameter of upb_refcounted_{ref,unref}2(). On the upside, all
* operations on frozen refcounteds are threadsafe, and objects will be freed
* at the precise moment that they become unreachable.
*
* Caller must own refs on each object in the "roots" list. */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
int maxdepth);
// Shared by all compiled-in refcounted objects.
/* Shared by all compiled-in refcounted objects. */
extern uint32_t static_refcount;
UPB_END_EXTERN_C // }
#ifdef UPB_DEBUG_REFS
#define UPB_REFCOUNT_INIT(refs, ref2s) \
{&static_refcount, NULL, NULL, 0, true, refs, ref2s}
#else
#define UPB_REFCOUNT_INIT(refs, ref2s) {&static_refcount, NULL, NULL, 0, true}
#endif
UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ Wrappers.
/* C++ Wrappers. */
namespace upb {
inline bool RefCounted::IsFrozen() const {
return upb_refcounted_isfrozen(this);
@ -191,7 +235,7 @@ inline void RefCounted::DonateRef(const void *from, const void *to) const {
inline void RefCounted::CheckRef(const void *owner) const {
upb_refcounted_checkref(this, owner);
}
} // namespace upb
} /* namespace upb */
#endif
#endif // UPB_REFCOUNT_H_
#endif /* UPB_REFCOUNT_H_ */

@ -9,7 +9,7 @@
#include <stdlib.h>
// Fallback implementation if the shim is not specialized by the JIT.
/* Fallback implementation if the shim is not specialized by the JIT. */
#define SHIM_WRITER(type, ctype) \
bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \
uint8_t *m = c; \
@ -31,12 +31,14 @@ SHIM_WRITER(bool, bool)
bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
int32_t hasbit) {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
bool ok;
upb_shim_data *d = malloc(sizeof(*d));
if (!d) return false;
d->offset = offset;
d->hasbit = hasbit;
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, d);
upb_handlerattr_setalwaysok(&attr, true);
upb_handlers_addcleanup(h, d, free);
@ -45,7 +47,7 @@ bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
case UPB_TYPE_##u: \
ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
bool ok = false;
ok = false;
switch (upb_fielddef_type(f)) {
TYPE(INT64, int64);

@ -32,33 +32,33 @@ namespace upb {
/* Groups the shim API for C++ callers as static member functions.  "Data" is
 * an alias for the C type upb_shim_data. */
struct Shim {
typedef upb_shim_data Data;
/* Sets a handler for the given field that writes the value to the given
 * offset and, if hasbit >= 0, sets a bit at the given bit offset. Returns
 * true if the handler was set successfully. */
static bool Set(Handlers *h, const FieldDef *f, size_t ofs, int32_t hasbit);
/* If this handler is a shim, returns the corresponding upb::Shim::Data and
 * stores the type in "type". Otherwise returns NULL. */
static const Data* GetData(const Handlers* h, Handlers::Selector s,
FieldDef::Type* type);
};
} // namespace upb
} /* namespace upb */
#endif
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
// C API.
/* C API. */
bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
int32_t hasbit);
const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
upb_fieldtype_t *type);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ Wrappers.
/* C++ Wrappers. */
namespace upb {
inline bool Shim::Set(Handlers* h, const FieldDef* f, size_t ofs,
int32_t hasbit) {
@ -68,7 +68,7 @@ inline const Shim::Data* Shim::GetData(const Handlers* h, Handlers::Selector s,
FieldDef::Type* type) {
return upb_shim_getdata(h, s, type);
}
} // namespace upb
} /* namespace upb */
#endif
#endif // UPB_SHIM_H
#endif /* UPB_SHIM_H */

@ -30,88 +30,90 @@ class Sink;
}
#endif
UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc);
UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink);
UPB_DECLARE_TYPE(upb::Sink, upb_sink);
// A upb::Sink is an object that binds a upb::Handlers object to some runtime
// state. It represents an endpoint to which data can be sent.
//
// TODO(haberman): right now all of these functions take selectors. Should they
// take selectorbase instead?
//
// ie. instead of calling:
// sink->StartString(FOO_FIELD_START_STRING, ...)
// a selector base would let you say:
// sink->StartString(FOO_FIELD, ...)
//
// This would make call sites a little nicer and require emitting fewer selector
// definitions in .h files.
//
// But the current scheme has the benefit that you can retrieve a function
// pointer for any handler with handlers->GetHandler(selector), without having
// to have a separate GetHandler() function for each handler type. The JIT
// compiler uses this. To accommodate we'd have to expose a separate
// GetHandler() for every handler type.
//
// Also to ponder: selectors right now are independent of a specific Handlers
// instance. In other words, they allocate a number to every possible handler
// that *could* be registered, without knowing anything about what handlers
// *are* registered. That means that using selectors as table offsets prohibits
// us from compacting the handler table at Freeze() time. If the table is very
// sparse, this could be wasteful.
//
// Having another selector-like thing that is specific to a Handlers instance
// would allow this compacting, but then it would be impossible to write code
// ahead-of-time that can be bound to any Handlers instance at runtime. For
// example, a .proto file parser written as straight C will not know what
// Handlers it will be bound to, so when it calls sink->StartString() what
// selector will it pass? It needs a selector like we have today, that is
// independent of any particular upb::Handlers.
//
// Is there a way then to allow Handlers table compaction?
UPB_DEFINE_CLASS0(upb::Sink,
UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc)
UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink)
UPB_DECLARE_TYPE(upb::Sink, upb_sink)
#ifdef __cplusplus
/* A upb::Sink is an object that binds a upb::Handlers object to some runtime
* state. It represents an endpoint to which data can be sent.
*
* TODO(haberman): right now all of these functions take selectors. Should they
* take selectorbase instead?
*
* ie. instead of calling:
* sink->StartString(FOO_FIELD_START_STRING, ...)
* a selector base would let you say:
* sink->StartString(FOO_FIELD, ...)
*
* This would make call sites a little nicer and require emitting fewer selector
* definitions in .h files.
*
* But the current scheme has the benefit that you can retrieve a function
* pointer for any handler with handlers->GetHandler(selector), without having
* to have a separate GetHandler() function for each handler type. The JIT
* compiler uses this. To accommodate we'd have to expose a separate
* GetHandler() for every handler type.
*
* Also to ponder: selectors right now are independent of a specific Handlers
* instance. In other words, they allocate a number to every possible handler
* that *could* be registered, without knowing anything about what handlers
* *are* registered. That means that using selectors as table offsets prohibits
* us from compacting the handler table at Freeze() time. If the table is very
* sparse, this could be wasteful.
*
* Having another selector-like thing that is specific to a Handlers instance
* would allow this compacting, but then it would be impossible to write code
* ahead-of-time that can be bound to any Handlers instance at runtime. For
* example, a .proto file parser written as straight C will not know what
* Handlers it will be bound to, so when it calls sink->StartString() what
* selector will it pass? It needs a selector like we have today, that is
* independent of any particular upb::Handlers.
*
* Is there a way then to allow Handlers table compaction? */
class upb::Sink {
public:
// Constructor with no initialization; must be Reset() before use.
/* Constructor with no initialization; must be Reset() before use. */
Sink() {}
// Constructs a new sink for the given frozen handlers and closure.
//
// TODO: once the Handlers know the expected closure type, verify that T
// matches it.
/* Constructs a new sink for the given frozen handlers and closure.
*
* TODO: once the Handlers know the expected closure type, verify that T
* matches it. */
template <class T> Sink(const Handlers* handlers, T* closure);
// Resets the value of the sink.
/* Resets the value of the sink. */
template <class T> void Reset(const Handlers* handlers, T* closure);
// Returns the top-level object that is bound to this sink.
//
// TODO: once the Handlers know the expected closure type, verify that T
// matches it.
/* Returns the top-level object that is bound to this sink.
*
* TODO: once the Handlers know the expected closure type, verify that T
* matches it. */
template <class T> T* GetObject() const;
// Functions for pushing data into the sink.
//
// These return false if processing should stop (either due to error or just
// to suspend).
//
// These may not be called from within one of the same sink's handlers (in
// other words, handlers are not re-entrant).
// Should be called at the start and end of every message; both the top-level
// message and submessages. This means that submessages should use the
// following sequence:
// sink->StartSubMessage(startsubmsg_selector);
// sink->StartMessage();
// // ...
// sink->EndMessage(&status);
// sink->EndSubMessage(endsubmsg_selector);
/* Functions for pushing data into the sink.
*
* These return false if processing should stop (either due to error or just
* to suspend).
*
* These may not be called from within one of the same sink's handlers (in
* other words, handlers are not re-entrant). */
/* Should be called at the start and end of every message; both the top-level
* message and submessages. This means that submessages should use the
* following sequence:
* sink->StartSubMessage(startsubmsg_selector);
* sink->StartMessage();
* // ...
* sink->EndMessage(&status);
* sink->EndSubMessage(endsubmsg_selector); */
bool StartMessage();
bool EndMessage(Status* status);
// Putting of individual values. These work for both repeated and
// non-repeated fields, but for repeated fields you must wrap them in
// calls to StartSequence()/EndSequence().
/* Putting of individual values. These work for both repeated and
* non-repeated fields, but for repeated fields you must wrap them in
* calls to StartSequence()/EndSequence(). */
bool PutInt32(Handlers::Selector s, int32_t val);
bool PutInt64(Handlers::Selector s, int64_t val);
bool PutUInt32(Handlers::Selector s, uint32_t val);
@ -120,92 +122,101 @@ UPB_DEFINE_CLASS0(upb::Sink,
bool PutDouble(Handlers::Selector s, double val);
bool PutBool(Handlers::Selector s, bool val);
// Putting of string/bytes values. Each string can consist of zero or more
// non-contiguous buffers of data.
//
// For StartString(), the function will write a sink for the string to "sub."
// The sub-sink must be used for any/all PutStringBuffer() calls.
/* Putting of string/bytes values. Each string can consist of zero or more
* non-contiguous buffers of data.
*
* For StartString(), the function will write a sink for the string to "sub."
* The sub-sink must be used for any/all PutStringBuffer() calls. */
bool StartString(Handlers::Selector s, size_t size_hint, Sink* sub);
size_t PutStringBuffer(Handlers::Selector s, const char *buf, size_t len,
const BufferHandle *handle);
bool EndString(Handlers::Selector s);
// For submessage fields.
//
// For StartSubMessage(), the function will write a sink for the string to
// "sub." The sub-sink must be used for any/all handlers called within the
// submessage.
/* For submessage fields.
*
* For StartSubMessage(), the function will write a sink for the submessage
* to "sub." The sub-sink must be used for any/all handlers called within the
* submessage. */
bool StartSubMessage(Handlers::Selector s, Sink* sub);
bool EndSubMessage(Handlers::Selector s);
// For repeated fields of any type, the sequence of values must be wrapped in
// these calls.
//
// For StartSequence(), the function will write a sink for the string to
// "sub." The sub-sink must be used for any/all handlers called within the
// sequence.
/* For repeated fields of any type, the sequence of values must be wrapped in
* these calls.
*
* For StartSequence(), the function will write a sink for the sequence to
* "sub." The sub-sink must be used for any/all handlers called within the
* sequence. */
bool StartSequence(Handlers::Selector s, Sink* sub);
bool EndSequence(Handlers::Selector s);
// Copy and assign specifically allowed.
// We don't even bother making these members private because so many
// functions need them and this is mainly just a dumb data container anyway.
,
UPB_DEFINE_STRUCT0(upb_sink,
/* Copy and assign specifically allowed.
* We don't even bother making these members private because so many
* functions need them and this is mainly just a dumb data container anyway.
*/
#else
struct upb_sink {
#endif
const upb_handlers *handlers;
void *closure;
));
};
UPB_DEFINE_CLASS0(upb::BytesSink,
#ifdef __cplusplus
class upb::BytesSink {
public:
BytesSink() {}
// Constructs a new sink for the given frozen handlers and closure.
//
// TODO(haberman): once the Handlers know the expected closure type, verify
// that T matches it.
/* Constructs a new sink for the given frozen handlers and closure.
*
* TODO(haberman): once the Handlers know the expected closure type, verify
* that T matches it. */
template <class T> BytesSink(const BytesHandler* handler, T* closure);
// Resets the value of the sink.
/* Resets the value of the sink. */
template <class T> void Reset(const BytesHandler* handler, T* closure);
bool Start(size_t size_hint, void **subc);
size_t PutBuffer(void *subc, const char *buf, size_t len,
const BufferHandle *handle);
bool End();
,
UPB_DEFINE_STRUCT0(upb_bytessink,
#else
struct upb_bytessink {
#endif
const upb_byteshandler *handler;
void *closure;
));
};
#ifdef __cplusplus

/* A class for pushing a flat buffer of data to a BytesSink.
 * You can construct an instance of this to get a resumable source,
 * or just call the static PutBuffer() to do a non-resumable push all in one
 * go. */
class upb::BufferSource {
 public:
  BufferSource();
  BufferSource(const char* buf, size_t len, BytesSink* sink);

  /* Returns true if the entire buffer was pushed successfully.  Otherwise the
   * next call to PutNext() will resume where the previous one left off.
   * TODO(haberman): implement this. */
  bool PutNext();

  /* A static version; with this version it is not possible to resume in the
   * case of failure or a partially-consumed buffer. */
  static bool PutBuffer(const char* buf, size_t len, BytesSink* sink);

  /* Convenience overload for string-like types that provide c_str() and
   * size(). */
  template <class T> static bool PutBuffer(const T& str, BytesSink* sink) {
    return PutBuffer(str.c_str(), str.size(), sink);
  }
#else
struct upb_bufsrc {
  /* Placeholder member so that the C struct is not empty. */
  char dummy;
#endif
};
UPB_BEGIN_EXTERN_C // {
UPB_BEGIN_EXTERN_C
// Inline definitions.
/* Inline definitions. */
UPB_INLINE void upb_bytessink_reset(upb_bytessink *s, const upb_byteshandler *h,
void *closure) {
@ -215,10 +226,11 @@ UPB_INLINE void upb_bytessink_reset(upb_bytessink *s, const upb_byteshandler *h,
UPB_INLINE bool upb_bytessink_start(upb_bytessink *s, size_t size_hint,
void **subc) {
typedef upb_startstr_handlerfunc func;
func *start;
*subc = s->closure;
if (!s->handler) return true;
upb_startstr_handlerfunc *start =
(upb_startstr_handlerfunc *)s->handler->table[UPB_STARTSTR_SELECTOR].func;
start = (func *)s->handler->table[UPB_STARTSTR_SELECTOR].func;
if (!start) return true;
*subc = start(s->closure, upb_handlerattr_handlerdata(
@ -230,9 +242,10 @@ UPB_INLINE bool upb_bytessink_start(upb_bytessink *s, size_t size_hint,
UPB_INLINE size_t upb_bytessink_putbuf(upb_bytessink *s, void *subc,
const char *buf, size_t size,
const upb_bufhandle* handle) {
typedef upb_string_handlerfunc func;
func *putbuf;
if (!s->handler) return true;
upb_string_handlerfunc *putbuf =
(upb_string_handlerfunc *)s->handler->table[UPB_STRING_SELECTOR].func;
putbuf = (func *)s->handler->table[UPB_STRING_SELECTOR].func;
if (!putbuf) return true;
return putbuf(subc, upb_handlerattr_handlerdata(
@ -241,9 +254,10 @@ UPB_INLINE size_t upb_bytessink_putbuf(upb_bytessink *s, void *subc,
}
UPB_INLINE bool upb_bytessink_end(upb_bytessink *s) {
typedef upb_endfield_handlerfunc func;
func *end;
if (!s->handler) return true;
upb_endfield_handlerfunc *end =
(upb_endfield_handlerfunc *)s->handler->table[UPB_ENDSTR_SELECTOR].func;
end = (func *)s->handler->table[UPB_ENDSTR_SELECTOR].func;
if (!end) return true;
return end(s->closure,
@ -254,10 +268,11 @@ UPB_INLINE bool upb_bytessink_end(upb_bytessink *s) {
UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len,
upb_bytessink *sink) {
void *subc;
bool ret;
upb_bufhandle handle;
upb_bufhandle_init(&handle);
upb_bufhandle_setbuf(&handle, buf, 0);
bool ret = upb_bytessink_start(sink, len, &subc);
ret = upb_bytessink_start(sink, len, &subc);
if (ret && len != 0) {
ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) == len);
}
@ -271,21 +286,23 @@ UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len,
/* Generates upb_sink_put<type>(): looks up the handler registered for
 * selector "sel" and calls it with the sink's closure, the handler data and
 * "val".  The generated function returns true when the sink has no handlers
 * or no handler is registered for "sel"; otherwise it returns the handler's
 * result.
 *
 * All locals are declared at the top of the block so that the expansion is
 * valid C89. */
#define PUTVAL(type, ctype)                                                    \
  UPB_INLINE bool upb_sink_put##type(upb_sink *s, upb_selector_t sel,          \
                                     ctype val) {                              \
    typedef upb_##type##_handlerfunc functype;                                 \
    functype *func;                                                            \
    const void *hd;                                                            \
    if (!s->handlers) return true;                                             \
    func = (functype *)upb_handlers_gethandler(s->handlers, sel);              \
    if (!func) return true;                                                    \
    hd = upb_handlers_gethandlerdata(s->handlers, sel);                        \
    return func(s->closure, hd, val);                                          \
  }

/* No trailing semicolons: each expansion is already a complete function
 * definition, and a stray ';' at file scope is rejected by -pedantic C89. */
PUTVAL(int32,  int32_t)
PUTVAL(int64,  int64_t)
PUTVAL(uint32, uint32_t)
PUTVAL(uint64, uint64_t)
PUTVAL(float,  float)
PUTVAL(double, double)
PUTVAL(bool,   bool)
#undef PUTVAL
UPB_INLINE void upb_sink_reset(upb_sink *s, const upb_handlers *h, void *c) {
@ -296,114 +313,129 @@ UPB_INLINE void upb_sink_reset(upb_sink *s, const upb_handlers *h, void *c) {
/* Passes a string buffer to the handler registered for selector "sel".
 *
 * Returns n (i.e. reports the whole buffer as consumed) when the sink has no
 * handlers or no handler is registered for "sel"; otherwise returns whatever
 * the handler returns. */
UPB_INLINE size_t upb_sink_putstring(upb_sink *s, upb_selector_t sel,
                                     const char *buf, size_t n,
                                     const upb_bufhandle *handle) {
  typedef upb_string_handlerfunc func;
  func *handler;
  const void *hd;
  if (!s->handlers) return n;
  handler = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!handler) return n;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  return handler(s->closure, hd, buf, n, handle);
}
/* Invokes the handler registered under UPB_STARTMSG_SELECTOR, if any.
 *
 * Returns true when the sink has no handlers or no such handler is
 * registered; otherwise returns the handler's result. */
UPB_INLINE bool upb_sink_startmsg(upb_sink *s) {
  typedef upb_startmsg_handlerfunc func;
  func *startmsg;
  const void *hd;
  if (!s->handlers) return true;
  startmsg = (func *)upb_handlers_gethandler(s->handlers,
                                             UPB_STARTMSG_SELECTOR);

  if (!startmsg) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, UPB_STARTMSG_SELECTOR);
  return startmsg(s->closure, hd);
}
/* Invokes the handler registered under UPB_ENDMSG_SELECTOR, if any, passing
 * "status" through to it.
 *
 * Returns true when the sink has no handlers or no such handler is
 * registered; otherwise returns the handler's result. */
UPB_INLINE bool upb_sink_endmsg(upb_sink *s, upb_status *status) {
  typedef upb_endmsg_handlerfunc func;
  func *endmsg;
  const void *hd;
  if (!s->handlers) return true;
  endmsg = (func *)upb_handlers_gethandler(s->handlers, UPB_ENDMSG_SELECTOR);

  if (!endmsg) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, UPB_ENDMSG_SELECTOR);
  return endmsg(s->closure, hd, status);
}
/* Starts a sequence for selector "sel" and fills in "sub", the sink to use
 * for the sequence.
 *
 * "sub" starts out as a copy of "s" (same closure, same handlers).  If a
 * handler is registered for "sel", its return value replaces sub->closure,
 * and the function returns false when that value is NULL.  With no handlers
 * or no handler for "sel", returns true and "sub" stays a copy of "s". */
UPB_INLINE bool upb_sink_startseq(upb_sink *s, upb_selector_t sel,
                                  upb_sink *sub) {
  typedef upb_startfield_handlerfunc func;
  func *startseq;
  const void *hd;
  sub->closure = s->closure;
  sub->handlers = s->handlers;
  if (!s->handlers) return true;
  startseq = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!startseq) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  sub->closure = startseq(s->closure, hd);
  return sub->closure ? true : false;
}
/* Invokes the end-of-sequence handler registered for selector "sel", if any.
 *
 * Returns true when the sink has no handlers or no handler is registered for
 * "sel"; otherwise returns the handler's result. */
UPB_INLINE bool upb_sink_endseq(upb_sink *s, upb_selector_t sel) {
  typedef upb_endfield_handlerfunc func;
  func *endseq;
  const void *hd;
  if (!s->handlers) return true;
  endseq = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!endseq) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  return endseq(s->closure, hd);
}
/* Starts a string for selector "sel" and fills in "sub", the sink to use for
 * the string's buffers.  "size_hint" is passed through to the handler.
 *
 * "sub" starts out as a copy of "s" (same closure, same handlers).  If a
 * handler is registered for "sel", its return value replaces sub->closure,
 * and the function returns false when that value is NULL.  With no handlers
 * or no handler for "sel", returns true and "sub" stays a copy of "s". */
UPB_INLINE bool upb_sink_startstr(upb_sink *s, upb_selector_t sel,
                                  size_t size_hint, upb_sink *sub) {
  typedef upb_startstr_handlerfunc func;
  func *startstr;
  const void *hd;
  sub->closure = s->closure;
  sub->handlers = s->handlers;
  if (!s->handlers) return true;
  startstr = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!startstr) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  sub->closure = startstr(s->closure, hd, size_hint);
  return sub->closure ? true : false;
}
/* Invokes the end-of-string handler registered for selector "sel", if any.
 *
 * Returns true when the sink has no handlers or no handler is registered for
 * "sel"; otherwise returns the handler's result. */
UPB_INLINE bool upb_sink_endstr(upb_sink *s, upb_selector_t sel) {
  typedef upb_endfield_handlerfunc func;
  func *endstr;
  const void *hd;
  if (!s->handlers) return true;
  endstr = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!endstr) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  return endstr(s->closure, hd);
}
/* Starts a submessage for selector "sel" and fills in "sub", the sink to use
 * for the submessage.
 *
 * sub->closure starts out as s->closure.  When "s" has no handlers,
 * sub->handlers is set to NULL and the function returns true.  Otherwise
 * sub->handlers is taken from upb_handlers_getsubhandlers_sel(); if a handler
 * is registered for "sel", its return value replaces sub->closure and the
 * function returns false when that value is NULL. */
UPB_INLINE bool upb_sink_startsubmsg(upb_sink *s, upb_selector_t sel,
                                     upb_sink *sub) {
  typedef upb_startfield_handlerfunc func;
  func *startsubmsg;
  const void *hd;
  sub->closure = s->closure;
  if (!s->handlers) {
    sub->handlers = NULL;
    return true;
  }
  sub->handlers = upb_handlers_getsubhandlers_sel(s->handlers, sel);
  startsubmsg = (func *)upb_handlers_gethandler(s->handlers, sel);

  if (!startsubmsg) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  sub->closure = startsubmsg(s->closure, hd);
  return sub->closure ? true : false;
}
/* Invokes the end-of-submessage handler registered for selector "sel", if
 * any.
 *
 * Returns true when the sink has no handlers or no handler is registered for
 * "sel"; otherwise returns the handler's result. */
UPB_INLINE bool upb_sink_endsubmsg(upb_sink *s, upb_selector_t sel) {
  typedef upb_endfield_handlerfunc func;
  func *endsubmsg;
  const void *hd;
  if (!s->handlers) return true;
  endsubmsg = (func *)upb_handlers_gethandler(s->handlers, sel);

  /* A missing handler means "nothing to do", exactly as in upb_sink_endseq()
   * and upb_sink_endstr().  This used to return s->closure, which made the
   * result depend on whether the closure pointer happened to be NULL. */
  if (!endsubmsg) return true;
  hd = upb_handlers_gethandlerdata(s->handlers, sel);
  return endsubmsg(s->closure, hd);
}
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
@ -492,7 +524,7 @@ inline bool BufferSource::PutBuffer(const char *buf, size_t len,
return upb_bufsrc_putbuf(buf, len, sink);
}
} // namespace upb
} /* namespace upb */
#endif
#endif

@ -0,0 +1,176 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2015 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* This file contains definitions of structs that should be considered private
* and NOT stable across versions of upb.
*
* The only reason they are declared here and not in .c files is to allow upb
* and the application (if desired) to embed statically-initialized instances
* of structures like defs.
*
* If you include this file, all guarantees of ABI compatibility go out the
* window! Any code that includes this file needs to recompile against the
* exact same version of upb that they are linking against.
*
* You also need to recompile if you change the value of the UPB_DEBUG_REFS
* flag.
*/
#include <upb/def.h>
#ifndef UPB_STATICINIT_H_
#define UPB_STATICINIT_H_
#ifdef __cplusplus
/* Because of how we do our typedefs, this header can't be included from C++. */
#error This file cannot be included from C++
#endif
/* upb_refcounted *************************************************************/
/* upb_def ********************************************************************/
/* Common header of every def.  The fielddef, msgdef, enumdef and oneofdef
 * structs in this file all embed a upb_def as their first member ("base").
 * Member order matters: UPB_DEF_INIT() initializes this struct
 * positionally. */
struct upb_def {
upb_refcounted base;
const char *fullname;
char type; /* A upb_deftype_t (char to save space) */
/* Used as a flag during the def's mutable stage. Must be false unless
* it is currently being used by a function on the stack. This allows
* us to easily determine which defs were passed into the function's
* current invocation. */
bool came_from_user;
};
/* Static initializer for struct upb_def.  The values are positional and
 * follow the member order of the struct: base, fullname, type,
 * came_from_user (always false here). */
#define UPB_DEF_INIT(name, type, refs, ref2s) \
{ UPB_REFCOUNT_INIT(refs, ref2s), name, type, false }
/* upb_fielddef ***************************************************************/
/* Internal representation of a field definition.  Member order matters:
 * UPB_FIELDDEF_INIT() initializes this struct positionally. */
struct upb_fielddef {
upb_def base;
/* Storage for the field's default value.  NOTE(review): which union member
* is meaningful is not visible in this file -- presumably selected by type_
* and default_is_string; confirm in def.c. */
union {
int64_t sint;
uint64_t uint;
double dbl;
float flt;
void *bytes;
} defaultval;
/* The containing message, either resolved (def) or still by name, as
* selected by msg_is_symbolic. */
union {
const upb_msgdef *def; /* If !msg_is_symbolic. */
char *name; /* If msg_is_symbolic. */
} msg;
union {
const upb_def *def; /* If !subdef_is_symbolic. */
char *name; /* If subdef_is_symbolic. */
} sub; /* The msgdef or enumdef for this field, if upb_hassubdef(f). */
bool subdef_is_symbolic;
bool msg_is_symbolic;
const upb_oneofdef *oneof;
bool default_is_string;
bool type_is_set_; /* False until type is explicitly set. */
bool is_extension_;
bool lazy_;
bool packed_;
upb_intfmt_t intfmt;
bool tagdelim;
upb_fieldtype_t type_;
upb_label_t label_;
uint32_t number_;
uint32_t selector_base; /* Used to index into a upb::Handlers table. */
uint32_t index_;
};
/* Static initializer for struct upb_fielddef.
 *
 * The values are positional, so they must stay in the same order as the
 * members of struct upb_fielddef.  Members without a macro parameter get
 * fixed values: subdef_is_symbolic = false, msg_is_symbolic = false,
 * oneof = NULL and type_is_set_ = true.  default_is_string is derived from
 * "type" (true for UPB_TYPE_STRING and UPB_TYPE_BYTES).
 *
 * "msgdef" and "subdef" initialize the first member (the "def" pointer) of
 * the "msg" and "sub" unions respectively. */
#define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy,   \
                          packed, name, num, msgdef, subdef, selector_base,    \
                          index, defaultval, refs, ref2s)                      \
  {                                                                            \
    UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef},      \
        {subdef}, false, false, NULL,                                          \
        (type) == UPB_TYPE_STRING || (type) == UPB_TYPE_BYTES, true,           \
        is_extension, lazy, packed, intfmt, tagdelim, type, label, num,        \
        selector_base, index                                                   \
  }
/* upb_msgdef *****************************************************************/
/* Internal representation of a message definition.  Member order matters:
 * UPB_MSGDEF_INIT() initializes this struct positionally. */
struct upb_msgdef {
upb_def base;
size_t selector_count;
uint32_t submsg_field_count;
/* Tables for looking up fields by number and name. */
upb_inttable itof; /* int to field */
upb_strtable ntof; /* name to field */
/* Tables for looking up oneofs by name. */
upb_strtable ntoo; /* name to oneof */
/* Is this a map-entry message?
* TODO: set this flag properly for static descriptors; regenerate
* descriptor.upb.c. */
bool map_entry;
/* TODO(haberman): proper extension ranges (there can be multiple). */
};
/* Static initializer for struct upb_msgdef.  The values are positional and
 * follow the member order of the struct.  The oneof table (ntoo) is always
 * initialized empty and map_entry is always false.
 *
 * TODO: also support static initialization of the oneofs table. This will be
* needed if we compile in descriptors that contain oneofs. */
#define UPB_MSGDEF_INIT(name, selector_count, submsg_field_count, itof, ntof, \
refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \
submsg_field_count, itof, ntof, \
UPB_EMPTY_STRTABLE_INIT(UPB_CTYPE_PTR), false \
}
/* upb_enumdef ****************************************************************/
/* Internal representation of an enum definition.  Member order matters:
 * UPB_ENUMDEF_INIT() initializes this struct positionally. */
struct upb_enumdef {
upb_def base;
/* NOTE(review): presumably name -> number and number -> name lookup tables,
* by analogy with the itof/ntof naming in struct upb_msgdef; confirm in
* def.c. */
upb_strtable ntoi;
upb_inttable iton;
int32_t defaultval;
};
/* Static initializer for struct upb_enumdef.  The values are positional and
 * follow the member order of the struct: base, ntoi, iton, defaultval. */
#define UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval, refs, ref2s) \
{ UPB_DEF_INIT(name, UPB_DEF_ENUM, refs, ref2s), ntoi, iton, defaultval }
/* upb_oneofdef ***************************************************************/
/* Internal representation of a oneof definition.  Member order matters:
 * UPB_ONEOFDEF_INIT() initializes this struct positionally. */
struct upb_oneofdef {
upb_def base;
/* NOTE(review): presumably name -> field and number -> field lookup tables,
* matching the "name to field" / "int to field" comments on the same member
* names in struct upb_msgdef; confirm in def.c. */
upb_strtable ntof;
upb_inttable itof;
const upb_msgdef *parent;
};
/* Static initializer for struct upb_oneofdef.  The values are positional and
 * follow the member order of the struct: base, ntof, itof, parent.
 *
 * The def is tagged UPB_DEF_ONEOF (it was previously tagged UPB_DEF_ENUM,
 * copied from UPB_ENUMDEF_INIT), and "parent" is spelled out as NULL instead
 * of being left to implicit zero-initialization. */
#define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \
  { UPB_DEF_INIT(name, UPB_DEF_ONEOF, refs, ref2s), ntof, itof, NULL }
/* upb_symtab *****************************************************************/
/* Internal representation of a symbol table.  Member order matters:
 * UPB_SYMTAB_INIT() initializes this struct positionally. */
struct upb_symtab {
upb_refcounted base;
/* String-keyed table of the defs held by this symtab. */
upb_strtable symtab;
};
/* Static initializer for struct upb_symtab.  The values are positional and
 * follow the member order of the struct: base, symtab. */
#define UPB_SYMTAB_INIT(symtab, refs, ref2s) \
{ UPB_REFCOUNT_INIT(refs, ref2s), symtab }
#endif /* UPB_STATICINIT_H_ */

@ -5,32 +5,12 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb/structdefs.int.h"
#include "upb/symtab.h"
#include <stdlib.h>
#include <string.h>
bool upb_symtab_isfrozen(const upb_symtab *s) {
return upb_refcounted_isfrozen(UPB_UPCAST(s));
}
void upb_symtab_ref(const upb_symtab *s, const void *owner) {
upb_refcounted_ref(UPB_UPCAST(s), owner);
}
void upb_symtab_unref(const upb_symtab *s, const void *owner) {
upb_refcounted_unref(UPB_UPCAST(s), owner);
}
void upb_symtab_donateref(
const upb_symtab *s, const void *from, const void *to) {
upb_refcounted_donateref(UPB_UPCAST(s), from, to);
}
void upb_symtab_checkref(const upb_symtab *s, const void *owner) {
upb_refcounted_checkref(UPB_UPCAST(s), owner);
}
static void upb_symtab_free(upb_refcounted *r) {
upb_symtab *s = (upb_symtab*)r;
upb_strtable_iter i;
@ -47,18 +27,21 @@ static void upb_symtab_free(upb_refcounted *r) {
/* Creates a new, empty symbol table.  "owner" is passed through to
 * upb_refcounted_init().
 *
 * Returns NULL if memory for the table object cannot be allocated. */
upb_symtab *upb_symtab_new(const void *owner) {
  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
  upb_symtab *s = malloc(sizeof(*s));
  if (!s) return NULL;
  upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
  /* NOTE(review): upb_strtable_init() can report failure (upb_symtab_add()
   * checks its result), but the result is ignored here -- TODO decide how to
   * tear down the refcounted base on failure. */
  upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
  return s;
}
/* Freezes the symbol table.  The table must not already be frozen (checked
 * with assert()). */
void upb_symtab_freeze(upb_symtab *s) {
  upb_refcounted *r;
  bool ok;

  assert(!upb_symtab_isfrozen(s));
  r = upb_symtab_upcast_mutable(s);
  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
   * defs cannot refer back to the table and therefore cannot create cycles.
   * So 0 will suffice for maxdepth here. */
  ok = upb_refcounted_freeze(&r, 1, NULL, 0);
  UPB_ASSERT_VAR(ok, ok);
}
@ -83,19 +66,19 @@ const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
return def ? upb_dyncast_enumdef(def) : NULL;
}
// Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t.
/* Given a symbol and the base symbol inside which it is defined, find the
* symbol's definition in t. */
static upb_def *upb_resolvename(const upb_strtable *t,
const char *base, const char *sym) {
if(strlen(sym) == 0) return NULL;
if(sym[0] == '.') {
// Symbols starting with '.' are absolute, so we do a single lookup.
// Slice to omit the leading '.'
/* Symbols starting with '.' are absolute, so we do a single lookup.
* Slice to omit the leading '.' */
upb_value v;
return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
} else {
// Remove components from base until we find an entry or run out.
// TODO: This branch is totally broken, but currently not used.
/* Remove components from base until we find an entry or run out.
* TODO: This branch is totally broken, but currently not used. */
(void)base;
assert(false);
return NULL;
@ -108,36 +91,41 @@ const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
return ret;
}
// Searches def and its children to find defs that have the same name as any
// def in "addtab." Returns true if any where found, and as a side-effect adds
// duplicates of these defs into addtab.
//
// We use a modified depth-first traversal that traverses each SCC (which we
// already computed) as if it were a single node. This allows us to traverse
// the possibly-cyclic graph as if it were a DAG and to dup the correct set of
// nodes with O(n) time.
/* Searches def and its children to find defs that have the same name as any
* def in "addtab." Returns true if any were found, and as a side-effect adds
* duplicates of these defs into addtab.
*
* We use a modified depth-first traversal that traverses each SCC (which we
* already computed) as if it were a single node. This allows us to traverse
* the possibly-cyclic graph as if it were a DAG and to dup the correct set of
* nodes with O(n) time. */
static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
const void *new_owner, upb_inttable *seen,
upb_status *s) {
// Memoize results of this function for efficiency (since we're traversing a
// DAG this is not needed to limit the depth of the search).
/* Memoize results of this function for efficiency (since we're traversing a
* DAG this is not needed to limit the depth of the search). */
upb_value v;
bool need_dup;
const upb_def *base;
if (upb_inttable_lookup(seen, (uintptr_t)def, &v))
return upb_value_getbool(v);
// Visit submessages for all messages in the SCC.
bool need_dup = false;
const upb_def *base = def;
/* Visit submessages for all messages in the SCC. */
need_dup = false;
base = def;
do {
upb_value v;
const upb_msgdef *m;
assert(upb_def_isfrozen(def));
if (def->type == UPB_DEF_FIELD) continue;
upb_value v;
if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
need_dup = true;
}
// For messages, continue the recursion by visiting all subdefs.
const upb_msgdef *m = upb_dyncast_msgdef(def);
/* For messages, continue the recursion by visiting all subdefs. */
m = upb_dyncast_msgdef(def);
if (m) {
upb_msg_field_iter i;
for(upb_msg_field_begin(&i, m);
@ -145,7 +133,7 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
upb_msg_field_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
if (!upb_fielddef_hassubdef(f)) continue;
// |= to avoid short-circuit; we need its side-effects.
/* |= to avoid short-circuit; we need its side-effects. */
need_dup |= upb_resolve_dfs(
upb_fielddef_subdef(f), addtab, new_owner, seen, s);
if (!upb_ok(s)) return false;
@ -154,11 +142,13 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
} while ((def = (upb_def*)def->base.next) != base);
if (need_dup) {
// Dup any defs that don't already have entries in addtab.
/* Dup any defs that don't already have entries in addtab. */
def = base;
do {
const char *name;
if (def->type == UPB_DEF_FIELD) continue;
const char *name = upb_def_fullname(def);
name = upb_def_fullname(def);
if (!upb_strtable_lookup(addtab, name, NULL)) {
upb_def *newdef = upb_def_dup(def, new_owner);
if (!newdef) goto oom;
@ -177,34 +167,41 @@ oom:
return false;
}
// TODO(haberman): we need a lot more testing of error conditions.
// The came_from_user stuff in particular is not tested.
/* TODO(haberman): we need a lot more testing of error conditions.
* The came_from_user stuff in particular is not tested. */
bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
upb_status *status) {
assert(!upb_symtab_isfrozen(s));
int i;
upb_strtable_iter iter;
upb_def **add_defs = NULL;
upb_strtable addtab;
upb_inttable seen;
assert(!upb_symtab_isfrozen(s));
if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
upb_status_seterrmsg(status, "out of memory");
return false;
}
// Add new defs to our "add" set.
for (int i = 0; i < n; i++) {
/* Add new defs to our "add" set. */
for (i = 0; i < n; i++) {
upb_def *def = defs[i];
const char *fullname;
upb_fielddef *f;
if (upb_def_isfrozen(def)) {
upb_status_seterrmsg(status, "added defs must be mutable");
goto err;
}
assert(!upb_def_isfrozen(def));
const char *fullname = upb_def_fullname(def);
fullname = upb_def_fullname(def);
if (!fullname) {
upb_status_seterrmsg(
status, "Anonymous defs cannot be added to a symtab");
goto err;
}
upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
f = upb_dyncast_fielddef_mutable(def);
if (f) {
if (!upb_fielddef_containingtypename(f)) {
@ -218,8 +215,8 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
goto err;
}
// We need this to back out properly, because if there is a failure we
// need to donate the ref back to the caller.
/* We need this to back out properly, because if there is a failure we
* need to donate the ref back to the caller. */
def->came_from_user = true;
upb_def_donateref(def, ref_donor, s);
if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
@ -227,31 +224,33 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
}
}
// Add standalone fielddefs (ie. extensions) to the appropriate messages.
// If the appropriate message only exists in the existing symtab, duplicate
// it so we have a mutable copy we can add the fields to.
for (int i = 0; i < n; i++) {
/* Add standalone fielddefs (ie. extensions) to the appropriate messages.
* If the appropriate message only exists in the existing symtab, duplicate
* it so we have a mutable copy we can add the fields to. */
for (i = 0; i < n; i++) {
upb_def *def = defs[i];
upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
const char *msgname;
upb_value v;
upb_msgdef *m;
if (!f) continue;
const char *msgname = upb_fielddef_containingtypename(f);
// We validated this earlier in this function.
msgname = upb_fielddef_containingtypename(f);
/* We validated this earlier in this function. */
assert(msgname);
// If the extendee name is absolutely qualified, move past the initial ".".
// TODO(haberman): it is not obvious what it would mean if this was not
// absolutely qualified.
/* If the extendee name is absolutely qualified, move past the initial ".".
* TODO(haberman): it is not obvious what it would mean if this was not
* absolutely qualified. */
if (msgname[0] == '.') {
msgname++;
}
upb_value v;
upb_msgdef *m;
if (upb_strtable_lookup(&addtab, msgname, &v)) {
// Extendee is in the set of defs the user asked us to add.
/* Extendee is in the set of defs the user asked us to add. */
m = upb_value_getptr(v);
} else {
// Need to find and dup the extendee from the existing symtab.
/* Need to find and dup the extendee from the existing symtab. */
const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
if (!frozen_m) {
upb_status_seterrf(status,
@ -273,37 +272,37 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
}
}
// Add dups of any existing def that can reach a def with the same name as
// anything in our "add" set.
upb_inttable seen;
/* Add dups of any existing def that can reach a def with the same name as
* anything in our "add" set. */
if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
upb_strtable_iter i;
upb_strtable_begin(&i, &s->symtab);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
upb_strtable_begin(&iter, &s->symtab);
for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
upb_resolve_dfs(def, &addtab, s, &seen, status);
if (!upb_ok(status)) goto err;
}
upb_inttable_uninit(&seen);
// Now using the table, resolve symbolic references for subdefs.
upb_strtable_begin(&i, &addtab);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
/* Now using the table, resolve symbolic references for subdefs. */
upb_strtable_begin(&iter, &addtab);
for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
const char *base;
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
upb_msg_field_iter j;
if (!m) continue;
// Type names are resolved relative to the message in which they appear.
const char *base = upb_msgdef_fullname(m);
/* Type names are resolved relative to the message in which they appear. */
base = upb_msgdef_fullname(m);
upb_msg_field_iter j;
for(upb_msg_field_begin(&j, m);
!upb_msg_field_done(&j);
upb_msg_field_next(&j)) {
upb_fielddef *f = upb_msg_iter_field(&j);
const char *name = upb_fielddef_subdefname(f);
if (name && !upb_fielddef_subdef(f)) {
// Try the lookup in the current set of to-be-added defs first. If not
// there, try existing defs.
/* Try the lookup in the current set of to-be-added defs first. If not
* there, try existing defs. */
upb_def *subdef = upb_resolvename(&addtab, base, name);
if (subdef == NULL) {
subdef = upb_resolvename(&s->symtab, base, name);
@ -319,31 +318,33 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
}
}
// We need an array of the defs in addtab, for passing to upb_def_freeze.
/* We need an array of the defs in addtab, for passing to upb_def_freeze. */
add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
if (add_defs == NULL) goto oom_err;
upb_strtable_begin(&i, &addtab);
for (n = 0; !upb_strtable_done(&i); upb_strtable_next(&i)) {
add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&i));
upb_strtable_begin(&iter, &addtab);
for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
}
if (!upb_def_freeze(add_defs, n, status)) goto err;
// This must be delayed until all errors have been detected, since error
// recovery code uses this table to cleanup defs.
/* This must be delayed until all errors have been detected, since error
* recovery code uses this table to cleanup defs. */
upb_strtable_uninit(&addtab);
// TODO(haberman) we don't properly handle errors after this point (like
// OOM in upb_strtable_insert() below).
for (int i = 0; i < n; i++) {
/* TODO(haberman) we don't properly handle errors after this point (like
* OOM in upb_strtable_insert() below). */
for (i = 0; i < n; i++) {
upb_def *def = add_defs[i];
const char *name = upb_def_fullname(def);
upb_value v;
bool success;
if (upb_strtable_remove(&s->symtab, name, &v)) {
const upb_def *def = upb_value_getptr(v);
upb_def_unref(def, s);
}
bool success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
UPB_ASSERT_VAR(success, success == true);
}
free(add_defs);
@ -352,12 +353,11 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
oom_err:
upb_status_seterrmsg(status, "out of memory");
err: {
// For defs the user passed in, we need to donate the refs back. For defs
// we dup'd, we need to just unref them.
upb_strtable_iter i;
upb_strtable_begin(&i, &addtab);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
/* For defs the user passed in, we need to donate the refs back. For defs
* we dup'd, we need to just unref them. */
upb_strtable_begin(&iter, &addtab);
for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
bool came_from_user = def->came_from_user;
def->came_from_user = false;
if (came_from_user) {
@ -373,7 +373,7 @@ err: {
return false;
}
// Iteration.
/* Iteration. */
static void advance_to_matching(upb_symtab_iter *iter) {
if (iter->type == UPB_DEF_ANY)

@ -23,7 +23,8 @@
namespace upb { class SymbolTable; }
#endif
UPB_DECLARE_TYPE(upb::SymbolTable, upb_symtab);
UPB_DECLARE_DERIVED_TYPE(upb::SymbolTable, upb::RefCounted,
upb_symtab, upb_refcounted)
typedef struct {
UPB_PRIVATE_FOR_CPP
@ -31,87 +32,85 @@ typedef struct {
upb_deftype_t type;
} upb_symtab_iter;
// Non-const methods in upb::SymbolTable are NOT thread-safe.
UPB_DEFINE_CLASS1(upb::SymbolTable, upb::RefCounted,
#ifdef __cplusplus
/* Non-const methods in upb::SymbolTable are NOT thread-safe. */
class upb::SymbolTable {
public:
// Returns a new symbol table with a single ref owned by "owner."
// Returns NULL if memory allocation failed.
/* Returns a new symbol table with a single ref owned by "owner."
* Returns NULL if memory allocation failed. */
static reffed_ptr<SymbolTable> New();
// Functionality from upb::RefCounted.
bool IsFrozen() const;
void Ref(const void* owner) const;
void Unref(const void* owner) const;
void DonateRef(const void *from, const void *to) const;
void CheckRef(const void *owner) const;
// For all lookup functions, the returned pointer is not owned by the
// caller; it may be invalidated by any non-const call or unref of the
// SymbolTable! To protect against this, take a ref if desired.
// Freezes the symbol table: prevents further modification of it.
// After the Freeze() operation is successful, the SymbolTable must only be
// accessed via a const pointer.
//
// Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not
// a necessary step in using a SymbolTable. If you have no need for it to be
// immutable, there is no need to freeze it ever. However sometimes it is
// useful, and SymbolTables that are statically compiled into the binary are
// always frozen by nature.
/* Include RefCounted base methods. */
UPB_REFCOUNTED_CPPMETHODS
/* For all lookup functions, the returned pointer is not owned by the
* caller; it may be invalidated by any non-const call or unref of the
* SymbolTable! To protect against this, take a ref if desired. */
/* Freezes the symbol table: prevents further modification of it.
* After the Freeze() operation is successful, the SymbolTable must only be
* accessed via a const pointer.
*
* Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not
* a necessary step in using a SymbolTable. If you have no need for it to be
* immutable, there is no need to freeze it ever. However sometimes it is
* useful, and SymbolTables that are statically compiled into the binary are
* always frozen by nature. */
void Freeze();
// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
//
// If the name starts with a '.', it is fully-qualified. Otherwise,
// C++-like scoping rules are used to find the type (i.e. first the nested
// types within this message are searched, then within the parent, on up
// to the root namespace).
//
// If not found, returns NULL.
/* Resolves the given symbol using the rules described in descriptor.proto,
* namely:
*
* If the name starts with a '.', it is fully-qualified. Otherwise,
* C++-like scoping rules are used to find the type (i.e. first the nested
* types within this message are searched, then within the parent, on up
* to the root namespace).
*
* If not found, returns NULL. */
const Def* Resolve(const char* base, const char* sym) const;
// Finds an entry in the symbol table with this exact name. If not found,
// returns NULL.
/* Finds an entry in the symbol table with this exact name. If not found,
* returns NULL. */
const Def* Lookup(const char *sym) const;
const MessageDef* LookupMessage(const char *sym) const;
const EnumDef* LookupEnum(const char *sym) const;
// TODO: introduce a C++ iterator, but make it nice and templated so that if
// you ask for an iterator of MessageDef the iterated elements are strongly
// typed as MessageDef*.
// Adds the given mutable defs to the symtab, resolving all symbols
// (including enum default values) and finalizing the defs. Only one def per
// name may be in the list, but defs can replace existing defs in the symtab.
// All defs must have a name -- anonymous defs are not allowed. Anonymous
// defs can still be frozen by calling upb_def_freeze() directly.
//
// Any existing defs that can reach defs that are being replaced will
// themselves be replaced also, so that the resulting set of defs is fully
// consistent.
//
// This logic implemented in this method is a convenience; ultimately it
// calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
// upb_freeze(), any of which the client could call themself. However, since
// the logic for doing so is nontrivial, we provide it here.
//
// The entire operation either succeeds or fails. If the operation fails,
// the symtab is unchanged, false is returned, and status indicates the
// error. The caller passes a ref on all defs to the symtab (even if the
// operation fails).
//
// TODO(haberman): currently failure will leave the symtab unchanged, but may
// leave the defs themselves partially resolved. Does this matter? If so we
// could do a prepass that ensures that all symbols are resolvable and bail
// if not, so we don't mutate anything until we know the operation will
// succeed.
//
// TODO(haberman): since the defs must be mutable, refining a frozen def
// requires making mutable copies of the entire tree. This is wasteful if
// only a few messages are changing. We may want to add a way of adding a
// tree of frozen defs to the symtab (perhaps an alternate constructor where
// you pass the root of the tree?)
/* TODO: introduce a C++ iterator, but make it nice and templated so that if
* you ask for an iterator of MessageDef the iterated elements are strongly
* typed as MessageDef*. */
/* Adds the given mutable defs to the symtab, resolving all symbols
* (including enum default values) and finalizing the defs. Only one def per
* name may be in the list, but defs can replace existing defs in the symtab.
* All defs must have a name -- anonymous defs are not allowed. Anonymous
* defs can still be frozen by calling upb_def_freeze() directly.
*
* Any existing defs that can reach defs that are being replaced will
* themselves be replaced also, so that the resulting set of defs is fully
* consistent.
*
* The logic implemented in this method is a convenience; ultimately it
* calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
* upb_freeze(), any of which the client could call themselves. However, since
* the logic for doing so is nontrivial, we provide it here.
*
* The entire operation either succeeds or fails. If the operation fails,
* the symtab is unchanged, false is returned, and status indicates the
* error. The caller passes a ref on all defs to the symtab (even if the
* operation fails).
*
* TODO(haberman): currently failure will leave the symtab unchanged, but may
* leave the defs themselves partially resolved. Does this matter? If so we
* could do a prepass that ensures that all symbols are resolvable and bail
* if not, so we don't mutate anything until we know the operation will
* succeed.
*
* TODO(haberman): since the defs must be mutable, refining a frozen def
* requires making mutable copies of the entire tree. This is wasteful if
* only a few messages are changing. We may want to add a way of adding a
* tree of frozen defs to the symtab (perhaps an alternate constructor where
* you pass the root of the tree?) */
bool Add(Def*const* defs, int n, void* ref_donor, upb_status* status);
bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
@ -119,25 +118,17 @@ UPB_DEFINE_CLASS1(upb::SymbolTable, upb::RefCounted,
}
private:
UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable);
,
UPB_DEFINE_STRUCT(upb_symtab, upb_refcounted,
upb_strtable symtab;
));
#define UPB_SYMTAB_INIT(symtab, refs, ref2s) \
{ UPB_REFCOUNT_INIT(refs, ref2s), symtab }
UPB_BEGIN_EXTERN_C // {
// Native C API.
// From upb_refcounted.
bool upb_symtab_isfrozen(const upb_symtab *s);
void upb_symtab_ref(const upb_symtab *s, const void *owner);
void upb_symtab_unref(const upb_symtab *s, const void *owner);
void upb_symtab_donateref(
const upb_symtab *s, const void *from, const void *to);
void upb_symtab_checkref(const upb_symtab *s, const void *owner);
UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable)
};
#endif /* __cplusplus */
UPB_BEGIN_EXTERN_C
/* Native C API. */
/* Include refcounted methods like upb_symtab_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_symtab, upb_symtab_upcast)
upb_symtab *upb_symtab_new(const void *owner);
void upb_symtab_freeze(upb_symtab *s);
@ -149,48 +140,32 @@ const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym);
bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
upb_status *status);
// upb_symtab_iter i;
// for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i);
// upb_symtab_next(&i)) {
// const upb_def *def = upb_symtab_iter_def(&i);
// // ...
// }
//
// For C we don't have separate iterators for const and non-const.
// It is the caller's responsibility to cast the upb_fielddef* to
// const if the upb_msgdef* is const.
/* upb_symtab_iter i;
* for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i);
* upb_symtab_next(&i)) {
* const upb_def *def = upb_symtab_iter_def(&i);
* // ...
* }
*
* For C we don't have separate iterators for const and non-const.
* It is the caller's responsibility to cast the upb_fielddef* to
* const if the upb_msgdef* is const. */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
upb_deftype_t type);
void upb_symtab_next(upb_symtab_iter *iter);
bool upb_symtab_done(const upb_symtab_iter *iter);
const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter);
UPB_END_EXTERN_C // }
UPB_END_EXTERN_C
#ifdef __cplusplus
// C++ inline wrappers.
/* C++ inline wrappers. */
namespace upb {
/* Creates a new symbol table through the C API and wraps it in a reffed_ptr.
 * The address of the local variable "s" serves as the owner token: it is
 * passed to upb_symtab_new() and then to the reffed_ptr constructor as the
 * second argument. */
inline reffed_ptr<SymbolTable> SymbolTable::New() {
upb_symtab *s = upb_symtab_new(&s);
return reffed_ptr<SymbolTable>(s, &s);
}
inline bool SymbolTable::IsFrozen() const {
return upb_symtab_isfrozen(this);
}
inline void SymbolTable::Ref(const void *owner) const {
upb_symtab_ref(this, owner);
}
inline void SymbolTable::Unref(const void *owner) const {
upb_symtab_unref(this, owner);
}
inline void SymbolTable::DonateRef(const void *from, const void *to) const {
upb_symtab_donateref(this, from, to);
}
inline void SymbolTable::CheckRef(const void *owner) const {
upb_symtab_checkref(this, owner);
}
/* Thin C++ wrapper: forwards to the C API function upb_symtab_freeze(). */
inline void SymbolTable::Freeze() {
  upb_symtab_freeze(this);
}
@ -208,7 +183,7 @@ inline bool SymbolTable::Add(
Def*const* defs, int n, void* ref_donor, upb_status* status) {
return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status);
}
} // namespace upb
} /* namespace upb */
#endif
#endif /* UPB_SYMTAB_H_ */

@ -12,17 +12,17 @@
#include <stdlib.h>
#include <string.h>
#define UPB_MAXARRSIZE 16 // 64k.
#define UPB_MAXARRSIZE 16 /* 64k. */
// From Chromium.
/* From Chromium. */
#define ARRAY_SIZE(x) \
((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
static const double MAX_LOAD = 0.85;
// The minimum utilization of the array part of a mixed hash/array table. This
// is a speed/memory-usage tradeoff (though it's not straightforward because of
// cache effects). The lower this is, the more memory we'll use.
/* The minimum utilization of the array part of a mixed hash/array table. This
* is a speed/memory-usage tradeoff (though it's not straightforward because of
* cache effects). The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
/* Returns true when v has at most one bit set, i.e. v is a power of two.
 * Note that zero is also reported as true. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  /* Clearing the lowest set bit leaves zero only if it was the sole bit. */
  return (v & (v - 1)) == 0;
}
@ -31,7 +31,7 @@ int log2ceil(uint64_t v) {
int ret = 0;
bool pow2 = is_pow2(v);
while (v >>= 1) ret++;
ret = pow2 ? ret : ret + 1; // Ceiling.
ret = pow2 ? ret : ret + 1; /* Ceiling. */
return UPB_MIN(UPB_MAXARRSIZE, ret);
}
@ -40,12 +40,15 @@ char *upb_strdup(const char *s) {
}
char *upb_strdup2(const char *s, size_t len) {
// Prevent overflow errors.
size_t n;
char *p;
/* Prevent overflow errors. */
if (len == SIZE_MAX) return NULL;
// Always null-terminate, even if binary data; but don't rely on the input to
// have a null-terminating byte since it may be a raw binary buffer.
size_t n = len + 1;
char *p = malloc(n);
/* Always null-terminate, even if binary data; but don't rely on the input to
* have a null-terminating byte since it may be a raw binary buffer. */
n = len + 1;
p = malloc(n);
if (p) {
memcpy(p, s, len);
p[len] = 0;
@ -53,7 +56,7 @@ char *upb_strdup2(const char *s, size_t len) {
return p;
}
// A type to represent the lookup key of either a strtable or an inttable.
/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
uintptr_t num;
struct {
@ -80,7 +83,7 @@ typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
/* Base table (shared code) ***************************************************/
// For when we need to cast away const.
/* For when we need to cast away const. */
static upb_tabent *mutable_entries(upb_table *t) {
  /* Explicit cast yields a writable upb_tabent pointer to the table's
   * entries array. */
  upb_tabent *entries = (upb_tabent*)t->entries;
  return entries;
}
@ -90,11 +93,13 @@ static bool isfull(upb_table *t) {
}
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
size_t bytes;
t->count = 0;
t->ctype = ctype;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
size_t bytes = upb_table_size(t) * sizeof(upb_tabent);
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = malloc(bytes);
if (!t->entries) return false;
@ -118,8 +123,10 @@ static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
uint32_t hash, eqlfunc_t *eql) {
const upb_tabent *e;
if (t->size_lg2 == 0) return NULL;
const upb_tabent *e = upb_getentry(t, hash);
e = upb_getentry(t, hash);
if (upb_tabent_isempty(e)) return NULL;
while (1) {
if (eql(e->key, key)) return e;
@ -137,7 +144,7 @@ static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
const upb_tabent *e = findentry(t, key, hash, eql);
if (e) {
if (v) {
_upb_value_setval(v, e->val, t->ctype);
_upb_value_setval(v, e->val.val, t->ctype);
}
return true;
} else {
@ -145,36 +152,41 @@ static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
}
}
// The given key must not already exist in the table.
/* The given key must not already exist in the table. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
upb_value val, uint32_t hash,
hashfunc_t *hashfunc, eqlfunc_t *eql) {
upb_tabent *mainpos_e;
upb_tabent *our_e;
UPB_UNUSED(eql);
UPB_UNUSED(key);
assert(findentry(t, key, hash, eql) == NULL);
assert(val.ctype == t->ctype);
t->count++;
upb_tabent *mainpos_e = getentry_mutable(t, hash);
upb_tabent *our_e = mainpos_e;
mainpos_e = getentry_mutable(t, hash);
our_e = mainpos_e;
if (upb_tabent_isempty(mainpos_e)) {
// Our main position is empty; use it.
/* Our main position is empty; use it. */
our_e->next = NULL;
} else {
// Collision.
/* Collision. */
upb_tabent *new_e = emptyent(t);
// Head of collider's chain.
/* Head of collider's chain. */
upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
// Existing ent is in its main posisiton (it has the same hash as us, and
// is the head of our chain). Insert to new ent and append to this chain.
/* Existing ent is in its main position (it has the same hash as us, and
* is the head of our chain). Insert to new ent and append to this chain. */
new_e->next = mainpos_e->next;
mainpos_e->next = new_e;
our_e = new_e;
} else {
// Existing ent is not in its main position (it is a node in some other
// chain). This implies that no existing ent in the table has our hash.
// Evict it (updating its chain) and use its ent for head of our chain.
*new_e = *mainpos_e; // copies next.
/* Existing ent is not in its main position (it is a node in some other
* chain). This implies that no existing ent in the table has our hash.
* Evict it (updating its chain) and use its ent for head of our chain. */
*new_e = *mainpos_e; /* copies next. */
while (chain->next != mainpos_e) {
chain = (upb_tabent*)chain->next;
assert(chain);
@ -185,7 +197,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
}
}
our_e->key = tabkey;
our_e->val = val.val;
our_e->val.val = val.val;
assert(findentry(t, key, hash, eql) == our_e);
}
@ -194,31 +206,34 @@ static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
upb_tabent *chain = getentry_mutable(t, hash);
if (upb_tabent_isempty(chain)) return false;
if (eql(chain->key, key)) {
// Element to remove is at the head of its chain.
/* Element to remove is at the head of its chain. */
t->count--;
if (val) {
_upb_value_setval(val, chain->val, t->ctype);
_upb_value_setval(val, chain->val.val, t->ctype);
}
if (chain->next) {
upb_tabent *move = (upb_tabent*)chain->next;
*chain = *move;
if (removed) *removed = move->key;
move->key = 0; // Make the slot empty.
move->key = 0; /* Make the slot empty. */
} else {
if (removed) *removed = chain->key;
chain->key = 0; // Make the slot empty.
chain->key = 0; /* Make the slot empty. */
}
return true;
} else {
// Element to remove is either in a non-head position or not in the table.
/* Element to remove is either in a non-head position or not in the
* table. */
while (chain->next && !eql(chain->next->key, key))
chain = (upb_tabent*)chain->next;
if (chain->next) {
// Found element to remove.
/* Found element to remove. */
upb_tabent *rm;
if (val) {
_upb_value_setval(val, chain->next->val, t->ctype);
_upb_value_setval(val, chain->next->val.val, t->ctype);
}
upb_tabent *rm = (upb_tabent*)chain->next;
rm = (upb_tabent*)chain->next;
if (removed) *removed = rm->key;
rm->key = 0;
chain->next = rm->next;
@ -246,7 +261,7 @@ static size_t begin(const upb_table *t) {
/* upb_strtable ***************************************************************/
// A simple "subclass" of upb_table that only adds a hash function for strings.
/* A simple "subclass" of upb_table that only adds a hash function for strings. */
static upb_tabkey strcopy(lookupkey_t k2) {
char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
@ -273,16 +288,18 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
}
void upb_strtable_uninit(upb_strtable *t) {
for (size_t i = 0; i < upb_table_size(&t->t); i++)
size_t i;
for (i = 0; i < upb_table_size(&t->t); i++)
free((void*)t->t.entries[i].key);
uninit(&t->t);
}
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
upb_strtable new_table;
upb_strtable_iter i;
if (!init(&new_table.t, t->t.ctype, size_lg2))
return false;
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_strtable_insert2(
@ -298,17 +315,22 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
upb_value v) {
lookupkey_t key;
upb_tabkey tabkey;
uint32_t hash;
if (isfull(&t->t)) {
// Need to resize. New table of double the size, add old elements to it.
/* Need to resize. New table of double the size, add old elements to it. */
if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
return false;
}
}
lookupkey_t key = strkey2(k, len);
upb_tabkey tabkey = strcopy(key);
key = strkey2(k, len);
tabkey = strcopy(key);
if (tabkey == 0) return false;
uint32_t hash = MurmurHash2(key.str.str, key.str.len, 0);
hash = MurmurHash2(key.str.str, key.str.len, 0);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
@ -331,7 +353,7 @@ bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
}
}
// Iteration
/* Iteration */
static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
return &i->t->t.entries[i->index];
@ -357,15 +379,15 @@ const char *upb_strtable_iter_key(upb_strtable_iter *i) {
}
size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
uint32_t len;
assert(!upb_strtable_done(i));
upb_tabstr(str_tabent(i)->key, &len);
return len;
}
upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
return _upb_value_val(str_tabent(i)->val, i->t->t.ctype);
return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
}
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
@ -382,8 +404,8 @@ bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
/* upb_inttable ***************************************************************/
// For inttables we use a hybrid structure where small keys are kept in an
// array and large keys are put in the hash table.
/* For inttables we use a hybrid structure where small keys are kept in an
* array and large keys are put in the hash table. */
/* Hash function for integer table keys; simply forwards to upb_inthash(). */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
@ -391,11 +413,11 @@ static bool inteql(upb_tabkey k1, lookupkey_t k2) {
return k1 == k2.num;
}
static _upb_value *mutable_array(upb_inttable *t) {
return (_upb_value*)t->array;
static upb_tabval *mutable_array(upb_inttable *t) {
return (upb_tabval*)t->array;
}
static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) {
static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
if (key < t->array_size) {
return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
} else {
@ -405,7 +427,7 @@ static _upb_value *inttable_val(upb_inttable *t, uintptr_t key) {
}
}
static const _upb_value *inttable_val_const(const upb_inttable *t,
static const upb_tabval *inttable_val_const(const upb_inttable *t,
uintptr_t key) {
return inttable_val((upb_inttable*)t, key);
}
@ -417,7 +439,7 @@ size_t upb_inttable_count(const upb_inttable *t) {
static void check(upb_inttable *t) {
UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
// This check is very expensive (makes inserts/deletes O(N)).
/* This check is very expensive (makes inserts/deletes O(N)). */
size_t count = 0;
upb_inttable_iter i;
upb_inttable_begin(&i, t);
@ -430,12 +452,14 @@ static void check(upb_inttable *t) {
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
size_t asize, int hsize_lg2) {
size_t array_bytes;
if (!init(&t->t, ctype, hsize_lg2)) return false;
// Always make the array part at least 1 long, so that we know key 0
// won't be in the hash part, which simplifies things.
/* Always make the array part at least 1 long, so that we know key 0
* won't be in the hash part, which simplifies things. */
t->array_size = UPB_MAX(1, asize);
t->array_count = 0;
size_t array_bytes = t->array_size * sizeof(upb_value);
array_bytes = t->array_size * sizeof(upb_value);
t->array = malloc(array_bytes);
if (!t->array) {
uninit(&t->t);
@ -456,23 +480,31 @@ void upb_inttable_uninit(upb_inttable *t) {
}
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
assert(upb_arrhas(val.val));
/* XXX: Table can't store value (uint64_t)-1. Need to somehow statically
* guarantee that this is not necessary, or fix the limitation. */
upb_tabval tabval;
tabval.val = val.val;
UPB_UNUSED(tabval);
assert(upb_arrhas(tabval));
if (key < t->array_size) {
assert(!upb_arrhas(t->array[key]));
t->array_count++;
mutable_array(t)[key] = val.val;
mutable_array(t)[key].val = val.val;
} else {
if (isfull(&t->t)) {
// Need to resize the hash part, but we re-use the array part.
/* Need to resize the hash part, but we re-use the array part. */
size_t i;
upb_table new_table;
if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1))
return false;
size_t i;
for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
const upb_tabent *e = &t->t.entries[i];
uint32_t hash;
upb_value v;
_upb_value_setval(&v, e->val, t->t.ctype);
uint32_t hash = upb_inthash(e->key);
_upb_value_setval(&v, e->val.val, t->t.ctype);
hash = upb_inthash(e->key);
insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
}
@ -488,16 +520,16 @@ bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
}
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
const _upb_value *table_v = inttable_val_const(t, key);
const upb_tabval *table_v = inttable_val_const(t, key);
if (!table_v) return false;
if (v) _upb_value_setval(v, *table_v, t->t.ctype);
if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
return true;
}
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
_upb_value *table_v = inttable_val(t, key);
upb_tabval *table_v = inttable_val(t, key);
if (!table_v) return false;
*table_v = val.val;
table_v->val = val.val;
return true;
}
@ -505,11 +537,11 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
bool success;
if (key < t->array_size) {
if (upb_arrhas(t->array[key])) {
upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
t->array_count--;
if (val) {
_upb_value_setval(val, t->array[key], t->t.ctype);
_upb_value_setval(val, t->array[key].val, t->t.ctype);
}
_upb_value empty = UPB_ARRAY_EMPTYENT;
mutable_array(t)[key] = empty;
success = true;
} else {
@ -549,10 +581,14 @@ bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
}
void upb_inttable_compact(upb_inttable *t) {
// Create a power-of-two histogram of the table keys.
/* Create a power-of-two histogram of the table keys. */
int counts[UPB_MAXARRSIZE + 1] = {0};
uintptr_t max_key = 0;
upb_inttable_iter i;
size_t arr_size;
int arr_count;
upb_inttable new_t;
upb_inttable_begin(&i, t);
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
uintptr_t key = upb_inttable_iter_key(&i);
@ -562,15 +598,17 @@ void upb_inttable_compact(upb_inttable *t) {
counts[log2ceil(key)]++;
}
size_t arr_size = 1;
int arr_count = upb_inttable_count(t);
arr_size = 1;
arr_count = upb_inttable_count(t);
if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
// We can put 100% of the entries in the array part.
/* We can put 100% of the entries in the array part. */
arr_size = max_key + 1;
} else {
// Find the largest power of two that satisfies the MIN_DENSITY definition.
for (int size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
/* Find the largest power of two that satisfies the MIN_DENSITY
* definition. */
int size_lg2;
for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
arr_size = 1 << size_lg2;
arr_count -= counts[size_lg2];
if (arr_count >= arr_size * MIN_DENSITY) {
@ -579,38 +617,39 @@ void upb_inttable_compact(upb_inttable *t) {
}
}
// Array part must always be at least 1 entry large to catch lookups of key
// 0. Key 0 must always be in the array part because "0" in the hash part
// denotes an empty entry.
/* Array part must always be at least 1 entry large to catch lookups of key
* 0. Key 0 must always be in the array part because "0" in the hash part
* denotes an empty entry. */
arr_size = UPB_MAX(arr_size, 1);
// Insert all elements into new, perfectly-sized table.
int hash_count = upb_inttable_count(t) - arr_count;
int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
int hashsize_lg2 = log2ceil(hash_size);
assert(hash_count >= 0);
upb_inttable new_t;
upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
upb_inttable_begin(&i, t);
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
uintptr_t k = upb_inttable_iter_key(&i);
upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
{
/* Insert all elements into new, perfectly-sized table. */
int hash_count = upb_inttable_count(t) - arr_count;
int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
int hashsize_lg2 = log2ceil(hash_size);
assert(hash_count >= 0);
upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
upb_inttable_begin(&i, t);
for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
uintptr_t k = upb_inttable_iter_key(&i);
upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
}
assert(new_t.array_size == arr_size);
assert(new_t.t.size_lg2 == hashsize_lg2);
}
assert(new_t.array_size == arr_size);
assert(new_t.t.size_lg2 == hashsize_lg2);
upb_inttable_uninit(t);
*t = new_t;
}
// Iteration.
/* Iteration. */
/* Returns the hash-part entry at the iterator's current index.  Asserts that
 * the iterator is not currently in the array part. */
static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
  const upb_tabent *entries;
  assert(!i->array_part);
  entries = i->t->t.entries;
  return entries + i->index;
}
static _upb_value int_arrent(const upb_inttable_iter *i) {
static upb_tabval int_arrent(const upb_inttable_iter *i) {
assert(i->array_part);
return i->t->array[i->index];
}
@ -655,7 +694,7 @@ uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
assert(!upb_inttable_done(i));
return _upb_value_val(
i->array_part ? i->t->array[i->index] : int_tabent(i)->val,
i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
i->t->t.ctype);
}
@ -673,26 +712,26 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
}
#ifdef UPB_UNALIGNED_READS_OK
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby (released as public domain).
// Reformatted and C99-ified by Joshua Haberman.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
/* -----------------------------------------------------------------------------
* MurmurHash2, by Austin Appleby (released as public domain).
* Reformatted and C99-ified by Joshua Haberman.
* Note - This code makes a few assumptions about how your machine behaves -
* 1. We can read a 4-byte value from any address without crashing
* 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
* And it has a few limitations -
* 1. It will not work incrementally.
* 2. It will not produce the same results on little-endian and big-endian
* machines. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
/* 'm' and 'r' are mixing constants generated offline.
* They're not really 'magic', they just happen to work well. */
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
// Initialize the hash to a 'random' value
/* Initialize the hash to a 'random' value */
uint32_t h = seed ^ len;
// Mix 4 bytes at a time into the hash
/* Mix 4 bytes at a time into the hash */
const uint8_t * data = (const uint8_t *)key;
while(len >= 4) {
uint32_t k = *(uint32_t *)data;
@ -708,15 +747,15 @@ uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
len -= 4;
}
// Handle the last few bytes of the input array
/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
@ -724,13 +763,13 @@ uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
return h;
}
#else // !UPB_UNALIGNED_READS_OK
#else /* !UPB_UNALIGNED_READS_OK */
//-----------------------------------------------------------------------------
// MurmurHashAligned2, by Austin Appleby
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
// on certain platforms.
// Performance will be lower than MurmurHash2
/* -----------------------------------------------------------------------------
* MurmurHashAligned2, by Austin Appleby
* Same algorithm as MurmurHash2, but only does aligned reads - should be safer
* on certain platforms.
* Performance will be lower than MurmurHash2 */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
@ -742,8 +781,10 @@ uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
uint8_t align = (uintptr_t)data & 3;
if(align && (len >= 4)) {
// Pre-load the temp registers
/* Pre-load the temp registers */
uint32_t t = 0, d = 0;
int32_t sl;
int32_t sr;
switch(align) {
case 1: t |= data[2] << 16;
@ -756,16 +797,18 @@ uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
data += 4-align;
len -= 4-align;
int32_t sl = 8 * (4-align);
int32_t sr = 8 * align;
sl = 8 * (4-align);
sr = 8 * align;
// Mix
/* Mix */
while(len >= 4) {
uint32_t k;
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
uint32_t k = t;
k = t;
MIX(h,k,m);
@ -775,25 +818,27 @@ uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
len -= 4;
}
// Handle leftover data in temp registers
/* Handle leftover data in temp registers */
d = 0;
if(len >= align) {
uint32_t k;
switch(align) {
case 3: d |= data[2] << 16;
case 2: d |= data[1] << 8;
case 1: d |= data[0];
}
uint32_t k = (t >> sr) | (d << sl);
k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
//----------
// Handle tail bytes
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16;
@ -824,8 +869,8 @@ uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
len -= 4;
}
//----------
// Handle tail bytes
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16;
@ -842,4 +887,4 @@ uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
}
#undef MIX
#endif // UPB_UNALIGNED_READS_OK
#endif /* UPB_UNALIGNED_READS_OK */

@ -35,9 +35,9 @@ extern "C" {
/* upb_value ******************************************************************/
// A tagged union (stored untagged inside the table) so that we can check that
// clients calling table accessors are correctly typed without having to have
// an explosion of accessors.
/* A tagged union (stored untagged inside the table) so that we can check that
* clients calling table accessors are correctly typed without having to have
* an explosion of accessors. */
typedef enum {
UPB_CTYPE_INT32 = 1,
UPB_CTYPE_INT64 = 2,
@ -47,85 +47,54 @@ typedef enum {
UPB_CTYPE_CSTR = 6,
UPB_CTYPE_PTR = 7,
UPB_CTYPE_CONSTPTR = 8,
UPB_CTYPE_FPTR = 9,
UPB_CTYPE_FPTR = 9
} upb_ctype_t;
typedef union {
int32_t int32;
int64_t int64;
uint64_t uint64;
uint32_t uint32;
bool _bool;
char *cstr;
void *ptr;
const void *constptr;
upb_func *fptr;
} _upb_value;
typedef struct {
_upb_value val;
uint64_t val;
#ifndef NDEBUG
// In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read.
/* In debug mode we carry the value type around also so we can check accesses
* to be sure the right member is being read. */
upb_ctype_t ctype;
#endif
} upb_value;
#ifdef UPB_C99
#define UPB_VALUE_INIT(v, member) {.member = v}
#endif
#define UPB__VALUE_INIT_NONE UPB_VALUE_INIT(NULL, ptr)
#ifdef NDEBUG
#define SET_TYPE(dest, val) UPB_UNUSED(val)
#define UPB_VALUE_INIT_NONE {UPB__VALUE_INIT_NONE}
#else
#define SET_TYPE(dest, val) dest = val
// Non-existent type, all reads will fail.
#define UPB_VALUE_INIT_NONE {UPB__VALUE_INIT_NONE, -1}
#endif
#define UPB_VALUE_INIT_INT32(v) UPB_VALUE_INIT(v, int32)
#define UPB_VALUE_INIT_INT64(v) UPB_VALUE_INIT(v, int64)
#define UPB_VALUE_INIT_UINT32(v) UPB_VALUE_INIT(v, uint32)
#define UPB_VALUE_INIT_UINT64(v) UPB_VALUE_INIT(v, uint64)
#define UPB_VALUE_INIT_BOOL(v) UPB_VALUE_INIT(v, _bool)
#define UPB_VALUE_INIT_CSTR(v) UPB_VALUE_INIT(v, cstr)
#define UPB_VALUE_INIT_PTR(v) UPB_VALUE_INIT(v, ptr)
#define UPB_VALUE_INIT_CONSTPTR(v) UPB_VALUE_INIT(v, constptr)
#define UPB_VALUE_INIT_FPTR(v) UPB_VALUE_INIT(v, fptr)
// Like strdup(), which isn't always available since it's not ANSI C.
/* Like strdup(), which isn't always available since it's not ANSI C. */
char *upb_strdup(const char *s);
// Variant that works with a length-delimited rather than NULL-delimited string,
// as supported by strtable.
/* Variant that works with a length-delimited rather than NULL-delimited string,
* as supported by strtable. */
char *upb_strdup2(const char *s, size_t len);
UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val,
UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val,
upb_ctype_t ctype) {
v->val = val;
SET_TYPE(v->ctype, ctype);
}
UPB_INLINE upb_value _upb_value_val(_upb_value val, upb_ctype_t ctype) {
UPB_INLINE upb_value _upb_value_val(uint64_t val, upb_ctype_t ctype) {
upb_value ret;
_upb_value_setval(&ret, val, ctype);
return ret;
}
// For each value ctype, define the following set of functions:
//
// // Get/set an int32 from a upb_value.
// int32_t upb_value_getint32(upb_value val);
// void upb_value_setint32(upb_value *val, int32_t cval);
//
// // Construct a new upb_value from an int32.
// upb_value upb_value_int32(int32_t val);
#define FUNCS(name, membername, type_t, proto_type) \
/* For each value ctype, define the following set of functions:
*
* // Get/set an int32 from a upb_value.
* int32_t upb_value_getint32(upb_value val);
* void upb_value_setint32(upb_value *val, int32_t cval);
*
* // Construct a new upb_value from an int32.
* upb_value upb_value_int32(int32_t val); */
#define FUNCS(name, membername, type_t, converter, proto_type) \
UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
val->val.uint64 = 0; \
val->val = (converter)cval; \
SET_TYPE(val->ctype, proto_type); \
val->val.membername = cval; \
} \
UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
upb_value ret; \
@ -134,31 +103,41 @@ UPB_INLINE upb_value _upb_value_val(_upb_value val, upb_ctype_t ctype) {
} \
UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
assert(val.ctype == proto_type); \
return val.val.membername; \
return (type_t)(converter)val.val; \
}
FUNCS(int32, int32, int32_t, UPB_CTYPE_INT32);
FUNCS(int64, int64, int64_t, UPB_CTYPE_INT64);
FUNCS(uint32, uint32, uint32_t, UPB_CTYPE_UINT32);
FUNCS(uint64, uint64, uint64_t, UPB_CTYPE_UINT64);
FUNCS(bool, _bool, bool, UPB_CTYPE_BOOL);
FUNCS(cstr, cstr, char*, UPB_CTYPE_CSTR);
FUNCS(ptr, ptr, void*, UPB_CTYPE_PTR);
FUNCS(constptr, constptr, const void*, UPB_CTYPE_CONSTPTR);
FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);
FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
#undef FUNCS
#undef SET_TYPE
/* upb_table ******************************************************************/
/* upb_tabkey *****************************************************************/
/* Either:
* 1. an actual integer key, or
* 2. a pointer to a string prefixed by its uint32_t length, owned by us.
*
* ...depending on whether this is a string table or an int table. We would
* make this a union of those two types, but C89 doesn't support statically
* initializing a non-first union member. */
typedef uintptr_t upb_tabkey;
#define UPB_TABKEY_NUM(n) n
#define UPB_TABKEY_NONE 0
// The preprocessor isn't quite powerful enough to turn the compile-time string
// length into a byte-wise string representation, so code generation needs to
// help it along.
//
// "len1" is the low byte and len4 is the high byte.
/* The preprocessor isn't quite powerful enough to turn the compile-time string
* length into a byte-wise string representation, so code generation needs to
* help it along.
*
* "len1" is the low byte and len4 is the high byte. */
#ifdef UPB_BIG_ENDIAN
#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
(uintptr_t)(len4 len3 len2 len1 strval)
@ -167,53 +146,105 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);
(uintptr_t)(len1 len2 len3 len4 strval)
#endif
// Either:
// 1. an actual integer key, or
// 2. a pointer to a string prefixed by its uint32_t length, owned by us.
//
// ...depending on whether this is a string table or an int table. We would
// make this a union of those two types, but C89 doesn't support statically
// initializing a non-first union member.
typedef uintptr_t upb_tabkey;
// Ideally we could use a structure like this instead of the memcpy() calls:
//
// typedef struct {
// uint32_t len;
// char data[1]; // Allocate to correct length.
// } upb_tabstr;
//
// But unfortuantely in C89 there is no way to statically initialize such a
// thing. So instead of memcpy() the length in and out of the string.
UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
char* mem = (char*)key;
if (len) memcpy(len, mem, sizeof(*len));
return mem + sizeof(*len);
}
/* upb_tabval *****************************************************************/
#ifdef __cplusplus
/* Static initialization not supported.
*
* This separate definition is necessary because in C++, UINTPTR_MAX isn't
* reliably available. */
typedef struct {
uint64_t val;
} upb_tabval;
#else
/* C -- supports static initialization, but to support static initialization of
* both integers and pointers for both 32 and 64 bit targets, it takes a little
* bit of doing. */
#if UINTPTR_MAX == 0xffffffffffffffffULL
#define UPB_PTR_IS_64BITS
#elif UINTPTR_MAX != 0xffffffff
#error Could not determine how many bits pointers are.
#endif
typedef union {
/* For static initialization.
*
* Unfortunately this ugliness is necessary -- it is the only way that we can,
* with -std=c89 -pedantic, statically initialize this to either a pointer or
* an integer on 32-bit platforms. */
struct {
#ifdef UPB_PTR_IS_64BITS
uintptr_t val;
#else
uintptr_t val1;
uintptr_t val2;
#endif
} staticinit;
/* The normal accessor that we use for everything at runtime. */
uint64_t val;
} upb_tabval;
#ifdef UPB_PTR_IS_64BITS
#define UPB_TABVALUE_INT_INIT(v) {{v}}
#define UPB_TABVALUE_EMPTY_INIT {{-1}}
#else
/* 32-bit pointers */
#ifdef UPB_BIG_ENDIAN
#define UPB_TABVALUE_INT_INIT(v) {{0, v}}
#define UPB_TABVALUE_EMPTY_INIT {{-1, -1}}
#else
#define UPB_TABVALUE_INT_INIT(v) {{v, 0}}
#define UPB_TABVALUE_EMPTY_INIT {{-1, -1}}
#endif
#endif
#define UPB_TABVALUE_PTR_INIT(v) UPB_TABVALUE_INT_INIT((uintptr_t)v)
#undef UPB_PTR_IS_64BITS
#endif /* __cplusplus */
/* upb_table ******************************************************************/
typedef struct _upb_tabent {
upb_tabkey key;
_upb_value val;
upb_tabval val;
// Internal chaining. This is const so we can create static initializers for
// tables. We cast away const sometimes, but *only* when the containing
// upb_table is known to be non-const. This requires a bit of care, but
// the subtlety is confined to table.c.
/* Internal chaining. This is const so we can create static initializers for
* tables. We cast away const sometimes, but *only* when the containing
* upb_table is known to be non-const. This requires a bit of care, but
* the subtlety is confined to table.c. */
const struct _upb_tabent *next;
} upb_tabent;
typedef struct {
size_t count; // Number of entries in the hash part.
size_t mask; // Mask to turn hash value -> bucket.
upb_ctype_t ctype; // Type of all values.
uint8_t size_lg2; // Size of the hash table part is 2^size_lg2 entries.
// Hash table entries.
// Making this const isn't entirely accurate; what we really want is for it to
// have the same const-ness as the table it's inside. But there's no way to
// declare that in C. So we have to make it const so that we can statically
// initialize const hash tables. Then we cast away const when we have to.
size_t count; /* Number of entries in the hash part. */
size_t mask; /* Mask to turn hash value -> bucket. */
upb_ctype_t ctype; /* Type of all values. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
* Making this const isn't entirely accurate; what we really want is for it to
* have the same const-ness as the table it's inside. But there's no way to
* declare that in C. So we have to make it const so that we can statically
* initialize const hash tables. Then we cast away const when we have to.
*/
const upb_tabent *entries;
} upb_table;
@ -228,10 +259,10 @@ typedef struct {
UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL)
typedef struct {
upb_table t; // For entries that don't fit in the array part.
const _upb_value *array; // Array part of the table. See const note above.
size_t array_size; // Array part size.
size_t array_count; // Array part number of elements.
upb_table t; /* For entries that don't fit in the array part. */
const upb_tabval *array; /* Array part of the table. See const note above. */
size_t array_size; /* Array part size. */
size_t array_count; /* Array part number of elements. */
} upb_inttable;
#define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
@ -240,8 +271,7 @@ typedef struct {
#define UPB_EMPTY_INTTABLE_INIT(ctype) \
UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
#define UPB_ARRAY_EMPTYVAL -1
#define UPB_ARRAY_EMPTYENT UPB_VALUE_INIT_INT64(UPB_ARRAY_EMPTYVAL)
#define UPB_ARRAY_EMPTYENT -1
UPB_INLINE size_t upb_table_size(const upb_table *t) {
if (t->size_lg2 == 0)
@ -250,12 +280,12 @@ UPB_INLINE size_t upb_table_size(const upb_table *t) {
return 1 << t->size_lg2;
}
// Internal-only functions, in .h file only out of necessity.
/* Internal-only functions, in .h file only out of necessity. */
/* Returns true when the entry's key is 0, the same value this header defines
 * as UPB_TABKEY_NONE. */
UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
return e->key == 0;
}
// Used by some of the unit tests for generic hashing functionality.
/* Used by some of the unit tests for generic hashing functionality. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed);
UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
@ -270,93 +300,94 @@ static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
return t->entries + (hash & t->mask);
}
UPB_INLINE bool upb_arrhas(_upb_value v) {
return v.uint64 != (uint64_t)UPB_ARRAY_EMPTYVAL;
UPB_INLINE bool upb_arrhas(upb_tabval key) {
return key.val != (uint64_t)-1;
}
// Initialize and uninitialize a table, respectively. If memory allocation
// failed, false is returned that the table is uninitialized.
/* Initialize and uninitialize a table, respectively. If memory allocation
* fails, false is returned and the table is left uninitialized. */
bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype);
bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype);
void upb_inttable_uninit(upb_inttable *table);
void upb_strtable_uninit(upb_strtable *table);
// Returns the number of values in the table.
/* Returns the number of values in the table. */
size_t upb_inttable_count(const upb_inttable *t);
/* String-table variant: returns the entry count stored in the underlying
 * upb_table. */
UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
return t->t.count;
}
// Inserts the given key into the hashtable with the given value. The key must
// not already exist in the hash table. For string tables, the key must be
// NULL-terminated, and the table will make an internal copy of the key.
// Inttables must not insert a value of UINTPTR_MAX.
//
// If a table resize was required but memory allocation failed, false is
// returned and the table is unchanged.
/* Inserts the given key into the hashtable with the given value. The key must
* not already exist in the hash table. For string tables, the key must be
* NULL-terminated, and the table will make an internal copy of the key.
* Inttables must not insert a value of UINTPTR_MAX.
*
* If a table resize was required but memory allocation failed, false is
* returned and the table is unchanged. */
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
upb_value val);
// For NULL-terminated strings.
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
upb_value val) {
return upb_strtable_insert2(t, key, strlen(key), val);
}
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
/* Looks up key in this table, returning "true" if the key was found.
* If v is non-NULL, copies the value for this key into *v. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v);
// For NULL-terminated strings.
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_lookup2(t, key, strlen(key), v);
}
// Removes an item from the table. Returns true if the remove was successful,
// and stores the removed item in *val if non-NULL.
/* Removes an item from the table. Returns true if the remove was successful,
* and stores the removed item in *val if non-NULL. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
upb_value *val);
// For NULL-terminated strings.
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_remove2(t, key, strlen(key), v);
}
// Updates an existing entry in an inttable. If the entry does not exist,
// returns false and does nothing. Unlike insert/remove, this does not
// invalidate iterators.
/* Updates an existing entry in an inttable. If the entry does not exist,
* returns false and does nothing. Unlike insert/remove, this does not
* invalidate iterators. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
// Handy routines for treating an inttable like a stack. May not be mixed with
// other insert/remove calls.
/* Handy routines for treating an inttable like a stack. May not be mixed with
* other insert/remove calls. */
bool upb_inttable_push(upb_inttable *t, upb_value val);
upb_value upb_inttable_pop(upb_inttable *t);
// Convenience routines for inttables with pointer keys.
/* Convenience routines for inttables with pointer keys. */
bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val);
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
bool upb_inttable_lookupptr(
const upb_inttable *t, const void *key, upb_value *val);
// Optimizes the table for the current set of entries, for both memory use and
// lookup time. Client should call this after all entries have been inserted;
// inserting more entries is legal, but will likely require a table resize.
/* Optimizes the table for the current set of entries, for both memory use and
* lookup time. Client should call this after all entries have been inserted;
* inserting more entries is legal, but will likely require a table resize. */
void upb_inttable_compact(upb_inttable *t);
// A special-case inlinable version of the lookup routine for 32-bit integers.
/* A special-case inlinable version of the lookup routine for 32-bit
* integers. */
UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
upb_value *v) {
*v = upb_value_int32(0); // Silence compiler warnings.
*v = upb_value_int32(0); /* Silence compiler warnings. */
if (key < t->array_size) {
_upb_value arrval = t->array[key];
upb_tabval arrval = t->array[key];
if (upb_arrhas(arrval)) {
_upb_value_setval(v, arrval, t->t.ctype);
_upb_value_setval(v, arrval.val, t->t.ctype);
return true;
} else {
return false;
@ -366,7 +397,7 @@ UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
if (t->t.entries == NULL) return false;
for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
if ((uint32_t)e->key == key) {
_upb_value_setval(v, e->val, t->t.ctype);
_upb_value_setval(v, e->val.val, t->t.ctype);
return true;
}
if (e->next == NULL) return false;
@ -374,42 +405,43 @@ UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
}
}
// Exposed for testing only.
/* Exposed for testing only. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2);
/* Iterators ******************************************************************/
// Iterators for int and string tables. We are subject to some kind of unusual
// design constraints:
//
// For high-level languages:
// - we must be able to guarantee that we don't crash or corrupt memory even if
// the program accesses an invalidated iterator.
//
// For C++11 range-based for:
// - iterators must be copyable
// - iterators must be comparable
// - it must be possible to construct an "end" value.
//
// Iteration order is undefined.
//
// Modifying the table invalidates iterators. upb_{str,int}table_done() is
// guaranteed to work even on an invalidated iterator, as long as the table it
// is iterating over has not been freed. Calling next() or accessing data from
// an invalidated iterator yields unspecified elements from the table, but it is
// guaranteed not to crash and to return real table elements (except when done()
// is true).
/* Iterators for int and string tables. We are subject to some kind of unusual
* design constraints:
*
* For high-level languages:
* - we must be able to guarantee that we don't crash or corrupt memory even if
* the program accesses an invalidated iterator.
*
* For C++11 range-based for:
* - iterators must be copyable
* - iterators must be comparable
* - it must be possible to construct an "end" value.
*
* Iteration order is undefined.
*
* Modifying the table invalidates iterators. upb_{str,int}table_done() is
* guaranteed to work even on an invalidated iterator, as long as the table it
* is iterating over has not been freed. Calling next() or accessing data from
* an invalidated iterator yields unspecified elements from the table, but it is
* guaranteed not to crash and to return real table elements (except when done()
* is true). */
/* upb_strtable_iter **********************************************************/
// upb_strtable_iter i;
// upb_strtable_begin(&i, t);
// for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
// const char *key = upb_strtable_iter_key(&i);
// const upb_value val = upb_strtable_iter_value(&i);
// // ...
// }
/* upb_strtable_iter i;
* upb_strtable_begin(&i, t);
* for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
* const char *key = upb_strtable_iter_key(&i);
* const upb_value val = upb_strtable_iter_value(&i);
* // ...
* }
*/
typedef struct {
const upb_strtable *t;
@ -429,13 +461,14 @@ bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
/* upb_inttable_iter **********************************************************/
// upb_inttable_iter i;
// upb_inttable_begin(&i, t);
// for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
// uintptr_t key = upb_inttable_iter_key(&i);
// upb_value val = upb_inttable_iter_value(&i);
// // ...
// }
/* upb_inttable_iter i;
* upb_inttable_begin(&i, t);
* for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
* uintptr_t key = upb_inttable_iter_key(&i);
* upb_value val = upb_inttable_iter_value(&i);
* // ...
* }
*/
typedef struct {
const upb_inttable *t;

@ -20,10 +20,10 @@ bool upb_dumptostderr(void *closure, const upb_status* status) {
return false;
}
// Guarantee null-termination and provide ellipsis truncation.
// It may be tempting to "optimize" this by initializing these final
// four bytes up-front and then being careful never to overwrite them,
// this is safer and simpler.
/* Guarantee null-termination and provide ellipsis truncation.
* It may be tempting to "optimize" this by initializing these final
* four bytes up-front and then being careful never to overwrite them,
* this is safer and simpler. */
static void nullz(upb_status *status) {
const char *ellipsis = "...";
size_t len = strlen(ellipsis);
@ -65,7 +65,7 @@ void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
if (!status) return;
status->ok_ = false;
vsnprintf(status->msg, sizeof(status->msg), fmt, args);
_upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
nullz(status);
}

@ -18,37 +18,59 @@
#include <stdbool.h>
#include <stddef.h>
// inline if possible, emit standalone code if required.
/* UPB_INLINE: inline if possible, emit standalone code if required. */
#ifdef __cplusplus
#define UPB_INLINE inline
#elif defined (__GNUC__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static inline
#define UPB_INLINE static
#endif
// Define this manually if you're on big endian and your compiler doesn't
// provide these preprocessor symbols.
/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
* doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define UPB_BIG_ENDIAN
#endif
// For use in C/C++ source files (not headers), forces inlining within the file.
/* Macros for function attributes on compilers that support them. */
#ifdef __GNUC__
#define UPB_FORCEINLINE inline __attribute__((always_inline))
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#else
#define UPB_NORETURN __attribute__((__noreturn__))
#else /* !defined(__GNUC__) */
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#endif
#if __STDC_VERSION__ >= 199901L
#define UPB_C99
/* A few hacky workarounds for functions not in C89.
* For internal use only!
* TODO(haberman): fix these by including our own implementations, or finding
* another workaround.
*/
#ifdef __GNUC__
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#elif __STDC_VERSION__ >= 199901L
/* C99 versions. */
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#else
#error Need implementations of [v]snprintf
#endif
#if ((defined(__cplusplus) && __cplusplus >= 201103L) || \
defined(__GXX_EXPERIMENTAL_CXX0X__)) && !defined(UPB_NO_CXX11)
#define UPB_CXX11
#endif
/* UPB_DISALLOW_COPY_AND_ASSIGN()
* UPB_DISALLOW_POD_OPS()
*
* Declare these in the "private" section of a C++ class to forbid copy/assign
* or all POD ops (construct, destruct, copy, assign) on that class. */
#ifdef UPB_CXX11
#include <type_traits>
#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
@ -57,51 +79,53 @@
#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
class_name() = delete; \
~class_name() = delete; \
/* Friend Pointer<T> so it can access base class. */ \
friend class Pointer<full_class_name>; \
friend class Pointer<const full_class_name>; \
UPB_DISALLOW_COPY_AND_ASSIGN(class_name)
#define UPB_ASSERT_STDLAYOUT(type) \
static_assert(std::is_standard_layout<type>::value, \
#type " must be standard layout");
#else // !defined(UPB_CXX11)
#else /* !defined(UPB_CXX11) */
#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
class_name(const class_name&); \
void operator=(const class_name&);
#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
class_name(); \
~class_name(); \
/* Friend Pointer<T> so it can access base class. */ \
friend class Pointer<full_class_name>; \
friend class Pointer<const full_class_name>; \
UPB_DISALLOW_COPY_AND_ASSIGN(class_name)
#define UPB_ASSERT_STDLAYOUT(type)
#endif
/* UPB_DECLARE_TYPE()
* UPB_DECLARE_DERIVED_TYPE()
* UPB_DECLARE_DERIVED_TYPE2()
*
* Macros for declaring C and C++ types both, including inheritance.
* The inheritance doesn't use real C++ inheritance, to stay compatible with C.
*
* These macros also provide upcasts:
* - in C: type-specific functions (i.e. upb_foo_upcast(foo))
* - in C++: upb::upcast(foo) along with implicit conversions
*
* Downcasts are not provided, but upb/def.h defines downcasts for upb::Def. */
#define UPB_C_UPCASTS(ty, base) \
UPB_INLINE base *ty ## _upcast_mutable(ty *p) { return (base*)p; } \
UPB_INLINE const base *ty ## _upcast(const ty *p) { return (const base*)p; }
#define UPB_C_UPCASTS2(ty, base, base2) \
UPB_C_UPCASTS(ty, base) \
UPB_INLINE base2 *ty ## _upcast2_mutable(ty *p) { return (base2*)p; } \
UPB_INLINE const base2 *ty ## _upcast2(const ty *p) { return (const base2*)p; }
#ifdef __cplusplus
#define UPB_PRIVATE_FOR_CPP private:
#define UPB_DECLARE_TYPE(cppname, cname) typedef cppname cname;
#define UPB_BEGIN_EXTERN_C extern "C" {
#define UPB_END_EXTERN_C }
#define UPB_DEFINE_STRUCT0(cname, members) members;
#define UPB_DEFINE_STRUCT(cname, cbase, members) \
public: \
cbase* base() { return &base_; } \
const cbase* base() const { return &base_; } \
\
private: \
cbase base_; \
members;
#define UPB_DEFINE_CLASS0(cppname, cppmethods, members) \
class cppname { \
cppmethods \
members \
}; \
UPB_ASSERT_STDLAYOUT(cppname);
#define UPB_DEFINE_CLASS1(cppname, cppbase, cppmethods, members) \
UPB_DEFINE_CLASS0(cppname, cppmethods, members) \
#define UPB_PRIVATE_FOR_CPP private:
#define UPB_DECLARE_TYPE(cppname, cname) typedef cppname cname;
#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS(cname, cbase) \
namespace upb { \
template <> \
class Pointer<cppname> : public PointerBase<cppname, cppbase> { \
@ -115,8 +139,11 @@
explicit Pointer(const cppname* ptr) : PointerBase(ptr) {} \
}; \
}
#define UPB_DEFINE_CLASS2(cppname, cppbase, cppbase2, cppmethods, members) \
UPB_DEFINE_CLASS0(cppname, UPB_QUOTE(cppmethods), members) \
#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2, cname, cbase, \
cbase2) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS2(cname, cbase, cbase2) \
namespace upb { \
template <> \
class Pointer<cppname> : public PointerBase2<cppname, cppbase, cppbase2> { \
@ -131,96 +158,97 @@
}; \
}
#else // !defined(__cplusplus)
#else /* !defined(__cplusplus) */
#define UPB_BEGIN_EXTERN_C
#define UPB_END_EXTERN_C
#define UPB_PRIVATE_FOR_CPP
#define UPB_DECLARE_TYPE(cppname, cname) \
struct cname; \
typedef struct cname cname;
#define UPB_BEGIN_EXTERN_C
#define UPB_END_EXTERN_C
#define UPB_DEFINE_STRUCT0(cname, members) \
struct cname { \
members; \
};
#define UPB_DEFINE_STRUCT(cname, cbase, members) \
struct cname { \
cbase base; \
members; \
};
#define UPB_DEFINE_CLASS0(cppname, cppmethods, members) members
#define UPB_DEFINE_CLASS1(cppname, cppbase, cppmethods, members) members
#define UPB_DEFINE_CLASS2(cppname, cppbase, cppbase2, cppmethods, members) \
members
#endif // defined(__cplusplus)
#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS(cname, cbase)
#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2, \
cname, cbase, cbase2) \
UPB_DECLARE_TYPE(cppname, cname) \
UPB_C_UPCASTS2(cname, cbase, cbase2)
#ifdef __GNUC__
#define UPB_NORETURN __attribute__((__noreturn__))
#else
#define UPB_NORETURN
#endif
#endif /* defined(__cplusplus) */
/* Evaluate to the larger / smaller of the two arguments.  Each argument may be
 * evaluated more than once, so do not pass expressions with side effects. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* Marks a variable (or expression) as intentionally unused.  The argument is
 * parenthesized so the void cast applies to the whole expression rather than
 * only to its first operand. */
#define UPB_UNUSED(var) (void)(var)
// Code with commas confuses the preprocessor when passed as arguments, whether
// C++ type names with commas (eg. Foo<int, int>) or code blocks that declare
// variables (ie. int foo, bar).
#define UPB_QUOTE(...) __VA_ARGS__
// For asserting something about a variable when the variable is not used for
// anything else. This prevents "unused variable" warnings when compiling in
// debug mode.
/* For asserting something about a variable when the variable is not used for
 * anything else.  This prevents "unused variable" warnings in builds where
 * assert() is compiled out (NDEBUG).
 *
 * The expansion is wrapped in do/while(0) so that it forms a single statement
 * and can safely be used as the body of an unbraced if/else.  The void cast is
 * spelled out directly so this macro is self-contained. */
#define UPB_ASSERT_VAR(var, predicate) \
  do {                                 \
    (void)(var);                       \
    assert(predicate);                 \
  } while (0)
// Generic function type.
/* Generic function type. */
typedef void upb_func();
/* Casts **********************************************************************/
// Upcasts for C. For downcasts see the definitions of the subtypes.
#define UPB_UPCAST(obj) (&(obj)->base)
#define UPB_UPCAST2(obj) UPB_UPCAST(UPB_UPCAST(obj))
/* C++ Casts ******************************************************************/
#ifdef __cplusplus
// Downcasts for C++. We can't use C++ inheritance directly and maintain
// compatibility with C. So our inheritance is undeclared in C++.
// Specializations of these casting functions are defined for appropriate type
// pairs, and perform the necessary checks.
//
// Example:
// upb::Def* def = <...>;
// upb::MessageDef* = upb::dyn_cast<upb::MessageDef*>(def);
namespace upb {
// Casts to a direct subclass. The caller must know that cast is correct; an
// incorrect cast will throw an assertion failure in debug mode.
template <class T> class Pointer;
/* Casts to a subclass. The caller must know that cast is correct; an
* incorrect cast will throw an assertion failure in debug mode.
*
* Example:
* upb::Def* def = GetDef();
* // Assert-fails if this was not actually a MessageDef.
* upb::MessageDef* md = upb::down_cast<upb::MessageDef*>(def);
*
* Note that downcasts are only defined for some types (at the moment you can
* only downcast from a upb::Def to a specific Def type). */
template<class To, class From> To down_cast(From* f);
// Casts to a direct subclass. If the class does not actually match the given
// subtype, returns NULL.
/* Casts to a subclass. If the class does not actually match the given To type,
* returns NULL.
*
* Example:
* upb::Def* def = GetDef();
* // md will be NULL if this was not actually a MessageDef.
* upb::MessageDef* md = upb::dyn_cast<upb::MessageDef*>(def);
*
* Note that dynamic casts are only defined for some types (at the moment you
* can only downcast from a upb::Def to a specific Def type). */
template<class To, class From> To dyn_cast(From* f);
// Pointer<T> is a simple wrapper around a T*. It is only constructed for
// upcast() below, and its sole purpose is to be implicitly convertable to T* or
// pointers to base classes, just as a pointer would be in regular C++ if the
// inheritance were directly expressed as C++ inheritance.
template <class T> class Pointer;
// Casts to any base class, or the type itself (ie. can be a no-op).
/* Casts to any base class, or the type itself (ie. can be a no-op).
*
* Example:
* upb::MessageDef* md = GetDef();
* // This will fail to compile if this wasn't actually a base class.
* upb::Def* def = upb::upcast(md);
*/
template <class T> inline Pointer<T> upcast(T *f) { return Pointer<T>(f); }
/* Attempt upcast to specific base class.
*
* Example:
* upb::MessageDef* md = GetDef();
* upb::upcast_to<upb::Def>(md)->MethodOnDef();
*/
template <class T, class F> inline T* upcast_to(F *f) {
return static_cast<T*>(upcast(f));
}
/* PointerBase<T>: implementation detail of upb::upcast().
* It is implicitly convertible to pointers to the Base class(es).
*/
template <class T, class Base>
class PointerBase {
public:
explicit PointerBase(T* ptr) : ptr_(ptr) {}
operator T*() { return ptr_; }
operator Base*() { return ptr_->base(); }
operator Base*() { return (Base*)ptr_; }
private:
T* ptr_;
@ -242,17 +270,17 @@ class PointerBase2 : public PointerBase<T, Base> {
#ifdef __cplusplus
#include <algorithm> // For std::swap().
#include <algorithm> /* For std::swap(). */
namespace upb {
// Provides RAII semantics for upb refcounted objects. Each reffed_ptr owns a
// ref on whatever object it points to (if any).
/* Provides RAII semantics for upb refcounted objects. Each reffed_ptr owns a
* ref on whatever object it points to (if any). */
template <class T> class reffed_ptr {
public:
reffed_ptr() : ptr_(NULL) {}
// If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor.
/* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor. */
template <class U>
reffed_ptr(U* val, const void* ref_donor = NULL)
: ptr_(upb::upcast(val)) {
@ -283,8 +311,8 @@ template <class T> class reffed_ptr {
return *this;
}
// TODO(haberman): add C++11 move construction/assignment for greater
// efficiency.
/* TODO(haberman): add C++11 move construction/assignment for greater
* efficiency. */
void swap(reffed_ptr& other) {
if (ptr_ == other.ptr_) {
@ -308,7 +336,7 @@ template <class T> class reffed_ptr {
T* get() const { return ptr_; }
// If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor.
/* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor. */
template <class U>
void reset(U* ptr = NULL, const void* ref_donor = NULL) {
reffed_ptr(ptr, ref_donor).swap(*this);
@ -324,8 +352,8 @@ template <class T> class reffed_ptr {
return reffed_ptr<U>(upb::dyn_cast<U*>(get()));
}
// Plain release() is unsafe; if we were the only owner, it would leak the
// object. Instead we provide this:
/* Plain release() is unsafe; if we were the only owner, it would leak the
* object. Instead we provide this: */
T* ReleaseTo(const void* new_owner) {
T* ret = NULL;
ptr_->DonateRef(this, new_owner);
@ -337,9 +365,9 @@ template <class T> class reffed_ptr {
T* ptr_;
};
} // namespace upb
} /* namespace upb */
#endif // __cplusplus
#endif /* __cplusplus */
/* upb::Status ****************************************************************/
@ -351,70 +379,76 @@ class Status;
}
#endif
UPB_DECLARE_TYPE(upb::ErrorSpace, upb_errorspace);
UPB_DECLARE_TYPE(upb::Status, upb_status);
UPB_DECLARE_TYPE(upb::ErrorSpace, upb_errorspace)
UPB_DECLARE_TYPE(upb::Status, upb_status)
// The maximum length of an error message before it will get truncated.
/* The maximum length of an error message before it will get truncated. */
#define UPB_STATUS_MAX_MESSAGE 128
// An error callback function is used to report errors from some component.
// The function can return "true" to indicate that the component should try
// to recover and proceed, but this is not always possible.
/* An error callback function is used to report errors from some component.
* The function can return "true" to indicate that the component should try
* to recover and proceed, but this is not always possible. */
typedef bool upb_errcb_t(void *closure, const upb_status* status);
UPB_DEFINE_CLASS0(upb::ErrorSpace,
,
UPB_DEFINE_STRUCT0(upb_errorspace,
#ifdef __cplusplus
class upb::ErrorSpace {
#else
struct upb_errorspace {
#endif
const char *name;
// Should the error message in the status object according to this code.
/* Should set the error message in the status object according to this code. */
void (*set_message)(upb_status* status, int code);
));
};
#ifdef __cplusplus
/* Object representing a success or failure status.
* It owns no resources and allocates no memory, so it should work
* even in OOM situations. */
// Object representing a success or failure status.
// It owns no resources and allocates no memory, so it should work
// even in OOM situations.
UPB_DEFINE_CLASS0(upb::Status,
class upb::Status {
public:
Status();
// Returns true if there is no error.
/* Returns true if there is no error. */
bool ok() const;
// Optional error space and code, useful if the caller wants to
// programmatically check the specific kind of error.
/* Optional error space and code, useful if the caller wants to
* programmatically check the specific kind of error. */
ErrorSpace* error_space();
int code() const;
const char *error_message() const;
// The error message will be truncated if it is longer than
// UPB_STATUS_MAX_MESSAGE-4.
/* The error message will be truncated if it is longer than
* UPB_STATUS_MAX_MESSAGE-4. */
void SetErrorMessage(const char* msg);
void SetFormattedErrorMessage(const char* fmt, ...);
// If there is no error message already, this will use the ErrorSpace to
// populate the error message for this code. The caller can still call
// SetErrorMessage() to give a more specific message.
/* If there is no error message already, this will use the ErrorSpace to
* populate the error message for this code. The caller can still call
* SetErrorMessage() to give a more specific message. */
void SetErrorCode(ErrorSpace* space, int code);
// Resets the status to a successful state with no message.
/* Resets the status to a successful state with no message. */
void Clear();
void CopyFrom(const Status& other);
private:
UPB_DISALLOW_COPY_AND_ASSIGN(Status);
,
UPB_DEFINE_STRUCT0(upb_status,
UPB_DISALLOW_COPY_AND_ASSIGN(Status)
#else
struct upb_status {
#endif
bool ok_;
// Specific status code defined by some error space (optional).
/* Specific status code defined by some error space (optional). */
int code_;
upb_errorspace *error_space_;
// Error message; NULL-terminated.
/* Error message; NULL-terminated. */
char msg[UPB_STATUS_MAX_MESSAGE];
));
};
#define UPB_STATUS_INIT {true, 0, NULL, {0}}
@ -422,15 +456,15 @@ UPB_DEFINE_STRUCT0(upb_status,
extern "C" {
#endif
// The returned string is invalidated by any other call into the status.
/* The returned string is invalidated by any other call into the status. */
const char *upb_status_errmsg(const upb_status *status);
bool upb_ok(const upb_status *status);
upb_errorspace *upb_status_errspace(const upb_status *status);
int upb_status_errcode(const upb_status *status);
// Any of the functions that write to a status object allow status to be NULL,
// to support use cases where the function's caller does not care about the
// status message.
/* Any of the functions that write to a status object allow status to be NULL,
* to support use cases where the function's caller does not care about the
* status message. */
void upb_status_clear(upb_status *status);
void upb_status_seterrmsg(upb_status *status, const char *msg);
void upb_status_seterrf(upb_status *status, const char *fmt, ...);
@ -439,11 +473,11 @@ void upb_status_seterrcode(upb_status *status, upb_errorspace *space, int code);
void upb_status_copy(upb_status *to, const upb_status *from);
#ifdef __cplusplus
} // extern "C"
} /* extern "C" */
namespace upb {
// C++ Wrappers
/* C++ Wrappers */
inline Status::Status() { Clear(); }
inline bool Status::ok() const { return upb_ok(this); }
inline const char* Status::error_message() const {
@ -466,7 +500,7 @@ inline void Status::CopyFrom(const Status& other) {
upb_status_copy(this, &other);
}
} // namespace upb
} /* namespace upb */
#endif

Loading…
Cancel
Save