From 61e5d367ff180a4fcd48dd06b9918a9d37edc766 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 18 Feb 2011 18:17:06 -0800 Subject: [PATCH] Change the API for getting the bootstrapped defs. The symtab that contains them is now hidden, and you can look them up by name but there is no access to the symtab itself, so there is no risk of mutating it (by extending it, adding other defs to it, etc). --- Makefile | 21 ++++--- benchmarks/parsestream.upb_table.c | 6 -- benchmarks/parsetostruct.upb_table.c | 1 - src/descriptor_const.h | 24 ++++---- src/upb_decoder.c | 2 +- src/upb_def.c | 87 +++++++++++++++++----------- src/upb_def.h | 27 +++++---- src/upb_glue.c | 3 +- src/upbc.c | 20 +------ tests/test_def.c | 9 +-- tests/test_vs_proto2.cc | 16 +---- 11 files changed, 100 insertions(+), 116 deletions(-) diff --git a/Makefile b/Makefile index f2bd34e5a6..6801f637b2 100644 --- a/Makefile +++ b/Makefile @@ -179,22 +179,28 @@ tests/test.proto.pb: tests/test.proto @# TODO: replace with upbc protoc tests/test.proto -otests/test.proto.pb -TESTS= \ +SIMPLE_TESTS= \ tests/test_string \ - tests/test_table \ tests/test_def \ tests/test_stream \ + tests/tests +# tests/test_decoder \ + +SIMPLE_CXX_TESTS= \ + tests/test_table + +VARIADIC_TESTS= \ tests/t.test_vs_proto2.googlemessage1 \ tests/t.test_vs_proto2.googlemessage2 \ - tests/test.proto.pb \ - tests/tests -# tests/test_decoder \ +TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) + tests: $(TESTS) $(TESTS): $(LIBUPB) +tests/tests: tests/test.proto.pb -% : %.c +$(SIMPLE_TESTS): % : %.c $(E) CC $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< @@ -237,9 +243,6 @@ tests/test_table: tests/test_table.cc tests/tests: src/libupb.a -# Tools -tools: src/upbc -src/upbc: $(LIBUPB) # Benchmarks. ################################################################## diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index a646999ad0..85c9ff9255 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -17,12 +17,6 @@ static bool initialize() // Initialize upb state, decode descriptor. upb_status status = UPB_STATUS_INIT; upb_symtab *s = upb_symtab_new(); - upb_symtab_add_descriptorproto(s); - upb_def *fds_def = upb_symtab_lookup( - s, UPB_STRLIT("google.protobuf.FileDescriptorSet")); - if (!fds_def) { - fprintf(stderr, "Couldn't load FileDescriptorSet def"); - } upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); if(fds_str == NULL) { diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c index 592f86e948..dfdd5b2c17 100644 --- a/benchmarks/parsetostruct.upb_table.c +++ b/benchmarks/parsetostruct.upb_table.c @@ -20,7 +20,6 @@ static bool initialize() // Initialize upb state, decode descriptor. upb_status status = UPB_STATUS_INIT; upb_symtab *s = upb_symtab_new(); - upb_symtab_add_descriptorproto(s); upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); if(fds_str == NULL) { diff --git a/src/descriptor_const.h b/src/descriptor_const.h index 44598dc2b8..f02eb75169 100644 --- a/src/descriptor_const.h +++ b/src/descriptor_const.h @@ -11,8 +11,8 @@ extern "C" { typedef enum google_protobuf_FieldOptions_CType { GOOGLE_PROTOBUF_FIELDOPTIONS_STRING = 0, - GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1, - GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2 + GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2, + GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1 } google_protobuf_FieldOptions_CType; typedef enum google_protobuf_FieldDescriptorProto_Type { @@ -50,10 +50,8 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { /* Constants for field names and numbers. */ -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNUM 1 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNAME "name_part" -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNUM 2 -#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNAME "is_extension" +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM 1 +#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNAME "file" #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM 1 #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNAME "name" #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM 2 @@ -132,8 +130,10 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_FIELDNAME "method" #define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_FIELDNUM 3 #define GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_FIELDNAME "options" -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNUM 1 -#define GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_FIELDNAME "file" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNUM 1 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_FIELDNAME "name_part" +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNUM 2 +#define GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_FIELDNAME "is_extension" #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_FIELDNUM 1 #define GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_FIELDNAME "location" #define GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_FIELDNUM 1 @@ -146,14 +146,12 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED_FIELDNAME "packed" #define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_FIELDNUM 3 #define GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_FIELDNAME "deprecated" -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 -#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_FIELDNUM 9 #define GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_FIELDNAME "experimental_map_key" +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_FIELDNUM 1 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_FIELDNAME "java_package" -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 -#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_FIELDNUM 8 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_FIELDNAME "java_outer_classname" #define GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR_FIELDNUM 9 @@ -168,6 +166,8 @@ typedef enum google_protobuf_FileOptions_OptimizeMode { #define GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES_FIELDNAME "py_generic_services" #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH_FIELDNUM 20 #define GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH_FIELDNAME "java_generate_equals_and_hash" +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNUM 999 +#define GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_FIELDNAME "uninterpreted_option" #define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_FIELDNUM 1 #define GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_FIELDNAME "message_set_wire_format" #define GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR_FIELDNUM 2 diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 47e67b6b6f..40ab790701 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -346,8 +346,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { //const char *ptr = d->ptr; // Decodes as many fields as possible, updating d->ptr appropriately, // before falling through to the slow(er) path. - const char *end = UPB_MIN(d->end, d->submsg_end); #ifdef USE_ASSEMBLY_FASTPATH + const char *end = UPB_MIN(d->end, d->submsg_end); fastdecode_ret ret = upb_fastdecode(d->ptr, end, d->dispatcher.top->handlers.set->value, d->dispatcher.top->handlers.closure, diff --git a/src/upb_def.c b/src/upb_def.c index 6a1e543d50..0382610e47 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -859,6 +859,7 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, // TODO: This branch is totally broken, but currently not used. upb_string *sym_str = upb_string_new(); int baselen = upb_string_len(base); + upb_symtab_ent *ret = NULL; while(1) { // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym upb_strlen_t len = baselen + upb_string_len(sym) + 1; @@ -868,11 +869,18 @@ static upb_symtab_ent *upb_resolve(upb_strtable *t, memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym)); upb_symtab_ent *e = upb_strtable_lookup(t, sym_str); - if (e) return e; - else if(baselen == 0) return NULL; // No more scopes to try. - + if (e) { + ret = e; + break; + } else if(baselen == 0) { + // No more scopes to try. + ret = NULL; + break; + } baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); } + upb_string_unref(sym_str); + return ret; } } @@ -1307,38 +1315,51 @@ static upb_src *upb_baredecoder_src(upb_baredecoder *d) { return &d->src; } -void upb_symtab_add_descriptorproto(upb_symtab *symtab) { - // For the moment we silently decline to perform the operation if the symbols - // already exist in the symtab. Revisit this when we have a better story - // about whether syms in a table can be replaced. - if(symtab->fds_msgdef) return; - - static upb_string descriptor_str = - UPB_STATIC_STRING_ARRAY(descriptor_pb); - upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); - upb_status status = UPB_STATUS_INIT; - upb_symtab_addfds(symtab, upb_baredecoder_src(decoder), &status); - upb_baredecoder_free(decoder); - - if(!upb_ok(&status)) { - // upb itself is corrupt. - upb_printerr(&status); - upb_clearerr(&status); - upb_symtab_unref(symtab); - abort(); +static upb_symtab *descriptor_symtab = NULL; + +static void upb_free_descriptor_symtab() { + if (descriptor_symtab) { + // There should be no way for anyone to acquire a ref on this symtab. + assert(upb_atomic_only(&descriptor_symtab->refcount)); + _upb_symtab_free(descriptor_symtab); + descriptor_symtab = NULL; } - upb_def *def = upb_symtab_lookup( - symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); - if (!def || (symtab->fds_msgdef = upb_dyncast_msgdef(def)) == NULL) { - // upb itself is corrupt. - abort(); +} + +upb_def *upb_getdescriptordef(upb_string *str) { + // TODO: add locking. + if (descriptor_symtab == NULL) { + descriptor_symtab = upb_symtab_new(); + + static upb_string descriptor_str = + UPB_STATIC_STRING_ARRAY(descriptor_pb); + upb_baredecoder *decoder = upb_baredecoder_new(&descriptor_str); + upb_status status = UPB_STATUS_INIT; + upb_symtab_addfds(descriptor_symtab, upb_baredecoder_src(decoder), &status); + upb_baredecoder_free(decoder); + + if(!upb_ok(&status)) { + // upb itself is corrupt. + upb_printerr(&status); + upb_clearerr(&status); + abort(); + } + upb_status_uninit(&status); + + // As a sanity check, make sure that FileDescriptorSet was loaded. + upb_msgdef *def = upb_getfdsdef(); + if (!def) { + // upb itself is corrupt. + abort(); + } + upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it. + atexit(upb_free_descriptor_symtab); } - upb_def_unref(def); // The symtab already holds a ref on it. - upb_status_uninit(&status); + return upb_symtab_resolve( + descriptor_symtab, UPB_STRLIT("google.protobuf"), str); } -upb_msgdef *upb_symtab_fds_def(upb_symtab *s) { - assert(s->fds_msgdef != NULL); - upb_def_ref(UPB_UPCAST(s->fds_msgdef)); - return s->fds_msgdef; +upb_msgdef *upb_getfdsdef() { + return upb_downcast_msgdef( + upb_getdescriptordef(UPB_STRLIT("FileDescriptorSet"))); } diff --git a/src/upb_def.h b/src/upb_def.h index db33fbb337..121d5bce99 100644 --- a/src/upb_def.h +++ b/src/upb_def.h @@ -276,7 +276,7 @@ void _upb_symtab_free(upb_symtab *s); // Must not be called directly! INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); } INLINE void upb_symtab_unref(upb_symtab *s) { - if(upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); + if(s && upb_atomic_unref(&s->refcount)) _upb_symtab_free(s); } // Resolves the given symbol using the rules described in descriptor.proto, @@ -314,17 +314,20 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type); // more useful? Maybe it should be an option. void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status); -// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab. -// This is necessary for bootstrapping, since these are the upb_defs that -// specify other defs and allow them to be loaded. -void upb_symtab_add_descriptorproto(upb_symtab *s); - -// Returns the upb_msgdef for google.protobuf.FileDescriptorSet, which the -// caller owns a ref on. This is a convenience method that is equivalent to -// looking up the symbol called "google.protobuf.FileDescriptorSet" yourself, -// except that it only will return a def that was added by -// upb_symtab_add_descriptorproto(). -upb_msgdef *upb_symtab_fds_def(upb_symtab *s); +// Returns a def corresponding to the given name, from descriptor.proto. +// upb internally bootstraps the defs in descriptor.proto, since they are +// necessary for loading other descriptors. The caller owns a ref on the +// returned def (which is NULL if no such def exists in descriptor.proto). +// +// The name should *not* be qualified by the package, to promote +// interoperability between the internal and external releases of Protocol +// Buffers (inside Google, these are in the "proto2" package, externally they +// are in "google.protobuf". +upb_def *upb_getdescriptordef(upb_string *str); + +// A convenience method for getting the upb_def for FileDescriptorProto. +// Return should never be NULL. +upb_msgdef *upb_getfdsdef(); /* upb_def casts **************************************************************/ diff --git a/src/upb_glue.c b/src/upb_glue.c index 536adc07a6..a3d4e7df59 100644 --- a/src/upb_glue.c +++ b/src/upb_glue.c @@ -38,13 +38,12 @@ void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md, } void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) { - upb_symtab_add_descriptorproto(symtab); upb_stringsrc strsrc; upb_stringsrc_init(&strsrc); upb_stringsrc_reset(&strsrc, str); upb_decoder d; - upb_msgdef *fds_msgdef = upb_symtab_fds_def(symtab); + upb_msgdef *fds_msgdef = upb_getfdsdef(); upb_decoder_init(&d, fds_msgdef); upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); diff --git a/src/upbc.c b/src/upbc.c index 4cfa6e6982..428ec4164f 100644 --- a/src/upbc.c +++ b/src/upbc.c @@ -191,29 +191,12 @@ int main(int argc, char *argv[]) // TODO: make upb_parsedesc use a separate symtab, so we can use it here when // importing descriptor.proto. upb_symtab *s = upb_symtab_new(); - upb_symtab_add_descriptorproto(s); - upb_symtab *s2 = upb_symtab_new(); upb_status status = UPB_STATUS_INIT; - - upb_stringsrc strsrc; - upb_stringsrc_init(&strsrc); - upb_stringsrc_reset(&strsrc, descriptor); - - upb_decoder d; - upb_msgdef *fds_msgdef = upb_symtab_fds_def(s); - upb_decoder_init(&d, fds_msgdef); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc)); - - upb_symtab_addfds(s2, upb_decoder_src(&d), &status); - upb_stringsrc_uninit(&strsrc); - upb_decoder_uninit(&d); - upb_def_unref(UPB_UPCAST(fds_msgdef)); - + upb_parsedesc(s, descriptor, &status); if(!upb_ok(&status)) { upb_printerr(&status); error("Failed to parse input file descriptor\n"); } - upb_status_uninit(&status); /* Emit output files. */ @@ -232,7 +215,6 @@ int main(int argc, char *argv[]) free(defs); upb_string_unref(descriptor); upb_symtab_unref(s); - upb_symtab_unref(s2); fclose(h_const_file); return 0; diff --git a/tests/test_def.c b/tests/test_def.c index 2d2658f34e..287cc32044 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -5,8 +5,8 @@ int main() { upb_symtab *s = upb_symtab_new(); - upb_symtab_add_descriptorproto(s); + // Will be empty atm since we haven't added anything to the symtab. int count; upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY); for (int i = 0; i < count; i++) { @@ -14,12 +14,9 @@ int main() { } free(defs); - upb_string *str = upb_strdupc("google.protobuf.FileDescriptorSet"); - upb_def *fds = upb_symtab_lookup(s, str); + upb_msgdef *fds = upb_getfdsdef(); assert(fds != NULL); - assert(upb_dyncast_msgdef(fds) != NULL); - upb_def_unref(fds); - upb_string_unref(str); + upb_def_unref(UPB_UPCAST(fds)); upb_symtab_unref(s); return 0; } diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index 749eedfea5..1839123833 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -213,26 +213,13 @@ int main(int argc, char *argv[]) fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); return 1; } - upb_symtab_add_descriptorproto(symtab); - upb_def *fds_msgdef = upb_symtab_lookup( - symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet")); - assert(fds_msgdef); - - upb_stringsrc ssrc; - upb_stringsrc_init(&ssrc); - upb_stringsrc_reset(&ssrc, fds); - upb_decoder decoder; - upb_decoder_init(&decoder, upb_downcast_msgdef(fds_msgdef)); - upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&ssrc)); - upb_symtab_addfds(symtab, upb_decoder_src(&decoder), &status); + upb_parsedesc(symtab, fds, &status); if(!upb_ok(&status)) { fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); upb_printerr(&status); return 1; } upb_string_unref(fds); - upb_decoder_uninit(&decoder); - upb_stringsrc_uninit(&ssrc); upb_string *proto_name = upb_strdupc(MESSAGE_NAME); upb_def *def = upb_symtab_lookup(symtab, proto_name); @@ -260,7 +247,6 @@ int main(int argc, char *argv[]) upb_msg_unref(upb_msg, msgdef); upb_def_unref(UPB_UPCAST(msgdef)); - upb_def_unref(fds_msgdef); upb_string_unref(str); upb_symtab_unref(symtab); upb_status_uninit(&status);