From fcd8889d5b68be6ca7d1df705ad2159777e2f379 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 13 Jan 2015 18:14:39 -0800 Subject: [PATCH 01/10] Support oneofs in MRI Ruby C extension. --- ruby/ext/google/protobuf_c/defs.c | 303 +++++++++- ruby/ext/google/protobuf_c/encode_decode.c | 190 +++++- ruby/ext/google/protobuf_c/protobuf.c | 2 + ruby/ext/google/protobuf_c/protobuf.h | 49 +- ruby/ext/google/protobuf_c/storage.c | 250 ++++++-- ruby/ext/google/protobuf_c/upb.c | 552 +++++++++++++++--- ruby/ext/google/protobuf_c/upb.h | 635 +++++++++++++++++++-- ruby/tests/basic.rb | 92 +++ 8 files changed, 1840 insertions(+), 233 deletions(-) diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index a18aaac406..371939aeef 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -65,6 +65,10 @@ static upb_fielddef* check_field_notfrozen(const upb_fielddef* def) { return (upb_fielddef*)check_notfrozen((const upb_def*)def); } +static upb_oneofdef* check_oneof_notfrozen(const upb_oneofdef* def) { + return (upb_oneofdef*)check_notfrozen((const upb_def*)def); +} + static upb_enumdef* check_enum_notfrozen(const upb_enumdef* def) { return (upb_enumdef*)check_notfrozen((const upb_def*)def); } @@ -282,6 +286,9 @@ void Descriptor_register(VALUE module) { rb_define_method(klass, "each", Descriptor_each, 0); rb_define_method(klass, "lookup", Descriptor_lookup, 1); rb_define_method(klass, "add_field", Descriptor_add_field, 1); + rb_define_method(klass, "add_oneof", Descriptor_add_oneof, 1); + rb_define_method(klass, "oneofs", Descriptor_oneofs, 0); + rb_define_method(klass, "lookup_oneof", Descriptor_lookup_oneof, 1); rb_define_method(klass, "msgclass", Descriptor_msgclass, 0); rb_define_method(klass, "name", Descriptor_name, 0); rb_define_method(klass, "name=", Descriptor_name_set, 1); @@ -328,10 +335,10 @@ VALUE Descriptor_name_set(VALUE _self, VALUE str) { VALUE Descriptor_each(VALUE _self) { DEFINE_SELF(Descriptor, self, _self); - upb_msg_iter it; - for (upb_msg_begin(&it, self->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, self->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); VALUE obj = get_def_obj(field); rb_yield(obj); @@ -360,7 +367,7 @@ VALUE Descriptor_lookup(VALUE _self, VALUE name) { * call-seq: * Descriptor.add_field(field) => nil * - * Adds the given FieldDescriptor to this message type. The descriptor must not + * Adds the given FieldDescriptor to this message type. This descriptor must not * have been added to a pool yet. Raises an exception if a field with the same * name or number already exists. Sub-type references (e.g. for fields of type * message) are not resolved at this point. @@ -377,6 +384,67 @@ VALUE Descriptor_add_field(VALUE _self, VALUE obj) { return Qnil; } +/* + * call-seq: + * Descriptor.add_oneof(oneof) => nil + * + * Adds the given OneofDescriptor to this message type. This descriptor must not + * have been added to a pool yet. Raises an exception if a oneof with the same + * name already exists, or if any of the oneof's fields' names or numbers + * conflict with an existing field in this message type. All fields in the oneof + * are added to the message descriptor. Sub-type references (e.g. for fields of + * type message) are not resolved at this point. + */ +VALUE Descriptor_add_oneof(VALUE _self, VALUE obj) { + DEFINE_SELF(Descriptor, self, _self); + upb_msgdef* mut_def = check_msg_notfrozen(self->msgdef); + OneofDescriptor* def = ruby_to_OneofDescriptor(obj); + upb_oneofdef* mut_oneof_def = check_oneof_notfrozen(def->oneofdef); + CHECK_UPB( + upb_msgdef_addoneof(mut_def, mut_oneof_def, NULL, &status), + "Adding oneof to Descriptor failed"); + add_def_obj(def->oneofdef, obj); + return Qnil; +} + +/* + * call-seq: + * Descriptor.oneofs => list of OneofDescriptors + * + * Returns a list of OneofDescriptors that are part of this message type. + */ +VALUE Descriptor_oneofs(VALUE _self) { + DEFINE_SELF(Descriptor, self, _self); + + VALUE ret = rb_ary_new(); + upb_msg_oneof_iter it; + for (upb_msg_oneof_begin(&it, self->msgdef); + !upb_msg_oneof_done(&it); + upb_msg_oneof_next(&it)) { + const upb_oneofdef* oneof = upb_msg_iter_oneof(&it); + VALUE obj = get_def_obj(oneof); + rb_ary_push(ret, obj); + } + return ret; +} + +/* + * call-seq: + * Descriptor.lookup_oneof(name) => OneofDescriptor + * + * Returns the oneof descriptor for the oneof with the given name, if present, + * or nil if none. + */ +VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name) { + DEFINE_SELF(Descriptor, self, _self); + const char* s = get_str(name); + const upb_oneofdef* oneof = upb_msgdef_ntooz(self->msgdef, s); + if (oneof == NULL) { + return Qnil; + } + return get_def_obj(oneof); +} + /* * call-seq: * Descriptor.msgclass => message_klass @@ -743,6 +811,120 @@ VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value) { return Qnil; } +// ----------------------------------------------------------------------------- +// OneofDescriptor. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(OneofDescriptor, "Google::Protobuf::OneofDescriptor"); + +void OneofDescriptor_mark(void* _self) { +} + +void OneofDescriptor_free(void* _self) { + OneofDescriptor* self = _self; + upb_oneofdef_unref(self->oneofdef, &self->oneofdef); + xfree(self); +} + +/* + * call-seq: + * OneofDescriptor.new => oneof_descriptor + * + * Creates a new, empty, oneof descriptor. The oneof may only be modified prior + * to being added to a message descriptor which is subsequently added to a pool. + */ +VALUE OneofDescriptor_alloc(VALUE klass) { + OneofDescriptor* self = ALLOC(OneofDescriptor); + VALUE ret = TypedData_Wrap_Struct(klass, &_OneofDescriptor_type, self); + self->oneofdef = upb_oneofdef_new(&self->oneofdef); + return ret; +} + +void OneofDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "OneofDescriptor", rb_cObject); + rb_define_alloc_func(klass, OneofDescriptor_alloc); + rb_define_method(klass, "name", OneofDescriptor_name, 0); + rb_define_method(klass, "name=", OneofDescriptor_name_set, 1); + rb_define_method(klass, "add_field", OneofDescriptor_add_field, 1); + rb_define_method(klass, "each", OneofDescriptor_each, 0); + rb_include_module(klass, rb_mEnumerable); + cOneofDescriptor = klass; + rb_gc_register_address(&cOneofDescriptor); +} + +/* + * call-seq: + * OneofDescriptor.name => name + * + * Returns the name of this oneof. + */ +VALUE OneofDescriptor_name(VALUE _self) { + DEFINE_SELF(OneofDescriptor, self, _self); + return rb_str_maybe_null(upb_oneofdef_name(self->oneofdef)); +} + +/* + * call-seq: + * OneofDescriptor.name = name + * + * Sets a new name for this oneof. The oneof must not have been added to a + * message descriptor yet. + */ +VALUE OneofDescriptor_name_set(VALUE _self, VALUE value) { + DEFINE_SELF(OneofDescriptor, self, _self); + upb_oneofdef* mut_def = check_oneof_notfrozen(self->oneofdef); + const char* str = get_str(value); + CHECK_UPB(upb_oneofdef_setname(mut_def, str, &status), + "Error setting oneof name"); + return Qnil; +} + +/* + * call-seq: + * OneofDescriptor.add_field(field) => nil + * + * Adds a field to this oneof. The field may have been added to this oneof in + * the past, or the message to which this oneof belongs (if any), but may not + * have already been added to any other oneof or message. Otherwise, an + * exception is raised. + * + * All fields added to the oneof via this method will be automatically added to + * the message to which this oneof belongs, if it belongs to one currently, or + * else will be added to any message to which the oneof is later added at the + * time that it is added. + */ +VALUE OneofDescriptor_add_field(VALUE _self, VALUE obj) { + DEFINE_SELF(OneofDescriptor, self, _self); + upb_oneofdef* mut_def = check_oneof_notfrozen(self->oneofdef); + FieldDescriptor* def = ruby_to_FieldDescriptor(obj); + upb_fielddef* mut_field_def = check_field_notfrozen(def->fielddef); + CHECK_UPB( + upb_oneofdef_addfield(mut_def, mut_field_def, NULL, &status), + "Adding field to OneofDescriptor failed"); + add_def_obj(def->fielddef, obj); + return Qnil; +} + +/* + * call-seq: + * OneofDescriptor.each(&block) => nil + * + * Iterates through fields in this oneof, yielding to the block on each one. + */ +VALUE OneofDescriptor_each(VALUE _self, VALUE field) { + DEFINE_SELF(OneofDescriptor, self, _self); + upb_oneof_iter it; + for (upb_oneof_begin(&it, self->oneofdef); + !upb_oneof_done(&it); + upb_oneof_next(&it)) { + const upb_fielddef* f = upb_oneof_iter_field(&it); + VALUE obj = get_def_obj(f); + rb_yield(obj); + } + return Qnil; +} + // ----------------------------------------------------------------------------- // EnumDescriptor. // ----------------------------------------------------------------------------- @@ -952,6 +1134,7 @@ void MessageBuilderContext_register(VALUE module) { rb_define_method(klass, "required", MessageBuilderContext_required, -1); rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); rb_define_method(klass, "map", MessageBuilderContext_map, -1); + rb_define_method(klass, "oneof", MessageBuilderContext_oneof, 1); cMessageBuilderContext = klass; rb_gc_register_address(&cMessageBuilderContext); } @@ -1165,6 +1348,110 @@ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { return Qnil; } +/* + * call-seq: + * MessageBuilderContext.oneof(name, &block) => nil + * + * Creates a new OneofDescriptor with the given name, creates a + * OneofBuilderContext attached to that OneofDescriptor, evaluates the given + * block in the context of that OneofBuilderContext with #instance_eval, and + * then adds the oneof to the message. + * + * This is the recommended, idiomatic way to build oneof definitions. + */ +VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name) { + DEFINE_SELF(MessageBuilderContext, self, _self); + VALUE oneofdef = rb_class_new_instance(0, NULL, cOneofDescriptor); + VALUE args[2] = { oneofdef, self->builder }; + VALUE ctx = rb_class_new_instance(2, args, cOneofBuilderContext); + VALUE block = rb_block_proc(); + VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name))); + rb_funcall(oneofdef, rb_intern("name="), 1, name_str); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + Descriptor_add_oneof(self->descriptor, oneofdef); + + return Qnil; +} + +// ----------------------------------------------------------------------------- +// OneofBuilderContext. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(OneofBuilderContext, + "Google::Protobuf::Internal::OneofBuilderContext"); + +void OneofBuilderContext_mark(void* _self) { + OneofBuilderContext* self = _self; + rb_gc_mark(self->descriptor); + rb_gc_mark(self->builder); +} + +void OneofBuilderContext_free(void* _self) { + OneofBuilderContext* self = _self; + xfree(self); +} + +VALUE OneofBuilderContext_alloc(VALUE klass) { + OneofBuilderContext* self = ALLOC(OneofBuilderContext); + VALUE ret = TypedData_Wrap_Struct( + klass, &_OneofBuilderContext_type, self); + self->descriptor = Qnil; + self->builder = Qnil; + return ret; +} + +void OneofBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "OneofBuilderContext", rb_cObject); + rb_define_alloc_func(klass, OneofBuilderContext_alloc); + rb_define_method(klass, "initialize", + OneofBuilderContext_initialize, 2); + rb_define_method(klass, "optional", OneofBuilderContext_optional, -1); + cOneofBuilderContext = klass; + rb_gc_register_address(&cOneofBuilderContext); +} + +/* + * call-seq: + * OneofBuilderContext.new(desc, builder) => context + * + * Create a new oneof builder context around the given oneof descriptor and + * builder context. This class is intended to serve as a DSL context to be used + * with #instance_eval. + */ +VALUE OneofBuilderContext_initialize(VALUE _self, + VALUE oneofdef, + VALUE builder) { + DEFINE_SELF(OneofBuilderContext, self, _self); + self->descriptor = oneofdef; + self->builder = builder; + return Qnil; +} + +/* + * call-seq: + * OneofBuilderContext.optional(name, type, number, type_class = nil) + * + * Defines a new optional field in this oneof with the given type, tag number, + * and type class (for message and enum fields). The type must be a Ruby symbol + * (as accepted by FieldDescriptor#type=) and the type_class must be a string, + * if present (as accepted by FieldDescriptor#submsg_name=). + */ +VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { + DEFINE_SELF(OneofBuilderContext, self, _self); + + if (argc < 3) { + rb_raise(rb_eArgError, "Expected at least 3 arguments."); + } + VALUE name = argv[0]; + VALUE type = argv[1]; + VALUE number = argv[2]; + VALUE type_class = (argc > 3) ? argv[3] : Qnil; + + return msgdef_add_field(self->descriptor, "optional", + name, type, number, type_class); +} + // ----------------------------------------------------------------------------- // EnumBuilderContext. // ----------------------------------------------------------------------------- @@ -1322,8 +1609,10 @@ VALUE Builder_add_enum(VALUE _self, VALUE name) { static void validate_msgdef(const upb_msgdef* msgdef) { // Verify that no required fields exist. proto3 does not support these. - upb_msg_iter it; - for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); if (upb_fielddef_label(field) == UPB_LABEL_REQUIRED) { rb_raise(rb_eTypeError, "Required fields are unsupported in proto3."); diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index e5e1514b32..26a038b6f6 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -59,6 +59,30 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, return hd; } +typedef struct { + size_t ofs; // union data slot + size_t case_ofs; // oneof_case field + uint32_t tag; // tag number to place in data slot + const upb_msgdef *md; // msgdef, for oneof submessage handler +} oneof_handlerdata_t; + +static const void *newoneofhandlerdata(upb_handlers *h, + uint32_t ofs, + uint32_t case_ofs, + const upb_fielddef *f) { + oneof_handlerdata_t *hd = ALLOC(oneof_handlerdata_t); + hd->ofs = ofs; + hd->case_ofs = case_ofs; + hd->tag = upb_fielddef_number(f); + if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) { + hd->md = upb_fielddef_msgsubdef(f); + } else { + hd->md = NULL; + } + upb_handlers_addcleanup(h, hd, free); + return hd; +} + // A handler that starts a repeated field. Gets the Repeated*Field instance for // this field (such an instance always exists even in an empty message). static void *startseq_handler(void* closure, const void* hd) { @@ -67,8 +91,7 @@ static void *startseq_handler(void* closure, const void* hd) { return (void*)DEREF(msg, *ofs, VALUE); } -// Handlers that append primitive values to a repeated field (a regular Ruby -// array for now). +// Handlers that append primitive values to a repeated field. #define DEFINE_APPEND_HANDLER(type, ctype) \ static bool append##type##_handler(void *closure, const void *hd, \ ctype val) { \ @@ -85,7 +108,7 @@ DEFINE_APPEND_HANDLER(int64, int64_t) DEFINE_APPEND_HANDLER(uint64, uint64_t) DEFINE_APPEND_HANDLER(double, double) -// Appends a string to a repeated field (a regular Ruby array for now). +// Appends a string to a repeated field. static void* appendstr_handler(void *closure, const void *hd, size_t size_hint) { @@ -96,7 +119,7 @@ static void* appendstr_handler(void *closure, return (void*)str; } -// Appends a 'bytes' string to a repeated field (a regular Ruby array for now). +// Appends a 'bytes' string to a repeated field. static void* appendbytes_handler(void *closure, const void *hd, size_t size_hint) { @@ -266,6 +289,75 @@ static map_handlerdata_t* new_map_handlerdata( return hd; } +// Handlers that set primitive values in oneofs. +#define DEFINE_ONEOF_HANDLER(type, ctype) \ + static bool oneof##type##_handler(void *closure, const void *hd, \ + ctype val) { \ + const oneof_handlerdata_t *oneofdata = hd; \ + DEREF(closure, oneofdata->case_ofs, uint32_t) = oneofdata->tag; \ + DEREF(closure, oneofdata->ofs, ctype) = val; \ + return true; \ + } + +DEFINE_ONEOF_HANDLER(bool, bool) +DEFINE_ONEOF_HANDLER(int32, int32_t) +DEFINE_ONEOF_HANDLER(uint32, uint32_t) +DEFINE_ONEOF_HANDLER(float, float) +DEFINE_ONEOF_HANDLER(int64, int64_t) +DEFINE_ONEOF_HANDLER(uint64, uint64_t) +DEFINE_ONEOF_HANDLER(double, double) + +#undef DEFINE_ONEOF_HANDLER + +// Handlers for strings in a oneof. +static void *oneofstr_handler(void *closure, + const void *hd, + size_t size_hint) { + MessageHeader* msg = closure; + const oneof_handlerdata_t *oneofdata = hd; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyStringUtf8Encoding); + DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + DEREF(msg, oneofdata->ofs, VALUE) = str; + return (void*)str; +} + +static void *oneofbytes_handler(void *closure, + const void *hd, + size_t size_hint) { + MessageHeader* msg = closure; + const oneof_handlerdata_t *oneofdata = hd; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyString8bitEncoding); + DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + DEREF(msg, oneofdata->ofs, VALUE) = str; + return (void*)str; +} + +// Handler for a submessage field in a oneof. +static void *oneofsubmsg_handler(void *closure, + const void *hd) { + MessageHeader* msg = closure; + const oneof_handlerdata_t *oneofdata = hd; + uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t); + DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + + VALUE subdesc = + get_def_obj((void*)oneofdata->md); + VALUE subklass = Descriptor_msgclass(subdesc); + + if (oldcase != oneofdata->tag || + DEREF(msg, oneofdata->ofs, VALUE) == Qnil) { + DEREF(msg, oneofdata->ofs, VALUE) = + rb_class_new_instance(0, NULL, subklass); + } + + VALUE submsg_rb = DEREF(msg, oneofdata->ofs, VALUE); + MessageHeader* submsg; + TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); + return submsg; +} + // Set up handlers for a repeated field. static void add_handlers_for_repeated_field(upb_handlers *h, const upb_fielddef *f, @@ -383,6 +475,53 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, offsetof(map_parse_frame_t, value_storage)); } +// Set up handlers for a oneof field. +static void add_handlers_for_oneof_field(upb_handlers *h, + const upb_fielddef *f, + size_t offset, + size_t oneof_case_offset) { + + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata( + &attr, newoneofhandlerdata(h, offset, oneof_case_offset, f)); + + switch (upb_fielddef_type(f)) { + +#define SET_HANDLER(utype, ltype) \ + case utype: \ + upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \ + break; + + SET_HANDLER(UPB_TYPE_BOOL, bool); + SET_HANDLER(UPB_TYPE_INT32, int32); + SET_HANDLER(UPB_TYPE_UINT32, uint32); + SET_HANDLER(UPB_TYPE_ENUM, int32); + SET_HANDLER(UPB_TYPE_FLOAT, float); + SET_HANDLER(UPB_TYPE_INT64, int64); + SET_HANDLER(UPB_TYPE_UINT64, uint64); + SET_HANDLER(UPB_TYPE_DOUBLE, double); + +#undef SET_HANDLER + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; + upb_handlers_setstartstr(h, f, is_bytes ? + oneofbytes_handler : oneofstr_handler, + &attr); + upb_handlers_setstring(h, f, stringdata_handler, NULL); + break; + } + case UPB_TYPE_MESSAGE: { + upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr); + break; + } + } + + upb_handlerattr_uninit(&attr); +} + + static void add_handlers_for_message(const void *closure, upb_handlers *h) { const upb_msgdef* msgdef = upb_handlers_msgdef(h); Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef)); @@ -402,16 +541,20 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { desc->layout = create_layout(desc->msgdef); } - upb_msg_iter i; - - for (upb_msg_begin(&i, desc->msgdef); - !upb_msg_done(&i); - upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, desc->msgdef); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - size_t offset = desc->layout->offsets[upb_fielddef_index(f)] + + size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset + + sizeof(MessageHeader); + size_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + sizeof(MessageHeader); - if (is_map_field(f)) { + if (upb_fielddef_containingoneof(f)) { + add_handlers_for_oneof_field(h, f, offset, oneof_case_offset); + } else if (is_map_field(f)) { add_handlers_for_mapfield(h, f, offset, desc); } else if (upb_fielddef_isseq(f)) { add_handlers_for_repeated_field(h, f, offset); @@ -804,13 +947,28 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, MessageHeader* msg; TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - upb_msg_iter i; - for (upb_msg_begin(&i, desc->msgdef); - !upb_msg_done(&i); - upb_msg_next(&i)) { + upb_msg_field_iter i; + for (upb_msg_field_begin(&i, desc->msgdef); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); uint32_t offset = - desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader); + desc->layout->fields[upb_fielddef_index(f)].offset + + sizeof(MessageHeader); + uint32_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + + sizeof(MessageHeader); + + if (upb_fielddef_containingoneof(f)) { + // For a oneof, check that this field is actually present -- skip all the + // below if not. + if (DEREF(msg, oneof_case_offset, uint32_t) != + upb_fielddef_number(f)) { + continue; + } + // Otherwise, fall through to the appropriate singular-field handler + // below. + } if (is_map_field(f)) { VALUE map = DEREF(msg, offset, VALUE); diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index 3055270514..d2d3503386 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -77,8 +77,10 @@ void Init_protobuf_c() { DescriptorPool_register(protobuf); Descriptor_register(protobuf); FieldDescriptor_register(protobuf); + OneofDescriptor_register(protobuf); EnumDescriptor_register(protobuf); MessageBuilderContext_register(internal); + OneofBuilderContext_register(internal); EnumBuilderContext_register(internal); Builder_register(internal); RepeatedField_register(protobuf); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 88ae62e4fa..84f318b3eb 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -43,6 +43,7 @@ struct Descriptor; struct FieldDescriptor; struct EnumDescriptor; struct MessageLayout; +struct MessageField; struct MessageHeader; struct MessageBuilderContext; struct EnumBuilderContext; @@ -51,10 +52,13 @@ struct Builder; typedef struct DescriptorPool DescriptorPool; typedef struct Descriptor Descriptor; typedef struct FieldDescriptor FieldDescriptor; +typedef struct OneofDescriptor OneofDescriptor; typedef struct EnumDescriptor EnumDescriptor; typedef struct MessageLayout MessageLayout; +typedef struct MessageField MessageField; typedef struct MessageHeader MessageHeader; typedef struct MessageBuilderContext MessageBuilderContext; +typedef struct OneofBuilderContext OneofBuilderContext; typedef struct EnumBuilderContext EnumBuilderContext; typedef struct Builder Builder; @@ -120,6 +124,10 @@ struct FieldDescriptor { const upb_fielddef* fielddef; }; +struct OneofDescriptor { + const upb_oneofdef* oneofdef; +}; + struct EnumDescriptor { const upb_enumdef* enumdef; VALUE module; // begins as nil @@ -130,6 +138,11 @@ struct MessageBuilderContext { VALUE builder; }; +struct OneofBuilderContext { + VALUE descriptor; + VALUE builder; +}; + struct EnumBuilderContext { VALUE enumdesc; }; @@ -144,6 +157,7 @@ extern VALUE cDescriptor; extern VALUE cFieldDescriptor; extern VALUE cEnumDescriptor; extern VALUE cMessageBuilderContext; +extern VALUE cOneofBuilderContext; extern VALUE cEnumBuilderContext; extern VALUE cBuilder; @@ -175,6 +189,9 @@ VALUE Descriptor_name_set(VALUE _self, VALUE str); VALUE Descriptor_each(VALUE _self); VALUE Descriptor_lookup(VALUE _self, VALUE name); VALUE Descriptor_add_field(VALUE _self, VALUE obj); +VALUE Descriptor_add_oneof(VALUE _self, VALUE obj); +VALUE Descriptor_oneofs(VALUE _self); +VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name); VALUE Descriptor_msgclass(VALUE _self); extern const rb_data_type_t _Descriptor_type; @@ -199,6 +216,16 @@ VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value); upb_fieldtype_t ruby_to_fieldtype(VALUE type); VALUE fieldtype_to_ruby(upb_fieldtype_t type); +void OneofDescriptor_mark(void* _self); +void OneofDescriptor_free(void* _self); +VALUE OneofDescriptor_alloc(VALUE klass); +void OneofDescriptor_register(VALUE module); +OneofDescriptor* ruby_to_OneofDescriptor(VALUE value); +VALUE OneofDescriptor_name(VALUE _self); +VALUE OneofDescriptor_name_set(VALUE _self, VALUE value); +VALUE OneofDescriptor_add_field(VALUE _self, VALUE field); +VALUE OneofDescriptor_each(VALUE _self, VALUE field); + void EnumDescriptor_mark(void* _self); void EnumDescriptor_free(void* _self); VALUE EnumDescriptor_alloc(VALUE klass); @@ -225,6 +252,17 @@ VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self); VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self); VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self); VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self); +VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name); + +void OneofBuilderContext_mark(void* _self); +void OneofBuilderContext_free(void* _self); +VALUE OneofBuilderContext_alloc(VALUE klass); +void OneofBuilderContext_register(VALUE module); +OneofBuilderContext* ruby_to_OneofBuilderContext(VALUE value); +VALUE OneofBuilderContext_initialize(VALUE _self, + VALUE descriptor, + VALUE builder); +VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self); void EnumBuilderContext_mark(void* _self); void EnumBuilderContext_free(void* _self); @@ -247,7 +285,7 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb); // Native slot storage abstraction. // ----------------------------------------------------------------------------- -#define NATIVE_SLOT_MAX_SIZE sizeof(void*) +#define NATIVE_SLOT_MAX_SIZE sizeof(uint64_t) size_t native_slot_size(upb_fieldtype_t type); void native_slot_set(upb_fieldtype_t type, @@ -384,9 +422,16 @@ VALUE Map_iter_value(Map_iter* iter); // Message layout / storage. // ----------------------------------------------------------------------------- +#define MESSAGE_FIELD_NO_CASE ((size_t)-1) + +struct MessageField { + size_t offset; + size_t case_offset; // for oneofs, a uint32. Else, MESSAGE_FIELD_NO_CASE. +}; + struct MessageLayout { const upb_msgdef* msgdef; - size_t* offsets; + MessageField* fields; size_t size; }; diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index 14f49d44e9..d526679a44 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -369,21 +369,79 @@ const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) { MessageLayout* create_layout(const upb_msgdef* msgdef) { MessageLayout* layout = ALLOC(MessageLayout); int nfields = upb_msgdef_numfields(msgdef); - layout->offsets = ALLOC_N(size_t, nfields); + layout->fields = ALLOC_N(MessageField, nfields); - upb_msg_iter it; + upb_msg_field_iter it; size_t off = 0; - for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { + for (upb_msg_field_begin(&it, msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); + + if (upb_fielddef_containingoneof(field)) { + // Oneofs are handled separately below. + continue; + } + + // Allocate |field_size| bytes for this field in the layout. size_t field_size = 0; if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { field_size = sizeof(VALUE); } else { field_size = native_slot_size(upb_fielddef_type(field)); } - // align current offset + // Align current offset up to |size| granularity. + off = (off + field_size - 1) & ~(field_size - 1); + layout->fields[upb_fielddef_index(field)].offset = off; + layout->fields[upb_fielddef_index(field)].case_offset = MESSAGE_FIELD_NO_CASE; + off += field_size; + } + + // Handle oneofs now -- we iterate over oneofs specifically and allocate only + // one slot per oneof. + // + // We assign all value slots first, then pack the 'case' fields at the end, + // since in the common case (modern 64-bit platform) these are 8 bytes and 4 + // bytes respectively and we want to avoid alignment overhead. + upb_msg_oneof_iter oit; + for (upb_msg_oneof_begin(&oit, msgdef); + !upb_msg_oneof_done(&oit); + upb_msg_oneof_next(&oit)) { + const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); + + // Always allocate NATIVE_SLOT_MAX_SIZE bytes, but share the slot between + // all fields. + size_t field_size = NATIVE_SLOT_MAX_SIZE; + // Align the offset. off = (off + field_size - 1) & ~(field_size - 1); - layout->offsets[upb_fielddef_index(field)] = off; + // Assign all fields in the oneof this same offset. + upb_oneof_iter fit; + for (upb_oneof_begin(&fit, oneof); + !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* field = upb_oneof_iter_field(&fit); + layout->fields[upb_fielddef_index(field)].offset = off; + } + off += field_size; + } + + // Now the case fields. + for (upb_msg_oneof_begin(&oit, msgdef); + !upb_msg_oneof_done(&oit); + upb_msg_oneof_next(&oit)) { + const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); + + size_t field_size = sizeof(uint32_t); + // Align the offset. + off = (off + field_size - 1) & ~(field_size - 1); + // Assign all fields in the oneof this same offset. + upb_oneof_iter fit; + for (upb_oneof_begin(&fit, oneof); + !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* field = upb_oneof_iter_field(&fit); + layout->fields[upb_fielddef_index(field)].case_offset = off; + } off += field_size; } @@ -396,7 +454,7 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { } void free_layout(MessageLayout* layout) { - xfree(layout->offsets); + xfree(layout->fields); upb_msgdef_unref(layout->msgdef, &layout->msgdef); xfree(layout); } @@ -419,8 +477,18 @@ VALUE layout_get(MessageLayout* layout, const void* storage, const upb_fielddef* field) { void* memory = ((uint8_t *)storage) + - layout->offsets[upb_fielddef_index(field)]; - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + if (*oneof_case != upb_fielddef_number(field)) { + return Qnil; + } + return native_slot_get(upb_fielddef_type(field), + field_type_class(field), + memory); + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { return *((VALUE *)memory); } else { return native_slot_get(upb_fielddef_type(field), @@ -485,8 +553,27 @@ void layout_set(MessageLayout* layout, const upb_fielddef* field, VALUE val) { void* memory = ((uint8_t *)storage) + - layout->offsets[upb_fielddef_index(field)]; - if (is_map_field(field)) { + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + if (val == Qnil) { + // Assigning nil to a oneof field clears the oneof completely. + *oneof_case = 0; + memset(memory, 0, NATIVE_SLOT_MAX_SIZE); + } else { + // Set the oneof case *first* in case a GC is triggered during + // native_slot_set(): layout_mark() depends on oneof_case to know whether + // the slot may be a Ruby VALUE and so we need that lifetime to start + // before we could possibly stick a VALUE in it. + *oneof_case = upb_fielddef_number(field); + // We just overwrite the value directly if we changed oneof cases: + // native_slot_set() does not depend on the old value in memory. + native_slot_set(upb_fielddef_type(field), field_type_class(field), + memory, val); + } + } else if (is_map_field(field)) { check_map_field_type(val, field); DEREF(memory, VALUE) = val; } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { @@ -500,15 +587,20 @@ void layout_set(MessageLayout* layout, void layout_init(MessageLayout* layout, void* storage) { - upb_msg_iter it; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); void* memory = ((uint8_t *)storage) + - layout->offsets[upb_fielddef_index(field)]; - - if (is_map_field(field)) { + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + memset(memory, 0, NATIVE_SLOT_MAX_SIZE); + *oneof_case = 0; + } else if (is_map_field(field)) { VALUE map = Qnil; const upb_fielddef* key_field = map_field_key(field); @@ -555,15 +647,21 @@ void layout_init(MessageLayout* layout, } void layout_mark(MessageLayout* layout, void* storage) { - upb_msg_iter it; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); void* memory = ((uint8_t *)storage) + - layout->offsets[upb_fielddef_index(field)]; + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].case_offset); - if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (upb_fielddef_containingoneof(field)) { + if (*oneof_case == upb_fielddef_number(field)) { + native_slot_mark(upb_fielddef_type(field), memory); + } + } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { rb_gc_mark(DEREF(memory, VALUE)); } else { native_slot_mark(upb_fielddef_type(field), memory); @@ -572,17 +670,26 @@ void layout_mark(MessageLayout* layout, void* storage) { } void layout_dup(MessageLayout* layout, void* to, void* from) { - upb_msg_iter it; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); void* to_memory = ((uint8_t *)to) + - layout->offsets[upb_fielddef_index(field)]; + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* to_oneof_case = (uint32_t *)(((uint8_t *)to) + + layout->fields[upb_fielddef_index(field)].case_offset); void* from_memory = ((uint8_t *)from) + - layout->offsets[upb_fielddef_index(field)]; - - if (is_map_field(field)) { + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* from_oneof_case = (uint32_t *)(((uint8_t *)from) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + if (*from_oneof_case == upb_fielddef_number(field)) { + *to_oneof_case = *from_oneof_case; + native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); + } + } else if (is_map_field(field)) { DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE)); } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE)); @@ -593,17 +700,26 @@ void layout_dup(MessageLayout* layout, void* to, void* from) { } void layout_deep_copy(MessageLayout* layout, void* to, void* from) { - upb_msg_iter it; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); void* to_memory = ((uint8_t *)to) + - layout->offsets[upb_fielddef_index(field)]; + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* to_oneof_case = (uint32_t *)(((uint8_t *)to) + + layout->fields[upb_fielddef_index(field)].case_offset); void* from_memory = ((uint8_t *)from) + - layout->offsets[upb_fielddef_index(field)]; - - if (is_map_field(field)) { + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* from_oneof_case = (uint32_t *)(((uint8_t *)from) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + if (*from_oneof_case == upb_fielddef_number(field)) { + *to_oneof_case = *from_oneof_case; + native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory); + } + } else if (is_map_field(field)) { DEREF(to_memory, VALUE) = Map_deep_copy(DEREF(from_memory, VALUE)); } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { @@ -616,22 +732,38 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) { } VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { - upb_msg_iter it; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + upb_msg_field_iter it; + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); void* msg1_memory = ((uint8_t *)msg1) + - layout->offsets[upb_fielddef_index(field)]; + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* msg1_oneof_case = (uint32_t *)(((uint8_t *)msg1) + + layout->fields[upb_fielddef_index(field)].case_offset); void* msg2_memory = ((uint8_t *)msg2) + - layout->offsets[upb_fielddef_index(field)]; - - if (is_map_field(field)) { - return Map_eq(DEREF(msg1_memory, VALUE), - DEREF(msg2_memory, VALUE)); + layout->fields[upb_fielddef_index(field)].offset; + uint32_t* msg2_oneof_case = (uint32_t *)(((uint8_t *)msg2) + + layout->fields[upb_fielddef_index(field)].case_offset); + + if (upb_fielddef_containingoneof(field)) { + if (*msg1_oneof_case != *msg2_oneof_case || + (*msg1_oneof_case == upb_fielddef_number(field) && + !native_slot_eq(upb_fielddef_type(field), + msg1_memory, + msg2_memory))) { + return Qfalse; + } + } else if (is_map_field(field)) { + if (!Map_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE))) { + return Qfalse; + } } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - return RepeatedField_eq(DEREF(msg1_memory, VALUE), - DEREF(msg2_memory, VALUE)); + if (!RepeatedField_eq(DEREF(msg1_memory, VALUE), + DEREF(msg2_memory, VALUE))) { + return Qfalse; + } } else { if (!native_slot_eq(upb_fielddef_type(field), msg1_memory, msg2_memory)) { @@ -643,12 +775,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { } VALUE layout_hash(MessageLayout* layout, void* storage) { - upb_msg_iter it; + upb_msg_field_iter it; st_index_t h = rb_hash_start(0); VALUE hash_sym = rb_intern("hash"); - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); VALUE field_val = layout_get(layout, storage, field); h = rb_hash_uint(h, NUM2LONG(rb_funcall(field_val, hash_sym, 0))); @@ -661,11 +793,11 @@ VALUE layout_hash(MessageLayout* layout, void* storage) { VALUE layout_inspect(MessageLayout* layout, void* storage) { VALUE str = rb_str_new2(""); - upb_msg_iter it; + upb_msg_field_iter it; bool first = true; - for (upb_msg_begin(&it, layout->msgdef); - !upb_msg_done(&it); - upb_msg_next(&it)) { + for (upb_msg_field_begin(&it, layout->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); VALUE field_val = layout_get(layout, storage, field); diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index 571c809f58..c223c568b2 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -247,10 +247,12 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { upb_fielddef **fields = malloc(n * sizeof(*fields)); if (!fields) return false; - upb_msg_iter j; + upb_msg_field_iter j; int i; m->submsg_field_count = 0; - for(i = 0, upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j), i++) { + for(i = 0, upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j), i++) { upb_fielddef *f = upb_msg_iter_field(&j); assert(f->msg.def == m); if (!upb_validate_field(f, s)) { @@ -286,7 +288,9 @@ static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { upb_selector_t sel; upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v); upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v); - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + for(upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { upb_fielddef *f = upb_msg_iter_field(&j); // These calls will assert-fail in upb_table if the value already exists. TRY(UPB_HANDLER_INT32); @@ -544,6 +548,9 @@ static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, if (upb_fielddef_containingtype(f)) { visit(r, UPB_UPCAST2(upb_fielddef_containingtype(f)), closure); } + if (upb_fielddef_containingoneof(f)) { + visit(r, UPB_UPCAST2(upb_fielddef_containingoneof(f)), closure); + } if (upb_fielddef_subdef(f)) { visit(r, UPB_UPCAST(upb_fielddef_subdef(f)), closure); } @@ -619,6 +626,7 @@ upb_fielddef *upb_fielddef_new(const void *owner) { } f->msg.def = NULL; f->sub.def = NULL; + f->oneof = NULL; f->subdef_is_symbolic = false; f->msg_is_symbolic = false; f->label_ = UPB_LABEL_OPTIONAL; @@ -748,6 +756,10 @@ const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { return f->msg_is_symbolic ? NULL : f->msg.def; } +const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { + return f->oneof; +} + upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) { return (upb_msgdef*)upb_fielddef_containingtype(f); } @@ -776,6 +788,10 @@ bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name, } bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) { + if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) { + upb_status_seterrmsg(s, "Already added to message or oneof"); + return false; + } return upb_def_setfullname(UPB_UPCAST(f), name, s); } @@ -1247,15 +1263,25 @@ bool upb_fielddef_checkdescriptortype(int32_t type) { static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit, void *closure) { const upb_msgdef *m = (const upb_msgdef*)r; - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); visit(r, UPB_UPCAST2(f), closure); } + upb_msg_oneof_iter o; + for(upb_msg_oneof_begin(&o, m); + !upb_msg_oneof_done(&o); + upb_msg_oneof_next(&o)) { + upb_oneofdef *f = upb_msg_iter_oneof(&o); + visit(r, UPB_UPCAST2(f), closure); + } } static void freemsg(upb_refcounted *r) { upb_msgdef *m = (upb_msgdef*)r; + upb_strtable_uninit(&m->ntoo); upb_strtable_uninit(&m->ntof); upb_inttable_uninit(&m->itof); upb_def_uninit(UPB_UPCAST(m)); @@ -1267,14 +1293,17 @@ upb_msgdef *upb_msgdef_new(const void *owner) { upb_msgdef *m = malloc(sizeof(*m)); if (!m) return NULL; if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2; - if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2; - if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1; + if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3; + if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2; + if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1; m->map_entry = false; return m; err1: - upb_inttable_uninit(&m->itof); + upb_strtable_uninit(&m->ntof); err2: + upb_inttable_uninit(&m->itof); +err3: free(m); return NULL; } @@ -1286,14 +1315,28 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { upb_def_fullname(UPB_UPCAST(m)), NULL); newm->map_entry = m->map_entry; UPB_ASSERT_VAR(ok, ok); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); + // Fields in oneofs are dup'd below. + if (upb_fielddef_containingoneof(f)) continue; if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) { upb_msgdef_unref(newm, owner); return NULL; } } + upb_msg_oneof_iter o; + for(upb_msg_oneof_begin(&o, m); + !upb_msg_oneof_done(&o); + upb_msg_oneof_next(&o)) { + upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f); + if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) { + upb_msgdef_unref(newm, owner); + return NULL; + } + } return newm; } @@ -1332,6 +1375,35 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, return upb_def_setfullname(UPB_UPCAST(m), fullname, s); } +// Helper: check that the field |f| is safe to add to msgdef |m|. Set an error +// on status |s| and return false if not. +static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f, + upb_status *s) { + if (upb_fielddef_containingtype(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a message"); + return false; + } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { + upb_status_seterrmsg(s, "field name or number were not set"); + return false; + } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) || + upb_msgdef_itof(m, upb_fielddef_number(f))) { + upb_status_seterrmsg(s, "duplicate field name or number for field"); + return false; + } + return true; +} + +static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) { + release_containingtype(f); + f->msg.def = m; + f->msg_is_symbolic = false; + upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_ref2(f, m); + upb_ref2(m, f); + if (ref_donor) upb_fielddef_unref(f, ref_donor); +} + bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, upb_status *s) { // TODO: extensions need to have a separate namespace, because proto2 allows a @@ -1345,28 +1417,65 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, // We also need to validate that the field number is in an extension range iff // it is an extension. + // This method is idempotent. Check if |f| is already part of this msgdef and + // return immediately if so. + if (upb_fielddef_containingtype(f) == m) { + return true; + } + // Check constraints for all fields before performing any action. - if (upb_fielddef_containingtype(f) != NULL) { - upb_status_seterrmsg(s, "fielddef already belongs to a message"); - return false; - } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { - upb_status_seterrmsg(s, "field name or number were not set"); + if (!check_field_add(m, f, s)) { return false; - } else if(upb_msgdef_itof(m, upb_fielddef_number(f)) || - upb_msgdef_ntofz(m, upb_fielddef_name(f))) { - upb_status_seterrmsg(s, "duplicate field name or number"); + } else if (upb_fielddef_containingoneof(f) != NULL) { + // Fields in a oneof can only be added by adding the oneof to the msgdef. + upb_status_seterrmsg(s, "fielddef is part of a oneof"); return false; } // Constraint checks ok, perform the action. - release_containingtype(f); - f->msg.def = m; - f->msg_is_symbolic = false; - upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); - upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); - upb_ref2(f, m); - upb_ref2(m, f); - if (ref_donor) upb_fielddef_unref(f, ref_donor); + add_field(m, f, ref_donor); + return true; +} + +bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor, + upb_status *s) { + // Check various conditions that would prevent this oneof from being added. + if (upb_oneofdef_containingtype(o)) { + upb_status_seterrmsg(s, "oneofdef already belongs to a message"); + return false; + } else if (upb_oneofdef_name(o) == NULL) { + upb_status_seterrmsg(s, "oneofdef name was not set"); + return false; + } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) { + upb_status_seterrmsg(s, "duplicate oneof name"); + return false; + } + + // Check that all of the oneof's fields do not conflict with names or numbers + // of fields already in the message. + upb_oneof_iter it; + for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { + const upb_fielddef *f = upb_oneof_iter_field(&it); + if (!check_field_add(m, f, s)) { + return false; + } + } + + // Everything checks out -- commit now. + + // Add oneof itself first. + o->parent = m; + upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o)); + upb_ref2(o, m); + upb_ref2(m, o); + + // Add each field of the oneof directly to the msgdef. + for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { + upb_fielddef *f = upb_oneof_iter_field(&it); + add_field(m, f, NULL); + } + + if (ref_donor) upb_oneofdef_unref(o, ref_donor); return true; } @@ -1384,10 +1493,21 @@ const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, upb_value_getptr(val) : NULL; } +const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, + size_t len) { + upb_value val; + return upb_strtable_lookup2(&m->ntoo, name, len, &val) ? + upb_value_getptr(val) : NULL; +} + int upb_msgdef_numfields(const upb_msgdef *m) { return upb_strtable_count(&m->ntof); } +int upb_msgdef_numoneofs(const upb_msgdef *m) { + return upb_strtable_count(&m->ntoo); +} + void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) { assert(!upb_msgdef_isfrozen(m)); m->map_entry = map_entry; @@ -1397,19 +1517,246 @@ bool upb_msgdef_mapentry(const upb_msgdef *m) { return m->map_entry; } -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) { +void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { upb_inttable_begin(iter, &m->itof); } -void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } +void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } -bool upb_msg_done(const upb_msg_iter *iter) { return upb_inttable_done(iter); } +bool upb_msg_field_done(const upb_msg_field_iter *iter) { + return upb_inttable_done(iter); +} -upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter) { +upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); } -void upb_msg_iter_setdone(upb_msg_iter *iter) { +void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { + upb_inttable_iter_setdone(iter); +} + +void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { + upb_strtable_begin(iter, &m->ntoo); +} + +void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); } + +bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { + return upb_strtable_done(iter); +} + +upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { + return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter)); +} + +void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { + upb_strtable_iter_setdone(iter); +} + +/* upb_oneofdef ***************************************************************/ + +static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit, + void *closure) { + const upb_oneofdef *o = (const upb_oneofdef*)r; + upb_oneof_iter i; + for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { + const upb_fielddef *f = upb_oneof_iter_field(&i); + visit(r, UPB_UPCAST2(f), closure); + } + if (o->parent) { + visit(r, UPB_UPCAST2(o->parent), closure); + } +} + +static void freeoneof(upb_refcounted *r) { + upb_oneofdef *o = (upb_oneofdef*)r; + upb_strtable_uninit(&o->ntof); + upb_inttable_uninit(&o->itof); + upb_def_uninit(UPB_UPCAST(o)); + free(o); +} + +upb_oneofdef *upb_oneofdef_new(const void *owner) { + static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof}; + upb_oneofdef *o = malloc(sizeof(*o)); + o->parent = NULL; + if (!o) return NULL; + if (!upb_def_init(UPB_UPCAST(o), UPB_DEF_ONEOF, &vtbl, owner)) goto err2; + if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2; + if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1; + return o; + +err1: + upb_inttable_uninit(&o->itof); +err2: + free(o); + return NULL; +} + +upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) { + upb_oneofdef *newo = upb_oneofdef_new(owner); + if (!newo) return NULL; + bool ok = upb_def_setfullname(UPB_UPCAST(newo), + upb_def_fullname(UPB_UPCAST(o)), NULL); + UPB_ASSERT_VAR(ok, ok); + upb_oneof_iter i; + for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { + upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f); + if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) { + upb_oneofdef_unref(newo, owner); + return NULL; + } + } + return newo; +} + +bool upb_oneofdef_isfrozen(const upb_oneofdef *o) { + return upb_def_isfrozen(UPB_UPCAST(o)); +} + +void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner) { + upb_def_ref(UPB_UPCAST(o), owner); +} + +void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner) { + upb_def_unref(UPB_UPCAST(o), owner); +} + +void upb_oneofdef_donateref(const upb_oneofdef *o, const void *from, + const void *to) { + upb_def_donateref(UPB_UPCAST(o), from, to); +} + +void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner) { + upb_def_checkref(UPB_UPCAST(o), owner); +} + +const char *upb_oneofdef_name(const upb_oneofdef *o) { + return upb_def_fullname(UPB_UPCAST(o)); +} + +bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname, + upb_status *s) { + if (upb_oneofdef_containingtype(o)) { + upb_status_seterrmsg(s, "oneof already added to a message"); + return false; + } + return upb_def_setfullname(UPB_UPCAST(o), fullname, s); +} + +const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { + return o->parent; +} + +int upb_oneofdef_numfields(const upb_oneofdef *o) { + return upb_strtable_count(&o->ntof); +} + +bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, + const void *ref_donor, + upb_status *s) { + assert(!upb_oneofdef_isfrozen(o)); + assert(!o->parent || !upb_msgdef_isfrozen(o->parent)); + + // This method is idempotent. Check if |f| is already part of this oneofdef + // and return immediately if so. + if (upb_fielddef_containingoneof(f) == o) { + return true; + } + + // The field must have an OPTIONAL label. + if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { + upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label"); + return false; + } + + // Check that no field with this name or number exists already in the oneof. + // Also check that the field is not already part of a oneof. + if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { + upb_status_seterrmsg(s, "field name or number were not set"); + return false; + } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) || + upb_oneofdef_ntofz(o, upb_fielddef_name(f))) { + upb_status_seterrmsg(s, "duplicate field name or number"); + return false; + } else if (upb_fielddef_containingoneof(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a oneof"); + return false; + } + + // We allow adding a field to the oneof either if the field is not part of a + // msgdef, or if it is and we are also part of the same msgdef. + if (o->parent == NULL) { + // If we're not in a msgdef, the field cannot be either. Otherwise we would + // need to magically add this oneof to a msgdef to remain consistent, which + // is surprising behavior. + if (upb_fielddef_containingtype(f) != NULL) { + upb_status_seterrmsg(s, "fielddef already belongs to a message, but " + "oneof does not"); + return false; + } + } else { + // If we're in a msgdef, the user can add fields that either aren't in any + // msgdef (in which case they're added to our msgdef) or already a part of + // our msgdef. + if (upb_fielddef_containingtype(f) != NULL && + upb_fielddef_containingtype(f) != o->parent) { + upb_status_seterrmsg(s, "fielddef belongs to a different message " + "than oneof"); + return false; + } + } + + // Commit phase. First add the field to our parent msgdef, if any, because + // that may fail; then add the field to our own tables. + + if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) { + if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) { + return false; + } + } + + release_containingtype(f); + f->oneof = o; + upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f)); + upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f)); + upb_ref2(f, o); + upb_ref2(o, f); + if (ref_donor) upb_fielddef_unref(f, ref_donor); + + return true; +} + +const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, + const char *name, size_t length) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, length, &val) ? + upb_value_getptr(val) : NULL; +} + +const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { + upb_value val; + return upb_inttable_lookup32(&o->itof, num, &val) ? + upb_value_getptr(val) : NULL; +} + +void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { + upb_inttable_begin(iter, &o->itof); +} + +void upb_oneof_next(upb_oneof_iter *iter) { + upb_inttable_next(iter); +} + +bool upb_oneof_done(upb_oneof_iter *iter) { + return upb_inttable_done(iter); +} + +upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { + return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); +} + +void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } /* @@ -1452,8 +1799,10 @@ static void freehandlers(upb_refcounted *r) { static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, void *closure) { const upb_handlers *h = (const upb_handlers*)r; - upb_msg_iter i; - for(upb_msg_begin(&i, h->msg); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, h->msg); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_issubmsg(f)) continue; const upb_handlers *sub = upb_handlers_getsubhandlers(h, f); @@ -1482,8 +1831,10 @@ static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, // For each submessage field, get or create a handlers object and set it as // the subhandlers. - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_issubmsg(f)) continue; @@ -1840,8 +2191,10 @@ bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { // Check that there are no closure mismatches due to missing Start* handlers // or subhandlers with different type-level types. - upb_msg_iter j; - for(upb_msg_begin(&j, h->msg); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_msg_field_iter j; + for(upb_msg_field_begin(&j, h->msg); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { const upb_fielddef *f = upb_msg_iter_field(&j); if (upb_fielddef_isseq(f)) { @@ -3114,8 +3467,10 @@ static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, // For messages, continue the recursion by visiting all subdefs. const upb_msgdef *m = upb_dyncast_msgdef(def); if (m) { - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); if (!upb_fielddef_hassubdef(f)) continue; // |= to avoid short-circuit; we need its side-effects. @@ -3268,8 +3623,10 @@ bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, // Type names are resolved relative to the message in which they appear. const char *base = upb_msgdef_fullname(m); - upb_msg_iter j; - for(upb_msg_begin(&j, m); !upb_msg_done(&j); upb_msg_next(&j)) { + upb_msg_field_iter j; + for(upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j)) { upb_fielddef *f = upb_msg_iter_field(&j); const char *name = upb_fielddef_subdefname(f); if (name && !upb_fielddef_subdef(f)) { @@ -3416,16 +3773,12 @@ char *upb_strdup(const char *s) { } char *upb_strdup2(const char *s, size_t len) { - // Prevent overflow errors. - if (len == SIZE_MAX) return NULL; // Always null-terminate, even if binary data; but don't rely on the input to // have a null-terminating byte since it may be a raw binary buffer. size_t n = len + 1; char *p = malloc(n); - if (p) { - memcpy(p, s, len); - p[len] = 0; - } + if (p) memcpy(p, s, len); + p[len] = 0; return p; } @@ -6462,8 +6815,10 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); label(c, LABEL_FIELD); uint32_t* start_pc = c->pc; - upb_msg_iter i; - for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_fieldtype_t type = upb_fielddef_type(f); @@ -6513,9 +6868,11 @@ static void find_methods(compiler *c, const upb_handlers *h) { newmethod(h, c->group); // Find submethods. - upb_msg_iter i; + upb_msg_field_iter i; const upb_msgdef *md = upb_handlers_msgdef(h); - for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { + for(upb_msg_field_begin(&i, md); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); const upb_handlers *sub_h; if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && @@ -6557,7 +6914,7 @@ static void set_bytecode_handlers(mgroup *g) { } -/* JIT setup. ******************************************************************/ +/* JIT setup. *****************************************************************/ #ifdef UPB_USE_JIT_X64 @@ -7980,8 +8337,10 @@ static void newhandlers_callback(const void *closure, upb_handlers *h) { upb_handlers_setendmsg(h, endmsg, NULL); const upb_msgdef *m = upb_handlers_msgdef(h); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && upb_fielddef_packed(f); @@ -8443,8 +8802,10 @@ static void onmreg(const void *c, upb_handlers *h) { upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); upb_handlers_setendmsg(h, textprinter_endmsg, NULL); - upb_msg_iter i; - for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + for(upb_msg_field_begin(&i, m); + !upb_msg_field_done(&i); + upb_msg_field_next(&i)) { upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; upb_handlerattr_sethandlerdata(&attr, f); @@ -8857,6 +9218,7 @@ badpadding: // the true value in a contiguous buffer. static void assert_accumulate_empty(upb_json_parser *p) { + UPB_UNUSED(p); assert(p->accumulated == NULL); assert(p->accumulated_len == 0); } @@ -9442,11 +9804,11 @@ static void end_object(upb_json_parser *p) { // final state once, when the closing '"' is seen. -#line 904 "upb/json/parser.rl" +#line 905 "upb/json/parser.rl" -#line 816 "upb/json/parser.c" +#line 817 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 5, 1, 6, 1, 7, 1, 8, 1, @@ -9597,7 +9959,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 907 "upb/json/parser.rl" +#line 908 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -9617,7 +9979,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, capture_resume(parser, buf); -#line 987 "upb/json/parser.c" +#line 988 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -9692,118 +10054,118 @@ _match: switch ( *_acts++ ) { case 0: -#line 819 "upb/json/parser.rl" +#line 820 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 820 "upb/json/parser.rl" +#line 821 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 824 "upb/json/parser.rl" +#line 825 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 825 "upb/json/parser.rl" +#line 826 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_text(parser, p)); } break; case 4: -#line 831 "upb/json/parser.rl" +#line 832 "upb/json/parser.rl" { start_hex(parser); } break; case 5: -#line 832 "upb/json/parser.rl" +#line 833 "upb/json/parser.rl" { hexdigit(parser, p); } break; case 6: -#line 833 "upb/json/parser.rl" +#line 834 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_hex(parser)); } break; case 7: -#line 839 "upb/json/parser.rl" +#line 840 "upb/json/parser.rl" { CHECK_RETURN_TOP(escape(parser, p)); } break; case 8: -#line 845 "upb/json/parser.rl" +#line 846 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 9: -#line 848 "upb/json/parser.rl" +#line 849 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 10: -#line 850 "upb/json/parser.rl" +#line 851 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 11: -#line 855 "upb/json/parser.rl" +#line 856 "upb/json/parser.rl" { start_member(parser); } break; case 12: -#line 856 "upb/json/parser.rl" +#line 857 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_member(parser)); } break; case 13: -#line 859 "upb/json/parser.rl" +#line 860 "upb/json/parser.rl" { clear_member(parser); } break; case 14: -#line 865 "upb/json/parser.rl" +#line 866 "upb/json/parser.rl" { start_object(parser); } break; case 15: -#line 868 "upb/json/parser.rl" +#line 869 "upb/json/parser.rl" { end_object(parser); } break; case 16: -#line 874 "upb/json/parser.rl" +#line 875 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 17: -#line 878 "upb/json/parser.rl" +#line 879 "upb/json/parser.rl" { end_array(parser); } break; case 18: -#line 883 "upb/json/parser.rl" +#line 884 "upb/json/parser.rl" { start_number(parser, p); } break; case 19: -#line 884 "upb/json/parser.rl" +#line 885 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_number(parser, p)); } break; case 20: -#line 886 "upb/json/parser.rl" +#line 887 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 21: -#line 887 "upb/json/parser.rl" +#line 888 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_stringval(parser)); } break; case 22: -#line 889 "upb/json/parser.rl" +#line 890 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, true)); } break; case 23: -#line 891 "upb/json/parser.rl" +#line 892 "upb/json/parser.rl" { CHECK_RETURN_TOP(parser_putbool(parser, false)); } break; case 24: -#line 893 "upb/json/parser.rl" +#line 894 "upb/json/parser.rl" { /* null value */ } break; case 25: -#line 895 "upb/json/parser.rl" +#line 896 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 26: -#line 896 "upb/json/parser.rl" +#line 897 "upb/json/parser.rl" { end_subobject(parser); } break; case 27: -#line 901 "upb/json/parser.rl" +#line 902 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 1173 "upb/json/parser.c" +#line 1174 "upb/json/parser.c" } } @@ -9816,7 +10178,7 @@ _again: _out: {} } -#line 926 "upb/json/parser.rl" +#line 927 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -9865,13 +10227,13 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 1235 "upb/json/parser.c" +#line 1236 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 974 "upb/json/parser.rl" +#line 975 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; accumulate_clear(p); @@ -10327,9 +10689,9 @@ void printer_sethandlers(const void *closure, upb_handlers *h) { } \ break; - upb_msg_iter i; - upb_msg_begin(&i, upb_handlers_msgdef(h)); - for(; !upb_msg_done(&i); upb_msg_next(&i)) { + upb_msg_field_iter i; + upb_msg_field_begin(&i, upb_handlers_msgdef(h)); + for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER; diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h index fbcb8e9991..572f6ad1ba 100644 --- a/ruby/ext/google/protobuf_c/upb.h +++ b/ruby/ext/google/protobuf_c/upb.h @@ -710,6 +710,9 @@ typedef struct { #define UPB_STRTABLE_INIT(count, mask, ctype, size_lg2, entries) \ {{count, mask, ctype, size_lg2, entries}} +#define UPB_EMPTY_STRTABLE_INIT(ctype) \ + UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL) + typedef struct { upb_table t; // For entries that don't fit in the array part. const _upb_value *array; // Array part of the table. See const note above. @@ -1129,6 +1132,7 @@ class Def; class EnumDef; class FieldDef; class MessageDef; +class OneofDef; } #endif @@ -1136,6 +1140,7 @@ UPB_DECLARE_TYPE(upb::Def, upb_def); UPB_DECLARE_TYPE(upb::EnumDef, upb_enumdef); UPB_DECLARE_TYPE(upb::FieldDef, upb_fielddef); UPB_DECLARE_TYPE(upb::MessageDef, upb_msgdef); +UPB_DECLARE_TYPE(upb::OneofDef, upb_oneofdef); // Maximum field number allowed for FieldDefs. This is an inherent limit of the // protobuf wire format. @@ -1159,6 +1164,7 @@ typedef enum { UPB_DEF_MSG, UPB_DEF_FIELD, UPB_DEF_ENUM, + UPB_DEF_ONEOF, UPB_DEF_SERVICE, // Not yet implemented. UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*() } upb_deftype_t; @@ -1443,6 +1449,10 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD, const MessageDef* containing_type() const; const char* containing_type_name(); + // The OneofDef to which this field belongs, or NULL if this field is not part + // of a oneof. + const OneofDef* containing_oneof() const; + // The field's type according to the enum in descriptor.proto. This is not // the same as UPB_TYPE_*, because it distinguishes between (for example) // INT32 and SINT32, whereas our "type" enum does not. This return of @@ -1616,6 +1626,7 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, } sub; // The msgdef or enumdef for this field, if upb_hassubdef(f). bool subdef_is_symbolic; bool msg_is_symbolic; + const upb_oneofdef *oneof; bool default_is_string; bool type_is_set_; // False until type is explicitly set. bool is_extension_; @@ -1631,11 +1642,11 @@ UPB_DEFINE_STRUCT(upb_fielddef, upb_def, )); #define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy, \ - packed, name, num, msgdef, subdef, selector_base, \ + packed, name, num, msgdef, subdef, selector_base, \ index, defaultval, refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef}, \ - {subdef}, false, false, \ + {subdef}, NULL, false, false, \ type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES, true, is_extension, \ lazy, packed, intfmt, tagdelim, type, label, num, selector_base, index \ } @@ -1669,6 +1680,7 @@ bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); +const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f); const char *upb_fielddef_containingtypename(upb_fielddef *f); upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f); @@ -1736,7 +1748,8 @@ UPB_END_EXTERN_C // } /* upb::MessageDef ************************************************************/ -typedef upb_inttable_iter upb_msg_iter; +typedef upb_inttable_iter upb_msg_field_iter; +typedef upb_strtable_iter upb_msg_oneof_iter; // Structure that describes a single .proto message type. // @@ -1766,14 +1779,37 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( // The number of fields that belong to the MessageDef. int field_count() const; + // The number of oneofs that belong to the MessageDef. + int oneof_count() const; + // Adds a field (upb_fielddef object) to a msgdef. Requires that the msgdef // and the fielddefs are mutable. The fielddef's name and number must be // set, and the message may not already contain any field with this name or // number, and this fielddef may not be part of another message. In error // cases false is returned and the msgdef is unchanged. + // + // If the given field is part of a oneof, this call succeeds if and only if + // that oneof is already part of this msgdef. (Note that adding a oneof to a + // msgdef automatically adds all of its fields to the msgdef at the time that + // the oneof is added, so it is usually more idiomatic to add the oneof's + // fields first then add the oneof to the msgdef. This case is supported for + // convenience.) + // + // If |f| is already part of this MessageDef, this method performs no action + // and returns true (success). Thus, this method is idempotent. bool AddField(FieldDef* f, Status* s); bool AddField(const reffed_ptr& f, Status* s); + // Adds a oneof (upb_oneofdef object) to a msgdef. Requires that the msgdef, + // oneof, and any fielddefs are mutable, that the fielddefs contained in the + // oneof do not have any name or number conflicts with existing fields in the + // msgdef, and that the oneof's name is unique among all oneofs in the msgdef. + // If the oneof is added successfully, all of its fields will be added + // directly to the msgdef as well. In error cases, false is returned and the + // msgdef is unchanged. + bool AddOneof(OneofDef* o, Status* s); + bool AddOneof(const reffed_ptr& o, Status* s); + // These return NULL if the field is not found. FieldDef* FindFieldByNumber(uint32_t number); FieldDef* FindFieldByName(const char *name, size_t len); @@ -1797,6 +1833,25 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( return FindFieldByName(str.c_str(), str.size()); } + OneofDef* FindOneofByName(const char* name, size_t len); + const OneofDef* FindOneofByName(const char* name, size_t len) const; + + OneofDef* FindOneofByName(const char* name) { + return FindOneofByName(name, strlen(name)); + } + const OneofDef* FindOneofByName(const char* name) const { + return FindOneofByName(name, strlen(name)); + } + + template + OneofDef* FindOneofByName(const T& str) { + return FindOneofByName(str.c_str(), str.size()); + } + template + const OneofDef* FindOneofByName(const T& str) const { + return FindOneofByName(str.c_str(), str.size()); + } + // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced // with just the name of the submessage. Returns NULL if memory allocation @@ -1812,39 +1867,117 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE( bool mapentry() const; // Iteration over fields. The order is undefined. - class iterator : public std::iterator { + class field_iterator + : public std::iterator { public: - explicit iterator(MessageDef* md); - static iterator end(MessageDef* md); + explicit field_iterator(MessageDef* md); + static field_iterator end(MessageDef* md); void operator++(); FieldDef* operator*() const; - bool operator!=(const iterator& other) const; - bool operator==(const iterator& other) const; + bool operator!=(const field_iterator& other) const; + bool operator==(const field_iterator& other) const; private: - upb_msg_iter iter_; + upb_msg_field_iter iter_; }; - class const_iterator + class const_field_iterator : public std::iterator { public: - explicit const_iterator(const MessageDef* md); - static const_iterator end(const MessageDef* md); + explicit const_field_iterator(const MessageDef* md); + static const_field_iterator end(const MessageDef* md); void operator++(); const FieldDef* operator*() const; - bool operator!=(const const_iterator& other) const; - bool operator==(const const_iterator& other) const; + bool operator!=(const const_field_iterator& other) const; + bool operator==(const const_field_iterator& other) const; private: - upb_msg_iter iter_; + upb_msg_field_iter iter_; }; - iterator begin(); - iterator end(); - const_iterator begin() const; - const_iterator end() const; + // Iteration over oneofs. The order is undefined. + class oneof_iterator + : public std::iterator { + public: + explicit oneof_iterator(MessageDef* md); + static oneof_iterator end(MessageDef* md); + + void operator++(); + OneofDef* operator*() const; + bool operator!=(const oneof_iterator& other) const; + bool operator==(const oneof_iterator& other) const; + + private: + upb_msg_oneof_iter iter_; + }; + + class const_oneof_iterator + : public std::iterator { + public: + explicit const_oneof_iterator(const MessageDef* md); + static const_oneof_iterator end(const MessageDef* md); + + void operator++(); + const OneofDef* operator*() const; + bool operator!=(const const_oneof_iterator& other) const; + bool operator==(const const_oneof_iterator& other) const; + + private: + upb_msg_oneof_iter iter_; + }; + + class FieldAccessor { + public: + explicit FieldAccessor(MessageDef* msg) : msg_(msg) {} + field_iterator begin() { return msg_->field_begin(); } + field_iterator end() { return msg_->field_end(); } + private: + MessageDef* msg_; + }; + + class ConstFieldAccessor { + public: + explicit ConstFieldAccessor(const MessageDef* msg) : msg_(msg) {} + const_field_iterator begin() { return msg_->field_begin(); } + const_field_iterator end() { return msg_->field_end(); } + private: + const MessageDef* msg_; + }; + + class OneofAccessor { + public: + explicit OneofAccessor(MessageDef* msg) : msg_(msg) {} + oneof_iterator begin() { return msg_->oneof_begin(); } + oneof_iterator end() { return msg_->oneof_end(); } + private: + MessageDef* msg_; + }; + + class ConstOneofAccessor { + public: + explicit ConstOneofAccessor(const MessageDef* msg) : msg_(msg) {} + const_oneof_iterator begin() { return msg_->oneof_begin(); } + const_oneof_iterator end() { return msg_->oneof_end(); } + private: + const MessageDef* msg_; + }; + + field_iterator field_begin(); + field_iterator field_end(); + const_field_iterator field_begin() const; + const_field_iterator field_end() const; + + oneof_iterator oneof_begin(); + oneof_iterator oneof_end(); + const_oneof_iterator oneof_begin() const; + const_oneof_iterator oneof_end() const; + + FieldAccessor fields() { return FieldAccessor(this); } + ConstFieldAccessor fields() const { return ConstFieldAccessor(this); } + OneofAccessor oneofs() { return OneofAccessor(this); } + ConstOneofAccessor oneofs() const { return ConstOneofAccessor(this); } private: UPB_DISALLOW_POD_OPS(MessageDef, upb::MessageDef); @@ -1857,6 +1990,9 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, upb_inttable itof; // int to field upb_strtable ntof; // name to field + // Tables for looking up oneofs by name. + upb_strtable ntoo; // name to oneof + // Is this a map-entry message? // TODO: set this flag properly for static descriptors; regenerate // descriptor.upb.c. @@ -1865,11 +2001,14 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def, // TODO(haberman): proper extension ranges (there can be multiple). )); +// TODO: also support static initialization of the oneofs table. This will be +// needed if we compile in descriptors that contain oneofs. #define UPB_MSGDEF_INIT(name, selector_count, submsg_field_count, itof, ntof, \ refs, ref2s) \ { \ UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \ - submsg_field_count, itof, ntof, false \ + submsg_field_count, itof, ntof, \ + UPB_EMPTY_STRTABLE_INIT(UPB_CTYPE_PTR), false \ } UPB_BEGIN_EXTERN_C // { @@ -1893,6 +2032,8 @@ bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s); upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, upb_status *s); +bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor, + upb_status *s); // Field lookup in a couple of different variations: // - itof = int to field @@ -1917,11 +2058,34 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m, return (upb_fielddef *)upb_msgdef_ntof(m, name, len); } +// Oneof lookup: +// - ntoo = name to oneof +// - ntooz = name to oneof, null-terminated string. +const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, + size_t len); +int upb_msgdef_numoneofs(const upb_msgdef *m); + +UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m, + const char *name) { + return upb_msgdef_ntoo(m, name, strlen(name)); +} + +UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m, + const char *name, size_t len) { + return (upb_oneofdef *)upb_msgdef_ntoo(m, name, len); +} + void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry); bool upb_msgdef_mapentry(const upb_msgdef *m); -// upb_msg_iter i; -// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) { +const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m, + const char *name); +int upb_msgdef_numoneofs(const upb_msgdef *m); + +// upb_msg_field_iter i; +// for(upb_msg_field_begin(&i, m); +// !upb_msg_field_done(&i); +// upb_msg_field_next(&i)) { // upb_fielddef *f = upb_msg_iter_field(&i); // // ... // } @@ -1929,11 +2093,18 @@ bool upb_msgdef_mapentry(const upb_msgdef *m); // For C we don't have separate iterators for const and non-const. // It is the caller's responsibility to cast the upb_fielddef* to // const if the upb_msgdef* is const. -void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m); -void upb_msg_next(upb_msg_iter *iter); -bool upb_msg_done(const upb_msg_iter *iter); -upb_fielddef *upb_msg_iter_field(const upb_msg_iter *iter); -void upb_msg_iter_setdone(upb_msg_iter *iter); +void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m); +void upb_msg_field_next(upb_msg_field_iter *iter); +bool upb_msg_field_done(const upb_msg_field_iter *iter); +upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter); +void upb_msg_field_iter_setdone(upb_msg_field_iter *iter); + +// Similar to above, we also support iterating through the oneofs in a msgdef. +void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m); +void upb_msg_oneof_next(upb_msg_oneof_iter *iter); +bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter); +upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter); +void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter); UPB_END_EXTERN_C // } @@ -2075,6 +2246,172 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter); UPB_END_EXTERN_C // } +/* upb::OneofDef **************************************************************/ + +typedef upb_inttable_iter upb_oneof_iter; + +// Class that represents a oneof. Its base class is upb::Def (convert with +// upb::upcast()). +UPB_DEFINE_DEF(upb::OneofDef, oneofdef, ONEOF, UPB_QUOTE( + public: + // Returns NULL if memory allocation failed. + static reffed_ptr New(); + + // Functionality from upb::RefCounted. + bool IsFrozen() const; + void Ref(const void* owner) const; + void Unref(const void* owner) const; + void DonateRef(const void* from, const void* to) const; + void CheckRef(const void* owner) const; + + // Functionality from upb::Def. + const char* full_name() const; + + // Returns the MessageDef that owns this OneofDef. + const MessageDef* containing_type() const; + + // Returns the name of this oneof. This is the name used to look up the oneof + // by name once added to a message def. + const char* name() const; + bool set_name(const char* name, Status* s); + + // Returns the number of fields currently defined in the oneof. + int field_count() const; + + // Adds a field to the oneof. The field must not have been added to any other + // oneof or msgdef. If the oneof is not yet part of a msgdef, then when the + // oneof is eventually added to a msgdef, all fields added to the oneof will + // also be added to the msgdef at that time. If the oneof is already part of a + // msgdef, the field must either be a part of that msgdef already, or must not + // be a part of any msgdef; in the latter case, the field is added to the + // msgdef as a part of this operation. + // + // The field may only have an OPTIONAL label, never REQUIRED or REPEATED. + // + // If |f| is already part of this MessageDef, this method performs no action + // and returns true (success). Thus, this method is idempotent. + bool AddField(FieldDef* field, Status* s); + bool AddField(const reffed_ptr& field, Status* s); + + // Looks up by name. + const FieldDef* FindFieldByName(const char* name, size_t len) const; + FieldDef* FindFieldByName(const char* name, size_t len); + const FieldDef* FindFieldByName(const char* name) const { + return FindFieldByName(name, strlen(name)); + } + FieldDef* FindFieldByName(const char* name) { + return FindFieldByName(name, strlen(name)); + } + + template + FieldDef* FindFieldByName(const T& str) { + return FindFieldByName(str.c_str(), str.size()); + } + template + const FieldDef* FindFieldByName(const T& str) const { + return FindFieldByName(str.c_str(), str.size()); + } + + // Looks up by tag number. + const FieldDef* FindFieldByNumber(uint32_t num) const; + + // Returns a new OneofDef with all the same fields. The OneofDef will be owned + // by the given owner. + OneofDef* Dup(const void* owner) const; + + // Iteration over fields. The order is undefined. + class iterator : public std::iterator { + public: + explicit iterator(OneofDef* md); + static iterator end(OneofDef* md); + + void operator++(); + FieldDef* operator*() const; + bool operator!=(const iterator& other) const; + bool operator==(const iterator& other) const; + + private: + upb_oneof_iter iter_; + }; + + class const_iterator + : public std::iterator { + public: + explicit const_iterator(const OneofDef* md); + static const_iterator end(const OneofDef* md); + + void operator++(); + const FieldDef* operator*() const; + bool operator!=(const const_iterator& other) const; + bool operator==(const const_iterator& other) const; + + private: + upb_oneof_iter iter_; + }; + + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + + private: + UPB_DISALLOW_POD_OPS(OneofDef, upb::OneofDef); +), +UPB_DEFINE_STRUCT(upb_oneofdef, upb_def, + upb_strtable ntof; + upb_inttable itof; + const upb_msgdef *parent; +)); + +#define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \ + { UPB_DEF_INIT(name, UPB_DEF_ENUM, refs, ref2s), ntof, itof } + +UPB_BEGIN_EXTERN_C // { + +// Native C API. +upb_oneofdef *upb_oneofdef_new(const void *owner); +upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner); + +// From upb_refcounted. +void upb_oneofdef_unref(const upb_oneofdef *o, const void *owner); +bool upb_oneofdef_isfrozen(const upb_oneofdef *e); +void upb_oneofdef_ref(const upb_oneofdef *o, const void *owner); +void upb_oneofdef_donateref(const upb_oneofdef *m, const void *from, + const void *to); +void upb_oneofdef_checkref(const upb_oneofdef *o, const void *owner); + +const char *upb_oneofdef_name(const upb_oneofdef *o); +bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s); + +const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o); +int upb_oneofdef_numfields(const upb_oneofdef *o); +bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, + const void *ref_donor, + upb_status *s); + +// Oneof lookups: +// - ntof: look up a field by name. +// - ntofz: look up a field by name (as a null-terminated string). +// - itof: look up a field by number. +const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, + const char *name, size_t length); +UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o, + const char *name) { + return upb_oneofdef_ntof(o, name, strlen(name)); +} +const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num); + +// upb_oneof_iter i; +// for(upb_oneof_begin(&i, e); !upb_oneof_done(&i); upb_oneof_next(&i)) { +// // ... +// } +void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o); +void upb_oneof_next(upb_oneof_iter *iter); +bool upb_oneof_done(upb_oneof_iter *iter); +upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter); +void upb_oneof_iter_setdone(upb_oneof_iter *iter); + +UPB_END_EXTERN_C // } #ifdef __cplusplus @@ -2201,6 +2538,9 @@ inline void FieldDef::set_packed(bool packed) { inline const MessageDef* FieldDef::containing_type() const { return upb_fielddef_containingtype(this); } +inline const OneofDef* FieldDef::containing_oneof() const { + return upb_fielddef_containingoneof(this); +} inline const char* FieldDef::containing_type_name() { return upb_fielddef_containingtypename(this); } @@ -2351,12 +2691,21 @@ inline bool MessageDef::Freeze(Status* status) { inline int MessageDef::field_count() const { return upb_msgdef_numfields(this); } +inline int MessageDef::oneof_count() const { + return upb_msgdef_numoneofs(this); +} inline bool MessageDef::AddField(upb_fielddef* f, Status* s) { return upb_msgdef_addfield(this, f, NULL, s); } inline bool MessageDef::AddField(const reffed_ptr& f, Status* s) { return upb_msgdef_addfield(this, f.get(), NULL, s); } +inline bool MessageDef::AddOneof(upb_oneofdef* o, Status* s) { + return upb_msgdef_addoneof(this, o, NULL, s); +} +inline bool MessageDef::AddOneof(const reffed_ptr& o, Status* s) { + return upb_msgdef_addoneof(this, o.get(), NULL, s); +} inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) { return upb_msgdef_itof_mutable(this, number); } @@ -2370,6 +2719,13 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name, size_t len) const { return upb_msgdef_ntof(this, name, len); } +inline OneofDef* MessageDef::FindOneofByName(const char* name, size_t len) { + return upb_msgdef_ntoo_mutable(this, name, len); +} +inline const OneofDef* MessageDef::FindOneofByName(const char* name, + size_t len) const { + return upb_msgdef_ntoo(this, name, len); +} inline MessageDef* MessageDef::Dup(const void *owner) const { return upb_msgdef_dup(this, owner); } @@ -2379,55 +2735,127 @@ inline void MessageDef::setmapentry(bool map_entry) { inline bool MessageDef::mapentry() const { return upb_msgdef_mapentry(this); } -inline MessageDef::iterator MessageDef::begin() { return iterator(this); } -inline MessageDef::iterator MessageDef::end() { return iterator::end(this); } -inline MessageDef::const_iterator MessageDef::begin() const { - return const_iterator(this); +inline MessageDef::field_iterator MessageDef::field_begin() { + return field_iterator(this); } -inline MessageDef::const_iterator MessageDef::end() const { - return const_iterator::end(this); +inline MessageDef::field_iterator MessageDef::field_end() { + return field_iterator::end(this); +} +inline MessageDef::const_field_iterator MessageDef::field_begin() const { + return const_field_iterator(this); +} +inline MessageDef::const_field_iterator MessageDef::field_end() const { + return const_field_iterator::end(this); +} + +inline MessageDef::oneof_iterator MessageDef::oneof_begin() { + return oneof_iterator(this); +} +inline MessageDef::oneof_iterator MessageDef::oneof_end() { + return oneof_iterator::end(this); +} +inline MessageDef::const_oneof_iterator MessageDef::oneof_begin() const { + return const_oneof_iterator(this); +} +inline MessageDef::const_oneof_iterator MessageDef::oneof_end() const { + return const_oneof_iterator::end(this); } -inline MessageDef::iterator::iterator(MessageDef* md) { - upb_msg_begin(&iter_, md); +inline MessageDef::field_iterator::field_iterator(MessageDef* md) { + upb_msg_field_begin(&iter_, md); } -inline MessageDef::iterator MessageDef::iterator::end(MessageDef* md) { - MessageDef::iterator iter(md); - upb_msg_iter_setdone(&iter.iter_); +inline MessageDef::field_iterator MessageDef::field_iterator::end( + MessageDef* md) { + MessageDef::field_iterator iter(md); + upb_msg_field_iter_setdone(&iter.iter_); return iter; } -inline FieldDef* MessageDef::iterator::operator*() const { +inline FieldDef* MessageDef::field_iterator::operator*() const { return upb_msg_iter_field(&iter_); } -inline void MessageDef::iterator::operator++() { return upb_msg_next(&iter_); } -inline bool MessageDef::iterator::operator==(const iterator &other) const { +inline void MessageDef::field_iterator::operator++() { + return upb_msg_field_next(&iter_); +} +inline bool MessageDef::field_iterator::operator==( + const field_iterator &other) const { return upb_inttable_iter_isequal(&iter_, &other.iter_); } -inline bool MessageDef::iterator::operator!=(const iterator &other) const { +inline bool MessageDef::field_iterator::operator!=( + const field_iterator &other) const { return !(*this == other); } -inline MessageDef::const_iterator::const_iterator(const MessageDef* md) { - upb_msg_begin(&iter_, md); +inline MessageDef::const_field_iterator::const_field_iterator( + const MessageDef* md) { + upb_msg_field_begin(&iter_, md); } -inline MessageDef::const_iterator MessageDef::const_iterator::end( +inline MessageDef::const_field_iterator MessageDef::const_field_iterator::end( const MessageDef *md) { - MessageDef::const_iterator iter(md); - upb_msg_iter_setdone(&iter.iter_); + MessageDef::const_field_iterator iter(md); + upb_msg_field_iter_setdone(&iter.iter_); return iter; } -inline const FieldDef* MessageDef::const_iterator::operator*() const { +inline const FieldDef* MessageDef::const_field_iterator::operator*() const { return upb_msg_iter_field(&iter_); } -inline void MessageDef::const_iterator::operator++() { - return upb_msg_next(&iter_); +inline void MessageDef::const_field_iterator::operator++() { + return upb_msg_field_next(&iter_); } -inline bool MessageDef::const_iterator::operator==( - const const_iterator &other) const { +inline bool MessageDef::const_field_iterator::operator==( + const const_field_iterator &other) const { return upb_inttable_iter_isequal(&iter_, &other.iter_); } -inline bool MessageDef::const_iterator::operator!=( - const const_iterator &other) const { +inline bool MessageDef::const_field_iterator::operator!=( + const const_field_iterator &other) const { + return !(*this == other); +} + +inline MessageDef::oneof_iterator::oneof_iterator(MessageDef* md) { + upb_msg_oneof_begin(&iter_, md); +} +inline MessageDef::oneof_iterator MessageDef::oneof_iterator::end( + MessageDef* md) { + MessageDef::oneof_iterator iter(md); + upb_msg_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline OneofDef* MessageDef::oneof_iterator::operator*() const { + return upb_msg_iter_oneof(&iter_); +} +inline void MessageDef::oneof_iterator::operator++() { + return upb_msg_oneof_next(&iter_); +} +inline bool MessageDef::oneof_iterator::operator==( + const oneof_iterator &other) const { + return upb_strtable_iter_isequal(&iter_, &other.iter_); +} +inline bool MessageDef::oneof_iterator::operator!=( + const oneof_iterator &other) const { + return !(*this == other); +} + +inline MessageDef::const_oneof_iterator::const_oneof_iterator( + const MessageDef* md) { + upb_msg_oneof_begin(&iter_, md); +} +inline MessageDef::const_oneof_iterator MessageDef::const_oneof_iterator::end( + const MessageDef *md) { + MessageDef::const_oneof_iterator iter(md); + upb_msg_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline const OneofDef* MessageDef::const_oneof_iterator::operator*() const { + return upb_msg_iter_oneof(&iter_); +} +inline void MessageDef::const_oneof_iterator::operator++() { + return upb_msg_oneof_next(&iter_); +} +inline bool MessageDef::const_oneof_iterator::operator==( + const const_oneof_iterator &other) const { + return upb_strtable_iter_isequal(&iter_, &other.iter_); +} +inline bool MessageDef::const_oneof_iterator::operator!=( + const const_oneof_iterator &other) const { return !(*this == other); } @@ -2495,6 +2923,105 @@ inline const char* EnumDef::Iterator::name() { } inline bool EnumDef::Iterator::Done() { return upb_enum_done(&iter_); } inline void EnumDef::Iterator::Next() { return upb_enum_next(&iter_); } + +inline reffed_ptr OneofDef::New() { + upb_oneofdef *o = upb_oneofdef_new(&o); + return reffed_ptr(o, &o); +} +inline bool OneofDef::IsFrozen() const { return upb_oneofdef_isfrozen(this); } +inline void OneofDef::Ref(const void* owner) const { + return upb_oneofdef_ref(this, owner); +} +inline void OneofDef::Unref(const void* owner) const { + return upb_oneofdef_unref(this, owner); +} +inline void OneofDef::DonateRef(const void* from, const void* to) const { + return upb_oneofdef_donateref(this, from, to); +} +inline void OneofDef::CheckRef(const void* owner) const { + return upb_oneofdef_checkref(this, owner); +} +inline const char* OneofDef::full_name() const { + return upb_oneofdef_name(this); +} + +inline const MessageDef* OneofDef::containing_type() const { + return upb_oneofdef_containingtype(this); +} +inline const char* OneofDef::name() const { + return upb_oneofdef_name(this); +} +inline bool OneofDef::set_name(const char* name, Status* s) { + return upb_oneofdef_setname(this, name, s); +} +inline int OneofDef::field_count() const { + return upb_oneofdef_numfields(this); +} +inline bool OneofDef::AddField(FieldDef* field, Status* s) { + return upb_oneofdef_addfield(this, field, NULL, s); +} +inline bool OneofDef::AddField(const reffed_ptr& field, Status* s) { + return upb_oneofdef_addfield(this, field.get(), NULL, s); +} +inline const FieldDef* OneofDef::FindFieldByName(const char* name, + size_t len) const { + return upb_oneofdef_ntof(this, name, len); +} +inline const FieldDef* OneofDef::FindFieldByNumber(uint32_t num) const { + return upb_oneofdef_itof(this, num); +} +inline OneofDef::iterator OneofDef::begin() { return iterator(this); } +inline OneofDef::iterator OneofDef::end() { return iterator::end(this); } +inline OneofDef::const_iterator OneofDef::begin() const { + return const_iterator(this); +} +inline OneofDef::const_iterator OneofDef::end() const { + return const_iterator::end(this); +} + +inline OneofDef::iterator::iterator(OneofDef* o) { + upb_oneof_begin(&iter_, o); +} +inline OneofDef::iterator OneofDef::iterator::end(OneofDef* o) { + OneofDef::iterator iter(o); + upb_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline FieldDef* OneofDef::iterator::operator*() const { + return upb_oneof_iter_field(&iter_); +} +inline void OneofDef::iterator::operator++() { return upb_oneof_next(&iter_); } +inline bool OneofDef::iterator::operator==(const iterator &other) const { + return upb_inttable_iter_isequal(&iter_, &other.iter_); +} +inline bool OneofDef::iterator::operator!=(const iterator &other) const { + return !(*this == other); +} + +inline OneofDef::const_iterator::const_iterator(const OneofDef* md) { + upb_oneof_begin(&iter_, md); +} +inline OneofDef::const_iterator OneofDef::const_iterator::end( + const OneofDef *md) { + OneofDef::const_iterator iter(md); + upb_oneof_iter_setdone(&iter.iter_); + return iter; +} +inline const FieldDef* OneofDef::const_iterator::operator*() const { + return upb_msg_iter_field(&iter_); +} +inline void OneofDef::const_iterator::operator++() { + return upb_oneof_next(&iter_); +} +inline bool OneofDef::const_iterator::operator==( + const const_iterator &other) const { + return upb_inttable_iter_isequal(&iter_, &other.iter_); +} +inline bool OneofDef::const_iterator::operator!=( + const const_iterator &other) const { + return !(*this == other); +} + } // namespace upb #endif diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index 926550334f..ceeaa8ada2 100644 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -73,6 +73,15 @@ module BasicTest optional :key, :string, 1 optional :value, :message, 2, "TestMessage2" end + + add_message "OneofMessage" do + oneof :my_oneof do + optional :a, :string, 1 + optional :b, :int32, 2 + optional :c, :message, 3, "TestMessage2" + optional :d, :enum, 4, "TestEnum" + end + end end TestMessage = pool.lookup("TestMessage").msgclass @@ -87,6 +96,7 @@ module BasicTest pool.lookup("MapMessageWireEquiv_entry1").msgclass MapMessageWireEquiv_entry2 = pool.lookup("MapMessageWireEquiv_entry2").msgclass + OneofMessage = pool.lookup("OneofMessage").msgclass # ------------ test cases --------------- @@ -582,6 +592,80 @@ module BasicTest "b" => TestMessage2.new(:foo => 2)} end + def test_oneof_descriptors + d = OneofMessage.descriptor + o = d.lookup_oneof("my_oneof") + assert o != nil + assert o.class == Google::Protobuf::OneofDescriptor + assert o.name == "my_oneof" + assert d.oneofs == [o] + assert o.count == 4 + field_names = o.map{|f| f.name}.sort + assert field_names == ["a", "b", "c", "d"] + end + + def test_oneof + d = OneofMessage.new + assert d.a == nil + assert d.b == nil + assert d.c == nil + assert d.d == nil + + d.a = "hi" + assert d.a == "hi" + assert d.b == nil + assert d.c == nil + assert d.d == nil + + d.b = 42 + assert d.a == nil + assert d.b == 42 + assert d.c == nil + assert d.d == nil + + d.c = TestMessage2.new(:foo => 100) + assert d.a == nil + assert d.b == nil + assert d.c.foo == 100 + assert d.d == nil + + d.d = :C + assert d.a == nil + assert d.b == nil + assert d.c == nil + assert d.d == :C + + d2 = OneofMessage.decode(OneofMessage.encode(d)) + assert d2 == d + + encoded_field_a = OneofMessage.encode(OneofMessage.new(:a => "string")) + encoded_field_b = OneofMessage.encode(OneofMessage.new(:b => 1000)) + encoded_field_c = OneofMessage.encode( + OneofMessage.new(:c => TestMessage2.new(:foo => 1))) + encoded_field_d = OneofMessage.encode(OneofMessage.new(:d => :B)) + + d3 = OneofMessage.decode( + encoded_field_c + encoded_field_a + encoded_field_d) + assert d3.a == nil + assert d3.b == nil + assert d3.c == nil + assert d3.d == :B + + d4 = OneofMessage.decode( + encoded_field_c + encoded_field_a + encoded_field_d + + encoded_field_c) + assert d4.a == nil + assert d4.b == nil + assert d4.c.foo == 1 + assert d4.d == nil + + d5 = OneofMessage.new(:a => "hello") + assert d5.a != nil + d5.a = nil + assert d5.a == nil + assert OneofMessage.encode(d5) == '' + end + def test_enum_field m = TestMessage.new assert m.optional_enum == :Default @@ -621,6 +705,14 @@ module BasicTest assert m.repeated_msg[0].object_id != m2.repeated_msg[0].object_id end + def test_eq + m = TestMessage.new(:optional_int32 => 42, + :repeated_int32 => [1, 2, 3]) + m2 = TestMessage.new(:optional_int32 => 43, + :repeated_int32 => [1, 2, 3]) + assert m != m2 + end + def test_enum_lookup assert TestEnum::A == 1 assert TestEnum::B == 2 From 7c4bbf07a5eee1fd4f5c2d60be32b95c35b0ed1d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 14 Jan 2015 14:56:17 -0800 Subject: [PATCH 02/10] Support oneofs in the Ruby code generator. --- .../protobuf/compiler/ruby/ruby_generator.cc | 64 +++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/src/google/protobuf/compiler/ruby/ruby_generator.cc b/src/google/protobuf/compiler/ruby/ruby_generator.cc index c568790393..3101c524a0 100644 --- a/src/google/protobuf/compiler/ruby/ruby_generator.cc +++ b/src/google/protobuf/compiler/ruby/ruby_generator.cc @@ -100,6 +100,45 @@ std::string TypeName(const google::protobuf::FieldDescriptor* field) { } } +void GenerateField(const google::protobuf::FieldDescriptor* field, + google::protobuf::io::Printer* printer) { + printer->Print( + "$label$ :$name$, ", + "label", LabelForField(field), + "name", field->name()); + printer->Print( + ":$type$, $number$", + "type", TypeName(field), + "number", IntToString(field->number())); + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + printer->Print( + ", \"$subtype$\"\n", + "subtype", field->message_type()->full_name()); + } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) { + printer->Print( + ", \"$subtype$\"\n", + "subtype", field->enum_type()->full_name()); + } else { + printer->Print("\n"); + } +} + +void GenerateOneof(const google::protobuf::OneofDescriptor* oneof, + google::protobuf::io::Printer* printer) { + printer->Print( + "oneof :$name$ do\n", + "name", oneof->name()); + printer->Indent(); + + for (int i = 0; i < oneof->field_count(); i++) { + const FieldDescriptor* field = oneof->field(i); + GenerateField(field, printer); + } + + printer->Outdent(); + printer->Print("end\n"); +} + void GenerateMessage(const google::protobuf::Descriptor* message, google::protobuf::io::Printer* printer) { printer->Print( @@ -109,27 +148,16 @@ void GenerateMessage(const google::protobuf::Descriptor* message, for (int i = 0; i < message->field_count(); i++) { const FieldDescriptor* field = message->field(i); - printer->Print( - "$label$ :$name$, ", - "label", LabelForField(field), - "name", field->name()); - printer->Print( - ":$type$, $number$", - "type", TypeName(field), - "number", IntToString(field->number())); - if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { - printer->Print( - ", \"$subtype$\"\n", - "subtype", field->message_type()->full_name()); - } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) { - printer->Print( - ", \"$subtype$\"\n", - "subtype", field->enum_type()->full_name()); - } else { - printer->Print("\n"); + if (!field->containing_oneof()) { + GenerateField(field, printer); } } + for (int i = 0; i < message->oneof_decl_count(); i++) { + const OneofDescriptor* oneof = message->oneof_decl(i); + GenerateOneof(oneof, printer); + } + printer->Outdent(); printer->Print("end\n"); From 3f3820d8f8b5c0b67aadc25ad5a2e728b6a3fe79 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 14 Jan 2015 15:44:46 -0800 Subject: [PATCH 03/10] Two tests for Ruby code generator: - A golden-file test that ensures protoc produces known-valid output. - A Ruby test that loads that golden file and ensures it actually works with the extension. This split strategy allows us to test end-to-end without needing to integrate the Ruby gem build system and the protoc build system. This is desirable because we do not want a gem build/install to depend on building protoc, and we do not want building protoc to depend on building and testing the gem. --- ruby/google-protobuf.gemspec | 4 +- ruby/tests/generated_code.proto | 67 ++++++++++ ruby/tests/generated_code.rb | 124 ++++++++++++++++++ ruby/tests/generated_code_test.rb | 17 +++ src/Makefile.am | 1 + .../compiler/ruby/ruby_generator_unittest.cc | 116 ++++++++++++++++ 6 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 ruby/tests/generated_code.proto create mode 100644 ruby/tests/generated_code.rb create mode 100644 ruby/tests/generated_code_test.rb create mode 100644 src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc diff --git a/ruby/google-protobuf.gemspec b/ruby/google-protobuf.gemspec index 87033ac4e5..7bfa533c6e 100644 --- a/ruby/google-protobuf.gemspec +++ b/ruby/google-protobuf.gemspec @@ -18,5 +18,7 @@ Gem::Specification.new do |s| s.files = ["lib/google/protobuf.rb"] + # extension C source find_c_source("ext/google/protobuf_c") - s.test_files = `git ls-files -- tests`.split + s.test_files = ["tests/basic.rb", + "tests/stress.rb", + "tests/generated_code_test.rb"] end diff --git a/ruby/tests/generated_code.proto b/ruby/tests/generated_code.proto new file mode 100644 index 0000000000..b1d6323243 --- /dev/null +++ b/ruby/tests/generated_code.proto @@ -0,0 +1,67 @@ +syntax = "proto3"; + +package A.B.C; + +message TestMessage { + optional int32 optional_int32 = 1; + optional int64 optional_int64 = 2; + optional uint32 optional_uint32 = 3; + optional uint64 optional_uint64 = 4; + optional bool optional_bool = 5; + optional double optional_double = 6; + optional float optional_float = 7; + optional string optional_string = 8; + optional bytes optional_bytes = 9; + optional TestEnum optional_enum = 10; + optional TestMessage optional_msg = 11; + + repeated int32 repeated_int32 = 21; + repeated int64 repeated_int64 = 22; + repeated uint32 repeated_uint32 = 23; + repeated uint64 repeated_uint64 = 24; + repeated bool repeated_bool = 25; + repeated double repeated_double = 26; + repeated float repeated_float = 27; + repeated string repeated_string = 28; + repeated bytes repeated_bytes = 29; + repeated TestEnum repeated_enum = 30; + repeated TestMessage repeated_msg = 31; + + oneof my_oneof { + int32 oneof_int32 = 41; + int64 oneof_int64 = 42; + uint32 oneof_uint32 = 43; + uint64 oneof_uint64 = 44; + bool oneof_bool = 45; + double oneof_double = 46; + float oneof_float = 47; + string oneof_string = 48; + bytes oneof_bytes = 49; + TestEnum oneof_enum = 50; + TestMessage oneof_msg = 51; + } + + map map_int32_string = 61; + map map_int64_string = 62; + map map_uint32_string = 63; + map map_uint64_string = 64; + map map_bool_string = 65; + map map_string_string = 66; + map map_string_msg = 67; + map map_string_enum = 68; + map map_string_int32 = 69; + map map_string_bool = 70; + + message NestedMessage { + optional int32 foo = 1; + } + + optional NestedMessage nested_message = 80; +} + +enum TestEnum { + Default = 0; + A = 1; + B = 2; + C = 3; +} diff --git a/ruby/tests/generated_code.rb b/ruby/tests/generated_code.rb new file mode 100644 index 0000000000..db762ad9f2 --- /dev/null +++ b/ruby/tests/generated_code.rb @@ -0,0 +1,124 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: generated_code.proto + +require 'google/protobuf' + +Google::Protobuf::DescriptorPool.generated_pool.build do + add_message "A.B.C.TestMessage" do + optional :optional_int32, :int32, 1 + optional :optional_int64, :int64, 2 + optional :optional_uint32, :uint32, 3 + optional :optional_uint64, :uint64, 4 + optional :optional_bool, :bool, 5 + optional :optional_double, :double, 6 + optional :optional_float, :float, 7 + optional :optional_string, :string, 8 + optional :optional_bytes, :string, 9 + optional :optional_enum, :enum, 10, "A.B.C.TestEnum" + optional :optional_msg, :message, 11, "A.B.C.TestMessage" + repeated :repeated_int32, :int32, 21 + repeated :repeated_int64, :int64, 22 + repeated :repeated_uint32, :uint32, 23 + repeated :repeated_uint64, :uint64, 24 + repeated :repeated_bool, :bool, 25 + repeated :repeated_double, :double, 26 + repeated :repeated_float, :float, 27 + repeated :repeated_string, :string, 28 + repeated :repeated_bytes, :string, 29 + repeated :repeated_enum, :enum, 30, "A.B.C.TestEnum" + repeated :repeated_msg, :message, 31, "A.B.C.TestMessage" + repeated :map_int32_string, :message, 61, "A.B.C.TestMessage.MapInt32StringEntry" + repeated :map_int64_string, :message, 62, "A.B.C.TestMessage.MapInt64StringEntry" + repeated :map_uint32_string, :message, 63, "A.B.C.TestMessage.MapUint32StringEntry" + repeated :map_uint64_string, :message, 64, "A.B.C.TestMessage.MapUint64StringEntry" + repeated :map_bool_string, :message, 65, "A.B.C.TestMessage.MapBoolStringEntry" + repeated :map_string_string, :message, 66, "A.B.C.TestMessage.MapStringStringEntry" + repeated :map_string_msg, :message, 67, "A.B.C.TestMessage.MapStringMsgEntry" + repeated :map_string_enum, :message, 68, "A.B.C.TestMessage.MapStringEnumEntry" + repeated :map_string_int32, :message, 69, "A.B.C.TestMessage.MapStringInt32Entry" + repeated :map_string_bool, :message, 70, "A.B.C.TestMessage.MapStringBoolEntry" + optional :nested_message, :message, 80, "A.B.C.TestMessage.NestedMessage" + oneof :my_oneof do + optional :oneof_int32, :int32, 41 + optional :oneof_int64, :int64, 42 + optional :oneof_uint32, :uint32, 43 + optional :oneof_uint64, :uint64, 44 + optional :oneof_bool, :bool, 45 + optional :oneof_double, :double, 46 + optional :oneof_float, :float, 47 + optional :oneof_string, :string, 48 + optional :oneof_bytes, :string, 49 + optional :oneof_enum, :enum, 50, "A.B.C.TestEnum" + optional :oneof_msg, :message, 51, "A.B.C.TestMessage" + end + end + add_message "A.B.C.TestMessage.MapInt32StringEntry" do + optional :key, :int32, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapInt64StringEntry" do + optional :key, :int64, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapUint32StringEntry" do + optional :key, :uint32, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapUint64StringEntry" do + optional :key, :uint64, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapBoolStringEntry" do + optional :key, :bool, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapStringStringEntry" do + optional :key, :string, 1 + optional :value, :string, 2 + end + add_message "A.B.C.TestMessage.MapStringMsgEntry" do + optional :key, :string, 1 + optional :value, :message, 2, "A.B.C.TestMessage" + end + add_message "A.B.C.TestMessage.MapStringEnumEntry" do + optional :key, :string, 1 + optional :value, :enum, 2, "A.B.C.TestEnum" + end + add_message "A.B.C.TestMessage.MapStringInt32Entry" do + optional :key, :string, 1 + optional :value, :int32, 2 + end + add_message "A.B.C.TestMessage.MapStringBoolEntry" do + optional :key, :string, 1 + optional :value, :bool, 2 + end + add_message "A.B.C.TestMessage.NestedMessage" do + optional :foo, :int32, 1 + end + add_enum "A.B.C.TestEnum" do + value :Default, 0 + value :A, 1 + value :B, 2 + value :C, 3 + end +end + +module A + module B + module C + TestMessage = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage").msgclass + TestMessage::MapInt32StringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapInt32StringEntry").msgclass + TestMessage::MapInt64StringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapInt64StringEntry").msgclass + TestMessage::MapUint32StringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapUint32StringEntry").msgclass + TestMessage::MapUint64StringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapUint64StringEntry").msgclass + TestMessage::MapBoolStringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapBoolStringEntry").msgclass + TestMessage::MapStringStringEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapStringStringEntry").msgclass + TestMessage::MapStringMsgEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapStringMsgEntry").msgclass + TestMessage::MapStringEnumEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapStringEnumEntry").msgclass + TestMessage::MapStringInt32Entry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapStringInt32Entry").msgclass + TestMessage::MapStringBoolEntry = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.MapStringBoolEntry").msgclass + TestMessage::NestedMessage = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.NestedMessage").msgclass + TestEnum = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestEnum").enummodule + end + end +end diff --git a/ruby/tests/generated_code_test.rb b/ruby/tests/generated_code_test.rb new file mode 100644 index 0000000000..daef357a10 --- /dev/null +++ b/ruby/tests/generated_code_test.rb @@ -0,0 +1,17 @@ +#!/usr/bin/ruby + +# generated_code.rb is in the same directory as this test. +$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) + +require 'generated_code' +require 'test/unit' + +class GeneratedCodeTest < Test::Unit::TestCase + def test_generated_msg + # just test that we can instantiate the message. The purpose of this test + # is to ensure that the output of the code generator is valid Ruby and + # successfully creates message definitions and classes, not to test every + # aspect of the extension (basic.rb is for that). + m = A::B::C::TestMessage.new() + end +end diff --git a/src/Makefile.am b/src/Makefile.am index 3a469fd7de..6fd8bd2baf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -458,6 +458,7 @@ protobuf_test_SOURCES = \ google/protobuf/compiler/java/java_plugin_unittest.cc \ google/protobuf/compiler/java/java_doc_comment_unittest.cc \ google/protobuf/compiler/python/python_plugin_unittest.cc \ + google/protobuf/compiler/ruby/ruby_generator_unittest.cc \ $(COMMON_TEST_SOURCES) nodist_protobuf_test_SOURCES = $(protoc_outputs) diff --git a/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc b/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc new file mode 100644 index 0000000000..971fb739c4 --- /dev/null +++ b/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc @@ -0,0 +1,116 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace google { +namespace protobuf { +namespace compiler { +namespace python { +namespace { + +class TestGenerator : public CodeGenerator { + public: + TestGenerator() {} + ~TestGenerator() {} + + virtual bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + TryInsert("test_pb2.py", "imports", context); + TryInsert("test_pb2.py", "module_scope", context); + TryInsert("test_pb2.py", "class_scope:foo.Bar", context); + TryInsert("test_pb2.py", "class_scope:foo.Bar.Baz", context); + return true; + } + + void TryInsert(const string& filename, const string& insertion_point, + GeneratorContext* context) const { + google::protobuf::scoped_ptr output( + context->OpenForInsert(filename, insertion_point)); + io::Printer printer(output.get(), '$'); + printer.Print("// inserted $name$\n", "name", insertion_point); + } +}; + +// This test is a simple golden-file test over the output of the Ruby code +// generator. When we make changes to the Ruby extension and alter the Ruby code +// generator to use those changes, we should (i) manually test the output of the +// code generator with the extension, and (ii) update the golden output above. +// Some day, we may integrate build systems between protoc and the language +// extensions to the point where we can do this test in a more automated way. + +TEST(RubyGeneratorTest, GeneratorTest) { + google::protobuf::compiler::CommandLineInterface cli; + cli.SetInputsAreProtoPathRelative(true); + + ruby::Generator ruby_generator; + cli.RegisterGenerator("--ruby_out", &ruby_generator, ""); + + string path_arg = "-I" + TestSourceDir() + "/ruby/tests"; + string ruby_out = "--ruby_out=" + TestTempDir(); + const char* argv[] = { + "protoc", + path_arg.c_str(), + ruby_out.c_str(), + "generated_code.proto", + }; + + EXPECT_EQ(0, cli.Run(4, argv)); + + string output; + GOOGLE_CHECK_OK(File::GetContents(TestTempDir() + "/generated_code.rb", + &output, + true)); + + string expected_output; + GOOGLE_CHECK_OK(File::GetContents( + TestSourceDir() + "/ruby/tests/generated_code.rb", + &expected_output, + true)); + + EXPECT_EQ(expected_output, output); +} + +} // namespace +} // namespace python +} // namespace compiler +} // namespace protobuf +} // namespace google From b0670ddae761914515850c1a2db90dfb374a75ea Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 14 Jan 2015 16:45:47 -0800 Subject: [PATCH 04/10] Fix golden-file Ruby test to work with out-of-tree builds. --- Makefile.am | 6 +- .../compiler/ruby/ruby_generator_unittest.cc | 69 ++++++++++--------- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/Makefile.am b/Makefile.am index 566b28501f..1e11e37142 100644 --- a/Makefile.am +++ b/Makefile.am @@ -245,6 +245,7 @@ ruby_EXTRA_DIST= \ ruby/ext/google/protobuf_c/defs.c \ ruby/ext/google/protobuf_c/encode_decode.c \ ruby/ext/google/protobuf_c/extconf.rb \ + ruby/ext/google/protobuf_c/map.c \ ruby/ext/google/protobuf_c/message.c \ ruby/ext/google/protobuf_c/protobuf.c \ ruby/ext/google/protobuf_c/protobuf.h \ @@ -255,7 +256,10 @@ ruby_EXTRA_DIST= \ ruby/google-protobuf.gemspec \ ruby/lib/google/protobuf.rb \ ruby/tests/basic.rb \ - ruby/tests/stress.rb + ruby/tests/stress.rb \ + ruby/tests/generated_code.proto \ + ruby/tests/generated_code.rb \ + ruby/tests/generated_code_test.rb all_EXTRA_DIST=$(java_EXTRA_DIST) $(python_EXTRA_DIST) $(ruby_EXTRA_DIST) diff --git a/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc b/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc index 971fb739c4..e35ca695b5 100644 --- a/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc +++ b/src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc @@ -42,33 +42,22 @@ namespace google { namespace protobuf { namespace compiler { -namespace python { +namespace ruby { namespace { -class TestGenerator : public CodeGenerator { - public: - TestGenerator() {} - ~TestGenerator() {} - - virtual bool Generate(const FileDescriptor* file, - const string& parameter, - GeneratorContext* context, - string* error) const { - TryInsert("test_pb2.py", "imports", context); - TryInsert("test_pb2.py", "module_scope", context); - TryInsert("test_pb2.py", "class_scope:foo.Bar", context); - TryInsert("test_pb2.py", "class_scope:foo.Bar.Baz", context); - return true; - } - - void TryInsert(const string& filename, const string& insertion_point, - GeneratorContext* context) const { - google::protobuf::scoped_ptr output( - context->OpenForInsert(filename, insertion_point)); - io::Printer printer(output.get(), '$'); - printer.Print("// inserted $name$\n", "name", insertion_point); +string FindRubyTestDir() { + // Inspired by TestSourceDir() in src/google/protobuf/testing/googletest.cc. + string prefix = "."; + while (!File::Exists(prefix + "/ruby/tests")) { + if (!File::Exists(prefix)) { + GOOGLE_LOG(FATAL) + << "Could not find Ruby test directory. Please run tests from " + "somewhere within the protobuf source package."; + } + prefix += "/.."; } -}; + return prefix + "/ruby/tests"; +} // This test is a simple golden-file test over the output of the Ruby code // generator. When we make changes to the Ruby extension and alter the Ruby code @@ -78,39 +67,53 @@ class TestGenerator : public CodeGenerator { // extensions to the point where we can do this test in a more automated way. TEST(RubyGeneratorTest, GeneratorTest) { + string ruby_tests = FindRubyTestDir(); + google::protobuf::compiler::CommandLineInterface cli; cli.SetInputsAreProtoPathRelative(true); ruby::Generator ruby_generator; cli.RegisterGenerator("--ruby_out", &ruby_generator, ""); - string path_arg = "-I" + TestSourceDir() + "/ruby/tests"; + // Copy generated_code.proto to the temporary test directory. + string test_input; + GOOGLE_CHECK_OK(File::GetContents( + ruby_tests + "/generated_code.proto", + &test_input, + true)); + GOOGLE_CHECK_OK(File::SetContents( + TestTempDir() + "/generated_code.proto", + test_input, + true)); + + // Invoke the proto compiler (we will be inside TestTempDir() at this point). string ruby_out = "--ruby_out=" + TestTempDir(); + string proto_path = "--proto_path=" + TestTempDir(); const char* argv[] = { "protoc", - path_arg.c_str(), ruby_out.c_str(), + proto_path.c_str(), "generated_code.proto", }; EXPECT_EQ(0, cli.Run(4, argv)); + // Load the generated output and compare to the expected result. string output; - GOOGLE_CHECK_OK(File::GetContents(TestTempDir() + "/generated_code.rb", - &output, - true)); - + GOOGLE_CHECK_OK(File::GetContents( + TestTempDir() + "/generated_code.rb", + &output, + true)); string expected_output; GOOGLE_CHECK_OK(File::GetContents( - TestSourceDir() + "/ruby/tests/generated_code.rb", + ruby_tests + "/generated_code.rb", &expected_output, true)); - EXPECT_EQ(expected_output, output); } } // namespace -} // namespace python +} // namespace ruby } // namespace compiler } // namespace protobuf } // namespace google From e1b7d38d9ad5dfd3719b1e9b1d588e08aba1afe8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 14 Jan 2015 17:14:05 -0800 Subject: [PATCH 05/10] Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 14 ++-- ruby/ext/google/protobuf_c/encode_decode.c | 29 +++++--- ruby/ext/google/protobuf_c/protobuf.h | 2 +- ruby/ext/google/protobuf_c/storage.c | 78 +++++++++++----------- ruby/tests/basic.rb | 7 +- 5 files changed, 71 insertions(+), 59 deletions(-) diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 371939aeef..446a5586d1 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -287,7 +287,7 @@ void Descriptor_register(VALUE module) { rb_define_method(klass, "lookup", Descriptor_lookup, 1); rb_define_method(klass, "add_field", Descriptor_add_field, 1); rb_define_method(klass, "add_oneof", Descriptor_add_oneof, 1); - rb_define_method(klass, "oneofs", Descriptor_oneofs, 0); + rb_define_method(klass, "each_oneof", Descriptor_each_oneof, 0); rb_define_method(klass, "lookup_oneof", Descriptor_lookup_oneof, 1); rb_define_method(klass, "msgclass", Descriptor_msgclass, 0); rb_define_method(klass, "name", Descriptor_name, 0); @@ -409,23 +409,23 @@ VALUE Descriptor_add_oneof(VALUE _self, VALUE obj) { /* * call-seq: - * Descriptor.oneofs => list of OneofDescriptors + * Descriptor.each_oneof(&block) => nil * - * Returns a list of OneofDescriptors that are part of this message type. + * Invokes the given block for each oneof in this message type, passing the + * corresponding OneofDescriptor. */ -VALUE Descriptor_oneofs(VALUE _self) { +VALUE Descriptor_each_oneof(VALUE _self) { DEFINE_SELF(Descriptor, self, _self); - VALUE ret = rb_ary_new(); upb_msg_oneof_iter it; for (upb_msg_oneof_begin(&it, self->msgdef); !upb_msg_oneof_done(&it); upb_msg_oneof_next(&it)) { const upb_oneofdef* oneof = upb_msg_iter_oneof(&it); VALUE obj = get_def_obj(oneof); - rb_ary_push(ret, obj); + rb_yield(obj); } - return ret; + return Qnil; } /* diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 26a038b6f6..5e53b694d9 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -60,10 +60,10 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, } typedef struct { - size_t ofs; // union data slot - size_t case_ofs; // oneof_case field - uint32_t tag; // tag number to place in data slot - const upb_msgdef *md; // msgdef, for oneof submessage handler + size_t ofs; // union data slot + size_t case_ofs; // oneof_case field + uint32_t oneof_case_num; // oneof-case number to place in oneof_case field + const upb_msgdef *md; // msgdef, for oneof submessage handler } oneof_handlerdata_t; static const void *newoneofhandlerdata(upb_handlers *h, @@ -73,7 +73,12 @@ static const void *newoneofhandlerdata(upb_handlers *h, oneof_handlerdata_t *hd = ALLOC(oneof_handlerdata_t); hd->ofs = ofs; hd->case_ofs = case_ofs; - hd->tag = upb_fielddef_number(f); + // We reuse the field tag number as a oneof union discriminant tag. Note that + // we don't expose these numbers to the user, so the only requirement is that + // we have some unique ID for each union case/possibility. The field tag + // numbers are already present and are easy to use so there's no reason to + // create a separate ID space. + hd->oneof_case_num = upb_fielddef_number(f); if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) { hd->md = upb_fielddef_msgsubdef(f); } else { @@ -294,7 +299,8 @@ static map_handlerdata_t* new_map_handlerdata( static bool oneof##type##_handler(void *closure, const void *hd, \ ctype val) { \ const oneof_handlerdata_t *oneofdata = hd; \ - DEREF(closure, oneofdata->case_ofs, uint32_t) = oneofdata->tag; \ + DEREF(closure, oneofdata->case_ofs, uint32_t) = \ + oneofdata->oneof_case_num; \ DEREF(closure, oneofdata->ofs, ctype) = val; \ return true; \ } @@ -317,7 +323,8 @@ static void *oneofstr_handler(void *closure, const oneof_handlerdata_t *oneofdata = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyStringUtf8Encoding); - DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + DEREF(msg, oneofdata->case_ofs, uint32_t) = + oneofdata->oneof_case_num; DEREF(msg, oneofdata->ofs, VALUE) = str; return (void*)str; } @@ -329,7 +336,8 @@ static void *oneofbytes_handler(void *closure, const oneof_handlerdata_t *oneofdata = hd; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyString8bitEncoding); - DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + DEREF(msg, oneofdata->case_ofs, uint32_t) = + oneofdata->oneof_case_num; DEREF(msg, oneofdata->ofs, VALUE) = str; return (void*)str; } @@ -340,13 +348,14 @@ static void *oneofsubmsg_handler(void *closure, MessageHeader* msg = closure; const oneof_handlerdata_t *oneofdata = hd; uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t); - DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->tag; + DEREF(msg, oneofdata->case_ofs, uint32_t) = + oneofdata->oneof_case_num; VALUE subdesc = get_def_obj((void*)oneofdata->md); VALUE subklass = Descriptor_msgclass(subdesc); - if (oldcase != oneofdata->tag || + if (oldcase != oneofdata->oneof_case_num || DEREF(msg, oneofdata->ofs, VALUE) == Qnil) { DEREF(msg, oneofdata->ofs, VALUE) = rb_class_new_instance(0, NULL, subklass); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 84f318b3eb..5b94ae2607 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -190,7 +190,7 @@ VALUE Descriptor_each(VALUE _self); VALUE Descriptor_lookup(VALUE _self, VALUE name); VALUE Descriptor_add_field(VALUE _self, VALUE obj); VALUE Descriptor_add_oneof(VALUE _self, VALUE obj); -VALUE Descriptor_oneofs(VALUE _self); +VALUE Descriptor_each_oneof(VALUE _self); VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name); VALUE Descriptor_msgclass(VALUE _self); extern const rb_data_type_t _Descriptor_type; diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index d526679a44..adb74394ff 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -473,13 +473,26 @@ VALUE field_type_class(const upb_fielddef* field) { return type_class; } +static void* slot_memory(MessageLayout* layout, + const void* storage, + const upb_fielddef* field) { + return ((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].offset; +} + +static uint32_t* slot_oneof_case(MessageLayout* layout, + const void* storage, + const upb_fielddef* field) { + return (uint32_t *)(((uint8_t *)storage) + + layout->fields[upb_fielddef_index(field)].case_offset); +} + + VALUE layout_get(MessageLayout* layout, const void* storage, const upb_fielddef* field) { - void* memory = ((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].case_offset); + void* memory = slot_memory(layout, storage, field); + uint32_t* oneof_case = slot_oneof_case(layout, storage, field); if (upb_fielddef_containingoneof(field)) { if (*oneof_case != upb_fielddef_number(field)) { @@ -552,10 +565,8 @@ void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field, VALUE val) { - void* memory = ((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].case_offset); + void* memory = slot_memory(layout, storage, field); + uint32_t* oneof_case = slot_oneof_case(layout, storage, field); if (upb_fielddef_containingoneof(field)) { if (val == Qnil) { @@ -592,10 +603,8 @@ void layout_init(MessageLayout* layout, !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - void* memory = ((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].case_offset); + void* memory = slot_memory(layout, storage, field); + uint32_t* oneof_case = slot_oneof_case(layout, storage, field); if (upb_fielddef_containingoneof(field)) { memset(memory, 0, NATIVE_SLOT_MAX_SIZE); @@ -652,10 +661,8 @@ void layout_mark(MessageLayout* layout, void* storage) { !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - void* memory = ((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* oneof_case = (uint32_t *)(((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].case_offset); + void* memory = slot_memory(layout, storage, field); + uint32_t* oneof_case = slot_oneof_case(layout, storage, field); if (upb_fielddef_containingoneof(field)) { if (*oneof_case == upb_fielddef_number(field)) { @@ -675,14 +682,11 @@ void layout_dup(MessageLayout* layout, void* to, void* from) { !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - void* to_memory = ((uint8_t *)to) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* to_oneof_case = (uint32_t *)(((uint8_t *)to) + - layout->fields[upb_fielddef_index(field)].case_offset); - void* from_memory = ((uint8_t *)from) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* from_oneof_case = (uint32_t *)(((uint8_t *)from) + - layout->fields[upb_fielddef_index(field)].case_offset); + + void* to_memory = slot_memory(layout, to, field); + uint32_t* to_oneof_case = slot_oneof_case(layout, to, field); + void* from_memory = slot_memory(layout, from, field); + uint32_t* from_oneof_case = slot_oneof_case(layout, from, field); if (upb_fielddef_containingoneof(field)) { if (*from_oneof_case == upb_fielddef_number(field)) { @@ -705,14 +709,11 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) { !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - void* to_memory = ((uint8_t *)to) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* to_oneof_case = (uint32_t *)(((uint8_t *)to) + - layout->fields[upb_fielddef_index(field)].case_offset); - void* from_memory = ((uint8_t *)from) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* from_oneof_case = (uint32_t *)(((uint8_t *)from) + - layout->fields[upb_fielddef_index(field)].case_offset); + + void* to_memory = slot_memory(layout, to, field); + uint32_t* to_oneof_case = slot_oneof_case(layout, to, field); + void* from_memory = slot_memory(layout, from, field); + uint32_t* from_oneof_case = slot_oneof_case(layout, from, field); if (upb_fielddef_containingoneof(field)) { if (*from_oneof_case == upb_fielddef_number(field)) { @@ -737,14 +738,11 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { !upb_msg_field_done(&it); upb_msg_field_next(&it)) { const upb_fielddef* field = upb_msg_iter_field(&it); - void* msg1_memory = ((uint8_t *)msg1) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* msg1_oneof_case = (uint32_t *)(((uint8_t *)msg1) + - layout->fields[upb_fielddef_index(field)].case_offset); - void* msg2_memory = ((uint8_t *)msg2) + - layout->fields[upb_fielddef_index(field)].offset; - uint32_t* msg2_oneof_case = (uint32_t *)(((uint8_t *)msg2) + - layout->fields[upb_fielddef_index(field)].case_offset); + + void* msg1_memory = slot_memory(layout, msg1, field); + uint32_t* msg1_oneof_case = slot_oneof_case(layout, msg1, field); + void* msg2_memory = slot_memory(layout, msg2, field); + uint32_t* msg2_oneof_case = slot_oneof_case(layout, msg2, field); if (upb_fielddef_containingoneof(field)) { if (*msg1_oneof_case != *msg2_oneof_case || diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index ceeaa8ada2..3e99f3b078 100644 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -598,7 +598,12 @@ module BasicTest assert o != nil assert o.class == Google::Protobuf::OneofDescriptor assert o.name == "my_oneof" - assert d.oneofs == [o] + oneof_count = 0 + d.each_oneof{ |oneof| + oneof_count += 1 + assert oneof == o + } + assert oneof_count == 1 assert o.count == 4 field_names = o.map{|f| f.name}.sort assert field_names == ["a", "b", "c", "d"] From e2debef5d8cd084946bd14fecabda5c328382114 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 14 Jan 2015 18:02:27 -0800 Subject: [PATCH 06/10] Ruby extension: added oneof accessor. --- ruby/ext/google/protobuf_c/encode_decode.c | 3 +- ruby/ext/google/protobuf_c/message.c | 45 ++++++++++++++++++++++ ruby/tests/basic.rb | 6 +++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 5e53b694d9..0db8620929 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -77,7 +77,8 @@ static const void *newoneofhandlerdata(upb_handlers *h, // we don't expose these numbers to the user, so the only requirement is that // we have some unique ID for each union case/possibility. The field tag // numbers are already present and are easy to use so there's no reason to - // create a separate ID space. + // create a separate ID space. In addition, using the field tag number here + // lets us easily look up the field in the oneof accessor. hd->oneof_case_num = upb_fielddef_number(f); if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) { hd->md = upb_fielddef_msgsubdef(f); diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index ee8881d482..d0b090143f 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -70,6 +70,36 @@ VALUE Message_alloc(VALUE klass) { return ret; } +static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) { + // If no fields in the oneof, always nil. + if (upb_oneofdef_numfields(o) == 0) { + return Qnil; + } + // Grab the first field in the oneof so we can get its layout info to find the + // oneof_case field. + upb_oneof_iter it; + upb_oneof_begin(&it, o); + assert(!upb_oneof_done(&it)); + const upb_fielddef* first_field = upb_oneof_iter_field(&it); + assert(upb_fielddef_containingoneof(first_field) != NULL); + + size_t case_ofs = + self->descriptor->layout-> + fields[upb_fielddef_index(first_field)].case_offset; + uint32_t oneof_case = *((uint32_t*)(Message_data(self) + case_ofs)); + + // oneof_case == 0 indicates no field set. + if (oneof_case == 0) { + return Qnil; + } + + // oneof_case is a field index, so find that field. + const upb_fielddef* f = upb_oneofdef_itof(o, oneof_case); + assert(f != NULL); + + return ID2SYM(rb_intern(upb_fielddef_name(f))); +} + /* * call-seq: * Message.method_missing(*args) @@ -82,6 +112,10 @@ VALUE Message_alloc(VALUE klass) { * * msg.foo = 42 * puts msg.foo + * + * This method also provides read-only accessors for oneofs. If a oneof exists + * with name 'my_oneof', then msg.my_oneof will return a Ruby symbol equal to + * the name of the field in that oneof that is currently set, or nil if none. */ VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { MessageHeader* self; @@ -104,6 +138,17 @@ VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { name_len--; } + // Check for a oneof name first. + const upb_oneofdef* o = upb_msgdef_ntoo(self->descriptor->msgdef, + name, name_len); + if (o != NULL) { + if (setter) { + rb_raise(rb_eRuntimeError, "Oneof accessors are read-only."); + } + return which_oneof_field(self, o); + } + + // Otherwise, check for a field with that name. const upb_fielddef* f = upb_msgdef_ntof(self->descriptor->msgdef, name, name_len); diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index 3e99f3b078..1b7508bb86 100644 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -615,30 +615,35 @@ module BasicTest assert d.b == nil assert d.c == nil assert d.d == nil + assert d.my_oneof == nil d.a = "hi" assert d.a == "hi" assert d.b == nil assert d.c == nil assert d.d == nil + assert d.my_oneof == :a d.b = 42 assert d.a == nil assert d.b == 42 assert d.c == nil assert d.d == nil + assert d.my_oneof == :b d.c = TestMessage2.new(:foo => 100) assert d.a == nil assert d.b == nil assert d.c.foo == 100 assert d.d == nil + assert d.my_oneof == :c d.d = :C assert d.a == nil assert d.b == nil assert d.c == nil assert d.d == :C + assert d.my_oneof == :d d2 = OneofMessage.decode(OneofMessage.encode(d)) assert d2 == d @@ -669,6 +674,7 @@ module BasicTest d5.a = nil assert d5.a == nil assert OneofMessage.encode(d5) == '' + assert d5.my_oneof == nil end def test_enum_field From 9de35e742167b1dd2941cefab6e99239207f8e2c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 26 Jan 2015 11:23:19 -0800 Subject: [PATCH 07/10] Addressed code-review comments. --- ruby/ext/google/protobuf_c/defs.c | 4 ++-- ruby/ext/google/protobuf_c/encode_decode.c | 6 +++--- ruby/ext/google/protobuf_c/storage.c | 9 +++++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 446a5586d1..b39c27f493 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -58,11 +58,11 @@ static upb_def* check_notfrozen(const upb_def* def) { } static upb_msgdef* check_msg_notfrozen(const upb_msgdef* def) { - return (upb_msgdef*)check_notfrozen((const upb_def*)def); + return upb_downcast_msgdef_mutable(check_notfrozen((const upb_def*)def)); } static upb_fielddef* check_field_notfrozen(const upb_fielddef* def) { - return (upb_fielddef*)check_notfrozen((const upb_def*)def); + return upb_downcast_fielddef_mutable(check_notfrozen((const upb_def*)def)); } static upb_oneofdef* check_oneof_notfrozen(const upb_oneofdef* def) { diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 0db8620929..3a441be35e 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -558,11 +558,11 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) { const upb_fielddef *f = upb_msg_iter_field(&i); size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset + sizeof(MessageHeader); - size_t oneof_case_offset = - desc->layout->fields[upb_fielddef_index(f)].case_offset + - sizeof(MessageHeader); if (upb_fielddef_containingoneof(f)) { + size_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + + sizeof(MessageHeader); add_handlers_for_oneof_field(h, f, offset, oneof_case_offset); } else if (is_map_field(f)) { add_handlers_for_mapfield(h, f, offset, desc); diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index adb74394ff..fac06e0a12 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -366,6 +366,11 @@ const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) { // Memory layout management. // ----------------------------------------------------------------------------- +static size_t align_up_to(size_t offset, size_t granularity) { + // Granularity must be a power of two. + return (offset + granularity - 1) & ~(granularity - 1); +} + MessageLayout* create_layout(const upb_msgdef* msgdef) { MessageLayout* layout = ALLOC(MessageLayout); int nfields = upb_msgdef_numfields(msgdef); @@ -391,7 +396,7 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { field_size = native_slot_size(upb_fielddef_type(field)); } // Align current offset up to |size| granularity. - off = (off + field_size - 1) & ~(field_size - 1); + off = align_up_to(off, field_size); layout->fields[upb_fielddef_index(field)].offset = off; layout->fields[upb_fielddef_index(field)].case_offset = MESSAGE_FIELD_NO_CASE; off += field_size; @@ -413,7 +418,7 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { // all fields. size_t field_size = NATIVE_SLOT_MAX_SIZE; // Align the offset. - off = (off + field_size - 1) & ~(field_size - 1); + off = align_up_to(off, field_size); // Assign all fields in the oneof this same offset. upb_oneof_iter fit; for (upb_oneof_begin(&fit, oneof); From 07b8b0f28d1ad88c7c1c64e8707dab8537313c26 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 26 Jan 2015 13:52:51 -0800 Subject: [PATCH 08/10] Addressed code-review comments. --- ruby/ext/google/protobuf_c/encode_decode.c | 13 ++++++----- ruby/ext/google/protobuf_c/storage.c | 25 ++++++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 3a441be35e..b9ecd456dd 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -349,8 +349,6 @@ static void *oneofsubmsg_handler(void *closure, MessageHeader* msg = closure; const oneof_handlerdata_t *oneofdata = hd; uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t); - DEREF(msg, oneofdata->case_ofs, uint32_t) = - oneofdata->oneof_case_num; VALUE subdesc = get_def_obj((void*)oneofdata->md); @@ -361,6 +359,11 @@ static void *oneofsubmsg_handler(void *closure, DEREF(msg, oneofdata->ofs, VALUE) = rb_class_new_instance(0, NULL, subklass); } + // Set the oneof case *after* allocating the new class instance -- see comment + // in layout_set() as to why. There are subtle interactions with possible GC + // points and oneof field type transitions. + DEREF(msg, oneofdata->case_ofs, uint32_t) = + oneofdata->oneof_case_num; VALUE submsg_rb = DEREF(msg, oneofdata->ofs, VALUE); MessageHeader* submsg; @@ -965,11 +968,11 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc, uint32_t offset = desc->layout->fields[upb_fielddef_index(f)].offset + sizeof(MessageHeader); - uint32_t oneof_case_offset = - desc->layout->fields[upb_fielddef_index(f)].case_offset + - sizeof(MessageHeader); if (upb_fielddef_containingoneof(f)) { + uint32_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + + sizeof(MessageHeader); // For a oneof, check that this field is actually present -- skip all the // below if not. if (DEREF(msg, oneof_case_offset, uint32_t) != diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index fac06e0a12..662101f82e 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -579,15 +579,26 @@ void layout_set(MessageLayout* layout, *oneof_case = 0; memset(memory, 0, NATIVE_SLOT_MAX_SIZE); } else { - // Set the oneof case *first* in case a GC is triggered during - // native_slot_set(): layout_mark() depends on oneof_case to know whether - // the slot may be a Ruby VALUE and so we need that lifetime to start - // before we could possibly stick a VALUE in it. - *oneof_case = upb_fielddef_number(field); - // We just overwrite the value directly if we changed oneof cases: - // native_slot_set() does not depend on the old value in memory. + // The transition between field types for a single oneof (union) slot is + // somewhat complex because we need to ensure that a GC triggered at any + // point by a call into the Ruby VM sees a valid state for this field and + // does not either go off into the weeds (following what it thinks is a + // VALUE but is actually a different field type) or miss an object (seeing + // what it thinks is a primitive field but is actually a VALUE for the new + // field type). + // + // native_slot_set() has two parts: (i) conversion of some sort, and (ii) + // setting the in-memory content to the new value. It guarantees that all + // calls to the Ruby VM are completed before the memory slot is altered. + // + // In order for the transition to be safe, the oneof case slot must be in + // sync with the value slot whenever the Ruby VM has been called. Because + // we are guaranteed that no Ruby VM calls occur after native_slot_set() + // alters the memory slot and before it returns, we set the oneof case + // immediately after native_slot_set() returns. native_slot_set(upb_fielddef_type(field), field_type_class(field), memory, val); + *oneof_case = upb_fielddef_number(field); } } else if (is_map_field(field)) { check_map_field_type(val, field); From eb33f9d3d6f1f78ee70f4bea4326adaf035e1e67 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 2 Feb 2015 13:03:08 -0800 Subject: [PATCH 09/10] Updated based on code-review comments. --- ruby/ext/google/protobuf_c/encode_decode.c | 9 +++-- ruby/ext/google/protobuf_c/protobuf.h | 14 +++++++ ruby/ext/google/protobuf_c/storage.c | 44 +++++++++++++++------- 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index b9ecd456dd..0630f56720 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -359,9 +359,12 @@ static void *oneofsubmsg_handler(void *closure, DEREF(msg, oneofdata->ofs, VALUE) = rb_class_new_instance(0, NULL, subklass); } - // Set the oneof case *after* allocating the new class instance -- see comment - // in layout_set() as to why. There are subtle interactions with possible GC - // points and oneof field type transitions. + // Set the oneof case *after* allocating the new class instance -- otherwise, + // if the Ruby GC is invoked as part of a call into the VM, it might invoke + // our mark routines, and our mark routines might see the case value + // indicating a VALUE is present and expect a valid VALUE. See comment in + // layout_set() for more detail: basically, the change to the value and the + // case must be atomic w.r.t. the Ruby VM. DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->oneof_case_num; diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 5b94ae2607..d8a327aa7d 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -292,6 +292,15 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, void* memory, VALUE value); +// Atomically (with respect to Ruby VM calls) either update the value and set a +// oneof case, or do neither. If |case_memory| is null, then no case value is +// set. +void native_slot_set_value_and_case(upb_fieldtype_t type, + VALUE type_class, + void* memory, + VALUE value, + uint32_t* case_memory, + uint32_t case_number); VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, const void* memory); @@ -313,6 +322,11 @@ VALUE field_type_class(const upb_fielddef* field); #define MAP_KEY_FIELD 1 #define MAP_VALUE_FIELD 2 +// Oneof case slot value to indicate that no oneof case is set. The value `0` is +// safe because field numbers are used as case identifiers, and no field can +// have a number of 0. +#define ONEOF_CASE_NONE 0 + // These operate on a map field (i.e., a repeated field of submessages whose // submessage type is a map-entry msgdef). bool is_map_field(const upb_fielddef* field); diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index 662101f82e..5b1549d23a 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -109,6 +109,17 @@ void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) { void native_slot_set(upb_fieldtype_t type, VALUE type_class, void* memory, VALUE value) { + native_slot_set_value_and_case(type, type_class, memory, value, NULL, 0); +} + +void native_slot_set_value_and_case(upb_fieldtype_t type, VALUE type_class, + void* memory, VALUE value, + uint32_t* case_memory, + uint32_t case_number) { + // Note that in order to atomically change the value in memory and the case + // value (w.r.t. Ruby VM calls), we must set the value at |memory| only after + // all Ruby VM calls are complete. The case is then set at the bottom of this + // function. switch (type) { case UPB_TYPE_FLOAT: if (!is_ruby_num(value)) { @@ -198,6 +209,10 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class, default: break; } + + if (case_memory != NULL) { + *case_memory = case_number; + } } VALUE native_slot_get(upb_fieldtype_t type, @@ -408,6 +423,13 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) { // We assign all value slots first, then pack the 'case' fields at the end, // since in the common case (modern 64-bit platform) these are 8 bytes and 4 // bytes respectively and we want to avoid alignment overhead. + // + // Note that we reserve 4 bytes (a uint32) per 'case' slot because the value + // space for oneof cases is conceptually as wide as field tag numbers. In + // practice, it's unlikely that a oneof would have more than e.g. 256 or 64K + // members (8 or 16 bits respectively), so conceivably we could assign + // consecutive case numbers and then pick a smaller oneof case slot size, but + // the complexity to implement this indirection is probably not worthwhile. upb_msg_oneof_iter oit; for (upb_msg_oneof_begin(&oit, msgdef); !upb_msg_oneof_done(&oit); @@ -576,7 +598,7 @@ void layout_set(MessageLayout* layout, if (upb_fielddef_containingoneof(field)) { if (val == Qnil) { // Assigning nil to a oneof field clears the oneof completely. - *oneof_case = 0; + *oneof_case = ONEOF_CASE_NONE; memset(memory, 0, NATIVE_SLOT_MAX_SIZE); } else { // The transition between field types for a single oneof (union) slot is @@ -587,18 +609,14 @@ void layout_set(MessageLayout* layout, // what it thinks is a primitive field but is actually a VALUE for the new // field type). // - // native_slot_set() has two parts: (i) conversion of some sort, and (ii) - // setting the in-memory content to the new value. It guarantees that all - // calls to the Ruby VM are completed before the memory slot is altered. - // // In order for the transition to be safe, the oneof case slot must be in - // sync with the value slot whenever the Ruby VM has been called. Because - // we are guaranteed that no Ruby VM calls occur after native_slot_set() - // alters the memory slot and before it returns, we set the oneof case - // immediately after native_slot_set() returns. - native_slot_set(upb_fielddef_type(field), field_type_class(field), - memory, val); - *oneof_case = upb_fielddef_number(field); + // sync with the value slot whenever the Ruby VM has been called. Thus, we + // use native_slot_set_value_and_case(), which ensures that both the value + // and case number are altered atomically (w.r.t. the Ruby VM). + native_slot_set_value_and_case( + upb_fielddef_type(field), field_type_class(field), + memory, val, + oneof_case, upb_fielddef_number(field)); } } else if (is_map_field(field)) { check_map_field_type(val, field); @@ -624,7 +642,7 @@ void layout_init(MessageLayout* layout, if (upb_fielddef_containingoneof(field)) { memset(memory, 0, NATIVE_SLOT_MAX_SIZE); - *oneof_case = 0; + *oneof_case = ONEOF_CASE_NONE; } else if (is_map_field(field)) { VALUE map = Qnil; From a3953da536751f2a77de0553b566bde5e8b5aa1e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 2 Feb 2015 13:16:57 -0800 Subject: [PATCH 10/10] Updated based on code-review comments. --- ruby/ext/google/protobuf_c/message.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index d0b090143f..7e58a6178b 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -88,8 +88,7 @@ static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) { fields[upb_fielddef_index(first_field)].case_offset; uint32_t oneof_case = *((uint32_t*)(Message_data(self) + case_ofs)); - // oneof_case == 0 indicates no field set. - if (oneof_case == 0) { + if (oneof_case == ONEOF_CASE_NONE) { return Qnil; }