Merge pull request #155 from cfallin/ruby-maps

Support for maps in the MRI C Ruby extension.
pull/172/head
Joshua Haberman 10 years ago
commit 5446deaea7
  1. 113
      ruby/ext/google/protobuf_c/defs.c
  2. 417
      ruby/ext/google/protobuf_c/encode_decode.c
  3. 2
      ruby/ext/google/protobuf_c/extconf.rb
  4. 805
      ruby/ext/google/protobuf_c/map.c
  5. 15
      ruby/ext/google/protobuf_c/message.c
  6. 1
      ruby/ext/google/protobuf_c/protobuf.c
  7. 86
      ruby/ext/google/protobuf_c/protobuf.h
  8. 6
      ruby/ext/google/protobuf_c/repeated_field.c
  9. 189
      ruby/ext/google/protobuf_c/storage.c
  10. 1263
      ruby/ext/google/protobuf_c/upb.c
  11. 83
      ruby/ext/google/protobuf_c/upb.h
  12. 265
      ruby/tests/basic.rb

@ -226,6 +226,7 @@ DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor");
void Descriptor_mark(void* _self) {
Descriptor* self = _self;
rb_gc_mark(self->klass);
rb_gc_mark(self->typeclass_references);
}
void Descriptor_free(void* _self) {
@ -270,6 +271,7 @@ VALUE Descriptor_alloc(VALUE klass) {
self->fill_method = NULL;
self->pb_serialize_handlers = NULL;
self->json_serialize_handlers = NULL;
self->typeclass_references = rb_ary_new();
return ret;
}
@ -923,6 +925,7 @@ DEFINE_CLASS(MessageBuilderContext,
void MessageBuilderContext_mark(void* _self) {
MessageBuilderContext* self = _self;
rb_gc_mark(self->descriptor);
rb_gc_mark(self->builder);
}
void MessageBuilderContext_free(void* _self) {
@ -935,6 +938,7 @@ VALUE MessageBuilderContext_alloc(VALUE klass) {
VALUE ret = TypedData_Wrap_Struct(
klass, &_MessageBuilderContext_type, self);
self->descriptor = Qnil;
self->builder = Qnil;
return ret;
}
@ -943,24 +947,29 @@ void MessageBuilderContext_register(VALUE module) {
module, "MessageBuilderContext", rb_cObject);
rb_define_alloc_func(klass, MessageBuilderContext_alloc);
rb_define_method(klass, "initialize",
MessageBuilderContext_initialize, 1);
MessageBuilderContext_initialize, 2);
rb_define_method(klass, "optional", MessageBuilderContext_optional, -1);
rb_define_method(klass, "required", MessageBuilderContext_required, -1);
rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1);
rb_define_method(klass, "map", MessageBuilderContext_map, -1);
cMessageBuilderContext = klass;
rb_gc_register_address(&cMessageBuilderContext);
}
/*
* call-seq:
* MessageBuilderContext.new(desc) => context
* MessageBuilderContext.new(desc, builder) => context
*
* Create a new builder context around the given message descriptor. This class
* is intended to serve as a DSL context to be used with #instance_eval.
* Create a new message builder context around the given message descriptor and
* builder context. This class is intended to serve as a DSL context to be used
* with #instance_eval.
*/
VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) {
VALUE MessageBuilderContext_initialize(VALUE _self,
VALUE msgdef,
VALUE builder) {
DEFINE_SELF(MessageBuilderContext, self, _self);
self->descriptor = msgdef;
self->builder = builder;
return Qnil;
}
@ -1065,6 +1074,97 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) {
name, type, number, type_class);
}
/*
* call-seq:
* MessageBuilderContext.map(name, key_type, value_type, number,
* value_type_class = nil)
*
* Defines a new map field on this message type with the given key and value
* types, tag number, and type class (for message and enum value types). The key
* type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value
* type must be a Ruby symbol (as accepted by FieldDescriptor#type=) and the
* type_class must be a string, if present (as accepted by
* FieldDescriptor#submsg_name=).
*/
VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
  DEFINE_SELF(MessageBuilderContext, self, _self);

  if (argc < 4) {
    rb_raise(rb_eArgError, "Expected at least 4 arguments.");
  }
  VALUE name = argv[0];
  VALUE key_type = argv[1];
  VALUE value_type = argv[2];
  VALUE number = argv[3];
  VALUE type_class = (argc > 4) ? argv[4] : Qnil;

  // SYM2ID() is only valid on symbols. Both of these come straight from the
  // user's DSL block, so reject anything else with a TypeError instead of
  // handing a non-symbol VALUE to SYM2ID().
  Check_Type(name, T_SYMBOL);
  Check_Type(key_type, T_SYMBOL);

  // Validate the key type. We can't accept enums, messages, or floats/doubles
  // as map keys. (We exclude these explicitly, and the field-descriptor setter
  // below then ensures that the type is one of the remaining valid options.)
  ID key_type_id = SYM2ID(key_type);
  if (key_type_id == rb_intern("float") ||
      key_type_id == rb_intern("double") ||
      key_type_id == rb_intern("enum") ||
      key_type_id == rb_intern("message")) {
    rb_raise(rb_eArgError,
             "Cannot add a map field with a float, double, enum, or message "
             "type.");
  }

  // Create a new message descriptor for the map entry message, and create a
  // repeated submessage field here with that type. The entry message is named
  // "<parent message name>_MapEntry_<field name>".
  const char* field_name = rb_id2name(SYM2ID(name));
  VALUE mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor);
  VALUE mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0);
  mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_");
  mapentry_desc_name = rb_str_cat2(mapentry_desc_name, field_name);
  Descriptor_name_set(mapentry_desc, mapentry_desc_name);

  // The 'mapentry' attribute has no Ruby setter because we do not want the user
  // attempting to DIY the setup below; we want to ensure that the fields are
  // correct. So we reach into the msgdef here to set the bit manually.
  Descriptor* mapentry_desc_self = ruby_to_Descriptor(mapentry_desc);
  upb_msgdef_setmapentry((upb_msgdef*)mapentry_desc_self->msgdef, true);

  // optional <type> key = 1;
  VALUE key_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  FieldDescriptor_name_set(key_field, rb_str_new2("key"));
  FieldDescriptor_label_set(key_field, ID2SYM(rb_intern("optional")));
  FieldDescriptor_number_set(key_field, INT2NUM(1));
  FieldDescriptor_type_set(key_field, key_type);
  Descriptor_add_field(mapentry_desc, key_field);

  // optional <type> value = 2;
  VALUE value_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  FieldDescriptor_name_set(value_field, rb_str_new2("value"));
  FieldDescriptor_label_set(value_field, ID2SYM(rb_intern("optional")));
  FieldDescriptor_number_set(value_field, INT2NUM(2));
  FieldDescriptor_type_set(value_field, value_type);
  if (type_class != Qnil) {
    VALUE submsg_name = rb_str_new2(".");  // prepend '.' to make name absolute.
    submsg_name = rb_str_append(submsg_name, type_class);
    FieldDescriptor_submsg_name_set(value_field, submsg_name);
  }
  Descriptor_add_field(mapentry_desc, value_field);

  // Add the map-entry message type to the current builder, and use the type to
  // create the map field itself.
  Builder* builder_self = ruby_to_Builder(self->builder);
  rb_ary_push(builder_self->pending_list, mapentry_desc);

  // repeated <map entry message> <name> = <number>;
  VALUE map_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  VALUE name_str = rb_str_new2(field_name);
  FieldDescriptor_name_set(map_field, name_str);
  FieldDescriptor_number_set(map_field, number);
  FieldDescriptor_label_set(map_field, ID2SYM(rb_intern("repeated")));
  FieldDescriptor_type_set(map_field, ID2SYM(rb_intern("message")));
  VALUE submsg_name = rb_str_new2(".");  // prepend '.' to make name absolute.
  submsg_name = rb_str_append(submsg_name, mapentry_desc_name);
  FieldDescriptor_submsg_name_set(map_field, submsg_name);
  Descriptor_add_field(self->descriptor, map_field);

  return Qnil;
}
// -----------------------------------------------------------------------------
// EnumBuilderContext.
// -----------------------------------------------------------------------------
@ -1190,7 +1290,8 @@ void Builder_register(VALUE module) {
VALUE Builder_add_message(VALUE _self, VALUE name) {
DEFINE_SELF(Builder, self, _self);
VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor);
VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext);
VALUE args[2] = { msgdef, _self };
VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
VALUE block = rb_block_proc();
rb_funcall(msgdef, rb_intern("name="), 1, name);
rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);

@ -64,7 +64,7 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
static void *startseq_handler(void* closure, const void* hd) {
MessageHeader* msg = closure;
const size_t *ofs = hd;
return (void*)DEREF(Message_data(msg), *ofs, VALUE);
return (void*)DEREF(msg, *ofs, VALUE);
}
// Handlers that append primitive values to a repeated field (a regular Ruby
@ -115,7 +115,7 @@ static void* str_handler(void *closure,
const size_t *ofs = hd;
VALUE str = rb_str_new2("");
rb_enc_associate(str, kRubyStringUtf8Encoding);
DEREF(Message_data(msg), *ofs, VALUE) = str;
DEREF(msg, *ofs, VALUE) = str;
return (void*)str;
}
@ -127,7 +127,7 @@ static void* bytes_handler(void *closure,
const size_t *ofs = hd;
VALUE str = rb_str_new2("");
rb_enc_associate(str, kRubyString8bitEncoding);
DEREF(Message_data(msg), *ofs, VALUE) = str;
DEREF(msg, *ofs, VALUE) = str;
return (void*)str;
}
@ -163,20 +163,237 @@ static void *submsg_handler(void *closure, const void *hd) {
get_def_obj((void*)submsgdata->md);
VALUE subklass = Descriptor_msgclass(subdesc);
if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) {
DEREF(Message_data(msg), submsgdata->ofs, VALUE) =
if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
DEREF(msg, submsgdata->ofs, VALUE) =
rb_class_new_instance(0, NULL, subklass);
}
VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE);
VALUE submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
MessageHeader* submsg;
TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
return submsg;
}
// Handler data for startmap/endmap handlers.
typedef struct {
// Offset passed to new_map_handlerdata(); startmapentry_handler uses it with
// DEREF() to read the Map VALUE out of the parent message closure. It is 0
// when the data is built for the map-entry msgdef itself.
size_t ofs;
// upb type of the entry's key field.
upb_fieldtype_t key_field_type;
// upb type of the entry's value field.
upb_fieldtype_t value_field_type;
// Result of field_type_class() for the value field; may be Qnil. Passed to
// native_slot_get() when the finished value is read back in endmap_handler.
VALUE value_field_typeclass;
} map_handlerdata_t;
// Temporary frame for map parsing: at the beginning of a map entry message, a
// submsg handler allocates a frame to hold (i) a reference to the Map object
// into which this message will be inserted and (ii) storage slots to
// temporarily hold the key and value for this map entry until the end of the
// submessage. When the submessage ends, another handler is called to insert the
// value into the map.
typedef struct {
// The Map VALUE read from the parent message by startmapentry_handler.
VALUE map;
// Native-slot storage for the entry key; field handlers write into it at
// offsetof(map_parse_frame_t, key_storage).
char key_storage[NATIVE_SLOT_MAX_SIZE];
// Native-slot storage for the entry value; field handlers write into it at
// offsetof(map_parse_frame_t, value_storage).
char value_storage[NATIVE_SLOT_MAX_SIZE];
} map_parse_frame_t;
// Begins a map entry. Installed as the 'startsubmsg' handler on the msgdef
// that contains the map field: it allocates a temporary parse frame, records
// the target Map (read from the parent message at the configured offset) and
// initializes the key/value slots. The returned frame becomes the closure for
// the map-entry submessage handlers.
static void *startmapentry_handler(void *closure, const void *hd) {
  MessageHeader* parent = closure;
  const map_handlerdata_t* data = hd;
  VALUE target_map = DEREF(parent, data->ofs, VALUE);

  map_parse_frame_t* entry = ALLOC(map_parse_frame_t);
  entry->map = target_map;
  native_slot_init(data->key_field_type, &entry->key_storage);
  native_slot_init(data->value_field_type, &entry->value_storage);

  return entry;
}
// Handler to end a map entry: inserts the key/value pair accumulated in the
// temporary frame into the map and releases the frame. This is the 'endmsg'
// handler on the map entry msgdef. Always returns true; |s| is not used.
static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
  map_parse_frame_t* frame = closure;
  const map_handlerdata_t* mapdata = hd;

  // Pull everything we need out of the frame into locals first.
  VALUE map = frame->map;
  VALUE key = native_slot_get(
      mapdata->key_field_type, Qnil,
      &frame->key_storage);
  VALUE value = native_slot_get(
      mapdata->value_field_type, mapdata->value_field_typeclass,
      &frame->value_storage);

  // The frame was obtained with ALLOC() (Ruby's allocator) in
  // startmapentry_handler, so it must be released with xfree(), not libc
  // free(). Release it before the insert so that it is not leaked should
  // Map_index_set() raise a Ruby exception.
  xfree(frame);

  Map_index_set(map, key, value);
  return true;
}
// Builds a map_handlerdata_t for the given map-entry message definition.
// |ofs| is the offset of the map field inside the parent message (callers that
// have no parent offset pass 0). This runs *twice* per map field: once while
// setting up the parent message's startsubmsg handler, and once while setting
// up the key/value and endmsg handlers of the map-entry message, because there
// is no easy way to hand the same handlerdata down to the sub-message handler
// setup.
static map_handlerdata_t* new_map_handlerdata(
    size_t ofs,
    const upb_msgdef* mapentry_def,
    Descriptor* desc) {
  map_handlerdata_t* data = ALLOC(map_handlerdata_t);
  data->ofs = ofs;

  const upb_fielddef* key_def = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
  assert(key_def != NULL);
  data->key_field_type = upb_fielddef_type(key_def);

  const upb_fielddef* value_def =
      upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
  assert(value_def != NULL);
  data->value_field_type = upb_fielddef_type(value_def);
  data->value_field_typeclass = field_type_class(value_def);

  // The handlerdata keeps a reference to a Ruby class, but the handlerdata is
  // owned by the handlers and is invisible to the GC. The handlers are owned by
  // *this* message's Ruby object, yet each Ruby object is rooted independently
  // at the def -> Ruby object map, so we record the class in the descriptor's
  // typeclass_references array to keep it alive as long as we are.
  VALUE typeclass = data->value_field_typeclass;
  if (typeclass != Qnil) {
    rb_ary_push(desc->typeclass_references, typeclass);
  }

  return data;
}
// Installs the parse handlers for a repeated field |f| whose array lives at
// |offset|: a startseq handler that receives the offset as handler data, plus
// a per-element append handler selected by the field's type.
static void add_handlers_for_repeated_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset) {
  upb_handlerattr seq_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&seq_attr, newhandlerdata(h, offset));
  upb_handlers_setstartseq(h, f, startseq_handler, &seq_attr);
  upb_handlerattr_uninit(&seq_attr);

  switch (upb_fielddef_type(f)) {

// Primitive element types all follow the same pattern: register the matching
// append<type>_handler with no handler data.
#define APPEND_CASE(utype, ltype)                                      \
    case utype:                                                        \
      upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL);    \
      break;

    APPEND_CASE(UPB_TYPE_BOOL,   bool);
    APPEND_CASE(UPB_TYPE_INT32,  int32);
    APPEND_CASE(UPB_TYPE_UINT32, uint32);
    APPEND_CASE(UPB_TYPE_ENUM,   int32);
    APPEND_CASE(UPB_TYPE_FLOAT,  float);
    APPEND_CASE(UPB_TYPE_INT64,  int64);
    APPEND_CASE(UPB_TYPE_UINT64, uint64);
    APPEND_CASE(UPB_TYPE_DOUBLE, double);

#undef APPEND_CASE

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      // Strings and bytes differ only in the start handler used.
      if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, appendbytes_handler, NULL);
      } else {
        upb_handlers_setstartstr(h, f, appendstr_handler, NULL);
      }
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlerattr submsg_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&submsg_attr,
                                     newsubmsghandlerdata(h, 0, f));
      upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &submsg_attr);
      upb_handlerattr_uninit(&submsg_attr);
      break;
    }
  }
}
// Installs the parse handlers for a non-repeated field |f| stored at |offset|.
// Primitive types are written through upb_shim_set(); strings/bytes and
// submessages get dedicated start handlers that receive the offset as handler
// data.
static void add_handlers_for_singular_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset) {
  const upb_fieldtype_t type = upb_fielddef_type(f);

  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
      upb_shim_set(h, f, offset, -1);
      break;

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      upb_handlerattr str_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&str_attr, newhandlerdata(h, offset));
      if (type == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, bytes_handler, &str_attr);
      } else {
        upb_handlers_setstartstr(h, f, str_handler, &str_attr);
      }
      upb_handlers_setstring(h, f, stringdata_handler, &str_attr);
      upb_handlerattr_uninit(&str_attr);
      break;
    }

    case UPB_TYPE_MESSAGE: {
      upb_handlerattr submsg_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&submsg_attr,
                                     newsubmsghandlerdata(h, offset, f));
      upb_handlers_setstartsubmsg(h, f, submsg_handler, &submsg_attr);
      upb_handlerattr_uninit(&submsg_attr);
      break;
    }
  }
}
// Adds handlers to a map field: installs startmapentry_handler as the
// 'startsubmsg' handler of |fielddef|, with handler data describing the map
// entry's key/value types and the field's |offset| in the parent message.
static void add_handlers_for_mapfield(upb_handlers* h,
                                      const upb_fielddef* fielddef,
                                      size_t offset,
                                      Descriptor* desc) {
  const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
  map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
  // new_map_handlerdata() allocates with ALLOC() (Ruby's allocator), so the
  // cleanup callback must be xfree() rather than libc free().
  upb_handlers_addcleanup(h, hd, xfree);

  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&attr, hd);
  upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
  upb_handlerattr_uninit(&attr);
}
// Adds handlers to a map-entry msgdef: an 'endmsg' handler that inserts the
// finished entry into the map, plus singular-field handlers that write the key
// and value into the temporary map_parse_frame_t used as the closure.
static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
                                      upb_handlers* h,
                                      Descriptor* desc) {
  const upb_fielddef* key_field = map_entry_key(msgdef);
  const upb_fielddef* value_field = map_entry_value(msgdef);

  // No parent-message offset is needed here, hence 0.
  map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
  // new_map_handlerdata() allocates with ALLOC() (Ruby's allocator), so the
  // cleanup callback must be xfree() rather than libc free().
  upb_handlers_addcleanup(h, hd, xfree);

  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&attr, hd);
  upb_handlers_setendmsg(h, endmap_handler, &attr);
  // Release the attr once registered, as the other handler-setup functions do.
  upb_handlerattr_uninit(&attr);

  // Key and value are parsed like ordinary singular fields, but their storage
  // is the frame's key/value slots rather than a message layout.
  add_handlers_for_singular_field(
      h, key_field,
      offsetof(map_parse_frame_t, key_storage));
  add_handlers_for_singular_field(
      h, value_field,
      offsetof(map_parse_frame_t, value_storage));
}
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
Descriptor* desc = ruby_to_Descriptor(
get_def_obj((void*)upb_handlers_msgdef(h)));
const upb_msgdef* msgdef = upb_handlers_msgdef(h);
Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
// If this is a mapentry message type, set up a special set of handlers and
// bail out of the normal (user-defined) message type handling.
if (upb_msgdef_mapentry(msgdef)) {
add_handlers_for_mapentry(msgdef, h, desc);
return;
}
// Ensure layout exists. We may be invoked to create handlers for a given
// message if we are included as a submsg of another message type before our
// class is actually built, so to work around this, we just create the layout
@ -191,82 +408,15 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
!upb_msg_done(&i);
upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
size_t offset = desc->layout->offsets[upb_fielddef_index(f)];
if (upb_fielddef_isseq(f)) {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
upb_handlers_setstartseq(h, f, startseq_handler, &attr);
upb_handlerattr_uninit(&attr);
switch (upb_fielddef_type(f)) {
size_t offset = desc->layout->offsets[upb_fielddef_index(f)] +
sizeof(MessageHeader);
#define SET_HANDLER(utype, ltype) \
case utype: \
upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
break;
SET_HANDLER(UPB_TYPE_BOOL, bool);
SET_HANDLER(UPB_TYPE_INT32, int32);
SET_HANDLER(UPB_TYPE_UINT32, uint32);
SET_HANDLER(UPB_TYPE_ENUM, int32);
SET_HANDLER(UPB_TYPE_FLOAT, float);
SET_HANDLER(UPB_TYPE_INT64, int64);
SET_HANDLER(UPB_TYPE_UINT64, uint64);
SET_HANDLER(UPB_TYPE_DOUBLE, double);
#undef SET_HANDLER
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
upb_handlers_setstartstr(h, f, is_bytes ?
appendbytes_handler : appendstr_handler,
NULL);
upb_handlers_setstring(h, f, stringdata_handler, NULL);
}
case UPB_TYPE_MESSAGE: {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
}
}
switch (upb_fielddef_type(f)) {
case UPB_TYPE_BOOL:
case UPB_TYPE_INT32:
case UPB_TYPE_UINT32:
case UPB_TYPE_ENUM:
case UPB_TYPE_FLOAT:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT64:
case UPB_TYPE_DOUBLE:
// The shim writes directly at the given offset (instead of using
// DEREF()) so we need to add the msg overhead.
upb_shim_set(h, f, offset + sizeof(MessageHeader), -1);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
upb_handlers_setstartstr(h, f,
is_bytes ? bytes_handler : str_handler,
&attr);
upb_handlers_setstring(h, f, stringdata_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
case UPB_TYPE_MESSAGE: {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
if (is_map_field(f)) {
add_handlers_for_mapfield(h, f, offset, desc);
} else if (upb_fielddef_isseq(f)) {
add_handlers_for_repeated_field(h, f, offset);
} else {
add_handlers_for_singular_field(h, f, offset);
}
}
}
@ -558,6 +708,88 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
// Emits one Ruby |value| to |sink| as field |f|, converting it according to
// the field's upb type. |type_class| is consulted only for enum fields, to
// resolve symbol values to numbers; |depth| is forwarded to putsubmsg() for
// message fields.
static void put_ruby_value(VALUE value,
                           const upb_fielddef *f,
                           VALUE type_class,
                           int depth,
                           upb_sink *sink) {
  // Only primitive fields use a value selector here; strings and submessages
  // go through putstr()/putsubmsg(), which are handed |f| directly.
  upb_selector_t sel = 0;
  if (upb_fielddef_isprimitive(f)) {
    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      upb_sink_putbool(sink, sel, value == Qtrue);
      break;
    case UPB_TYPE_INT32:
      upb_sink_putint32(sink, sel, NUM2INT(value));
      break;
    case UPB_TYPE_UINT32:
      upb_sink_putuint32(sink, sel, NUM2UINT(value));
      break;
    case UPB_TYPE_INT64:
      upb_sink_putint64(sink, sel, NUM2LL(value));
      break;
    case UPB_TYPE_UINT64:
      upb_sink_putuint64(sink, sel, NUM2ULL(value));
      break;
    case UPB_TYPE_FLOAT:
      upb_sink_putfloat(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_DOUBLE:
      upb_sink_putdouble(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_ENUM: {
      // Enum values given as symbols are first resolved through the enum
      // module's `resolve` method.
      VALUE number = value;
      if (TYPE(number) == T_SYMBOL) {
        number = rb_funcall(type_class, rb_intern("resolve"), 1, number);
      }
      upb_sink_putint32(sink, sel, NUM2INT(number));
      break;
    }
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      putstr(value, f, sink);
      break;
    case UPB_TYPE_MESSAGE:
      putsubmsg(value, f, sink, depth);
      break;
  }
}
// Serializes a Map VALUE for map field |f| into |sink|: the map is emitted as
// a sequence, and every key/value pair becomes one map-entry submessage whose
// key and value are written with put_ruby_value(). A nil map emits nothing.
static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
                   int depth) {
  if (map == Qnil) {
    return;
  }
  Map* map_self = ruby_to_Map(map);

  upb_sink seq_sink;
  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &seq_sink);

  assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
  const upb_fielddef* key_def = map_field_key(f);
  const upb_fielddef* value_def = map_field_value(f);

  Map_iter iter;
  Map_begin(map, &iter);
  while (!Map_done(&iter)) {
    VALUE entry_key = Map_iter_key(&iter);
    VALUE entry_value = Map_iter_value(&iter);

    upb_sink entry_sink;
    upb_sink_startsubmsg(&seq_sink, getsel(f, UPB_HANDLER_STARTSUBMSG),
                         &entry_sink);
    upb_sink_startmsg(&entry_sink);

    // Keys never need a type class; values may (enum/message value types).
    put_ruby_value(entry_key, key_def, Qnil, depth + 1, &entry_sink);
    put_ruby_value(entry_value, value_def, map_self->value_type_class,
                   depth + 1, &entry_sink);

    upb_status status;
    upb_sink_endmsg(&entry_sink, &status);
    upb_sink_endsubmsg(&seq_sink, getsel(f, UPB_HANDLER_ENDSUBMSG));

    Map_next(&iter);
  }

  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
static void putmsg(VALUE msg_rb, const Descriptor* desc,
upb_sink *sink, int depth) {
upb_sink_startmsg(sink);
@ -571,33 +803,38 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
MessageHeader* msg;
TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
void* msg_data = Message_data(msg);
upb_msg_iter i;
for (upb_msg_begin(&i, desc->msgdef);
!upb_msg_done(&i);
upb_msg_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)];
uint32_t offset =
desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader);
if (upb_fielddef_isseq(f)) {
VALUE ary = DEREF(msg_data, offset, VALUE);
if (is_map_field(f)) {
VALUE map = DEREF(msg, offset, VALUE);
if (map != Qnil) {
putmap(map, f, sink, depth);
}
} else if (upb_fielddef_isseq(f)) {
VALUE ary = DEREF(msg, offset, VALUE);
if (ary != Qnil) {
putary(ary, f, sink, depth);
}
} else if (upb_fielddef_isstring(f)) {
VALUE str = DEREF(msg_data, offset, VALUE);
VALUE str = DEREF(msg, offset, VALUE);
if (RSTRING_LEN(str) > 0) {
putstr(str, f, sink);
}
} else if (upb_fielddef_issubmsg(f)) {
putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth);
putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
} else {
upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
#define T(upbtypeconst, upbtype, ctype, default_value) \
case upbtypeconst: { \
ctype value = DEREF(msg_data, offset, ctype); \
ctype value = DEREF(msg, offset, ctype); \
if (value != default_value) { \
upb_sink_put##upbtype(sink, sel, value); \
} \

@ -5,6 +5,6 @@ require 'mkmf'
$CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG "
$objs = ["protobuf.o", "defs.o", "storage.o", "message.o",
"repeated_field.o", "encode_decode.o", "upb.o"]
"repeated_field.o", "map.o", "encode_decode.o", "upb.o"]
create_makefile("google/protobuf_c")

@ -0,0 +1,805 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2014 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "protobuf.h"
// -----------------------------------------------------------------------------
// Basic map operations on top of upb's strtable.
//
// Note that we roll our own `Map` container here because, as for
// `RepeatedField`, we want a strongly-typed container. This is so that any user
// errors due to incorrect map key or value types are raised as close as
// possible to the error site, rather than at some deferred point (e.g.,
// serialization).
//
// We build our `Map` on top of upb_strtable so that we're able to take
// advantage of the native_slot storage abstraction, as RepeatedField does.
// (This is not quite a perfect mapping -- see the key conversions below -- but
// gives us full support and error-checking for all value types for free.)
// -----------------------------------------------------------------------------
// Map values are stored using the native_slot abstraction (as with repeated
// field values), but keys are a bit special. Since we use a strtable, we need
// to store keys as sequences of bytes such that equality of those bytes maps
// one-to-one to equality of keys. We store strings directly (i.e., they map to
// their own bytes) and integers as native integers (using the native_slot
// abstraction).
// Note that there is another tradeoff here in keeping string keys as native
// strings rather than Ruby strings: traversing the Map requires conversion to
// Ruby string values on every traversal, potentially creating more garbage. We
// should consider ways to cache a Ruby version of the key if this becomes an
// issue later.
// Converts a Ruby key value into the byte sequence used as the key in the
// underlying strtable. |buf| must point to TABLE_KEY_BUF_LENGTH bytes of
// scratch space; it is used only when the key has to be materialized (integer
// and bool keys). On return, |out_key| / |out_length| describe the key bytes:
// for string keys they refer to the Ruby string's own contents, otherwise to
// |buf|.
#define TABLE_KEY_BUF_LENGTH 8  // sizeof(uint64_t)
static void table_key(Map* self, VALUE key,
                      char* buf,
                      const char** out_key,
                      size_t* out_length) {
  const upb_fieldtype_t key_type = self->key_type;

  switch (key_type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      // Numeric/bool keys: store the native representation in |buf|.
      native_slot_set(key_type, Qnil, buf, key);
      *out_key = buf;
      *out_length = native_slot_size(key_type);
      break;

    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      // String keys: the string's content is the table key.
      Check_Type(key, T_STRING);
      native_slot_validate_string_encoding(key_type, key);
      *out_key = RSTRING_PTR(key);
      *out_length = RSTRING_LEN(key);
      break;

    default:
      // The Map constructor should never allow a Map with any other key type
      // to be constructed.
      assert(false);
      break;
  }
}
// Inverse of table_key(): rebuilds the Ruby key object from the |length| key
// bytes at |buf| as stored in the strtable. String/bytes keys become a new
// Ruby string with the matching encoding; numeric and bool keys are read back
// through native_slot_get().
static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) {
  const upb_fieldtype_t key_type = self->key_type;

  switch (key_type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      return native_slot_get(key_type, Qnil, buf);

    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING: {
      VALUE str = rb_str_new(buf, length);
      if (key_type == UPB_TYPE_BYTES) {
        rb_enc_associate(str, kRubyString8bitEncoding);
      } else {
        rb_enc_associate(str, kRubyStringUtf8Encoding);
      }
      return str;
    }

    default:
      assert(false);
      return Qnil;
  }
}
// Returns the address of the uint64 payload inside a upb_value; map values are
// read from and written to this memory through the native_slot functions.
static void* value_memory(upb_value* v) {
  void* slot = &v->val.uint64;
  return slot;
}
// -----------------------------------------------------------------------------
// Map container type.
// -----------------------------------------------------------------------------
// Typed-data descriptor for Map wrapper objects: the type name followed by the
// callbacks Map_mark and Map_free (the third callback slot is left NULL).
const rb_data_type_t Map_type = {
"Google::Protobuf::Map",
{ Map_mark, Map_free, NULL },
};
// Ruby VALUE for the Map class.
// NOTE(review): presumably assigned when the class is registered; that code is
// outside this view.
VALUE cMap;
// Unwraps a Ruby Map object into its underlying C struct. The typed-data check
// raises a TypeError if |_self| is not a Map_type object.
Map* ruby_to_Map(VALUE _self) {
  return (Map*)rb_check_typeddata(_self, &Map_type);
}
// GC mark callback for Map. Always marks the value type class; when the value
// type is string, bytes or message, it also walks the table and marks every
// stored value through native_slot_mark().
void Map_mark(void* _self) {
  Map* map = _self;

  rb_gc_mark(map->value_type_class);

  switch (map->value_type) {
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      break;
    default:
      // Other value types are not walked.
      return;
  }

  upb_strtable_iter iter;
  upb_strtable_begin(&iter, &map->table);
  while (!upb_strtable_done(&iter)) {
    upb_value entry = upb_strtable_iter_value(&iter);
    native_slot_mark(map->value_type, value_memory(&entry));
    upb_strtable_next(&iter);
  }
}
void Map_free(void* _self) {
Map* self = _self;
upb_strtable_uninit(&self->table);
xfree(self);
}
// Allocation function for Map: creates a zero-filled Map struct with a nil
// value type class and wraps it in a Ruby object of class |klass|. The table
// itself is set up later, in Map_init.
VALUE Map_alloc(VALUE klass) {
  Map* map = ALLOC(Map);
  memset(map, 0, sizeof(Map));
  map->value_type_class = Qnil;
  return TypedData_Wrap_Struct(klass, &Map_type, map);
}
// Returns true for the field types that need an accompanying type class
// (messages and enums), false for every other type.
static bool needs_typeclass(upb_fieldtype_t type) {
  return type == UPB_TYPE_MESSAGE || type == UPB_TYPE_ENUM;
}
/*
 * call-seq:
 *     Map.new(key_type, value_type, value_typeclass = nil, init_hashmap = {})
 *     => new map
 *
 * Allocates a new Map container. This constructor may be called with 2, 3, or 4
 * arguments. The first two arguments are always present and are symbols (taking
 * on the same values as field-type symbols in message descriptors) that
 * indicate the type of the map key and value fields.
 *
 * The supported key types are: :int32, :int64, :uint32, :uint64, :bool,
 * :string, :bytes.
 *
 * The supported value types are: :int32, :int64, :uint32, :uint64, :bool,
 * :string, :bytes, :enum, :message.
 *
 * The third argument, value_typeclass, must be present if value_type is :enum
 * or :message. As in RepeatedField#new, this argument must be a message class
 * (for :message) or enum module (for :enum). An ArgumentError is raised if it
 * is missing.
 *
 * The last argument, if present, provides initial content for map. Note that
 * this may be an ordinary Ruby hashmap or another Map instance with identical
 * key and value types. Also note that this argument may be present whether or
 * not value_typeclass is present (and it is unambiguously separate from
 * value_typeclass because value_typeclass's presence is strictly determined by
 * value_type). The contents of this initial hashmap or Map instance are
 * shallow-copied into the new Map: the original map is unmodified, but
 * references to underlying objects will be shared if the value type is a
 * message type.
 *
 * Raises ArgumentError for a wrong argument count or an unsupported key type.
 */
VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
  Map* self = ruby_to_Map(_self);

  // We take either two args (:key_type, :value_type), three args (:key_type,
  // :value_type, "ValueMessageType"), or four args (the above plus an initial
  // hashmap).
  if (argc < 2 || argc > 4) {
    rb_raise(rb_eArgError, "Map constructor expects 2, 3 or 4 arguments.");
  }

  self->key_type = ruby_to_fieldtype(argv[0]);
  self->value_type = ruby_to_fieldtype(argv[1]);

  // Check that the key type is an allowed type.
  switch (self->key_type) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_BOOL:
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      // These are OK.
      break;
    default:
      rb_raise(rb_eArgError, "Invalid key type for map.");
  }

  // Index of the optional initial-content argument; it moves one to the right
  // when a type class occupies argv[2].
  int init_value_arg = 2;
  if (needs_typeclass(self->value_type)) {
    // A message/enum map without its type class would be left with a nil
    // class, so fail here instead of silently accepting the call.
    if (argc < 3) {
      rb_raise(rb_eArgError, "Expected at least 3 arguments for message/enum.");
    }
    self->value_type_class = argv[2];
    validate_type_class(self->value_type, self->value_type_class);
    init_value_arg = 3;
  }

  // Table value type is always UINT64: this ensures enough space to store the
  // native_slot value.
  if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) {
    rb_raise(rb_eRuntimeError, "Could not allocate table.");
  }

  if (argc > init_value_arg) {
    Map_merge_into_self(_self, argv[init_value_arg]);
  }

  return Qnil;
}
/*
 * call-seq:
 *     Map.each(&block)
 *
 * Yields every |key, value| pair stored in the map to the given block. The
 * order in which pairs are visited is unspecified. Map also mixes in
 * Enumerable, so the usual Ruby sequence helpers are built on top of this
 * method. Always returns nil.
 */
VALUE Map_each(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    // Convert the raw table key and the stored native slot back into Ruby
    // objects before handing them to the block.
    VALUE rb_key = table_key_to_ruby(
        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
    upb_value entry = upb_strtable_iter_value(&it);
    VALUE rb_value = native_slot_get(self->value_type,
                                     self->value_type_class,
                                     value_memory(&entry));

    rb_yield_values(2, rb_key, rb_value);
    upb_strtable_next(&it);
  }

  return Qnil;
}
/*
 * call-seq:
 *     Map.keys => [list_of_keys]
 *
 * Builds a new Ruby array holding every key in the map. The order of the keys
 * is unspecified.
 */
VALUE Map_keys(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE keys = rb_ary_new();
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    rb_ary_push(keys,
                table_key_to_ruby(self,
                                  upb_strtable_iter_key(&it),
                                  upb_strtable_iter_keylength(&it)));
    upb_strtable_next(&it);
  }

  return keys;
}
/*
 * call-seq:
 *     Map.values => [list_of_values]
 *
 * Builds a new Ruby array holding every value in the map. The order of the
 * values is unspecified.
 */
VALUE Map_values(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE values = rb_ary_new();
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    // Each table entry is a native slot; turn it back into a Ruby object.
    upb_value entry = upb_strtable_iter_value(&it);
    rb_ary_push(values,
                native_slot_get(self->value_type,
                                self->value_type_class,
                                value_memory(&entry)));
    upb_strtable_next(&it);
  }

  return values;
}
/*
 * call-seq:
 *     Map.[](key) => value
 *
 * Looks up the value stored under the given key. An exception is thrown if the
 * key has the wrong type for this map; nil is returned when the key is simply
 * absent.
 */
VALUE Map_index(VALUE _self, VALUE key) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;
  upb_value found;

  // Encode the Ruby key into the byte-string form used by the table.
  table_key(self, key, keybuf, &keyval, &length);

  if (!upb_strtable_lookup2(&self->table, keyval, length, &found)) {
    return Qnil;
  }
  return native_slot_get(self->value_type,
                         self->value_type_class,
                         value_memory(&found));
}
/*
 * call-seq:
 *     Map.[]=(key, value) => value
 *
 * Stores the given value under the given key, replacing whatever was there
 * before. An exception is thrown if the key has the wrong type for this map.
 * The value that was passed in is returned, mirroring Ruby's Hash#[]=.
 */
VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;
  upb_value slot;

  // Encode the key first, then convert the Ruby value into its native slot
  // representation; both steps happen before the table is touched.
  table_key(self, key, keybuf, &keyval, &length);
  native_slot_set(self->value_type, self->value_type_class,
                  value_memory(&slot), value);

  // An insert does not overwrite, so drop any previous entry for this key
  // before adding the new one.
  upb_strtable_remove2(&self->table, keyval, length, NULL);
  if (!upb_strtable_insert2(&self->table, keyval, length, slot)) {
    rb_raise(rb_eRuntimeError, "Could not insert into table");
  }

  return value;
}
/*
 * call-seq:
 *     Map.has_key?(key) => bool
 *
 * Tells whether the map currently holds an entry for the given key. An
 * exception is thrown if the key has the wrong type for this map.
 */
VALUE Map_has_key(VALUE _self, VALUE key) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;

  table_key(self, key, keybuf, &keyval, &length);

  // Only presence matters here, so no output slot is passed to the lookup.
  return upb_strtable_lookup2(&self->table, keyval, length, NULL)
             ? Qtrue
             : Qfalse;
}
/*
 * call-seq:
 *     Map.delete(key) => old_value
 *
 * Removes the entry stored under the given key. Returns the value that was
 * removed, or nil when the key was not present. An exception is thrown if the
 * key has the wrong type for this map.
 */
VALUE Map_delete(VALUE _self, VALUE key) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;
  upb_value removed;

  table_key(self, key, keybuf, &keyval, &length);

  if (!upb_strtable_remove2(&self->table, keyval, length, &removed)) {
    return Qnil;
  }
  // The removed slot is still readable locally; convert it for the caller.
  return native_slot_get(self->value_type,
                         self->value_type_class,
                         value_memory(&removed));
}
/*
 * call-seq:
 *     Map.clear
 *
 * Removes all entries from the map. Always returns nil. Raises RuntimeError if
 * the backing table cannot be re-created.
 */
VALUE Map_clear(VALUE _self) {
  Map* self = ruby_to_Map(_self);

  // Uninit and reinit the table -- this is faster than iterating and doing a
  // delete-lookup on each key.
  upb_strtable_uninit(&self->table);

  // Use the same value ctype as Map_init (UINT64) so that the table is
  // declared identically before and after a clear.
  if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) {
    rb_raise(rb_eRuntimeError, "Unable to re-initialize table");
  }
  return Qnil;
}
/*
 * call-seq:
 *     Map.length
 *
 * Reports how many key-value pairs the map currently holds, as a Ruby integer.
 */
VALUE Map_length(VALUE _self) {
  upb_strtable* table = &ruby_to_Map(_self)->table;
  return ULL2NUM(upb_strtable_count(table));
}
// Creates a fresh, empty map by calling `new` on the class of _self with the
// same key type and value type as _self. The value type class is forwarded as
// a third argument only when _self has one (i.e. it is not nil).
static VALUE Map_new_this_type(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE key_type = fieldtype_to_ruby(self->key_type);
  VALUE value_type = fieldtype_to_ruby(self->value_type);

  if (self->value_type_class == Qnil) {
    return rb_funcall(CLASS_OF(_self), rb_intern("new"), 2,
                      key_type, value_type);
  }
  return rb_funcall(CLASS_OF(_self), rb_intern("new"), 3,
                    key_type, value_type, self->value_type_class);
}
/*
 * call-seq:
 *     Map.dup => new_map
 *
 * Produces a shallow copy of this map: a new map of the same type whose
 * entries are duplicated slot by slot with native_slot_dup. Non-primitive
 * element objects (e.g., submessages) are shared between the two maps.
 */
VALUE Map_dup(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE new_map = Map_new_this_type(_self);
  Map* new_self = ruby_to_Map(new_map);
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    upb_value src = upb_strtable_iter_value(&it);
    upb_value copy;

    // Copy the slot contents, then file the copy under the same raw key.
    native_slot_dup(self->value_type, value_memory(&copy), value_memory(&src));

    if (!upb_strtable_insert2(&new_self->table,
                              upb_strtable_iter_key(&it),
                              upb_strtable_iter_keylength(&it),
                              copy)) {
      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
    }
    upb_strtable_next(&it);
  }

  return new_map;
}
// Backs Google::Protobuf.deep_copy for Map instances; it is not registered as
// a Ruby method on Map. Builds a new map of the same type and copies every
// entry into it with native_slot_deep_copy.
VALUE Map_deep_copy(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE new_map = Map_new_this_type(_self);
  Map* new_self = ruby_to_Map(new_map);
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    upb_value src = upb_strtable_iter_value(&it);
    upb_value copy;

    // Deep-copy the slot contents, then file the copy under the same raw key.
    native_slot_deep_copy(self->value_type,
                          value_memory(&copy),
                          value_memory(&src));

    if (!upb_strtable_insert2(&new_self->table,
                              upb_strtable_iter_key(&it),
                              upb_strtable_iter_keylength(&it),
                              copy)) {
      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
    }
    upb_strtable_next(&it);
  }

  return new_map;
}
/*
 * call-seq:
 *     Map.==(other) => boolean
 *
 * Compares this map to another. Maps are equal if they have identical key sets,
 * and for each key, the values in both maps compare equal. Elements are
 * compared as per normal Ruby semantics, by calling their :== methods (or
 * performing a more efficient comparison for primitive types).
 *
 * Maps with dissimilar key types or value types/typeclasses are never equal,
 * even if value comparison (for example, between integers and floats) would
 * have otherwise indicated that every element has equal value.
 *
 * A Ruby Hash operand is first converted into a temporary Map of this map's
 * type. Any operand that is neither a Hash nor a Map compares unequal.
 */
VALUE Map_eq(VALUE _self, VALUE _other) {
  Map* self = ruby_to_Map(_self);
  Map* other;
  VALUE result = Qtrue;
  upb_strtable_iter it;

  if (TYPE(_other) == T_HASH) {
    // Allow comparisons to Ruby hashmaps by converting to a temporary Map
    // instance. Slow, but workable.
    VALUE other_map = Map_new_this_type(_self);
    Map_merge_into_self(other_map, _other);
    _other = other_map;
  } else if (!RB_TYPE_P(_other, T_DATA) || !RTYPEDDATA_P(_other) ||
             RTYPEDDATA_TYPE(_other) != &Map_type) {
    // Not a Map at all (nil, an Integer, some unrelated object...): `==`
    // should answer false rather than hand a foreign object to ruby_to_Map.
    // NOTE(review): ruby_to_Map is presumably a typed-data unwrap that raises
    // on a type mismatch -- confirm.
    return Qfalse;
  }

  other = ruby_to_Map(_other);
  if (self == other) {
    return Qtrue;
  }
  if (self->key_type != other->key_type ||
      self->value_type != other->value_type ||
      self->value_type_class != other->value_type_class) {
    return Qfalse;
  }
  if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) {
    return Qfalse;
  }

  // For each member of self, check that an equal member exists at the same key
  // in other.
  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    upb_value v = upb_strtable_iter_value(&it);
    upb_value other_v;

    if (!upb_strtable_lookup2(&other->table,
                              upb_strtable_iter_key(&it),
                              upb_strtable_iter_keylength(&it),
                              &other_v)) {
      // Not present in other map.
      result = Qfalse;
      break;
    }
    if (!native_slot_eq(self->value_type,
                        value_memory(&v),
                        value_memory(&other_v))) {
      // Present, but value not equal.
      result = Qfalse;
      break;
    }
  }

  // When _other is the temporary Map built above, this local is its only
  // reference. Keep it visible to the collector until the loop (which may call
  // back into Ruby for element comparison) has finished using `other`.
  RB_GC_GUARD(_other);
  return result;
}
/*
 * call-seq:
 *     Map.hash => hash_value
 *
 * Returns a hash value based on this map's contents. Each entry contributes a
 * hash derived from its key's and value's own #hash results, and the per-entry
 * hashes are combined in an order-independent way, so the result does not
 * depend on the (unspecified) order in which the table is traversed.
 */
VALUE Map_hash(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  st_index_t h = rb_hash_start(0);
  // Method names are interned as IDs, not VALUEs.
  ID hash_id = rb_intern("hash");
  upb_strtable_iter it;

  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    VALUE key = table_key_to_ruby(
        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
    upb_value v = upb_strtable_iter_value(&it);
    void* mem = value_memory(&v);
    VALUE value = native_slot_get(self->value_type,
                                  self->value_type_class,
                                  mem);

    // Hash the (key, value) pair on its own, key first so that swapping a key
    // with its value changes the result...
    st_index_t entry_hash = rb_hash_start(0);
    entry_hash = rb_hash_uint(entry_hash,
                              NUM2LONG(rb_funcall(key, hash_id, 0)));
    entry_hash = rb_hash_uint(entry_hash,
                              NUM2LONG(rb_funcall(value, hash_id, 0)));

    // ...then fold it in with XOR, which is commutative: two maps holding the
    // same entries get the same hash no matter how their tables are laid out.
    h ^= rb_hash_end(entry_hash);
  }

  return INT2FIX(h);
}
/*
 * call-seq:
 *     Map.inspect => string
 *
 * Returns a string representing this map's elements. It will be formatted as
 * "{key => value, key => value, ...}", with each key and value string
 * representation computed by its own #inspect method. Entries appear in the
 * table's (unspecified) iteration order.
 */
VALUE Map_inspect(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE str = rb_str_new2("{");
  bool first = true;
  // Method names are interned as IDs, not VALUEs.
  ID inspect_id = rb_intern("inspect");
  upb_strtable_iter it;

  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    VALUE key = table_key_to_ruby(
        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
    upb_value v = upb_strtable_iter_value(&it);
    void* mem = value_memory(&v);
    VALUE value = native_slot_get(self->value_type,
                                  self->value_type_class,
                                  mem);

    // Separate entries with ", " -- but not before the first one.
    if (!first) {
      str = rb_str_cat2(str, ", ");
    } else {
      first = false;
    }

    str = rb_str_append(str, rb_funcall(key, inspect_id, 0));
    str = rb_str_cat2(str, " => ");
    str = rb_str_append(str, rb_funcall(value, inspect_id, 0));
  }

  str = rb_str_cat2(str, "}");
  return str;
}
/*
 * call-seq:
 *     Map.merge(other_map) => map
 *
 * Returns a new map made by shallow-copying this map and then copying every
 * key/value pair of other_map into the copy. Where a key exists in both, the
 * value from other_map wins. This map itself is left untouched.
 */
VALUE Map_merge(VALUE _self, VALUE hashmap) {
  return Map_merge_into_self(Map_dup(_self), hashmap);
}
// rb_hash_foreach callback used by Map_merge_into_self: stores one key/value
// pair of the source Hash into the destination map (passed through as `self`)
// and asks the iteration to keep going.
static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) {
  (void)Map_index_set(self, key, value);
  return ST_CONTINUE;
}
// Used only internally -- shared by #merge, #initialize and other in-extension
// callers. Copies every entry of `hashmap` into the map `_self`, overwriting
// entries that share a key, and returns `_self`.
//
// `hashmap` may be a Ruby Hash (each pair goes through Map_index_set, so keys
// and values are type-checked and converted) or another Map with identical key
// type, value type and value type class (slots are copied as-is). Raises
// ArgumentError for a Map with mismatching types or for any other kind of
// object, and RuntimeError if an entry cannot be inserted.
VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) {
  if (TYPE(hashmap) == T_HASH) {
    rb_hash_foreach(hashmap, merge_into_self_callback, _self);
  } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) &&
             RTYPEDDATA_TYPE(hashmap) == &Map_type) {
    Map* self = ruby_to_Map(_self);
    Map* other = ruby_to_Map(hashmap);
    upb_strtable_iter it;

    // Merging a map into itself changes nothing, and doing it literally would
    // remove and re-insert entries of the very table being iterated.
    if (self == other) {
      return _self;
    }

    if (self->key_type != other->key_type ||
        self->value_type != other->value_type ||
        self->value_type_class != other->value_type_class) {
      rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types");
    }

    for (upb_strtable_begin(&it, &other->table);
         !upb_strtable_done(&it);
         upb_strtable_next(&it)) {
      // Replace any existing value by issuing a 'remove' operation first. The
      // old value is not needed, so no output slot is supplied.
      upb_strtable_remove2(&self->table,
                           upb_strtable_iter_key(&it),
                           upb_strtable_iter_keylength(&it),
                           NULL);

      // Report a failed insert instead of silently dropping the entry, as
      // Map_index_set does.
      if (!upb_strtable_insert2(&self->table,
                                upb_strtable_iter_key(&it),
                                upb_strtable_iter_keylength(&it),
                                upb_strtable_iter_value(&it))) {
        rb_raise(rb_eRuntimeError, "Could not insert into table");
      }
    }
  } else {
    rb_raise(rb_eArgError, "Unknown type merging into Map");
  }
  return _self;
}
// Internal (C-level) map iteration, used for serialization. Map_begin points
// `iter` at the map wrapped by _self and positions it at the start of that
// map's table.
void Map_begin(VALUE _self, Map_iter* iter) {
  Map* map = ruby_to_Map(_self);

  upb_strtable_begin(&iter->it, &map->table);
  iter->self = map;
}
// Advances the iterator to the next table entry.
void Map_next(Map_iter* iter) {
  upb_strtable_iter* table_it = &iter->it;
  upb_strtable_next(table_it);
}
// Reports whether the iterator has run past the last table entry.
bool Map_done(Map_iter* iter) {
  upb_strtable_iter* table_it = &iter->it;
  return upb_strtable_done(table_it);
}
// Returns the key of the entry the iterator currently points at, converted
// from the table's raw key bytes back into a Ruby object.
VALUE Map_iter_key(Map_iter* iter) {
  const char* raw_key = upb_strtable_iter_key(&iter->it);
  size_t raw_len = upb_strtable_iter_keylength(&iter->it);

  return table_key_to_ruby(iter->self, raw_key, raw_len);
}
// Returns the value of the entry the iterator currently points at, converted
// from its native slot into a Ruby object using the owning map's value type
// and value type class.
VALUE Map_iter_value(Map_iter* iter) {
  Map* map = iter->self;
  upb_value entry = upb_strtable_iter_value(&iter->it);

  return native_slot_get(map->value_type,
                         map->value_type_class,
                         value_memory(&entry));
}
// Defines the Map class (a subclass of Object) under the given module, wires
// up its allocator and instance methods, and mixes in Enumerable.
void Map_register(VALUE module) {
VALUE klass = rb_define_class_under(module, "Map", rb_cObject);
rb_define_alloc_func(klass, Map_alloc);
// Publish the class object through the global cMap and register that global's
// address with the garbage collector.
cMap = klass;
rb_gc_register_address(&cMap);
// The last argument is the arity; -1 means the C function receives
// (argc, argv, self), as Map_init does.
rb_define_method(klass, "initialize", Map_init, -1);
rb_define_method(klass, "each", Map_each, 0);
rb_define_method(klass, "keys", Map_keys, 0);
rb_define_method(klass, "values", Map_values, 0);
rb_define_method(klass, "[]", Map_index, 1);
rb_define_method(klass, "[]=", Map_index_set, 2);
rb_define_method(klass, "has_key?", Map_has_key, 1);
rb_define_method(klass, "delete", Map_delete, 1);
rb_define_method(klass, "clear", Map_clear, 0);
rb_define_method(klass, "length", Map_length, 0);
rb_define_method(klass, "dup", Map_dup, 0);
rb_define_method(klass, "==", Map_eq, 1);
rb_define_method(klass, "hash", Map_hash, 0);
rb_define_method(klass, "inspect", Map_inspect, 0);
rb_define_method(klass, "merge", Map_merge, 1);
// Enumerable builds its methods on top of the "each" defined above.
rb_include_module(klass, rb_mEnumerable);
}

@ -139,7 +139,14 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
"Unknown field name in initialization map entry.");
}
if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
if (is_map_field(f)) {
if (TYPE(val) != T_HASH) {
rb_raise(rb_eArgError,
"Expected Hash object as initializer value for map field.");
}
VALUE map = layout_get(self->descriptor->layout, Message_data(self), f);
Map_merge_into_self(map, val);
} else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
if (TYPE(val) != T_ARRAY) {
rb_raise(rb_eArgError,
"Expected array as initializer value for repeated field.");
@ -450,13 +457,15 @@ VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) {
* call-seq:
* Google::Protobuf.deep_copy(obj) => copy_of_obj
*
* Performs a deep copy of either a RepeatedField instance or a message object,
* recursively copying its members.
* Performs a deep copy of a RepeatedField instance, a Map instance, or a
* message object, recursively copying its members.
*/
VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
VALUE klass = CLASS_OF(obj);
if (klass == cRepeatedField) {
return RepeatedField_deep_copy(obj);
} else if (klass == cMap) {
return Map_deep_copy(obj);
} else {
return Message_deep_copy(obj);
}

@ -82,6 +82,7 @@ void Init_protobuf_c() {
EnumBuilderContext_register(internal);
Builder_register(internal);
RepeatedField_register(protobuf);
Map_register(protobuf);
rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1);
rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2);

@ -110,6 +110,10 @@ struct Descriptor {
const upb_pbdecodermethod* fill_method;
const upb_handlers* pb_serialize_handlers;
const upb_handlers* json_serialize_handlers;
// Handlers hold type class references for sub-message fields directly in some
// cases. We need to keep these rooted because they might otherwise be
// collected.
VALUE typeclass_references;
};
struct FieldDescriptor {
@ -123,6 +127,7 @@ struct EnumDescriptor {
struct MessageBuilderContext {
VALUE descriptor;
VALUE builder;
};
struct EnumBuilderContext {
@ -213,10 +218,13 @@ void MessageBuilderContext_free(void* _self);
VALUE MessageBuilderContext_alloc(VALUE klass);
void MessageBuilderContext_register(VALUE module);
MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value);
VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor);
VALUE MessageBuilderContext_initialize(VALUE _self,
VALUE descriptor,
VALUE builder);
VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self);
void EnumBuilderContext_mark(void* _self);
void EnumBuilderContext_free(void* _self);
@ -239,6 +247,8 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb);
// Native slot storage abstraction.
// -----------------------------------------------------------------------------
#define NATIVE_SLOT_MAX_SIZE sizeof(void*)
size_t native_slot_size(upb_fieldtype_t type);
void native_slot_set(upb_fieldtype_t type,
VALUE type_class,
@ -246,7 +256,7 @@ void native_slot_set(upb_fieldtype_t type,
VALUE value);
VALUE native_slot_get(upb_fieldtype_t type,
VALUE type_class,
void* memory);
const void* memory);
void native_slot_init(upb_fieldtype_t type, void* memory);
void native_slot_mark(upb_fieldtype_t type, void* memory);
void native_slot_dup(upb_fieldtype_t type, void* to, void* from);
@ -254,11 +264,27 @@ void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from);
bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2);
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value);
void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value);
extern rb_encoding* kRubyStringUtf8Encoding;
extern rb_encoding* kRubyStringASCIIEncoding;
extern rb_encoding* kRubyString8bitEncoding;
VALUE field_type_class(const upb_fielddef* field);
#define MAP_KEY_FIELD 1
#define MAP_VALUE_FIELD 2
// These operate on a map field (i.e., a repeated field of submessages whose
// submessage type is a map-entry msgdef).
bool is_map_field(const upb_fielddef* field);
const upb_fielddef* map_field_key(const upb_fielddef* field);
const upb_fielddef* map_field_value(const upb_fielddef* field);
// These operate on a map-entry msgdef.
const upb_fielddef* map_entry_key(const upb_msgdef* msgdef);
const upb_fielddef* map_entry_value(const upb_msgdef* msgdef);
// -----------------------------------------------------------------------------
// Repeated field container type.
// -----------------------------------------------------------------------------
@ -282,7 +308,6 @@ extern VALUE cRepeatedField;
RepeatedField* ruby_to_RepeatedField(VALUE value);
void RepeatedField_register(VALUE module);
VALUE RepeatedField_each(VALUE _self);
VALUE RepeatedField_index(VALUE _self, VALUE _index);
void* RepeatedField_index_native(VALUE _self, int index);
@ -302,6 +327,59 @@ VALUE RepeatedField_hash(VALUE _self);
VALUE RepeatedField_inspect(VALUE _self);
VALUE RepeatedField_plus(VALUE _self, VALUE list);
// Defined in repeated_field.c; also used by Map.
void validate_type_class(upb_fieldtype_t type, VALUE klass);
// -----------------------------------------------------------------------------
// Map container type.
// -----------------------------------------------------------------------------
typedef struct {
upb_fieldtype_t key_type;
upb_fieldtype_t value_type;
VALUE value_type_class;
upb_strtable table;
} Map;
void Map_mark(void* self);
void Map_free(void* self);
VALUE Map_alloc(VALUE klass);
VALUE Map_init(int argc, VALUE* argv, VALUE self);
void Map_register(VALUE module);
extern const rb_data_type_t Map_type;
extern VALUE cMap;
Map* ruby_to_Map(VALUE value);
VALUE Map_each(VALUE _self);
VALUE Map_keys(VALUE _self);
VALUE Map_values(VALUE _self);
VALUE Map_index(VALUE _self, VALUE key);
VALUE Map_index_set(VALUE _self, VALUE key, VALUE value);
VALUE Map_has_key(VALUE _self, VALUE key);
VALUE Map_delete(VALUE _self, VALUE key);
VALUE Map_clear(VALUE _self);
VALUE Map_length(VALUE _self);
VALUE Map_dup(VALUE _self);
VALUE Map_deep_copy(VALUE _self);
VALUE Map_eq(VALUE _self, VALUE _other);
VALUE Map_hash(VALUE _self);
VALUE Map_inspect(VALUE _self);
VALUE Map_merge(VALUE _self, VALUE hashmap);
VALUE Map_merge_into_self(VALUE _self, VALUE hashmap);
typedef struct {
Map* self;
upb_strtable_iter it;
} Map_iter;
void Map_begin(VALUE _self, Map_iter* iter);
void Map_next(Map_iter* iter);
bool Map_done(Map_iter* iter);
VALUE Map_iter_key(Map_iter* iter);
VALUE Map_iter_value(Map_iter* iter);
// -----------------------------------------------------------------------------
// Message layout / storage.
// -----------------------------------------------------------------------------
@ -315,7 +393,7 @@ struct MessageLayout {
MessageLayout* create_layout(const upb_msgdef* msgdef);
void free_layout(MessageLayout* layout);
VALUE layout_get(MessageLayout* layout,
void* storage,
const void* storage,
const upb_fielddef* field);
void layout_set(MessageLayout* layout,
void* storage,

@ -324,6 +324,10 @@ VALUE RepeatedField_deep_copy(VALUE _self) {
* element types are equal, their lengths are equal, and each element is equal.
* Elements are compared as per normal Ruby semantics, by calling their :==
* methods (or performing a more efficient comparison for primitive types).
*
* Repeated fields with dissimilar element types are never equal, even if value
* comparison (for example, between integers and floats) would have otherwise
* indicated that every element has equal value.
*/
VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
if (_self == _other) {
@ -458,7 +462,7 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
return dupped;
}
static void validate_type_class(upb_fieldtype_t type, VALUE klass) {
void validate_type_class(upb_fieldtype_t type, VALUE klass) {
if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
rb_raise(rb_eArgError,
"Type class has no descriptor. Please pass a "

@ -57,7 +57,17 @@ size_t native_slot_size(upb_fieldtype_t type) {
}
}
static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
static bool is_ruby_num(VALUE value) {
return (TYPE(value) == T_FLOAT ||
TYPE(value) == T_FIXNUM ||
TYPE(value) == T_BIGNUM);
}
void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) {
if (!is_ruby_num(val)) {
rb_raise(rb_eTypeError, "Expected number type for integral field.");
}
// NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
// bound; we just need to do precision checks (i.e., disallow rounding) and
// check for < 0 on unsigned types.
@ -76,12 +86,6 @@ static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
}
}
static bool is_ruby_num(VALUE value) {
return (TYPE(value) == T_FLOAT ||
TYPE(value) == T_FIXNUM ||
TYPE(value) == T_BIGNUM);
}
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) {
bool bad_encoding = false;
rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value));
@ -156,14 +160,14 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
int32_t int_val = 0;
if (TYPE(value) == T_SYMBOL) {
// Ensure that the given symbol exists in the enum module.
VALUE lookup = rb_const_get(type_class, SYM2ID(value));
VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value);
if (lookup == Qnil) {
rb_raise(rb_eRangeError, "Unknown symbol value for enum field.");
} else {
int_val = NUM2INT(lookup);
}
} else {
check_int_range_precision(UPB_TYPE_INT32, value);
native_slot_check_int_range_precision(UPB_TYPE_INT32, value);
int_val = NUM2INT(value);
}
DEREF(memory, int32_t) = int_val;
@ -173,10 +177,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
if (!is_ruby_num(value)) {
rb_raise(rb_eTypeError, "Expected number type for integral field.");
}
check_int_range_precision(type, value);
native_slot_check_int_range_precision(type, value);
switch (type) {
case UPB_TYPE_INT32:
DEREF(memory, int32_t) = NUM2INT(value);
@ -199,7 +200,9 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
}
}
VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) {
VALUE native_slot_get(upb_fieldtype_t type,
VALUE type_class,
const void* memory) {
switch (type) {
case UPB_TYPE_FLOAT:
return DBL2NUM(DEREF(memory, float));
@ -210,7 +213,7 @@ VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) {
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
case UPB_TYPE_MESSAGE:
return *((VALUE *)memory);
return DEREF(memory, VALUE);
case UPB_TYPE_ENUM: {
int32_t val = DEREF(memory, int32_t);
VALUE symbol = enum_lookup(type_class, INT2NUM(val));
@ -246,8 +249,9 @@ void native_slot_init(upb_fieldtype_t type, void* memory) {
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
// TODO(cfallin): set encoding appropriately
DEREF(memory, VALUE) = rb_str_new2("");
rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
break;
case UPB_TYPE_MESSAGE:
DEREF(memory, VALUE) = Qnil;
@ -321,6 +325,43 @@ bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) {
}
}
// -----------------------------------------------------------------------------
// Map field utilities.
// -----------------------------------------------------------------------------
// A field is a map field when it is a repeated message field whose submessage
// definition is flagged as a map entry.
bool is_map_field(const upb_fielddef* field) {
  if (upb_fielddef_label(field) == UPB_LABEL_REPEATED &&
      upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
    return upb_msgdef_mapentry(upb_fielddef_msgsubdef(field));
  }
  return false;
}
// Returns the key field of the map-entry message behind a map field. The
// argument must satisfy is_map_field (asserted).
const upb_fielddef* map_field_key(const upb_fielddef* field) {
  assert(is_map_field(field));
  return map_entry_key(upb_fielddef_msgsubdef(field));
}
// Returns the value field of the map-entry message behind a map field. The
// argument must satisfy is_map_field (asserted).
const upb_fielddef* map_field_value(const upb_fielddef* field) {
  assert(is_map_field(field));
  return map_entry_value(upb_fielddef_msgsubdef(field));
}
// Looks up the key field of a map-entry message definition by its field
// number, MAP_KEY_FIELD. The field is asserted to exist.
const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) {
const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD);
assert(key_field != NULL);
return key_field;
}
// Looks up the value field of a map-entry message definition by its field
// number, MAP_VALUE_FIELD. The field is asserted to exist.
const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) {
const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD);
assert(value_field != NULL);
return value_field;
}
// -----------------------------------------------------------------------------
// Memory layout management.
// -----------------------------------------------------------------------------
@ -334,9 +375,12 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
size_t off = 0;
for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) {
const upb_fielddef* field = upb_msg_iter_field(&it);
size_t field_size =
(upb_fielddef_label(field) == UPB_LABEL_REPEATED) ?
sizeof(VALUE) : native_slot_size(upb_fielddef_type(field));
size_t field_size = 0;
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
field_size = sizeof(VALUE);
} else {
field_size = native_slot_size(upb_fielddef_type(field));
}
// align current offset
off = (off + field_size - 1) & ~(field_size - 1);
layout->offsets[upb_fielddef_index(field)] = off;
@ -357,7 +401,7 @@ void free_layout(MessageLayout* layout) {
xfree(layout);
}
static VALUE get_type_class(const upb_fielddef* field) {
VALUE field_type_class(const upb_fielddef* field) {
VALUE type_class = Qnil;
if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
VALUE submsgdesc =
@ -372,7 +416,7 @@ static VALUE get_type_class(const upb_fielddef* field) {
}
VALUE layout_get(MessageLayout* layout,
void* storage,
const void* storage,
const upb_fielddef* field) {
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
@ -380,7 +424,7 @@ VALUE layout_get(MessageLayout* layout,
return *((VALUE *)memory);
} else {
return native_slot_get(upb_fielddef_type(field),
get_type_class(field),
field_type_class(field),
memory);
}
}
@ -398,9 +442,8 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
rb_raise(rb_eTypeError, "Repeated field array has wrong element type");
}
if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE ||
upb_fielddef_type(field) == UPB_TYPE_ENUM) {
RepeatedField* self = ruby_to_RepeatedField(val);
if (self->field_type == UPB_TYPE_MESSAGE ||
self->field_type == UPB_TYPE_ENUM) {
if (self->field_type_class !=
get_def_obj(upb_fielddef_subdef(field))) {
rb_raise(rb_eTypeError,
@ -409,17 +452,48 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
}
}
// Verifies that `val` may be stored in the map field `field`: it must be a Map
// instance whose key type, value type and (for message/enum values) value type
// class all match the field's definition. Raises TypeError on the first
// mismatch found; returns normally otherwise.
static void check_map_field_type(VALUE val, const upb_fielddef* field) {
  assert(is_map_field(field));
  const upb_fielddef* key_field = map_field_key(field);
  const upb_fielddef* value_field = map_field_value(field);

  bool is_map_instance = RB_TYPE_P(val, T_DATA) && RTYPEDDATA_P(val) &&
                         RTYPEDDATA_TYPE(val) == &Map_type;
  if (!is_map_instance) {
    rb_raise(rb_eTypeError, "Expected Map instance");
  }

  Map* map = ruby_to_Map(val);
  if (map->key_type != upb_fielddef_type(key_field)) {
    rb_raise(rb_eTypeError, "Map key type does not match field's key type");
  }

  upb_fieldtype_t expected_value_type = upb_fielddef_type(value_field);
  if (map->value_type != expected_value_type) {
    rb_raise(rb_eTypeError, "Map value type does not match field's value type");
  }

  // Only message and enum values carry a type class that needs comparing.
  if ((expected_value_type == UPB_TYPE_MESSAGE ||
       expected_value_type == UPB_TYPE_ENUM) &&
      map->value_type_class !=
          get_def_obj(upb_fielddef_subdef(value_field))) {
    rb_raise(rb_eTypeError,
             "Map value type has wrong message/enum class");
  }
}
void layout_set(MessageLayout* layout,
void* storage,
const upb_fielddef* field,
VALUE val) {
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (is_map_field(field)) {
check_map_field_type(val, field);
DEREF(memory, VALUE) = val;
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
check_repeated_field_type(val, field);
*((VALUE *)memory) = val;
DEREF(memory, VALUE) = val;
} else {
native_slot_set(upb_fielddef_type(field), get_type_class(field),
native_slot_set(upb_fielddef_type(field), field_type_class(field),
memory, val);
}
}
@ -434,9 +508,34 @@ void layout_init(MessageLayout* layout,
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (is_map_field(field)) {
VALUE map = Qnil;
const upb_fielddef* key_field = map_field_key(field);
const upb_fielddef* value_field = map_field_value(field);
VALUE type_class = field_type_class(value_field);
if (type_class != Qnil) {
VALUE args[3] = {
fieldtype_to_ruby(upb_fielddef_type(key_field)),
fieldtype_to_ruby(upb_fielddef_type(value_field)),
type_class,
};
map = rb_class_new_instance(3, args, cMap);
} else {
VALUE args[2] = {
fieldtype_to_ruby(upb_fielddef_type(key_field)),
fieldtype_to_ruby(upb_fielddef_type(value_field)),
};
map = rb_class_new_instance(2, args, cMap);
}
DEREF(memory, VALUE) = map;
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
VALUE ary = Qnil;
VALUE type_class = get_type_class(field);
VALUE type_class = field_type_class(field);
if (type_class != Qnil) {
VALUE args[2] = {
fieldtype_to_ruby(upb_fielddef_type(field)),
@ -447,7 +546,8 @@ void layout_init(MessageLayout* layout,
VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) };
ary = rb_class_new_instance(1, args, cRepeatedField);
}
*((VALUE *)memory) = ary;
DEREF(memory, VALUE) = ary;
} else {
native_slot_init(upb_fielddef_type(field), memory);
}
@ -464,7 +564,7 @@ void layout_mark(MessageLayout* layout, void* storage) {
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
rb_gc_mark(*((VALUE *)memory));
rb_gc_mark(DEREF(memory, VALUE));
} else {
native_slot_mark(upb_fielddef_type(field), memory);
}
@ -482,8 +582,10 @@ void layout_dup(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
*((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory));
if (is_map_field(field)) {
DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE));
} else {
native_slot_dup(upb_fielddef_type(field), to_memory, from_memory);
}
@ -501,8 +603,12 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
*((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory));
if (is_map_field(field)) {
DEREF(to_memory, VALUE) =
Map_deep_copy(DEREF(from_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
DEREF(to_memory, VALUE) =
RepeatedField_deep_copy(DEREF(from_memory, VALUE));
} else {
native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory);
}
@ -520,11 +626,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) {
void* msg2_memory = ((uint8_t *)msg2) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (RepeatedField_eq(*((VALUE *)msg1_memory),
*((VALUE *)msg2_memory)) == Qfalse) {
return Qfalse;
}
if (is_map_field(field)) {
return Map_eq(DEREF(msg1_memory, VALUE),
DEREF(msg2_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
return RepeatedField_eq(DEREF(msg1_memory, VALUE),
DEREF(msg2_memory, VALUE));
} else {
if (!native_slot_eq(upb_fielddef_type(field),
msg1_memory, msg2_memory)) {

File diff suppressed because it is too large Load Diff

@ -600,6 +600,9 @@ typedef struct {
// Like strdup(), which isn't always available since it's not ANSI C.
char *upb_strdup(const char *s);
// Variant that works with a length-delimited rather than NUL-terminated string,
// as supported by strtable.
char *upb_strdup2(const char *s, size_t len);
UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val,
upb_ctype_t ctype) {
@ -654,12 +657,24 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);
typedef union {
uintptr_t num;
const char *str; // We own, nullz.
struct {
// We own this. NULL-terminated but may also contain binary data; see
// explicit length below.
// TODO: move the length to the start of the string in order to reduce
// tabkey's size (to one machine word) in a way that supports static
// initialization.
const char *str;
size_t length;
} s;
} upb_tabkey;
#define UPB_TABKEY_NUM(n) {n}
#ifdef UPB_C99
#define UPB_TABKEY_STR(s) {.str = s}
// Given that |s| is a string literal, sizeof(s) gives us a
// compile-time-constant strlen(). We must ensure that this works for static
// data initializers.
#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \
.length = sizeof(strval) - 1 } }
#endif
// TODO(haberman): C++
#define UPB_TABKEY_NONE {0}
@ -765,7 +780,14 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
// If a table resize was required but memory allocation failed, false is
// returned and the table is unchanged.
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
upb_value val);
// For NULL-terminated strings.
// Convenience wrapper: measures |key| with strlen() and forwards to
// upb_strtable_insert2(), returning its result unchanged. Because the length
// comes from strlen(), a key containing an embedded zero byte would be cut
// short here; such keys need upb_strtable_insert2() with an explicit length.
UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
upb_value val) {
return upb_strtable_insert2(t, key, strlen(key), val);
}
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
@ -782,7 +804,14 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
// Removes an item from the table. Returns true if the remove was successful,
// and stores the removed item in *val if non-NULL.
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val);
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
upb_value *val);
// For NULL-terminated strings.
// Convenience wrapper: measures |key| with strlen() and forwards to
// upb_strtable_remove2(), passing |v| through and returning its result
// unchanged. Keys containing an embedded zero byte need
// upb_strtable_remove2() with an explicit length.
UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_remove2(t, key, strlen(key), v);
}
// Updates an existing entry in an inttable. If the entry does not exist,
// returns false and does nothing. Unlike insert/remove, this does not
@ -876,6 +905,7 @@ void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
bool upb_strtable_done(const upb_strtable_iter *i);
const char *upb_strtable_iter_key(upb_strtable_iter *i);
size_t upb_strtable_iter_keylength(upb_strtable_iter *i);
upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
void upb_strtable_iter_setdone(upb_strtable_iter *i);
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
@ -1777,6 +1807,10 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
// just be moved into symtab.c?
MessageDef* Dup(const void* owner) const;
// Is this message a map entry?
void setmapentry(bool map_entry);
bool mapentry() const;
// Iteration over fields. The order is undefined.
class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
@ -1823,6 +1857,11 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// Is this a map-entry message?
// TODO: set this flag properly for static descriptors; regenerate
// descriptor.upb.c.
bool map_entry;
// TODO(haberman): proper extension ranges (there can be multiple).
));
@ -1830,7 +1869,7 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \
submsg_field_count, itof, ntof \
submsg_field_count, itof, ntof, false \
}
UPB_BEGIN_EXTERN_C // {
@ -1878,6 +1917,9 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
}
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
bool upb_msgdef_mapentry(const upb_msgdef *m);
// upb_msg_iter i;
// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
// upb_fielddef *f = upb_msg_iter_field(&i);
@ -2331,6 +2373,12 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name,
inline MessageDef* MessageDef::Dup(const void *owner) const {
return upb_msgdef_dup(this, owner);
}
// C++ wrapper: forwards |map_entry| to the C function upb_msgdef_setmapentry()
// for this message definition.
inline void MessageDef::setmapentry(bool map_entry) {
upb_msgdef_setmapentry(this, map_entry);
}
// C++ wrapper: returns the result of the C function upb_msgdef_mapentry() for
// this message definition.
inline bool MessageDef::mapentry() const {
return upb_msgdef_mapentry(this);
}
inline MessageDef::iterator MessageDef::begin() { return iterator(this); }
inline MessageDef::iterator MessageDef::end() { return iterator::end(this); }
inline MessageDef::const_iterator MessageDef::begin() const {
@ -6614,7 +6662,9 @@ typedef enum {
// | unused (24) | opc |
// | upb_inttable* (32 or 64) |
OP_HALT = 36, // No arg.
OP_DISPATCH = 36, // No arg.
OP_HALT = 37, // No arg.
} opcode;
#define OP_MAX OP_HALT
@ -7291,15 +7341,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser,
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// A pointer to the beginning of whatever text we are currently parsing.
const char *text_begin;
// The handle for the current buffer.
const upb_bufhandle *handle;
// We have to accumulate text for member names, integers, unicode escapes, and
// base64 partial results.
// Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
// TODO: add members and code for allocating a buffer when necessary (when the
// member spans input buffers or contains escapes).
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
));
UPB_BEGIN_EXTERN_C

@ -36,23 +36,43 @@ module BasicTest
add_message "TestMessage2" do
optional :foo, :int32, 1
end
add_message "Recursive1" do
optional :foo, :message, 1, "Recursive2"
end
add_message "Recursive2" do
optional :foo, :message, 1, "Recursive1"
end
add_enum "TestEnum" do
value :Default, 0
value :A, 1
value :B, 2
value :C, 3
end
add_message "BadFieldNames" do
optional :dup, :int32, 1
optional :class, :int32, 2
optional :"a.b", :int32, 3
end
add_message "MapMessage" do
map :map_string_int32, :string, :int32, 1
map :map_string_msg, :string, :message, 2, "TestMessage2"
end
add_message "MapMessageWireEquiv" do
repeated :map_string_int32, :message, 1, "MapMessageWireEquiv_entry1"
repeated :map_string_msg, :message, 2, "MapMessageWireEquiv_entry2"
end
add_message "MapMessageWireEquiv_entry1" do
optional :key, :string, 1
optional :value, :int32, 2
end
add_message "MapMessageWireEquiv_entry2" do
optional :key, :string, 1
optional :value, :message, 2, "TestMessage2"
end
end
TestMessage = pool.lookup("TestMessage").msgclass
@ -61,6 +81,12 @@ module BasicTest
Recursive2 = pool.lookup("Recursive2").msgclass
TestEnum = pool.lookup("TestEnum").enummodule
BadFieldNames = pool.lookup("BadFieldNames").msgclass
MapMessage = pool.lookup("MapMessage").msgclass
MapMessageWireEquiv = pool.lookup("MapMessageWireEquiv").msgclass
MapMessageWireEquiv_entry1 =
pool.lookup("MapMessageWireEquiv_entry1").msgclass
MapMessageWireEquiv_entry2 =
pool.lookup("MapMessageWireEquiv_entry2").msgclass
# ------------ test cases ---------------
@ -300,7 +326,7 @@ module BasicTest
l.push :B
l.push :C
assert l.count == 3
assert_raise NameError do
assert_raise RangeError do
l.push :D
end
assert l[0] == :A
@ -324,12 +350,244 @@ module BasicTest
end
end
# Exercises a standalone Google::Protobuf::Map with :string keys and :int32
# values: element assignment and lookup, equality against a plain Hash,
# has_key?, length, dup, hash, each, delete, inspect, keys, values and clear,
# plus the errors raised for a wrongly typed key and an out-of-range value.
def test_map_basic
# allowed key types:
# :int32, :int64, :uint32, :uint64, :bool, :string, :bytes.
m = Google::Protobuf::Map.new(:string, :int32)
m["asdf"] = 1
assert m["asdf"] == 1
m["jkl;"] = 42
# Equality with a Hash does not depend on the order the pairs are written in.
assert m == { "jkl;" => 42, "asdf" => 1 }
assert m.has_key?("asdf")
assert !m.has_key?("qwerty")
assert m.length == 2
# A dup compares equal to the original and hashes to the same value.
m2 = m.dup
assert m == m2
assert m.hash != 0
assert m.hash == m2.hash
# each yields key/value pairs; invert them so the result can be compared
# without depending on iteration order.
collected = {}
m.each { |k,v| collected[v] = k }
assert collected == { 42 => "jkl;", 1 => "asdf" }
# delete returns the removed value; a later lookup of that key yields nil
# and does not re-create the entry.
assert m.delete("asdf") == 1
assert !m.has_key?("asdf")
assert m["asdf"] == nil
assert !m.has_key?("asdf")
# We only assert on inspect value when there is one map entry because the
# order in which elements appear is unspecified (depends on the internal
# hash function). We don't want a brittle test.
assert m.inspect == "{\"jkl;\" => 42}"
assert m.keys == ["jkl;"]
assert m.values == [42]
m.clear
assert m.length == 0
assert m == {}
# An Integer key is rejected on a :string-keyed map.
assert_raise TypeError do
m[1] = 1
end
# 0x1_0000_0000 (2**32) does not fit in an :int32 value.
assert_raise RangeError do
m["asdf"] = 0x1_0000_0000
end
end
# A Map can be seeded from a Hash passed as the last constructor argument;
# the resulting map equals a Hash with the same pairs in any order.
def test_map_ctor
  initial_entries = {"a" => 1, "b" => 2, "c" => 3}
  m = Google::Protobuf::Map.new(:string, :int32, initial_entries)
  expected = {"a" => 1, "c" => 3, "b" => 2}
  assert m == expected
end
# For each key type used below (:int32, :int64, :uint32, :uint64, :bool,
# :string, :bytes), checks which keys are accepted and which are rejected
# with RangeError (integer outside the key type's range) or TypeError
# (key of the wrong kind). Values are always :int32.
def test_map_keytypes
# :int32 keys: negative keys are fine, 2**31 is out of range.
m = Google::Protobuf::Map.new(:int32, :int32)
m[1] = 42
m[-1] = 42
assert_raise RangeError do
m[0x8000_0000] = 1
end
assert_raise TypeError do
m["asdf"] = 1
end
# :int64 keys: 2**60 is accepted, 2**64 is out of range.
m = Google::Protobuf::Map.new(:int64, :int32)
m[0x1000_0000_0000_0000] = 1
assert_raise RangeError do
m[0x1_0000_0000_0000_0000] = 1
end
assert_raise TypeError do
m["asdf"] = 1
end
# :uint32 keys: 2**31 is accepted, 2**32 and negative keys are out of range.
m = Google::Protobuf::Map.new(:uint32, :int32)
m[0x8000_0000] = 1
assert_raise RangeError do
m[0x1_0000_0000] = 1
end
assert_raise RangeError do
m[-1] = 1
end
# :uint64 keys: 2**63 is accepted, 2**64 and negative keys are out of range.
m = Google::Protobuf::Map.new(:uint64, :int32)
m[0x8000_0000_0000_0000] = 1
assert_raise RangeError do
m[0x1_0000_0000_0000_0000] = 1
end
assert_raise RangeError do
m[-1] = 1
end
# :bool keys: only true/false are accepted; integers and strings are not.
m = Google::Protobuf::Map.new(:bool, :int32)
m[true] = 1
m[false] = 2
assert_raise TypeError do
m[1] = 1
end
assert_raise TypeError do
m["asdf"] = 1
end
# :string keys: integers are rejected, and so is the packed binary string
# (presumably because of its binary encoding -- confirm against the Map
# implementation).
m = Google::Protobuf::Map.new(:string, :int32)
m["asdf"] = 1
assert_raise TypeError do
m[1] = 1
end
assert_raise TypeError do
bytestring = ["FFFF"].pack("H*")
m[bytestring] = 1
end
# :bytes keys: the packed binary string is accepted, while an ordinary
# string literal and an integer are rejected.
m = Google::Protobuf::Map.new(:bytes, :int32)
bytestring = ["FFFF"].pack("H*")
m[bytestring] = 1
assert_raise TypeError do
m["asdf"] = 1
end
assert_raise TypeError do
m[1] = 1
end
end
# Checks maps whose values are messages or enums. The third constructor
# argument names the message class / enum module the values must belong to.
def test_map_msg_enum_valuetypes
# Message values: only instances of the declared class are accepted.
m = Google::Protobuf::Map.new(:string, :message, TestMessage)
m["asdf"] = TestMessage.new
assert_raise TypeError do
m["jkl;"] = TestMessage2.new
end
# Message-valued map seeded from a Hash (fourth constructor argument).
m = Google::Protobuf::Map.new(
:string, :message, TestMessage,
{ "a" => TestMessage.new(:optional_int32 => 42),
"b" => TestMessage.new(:optional_int32 => 84) })
assert m.length == 2
assert m.values.map{|msg| msg.optional_int32}.sort == [42, 84]
# Enum values: symbols are stored and read back as symbols.
m = Google::Protobuf::Map.new(:string, :enum, TestEnum,
{ "x" => :A, "y" => :B, "z" => :C })
assert m.length == 3
assert m["z"] == :C
# An integer that matches a declared enum value reads back as its symbol.
m["z"] = 2
assert m["z"] == :B
# An integer with no declared enum value is kept and reads back as-is.
m["z"] = 4
assert m["z"] == 4
# A symbol that is not a declared enum value is rejected.
assert_raise RangeError do
m["z"] = :Z
end
# A value that is neither a symbol nor an integer is rejected.
assert_raise TypeError do
m["z"] = "z"
end
end
# Contrasts Map#dup with Google::Protobuf.deep_copy on a message-valued map:
# both produce an equal but distinct map; dup shares the value objects with
# the original, deep_copy does not.
def test_map_dup_deep_copy
  m = Google::Protobuf::Map.new(
    :string, :message, TestMessage,
    { "a" => TestMessage.new(:optional_int32 => 42),
      "b" => TestMessage.new(:optional_int32 => 84) })

  # Shallow copy: new container, same value objects.
  shallow = m.dup
  assert m == shallow
  assert m.object_id != shallow.object_id
  ["a", "b"].each do |key|
    assert m[key].object_id == shallow[key].object_id
  end

  # Deep copy: new container and new value objects.
  deep = Google::Protobuf.deep_copy(m)
  assert m == deep
  assert m.object_id != deep.object_id
  ["a", "b"].each do |key|
    assert m[key].object_id != deep[key].object_id
  end
end
# Exercises map fields declared on a message (MapMessage): default value,
# initialization from Hashes, in-place mutation through the accessor, and
# the type checks applied when assigning to or constructing the field.
def test_map_field
# Map fields of a fresh message compare equal to an empty Hash.
m = MapMessage.new
assert m.map_string_int32 == {}
assert m.map_string_msg == {}
# Map fields can be initialized from Hashes passed to the constructor.
m = MapMessage.new(
:map_string_int32 => {"a" => 1, "b" => 2},
:map_string_msg => {"a" => TestMessage2.new(:foo => 1),
"b" => TestMessage2.new(:foo => 2)})
assert m.map_string_int32.keys.sort == ["a", "b"]
assert m.map_string_int32["a"] == 1
assert m.map_string_msg["b"].foo == 2
# Mutating the object returned by the accessor changes the message's field.
m.map_string_int32["c"] = 3
assert m.map_string_int32["c"] == 3
m.map_string_msg["c"] = TestMessage2.new(:foo => 3)
assert m.map_string_msg["c"] == TestMessage2.new(:foo => 3)
m.map_string_msg.delete("b")
m.map_string_msg.delete("c")
assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }
assert_raise TypeError do
m.map_string_msg["e"] = TestMessage.new # wrong value type
end
# ensure nothing was added by the above
assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }
# Assigning a Map with matching key/value types is accepted ...
m.map_string_int32 = Google::Protobuf::Map.new(:string, :int32)
# ... a Map with a different value type is rejected ...
assert_raise TypeError do
m.map_string_int32 = Google::Protobuf::Map.new(:string, :int64)
end
# ... and so is a plain Hash, even though the constructor above accepts one.
assert_raise TypeError do
m.map_string_int32 = {}
end
# Constructor Hashes are still type-checked entry by entry.
assert_raise TypeError do
m = MapMessage.new(:map_string_int32 => { 1 => "I am not a number" })
end
end
# Round-trips a message with map fields through encode/decode, then decodes
# the same bytes as MapMessageWireEquiv, which declares the same field
# numbers as repeated key/value entry messages. This checks that a map field
# is readable on the wire as a repeated field of such entries.
def test_map_encode_decode
  m = MapMessage.new(
    :map_string_int32 => {"a" => 1, "b" => 2},
    :map_string_msg => {"a" => TestMessage2.new(:foo => 1),
                        "b" => TestMessage2.new(:foo => 2)})
  m2 = MapMessage.decode(MapMessage.encode(m))
  assert m == m2

  m3 = MapMessageWireEquiv.decode(MapMessage.encode(m))
  assert m3.map_string_int32.length == 2

  # Collect the decoded entries into Hashes so the comparison does not depend
  # on entry order. `each` is used rather than `map` because only the side
  # effect is wanted; `map` would build and discard a result array.
  kv = {}
  m3.map_string_int32.each { |msg| kv[msg.key] = msg.value }
  assert kv == {"a" => 1, "b" => 2}

  kv = {}
  m3.map_string_msg.each { |msg| kv[msg.key] = msg.value }
  assert kv == {"a" => TestMessage2.new(:foo => 1),
                "b" => TestMessage2.new(:foo => 2)}
end
def test_enum_field
m = TestMessage.new
assert m.optional_enum == :Default
m.optional_enum = :A
assert m.optional_enum == :A
assert_raise NameError do
assert_raise RangeError do
m.optional_enum = :ASDF
end
m.optional_enum = 1
@ -384,7 +642,8 @@ module BasicTest
:repeated_string => ["a", "b", "c"],
:repeated_int32 => [42, 43, 44],
:repeated_enum => [:A, :B, :C, 100],
:repeated_msg => [TestMessage2.new(:foo => 1), TestMessage2.new(:foo => 2)])
:repeated_msg => [TestMessage2.new(:foo => 1),
TestMessage2.new(:foo => 2)])
data = TestMessage.encode m
m2 = TestMessage.decode data
assert m == m2

Loading…
Cancel
Save