Support for maps in the MRI C Ruby extension.

This adds the Map container and support for parsing and serializing maps
in the protobuf wire format (as defined by the C++ implementation, with
MapEntry submessages in a repeated field). JSON map
serialization and parsing are not yet supported, as they will require some
changes to upb as well.
pull/155/head
Chris Fallin 10 years ago
parent 644a6a1da7
commit fd1a3ff11d
  1. 110
      ruby/ext/google/protobuf_c/defs.c
  2. 386
      ruby/ext/google/protobuf_c/encode_decode.c
  3. 2
      ruby/ext/google/protobuf_c/extconf.rb
  4. 883
      ruby/ext/google/protobuf_c/map.c
  5. 15
      ruby/ext/google/protobuf_c/message.c
  6. 1
      ruby/ext/google/protobuf_c/protobuf.c
  7. 69
      ruby/ext/google/protobuf_c/protobuf.h
  8. 6
      ruby/ext/google/protobuf_c/repeated_field.c
  9. 173
      ruby/ext/google/protobuf_c/storage.c
  10. 75
      ruby/ext/google/protobuf_c/upb.c
  11. 58
      ruby/ext/google/protobuf_c/upb.h
  12. 258
      ruby/tests/basic.rb

@ -923,6 +923,7 @@ DEFINE_CLASS(MessageBuilderContext,
void MessageBuilderContext_mark(void* _self) {
MessageBuilderContext* self = _self;
rb_gc_mark(self->descriptor);
rb_gc_mark(self->builder);
}
void MessageBuilderContext_free(void* _self) {
@ -935,6 +936,7 @@ VALUE MessageBuilderContext_alloc(VALUE klass) {
VALUE ret = TypedData_Wrap_Struct(
klass, &_MessageBuilderContext_type, self);
self->descriptor = Qnil;
self->builder = Qnil;
return ret;
}
@ -943,24 +945,29 @@ void MessageBuilderContext_register(VALUE module) {
module, "MessageBuilderContext", rb_cObject);
rb_define_alloc_func(klass, MessageBuilderContext_alloc);
rb_define_method(klass, "initialize",
MessageBuilderContext_initialize, 1);
MessageBuilderContext_initialize, 2);
rb_define_method(klass, "optional", MessageBuilderContext_optional, -1);
rb_define_method(klass, "required", MessageBuilderContext_required, -1);
rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1);
rb_define_method(klass, "map", MessageBuilderContext_map, -1);
cMessageBuilderContext = klass;
rb_gc_register_address(&cMessageBuilderContext);
}
/*
* call-seq:
* MessageBuilderContext.new(desc) => context
* MessageBuilderContext.new(desc, builder) => context
*
* Create a new builder context around the given message descriptor. This class
* is intended to serve as a DSL context to be used with #instance_eval.
* Create a new message builder context around the given message descriptor and
* builder context. This class is intended to serve as a DSL context to be used
* with #instance_eval.
*/
VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) {
VALUE MessageBuilderContext_initialize(VALUE _self,
VALUE msgdef,
VALUE builder) {
DEFINE_SELF(MessageBuilderContext, self, _self);
self->descriptor = msgdef;
self->builder = builder;
return Qnil;
}
@ -1065,6 +1072,96 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) {
name, type, number, type_class);
}
/*
 * call-seq:
 *     MessageBuilderContext.map(name, key_type, value_type, number,
 *                               value_type_class = nil)
 *
 * Defines a new map field on this message type with the given key and value
 * types, tag number, and type class (for message and enum value types). The
 * key type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value
 * type must be a Ruby symbol (as accepted by FieldDescriptor#type=) and the
 * value_type_class must be a string, if present (as accepted by
 * FieldDescriptor#submsg_name=).
 *
 * Raises ArgumentError if fewer than four arguments are given or if the key
 * type is :float, :double, :enum or :message, and TypeError if name or
 * key_type is not a Symbol.
 */
VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
  DEFINE_SELF(MessageBuilderContext, self, _self);

  if (argc < 4) {
    rb_raise(rb_eArgError, "Expected at least 4 arguments.");
  }
  VALUE name = argv[0];
  VALUE key_type = argv[1];
  VALUE value_type = argv[2];
  VALUE number = argv[3];
  VALUE type_class = (argc > 4) ? argv[4] : Qnil;

  // SYM2ID() below is only meaningful on Symbols, so reject anything else up
  // front with an ordinary TypeError.
  Check_Type(name, T_SYMBOL);
  Check_Type(key_type, T_SYMBOL);

  // Validate the key type. We can't accept enums, messages, or floats/doubles
  // as map keys. (We exclude these explicitly, and the field-descriptor setter
  // below then ensures that the type is one of the remaining valid options.)
  if (SYM2ID(key_type) == rb_intern("float") ||
      SYM2ID(key_type) == rb_intern("double") ||
      SYM2ID(key_type) == rb_intern("enum") ||
      SYM2ID(key_type) == rb_intern("message")) {
    rb_raise(rb_eArgError,
             "Cannot add a map field with a float, double, enum, or message "
             "type.");
  }

  // Create a new message descriptor for the map entry message, and create a
  // repeated submessage field here with that type.
  VALUE mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor);
  VALUE mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0);
  // rb_str_cat2() appends in place, so work on a private copy instead of
  // whatever String object the descriptor's #name handed back.
  mapentry_desc_name = rb_str_dup(mapentry_desc_name);
  mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_");
  mapentry_desc_name = rb_str_cat2(mapentry_desc_name,
                                   rb_id2name(SYM2ID(name)));
  Descriptor_name_set(mapentry_desc, mapentry_desc_name);

  // The 'mapentry' attribute has no Ruby setter because we do not want the user
  // attempting to DIY the setup below; we want to ensure that the fields are
  // correct. So we reach into the msgdef here to set the bit manually.
  Descriptor* mapentry_desc_self = ruby_to_Descriptor(mapentry_desc);
  upb_msgdef_setmapentry((upb_msgdef*)mapentry_desc_self->msgdef, true);

  // optional <type> key = 1;
  VALUE key_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  FieldDescriptor_name_set(key_field, rb_str_new2("key"));
  FieldDescriptor_label_set(key_field, ID2SYM(rb_intern("optional")));
  FieldDescriptor_number_set(key_field, INT2NUM(1));
  FieldDescriptor_type_set(key_field, key_type);
  Descriptor_add_field(mapentry_desc, key_field);

  // optional <type> value = 2;
  VALUE value_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  FieldDescriptor_name_set(value_field, rb_str_new2("value"));
  FieldDescriptor_label_set(value_field, ID2SYM(rb_intern("optional")));
  FieldDescriptor_number_set(value_field, INT2NUM(2));
  FieldDescriptor_type_set(value_field, value_type);
  if (type_class != Qnil) {
    VALUE submsg_name = rb_str_new2(".");  // prepend '.' to make name absolute.
    submsg_name = rb_str_append(submsg_name, type_class);
    FieldDescriptor_submsg_name_set(value_field, submsg_name);
  }
  Descriptor_add_field(mapentry_desc, value_field);

  // Add the map-entry message type to the current builder, and use the type to
  // create the map field itself.
  Builder* builder_self = ruby_to_Builder(self->builder);
  rb_ary_push(builder_self->pending_list, mapentry_desc);

  VALUE map_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
  VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name)));
  FieldDescriptor_name_set(map_field, name_str);
  FieldDescriptor_number_set(map_field, number);
  FieldDescriptor_label_set(map_field, ID2SYM(rb_intern("repeated")));
  FieldDescriptor_type_set(map_field, ID2SYM(rb_intern("message")));
  VALUE submsg_name = rb_str_new2(".");  // prepend '.' to make name absolute.
  submsg_name = rb_str_append(submsg_name, mapentry_desc_name);
  FieldDescriptor_submsg_name_set(map_field, submsg_name);
  Descriptor_add_field(self->descriptor, map_field);

  return Qnil;
}
// -----------------------------------------------------------------------------
// EnumBuilderContext.
// -----------------------------------------------------------------------------
@ -1190,7 +1287,8 @@ void Builder_register(VALUE module) {
VALUE Builder_add_message(VALUE _self, VALUE name) {
DEFINE_SELF(Builder, self, _self);
VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor);
VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext);
VALUE args[2] = { msgdef, _self };
VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
VALUE block = rb_block_proc();
rb_funcall(msgdef, rb_intern("name="), 1, name);
rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);

@ -174,9 +174,222 @@ static void *submsg_handler(void *closure, const void *hd) {
return submsg;
}
// Handler data for startmap/endmap handlers. One instance is attached to the
// handlers of the message that contains the map field and another to the
// handlers of the map-entry message itself.
typedef struct {
// Offset of the map field inside the parent message's data section; used by
// startmapentry_handler to fetch the Map object. The instance built for the
// map-entry message's own handlers sets this to 0.
size_t ofs;
// Field number 1 of the map-entry message (the key).
const upb_fielddef* key_field;
// Field number 2 of the map-entry message (the value).
const upb_fielddef* value_field;
// Result of field_type_class(value_field); passed to native_slot_get() when
// the parsed value is converted to a Ruby object.
VALUE value_field_typeclass;
} map_handlerdata_t;
// Temporary frame for map parsing: at the beginning of a map entry message, a
// submsg handler allocates a frame to hold (i) a reference to the Map object
// into which this message will be inserted and (ii) storage slots to
// temporarily hold the key and value for this map entry until the end of the
// submessage. When the submessage ends, another handler is called to insert the
// value into the map.
//
// The frame is used as the closure for the map-entry message's field handlers,
// which address key_storage/value_storage by their offsetof() within this
// struct, so the field layout here is significant.
typedef struct {
// Destination Map object (taken from the parent message's map field).
VALUE map;
// Native-slot storage for the entry's key (field 1).
char key_storage[NATIVE_SLOT_MAX_SIZE];
// Native-slot storage for the entry's value (field 2).
char value_storage[NATIVE_SLOT_MAX_SIZE];
} map_parse_frame_t;
// 'startseq' handler for a map field. It does no work of its own: the parent
// message closure is handed through unchanged so that the per-entry handlers
// receive it as their closure.
static void *startmap_handler(void *closure, const void *hd) {
  (void)hd;  // handler data is not needed at sequence start.
  return closure;
}
// 'startsubmsg' handler installed on the message that owns the map field.
// Looks up the Map stored in the parent message and returns a freshly
// allocated parse frame (with initialized key/value slots) that serves as the
// closure for the map-entry message's handlers.
static void *startmapentry_handler(void *closure, const void *hd) {
  const map_handlerdata_t* entry_info = hd;
  MessageHeader* parent = closure;
  VALUE target_map = DEREF(Message_data(parent), entry_info->ofs, VALUE);

  map_parse_frame_t* new_frame = ALLOC(map_parse_frame_t);
  new_frame->map = target_map;

  native_slot_init(upb_fielddef_type(entry_info->key_field),
                   &new_frame->key_storage);
  native_slot_init(upb_fielddef_type(entry_info->value_field),
                   &new_frame->value_storage);

  return new_frame;
}
// Handler to end a map entry: inserts the value defined during the message into
// the map. This is the 'endmsg' handler on the map entry msgdef.
//
// |closure| is the map_parse_frame_t allocated by startmapentry_handler and
// |hd| is the map_handlerdata_t for the map-entry message. Always returns true.
static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
  map_parse_frame_t* frame = closure;
  const map_handlerdata_t* mapdata = hd;

  // Convert the temporarily stored native slots into Ruby objects.
  VALUE key = native_slot_get(
      upb_fielddef_type(mapdata->key_field), Qnil,
      &frame->key_storage);
  VALUE value = native_slot_get(
      upb_fielddef_type(mapdata->value_field), mapdata->value_field_typeclass,
      &frame->value_storage);

  // NOTE(review): Map_index_set() can raise (e.g. on insert failure), in which
  // case the frame below is not released -- confirm whether that matters.
  Map_index_set(frame->map, key, value);

  // The frame was obtained with ALLOC() (Ruby's xmalloc), so it has to be
  // released with xfree() rather than the C library's free().
  xfree(frame);

  return true;
}
// Builds the handler data for a map field from its map-entry message
// definition: records |ofs| (the map field's offset in the parent message, or
// whatever the caller passes when that is not applicable), the entry's key
// field (number 1), its value field (number 2) and the value's type class.
//
// This runs twice per map field -- once while setting up the parent message's
// startsubmsg handler and once while setting up the map-entry message's
// key/value/endmsg handlers -- because there is no easy way to pass the
// handler data down to the sub-message handler setup.
static map_handlerdata_t* new_map_handlerdata(
    size_t ofs,
    const upb_msgdef* mapentry_def) {
  map_handlerdata_t* data = ALLOC(map_handlerdata_t);
  const upb_fielddef* key = upb_msgdef_itof(mapentry_def, 1);
  const upb_fielddef* value = upb_msgdef_itof(mapentry_def, 2);

  assert(key != NULL);
  assert(value != NULL);

  data->ofs = ofs;
  data->key_field = key;
  data->value_field = value;
  data->value_field_typeclass = field_type_class(value);
  return data;
}
// Set up handlers for a repeated field |f| stored at |offset| within the
// message's data section: a startseq handler (which receives the offset as
// handler data) plus a per-element append handler chosen by the field type.
static void add_handlers_for_repeated_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset) {
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
  upb_handlers_setstartseq(h, f, startseq_handler, &attr);
  upb_handlerattr_uninit(&attr);

  switch (upb_fielddef_type(f)) {

#define SET_HANDLER(utype, ltype)                                 \
  case utype:                                                     \
    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
    break;

    SET_HANDLER(UPB_TYPE_BOOL,   bool);
    SET_HANDLER(UPB_TYPE_INT32,  int32);
    SET_HANDLER(UPB_TYPE_UINT32, uint32);
    SET_HANDLER(UPB_TYPE_ENUM,   int32);
    SET_HANDLER(UPB_TYPE_FLOAT,  float);
    SET_HANDLER(UPB_TYPE_INT64,  int64);
    SET_HANDLER(UPB_TYPE_UINT64, uint64);
    SET_HANDLER(UPB_TYPE_DOUBLE, double);

#undef SET_HANDLER

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
      upb_handlers_setstartstr(h, f, is_bytes ?
                               appendbytes_handler : appendstr_handler,
                               NULL);
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
      // Without this break, control fell through into the MESSAGE case and
      // installed submessage handlers on a string/bytes field.
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlerattr submsg_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&submsg_attr,
                                     newsubmsghandlerdata(h, 0, f));
      upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &submsg_attr);
      upb_handlerattr_uninit(&submsg_attr);
      break;
    }
  }
}
// Installs the parse handlers for a non-repeated field |f| whose storage lives
// at |offset| in the message's data section. Strings/bytes get start-string
// and string-data handlers, submessages get a start-submessage handler, and
// every primitive type is written directly through a upb shim.
static void add_handlers_for_singular_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset) {
  const upb_fieldtype_t type = upb_fielddef_type(f);

  if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES) {
    upb_handlerattr str_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&str_attr, newhandlerdata(h, offset));
    if (type == UPB_TYPE_BYTES) {
      upb_handlers_setstartstr(h, f, bytes_handler, &str_attr);
    } else {
      upb_handlers_setstartstr(h, f, str_handler, &str_attr);
    }
    upb_handlers_setstring(h, f, stringdata_handler, &str_attr);
    upb_handlerattr_uninit(&str_attr);
    return;
  }

  if (type == UPB_TYPE_MESSAGE) {
    upb_handlerattr msg_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&msg_attr,
                                   newsubmsghandlerdata(h, offset, f));
    upb_handlers_setstartsubmsg(h, f, submsg_handler, &msg_attr);
    upb_handlerattr_uninit(&msg_attr);
    return;
  }

  switch (type) {
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_BOOL:
      // The shim stores straight to closure + offset (it does not go through
      // DEREF()), so the message header size has to be added here.
      upb_shim_set(h, f, offset + sizeof(MessageHeader), -1);
      break;
    default:
      break;
  }
}
// Adds handlers to a map field |fielddef| of the message whose handlers are
// |h|; |offset| is the field's offset in the message's data section. Installs
// the startseq handler and the per-entry startsubmsg handler, both sharing one
// map_handlerdata_t owned by |h|.
static void add_handlers_for_mapfield(upb_handlers* h,
                                      const upb_fielddef* fielddef,
                                      size_t offset) {
  const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
  map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef);
  // hd comes from ALLOC() (Ruby's xmalloc), so the matching deallocator is
  // xfree(), not the C library's free().
  upb_handlers_addcleanup(h, hd, xfree);

  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&attr, hd);
  upb_handlers_setstartseq(h, fielddef, startmap_handler, &attr);
  upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
  upb_handlerattr_uninit(&attr);
}
// Adds handlers to a map-entry msgdef: an endmsg handler that inserts the
// parsed pair into the Map, and ordinary singular-field handlers for the key
// and value that write into the map_parse_frame_t used as closure.
static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
                                      upb_handlers* h) {
  map_handlerdata_t* hd = new_map_handlerdata(0, msgdef);
  // hd comes from ALLOC() (Ruby's xmalloc), so the matching deallocator is
  // xfree(), not the C library's free().
  upb_handlers_addcleanup(h, hd, xfree);

  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlerattr_sethandlerdata(&attr, hd);
  upb_handlers_setendmsg(h, endmap_handler, &attr);

  // Convert the offsets into map_parse_frame_t to offsets understood by the
  // singular field handlers, so that we don't have to use special
  // map-key/value-specific handlers. The ordinary singular field handlers
  // expect a Message* and assume offset is relative to the data section at the
  // end, so we compensate for that addition.
  size_t key_offset =
      offsetof(map_parse_frame_t, key_storage) - sizeof(MessageHeader);
  size_t value_offset =
      offsetof(map_parse_frame_t, value_storage) - sizeof(MessageHeader);

  add_handlers_for_singular_field(h, hd->key_field, key_offset);
  add_handlers_for_singular_field(h, hd->value_field, value_offset);
}
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
Descriptor* desc = ruby_to_Descriptor(
get_def_obj((void*)upb_handlers_msgdef(h)));
const upb_msgdef* msgdef = upb_handlers_msgdef(h);
Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
// If this is a mapentry message type, set up a special set of handlers and
// bail out of the normal (user-defined) message type handling.
if (upb_msgdef_mapentry(msgdef)) {
add_handlers_for_mapentry(msgdef, h);
return;
}
// Ensure layout exists. We may be invoked to create handlers for a given
// message if we are included as a submsg of another message type before our
// class is actually built, so to work around this, we just create the layout
@ -193,80 +406,12 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
const upb_fielddef *f = upb_msg_iter_field(&i);
size_t offset = desc->layout->offsets[upb_fielddef_index(f)];
if (upb_fielddef_isseq(f)) {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
upb_handlers_setstartseq(h, f, startseq_handler, &attr);
upb_handlerattr_uninit(&attr);
switch (upb_fielddef_type(f)) {
#define SET_HANDLER(utype, ltype) \
case utype: \
upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
break;
SET_HANDLER(UPB_TYPE_BOOL, bool);
SET_HANDLER(UPB_TYPE_INT32, int32);
SET_HANDLER(UPB_TYPE_UINT32, uint32);
SET_HANDLER(UPB_TYPE_ENUM, int32);
SET_HANDLER(UPB_TYPE_FLOAT, float);
SET_HANDLER(UPB_TYPE_INT64, int64);
SET_HANDLER(UPB_TYPE_UINT64, uint64);
SET_HANDLER(UPB_TYPE_DOUBLE, double);
#undef SET_HANDLER
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
upb_handlers_setstartstr(h, f, is_bytes ?
appendbytes_handler : appendstr_handler,
NULL);
upb_handlers_setstring(h, f, stringdata_handler, NULL);
}
case UPB_TYPE_MESSAGE: {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
}
}
switch (upb_fielddef_type(f)) {
case UPB_TYPE_BOOL:
case UPB_TYPE_INT32:
case UPB_TYPE_UINT32:
case UPB_TYPE_ENUM:
case UPB_TYPE_FLOAT:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT64:
case UPB_TYPE_DOUBLE:
// The shim writes directly at the given offset (instead of using
// DEREF()) so we need to add the msg overhead.
upb_shim_set(h, f, offset + sizeof(MessageHeader), -1);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
upb_handlers_setstartstr(h, f,
is_bytes ? bytes_handler : str_handler,
&attr);
upb_handlers_setstring(h, f, stringdata_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
case UPB_TYPE_MESSAGE: {
upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
upb_handlerattr_uninit(&attr);
break;
}
if (is_map_field(f)) {
add_handlers_for_mapfield(h, f, offset);
} else if (upb_fielddef_isseq(f)) {
add_handlers_for_repeated_field(h, f, offset);
} else {
add_handlers_for_singular_field(h, f, offset);
}
}
}
@ -558,6 +703,88 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
// Emits a single Ruby |value| for field |f| into |sink|, converting it
// according to the field's upb type. |type_class| is consulted only for enum
// fields (to resolve a Symbol to its number); |depth| is forwarded to
// putsubmsg() for message fields.
static void put_ruby_value(VALUE value,
                           const upb_fielddef *f,
                           VALUE type_class,
                           int depth,
                           upb_sink *sink) {
  // Only primitive fields have a value selector; everything else leaves it 0.
  const upb_selector_t sel = upb_fielddef_isprimitive(f)
      ? getsel(f, upb_handlers_getprimitivehandlertype(f))
      : 0;
  const upb_fieldtype_t type = upb_fielddef_type(f);

  switch (type) {
    case UPB_TYPE_BOOL:
      upb_sink_putbool(sink, sel, value == Qtrue);
      break;
    case UPB_TYPE_FLOAT:
      upb_sink_putfloat(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_DOUBLE:
      upb_sink_putdouble(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_INT32:
      upb_sink_putint32(sink, sel, NUM2INT(value));
      break;
    case UPB_TYPE_UINT32:
      upb_sink_putuint32(sink, sel, NUM2UINT(value));
      break;
    case UPB_TYPE_INT64:
      upb_sink_putint64(sink, sel, NUM2LL(value));
      break;
    case UPB_TYPE_UINT64:
      upb_sink_putuint64(sink, sel, NUM2ULL(value));
      break;
    case UPB_TYPE_ENUM: {
      VALUE number = value;
      // Symbols are mapped to their numeric value through the enum module.
      if (TYPE(number) == T_SYMBOL) {
        number = rb_funcall(type_class, rb_intern("resolve"), 1, number);
      }
      upb_sink_putint32(sink, sel, NUM2INT(number));
      break;
    }
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      putstr(value, f, sink);
      break;
    case UPB_TYPE_MESSAGE:
      putsubmsg(value, f, sink, depth);
      break;
  }
}
// Serializes the Map object |map| held by map field |f| into |sink|: the map
// is emitted as a sequence in which every key/value pair becomes one
// submessage containing the key field followed by the value field. A nil
// |map| emits nothing.
static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
int depth) {
if (map == Qnil) return;
Map* self = ruby_to_Map(map);
upb_sink subsink;
upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
// Key and value field definitions of the map-entry message.
const upb_fielddef* key_field = map_field_key(f);
const upb_fielddef* value_field = map_field_value(f);
Map_iter it;
for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) {
VALUE key = Map_iter_key(&it);
VALUE value = Map_iter_value(&it);
// Each pair is wrapped in its own submessage on the sequence sink.
upb_sink entry_sink;
upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG), &entry_sink);
upb_sink_startmsg(&entry_sink);
// Keys never need a type class; values use the Map's value type class.
put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
put_ruby_value(value, value_field, self->value_type_class, depth + 1,
&entry_sink);
// NOTE(review): status is handed to upb_sink_endmsg() uninitialized and is
// never inspected afterwards -- confirm that this is intended.
upb_status status;
upb_sink_endmsg(&entry_sink, &status);
upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
}
upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
static void putmsg(VALUE msg_rb, const Descriptor* desc,
upb_sink *sink, int depth) {
upb_sink_startmsg(sink);
@ -580,7 +807,12 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
upb_fielddef *f = upb_msg_iter_field(&i);
uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)];
if (upb_fielddef_isseq(f)) {
if (is_map_field(f)) {
VALUE map = DEREF(msg_data, offset, VALUE);
if (map != Qnil) {
putmap(map, f, sink, depth);
}
} else if (upb_fielddef_isseq(f)) {
VALUE ary = DEREF(msg_data, offset, VALUE);
if (ary != Qnil) {
putary(ary, f, sink, depth);

@ -5,6 +5,6 @@ require 'mkmf'
$CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG "
$objs = ["protobuf.o", "defs.o", "storage.o", "message.o",
"repeated_field.o", "encode_decode.o", "upb.o"]
"repeated_field.o", "map.o", "encode_decode.o", "upb.o"]
create_makefile("google/protobuf_c")

@ -0,0 +1,883 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2014 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "protobuf.h"
// -----------------------------------------------------------------------------
// Basic map operations on top of upb's strtable.
// -----------------------------------------------------------------------------
// Map values are stored using the native_slot abstraction (as with repeated
// field values), but keys are a bit special. Since we use a strtable, we need
// to store keys as sequences of bytes such that equality of those bytes maps
// one-to-one to equality of keys. We store strings directly (i.e., they map to
// their own bytes) and integers and bools as sequences of 8 bytes in
// host-byte-order, as either an int64_t (signed types, bool) or a uint64_t.
// Forms a key to use with the underlying strtable from a Ruby key value. |buf|
// must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to
// construct a key byte sequence if needed. |out_key| and |out_length| provide
// the resulting key data/length.
//
// String/bytes keys point directly at the Ruby string's contents; bool and
// signed integer keys are written to |buf| as an int64_t and unsigned integer
// keys as a uint64_t, both in host byte order. Raises a Ruby exception if
// |key| does not pass the type/range checks for the map's key type.
#define TABLE_KEY_BUF_LENGTH 8  // sizeof(uint64_t)
static void table_key(Map* self, VALUE key,
                      char* buf,
                      const char** out_key,
                      size_t* out_length) {
  switch (self->key_type) {
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      // Strings: use string content directly.
      Check_Type(key, T_STRING);
      native_slot_validate_string_encoding(self->key_type, key);
      *out_key = RSTRING_PTR(key);
      *out_length = RSTRING_LEN(key);
      break;

    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64: {
      // Signed numeric types: use an int64 in host-native byte order.
      int64_t key_val = 0;

      // Do a range/value check.
      switch (self->key_type) {
        case UPB_TYPE_BOOL:
          if (key != Qtrue && key != Qfalse) {
            rb_raise(rb_eTypeError, "Key must be true or false");
          }
          key_val = (key == Qtrue) ? 1 : 0;
          break;
        case UPB_TYPE_INT32:
          native_slot_check_int_range_precision(self->key_type, key);
          key_val = NUM2INT(key);
          break;
        case UPB_TYPE_INT64:
          native_slot_check_int_range_precision(self->key_type, key);
          key_val = NUM2LL(key);
          break;
        default:
          break;
      }

      // |buf| is a plain char buffer with no alignment guarantee, so copy the
      // bytes rather than storing through a casted int64_t pointer.
      memcpy(buf, &key_val, sizeof(key_val));
      *out_key = buf;
      *out_length = sizeof(key_val);
      break;
    }

    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64: {
      // Unsigned numeric types: use a uint64 in host-native byte order.
      uint64_t key_val = 0;

      // Do a range/value check.
      native_slot_check_int_range_precision(self->key_type, key);
      switch (self->key_type) {
        case UPB_TYPE_UINT32:
          key_val = NUM2UINT(key);
          break;
        case UPB_TYPE_UINT64:
          key_val = NUM2ULL(key);
          break;
        default:
          break;
      }

      // See above: copy instead of a potentially misaligned store.
      memcpy(buf, &key_val, sizeof(key_val));
      *out_key = buf;
      *out_length = sizeof(key_val);
      break;
    }

    default:
      // Map constructor should not allow a Map with another key type to be
      // constructed.
      assert(false);
      break;
  }
}
// Converts a strtable key (|length| bytes at |buf|) back into the Ruby object
// it was formed from, according to the map's key type: a String with the
// 8-bit or UTF-8 encoding for bytes/string keys, true/false for bool keys and
// an Integer for the integer key types. Returns nil for any other key type.
static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) {
  switch (self->key_type) {
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING: {
      VALUE ret = rb_str_new(buf, length);
      rb_enc_associate(ret,
                       (self->key_type == UPB_TYPE_BYTES) ?
                       kRubyString8bitEncoding : kRubyStringUtf8Encoding);
      return ret;
    }

    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64: {
      int64_t int64_key;
      assert(length == sizeof(int64_key));
      // The key bytes carry no alignment guarantee, so copy them out instead
      // of dereferencing a casted pointer.
      memcpy(&int64_key, buf, sizeof(int64_key));
      if (self->key_type == UPB_TYPE_BOOL) {
        return int64_key ? Qtrue : Qfalse;
      } else {
        return LL2NUM(int64_key);
      }
    }

    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64: {
      uint64_t uint64_key;
      assert(length == sizeof(uint64_key));
      memcpy(&uint64_key, buf, sizeof(uint64_key));
      return ULL2NUM(uint64_key);
    }

    default:
      assert(false);
      return Qnil;
  }
}
// Maps a field value type to the upb table value ctype used to store it.
// Every supported value type is stored as UPB_CTYPE_UINT64; any other type
// trips the assertion and yields 0.
static upb_ctype_t upb_table_value_type(upb_fieldtype_t value_type) {
  switch (value_type) {
    case UPB_TYPE_MESSAGE:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_INT32:
    case UPB_TYPE_BOOL:
      return UPB_CTYPE_UINT64;
    default:
      break;
  }
  assert(false);
  return 0;
}
// Returns the address of the uint64 payload inside |v|, which the Map code
// uses as native-slot storage for a value.
static void* value_memory(upb_value* v) {
  void* slot = &v->val.uint64;
  return slot;
}
// -----------------------------------------------------------------------------
// Map container type.
// -----------------------------------------------------------------------------
// Ruby typed-data descriptor for Map wrapper objects: names the wrapped type
// "Google::Protobuf::Map" and registers Map_mark / Map_free as its GC mark and
// free callbacks (the third callback slot is left NULL).
const rb_data_type_t Map_type = {
"Google::Protobuf::Map",
{ Map_mark, Map_free, NULL },
};
// Ruby VALUE for the Map class -- presumably assigned when the class is
// registered; that code is not part of this section.
VALUE cMap;
// Unwraps the Map struct behind the Ruby object |_self|, type-checked against
// Map_type by TypedData_Get_Struct.
Map* ruby_to_Map(VALUE _self) {
  Map* unwrapped;
  TypedData_Get_Struct(_self, Map, &Map_type, unwrapped);
  return unwrapped;
}
void Map_mark(void* _self) {
Map* self = _self;
rb_gc_mark(self->value_type_class);
if (self->value_type == UPB_TYPE_STRING ||
self->value_type == UPB_TYPE_BYTES ||
self->value_type == UPB_TYPE_MESSAGE) {
upb_strtable_iter it;
for (upb_strtable_begin(&it, &self->table);
!upb_strtable_done(&it);
upb_strtable_next(&it)) {
upb_value v = upb_strtable_iter_value(&it);
void* mem = value_memory(&v);
native_slot_mark(self->value_type, mem);
}
}
}
void Map_free(void* _self) {
Map* self = _self;
upb_strtable_uninit(&self->table);
xfree(self);
}
// Allocator for Map: creates a zero-filled Map struct whose value type class
// is nil and wraps it in a Ruby object of class |klass|.
VALUE Map_alloc(VALUE klass) {
  Map* map = ALLOC(Map);
  memset(map, 0, sizeof(*map));
  map->value_type_class = Qnil;
  return TypedData_Wrap_Struct(klass, &Map_type, map);
}
// True for the value types that come with a type class (message and enum
// values); false for every other field type.
static bool needs_typeclass(upb_fieldtype_t type) {
  return type == UPB_TYPE_MESSAGE || type == UPB_TYPE_ENUM;
}
/*
 * call-seq:
 *     Map.new(key_type, value_type, value_typeclass = nil, init_hashmap = {})
 *     => new map
 *
 * Allocates a new Map container. This constructor may be called with 2, 3, or 4
 * arguments. The first two arguments are always present and are symbols (taking
 * on the same values as field-type symbols in message descriptors) that
 * indicate the type of the map key and value fields.
 *
 * The supported key types are: :int32, :int64, :uint32, :uint64, :bool,
 * :string, :bytes.
 *
 * The supported value types are: :int32, :int64, :uint32, :uint64, :bool,
 * :string, :bytes, :enum, :message.
 *
 * The third argument, value_typeclass, must be present if value_type is :enum
 * or :message. As in RepeatedField#new, this argument must be a message class
 * (for :message) or enum module (for :enum). An ArgumentError is raised if it
 * is missing.
 *
 * The last argument, if present, provides initial content for map. Note that
 * this may be an ordinary Ruby hashmap or another Map instance with identical
 * key and value types. Also note that this argument may be present whether or
 * not value_typeclass is present (and it is unambiguously separate from
 * value_typeclass because value_typeclass's presence is strictly determined by
 * value_type).
 */
VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
  Map* self = ruby_to_Map(_self);

  // We take either two args (:key_type, :value_type), three args (:key_type,
  // :value_type, "ValueMessageType"), or four args (the above plus an initial
  // hashmap).
  if (argc < 2 || argc > 4) {
    rb_raise(rb_eArgError, "Map constructor expects 2, 3 or 4 arguments.");
  }

  self->key_type = ruby_to_fieldtype(argv[0]);
  self->value_type = ruby_to_fieldtype(argv[1]);

  // Check that the key type is an allowed type.
  switch (self->key_type) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_BOOL:
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      // These are OK.
      break;
    default:
      rb_raise(rb_eArgError, "Invalid key type for map.");
  }

  int init_value_arg = 2;
  if (needs_typeclass(self->value_type)) {
    // Message/enum values cannot be converted without their class, so a
    // missing value_typeclass is rejected here instead of leaving the map with
    // a nil type class.
    if (argc < 3) {
      rb_raise(rb_eArgError,
               "Expected at least 3 arguments for message/enum.");
    }
    self->value_type_class = argv[2];
    validate_type_class(self->value_type, self->value_type_class);
    init_value_arg = 3;
  }

  if (!upb_strtable_init(&self->table, upb_table_value_type(self->value_type))) {
    rb_raise(rb_eRuntimeError, "Could not allocate table.");
  }

  // Any argument after the type information is the initial content.
  if (argc > init_value_arg) {
    Map_merge_into_self(_self, argv[init_value_arg]);
  }

  return Qnil;
}
/*
 * call-seq:
 *     Map.each(&block)
 *
 * Yields every |key, value| pair stored in the map to the block; the order of
 * iteration is unspecified. Because Map also includes Enumerable, a map can be
 * used like any ordinary Ruby sequence. Returns nil.
 */
VALUE Map_each(VALUE _self) {
  Map* map = ruby_to_Map(_self);
  upb_strtable_iter cursor;

  upb_strtable_begin(&cursor, &map->table);
  while (!upb_strtable_done(&cursor)) {
    upb_value stored = upb_strtable_iter_value(&cursor);
    VALUE rb_key = table_key_to_ruby(map,
                                     upb_strtable_iter_key(&cursor),
                                     upb_strtable_iter_keylength(&cursor));
    VALUE rb_value = native_slot_get(map->value_type,
                                     map->value_type_class,
                                     value_memory(&stored));

    rb_yield_values(2, rb_key, rb_value);
    upb_strtable_next(&cursor);
  }

  return Qnil;
}
/*
 * call-seq:
 *     Map.keys => [list_of_keys]
 *
 * Collects all keys of the map into a new array. The order of the keys is
 * unspecified.
 */
VALUE Map_keys(VALUE _self) {
  Map* map = ruby_to_Map(_self);
  VALUE keys = rb_ary_new();
  upb_strtable_iter cursor;

  upb_strtable_begin(&cursor, &map->table);
  while (!upb_strtable_done(&cursor)) {
    rb_ary_push(keys,
                table_key_to_ruby(map,
                                  upb_strtable_iter_key(&cursor),
                                  upb_strtable_iter_keylength(&cursor)));
    upb_strtable_next(&cursor);
  }

  return keys;
}
/*
 * call-seq:
 *     Map.values => [list_of_values]
 *
 * Collects all values of the map into a new array. The order of the values is
 * unspecified.
 */
VALUE Map_values(VALUE _self) {
  Map* map = ruby_to_Map(_self);
  VALUE values = rb_ary_new();
  upb_strtable_iter cursor;

  upb_strtable_begin(&cursor, &map->table);
  while (!upb_strtable_done(&cursor)) {
    upb_value stored = upb_strtable_iter_value(&cursor);
    rb_ary_push(values,
                native_slot_get(map->value_type,
                                map->value_type_class,
                                value_memory(&stored)));
    upb_strtable_next(&cursor);
  }

  return values;
}
/*
 * call-seq:
 *     Map.[](key) => value
 *
 * Looks up the element stored under the given key. An exception is raised when
 * the key has the wrong type; nil is returned when the map has no entry for
 * the key.
 */
VALUE Map_index(VALUE _self, VALUE key) {
  Map* map = ruby_to_Map(_self);
  char scratch[TABLE_KEY_BUF_LENGTH];
  const char* key_bytes = NULL;
  size_t key_len = 0;
  upb_value found;

  table_key(map, key, scratch, &key_bytes, &key_len);

  if (!upb_strtable_lookup2(&map->table, key_bytes, key_len, &found)) {
    return Qnil;
  }
  return native_slot_get(map->value_type, map->value_type_class,
                         value_memory(&found));
}
/*
 * call-seq:
 *     Map.[]=(key, value) => value
 *
 * Stores the given value under the given key, replacing any value that was
 * already stored there. Throws an exception if the key type is incorrect.
 * Returns the value that was just stored.
 */
VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;

  // Translate the Ruby key into the (pointer, length) form used by the table.
  table_key(self, key, keybuf, &keyval, &length);

  // Convert the Ruby value into its native slot representation.
  upb_value new_entry;
  void* new_entry_mem = value_memory(&new_entry);
  native_slot_set(self->value_type, self->value_type_class, new_entry_mem,
                  value);

  // An overwrite is expressed as remove-then-insert; the removed value itself
  // is not needed.
  upb_value discarded;
  upb_strtable_remove2(&self->table, keyval, length, &discarded);

  bool inserted = upb_strtable_insert2(&self->table, keyval, length, new_entry);
  if (!inserted) {
    rb_raise(rb_eRuntimeError, "Could not insert into table");
  }

  // Mirror Ruby Hash#[]=, which also evaluates to the assigned value.
  return value;
}
/*
 * call-seq:
 *     Map.has_key?(key) => bool
 *
 * Reports whether the given key is present in the map. Throws an exception if
 * the key has the wrong type.
 */
VALUE Map_has_key(VALUE _self, VALUE key) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;
  upb_value ignored;  // The lookup needs an out-parameter; its value is unused.

  table_key(self, key, keybuf, &keyval, &length);

  return upb_strtable_lookup2(&self->table, keyval, length, &ignored)
             ? Qtrue
             : Qfalse;
}
/*
 * call-seq:
 *     Map.delete(key) => old_value
 *
 * Removes the entry stored under the given key, if there is one. Returns the
 * value that was removed, or nil if the key was not present. Throws an
 * exception if the key is of the wrong type.
 */
VALUE Map_delete(VALUE _self, VALUE key) {
  Map* self = ruby_to_Map(_self);
  char keybuf[TABLE_KEY_BUF_LENGTH];
  const char* keyval = NULL;
  size_t length = 0;
  upb_value removed;

  table_key(self, key, keybuf, &keyval, &length);

  if (!upb_strtable_remove2(&self->table, keyval, length, &removed)) {
    return Qnil;
  }

  // Hand the removed native slot back to the caller as a Ruby object.
  void* removed_mem = value_memory(&removed);
  return native_slot_get(self->value_type, self->value_type_class,
                         removed_mem);
}
/*
 * call-seq:
 *     Map.clear
 *
 * Empties the map. Returns nil.
 */
VALUE Map_clear(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  upb_strtable* table = &self->table;

  // Tearing the table down and building a fresh one is faster than walking it
  // and issuing a delete-lookup for every key.
  upb_strtable_uninit(table);

  bool reinitialized =
      upb_strtable_init(table, upb_table_value_type(self->value_type));
  if (!reinitialized) {
    rb_raise(rb_eRuntimeError, "Unable to re-initialize table");
  }

  return Qnil;
}
/*
 * call-seq:
 *     Map.length
 *
 * Returns the number of entries (key-value pairs) in the map.
 */
VALUE Map_length(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  // The count is an unsigned size; convert it with the size_t-aware macro
  // rather than INT2NUM, which would first narrow it to a signed int.
  return SIZET2NUM(upb_strtable_count(&self->table));
}
// Builds a new, empty map by calling `new` on the class of _self with the same
// key type, value type and (when one is set) value type class as _self.
static VALUE Map_new_this_type(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE key_type = fieldtype_to_ruby(self->key_type);
  VALUE value_type = fieldtype_to_ruby(self->value_type);

  // No type class recorded: use the two-argument constructor form.
  if (self->value_type_class == Qnil) {
    return rb_funcall(CLASS_OF(_self), rb_intern("new"), 2,
                      key_type, value_type);
  }

  return rb_funcall(CLASS_OF(_self), rb_intern("new"), 3,
                    key_type, value_type, self->value_type_class);
}
/*
 * call-seq:
 *     Map.dup => new_map
 *
 * Produces a shallow copy of this map. Non-primitive elements (e.g.,
 * submessages) are not copied; the new map refers to the same objects.
 */
VALUE Map_dup(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE new_map = Map_new_this_type(_self);
  Map* new_self = ruby_to_Map(new_map);
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    upb_value src = upb_strtable_iter_value(&it);
    void* src_mem = value_memory(&src);

    upb_value copy;
    void* copy_mem = value_memory(&copy);
    native_slot_dup(self->value_type, copy_mem, src_mem);

    // The key bytes are reused as-is: both maps share the same key type.
    bool inserted = upb_strtable_insert2(&new_self->table,
                                         upb_strtable_iter_key(&it),
                                         upb_strtable_iter_keylength(&it),
                                         copy);
    if (!inserted) {
      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
    }

    upb_strtable_next(&it);
  }

  return new_map;
}
// Deep-copying counterpart of Map_dup: every value is copied with
// native_slot_deep_copy instead of native_slot_dup. Called from
// Google::Protobuf.deep_copy; not exposed as a method on Map itself.
VALUE Map_deep_copy(VALUE _self) {
  Map* self = ruby_to_Map(_self);
  VALUE new_map = Map_new_this_type(_self);
  Map* new_self = ruby_to_Map(new_map);
  upb_strtable_iter it;

  upb_strtable_begin(&it, &self->table);
  while (!upb_strtable_done(&it)) {
    upb_value src = upb_strtable_iter_value(&it);
    void* src_mem = value_memory(&src);

    upb_value copy;
    void* copy_mem = value_memory(&copy);
    native_slot_deep_copy(self->value_type, copy_mem, src_mem);

    bool inserted = upb_strtable_insert2(&new_self->table,
                                         upb_strtable_iter_key(&it),
                                         upb_strtable_iter_keylength(&it),
                                         copy);
    if (!inserted) {
      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
    }

    upb_strtable_next(&it);
  }

  return new_map;
}
/*
 * call-seq:
 *     Map.==(other) => boolean
 *
 * Compares this map to another. Maps are equal if they have identical key sets,
 * and for each key, the values in both maps compare equal. Elements are
 * compared as per normal Ruby semantics, by calling their :== methods (or
 * performing a more efficient comparison for primitive types).
 *
 * Maps with dissimilar key types or value types/typeclasses are never equal,
 * even if value comparison (for example, between integers and floats) would
 * have otherwise indicated that every element has equal value.
 *
 * A Ruby Hash operand is first converted into a temporary Map of this map's
 * type (so the conversion may raise if the Hash holds keys or values that this
 * map type cannot store). Any operand that is neither a Hash nor a Map compares
 * as not equal.
 */
VALUE Map_eq(VALUE _self, VALUE _other) {
  Map* self = ruby_to_Map(_self);

  // Allow comparisons to Ruby hashmaps by converting to a temporary Map
  // instance. Slow, but workable.
  if (TYPE(_other) == T_HASH) {
    VALUE other_map = Map_new_this_type(_self);
    Map_merge_into_self(other_map, _other);
    _other = other_map;
  }

  // Only unwrap objects that really are Maps. Anything else (nil, numbers,
  // unrelated objects) is simply "not equal" rather than being handed to
  // ruby_to_Map.
  if (!RB_TYPE_P(_other, T_DATA) || !RTYPEDDATA_P(_other) ||
      RTYPEDDATA_TYPE(_other) != &Map_type) {
    return Qfalse;
  }

  Map* other = ruby_to_Map(_other);
  if (self == other) {
    return Qtrue;
  }

  if (self->key_type != other->key_type ||
      self->value_type != other->value_type ||
      self->value_type_class != other->value_type_class) {
    return Qfalse;
  }

  if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) {
    return Qfalse;
  }

  // For each member of self, check that an equal member exists at the same key
  // in other. Because both tables hold the same number of entries, finding
  // every key of self in other also proves that other has no extra keys, so no
  // second pass over other is required.
  upb_strtable_iter it;
  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    upb_value v = upb_strtable_iter_value(&it);
    void* mem = value_memory(&v);

    upb_value other_v;
    void* other_mem = value_memory(&other_v);

    if (!upb_strtable_lookup2(&other->table,
                              upb_strtable_iter_key(&it),
                              upb_strtable_iter_keylength(&it),
                              &other_v)) {
      // Not present in other map.
      return Qfalse;
    }

    if (!native_slot_eq(self->value_type, mem, other_mem)) {
      // Present, but value not equal.
      return Qfalse;
    }
  }

  return Qtrue;
}
/*
 * call-seq:
 *     Map.hash => hash_value
 *
 * Returns a hash value based on this map's contents. The result does not
 * depend on the order in which the entries are visited, so that maps which
 * compare equal with == also produce the same hash value.
 */
VALUE Map_hash(VALUE _self) {
  Map* self = ruby_to_Map(_self);

  // rb_intern yields an ID (a method-name handle), not a Ruby object.
  ID hash_id = rb_intern("hash");

  // Iteration order over the table is unspecified, so per-entry hashes are
  // combined with a commutative operation (wrapping addition) instead of being
  // folded sequentially into one running hash.
  st_index_t combined = 0;

  upb_strtable_iter it;
  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    VALUE key = table_key_to_ruby(
        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));

    upb_value v = upb_strtable_iter_value(&it);
    void* mem = value_memory(&v);
    VALUE value = native_slot_get(self->value_type,
                                  self->value_type_class,
                                  mem);

    // Within one entry, order matters: (k, v) must not hash like (v, k).
    st_index_t entry_hash = rb_hash_start(0);
    entry_hash = rb_hash_uint(entry_hash,
                              NUM2LONG(rb_funcall(key, hash_id, 0)));
    entry_hash = rb_hash_uint(entry_hash,
                              NUM2LONG(rb_funcall(value, hash_id, 0)));

    combined += entry_hash;
  }

  st_index_t h = rb_hash_uint(rb_hash_start(0), combined);
  return INT2FIX(h);
}
/*
 * call-seq:
 *     Map.inspect => string
 *
 * Returns a string representing this map's elements. It will be formatted as
 * "{key => value, key => value, ...}", with each key and value string
 * representation computed by its own #inspect method. Entries appear in the
 * table's (unspecified) iteration order.
 */
VALUE Map_inspect(VALUE _self) {
  Map* self = ruby_to_Map(_self);

  VALUE str = rb_str_new2("{");

  bool first = true;
  // rb_intern yields an ID (a method-name handle), not a Ruby object, so it is
  // held in an ID rather than a VALUE.
  ID inspect_id = rb_intern("inspect");

  upb_strtable_iter it;
  for (upb_strtable_begin(&it, &self->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    VALUE key = table_key_to_ruby(
        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));

    upb_value v = upb_strtable_iter_value(&it);
    void* mem = value_memory(&v);
    VALUE value = native_slot_get(self->value_type,
                                  self->value_type_class,
                                  mem);

    // Separate entries with ", " -- but not before the first one.
    if (!first) {
      str = rb_str_cat2(str, ", ");
    } else {
      first = false;
    }

    str = rb_str_append(str, rb_funcall(key, inspect_id, 0));
    str = rb_str_cat2(str, " => ");
    str = rb_str_append(str, rb_funcall(value, inspect_id, 0));
  }

  str = rb_str_cat2(str, "}");
  return str;
}
/*
 * call-seq:
 *     Map.merge(other_map) => map
 *
 * Returns a new map holding the entries of this map plus the entries of
 * other_map. Where a key is present in both, the value from other_map wins.
 * This map itself is left unmodified; the merge is applied to a shallow copy.
 */
VALUE Map_merge(VALUE _self, VALUE hashmap) {
  VALUE merged = Map_dup(_self);
  // Map_merge_into_self returns the map it was given, i.e. the copy.
  return Map_merge_into_self(merged, hashmap);
}
// rb_hash_foreach callback used by Map_merge_into_self: stores one (key, value)
// pair of the source Hash into the Map passed as the extra argument, then asks
// the iteration to continue.
static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) {
  // The value returned by Map_index_set (the stored value) is not needed here.
  (void)Map_index_set(self, key, value);
  return ST_CONTINUE;
}
// Used only internally -- shared by #merge and #initialize.
//
// Copies every entry of `hashmap` into the map `_self`, overwriting entries
// that already exist under the same key, and returns `_self`.
//
// `hashmap` may be a Ruby Hash (each pair is stored through Map_index_set, so
// the usual key/value type checks apply) or another Map with the same key
// type, value type and value type class (entries are copied at the native
// level). Raises ArgumentError for any other argument or for a Map with
// mismatching types, and RuntimeError if an entry cannot be inserted.
VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) {
  if (TYPE(hashmap) == T_HASH) {
    rb_hash_foreach(hashmap, merge_into_self_callback, _self);
    return _self;
  }

  if (!RB_TYPE_P(hashmap, T_DATA) || !RTYPEDDATA_P(hashmap) ||
      RTYPEDDATA_TYPE(hashmap) != &Map_type) {
    rb_raise(rb_eArgError, "Unknown type merging into Map");
  }

  Map* self = ruby_to_Map(_self);
  Map* other = ruby_to_Map(hashmap);

  // Merging a map into itself changes nothing, and doing it literally would
  // remove from and insert into the very table that is being iterated.
  if (self == other) {
    return _self;
  }

  if (self->key_type != other->key_type ||
      self->value_type != other->value_type ||
      self->value_type_class != other->value_type_class) {
    rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types");
  }

  upb_strtable_iter it;
  for (upb_strtable_begin(&it, &other->table);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    // Replace any existing value by issuing a 'remove' operation first.
    upb_value oldv;
    upb_strtable_remove2(&self->table,
                         upb_strtable_iter_key(&it),
                         upb_strtable_iter_keylength(&it),
                         &oldv);

    upb_value v = upb_strtable_iter_value(&it);
    // Report a failed insert the same way Map_index_set does instead of
    // silently dropping the entry.
    if (!upb_strtable_insert2(&self->table,
                              upb_strtable_iter_key(&it),
                              upb_strtable_iter_keylength(&it),
                              v)) {
      rb_raise(rb_eRuntimeError, "Could not insert into table");
    }
  }

  return _self;
}
// Internal method: map iterator initialization (used for serialization).
// Binds `iter` to the map wrapped by `_self` and positions it at the first
// entry of that map's table.
void Map_begin(VALUE _self, Map_iter* iter) {
  Map* map = ruby_to_Map(_self);
  iter->self = map;
  upb_strtable_begin(&iter->it, &map->table);
}
// Internal method: advances `iter` to the next entry of its map.
void Map_next(Map_iter* iter) {
  upb_strtable_iter* table_iter = &iter->it;
  upb_strtable_next(table_iter);
}
// Internal method: reports whether `iter` has moved past the last entry.
bool Map_done(Map_iter* iter) {
  upb_strtable_iter* table_iter = &iter->it;
  return upb_strtable_done(table_iter);
}
// Internal method: returns the key of the entry `iter` currently points at,
// converted to a Ruby object by table_key_to_ruby.
VALUE Map_iter_key(Map_iter* iter) {
  upb_strtable_iter* table_iter = &iter->it;
  return table_key_to_ruby(iter->self,
                           upb_strtable_iter_key(table_iter),
                           upb_strtable_iter_keylength(table_iter));
}
// Internal method: returns the value of the entry `iter` currently points at,
// converted to a Ruby object by native_slot_get.
VALUE Map_iter_value(Map_iter* iter) {
  Map* map = iter->self;
  upb_value slot = upb_strtable_iter_value(&iter->it);
  void* slot_mem = value_memory(&slot);
  return native_slot_get(map->value_type, map->value_type_class, slot_mem);
}
// Defines the Map class under `module`, records it in the global cMap (which is
// registered with the GC), binds the Ruby-visible methods to their C
// implementations and mixes in Enumerable. Map_deep_copy is intentionally not
// bound to a method here.
void Map_register(VALUE module) {
  VALUE map_class = rb_define_class_under(module, "Map", rb_cObject);
  rb_define_alloc_func(map_class, Map_alloc);

  cMap = map_class;
  rb_gc_register_address(&cMap);

  // Construction and iteration.
  rb_define_method(map_class, "initialize", Map_init, -1);
  rb_define_method(map_class, "each", Map_each, 0);
  rb_define_method(map_class, "keys", Map_keys, 0);
  rb_define_method(map_class, "values", Map_values, 0);

  // Element access and mutation.
  rb_define_method(map_class, "[]", Map_index, 1);
  rb_define_method(map_class, "[]=", Map_index_set, 2);
  rb_define_method(map_class, "has_key?", Map_has_key, 1);
  rb_define_method(map_class, "delete", Map_delete, 1);
  rb_define_method(map_class, "clear", Map_clear, 0);
  rb_define_method(map_class, "length", Map_length, 0);

  // Copying, comparison and presentation.
  rb_define_method(map_class, "dup", Map_dup, 0);
  rb_define_method(map_class, "==", Map_eq, 1);
  rb_define_method(map_class, "hash", Map_hash, 0);
  rb_define_method(map_class, "inspect", Map_inspect, 0);
  rb_define_method(map_class, "merge", Map_merge, 1);

  rb_include_module(map_class, rb_mEnumerable);
}

@ -139,7 +139,14 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
"Unknown field name in initialization map entry.");
}
if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
if (is_map_field(f)) {
if (TYPE(val) != T_HASH) {
rb_raise(rb_eArgError,
"Expected hashmap as initializer value for map field.");
}
VALUE map = layout_get(self->descriptor->layout, Message_data(self), f);
Map_merge_into_self(map, val);
} else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
if (TYPE(val) != T_ARRAY) {
rb_raise(rb_eArgError,
"Expected array as initializer value for repeated field.");
@ -450,13 +457,15 @@ VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) {
* call-seq:
* Google::Protobuf.deep_copy(obj) => copy_of_obj
*
* Performs a deep copy of either a RepeatedField instance or a message object,
* recursively copying its members.
* Performs a deep copy of a RepeatedField instance, a Map instance, or a
* message object, recursively copying its members.
*/
VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
VALUE klass = CLASS_OF(obj);
if (klass == cRepeatedField) {
return RepeatedField_deep_copy(obj);
} else if (klass == cMap) {
return Map_deep_copy(obj);
} else {
return Message_deep_copy(obj);
}

@ -82,6 +82,7 @@ void Init_protobuf_c() {
EnumBuilderContext_register(internal);
Builder_register(internal);
RepeatedField_register(protobuf);
Map_register(protobuf);
rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1);
rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2);

@ -123,6 +123,7 @@ struct EnumDescriptor {
// Native state behind a MessageBuilderContext Ruby object. Both members are
// Ruby object references: they start out as Qnil in
// MessageBuilderContext_alloc and are reported to the GC by
// MessageBuilderContext_mark.
struct MessageBuilderContext {
// Presumably the descriptor of the message being built -- confirm in defs.c.
VALUE descriptor;
// NOTE(review): appears to correspond to the `builder` argument of
// MessageBuilderContext_initialize; confirm in defs.c.
VALUE builder;
};
struct EnumBuilderContext {
@ -213,10 +214,13 @@ void MessageBuilderContext_free(void* _self);
VALUE MessageBuilderContext_alloc(VALUE klass);
void MessageBuilderContext_register(VALUE module);
MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value);
VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor);
VALUE MessageBuilderContext_initialize(VALUE _self,
VALUE descriptor,
VALUE builder);
VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self);
void EnumBuilderContext_mark(void* _self);
void EnumBuilderContext_free(void* _self);
@ -239,6 +243,8 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb);
// Native slot storage abstraction.
// -----------------------------------------------------------------------------
#define NATIVE_SLOT_MAX_SIZE sizeof(void*)
size_t native_slot_size(upb_fieldtype_t type);
void native_slot_set(upb_fieldtype_t type,
VALUE type_class,
@ -254,11 +260,18 @@ void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from);
bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2);
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value);
void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value);
extern rb_encoding* kRubyStringUtf8Encoding;
extern rb_encoding* kRubyStringASCIIEncoding;
extern rb_encoding* kRubyString8bitEncoding;
VALUE field_type_class(const upb_fielddef* field);
bool is_map_field(const upb_fielddef* field);
const upb_fielddef* map_field_key(const upb_fielddef* field);
const upb_fielddef* map_field_value(const upb_fielddef* field);
// -----------------------------------------------------------------------------
// Repeated field container type.
// -----------------------------------------------------------------------------
@ -282,7 +295,6 @@ extern VALUE cRepeatedField;
RepeatedField* ruby_to_RepeatedField(VALUE value);
void RepeatedField_register(VALUE module);
VALUE RepeatedField_each(VALUE _self);
VALUE RepeatedField_index(VALUE _self, VALUE _index);
void* RepeatedField_index_native(VALUE _self, int index);
@ -302,6 +314,59 @@ VALUE RepeatedField_hash(VALUE _self);
VALUE RepeatedField_inspect(VALUE _self);
VALUE RepeatedField_plus(VALUE _self, VALUE list);
// Defined in repeated_field.c; also used by Map.
void validate_type_class(upb_fieldtype_t type, VALUE klass);
// -----------------------------------------------------------------------------
// Map container type.
// -----------------------------------------------------------------------------
// Native state behind a Google::Protobuf::Map Ruby object.
typedef struct {
// Field type of the map's keys.
upb_fieldtype_t key_type;
// Field type of the map's values.
upb_fieldtype_t value_type;
// Ruby class associated with the values, or Qnil when none is set. It is
// checked against the field's subdef for message- and enum-typed values.
VALUE value_type_class;
// Underlying table holding the entries; values are stored as native slots
// and converted to and from Ruby objects on access.
upb_strtable table;
} Map;
void Map_mark(void* self);
void Map_free(void* self);
VALUE Map_alloc(VALUE klass);
VALUE Map_init(int argc, VALUE* argv, VALUE self);
void Map_register(VALUE module);
extern const rb_data_type_t Map_type;
extern VALUE cMap;
Map* ruby_to_Map(VALUE value);
VALUE Map_each(VALUE _self);
VALUE Map_keys(VALUE _self);
VALUE Map_values(VALUE _self);
VALUE Map_index(VALUE _self, VALUE key);
VALUE Map_index_set(VALUE _self, VALUE key, VALUE value);
VALUE Map_has_key(VALUE _self, VALUE key);
VALUE Map_delete(VALUE _self, VALUE key);
VALUE Map_clear(VALUE _self);
VALUE Map_length(VALUE _self);
VALUE Map_dup(VALUE _self);
VALUE Map_deep_copy(VALUE _self);
VALUE Map_eq(VALUE _self, VALUE _other);
VALUE Map_hash(VALUE _self);
VALUE Map_inspect(VALUE _self);
VALUE Map_merge(VALUE _self, VALUE hashmap);
VALUE Map_merge_into_self(VALUE _self, VALUE hashmap);
// Iterator over a Map's entries, driven by Map_begin / Map_next / Map_done and
// read with Map_iter_key / Map_iter_value.
typedef struct {
// The map being iterated (set by Map_begin).
Map* self;
// Position within that map's underlying table.
upb_strtable_iter it;
} Map_iter;
void Map_begin(VALUE _self, Map_iter* iter);
void Map_next(Map_iter* iter);
bool Map_done(Map_iter* iter);
VALUE Map_iter_key(Map_iter* iter);
VALUE Map_iter_value(Map_iter* iter);
// -----------------------------------------------------------------------------
// Message layout / storage.
// -----------------------------------------------------------------------------

@ -324,6 +324,10 @@ VALUE RepeatedField_deep_copy(VALUE _self) {
* element types are equal, their lengths are equal, and each element is equal.
* Elements are compared as per normal Ruby semantics, by calling their :==
* methods (or performing a more efficient comparison for primitive types).
*
* Repeated fields with dissimilar element types are never equal, even if value
* comparison (for example, between integers and floats) would have otherwise
* indicated that every element has equal value.
*/
VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
if (_self == _other) {
@ -458,7 +462,7 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
return dupped;
}
static void validate_type_class(upb_fieldtype_t type, VALUE klass) {
void validate_type_class(upb_fieldtype_t type, VALUE klass) {
if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
rb_raise(rb_eArgError,
"Type class has no descriptor. Please pass a "

@ -57,7 +57,17 @@ size_t native_slot_size(upb_fieldtype_t type) {
}
}
static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
// Returns true if `value` is a Ruby Float, Fixnum or Bignum.
static bool is_ruby_num(VALUE value) {
  switch (TYPE(value)) {
    case T_FLOAT:
    case T_FIXNUM:
    case T_BIGNUM:
      return true;
    default:
      return false;
  }
}
void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) {
if (!is_ruby_num(val)) {
rb_raise(rb_eTypeError, "Expected number type for integral field.");
}
// NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
// bound; we just need to do precision checks (i.e., disallow rounding) and
// check for < 0 on unsigned types.
@ -76,12 +86,6 @@ static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
}
}
static bool is_ruby_num(VALUE value) {
return (TYPE(value) == T_FLOAT ||
TYPE(value) == T_FIXNUM ||
TYPE(value) == T_BIGNUM);
}
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) {
bool bad_encoding = false;
rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value));
@ -156,14 +160,14 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
int32_t int_val = 0;
if (TYPE(value) == T_SYMBOL) {
// Ensure that the given symbol exists in the enum module.
VALUE lookup = rb_const_get(type_class, SYM2ID(value));
VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value);
if (lookup == Qnil) {
rb_raise(rb_eRangeError, "Unknown symbol value for enum field.");
} else {
int_val = NUM2INT(lookup);
}
} else {
check_int_range_precision(UPB_TYPE_INT32, value);
native_slot_check_int_range_precision(UPB_TYPE_INT32, value);
int_val = NUM2INT(value);
}
DEREF(memory, int32_t) = int_val;
@ -173,10 +177,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
if (!is_ruby_num(value)) {
rb_raise(rb_eTypeError, "Expected number type for integral field.");
}
check_int_range_precision(type, value);
native_slot_check_int_range_precision(type, value);
switch (type) {
case UPB_TYPE_INT32:
DEREF(memory, int32_t) = NUM2INT(value);
@ -246,8 +247,9 @@ void native_slot_init(upb_fieldtype_t type, void* memory) {
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
// TODO(cfallin): set encoding appropriately
DEREF(memory, VALUE) = rb_str_new2("");
rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
break;
case UPB_TYPE_MESSAGE:
DEREF(memory, VALUE) = Qnil;
@ -321,6 +323,35 @@ bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) {
}
}
// -----------------------------------------------------------------------------
// Map field utilities.
// -----------------------------------------------------------------------------
// Returns true if `field` is a map field: a repeated, message-typed field
// whose submessage definition is flagged as a map entry.
bool is_map_field(const upb_fielddef* field) {
  if (upb_fielddef_label(field) == UPB_LABEL_REPEATED &&
      upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
    const upb_msgdef* entry_def =
        (const upb_msgdef*)upb_fielddef_subdef(field);
    return upb_msgdef_mapentry(entry_def);
  }
  return false;
}
// Returns the key field of a map field: the field numbered 1 in the map-entry
// submessage. `field` must be a map field (checked by assert only).
const upb_fielddef* map_field_key(const upb_fielddef* field) {
  assert(is_map_field(field));
  const upb_msgdef* entry_def = (const upb_msgdef*)upb_fielddef_subdef(field);
  const upb_fielddef* key = upb_msgdef_itof(entry_def, 1);
  assert(key != NULL);
  return key;
}
// Returns the value field of a map field: the field numbered 2 in the
// map-entry submessage. `field` must be a map field (checked by assert only).
const upb_fielddef* map_field_value(const upb_fielddef* field) {
  assert(is_map_field(field));
  const upb_msgdef* entry_def = (const upb_msgdef*)upb_fielddef_subdef(field);
  const upb_fielddef* value = upb_msgdef_itof(entry_def, 2);
  assert(value != NULL);
  return value;
}
// -----------------------------------------------------------------------------
// Memory layout management.
// -----------------------------------------------------------------------------
@ -334,9 +365,12 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
size_t off = 0;
for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) {
const upb_fielddef* field = upb_msg_iter_field(&it);
size_t field_size =
(upb_fielddef_label(field) == UPB_LABEL_REPEATED) ?
sizeof(VALUE) : native_slot_size(upb_fielddef_type(field));
size_t field_size = 0;
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
field_size = sizeof(VALUE);
} else {
field_size = native_slot_size(upb_fielddef_type(field));
}
// align current offset
off = (off + field_size - 1) & ~(field_size - 1);
layout->offsets[upb_fielddef_index(field)] = off;
@ -357,7 +391,7 @@ void free_layout(MessageLayout* layout) {
xfree(layout);
}
static VALUE get_type_class(const upb_fielddef* field) {
VALUE field_type_class(const upb_fielddef* field) {
VALUE type_class = Qnil;
if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
VALUE submsgdesc =
@ -380,7 +414,7 @@ VALUE layout_get(MessageLayout* layout,
return *((VALUE *)memory);
} else {
return native_slot_get(upb_fielddef_type(field),
get_type_class(field),
field_type_class(field),
memory);
}
}
@ -398,9 +432,8 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
rb_raise(rb_eTypeError, "Repeated field array has wrong element type");
}
if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE ||
upb_fielddef_type(field) == UPB_TYPE_ENUM) {
RepeatedField* self = ruby_to_RepeatedField(val);
if (self->field_type == UPB_TYPE_MESSAGE ||
self->field_type == UPB_TYPE_ENUM) {
if (self->field_type_class !=
get_def_obj(upb_fielddef_subdef(field))) {
rb_raise(rb_eTypeError,
@ -409,17 +442,48 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
}
}
// Verifies that `val` may be assigned to the map field `field`. Raises
// TypeError unless `val` is a Map whose key type, value type and -- for
// message- or enum-typed values -- value type class all match the field.
static void check_map_field_type(VALUE val, const upb_fielddef* field) {
  assert(is_map_field(field));
  const upb_fielddef* key_field = map_field_key(field);
  const upb_fielddef* value_field = map_field_value(field);

  bool is_map_object = RB_TYPE_P(val, T_DATA) && RTYPEDDATA_P(val) &&
                       RTYPEDDATA_TYPE(val) == &Map_type;
  if (!is_map_object) {
    rb_raise(rb_eTypeError, "Expected Map instance");
  }

  Map* map = ruby_to_Map(val);

  if (upb_fielddef_type(key_field) != map->key_type) {
    rb_raise(rb_eTypeError, "Map key type does not match field's key type");
  }

  if (upb_fielddef_type(value_field) != map->value_type) {
    rb_raise(rb_eTypeError, "Map value type does not match field's value type");
  }

  // Only message and enum values carry a type class that needs checking.
  upb_fieldtype_t expected_value_type = upb_fielddef_type(value_field);
  if ((expected_value_type == UPB_TYPE_MESSAGE ||
       expected_value_type == UPB_TYPE_ENUM) &&
      map->value_type_class !=
          get_def_obj(upb_fielddef_subdef(value_field))) {
    rb_raise(rb_eTypeError,
             "Map value type has wrong message/enum class");
  }
}
void layout_set(MessageLayout* layout,
void* storage,
const upb_fielddef* field,
VALUE val) {
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (is_map_field(field)) {
check_map_field_type(val, field);
DEREF(memory, VALUE) = val;
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
check_repeated_field_type(val, field);
*((VALUE *)memory) = val;
DEREF(memory, VALUE) = val;
} else {
native_slot_set(upb_fielddef_type(field), get_type_class(field),
native_slot_set(upb_fielddef_type(field), field_type_class(field),
memory, val);
}
}
@ -434,9 +498,34 @@ void layout_init(MessageLayout* layout,
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (is_map_field(field)) {
VALUE map = Qnil;
const upb_fielddef* key_field = map_field_key(field);
const upb_fielddef* value_field = map_field_value(field);
VALUE type_class = field_type_class(value_field);
if (type_class != Qnil) {
VALUE args[3] = {
fieldtype_to_ruby(upb_fielddef_type(key_field)),
fieldtype_to_ruby(upb_fielddef_type(value_field)),
type_class,
};
map = rb_class_new_instance(3, args, cMap);
} else {
VALUE args[2] = {
fieldtype_to_ruby(upb_fielddef_type(key_field)),
fieldtype_to_ruby(upb_fielddef_type(value_field)),
};
map = rb_class_new_instance(2, args, cMap);
}
DEREF(memory, VALUE) = map;
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
VALUE ary = Qnil;
VALUE type_class = get_type_class(field);
VALUE type_class = field_type_class(field);
if (type_class != Qnil) {
VALUE args[2] = {
fieldtype_to_ruby(upb_fielddef_type(field)),
@ -447,7 +536,8 @@ void layout_init(MessageLayout* layout,
VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) };
ary = rb_class_new_instance(1, args, cRepeatedField);
}
*((VALUE *)memory) = ary;
DEREF(memory, VALUE) = ary;
} else {
native_slot_init(upb_fielddef_type(field), memory);
}
@ -464,7 +554,7 @@ void layout_mark(MessageLayout* layout, void* storage) {
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
rb_gc_mark(*((VALUE *)memory));
rb_gc_mark(DEREF(memory, VALUE));
} else {
native_slot_mark(upb_fielddef_type(field), memory);
}
@ -482,8 +572,10 @@ void layout_dup(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
*((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory));
if (is_map_field(field)) {
DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE));
} else {
native_slot_dup(upb_fielddef_type(field), to_memory, from_memory);
}
@ -501,8 +593,12 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
*((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory));
if (is_map_field(field)) {
DEREF(to_memory, VALUE) =
Map_deep_copy(DEREF(from_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
DEREF(to_memory, VALUE) =
RepeatedField_deep_copy(DEREF(from_memory, VALUE));
} else {
native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory);
}
@ -520,11 +616,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) {
void* msg2_memory = ((uint8_t *)msg2) +
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
if (RepeatedField_eq(*((VALUE *)msg1_memory),
*((VALUE *)msg2_memory)) == Qfalse) {
return Qfalse;
}
if (is_map_field(field)) {
return Map_eq(DEREF(msg1_memory, VALUE),
DEREF(msg2_memory, VALUE));
} else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
return RepeatedField_eq(DEREF(msg1_memory, VALUE),
DEREF(msg2_memory, VALUE));
} else {
if (!native_slot_eq(upb_fielddef_type(field),
msg1_memory, msg2_memory)) {

@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
m->map_entry = false;
return m;
err1:
@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
if (!newm) return NULL;
bool ok = upb_def_setfullname(UPB_UPCAST(newm),
upb_def_fullname(UPB_UPCAST(m)), NULL);
newm->map_entry = m->map_entry;
UPB_ASSERT_VAR(ok, ok);
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
// Records whether message definition `m` is a map entry. `m` must not be
// frozen yet (checked by assert only).
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  assert(!upb_msgdef_isfrozen(m));
  m->map_entry = map_entry;
}
// Reports whether message definition `m` has been flagged as a map entry.
bool upb_msgdef_mapentry(const upb_msgdef *m) {
  return m->map_entry;
}
void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
upb_inttable_begin(iter, &m->itof);
}
@ -3401,31 +3412,28 @@ int log2ceil(uint64_t v) {
}
// Returns a newly malloc'd copy of the NUL-terminated string `s`, or whatever
// upb_strdup2 returns on failure. The copy is delegated to upb_strdup2 with
// the string's length.
char *upb_strdup(const char *s) {
  return upb_strdup2(s, strlen(s));
}
// Returns a newly malloc'd copy of the first `len` bytes of `s`, followed by a
// terminating NUL byte, or NULL if the allocation fails. The caller owns the
// result and releases it with free().
char *upb_strdup2(const char *s, size_t len) {
  // Always null-terminate, even if binary data; but don't rely on the input to
  // have a null-terminating byte since it may be a raw binary buffer.
  size_t n = len + 1;
  if (n < len) return NULL;  // len + 1 wrapped around.

  char *p = malloc(n);
  if (p == NULL) return NULL;  // Never write through a failed allocation.

  // Copy exactly len bytes: reading len + 1 would run past a buffer that has
  // no terminator of its own.
  memcpy(p, s, len);
  p[len] = 0;
  return p;
}
// A type to represent the lookup key of either a strtable or an inttable.
// This is like upb_tabkey, but can carry a size also to allow lookups of
// non-NULL-terminated strings (we don't store string lengths in the table).
typedef struct {
upb_tabkey key;
uint32_t len; // For string keys only.
} lookupkey_t;
static lookupkey_t strkey(const char *str) {
lookupkey_t k;
k.key.str = (char*)str;
k.len = strlen(str);
return k;
}
static lookupkey_t strkey2(const char *str, size_t len) {
lookupkey_t k;
k.key.str = (char*)str;
k.len = len;
k.key.s.str = (char*)str;
k.key.s.length = len;
return k;
}
@ -3607,11 +3615,12 @@ static size_t begin(const upb_table *t) {
// A simple "subclass" of upb_table that only adds a hash function for strings.

// Hashes a string tabkey over its explicit byte length rather than strlen(),
// so keys containing embedded 0 bytes are hashed in full.
static uint32_t strhash(upb_tabkey key) {
  return MurmurHash2(key.s.str, key.s.length, 0);
}
// Returns true when the stored key |k1| and the lookup key |k2| have the same
// length and identical bytes. memcmp() is used instead of strncmp() so that
// the comparison does not stop at an embedded 0 byte.
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  return k1.s.length == k2.key.s.length &&
         memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
}
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@ -3620,7 +3629,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
// Releases a string table: frees the key string of every entry slot exactly
// once, then tears down the underlying table with uninit().
void upb_strtable_uninit(upb_strtable *t) {
  for (size_t i = 0; i < upb_table_size(&t->t); i++)
    free((void*)t->t.entries[i].key.s.str);
  uninit(&t->t);
}
@ -3631,26 +3640,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_strtable_insert(
&new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i));
upb_strtable_insert2(
&new_table,
upb_strtable_iter_key(&i),
upb_strtable_iter_keylength(&i),
upb_strtable_iter_value(&i));
}
upb_strtable_uninit(t);
*t = new_table;
return true;
}
// Inserts the length-delimited key |k| (|len| bytes, not necessarily
// NUL-terminated) with value |v| into |t|.
//
// The table is grown first if it is full. The key bytes are then copied once
// with upb_strdup2(), and that copy is what gets stored in the table.
//
// Returns false if the resize or the key copy fails, true otherwise.
bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
                          upb_value v) {
  if (isfull(&t->t)) {
    // Need to resize.  New table of double the size, add old elements to it.
    if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
      return false;
    }
  }

  // Duplicate the key exactly once; a second copy would leak the first.
  if ((k = upb_strdup2(k, len)) == NULL) return false;

  lookupkey_t key = strkey2(k, len);
  uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
  insert(&t->t, key, v, hash, &strhash, &streql);
  return true;
}
@ -3660,11 +3673,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
upb_value *val) {
uint32_t hash = MurmurHash2(key, strlen(key), 0);
upb_tabkey tabkey;
if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
free((void*)tabkey.str);
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
free((void*)tabkey.s.str);
return true;
} else {
return false;
@ -3693,7 +3707,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) {
// Returns the key string of the entry the iterator currently points at.
// Asserts that the iterator is not done. The string may contain embedded 0
// bytes; use upb_strtable_iter_keylength() for its length.
const char *upb_strtable_iter_key(upb_strtable_iter *i) {
  assert(!upb_strtable_done(i));
  return str_tabent(i)->key.s.str;
}
// Returns the stored length (key.s.length) of the key at the iterator's
// current entry. Asserts that the iterator is not done.
size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
return str_tabent(i)->key.s.length;
}
upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {

@ -600,6 +600,9 @@ typedef struct {
// Like strdup(), which isn't always available since it's not ANSI C.
char *upb_strdup(const char *s);
// Variant that works with a length-delimited rather than NUL-terminated
// string, as supported by strtable.
char *upb_strdup2(const char *s, size_t len);
UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val,
upb_ctype_t ctype) {
@ -654,12 +657,24 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);
// Key of a table entry: either an integer (|num|) or a string.
typedef union {
uintptr_t num;
const char *str; // Owned by the table, NUL-terminated. Same storage as s.str.
struct {
// We own this. NULL-terminated but may also contain binary data; see
// explicit length below.
// TODO: move the length to the start of the string in order to reduce
// tabkey's size (to one machine word) in a way that supports static
// initialization.
const char *str;
size_t length;
} s;
} upb_tabkey;
#define UPB_TABKEY_NUM(n) {n}
#ifdef UPB_C99
// Static initializer for a string tabkey.
// Given that |strval| is a string literal, sizeof(strval) gives us a
// compile-time-constant strlen() + 1, hence the "- 1". We must ensure that
// this works for static data initializers, so |strval| must be a literal (or
// array), never a plain pointer.
#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \
                                        .length = sizeof(strval) - 1 } }
#endif
// TODO(haberman): C++
#define UPB_TABKEY_NONE {0}
@ -765,7 +780,14 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
// If a table resize was required but memory allocation failed, false is
// returned and the table is unchanged.
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
upb_value val);
// For NUL-terminated strings: convenience wrapper around
// upb_strtable_insert2() that computes the key length with strlen().
UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
upb_value val) {
return upb_strtable_insert2(t, key, strlen(key), val);
}
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
@ -782,7 +804,14 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
// Removes an item from the table. Returns true if the remove was successful,
// and stores the removed item in *val if non-NULL.
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val);
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
upb_value *val);
// For NUL-terminated strings: convenience wrapper around
// upb_strtable_remove2() that computes the key length with strlen().
UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_remove2(t, key, strlen(key), v);
}
// Updates an existing entry in an inttable. If the entry does not exist,
// returns false and does nothing. Unlike insert/remove, this does not
@ -876,6 +905,7 @@ void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
bool upb_strtable_done(const upb_strtable_iter *i);
const char *upb_strtable_iter_key(upb_strtable_iter *i);
size_t upb_strtable_iter_keylength(upb_strtable_iter *i);
upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
void upb_strtable_iter_setdone(upb_strtable_iter *i);
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
@ -1777,6 +1807,10 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
// just be moved into symtab.c?
MessageDef* Dup(const void* owner) const;
// Is this message a map entry?
void setmapentry(bool map_entry);
bool mapentry() const;
// Iteration over fields. The order is undefined.
class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
@ -1823,6 +1857,11 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// Is this a map-entry message?
// TODO: set this flag properly for static descriptors; regenerate
// descriptor.upb.c.
bool map_entry;
// TODO(haberman): proper extension ranges (there can be multiple).
));
@ -1830,7 +1869,7 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \
submsg_field_count, itof, ntof \
submsg_field_count, itof, ntof, false \
}
UPB_BEGIN_EXTERN_C // {
@ -1878,6 +1917,9 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
}
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
bool upb_msgdef_mapentry(const upb_msgdef *m);
// upb_msg_iter i;
// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
// upb_fielddef *f = upb_msg_iter_field(&i);
@ -2331,6 +2373,12 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name,
// C++ wrapper: forwards to upb_msgdef_dup() for this msgdef and |owner|.
inline MessageDef* MessageDef::Dup(const void *owner) const {
return upb_msgdef_dup(this, owner);
}
// C++ wrapper: forwards to upb_msgdef_setmapentry() for this msgdef.
inline void MessageDef::setmapentry(bool map_entry) {
upb_msgdef_setmapentry(this, map_entry);
}
// C++ wrapper: returns upb_msgdef_mapentry() for this msgdef.
inline bool MessageDef::mapentry() const {
return upb_msgdef_mapentry(this);
}
// Returns an iterator constructed from this msgdef.
inline MessageDef::iterator MessageDef::begin() { return iterator(this); }
// Returns the iterator produced by iterator::end() for this msgdef.
inline MessageDef::iterator MessageDef::end() { return iterator::end(this); }
inline MessageDef::const_iterator MessageDef::begin() const {

@ -36,23 +36,43 @@ module BasicTest
add_message "TestMessage2" do
optional :foo, :int32, 1
end
add_message "Recursive1" do
optional :foo, :message, 1, "Recursive2"
end
add_message "Recursive2" do
optional :foo, :message, 1, "Recursive1"
end
add_enum "TestEnum" do
value :Default, 0
value :A, 1
value :B, 2
value :C, 3
end
add_message "BadFieldNames" do
optional :dup, :int32, 1
optional :class, :int32, 2
optional :"a.b", :int32, 3
end
add_message "MapMessage" do
map :map_string_int32, :string, :int32, 1
map :map_string_msg, :string, :message, 2, "TestMessage2"
end
add_message "MapMessageWireEquiv" do
repeated :map_string_int32, :message, 1, "MapMessageWireEquiv_entry1"
repeated :map_string_msg, :message, 2, "MapMessageWireEquiv_entry2"
end
add_message "MapMessageWireEquiv_entry1" do
optional :key, :string, 1
optional :value, :int32, 2
end
add_message "MapMessageWireEquiv_entry2" do
optional :key, :string, 1
optional :value, :message, 2, "TestMessage2"
end
end
TestMessage = pool.lookup("TestMessage").msgclass
@ -61,6 +81,12 @@ module BasicTest
Recursive2 = pool.lookup("Recursive2").msgclass
TestEnum = pool.lookup("TestEnum").enummodule
BadFieldNames = pool.lookup("BadFieldNames").msgclass
# Message classes used by the map tests, looked up from the pool by name.
# The MapMessageWireEquiv* classes are the repeated-entry-message counterpart
# of MapMessage declared in the pool above.
MapMessage = pool.lookup("MapMessage").msgclass
MapMessageWireEquiv = pool.lookup("MapMessageWireEquiv").msgclass
MapMessageWireEquiv_entry1 =
pool.lookup("MapMessageWireEquiv_entry1").msgclass
MapMessageWireEquiv_entry2 =
pool.lookup("MapMessageWireEquiv_entry2").msgclass
# ------------ test cases ---------------
@ -300,7 +326,7 @@ module BasicTest
l.push :B
l.push :C
assert l.count == 3
assert_raise NameError do
assert_raise RangeError do
l.push :D
end
assert l[0] == :A
@ -324,12 +350,240 @@ module BasicTest
end
end
def test_map_basic
  # Key types a Map accepts:
  #   :int32, :int64, :uint32, :uint64, :bool, :string, :bytes.
  map = Google::Protobuf::Map.new(:string, :int32)

  # Insertion, lookup and size.
  map["asdf"] = 1
  assert map["asdf"] == 1
  map["jkl;"] = 42
  assert map == { "jkl;" => 42, "asdf" => 1 }
  assert map.has_key?("asdf")
  assert !map.has_key?("qwerty")
  assert map.length == 2

  # A dup compares equal and hashes to the same value.
  copy = map.dup
  assert map == copy
  assert map.hash != 0
  assert map.hash == copy.hash

  # Iteration yields every key/value pair.
  by_value = {}
  map.each do |key, value|
    by_value[value] = key
  end
  assert by_value == { 42 => "jkl;", 1 => "asdf" }

  # Deleting returns the old value; reading the missing key afterwards must
  # not bring the entry back.
  assert map.delete("asdf") == 1
  assert !map.has_key?("asdf")
  assert map["asdf"] == nil
  assert !map.has_key?("asdf")

  # We only assert on inspect value when there is one map entry because the
  # order in which elements appear is unspecified (depends on the internal
  # hash function). We don't want a brittle test.
  assert map.inspect == "{\"jkl;\" => 42}"

  assert map.keys == ["jkl;"]
  assert map.values == [42]

  map.clear
  assert map.length == 0
  assert map == {}

  # Wrong key type.
  assert_raise TypeError do
    map[1] = 1
  end
  # Value does not fit into an int32.
  assert_raise RangeError do
    map["asdf"] = 0x1_0000_0000
  end
end
def test_map_ctor
  # The optional trailing Hash argument pre-populates the new map.
  initial = {"a" => 1, "b" => 2, "c" => 3}
  m = Google::Protobuf::Map.new(:string, :int32, initial)
  assert m == {"a" => 1, "c" => 3, "b" => 2}
end
def test_map_keytypes
  # For every supported key type: keys of that type are accepted,
  # out-of-range integers raise RangeError, and keys of another type raise
  # TypeError.

  # :int32 keys
  m = Google::Protobuf::Map.new(:int32, :int32)
  m[1] = 42
  m[-1] = 42
  assert_raise RangeError do
    m[0x8000_0000] = 1
  end
  assert_raise TypeError do
    m["asdf"] = 1
  end

  # :int64 keys
  m = Google::Protobuf::Map.new(:int64, :int32)
  m[0x1000_0000_0000_0000] = 1
  assert_raise RangeError do
    m[0x1_0000_0000_0000_0000] = 1
  end
  assert_raise TypeError do
    m["asdf"] = 1
  end

  # :uint32 keys
  m = Google::Protobuf::Map.new(:uint32, :int32)
  m[0x8000_0000] = 1
  assert_raise RangeError do
    m[0x1_0000_0000] = 1
  end
  assert_raise RangeError do
    m[-1] = 1
  end

  # :uint64 keys
  m = Google::Protobuf::Map.new(:uint64, :int32)
  m[0x8000_0000_0000_0000] = 1
  assert_raise RangeError do
    m[0x1_0000_0000_0000_0000] = 1
  end
  assert_raise RangeError do
    m[-1] = 1
  end

  # :bool keys
  m = Google::Protobuf::Map.new(:bool, :int32)
  m[true] = 1
  m[false] = 2
  assert_raise TypeError do
    m[1] = 1
  end
  assert_raise TypeError do
    m["asdf"] = 1
  end

  # Fixture shared by the :string and :bytes sections. It is built outside
  # the assert_raise blocks so that only the map assignment itself can
  # satisfy (or fail) the expectation.
  bytestring = ["FFFF"].pack("H*")

  # :string keys
  m = Google::Protobuf::Map.new(:string, :int32)
  m["asdf"] = 1
  assert_raise TypeError do
    m[1] = 1
  end
  assert_raise TypeError do
    m[bytestring] = 1
  end

  # :bytes keys
  m = Google::Protobuf::Map.new(:bytes, :int32)
  m[bytestring] = 1
  assert_raise TypeError do
    m["asdf"] = 1
  end
  assert_raise TypeError do
    m[1] = 1
  end
end
def test_map_msg_enum_valuetypes
  # Message-valued map: only instances of the declared class are accepted.
  msg_map = Google::Protobuf::Map.new(:string, :message, TestMessage)
  msg_map["asdf"] = TestMessage.new
  assert_raise TypeError do
    msg_map["jkl;"] = TestMessage2.new
  end

  # Message-valued map pre-populated through the constructor.
  initial_msgs = {
    "a" => TestMessage.new(:optional_int32 => 42),
    "b" => TestMessage.new(:optional_int32 => 84)
  }
  msg_map = Google::Protobuf::Map.new(:string, :message, TestMessage,
                                      initial_msgs)
  assert msg_map.length == 2
  assert msg_map.values.map(&:optional_int32).sort == [42, 84]

  # Enum-valued map: values can be given as symbols or integers; an integer
  # without a matching symbol reads back as that integer.
  enum_map = Google::Protobuf::Map.new(:string, :enum, TestEnum,
                                       { "x" => :A, "y" => :B, "z" => :C })
  assert enum_map.length == 3
  assert enum_map["z"] == :C
  enum_map["z"] = 2
  assert enum_map["z"] == :B
  enum_map["z"] = 4
  assert enum_map["z"] == 4
  assert_raise RangeError do
    enum_map["z"] = :Z
  end
  assert_raise TypeError do
    enum_map["z"] = "z"
  end
end
def test_map_dup_deep_copy
  original = Google::Protobuf::Map.new(
    :string, :message, TestMessage,
    { "a" => TestMessage.new(:optional_int32 => 42),
      "b" => TestMessage.new(:optional_int32 => 84) })

  # dup: a new map object whose values are the very same message objects.
  shallow = original.dup
  assert original == shallow
  assert original.object_id != shallow.object_id
  ["a", "b"].each do |key|
    assert original[key].object_id == shallow[key].object_id
  end

  # deep_copy: a new map object whose values are new message objects too.
  deep = Google::Protobuf.deep_copy(original)
  assert original == deep
  assert original.object_id != deep.object_id
  ["a", "b"].each do |key|
    assert original[key].object_id != deep[key].object_id
  end
end
def test_map_field
  # Map fields of a fresh message start out empty.
  m = MapMessage.new
  assert m.map_string_int32 == {}
  assert m.map_string_msg == {}

  # Map fields can be initialized from plain hashes in the constructor.
  m = MapMessage.new(:map_string_int32 => {"a" => 1, "b" => 2},
                     :map_string_msg => {"a" => TestMessage2.new(:foo => 1),
                                         "b" => TestMessage2.new(:foo => 2)})
  assert m.map_string_int32.keys.sort == ["a", "b"]
  assert m.map_string_int32["a"] == 1
  assert m.map_string_msg["b"].foo == 2

  # Mutating the map through the field accessor is visible on the message.
  m.map_string_int32["c"] = 3
  assert m.map_string_int32["c"] == 3
  m.map_string_msg["c"] = TestMessage2.new(:foo => 3)
  assert m.map_string_msg["c"] == TestMessage2.new(:foo => 3)
  m.map_string_msg.delete("b")
  m.map_string_msg.delete("c")
  assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }

  assert_raise TypeError do
    m.map_string_msg["e"] = TestMessage.new # wrong value type
  end
  # ensure nothing was added by the above
  assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }

  # Assigning to the field requires a Map of exactly matching key/value
  # types; a plain Hash is rejected here.
  m.map_string_int32 = Google::Protobuf::Map.new(:string, :int32)
  assert_raise TypeError do
    m.map_string_int32 = Google::Protobuf::Map.new(:string, :int64)
  end
  assert_raise TypeError do
    m.map_string_int32 = {}
  end

  # The constructor must raise on mistyped entries, so the result is
  # deliberately not assigned.
  assert_raise TypeError do
    MapMessage.new(:map_string_int32 => { 1 => "I am not a number" })
  end
end
def test_map_encode_decode
  m = MapMessage.new(:map_string_int32 => {"a" => 1, "b" => 2},
                     :map_string_msg => {"a" => TestMessage2.new(:foo => 1),
                                         "b" => TestMessage2.new(:foo => 2)})

  # Round trip through the wire format.
  m2 = MapMessage.decode(MapMessage.encode(m))
  assert m == m2

  # The same bytes must decode as repeated key/value entry messages.
  m3 = MapMessageWireEquiv.decode(MapMessage.encode(m))
  assert m3.map_string_int32.length == 2

  # Rebuild plain hashes from the entry messages. `each` is used because the
  # block only runs for its side effect; the array `map` would build is unused.
  kv = {}
  m3.map_string_int32.each { |entry| kv[entry.key] = entry.value }
  assert kv == {"a" => 1, "b" => 2}

  kv = {}
  m3.map_string_msg.each { |entry| kv[entry.key] = entry.value }
  assert kv == {"a" => TestMessage2.new(:foo => 1),
                "b" => TestMessage2.new(:foo => 2)}
end
def test_enum_field
m = TestMessage.new
assert m.optional_enum == :Default
m.optional_enum = :A
assert m.optional_enum == :A
assert_raise NameError do
assert_raise RangeError do
m.optional_enum = :ASDF
end
m.optional_enum = 1

Loading…
Cancel
Save