Optimized away the creation of empty string objects.

Prior to this CL, creating an empty message object would create
two empty string objects for every declared field.  First we
created a unique string object for the field's default.  Then
we created yet another string object when we assigned the
default value into the message: we called #encode to ensure
that the string would have the correct encoding and be frozen.

I optimized these unnecessary objects away with two fixes:

1. Memoize the empty string so that we don't create a new empty
   string for every field's default.
2. If we are assigning a string to a message object, avoid creating
   a new string if the assigned string has the correct encoding and
   is already frozen.
pull/6502/head
Joshua Haberman 5 years ago
parent c132a4aa16
commit 1e37a94bb5
  1. 31
      ruby/ext/google/protobuf_c/protobuf.c
  2. 5
      ruby/ext/google/protobuf_c/protobuf.h
  3. 28
      ruby/ext/google/protobuf_c/storage.c

@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
}
static VALUE cached_empty_string = Qnil;
static VALUE cached_empty_bytes = Qnil;
static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
VALUE str_rb = rb_str_new(str, size);
rb_enc_associate(str_rb,
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
}
VALUE get_frozen_string(const char* str, size_t size, bool binary) {
if (size == 0) {
return binary ? cached_empty_bytes : cached_empty_string;
} else {
// It is harder to memoize non-empty strings. The obvious approach would be
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
// requires constructing a string (the very thing we're trying to avoid).
//
// Since few fields have defaults, we will just optimize the empty string
// case for now.
return create_frozen_string(str, size, binary);
}
}
// -----------------------------------------------------------------------------
// Utilities.
// -----------------------------------------------------------------------------
@ -118,4 +144,9 @@ void Init_protobuf_c() {
rb_gc_register_address(&upb_def_to_ruby_obj_map);
upb_def_to_ruby_obj_map = rb_hash_new();
rb_gc_register_address(&cached_empty_string);
rb_gc_register_address(&cached_empty_bytes);
cached_empty_string = create_frozen_string("", 0, false);
cached_empty_bytes = create_frozen_string("", 0, true);
}

@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
// cycles.
#define ENCODE_MAX_NESTING 63
// -----------------------------------------------------------------------------
// A cache of frozen string objects to use as field defaults.
// -----------------------------------------------------------------------------
VALUE get_frozen_string(const char* data, size_t size, bool binary);
// -----------------------------------------------------------------------------
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
// instances.

@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
kRubyStringUtf8Encoding : kRubyString8bitEncoding;
VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
}
return value;
}
@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
case UPB_TYPE_BYTES: {
size_t size;
const char *str = upb_fielddef_defaultstr(field, &size);
VALUE str_rb = rb_str_new(str, size);
rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
return get_frozen_string(str, size,
upb_fielddef_type(field) == UPB_TYPE_BYTES);
}
default: return Qnil;
}

Loading…
Cancel
Save