@@ -252,164 +252,39 @@ void Arena_register(VALUE module) {
// Object Cache
// -----------------------------------------------------------------------------
// A pointer -> Ruby Object cache that keeps references to Ruby wrapper
// objects. This allows us to look up any Ruby wrapper object by the address
// of the object it is wrapping. That way we can avoid ever creating two
// different wrapper objects for the same C object, which saves memory and
// preserves object identity.
//
// We use WeakMap for the cache. For Ruby <2.7 we also need a secondary Hash
// to store WeakMap keys because Ruby <2.7 WeakMap doesn't allow non-finalizable
// keys.
//
// We also need the secondary Hash if sizeof(long) < sizeof(VALUE), because this
// means it may not be possible to fit a pointer into a Fixnum. Keys are
// pointers, and if they fit into a Fixnum, Ruby doesn't collect them, but if
// they overflow and require allocating a Bignum, they could get collected
// prematurely, thus removing the cache entry. This happens on 64-bit Windows,
// on which pointers are 64 bits but longs are 32 bits. In this case, we enable
// the secondary Hash to hold the keys and prevent them from being collected.
# if RUBY_API_VERSION_CODE >= 20700 && SIZEOF_LONG >= SIZEOF_VALUE
# define USE_SECONDARY_MAP 0
# else
# define USE_SECONDARY_MAP 1
# endif
# if USE_SECONDARY_MAP
// Maps Numeric -> Object. The object is then used as a key into the WeakMap.
// This is needed for Ruby <2.7 where a number cannot be a key to WeakMap.
// The object is used only for its identity; it does not contain any data.
VALUE secondary_map = Qnil ;
// Mutations to the map are under a mutex, because SecondaryMap_MaybeGC()
// iterates over the map which cannot happen in parallel with insertions, or
// Ruby will throw:
//   can't add a new key into hash during iteration (RuntimeError)
VALUE secondary_map_mutex = Qnil ;
// Lambda that will GC entries from the secondary map that are no longer present
// in the primary map.
VALUE gc_secondary_map_lambda = Qnil ;
// Interned method-name ID used to call ObjectSpace::WeakMap#length.
ID length ;
// Defined later in this file; declared here so SecondaryMap_MaybeGC() can
// query the primary WeakMap's size.
extern VALUE weak_obj_cache ;
// One-time setup of the secondary-map state: registers the three global
// VALUEs with the Ruby GC (they live for the life of the process), creates
// the Hash, the cleanup lambda, and the mutex, and interns the method name
// used to query the WeakMap's size.
static void SecondaryMap_Init() {
  rb_gc_register_address(&secondary_map);
  rb_gc_register_address(&gc_secondary_map_lambda);
  rb_gc_register_address(&secondary_map_mutex);
  secondary_map = rb_hash_new();
  // Deletes secondary-map entries whose value object is no longer a live key
  // in the primary WeakMap. Kept as a Ruby lambda so the whole sweep runs as
  // one Hash#delete_if pass.
  gc_secondary_map_lambda = rb_eval_string(
      "->(secondary, weak) {\n"
      "  secondary.delete_if { |k, v| !weak.key?(v) }\n"
      "}\n");
  secondary_map_mutex = rb_mutex_new();
  length = rb_intern("length");
}
// The secondary map is a regular Hash, and will never shrink on its own.
// The main object cache is a WeakMap that will automatically remove entries
// when the target object is no longer reachable, but unless we manually
// remove the corresponding entries from the secondary map, it will grow
// without bound.
//
// To avoid this unbounded growth we periodically remove entries from the
// secondary map that are no longer present in the WeakMap. The logic of
// how often to perform this GC is an arbitrary tuning parameter that
// represents a straightforward CPU/memory tradeoff.
//
// Requires: secondary_map_mutex is held.
static void SecondaryMap_MaybeGC() {
  PBRUBY_ASSERT(rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
  size_t weak_len = NUM2ULL(rb_funcall(weak_obj_cache, length, 0));
  size_t secondary_len = RHASH_SIZE(secondary_map);
  if (secondary_len < weak_len) {
    // Logically this case should not be possible: a valid entry cannot exist
    // in the weak table unless there is a corresponding entry in the
    // secondary table. It should *always* be the case that
    // secondary_len >= weak_len.
    //
    // However ObjectSpace::WeakMap#length (and therefore weak_len) is
    // unreliable: it overreports its true length by including non-live
    // objects. These non-live objects are not yielded in iteration, so we may
    // have already deleted them from the secondary map in a previous
    // invocation of SecondaryMap_MaybeGC().
    //
    // In this case, we can't measure any waste, so we just return.
    return;
  }
  size_t waste = secondary_len - weak_len;
  // GC if we could remove at least 2000 entries or 20% of the table size
  // (whichever is greater). Since the cost of the GC pass is O(N), we
  // want to make sure that we condition this on overall table size, to
  // avoid O(N^2) CPU costs.
  size_t threshold = PBRUBY_MAX(secondary_len * 0.2, 2000);
  if (waste > threshold) {
    rb_funcall(gc_secondary_map_lambda, rb_intern("call"), 2, secondary_map,
               weak_obj_cache);
  }
}
// Returns the secondary-map proxy object for |key|, creating and inserting
// one when |create| is true and none exists yet. Returns Qnil when the key
// is absent and create == false.
//
// Requires: secondary_map_mutex is held by this thread iff create == true.
static VALUE SecondaryMap_Get(VALUE key, bool create) {
  PBRUBY_ASSERT(!create || rb_mutex_locked_p(secondary_map_mutex) == Qtrue);
  VALUE ret = rb_hash_lookup(secondary_map, key);
  if (ret == Qnil && create) {
    // Piggy-back GC of stale entries on insertion, while the mutex is held.
    SecondaryMap_MaybeGC();
    // The object carries no data; it is used purely for its identity.
    ret = rb_class_new_instance(0, NULL, rb_cObject);
    rb_hash_aset(secondary_map, key, ret);
  }
  return ret;
}
# endif
// Converts a C pointer into the VALUE used as the cache key.
//
// Requires: secondary_map_mutex is held by this thread iff create == true.
static VALUE ObjectCache_GetKey(const void *key, bool create) {
  VALUE key_val = (VALUE)key;
  // Cached objects are at least 4-byte aligned, so the low two bits are zero.
  PBRUBY_ASSERT((key_val & 3) == 0);
  // Shift off the always-zero low bits so the value is more likely to fit in
  // a Fixnum (see the USE_SECONDARY_MAP discussion above).
  VALUE ret = LL2NUM(key_val >> 2);
#if USE_SECONDARY_MAP
  ret = SecondaryMap_Get(ret, create);
#endif
  return ret;
}
// Public ObjectCache API.
// The primary cache: an ObjectSpace::WeakMap instance (see ObjectCache_Init).
VALUE weak_obj_cache = Qnil ;
// Interned method-name IDs used to call into weak_obj_cache.
ID item_get ;
ID item_set ;
ID item_getset ;
// NOTE(review): this region appears to be a merged diff with the +/- prefixes
// stripped — both the newer ObjectCache_Init(VALUE protobuf) (line below) and
// the older zero-argument ObjectCache_Init() (further down) are present, the
// braces do not balance, and weak_obj_cache is assigned twice. Reconstruct
// from upstream history before compiling; annotated only, not rewritten.
static void ObjectCache_Init ( VALUE protobuf ) {
// Interns the accessor names used on the (newer) Ruby-side cache class.
item_get = rb_intern ( " get " ) ;
item_getset = rb_intern ( " getset " ) ;
// NOTE(review): older version of the function starts here.
static void ObjectCache_Init ( ) {
// Pins the global cache reference so the GC never collects it.
rb_gc_register_address ( & weak_obj_cache ) ;
VALUE klass = rb_eval_string ( " ObjectSpace::WeakMap " ) ;
weak_obj_cache = rb_class_new_instance ( 0 , NULL , klass ) ;
// Older version used WeakMap's [] / []= directly.
item_get = rb_intern ( " [] " ) ;
item_set = rb_intern ( " []= " ) ;
# if USE_SECONDARY_MAP
SecondaryMap_Init ( ) ;
// NOTE(review): the lines below belong to the newer version, which picks a
// Ruby-side cache class based on whether WeakMap can be used directly
// (same condition as USE_SECONDARY_MAP) — TODO confirm against upstream.
# if RUBY_API_VERSION_CODE >= 20700 && SIZEOF_LONG >= SIZEOF_VALUE
VALUE cache_class = rb_const_get ( protobuf , rb_intern ( " ObjectCache " ) ) ;
# else
VALUE cache_class = rb_const_get ( protobuf , rb_intern ( " LegacyObjectCache " ) ) ;
# endif
weak_obj_cache = rb_class_new_instance ( 0 , NULL , cache_class ) ;
// Exposes the cache to Ruby as Google::Protobuf::OBJECT_CACHE.
rb_const_set ( protobuf , rb_intern ( " OBJECT_CACHE " ) , weak_obj_cache ) ;
}
// Adds a new key/wrapper-object pair to the cache.
// NOTE(review): diff residue — the closing brace of ObjectCache_Add is
// missing before ObjectCache_GetSet below; the two functions were fused when
// the patch prefixes were stripped. Restore the brace before compiling.
void ObjectCache_Add ( const void * key , VALUE val ) {
// The key must not already be cached (would break object identity).
PBRUBY_ASSERT ( ObjectCache_Get ( key ) = = Qnil ) ;
# if USE_SECONDARY_MAP
// Insertion into the secondary map must not race with SecondaryMap_MaybeGC's
// iteration (see comment on secondary_map_mutex above).
rb_mutex_lock ( secondary_map_mutex ) ;
# endif
VALUE key_rb = ObjectCache_GetKey ( key , true ) ;
rb_funcall ( weak_obj_cache , item_set , 2 , key_rb , val ) ;
# if USE_SECONDARY_MAP
rb_mutex_unlock ( secondary_map_mutex ) ;
# endif
PBRUBY_ASSERT ( ObjectCache_Get ( key ) = = val ) ;
// NOTE(review): newer-version function; presumably delegates atomicity to a
// Ruby-side getset method on the cache object — TODO confirm upstream.
VALUE ObjectCache_GetSet ( const void * key , VALUE val ) {
VALUE key_val = ( VALUE ) key ;
// Pointers are at least 4-byte aligned, so the low two bits must be zero.
PBRUBY_ASSERT ( ( key_val & 3 ) = = 0 ) ;
return rb_funcall ( weak_obj_cache , item_getset , 2 , LL2NUM ( key_val ) , val ) ;
}
// Returns the cached object for this key, if any. Otherwise returns Qnil.
VALUE ObjectCache_Get ( const void * key ) {
// NOTE(review): two alternative bodies are merged here (stripped diff). The
// two lines below are the older body; everything after the first return is
// unreachable. Keep exactly one body when reconstructing.
VALUE key_rb = ObjectCache_GetKey ( key , false ) ;
return rb_funcall ( weak_obj_cache , item_get , 1 , key_rb ) ;
// Newer body: uses the raw (un-shifted) pointer value as the key.
VALUE key_val = ( VALUE ) key ;
PBRUBY_ASSERT ( ( key_val & 3 ) = = 0 ) ;
return rb_funcall ( weak_obj_cache , item_get , 1 , LL2NUM ( key_val ) ) ;
}
/*
@@ -459,11 +334,10 @@ VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
// This must be named "Init_protobuf_c" because the Ruby module is named
// "protobuf_c" -- the VM looks for this symbol in our .so.
__attribute__ ( ( visibility ( " default " ) ) ) void Init_protobuf_c ( ) {
ObjectCache_Init ( ) ;
VALUE google = rb_define_module ( " Google " ) ;
VALUE protobuf = rb_define_module_under ( google , " Protobuf " ) ;
ObjectCache_Init ( protobuf ) ;
Arena_register ( protobuf ) ;
Defs_register ( protobuf ) ;
RepeatedField_register ( protobuf ) ;