diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 1130219c8..0d4563d02 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -314,7 +314,7 @@ struct template constexpr auto impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) -#if 0 +#if 1 // The following, unfortunately, while keeps the probing chains short, slows // down the overall hash table performance. Not because of the extra operation // itself in my opinion, but something else going on that we have not been able diff --git a/src/hb-map.hh b/src/hb-map.hh index 91ccb196f..d1d950f91 100644 --- a/src/hb-map.hh +++ b/src/hb-map.hh @@ -200,22 +200,46 @@ struct hb_hashmap_t return true; } + unsigned + probe_distance (unsigned i, unsigned hash) const + { + return (i + mask + 1 - (hash % prime)) & mask; + } + template bool set_with_hash (KK&& key, uint32_t hash, VV&& value) { if (unlikely (!successful)) return false; if (unlikely ((occupancy + occupancy / 2) >= mask && !resize ())) return false; + K k = std::forward (key); + V v = std::forward (value); + hash &= 0x3FFFFFFF; // We only store lower 30bit of hash unsigned int i = hash % prime; + unsigned dist = 0; while (items[i].is_used ()) { - if ((hb_is_same (K, hb_codepoint_t) || items[i].hash == hash) && - items[i] == key) - break; if (items[i].is_tombstone ()) break; + if ((hb_is_same (K, hb_codepoint_t) || items[i].hash == hash) && + items[i] == k) + break; + + // Robinhood hashing + // https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/ + unsigned existing_dist = probe_distance (i, items[i].hash); + if (existing_dist < dist) + { + hb_swap (items[i].key, k); + //hb_swap (items[i].hash, hash); + uint32_t tmp = items[i].hash; items[i].hash = hash; hash = tmp; + hb_swap (items[i].value, v); + dist = existing_dist; + } + i = (i + 1) & mask; + dist++; } item_t &item = items[i]; @@ -227,8 +251,8 @@ struct hb_hashmap_t population--; } - item.key = std::forward (key); - item.value = std::forward (value); + item.key = std::move (k); + item.value = std::move (v); item.hash = hash; item.set_used (true); item.set_tombstone (false); @@ -284,9 +308,12 @@ struct hb_hashmap_t if (unlikely (!items)) return nullptr; hash &= 0x3FFFFFFF; // We only store lower 30bit of hash - unsigned int i = hash % prime; + unsigned i = hash % prime; + unsigned dist = 0; while (items[i].is_used ()) { + if (dist > probe_distance (i, items[i].hash)) + return nullptr; if ((hb_is_same (K, hb_codepoint_t) || items[i].hash == hash) && items[i] == key) { @@ -296,6 +323,7 @@ struct hb_hashmap_t return nullptr; } i = (i + 1) & mask; + dist++; } return nullptr; }