Finished hashtable implementation, not yet tested.

pull/13171/head
Joshua Haberman 16 years ago
parent e3905b425a
commit 7ccb32c305
  1. 172
      upb_table.c
  2. 22
      upb_table.h

@ -16,11 +16,13 @@ static const double MAX_LOAD = 0.85;
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
/* We use 1-based indexes into the table so that 0 can be "NULL". */
static struct upb_inttable_entry *intent(struct upb_inttable *t, uint32_t i) {
return (struct upb_inttable_entry*)((char*)t->t.entries + i*t->t.entry_size);
return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
}
static struct upb_strtable_entry *strent(struct upb_strtable *t, uint32_t i) {
return (struct upb_strtable_entry*)((char*)t->t.entries + i*t->t.entry_size);
return UPB_INDEX(t->t.entries, i-1, t->t.entry_size);
}
void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size)
@ -30,128 +32,164 @@ void upb_table_init(struct upb_table *t, uint32_t size, uint16_t entry_size)
t->size_lg2 = 1;
while(size >>= 1) t->size_lg2++;
t->size_lg2 = max(t->size_lg2, 4); /* Min size of 16. */
t->entries = malloc(upb_table_size(t) * t->entry_size);
size_t bytes = upb_table_size(t) * t->entry_size;
t->entries = malloc(bytes);
memset(t->entries, 0, bytes); /* Both tables consider 0's an empty entry. */
}
void upb_inttable_init(struct upb_inttable *t, uint32_t size, uint16_t entsize)
{
upb_table_init(&t->t, size, entsize);
for(uint32_t i = 0; i < upb_table_size(&t->t); i++)
intent(t, i)->key = EMPTYENT;
}
void upb_strtable_init(struct upb_strtable *t, uint32_t size, uint16_t entsize)
{
upb_table_init(&t->t, size, entsize);
for(uint32_t i = 0; i < upb_table_size(&t->t); i++)
strent(t, i)->key.data = NULL;
}
void upb_table_free(struct upb_table *t) { free(t->entries); }
void upb_inttable_free(struct upb_inttable *t) { upb_table_free(&t->t); }
void upb_strtable_free(struct upb_strtable *t) { upb_table_free(&t->t); }
static uint32_t strtable_bucket(struct upb_strtable *t, struct upb_string *key)
{
uint32_t hash = MurmurHash2(key->data, key->byte_len, 0);
return (hash & (upb_strtable_size(t)-1)) + 1;
}
void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key)
{
uint32_t hash =
MurmurHash2(key->data, key->byte_len, 0) & (upb_strtable_size(t)-1);
while(1) {
struct upb_strtable_entry *e =
(struct upb_strtable_entry*)(char*)t->t.entries + hash*t->t.entry_size;
if(e->key.data == NULL) return NULL;
else if(upb_string_eql(&e->key, key)) return e;
hash = e->next;
}
uint32_t bucket = strtable_bucket(t, key);
struct upb_strtable_entry *e;
do {
e = strent(t, bucket);
if(upb_string_eql(&e->key, key)) return e;
} while((bucket = e->next) != UPB_END_OF_CHAIN);
return NULL;
}
static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table)
static uint32_t empty_intbucket(struct upb_inttable *table)
{
/* TODO: does it matter that this is biased towards the front of the table? */
for(uint32_t i = 0; i < upb_inttable_size(table); i++) {
for(uint32_t i = 1; i <= upb_inttable_size(table); i++) {
struct upb_inttable_entry *e = intent(table, i);
if(e->key == EMPTYENT) return e;
if(e->key == EMPTYENT) return i;
}
assert(false);
return NULL;
return 0;
}
static void *maybe_resize(struct upb_table *t) {
if((double)++t->count / upb_table_size(t) > MAX_LOAD) {
void *old_entries = t->entries;
t->size_lg2++; /* Double the table size. */
t->entries = malloc(upb_table_size(t) * t->entry_size);
return old_entries;
} else {
return NULL; /* No resize necessary. */
}
}
static void intinsert(void *table, struct upb_inttable_entry *e, uint32_t size)
static void intinsert(struct upb_inttable *t, struct upb_inttable_entry *e)
{
/* TODO */
#if 0
struct upb_inttable_entry *e, *table_e;
e = upb_inttable_entry_get(entries, i, entry_size);
table_e = upb_inttable_mainpos2(table, e->key);
if(table_e->key != UPB_EMPTY_ENTRY) { /* Collision. */
if(table_e == upb_inttable_mainpos2(table, table_e->key)) {
uint32_t bucket = upb_inttable_bucket(t, e->key);
struct upb_inttable_entry *table_e = intent(t, bucket);
if(table_e->key != EMPTYENT) { /* Collision. */
if(bucket == upb_inttable_bucket(t, table_e->key)) {
/* Existing element is in its main posisiton. Find an empty slot to
* place our new element and append it to this key's chain. */
struct upb_inttable_entry *empty = find_empty_slot(table);
while (table_e->next) table_e = table_e->next;
table_e->next = empty;
table_e = empty;
uint32_t empty_bucket = empty_intbucket(t);
while (table_e->next != UPB_END_OF_CHAIN)
table_e = intent(t, table_e->next-1);
table_e->next = empty_bucket;
table_e = intent(t, empty_bucket);
} else {
/* Existing element is not in its main position. Move it to an empty
* slot and put our element in its main position. */
struct upb_inttable_entry *empty, *colliding_key_mainpos;
empty = find_empty_slot(table);
colliding_key_mainpos = upb_inttable_mainpos2(table, table_e->key);
assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY);
assert(colliding_key_mainpos->next);
memcpy(empty, table_e, entry_size); /* next is copied also. */
uint32_t empty_bucket = empty_intbucket(t);
uint32_t evictee_bucket = upb_inttable_bucket(t, table_e->key);
memcpy(intent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
struct upb_inttable_entry *evictee_e = intent(t, evictee_bucket);
while(1) {
assert(colliding_key_mainpos->next);
if(colliding_key_mainpos->next == table_e) {
colliding_key_mainpos->next = empty;
assert(evictee_e->key != UPB_EMPTY_ENTRY);
assert(evictee_e->next != END_OF_CHAIN);
if(evictee_e->next == bucket) {
evictee_e->next = empty_bucket;
break;
}
}
/* table_e remains set to our mainpos. */
}
}
memcpy(table_e, e, entry_size);
table_e->next = NULL;
#endif
memcpy(table_e, e, t->t.entry_size);
table_e->next = UPB_END_OF_CHAIN;
}
void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e)
{
void *new_entries = maybe_resize(&t->t);
if(new_entries) { /* Are we doing a resize? */
for(uint32_t i = 0; i < (upb_inttable_size(t)>>1); i++) {
if((double)++t->t.count / upb_inttable_size(t) > MAX_LOAD) {
/* Need to resize. New table of double the size, add old elements to it. */
struct upb_inttable new_table;
upb_inttable_init(&new_table, upb_inttable_size(t)*2, t->t.entry_size);
for(uint32_t i = 1; i <= upb_inttable_size(t)/2; i++) {
struct upb_inttable_entry *old_e = intent(t, i);
if(old_e->key != EMPTYENT) intinsert(new_entries, old_e, t->t.entry_size);
if(old_e->key != EMPTYENT) intinsert(&new_table, old_e);
}
upb_inttable_free(t);
*t = new_table;
}
intinsert(t->t.entries, e, t->t.entry_size);
intinsert(t->t.entries, e);
}
static void strinsert(void *table, struct upb_strtable_entry *e, uint32_t size)
static uint32_t empty_strbucket(struct upb_strtable *table)
{
/* TODO */
/* TODO: does it matter that this is biased towards the front of the table? */
for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
struct upb_strtable_entry *e = strent(table, i);
if(e->key.byte_len == 0) return i;
}
assert(false);
return 0;
}
static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e)
{
uint32_t bucket = strtable_bucket(t, &e->key);
struct upb_strtable_entry *table_e = strent(t, bucket);
if(table_e->key.byte_len != 0) { /* Collision. */
if(bucket == strtable_bucket(t, &table_e->key)) {
/* Existing element is in its main posisiton. Find an empty slot to
* place our new element and append it to this key's chain. */
uint32_t empty_bucket = empty_strbucket(t);
while (table_e->next != UPB_END_OF_CHAIN)
table_e = strent(t, table_e->next-1);
table_e->next = empty_bucket;
table_e = strent(t, empty_bucket);
} else {
/* Existing element is not in its main position. Move it to an empty
* slot and put our element in its main position. */
uint32_t empty_bucket = empty_strbucket(t);
uint32_t evictee_bucket = strtable_bucket(t, &table_e->key);
memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
struct upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
while(1) {
assert(evictee_e->key != UPB_EMPTY_ENTRY);
assert(evictee_e->next != END_OF_CHAIN);
if(evictee_e->next == bucket) {
evictee_e->next = empty_bucket;
break;
}
}
/* table_e remains set to our mainpos. */
}
}
memcpy(table_e, e, t->t.entry_size);
table_e->next = UPB_END_OF_CHAIN;
}
void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e)
{
void *new_entries = maybe_resize(&t->t);
if(new_entries) { /* Are we doing a resize? */
for(uint32_t i = 0; i < (upb_strtable_size(t)>>1); i++) {
if((double)++t->t.count / upb_strtable_size(t) > MAX_LOAD) {
/* Need to resize. New table of double the size, add old elements to it. */
struct upb_strtable new_table;
upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
for(uint32_t i = 1; i <= upb_strtable_size(t)/2; i++) {
struct upb_strtable_entry *old_e = strent(t, i);
if(old_e->key.data != NULL) strinsert(new_entries, old_e, t->t.entry_size);
if(old_e->key.byte_len != 0) strinsert(&new_table, old_e);
}
upb_strtable_free(t);
*t = new_table;
}
strinsert(t->t.entries, e, t->t.entry_size);
strinsert(t->t.entries, e);
}
#ifdef UPB_UNALIGNED_READS_OK

@ -23,6 +23,9 @@ extern "C" {
typedef uint32_t upb_inttable_key_t;
#define UPB_END_OF_CHAIN (uint32_t)0
#define UPB_INDEX(base, i, m) (void*)((char*)base + (i*m))
struct upb_inttable_entry {
upb_inttable_key_t key;
uint32_t next; /* Internal chaining. */
@ -78,8 +81,8 @@ INLINE uint32_t upb_strtable_size(struct upb_strtable *t) {
void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e);
void upb_strtable_insert(struct upb_strtable *t, struct upb_strtable_entry *e);
INLINE uint32_t upb_inttable_hash(struct upb_inttable *t, upb_inttable_key_t k) {
return k & (upb_inttable_size(t)-1); /* Identity hash for ints. */
INLINE uint32_t upb_inttable_bucket(struct upb_inttable *t, upb_inttable_key_t k) {
return (k & (upb_inttable_size(t)-1)) + 1; /* Identity hash for ints. */
}
/* Looks up key in this table. Inlined because this is in the critical path
@ -88,14 +91,13 @@ INLINE uint32_t upb_inttable_hash(struct upb_inttable *t, upb_inttable_key_t k)
* compiler more ability to optimize. */
INLINE void *upb_inttable_lookup(struct upb_inttable *t,
uint32_t key, uint32_t entry_size) {
uint32_t hash = upb_inttable_hash(t, key);
while(1) {
struct upb_inttable_entry *e =
(struct upb_inttable_entry*)(char*)t->t.entries + hash*entry_size;
if(e->key == 0) return NULL;
else if(e->key == key) return e;
hash = e->next;
}
uint32_t bucket = upb_inttable_bucket(t, key);
struct upb_inttable_entry *e;
do {
e = UPB_INDEX(t->t.entries, bucket-1, entry_size);
if(e->key == key) return e;
} while((bucket = e->next) != UPB_END_OF_CHAIN);
return NULL; /* Not found. */
}
void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key);

Loading…
Cancel
Save