From f4c00fc9798d5089f7d22054a45f645cd1a0936c Mon Sep 17 00:00:00 2001
From: Joshua Haberman <joshua@reverberate.org>
Date: Sun, 14 Jun 2009 12:30:27 -0700
Subject: [PATCH] More work on the table implementation.

It currently beats std::map and std::hash_map by >10x.
---
 Makefile      |  13 ++--
 test_table.cc | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++
 test_util.h   |  50 +++++++++++++
 upb.h         |   4 +-
 upb_table.c   |  58 +++++++++++----
 upb_table.h   |  36 +++++-----
 6 files changed, 314 insertions(+), 39 deletions(-)
 create mode 100644 test_table.cc
 create mode 100644 test_util.h

diff --git a/Makefile b/Makefile
index a781fa258d..da620fe1d2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,23 +1,18 @@
 
 .PHONY: all clean
 CC=gcc
-CFLAGS=-std=c99 -O3 -Wall -Wextra -pedantic
+CFLAGS=-std=c99
+CPPFLAGS=-O3 -Wall -Wextra -pedantic -g
 OBJ=upb_parse.o upb_table.o upb_struct.o descriptor.o
-all: $(OBJ)
+all: $(OBJ) test_table
 clean:
 	rm -f $(OBJ) tests
 
 upb_parse.o: upb_parse.c upb_parse.h
-	$(CC) $(CFLAGS) -o upb_parse.o -c upb_parse.c
-
 upb_table.o: upb_table.c upb_table.h
-	$(CC) $(CFLAGS) -o upb_table.o -c upb_table.c
-
 upb_struct.o: upb_struct.c upb_struct.h
-	$(CC) $(CFLAGS) -o upb_struct.o -c upb_struct.c
-
 descriptor.o: descriptor.c descriptor.h
-	$(CC) $(CFLAGS) -o descriptor.o -c descriptor.c
+test_table: test_table.cc upb_table.o
 
 tests: tests.c upb_parse.c upb_parse.h
 	$(CC) $(CFLAGS) -o tests tests.c
diff --git a/test_table.cc b/test_table.cc
new file mode 100644
index 0000000000..5cbeb41ef2
--- /dev/null
+++ b/test_table.cc
@@ -0,0 +1,192 @@
+
+#include "upb_table.h"
+#include "test_util.h"
+#include <assert.h>
+#include <map>
+#include <ext/hash_map>
+#include <sys/resource.h>
+
+struct table_entry {
+  struct upb_inttable_entry e;
+  int32_t value;  /* key+2 */
+};
+
+double get_usertime()
+{
+  struct rusage usage;
+  getrusage(RUSAGE_SELF, &usage);
+  return usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec/1000000.0);
+}
+
+static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+
+/* num_entries must be a power of 2. */
+void test(int32_t *keys, size_t num_entries)
+{
+  /* Initialize structures. */
+  struct table_entry *entries = new table_entry[num_entries];
+  struct upb_inttable table;
+  int32_t largest_key = 0;
+  std::map<int32_t, int32_t> m;
+  __gnu_cxx::hash_map<int32_t, int32_t> hm;
+  for(size_t i = 0; i < num_entries; i++) {
+    int32_t key = keys[i];
+    largest_key = max(largest_key, key);
+    entries[i].e.key = key;
+    entries[i].value = key*2;
+    m[key] = key*2;
+    hm[key] = key*2;
+  }
+  upb_inttable_init(&table, entries, num_entries, sizeof(struct table_entry));
+  printf("size: %lu\n", sizeof(struct table_entry));
+  delete[] entries;
+
+  /* Test correctness. */
+  for(int32_t i = 0; i < largest_key; i++) {
+    struct table_entry *e = (struct table_entry*)upb_inttable_lookup(
+        &table, i, sizeof(struct table_entry));
+    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
+      assert(e);
+      assert(e->e.key == i);
+      assert(e->value == i*2);
+      assert(m[i] == i*2);
+      assert(hm[i] == i*2);
+    } else {
+      assert(e == NULL);
+    }
+  }
+
+  /* Test performance. We only test lookups for keys that are known to exist. */
+  uintptr_t x = 0;
+  const unsigned int iterations = 0xFFFFFF;
+  const int32_t mask = num_entries - 1;
+  printf("Measuring sequential loop overhead...");
+  fflush(stdout);
+  double before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[i & mask];
+    x += key;
+  }
+  double seq_overhead = get_usertime() - before;
+  printf("%0.3f seconds for %d iterations\n", seq_overhead, iterations);
+
+  printf("Measuring random loop overhead...");
+  rand();
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[rand() & mask];
+    x += key;
+  }
+  double rand_overhead = get_usertime() - before;
+  printf("%0.3f seconds for %d iterations\n", rand_overhead, iterations);
+
+  printf("upb_table(seq): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[i & mask];
+    struct table_entry *e = (struct table_entry*)upb_inttable_lookup(
+        &table, key, sizeof(struct table_entry));
+    x += (uintptr_t)e;
+  }
+  double total = get_usertime() - before;
+  double without_overhead = total - seq_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n", without_overhead, total, seq_overhead, iterations, eng(iterations/without_overhead, 3, false));
+
+  printf("upb_table(rand): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[rand() & mask];
+    struct table_entry *e = (struct table_entry*)upb_inttable_lookup(
+        &table, key, sizeof(struct table_entry));
+    x += (uintptr_t)e;
+  }
+  total = get_usertime() - before;
+  without_overhead = total - rand_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n", without_overhead, total, rand_overhead, iterations, eng(iterations/without_overhead, 3, false));
+
+  printf("map(seq): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[i & mask];
+    x += m[key];
+  }
+  total = get_usertime() - before;
+  without_overhead = total - seq_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n", without_overhead, total, seq_overhead, iterations, eng(iterations/without_overhead, 3, false));
+
+  printf("map(rand): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[rand() & mask];
+    x += m[key];
+  }
+  total = get_usertime() - before;
+  without_overhead = total - rand_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n", without_overhead, total, rand_overhead, iterations, eng(iterations/without_overhead, 3, false));
+
+  printf("hash_map(seq): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[i & mask];
+    x += hm[key];
+  }
+  total = get_usertime() - before;
+  without_overhead = total - seq_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n", without_overhead, total, seq_overhead, iterations, eng(iterations/without_overhead, 3, false));
+
+  printf("hash_map(rand): ");
+  fflush(stdout);
+  before = get_usertime();
+  for(unsigned int i = 0; i < iterations; i++) {
+    int32_t key = keys[rand() & mask];
+    x += hm[key];
+  }
+  total = get_usertime() - before;
+  without_overhead = total - rand_overhead;
+  printf("%0.3f seconds (%0.3f - %0.3f overhead) for %d iterations.  %s/s\n\n", without_overhead, total, rand_overhead, iterations, eng(iterations/without_overhead, 3, false));
+  upb_inttable_free(&table);
+}
+
+int32_t *get_contiguous_keys(int32_t num)
+{
+  int32_t *buf = new int32_t[num];
+  for(int32_t i = 0; i < num; i++)
+    buf[i] = i;
+  return buf;
+}
+
+int main()
+{
+  int32_t *keys1 = get_contiguous_keys(8);
+  printf("Contiguous 1-8 ====\n");
+  test(keys1, 8);
+  delete[] keys1;
+
+  int32_t *keys2 = get_contiguous_keys(64);
+  printf("Contiguous 1-64 ====\n");
+  test(keys2, 64);
+  delete[] keys2;
+
+  int32_t *keys3 = get_contiguous_keys(512);
+  printf("Contiguous 1-512 ====\n");
+  test(keys3, 512);
+  delete[] keys3;
+
+  int32_t *keys4 = new int32_t[64];
+  for(int32_t i = 0; i < 64; i++) {
+    if(i < 32)
+      keys4[i] = i;
+    else
+      keys4[i] = 10100+i;
+  }
+  printf("1-32 and 10033-10064 ====\n");
+  test(keys4, 64);
+  delete[] keys4;
+}
+
diff --git a/test_util.h b/test_util.h
new file mode 100644
index 0000000000..6e9e7f9564
--- /dev/null
+++ b/test_util.h
@@ -0,0 +1,50 @@
+#define PREFIX_START (-24)
+/* Smallest power of then for which there is a prefix defined.
+   If the set of prefixes will be extended, change this constant
+   and update the table "prefix". */
+
+#include <stdio.h>
+#include <math.h>
+
+static char *eng(double value, int digits, int numeric)
+{
+  static char *prefix[] = {
+  "y", "z", "a", "f", "p", "n", "u", "m", "",
+  "k", "M", "G", "T", "P", "E", "Z", "Y"
+  };
+#define PREFIX_END (PREFIX_START+\
+(int)((sizeof(prefix)/sizeof(char *)-1)*3))
+
+      int expof10;
+      static char result[100];
+      char *res = result;
+
+      if (value < 0.)
+        {
+            *res++ = '-';
+            value = -value;
+        }
+
+      expof10 = (int) log10(value);
+      if(expof10 > 0)
+        expof10 = (expof10/3)*3;
+      else
+        expof10 = (-expof10+3)/3*(-3); 
+
+      value *= pow(10,-expof10);
+
+      if (value >= 1000.)
+         { value /= 1000.0; expof10 += 3; }
+      else if(value >= 100.0)
+         digits -= 2;
+      else if(value >= 10.0)
+         digits -= 1;
+
+      if(numeric || (expof10 < PREFIX_START) ||    
+                    (expof10 > PREFIX_END))
+        sprintf(res, "%.*fe%d", digits-1, value, expof10); 
+      else
+        sprintf(res, "%.*f %s", digits-1, value, 
+          prefix[(expof10-PREFIX_START)/3]);
+      return result;
+}
diff --git a/upb.h b/upb.h
index 445975b194..b09a39cbe3 100644
--- a/upb.h
+++ b/upb.h
@@ -25,7 +25,7 @@ enum upb_wire_type {
   UPB_WIRE_TYPE_DELIMITED   = 2,
   UPB_WIRE_TYPE_START_GROUP = 3,
   UPB_WIRE_TYPE_END_GROUP   = 4,
-  UPB_WIRE_TYPE_32BIT       = 5,
+  UPB_WIRE_TYPE_32BIT       = 5
 };
 typedef int8_t upb_wire_type_t;
 
@@ -94,7 +94,7 @@ typedef enum upb_status {
   UPB_ERROR_UNKNOWN_VALUE = 2,
 
   // A field was encoded with the wrong wire type.
-  UPB_ERROR_MISMATCHED_TYPE = 3,
+  UPB_ERROR_MISMATCHED_TYPE = 3
 } upb_status_t;
 
 #ifdef __cplusplus
diff --git a/upb_table.c b/upb_table.c
index 656da242be..d28e079c63 100644
--- a/upb_table.c
+++ b/upb_table.c
@@ -6,7 +6,9 @@
 
 #include "upb_table.h"
 
+#include <assert.h>
 #include <stdlib.h>
+#include <string.h>
 
 static int compare_entries(const void *f1, const void *f2)
 {
@@ -26,6 +28,18 @@ static uint32_t round_up_to_pow2(uint32_t v)
   return v;
 }
 
+static struct upb_inttable_entry *find_empty_slot(struct upb_inttable *table)
+{
+  /* TODO: does it matter that this is biased towards the front of the table? */
+  for(uint32_t i = 0; i < table->size; i++) {
+    struct upb_inttable_entry *e =
+        upb_inttable_entry_get(table->entries, i, table->entry_size);
+    if(e->key == UPB_EMPTY_ENTRY) return e;
+  }
+  assert(false);
+  return NULL;
+}
+
 void upb_inttable_init(struct upb_inttable *table, void *entries,
                        int num_entries, int entry_size)
 {
@@ -53,31 +67,51 @@ void upb_inttable_init(struct upb_inttable *table, void *entries,
   uint32_t min_size_by_load = all_in_array ? n : (double)num_entries / 0.85;
   uint32_t min_size = max(n, min_size_by_load);
   table->size = round_up_to_pow2(min_size);
-
+  table->entry_size = entry_size;
   table->entries = malloc(table->size * entry_size);
+
   /* Initialize to empty. */
   for(size_t i = 0; i < table->size; i++) {
-    struct upb_inttable_entry *e = upb_inttable_get(table, i, entry_size);
-    e->key = UPB_END_OF_CHAIN;
-    e->next = UPB_END_OF_CHAIN;
+    struct upb_inttable_entry *e =
+        upb_inttable_entry_get(table->entries, i, entry_size);
+    e->key = UPB_EMPTY_ENTRY;
+    e->next = NULL;
   }
 
   /* Insert the elements. */
   for(int i = 0; i < num_entries; i++) {
-    struct upb_inttable_entry *e =
-        upb_inttable_entry_get(entries, i, entry_size);
-    int32_t hash = upb_inttable_hash(table, e->key);
-    struct upb_inttable_entry *table_e =
-        upb_inttable_get(table, hash, entry_size);
-    if(table_e->key != UPB_END_OF_CHAIN) {  /* Collision. */
-      if(hash == upb_inttable_hash(table, table_e->key)) {
+    struct upb_inttable_entry *e, *table_e;
+    e = upb_inttable_entry_get(entries, i, entry_size);
+    table_e = upb_inttable_mainpos(table, e->key);
+    if(table_e->key != UPB_EMPTY_ENTRY) {  /* Collision. */
+      if(table_e == upb_inttable_mainpos(table, table_e->key)) {
         /* Existing element is in its main posisiton.  Find an empty slot to
-         * place our new element. */
+         * place our new element and append it to this key's chain. */
+        struct upb_inttable_entry *empty = find_empty_slot(table);
+        while (table_e->next) table_e = table_e->next;
+        table_e->next = empty;
+        table_e = empty;
       } else {
         /* Existing element is not in its main position.  Move it to an empty
          * slot and put our element in its main position. */
+        struct upb_inttable_entry *empty, *colliding_key_mainpos;
+        empty = find_empty_slot(table);
+        colliding_key_mainpos = upb_inttable_mainpos(table, table_e->key);
+        assert(colliding_key_mainpos->key != UPB_EMPTY_ENTRY);
+        assert(colliding_key_mainpos->next);
+        memcpy(empty, table_e, entry_size);  /* next is copied also. */
+        while(1) {
+          assert(colliding_key_mainpos->next);
+          if(colliding_key_mainpos->next == table_e) {
+            colliding_key_mainpos->next = empty;
+            break;
+          }
+        }
+        /* table_e remains set to our mainpos. */
       }
     }
+    memcpy(table_e, e, entry_size);
+    table_e->next = NULL;
   }
 }
 
diff --git a/upb_table.h b/upb_table.h
index 684dbff9fd..41cc2f7e78 100644
--- a/upb_table.h
+++ b/upb_table.h
@@ -13,17 +13,19 @@
 extern "C" {
 #endif
 
+/* Inttables are keyed by 32-bit integer. */
 typedef int32_t upb_inttable_key_t;
 
-#define UPB_END_OF_CHAIN (upb_inttable_key_t)-1
+#define UPB_EMPTY_ENTRY (upb_inttable_key_t)-1
 
 struct upb_inttable_entry {
   upb_inttable_key_t key;
-  int32_t next;
+  struct upb_inttable_entry *next;  /* Internal chaining. */
 };
 
 struct upb_inttable {
   uint32_t size;  /* Is a power of two. */
+  uint32_t entry_size;  /* How big is each entry? */
   void *entries;
 };
 
@@ -37,34 +39,36 @@ struct upb_inttable {
 void upb_inttable_init(struct upb_inttable *table, void *entries,
                        int num_entries, int entry_size);
 
-inline int32_t upb_inttable_hash(struct upb_inttable * table,
-                                 upb_inttable_key_t key) {
-  return key & (table->size-1);
-}
-
 /* Frees any data that was allocated by upb_inttable_init. */
 void upb_inttable_free(struct upb_inttable *table);
 
 inline struct upb_inttable_entry *upb_inttable_entry_get(
     void *entries, int32_t pos, int entry_size) {
-  return (struct upb_inttable_entry*)((char*)entries) + pos*entry_size;
+  return (struct upb_inttable_entry*)(((char*)entries) + pos*entry_size);
 }
 
-inline struct upb_inttable_entry *upb_inttable_get(
-    struct upb_inttable *table, int32_t pos, int entry_size) {
+inline struct upb_inttable_entry *upb_inttable_mainpos2(
+    struct upb_inttable *table, upb_inttable_key_t key, int32_t entry_size) {
+  /* Identity hash for integers. */
+  int32_t pos = key & (table->size-1);
   return upb_inttable_entry_get(table->entries, pos, entry_size);
 }
 
+inline struct upb_inttable_entry *upb_inttable_mainpos(
+    struct upb_inttable *table, upb_inttable_key_t key) {
+  return upb_inttable_mainpos2(table, key, table->entry_size);
+}
+
 /* Lookups up key in this table.  Inlined because this is in the critical path
  * of parsing. */
-inline void *upb_inttable_lookup(struct upb_inttable *table, int32_t key,
-                                 int entry_size) {
-  int32_t pos = upb_inttable_hash(table, key);
+inline void *upb_inttable_lookup(struct upb_inttable *table,
+                                 int32_t key,
+                                 int32_t entry_size) {
+  struct upb_inttable_entry *e = upb_inttable_mainpos2(table, key, entry_size);
   do {
-    struct upb_inttable_entry *e = upb_inttable_get(table, pos, entry_size);
     if (key == e->key) return e;
-    pos = e->next;
-  } while (pos != UPB_END_OF_CHAIN);
+    e = e->next;
+  } while (e);
   return NULL;  /* Not found. */
 }