@ -1,6 +1,8 @@
/*
/*
* upb - a minimalist implementation of protocol buffers .
* upb - a minimalist implementation of protocol buffers .
*
*
* There are a few printf's strewn throughout this file; uncommenting them
* can be useful for debugging.
* Copyright ( c ) 2009 Joshua Haberman . See LICENSE for details .
* Copyright ( c ) 2009 Joshua Haberman . See LICENSE for details .
*/
*/
@ -13,140 +15,294 @@
static const double MAX_LOAD = 0.85 ;
static const double MAX_LOAD = 0.85 ;
// The minimum percentage of an array part that we will allow. This is a
// speed/memory-usage tradeoff (though it's not straightforward because of
// cache effects). The lower this is, the more memory we'll use.
static const double MIN_DENSITY = 0.1 ;
static uint32_t MurmurHash2 ( const void * key , size_t len , uint32_t seed ) ;
static uint32_t MurmurHash2 ( const void * key , size_t len , uint32_t seed ) ;
/* We use 1-based indexes into the table so that 0 can be "NULL". */
/* Base table (shared code) ***************************************************/
/* Returns the int-table entry for the 1-based position i.  The position is
 * shifted down by one before indexing into t->t.entries, using the table's
 * per-entry byte size as the stride. */
static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
  const int32_t slot = i - 1;
  return UPB_INDEX(t->t.entries, slot, t->t.entry_size);
}
/* Returns the string-table entry for the 1-based position i.  The position
 * is shifted down by one before indexing into t->t.entries, using the
 * table's per-entry byte size as the stride. */
static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
  const int32_t slot = i - 1;
  return UPB_INDEX(t->t.entries, slot, t->t.entry_size);
}
void upb_table_init ( upb_table * t , uint32_t size , uint16_t entry_size )
static uint32_t upb_table_size ( upb_table * t ) { return 1 < < t - > size_lg2 ; }
{
static size_t upb_table_entrysize ( upb_table * t ) { return t - > entry_size ; }
static size_t upb_table_valuesize ( upb_table * t ) { return t - > value_size ; }
void upb_table_init ( upb_table * t , uint32_t size , uint16_t entry_size ) {
t - > count = 0 ;
t - > count = 0 ;
t - > entry_size = entry_size ;
t - > entry_size = entry_size ;
t - > size_lg2 = 0 ;
t - > size_lg2 = 1 ;
while ( size > > = 1 ) t - > size_lg2 + + ;
while ( upb_table_size ( t ) < size ) t - > size_lg2 + + ;
size_t bytes = upb_table_size ( t ) * t - > entry_size ;
size_t bytes = upb_table_size ( t ) * t - > entry_size ;
t - > mask = upb_table_size ( t ) - 1 ;
t - > mask = upb_table_size ( t ) - 1 ;
t - > entries = malloc ( bytes ) ;
t - > entries = malloc ( bytes ) ;
memset ( t - > entries , 0 , bytes ) ; /* Both tables consider 0's an empty entry. */
}
}
void upb_inttable_init ( upb_inttable * t , uint32_t size , uint16_t entsize )
void upb_table_free ( upb_table * t ) { free ( t - > entries ) ; }
{
upb_table_init ( & t - > t , size , entsize ) ;
/* upb_inttable ***************************************************************/
/* Returns the hash-part entry at 0-based position i, indexing t->t.entries
 * with the table's per-entry byte size as the stride. */
static upb_inttable_entry *intent(upb_inttable *t, int32_t i) {
  upb_table *base = &t->t;
  //printf("looking up int entry %d, size of entry: %d\n", i, t->t.entry_size);
  return UPB_INDEX(base->entries, i, base->entry_size);
}
}
void upb_strtable_init ( upb_strtable * t , uint32_t size , uint16_t entsize )
static uint32_t upb_inttable_hashtablesize ( upb_inttable * t ) {
{
return upb_table_size ( & t - > t ) ;
upb_table_init ( & t - > t , size , entsize ) ;
}
}
void upb_table_free ( upb_table * t ) { free ( t - > entries ) ; }
void upb_inttable_sizedinit ( upb_inttable * t , uint32_t arrsize , uint32_t hashsize ,
void upb_inttable_free ( upb_inttable * t ) { upb_table_free ( & t - > t ) ; }
uint16_t value_size ) {
void upb_strtable_free ( upb_strtable * t ) {
size_t entsize = _upb_inttable_entrysize ( value_size ) ;
// Free refs from the strtable.
upb_table_init ( & t - > t , hashsize , entsize ) ;
upb_strtable_entry * e = upb_strtable_begin ( t ) ;
for ( uint32_t i = 0 ; i < upb_table_size ( & t - > t ) ; i + + ) {
for ( ; e ; e = upb_strtable_next ( t , e ) ) {
upb_inttable_entry * e = intent ( t , i ) ;
upb_string_unref ( e - > key ) ;
e - > hdr . key = 0 ;
e - > hdr . next = UPB_END_OF_CHAIN ;
e - > val . has_entry = 0 ;
}
t - > t . value_size = value_size ;
// Always make the array part at least 1 long, so that we know key 0
// won't be in the hash part (which lets us speed up that code path).
t - > array_size = UPB_MAX ( 1 , arrsize ) ;
t - > array = malloc ( upb_table_valuesize ( & t - > t ) * t - > array_size ) ;
t - > array_count = 0 ;
for ( uint32_t i = 0 ; i < t - > array_size ; i + + ) {
upb_inttable_value * val = UPB_INDEX ( t - > array , i , upb_table_valuesize ( & t - > t ) ) ;
val - > has_entry = false ;
}
}
upb_table_free ( & t - > t ) ;
}
}
static uint32_t strtable_bucket ( upb_strtable * t , upb_string * key )
void upb_inttable_init ( upb_inttable * t , uint32_t hashsize , uint16_t value_size ) {
{
upb_inttable_sizedinit ( t , 0 , hashsize , value_size ) ;
uint32_t hash = MurmurHash2 ( upb_string_getrobuf ( key ) , upb_string_len ( key ) , 0 ) ;
return ( hash & ( upb_strtable_size ( t ) - 1 ) ) + 1 ;
}
}
void * upb_strtable_lookup ( upb_strtable * t , upb_string * key )
void upb_inttable_free ( upb_inttable * t ) {
{
upb_table_free ( & t - > t ) ;
uint32_t bucket = strtable_bucket ( t , key ) ;
free ( t - > array ) ;
upb_strtable_entry * e ;
do {
e = strent ( t , bucket ) ;
if ( e - > key & & upb_streql ( e - > key , key ) ) return e ;
} while ( ( bucket = e - > next ) ! = UPB_END_OF_CHAIN ) ;
return NULL ;
}
}
static uint32_t empty_intbucket ( upb_inttable * table )
static uint32_t empty_intbucket ( upb_inttable * table )
{
{
/* TODO: does it matter that this is biased towards the front of the table? */
// TODO: does it matter that this is biased towards the front of the table?
for ( uint32_t i = 1 ; i < = upb_inttable_size ( table ) ; i + + ) {
for ( uint32_t i = 0 ; i < upb_inttable_hashtablesize ( table ) ; i + + ) {
upb_inttable_entry * e = intent ( table , i ) ;
upb_inttable_entry * e = intent ( table , i ) ;
if ( ! e - > has_entry ) return i ;
if ( ! e - > val . has_entry ) return i ;
}
}
assert ( false ) ;
assert ( false ) ;
return 0 ;
return 0 ;
}
}
/* The insert routines have a lot more code duplication between int/string
// The insert routines have a lot more code duplication between int/string
* variants than I would like , but there ' s just a bit too much that varies to
// variants than I would like, but there's just a bit too much that varies to
* parameterize them . */
// parameterize them.
static void intinsert ( upb_inttable * t , upb_inttable_entry * e )
static void intinsert ( upb_inttable * t , upb_inttable_key_t key , void * val ) {
{
assert ( upb_inttable_lookup ( t , key ) = = NULL ) ;
assert ( upb_inttable_lookup ( t , e - > key ) = = NULL ) ;
upb_inttable_value * table_val ;
t - > t . count + + ;
if ( _upb_inttable_isarrkey ( t , key ) ) {
uint32_t bucket = upb_inttable_bucket ( t , e - > key ) ;
table_val = UPB_INDEX ( t - > array , key , upb_table_valuesize ( & t - > t ) ) ;
upb_inttable_entry * table_e = intent ( t , bucket ) ;
//printf("Inserting key %d to Array part! %p\n", key, table_val);
if ( table_e - > has_entry ) { /* Collision. */
} else {
if ( bucket = = upb_inttable_bucket ( t , table_e - > key ) ) {
t - > t . count + + ;
/* Existing element is in its main position. Find an empty slot to
uint32_t bucket = _upb_inttable_bucket ( t , key ) ;
* place our new element and append it to this key ' s chain . */
upb_inttable_entry * table_e = intent ( t , bucket ) ;
uint32_t empty_bucket = empty_intbucket ( t ) ;
//printf("Hash part! Inserting into bucket %d?\n", bucket);
while ( table_e - > next ! = UPB_END_OF_CHAIN )
if ( table_e - > val . has_entry ) { /* Collision. */
table_e = intent ( t , table_e - > next ) ;
//printf("Collision!\n");
table_e - > next = empty_bucket ;
if ( bucket = = _upb_inttable_bucket ( t , table_e - > hdr . key ) ) {
table_e = intent ( t , empty_bucket ) ;
/* Existing element is in its main position. Find an empty slot to
} else {
* place our new element and append it to this key ' s chain . */
/* Existing element is not in its main position. Move it to an empty
uint32_t empty_bucket = empty_intbucket ( t ) ;
* slot and put our element in its main position . */
while ( table_e - > hdr . next ! = UPB_END_OF_CHAIN )
uint32_t empty_bucket = empty_intbucket ( t ) ;
table_e = intent ( t , table_e - > hdr . next ) ;
uint32_t evictee_bucket = upb_inttable_bucket ( t , table_e - > key ) ;
table_e - > hdr . next = empty_bucket ;
memcpy ( intent ( t , empty_bucket ) , table_e , t - > t . entry_size ) ; /* copies next */
table_e = intent ( t , empty_bucket ) ;
upb_inttable_entry * evictee_e = intent ( t , evictee_bucket ) ;
} else {
while ( 1 ) {
/* Existing element is not in its main position. Move it to an empty
assert ( evictee_e - > has_entry ) ;
* slot and put our element in its main position . */
assert ( evictee_e - > next ! = UPB_END_OF_CHAIN ) ;
uint32_t empty_bucket = empty_intbucket ( t ) ;
if ( evictee_e - > next = = bucket ) {
uint32_t evictee_bucket = _upb_inttable_bucket ( t , table_e - > hdr . key ) ;
evictee_e - > next = empty_bucket ;
memcpy ( intent ( t , empty_bucket ) , table_e , t - > t . entry_size ) ; /* copies next */
break ;
upb_inttable_entry * evictee_e = intent ( t , evictee_bucket ) ;
while ( 1 ) {
assert ( evictee_e - > val . has_entry ) ;
assert ( evictee_e - > hdr . next ! = UPB_END_OF_CHAIN ) ;
if ( evictee_e - > hdr . next = = bucket ) {
evictee_e - > hdr . next = empty_bucket ;
break ;
}
evictee_e = intent ( t , evictee_e - > hdr . next ) ;
}
}
evictee_e = intent ( t , evictee_e - > next ) ;
/* table_e remains set to our mainpos. */
}
}
/* table_e remains set to our mainpos. */
}
}
//printf("Inserting! to:%p, copying to: %p\n", table_e, &table_e->val);
table_val = & table_e - > val ;
table_e - > hdr . key = key ;
table_e - > hdr . next = UPB_END_OF_CHAIN ;
}
}
memcpy ( table_e , e , t - > t . entry_size ) ;
memcpy ( table_val , val , upb_table_valuesize ( & t - > t ) ) ;
table_e - > next = UPB_END_OF_CHAIN ;
table_val - > has_entry = true ;
table_e - > has_entry = true ;
assert ( upb_inttable_lookup ( t , key ) = = table_val ) ;
assert ( upb_inttable_lookup ( t , e - > key ) = = table_e ) ;
}
}
void upb_inttable_insert ( upb_inttable * t , upb_inttable_entry * e )
// Insert all elements from src into dest. Caller ensures that a resize will
{
// not be necessary.
if ( ( double ) ( t - > t . count + 1 ) / upb_inttable_size ( t ) > MAX_LOAD ) {
static void upb_inttable_insertall ( upb_inttable * dst , upb_inttable * src ) {
/* Need to resize. New table of double the size, add old elements to it. */
for ( upb_inttable_iter i = upb_inttable_begin ( src ) ; ! upb_inttable_done ( i ) ;
i = upb_inttable_next ( src , i ) ) {
//printf("load check: %d %d\n", upb_inttable_count(dst), upb_inttable_hashtablesize(dst));
assert ( ( double ) ( upb_inttable_count ( dst ) ) /
upb_inttable_hashtablesize ( dst ) < = MAX_LOAD ) ;
intinsert ( dst , upb_inttable_iter_key ( i ) , upb_inttable_iter_value ( i ) ) ;
}
}
void upb_inttable_insert ( upb_inttable * t , upb_inttable_key_t key , void * val ) {
if ( ( double ) ( t - > t . count + 1 ) / upb_inttable_hashtablesize ( t ) > MAX_LOAD ) {
//printf("RESIZE!\n");
// Need to resize. Allocate new table with double the size of however many
// elements we have now, add old elements to it. We create the new hash
// table without an array part, even if the old table had an array part.
// If/when the user calls upb_inttable_compact() again, we'll create an
// array part then.
upb_inttable new_table ;
upb_inttable new_table ;
upb_inttable_init ( & new_table , upb_inttable_size ( t ) * 2 , t - > t . entry_size ) ;
//printf("Old table count=%d, size=%d\n", upb_inttable_count(t), upb_inttable_hashtablesize(t));
new_table . t . count = t - > t . count ;
upb_inttable_init ( & new_table , upb_inttable_count ( t ) * 2 , upb_table_valuesize ( & t - > t ) ) ;
upb_inttable_entry * old_e ;
upb_inttable_insertall ( & new_table , t ) ;
for ( old_e = upb_inttable_begin ( t ) ; old_e ; old_e = upb_inttable_next ( t , old_e ) )
intinsert ( & new_table , old_e ) ;
upb_inttable_free ( t ) ;
upb_inttable_free ( t ) ;
* t = new_table ;
* t = new_table ;
}
}
intinsert ( t , e ) ;
intinsert ( t , key , val ) ;
}
// Rebuilds *t so that low keys live in a directly-indexed array part and the
// remaining keys live in the hash part.
//
// The array size is chosen by walking power-of-two candidates downward from
// the first power of two above the largest key, and taking the first one
// whose occupancy (keys below the candidate size / candidate size) is at
// least MIN_DENSITY.  If no candidate qualifies, the smallest candidate (1)
// is used.  All existing elements are re-inserted into a freshly sized table,
// the old storage is freed, and *t is overwritten with the new table.
void upb_inttable_compact(upb_inttable *t) {
  // Find the largest key currently stored; it bounds the useful array size.
  upb_inttable_key_t largest_key = 0;
  for (upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
       i = upb_inttable_next(t, i)) {
    largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
  }

  // Smallest lg2 such that (1 << lg2) > largest_key, so that the first
  // candidate array can actually hold largest_key.  The shift is done in 64
  // bits and the exponent is capped at 31 so that neither this shift nor the
  // candidate size below can overflow (upb_inttable_sizedinit takes the array
  // size as a uint32_t).
  int lg2_array = 0;
  while (lg2_array < 31 && ((uint64_t)1 << lg2_array) <= largest_key) {
    ++lg2_array;
  }
  ++lg2_array;  // The loop below pre-decrements before using the exponent.

  // Both are initialized so that an empty table (where every candidate is
  // rejected by the early density check) never reads an indeterminate value.
  size_t array_size = 0;
  int array_count = 0;
  while (lg2_array > 0) {
    array_size = (size_t)1 << --lg2_array;
    //printf("Considering size %d (btw, our table has %d things total)\n", array_size, upb_inttable_count(t));
    if ((double)upb_inttable_count(t) / array_size < MIN_DENSITY) {
      // Even if 100% of the keys were in the array part, an array of this
      // size would not be dense enough.
      continue;
    }
    // Count how many keys would actually land in an array of this size.
    array_count = 0;
    for (upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
         i = upb_inttable_next(t, i)) {
      if (upb_inttable_iter_key(i) < array_size) {
        array_count++;
      }
    }
    //printf("There would be %d things in that array\n", array_count);
    if ((double)array_count / array_size >= MIN_DENSITY) break;
  }

  // Size the hash part for the keys that did not fit in the array, leaving
  // headroom so that re-inserting them stays under MAX_LOAD.
  upb_inttable new_table;
  int hash_size = (upb_inttable_count(t) - array_count + 1) / MAX_LOAD;
  upb_inttable_sizedinit(&new_table, array_size, hash_size,
                         upb_table_valuesize(&t->t));
  //printf("For %d things, using array size=%d, hash_size = %d\n", upb_inttable_count(t), array_size, hash_size);
  upb_inttable_insertall(&new_table, t);
  upb_inttable_free(t);
  *t = new_table;
}
// Returns an iterator positioned at the first element of t.
//
// A seed iterator is built in the array part with key -1 and no value, and
// then advanced once; upb_inttable_next pre-increments the key, so the -1
// wraps to 0 and the scan starts at the first array slot.
upb_inttable_iter upb_inttable_begin(upb_inttable *t) {
  upb_inttable_iter seed = { .key = -1, .value = NULL, .array_part = true };
  return upb_inttable_next(t, seed);
}
// Advances iter to the next occupied element of t and returns the result.
//
// Iteration visits the array part first (slots 0..array_size-1 in order) and
// then the hash part (buckets in storage order).  When there are no more
// elements, the returned iterator has value == NULL.
//
// This version walks the hash part by bucket index rather than by stepping a
// raw entry pointer, so it never forms a pointer to the (nonexistent) entry
// before the start of the entries allocation.
//
// NOTE(review): like the pointer-stepping form, this assumes the `val` member
// sits exactly sizeof(upb_inttable_header) bytes into each entry, and that
// iter has not already reached the end (value == NULL) -- confirm against the
// struct definition and callers.
upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
  const size_t hdrsize = sizeof(upb_inttable_header);
  const size_t entsize = upb_table_entrysize(&t->t);
  const uint32_t nbuckets = upb_inttable_hashtablesize(t);
  uint32_t bucket;

  if (iter.array_part) {
    // Scan forward through the array part for the next occupied slot.
    while (++iter.key < t->array_size) {
      //printf("considering value %d\n", iter.key);
      iter.value = UPB_INDEX(t->array, iter.key, t->t.value_size);
      if (iter.value->has_entry) return iter;
    }
    //printf("Done with array part!\n");
    // Array part exhausted: continue from the first hash bucket.
    iter.array_part = false;
    bucket = 0;
  } else {
    // iter.value points at the value inside some hash entry.  Step back over
    // the header to get the entry's address, convert that to a bucket index,
    // and resume from the bucket after it.
    const char *cur = (const char *)iter.value - hdrsize;
    const char *base = (const char *)t->t.entries;
    bucket = (uint32_t)((size_t)(cur - base) / entsize) + 1;
  }

  for (; bucket < nbuckets; bucket++) {
    upb_inttable_entry *e = intent(t, bucket);
    //printf("considering value %p (val: %p)\n", e, &e->val);
    if (e->val.has_entry) {
      //printf("USING VALUE! %p\n", e);
      iter.key = e->hdr.key;
      iter.value = &e->val;
      return iter;
    }
  }

  //printf("No values.\n");
  iter.value = NULL;
  return iter;
}
/* upb_strtable ***************************************************************/
/* Returns the string-table entry at 0-based position i, indexing
 * t->t.entries with the table's per-entry byte size as the stride. */
static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
  upb_table *base = &t->t;
  return UPB_INDEX(base->entries, i, base->entry_size);
}
/* Returns the number of buckets in the string table, as reported by
 * upb_table_size for the embedded base table. */
static uint32_t upb_strtable_size(upb_strtable *t) {
  upb_table *base = &t->t;
  return upb_table_size(base);
}
/* Initializes a string table.
 *
 * The base table is set up via upb_table_init with the requested size and
 * per-entry byte size, and then every bucket is marked empty: its key is set
 * to NULL and its chain link to UPB_END_OF_CHAIN. */
void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) {
  upb_table_init(&t->t, size, entsize);

  uint32_t slot = 0;
  while (slot < upb_table_size(&t->t)) {
    upb_strtable_entry *ent = strent(t, slot);
    ent->key = NULL;
    ent->next = UPB_END_OF_CHAIN;
    slot++;
  }
}
/* Releases a string table.
 *
 * Walks every entry yielded by the table iterator and drops the reference
 * held on its key, then frees the base table's storage. */
void upb_strtable_free(upb_strtable *t) {
  upb_strtable_entry *ent = upb_strtable_begin(t);
  while (ent) {
    upb_string_unref(ent->key);
    ent = upb_strtable_next(t, ent);
  }
  upb_table_free(&t->t);
}
/* Returns the bucket index for key: the MurmurHash2 (seed 0) of the string's
 * bytes, masked with the table's mask. */
static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
{
  const uint32_t digest =
      MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
  const uint32_t index = digest & t->t.mask;
  return index;
}
/* Looks up key in the string table.
 *
 * Starts at the key's bucket and follows the `next` links until either an
 * entry with an equal key is found (returned) or the chain ends with
 * UPB_END_OF_CHAIN (NULL is returned).  Entries with a NULL key are skipped
 * without comparison. */
void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
{
  uint32_t slot = strtable_bucket(t, key);
  for (;;) {
    upb_strtable_entry *ent = strent(t, slot);
    if (ent->key != NULL && upb_streql(ent->key, key)) {
      return ent;
    }
    slot = ent->next;
    if (slot == UPB_END_OF_CHAIN) {
      return NULL;
    }
  }
}
}
static uint32_t empty_strbucket ( upb_strtable * table )
static uint32_t empty_strbucket ( upb_strtable * table )
{
{
/* TODO: does it matter that this is biased towards the front of the table? */
// TODO: does it matter that this is biased towards the front of the table?
for ( uint32_t i = 1 ; i < = upb_strtable_size ( table ) ; i + + ) {
for ( uint32_t i = 0 ; i < upb_strtable_size ( table ) ; i + + ) {
upb_strtable_entry * e = strent ( table , i ) ;
upb_strtable_entry * e = strent ( table , i ) ;
if ( ! e - > key ) return i ;
if ( ! e - > key ) return i ;
}
}
@ -191,13 +347,16 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e)
}
}
memcpy ( table_e , e , t - > t . entry_size ) ;
memcpy ( table_e , e , t - > t . entry_size ) ;
table_e - > next = UPB_END_OF_CHAIN ;
table_e - > next = UPB_END_OF_CHAIN ;
//printf("Looking up, string=" UPB_STRFMT "...\n", UPB_STRARG(e->key));
assert ( upb_strtable_lookup ( t , e - > key ) = = table_e ) ;
assert ( upb_strtable_lookup ( t , e - > key ) = = table_e ) ;
//printf("Yay!\n");
}
}
void upb_strtable_insert ( upb_strtable * t , upb_strtable_entry * e )
void upb_strtable_insert ( upb_strtable * t , upb_strtable_entry * e )
{
{
if ( ( double ) ( t - > t . count + 1 ) / upb_strtable_size ( t ) > MAX_LOAD ) {
if ( ( double ) ( t - > t . count + 1 ) / upb_strtable_size ( t ) > MAX_LOAD ) {
/* Need to resize. New table of double the size, add old elements to it. */
// Need to resize. New table of double the size, add old elements to it.
//printf("RESIZE!!\n");
upb_strtable new_table ;
upb_strtable new_table ;
upb_strtable_init ( & new_table , upb_strtable_size ( t ) * 2 , t - > t . entry_size ) ;
upb_strtable_init ( & new_table , upb_strtable_size ( t ) * 2 , t - > t . entry_size ) ;
upb_strtable_entry * old_e ;
upb_strtable_entry * old_e ;
@ -209,25 +368,12 @@ void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
strinsert ( t , e ) ;
strinsert ( t , e ) ;
}
}
/* Returns the first occupied entry of t, or NULL if there is none.
 *
 * The cursor handed to upb_inttable_next is intent(t, 0); that function
 * advances the cursor by one entry before testing it. */
void *upb_inttable_begin(upb_inttable *t) {
  upb_inttable_entry *cursor = intent(t, 0);
  return upb_inttable_next(t, cursor);
}
/* Returns the next occupied entry after cur, or NULL when the scan reaches
 * the end marker.
 *
 * The cursor is advanced one entry (t->t.entry_size bytes) at a time; the
 * end marker is intent(t, upb_inttable_size(t) + 1). */
void *upb_inttable_next(upb_inttable *t, upb_inttable_entry *cur) {
  upb_inttable_entry *stop = intent(t, upb_inttable_size(t) + 1);
  for (;;) {
    cur = (void *)((char *)cur + t->t.entry_size);
    if (cur == stop) {
      return NULL;
    }
    if (cur->has_entry) {
      return cur;
    }
  }
}
void * upb_strtable_begin ( upb_strtable * t ) {
void * upb_strtable_begin ( upb_strtable * t ) {
return upb_strtable_next ( t , strent ( t , 0 ) ) ;
return upb_strtable_next ( t , strent ( t , - 1 ) ) ;
}
}
void * upb_strtable_next ( upb_strtable * t , upb_strtable_entry * cur ) {
void * upb_strtable_next ( upb_strtable * t , upb_strtable_entry * cur ) {
upb_strtable_entry * end = strent ( t , upb_strtable_size ( t ) + 1 ) ;
upb_strtable_entry * end = strent ( t , upb_strtable_size ( t ) ) ;
do {
do {
cur = ( void * ) ( ( char * ) cur + t - > t . entry_size ) ;
cur = ( void * ) ( ( char * ) cur + t - > t . entry_size ) ;
if ( cur = = end ) return NULL ;
if ( cur = = end ) return NULL ;