Tests for string and fleshed out implementation.

pull/13171/head
Joshua Haberman 15 years ago
parent 28ec9a1fa0
commit e29bf964d1
  1. 15
      Makefile
  2. 63
      core/upb_string.c
  3. 40
      core/upb_string.h
  4. 56
      tests/test_string.c
  5. 13
      tests/test_table.cc

@ -86,22 +86,25 @@ tests/test.proto.pb: tests/test.proto
# TODO: replace with upbc
protoc tests/test.proto -otests/test.proto.pb
TESTS=tests/tests \
TESTS=tests/test_string \
tests/test_table
tests: $(TESTS)
OTHER_TESTS=tests/tests \
tests/test_table \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
tests/test.proto.pb
$(TESTS): core/libupb.a
#VALGRIND=valgrind --leak-check=full --error-exitcode=1
VALGRIND=
VALGRIND=valgrind --leak-check=full --error-exitcode=1
#VALGRIND=
test: tests
@echo Running all tests under valgrind.
$(VALGRIND) ./tests/tests
# Needs to be rewritten to separate the benchmark.
# valgrind --error-exitcode=1 ./tests/test_table
@for test in tests/t.* ; do \
if [ -f ./$$test ] ; then \
@for test in tests/*; do \
if [ -x ./$$test ] ; then \
echo $(VALGRIND) ./$$test: \\c; \
$(VALGRIND) ./$$test; \
fi \

@ -7,8 +7,11 @@
#include "upb_string.h"
#include <stdlib.h>
#define UPB_STRING_UNFINALIZED -1
#ifdef __GLIBC__
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif
static uint32_t upb_round_up_pow2(uint32_t v) {
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
@ -25,23 +28,67 @@ static uint32_t upb_round_up_pow2(uint32_t v) {
// Allocates a new, empty string that holds a single reference.
//
// The returned string has no active data (ptr), no cached buffer and no
// source string. Returns NULL if the allocation fails.
upb_string *upb_string_new() {
  upb_string *str = malloc(sizeof(*str));
  if (str == NULL) return NULL;  // Out of memory: nothing to initialize.
  str->ptr = NULL;
  str->cached_mem = NULL;
  // The len field is present whether or not UPB_HAVE_MSIZE is defined, so it
  // is initialized unconditionally rather than only alongside size.
  str->len = UPB_STRING_UNFINALIZED;
#ifndef UPB_HAVE_MSIZE
  str->size = 0;
#endif
  str->src = NULL;
  upb_atomic_refcount_init(&str->refcount, 1);
  return str;
}
// Reports how many bytes are available in the string's cached buffer.
//
// With glibc or on Apple platforms the allocator is queried for the size of
// the cached_mem block; otherwise the explicitly stored size field is used.
uint32_t upb_string_size(upb_string *str) {
  uint32_t capacity;
#if defined(__GLIBC__)
  capacity = malloc_usable_size(str->cached_mem);
#elif defined(__APPLE__)
  capacity = malloc_size(str->cached_mem);
#else
  capacity = str->size;
#endif
  return capacity;
}
// Gives up the reference held on the source string, if there is one, and
// clears the src field so the reference cannot be dropped twice.
static void upb_string_release(upb_string *str) {
  if (str->src == NULL) return;  // Not a slice: nothing to release.
  upb_string_unref(str->src);
  str->src = NULL;
}
// Destroys a string: frees its cached buffer, drops its reference on the
// source string (if any) and frees the string object itself.
//
// Only cached_mem is owned by the string. ptr is either NULL, an alias of
// cached_mem (set by upb_string_getrwbuf) or a pointer into the source
// string's data (set by upb_string_substr), so it must never be passed to
// free(): that would double-free cached_mem or free memory we do not own.
void _upb_string_free(upb_string *str) {
  free(str->cached_mem);    // free(NULL) is a no-op, so no guard is needed.
  upb_string_release(str);  // Drop our ref on the source string, if any.
  free(str);
}
// Returns a string that is ready to be written to, reusing "str" when
// possible.
//
// A fresh string is created when "str" is NULL or when its refcount reads
// greater than one. Otherwise "str" itself is reset (active data pointer
// cleared, source reference released) and returned.
upb_string *upb_string_tryrecycle(upb_string *str) {
  if (str == NULL) return upb_string_new();
  if (upb_atomic_read(&str->refcount) > 1) return upb_string_new();

  // Sole owner: detach from the current data and hand the same object back.
  str->ptr = NULL;
  upb_string_release(str);
  return str;
}
// Makes the string's cached buffer its active data, sets the string's length
// to "len", and returns the buffer for writing.
//
// The string must not currently have active data (ptr must be NULL, as it is
// after upb_string_new() or upb_string_tryrecycle()). The cached buffer is
// reused when upb_string_size() reports it is already large enough; otherwise
// it is reallocated to upb_round_up_pow2(len) bytes.
//
// Returns NULL, leaving the string unchanged, if the buffer cannot be grown.
// NULL is also returned when no buffer has been allocated yet and "len" is 0.
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
  assert(str->ptr == NULL);
  uint32_t size = upb_string_size(str);
  if (size < len) {
    size = upb_round_up_pow2(len);
    // Keep the old block reachable if realloc() fails instead of leaking it.
    char *grown = realloc(str->cached_mem, size);
    if (grown == NULL) return NULL;
    str->cached_mem = grown;
#ifndef UPB_HAVE_MSIZE
    str->size = size;
#endif
  }
  str->len = len;
  str->ptr = str->cached_mem;
  return str->ptr;
}
// Turns "str" into a slice of "target_str": "len" bytes beginning at offset
// "start" of target_str's read-only data.
//
// "str" must not currently have active data (ptr must be NULL). A reference
// on target_str is taken and stored in src. No bounds checking of "start" or
// "len" is performed here.
void upb_string_substr(upb_string *str, upb_string *target_str,
                       upb_strlen_t start, upb_strlen_t len) {
  assert(str->ptr == NULL);
  str->len = len;
  // Take the reference before pointing into the target's data.
  str->src = upb_string_getref(target_str);
  str->ptr = upb_string_getrobuf(target_str) + start;
}

@ -16,8 +16,6 @@
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
* - strings can refer to memory that they do not own, in which case we avoid
* copies if possible (the exact strategy for doing this can vary).
* - strings are not thread-safe by default, but can be made so by calling a
* function. This is not the default because it causes extra CPU overhead.
*/
@ -37,16 +35,31 @@ extern "C" {
// All members of this struct are private, and may only be read/written through
// the associated functions. Also, strings may *only* be allocated on the heap.
// Representation of a upb_string: an active data pointer plus an optional
// cached buffer that the string owns, a length, a refcount, and bookkeeping
// for strings that are slices of other strings.
struct _upb_string {
// The pointer to our currently active data. This may be memory we own
// (in which case it aliases cached_mem) or a pointer into memory we don't
// own, such as the data of the string referenced by src.
char *ptr;
// If non-NULL, this is a block of memory we own. We keep this cached even
// if "ptr" is currently aliasing memory we don't own.
char *cached_mem;
// The effective length of the string (the bytes at ptr).
int32_t len;
#ifndef UPB_HAVE_MSIZE
// How many bytes are allocated in cached_mem.
//
// Many platforms have a function that can tell you the size of a block
// that was previously malloc'd. In this case we can avoid storing the
// size explicitly.
uint32_t size;
#endif
// The string's refcount.
upb_atomic_refcount_t refcount;
// NOTE(review): both this union and the plain "src" member below are
// declared here. Confirm whether "extra" is still needed or is a leftover.
union {
// Used if this is a slice of another string.
struct _upb_string *src;
// Used if this string is referencing external unowned memory.
upb_atomic_refcount_t reader_count;
} extra;
// Used if this is a slice of another string, NULL otherwise. We own a ref
// on src.
struct _upb_string *src;
};
// Returns a newly-created, empty, non-finalized string. When the string is no
@ -113,11 +126,14 @@ char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
// Sketch of an API for allowing upb_strings to reference external, unowned
// data. Waiting for a clear use case before actually implementing it.
//
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until
// a matching call to upb_string_detach().
void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
void upb_string_detach(upb_string *str);
// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
// void upb_string_detach(upb_string *str);
// Allows using upb_strings in printf, ie:
// upb_strptr str = UPB_STRLIT("Hello, World!\n");
@ -176,7 +192,9 @@ INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
upb_string *upb_strdupc(const char *src);
// Duplicates the NUL-terminated C string "src": its length is measured with
// strlen() and the bytes are handed to upb_strduplen().
INLINE upb_string *upb_strdupc(const char *src) {
  const size_t src_len = strlen(src);
  return upb_strduplen(src, src_len);
}
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);

@ -0,0 +1,56 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_string.h"
char static_str[] = "Static string.";
// Exercises the upb_string API in sequence: creation, recycling, copying in
// C strings, reading the data back, slicing another string, and the refcount
// changes that slicing and recycling cause. Every step is checked with
// assert(), and each step depends on the state left by the previous one.
int main() {
// A freshly created string can be released immediately.
upb_string *str = upb_string_new();
assert(str != NULL);
upb_string_unref(str);
// Can also create a string by tryrecycle(NULL).
str = upb_string_tryrecycle(NULL);
assert(str != NULL);
// Copy in a C string and verify both length and contents.
upb_strcpyc(str, static_str);
// sizeof includes the terminating NUL, which is not part of the length.
assert(upb_string_len(str) == (sizeof(static_str) - 1));
const char *robuf = upb_string_getrobuf(str);
assert(robuf != NULL);
assert(memcmp(robuf, static_str, upb_string_len(str)) == 0);
upb_string_endread(str);
upb_string *str2 = upb_string_tryrecycle(str);
// No other referents, so should return the same string.
assert(str2 == str);
// Write a shorter string, the same memory should be reused.
upb_strcpyc(str, "XX");
const char *robuf2 = upb_string_getrobuf(str);
assert(robuf2 == robuf);
assert(memcmp(robuf2, "XX", 2) == 0);
// Make string alias part of another string.
// NOTE(review): upb_string_substr asserts that str has no active data;
// confirm str is in that state here, since it was last written by
// upb_strcpyc without an intervening upb_string_tryrecycle.
str2 = upb_strdupc("WXYZ");
upb_string_substr(str, str2, 1, 2);
assert(upb_string_len(str) == 2);
assert(upb_string_len(str2) == 4);
// The two strings should be aliasing the same data: the slice starts one
// byte into the source string.
const char *robuf3 = upb_string_getrobuf(str);
const char *robuf4 = upb_string_getrobuf(str2);
assert(robuf3 == robuf4 + 1);
// The aliased string should have an extra ref.
assert(upb_atomic_read(&str2->refcount) == 2);
// Recycling str should eliminate the extra ref.
str = upb_string_tryrecycle(str);
assert(upb_atomic_read(&str2->refcount) == 1);
// Resetting str should reuse its old data.
upb_strcpyc(str, "XX");
const char *robuf5 = upb_string_getrobuf(str);
assert(robuf5 == robuf);
// Release both strings.
upb_string_unref(str);
upb_string_unref(str2);
}

@ -12,6 +12,8 @@
#include <sys/resource.h>
#include <iostream>
bool benchmark = false;
using std::string;
using std::vector;
@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries)
}
}
if(!benchmark) {
upb_inttable_free(&table);
return;
}
/* Test performance. We only test lookups for keys that are known to exist. */
uintptr_t x = 0;
const unsigned int iterations = 0xFFFFFF;
@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num)
return buf;
}
int main()
int main(int argc, char *argv[])
{
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
}
vector<string> keys;
keys.push_back("google.protobuf.FileDescriptorSet");
keys.push_back("google.protobuf.FileDescriptorProto");

Loading…
Cancel
Save