diff --git a/Makefile b/Makefile index a06ff0cdb2..8ddcd095d5 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CC=gcc CXX=g++ CFLAGS=-std=c99 CPPFLAGS=-Wall -Wextra -pedantic -g -DUPB_UNALIGNED_READS_OK -fomit-frame-pointer -OBJ=upb_parse.o upb_table.o upb_msg.o upb_enum.o upb_context.o descriptor.o +OBJ=upb_parse.o upb_table.o upb_msg.o upb_enum.o upb_context.o upb_string.o descriptor.o all: $(OBJ) test_table tests upbc clean: rm -f *.o test_table tests diff --git a/test_table.cc b/test_table.cc index 947a7a4860..0337eaa399 100644 --- a/test_table.cc +++ b/test_table.cc @@ -247,7 +247,7 @@ int main() keys.push_back("google.protobuf.UninterpretedOption.NamePart"); test_strtable(keys, 18); - return 0; + int32_t *keys1 = get_contiguous_keys(8); printf("Contiguous 1-8 ====\n"); test_inttable(keys1, 8); diff --git a/upb_context.c b/upb_context.c index e92f9e31a1..d83906c7e1 100644 --- a/upb_context.c +++ b/upb_context.c @@ -11,6 +11,7 @@ #include "upb_enum.h" #include "upb_msg.h" +/* Search for a character in a string, in reverse. */ static int memrchr(char *data, char c, size_t len) { int off = len-1; @@ -70,6 +71,8 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c, return upb_strtable_lookup(&c->symtab, symbol); } +/* Given a symbol and the base symbol inside which it is defined, find the + * symbol's definition in t. */ static struct upb_symtab_entry *resolve(struct upb_strtable *t, struct upb_string *base, struct upb_string *symbol) @@ -103,6 +106,7 @@ static struct upb_symtab_entry *resolve(struct upb_strtable *t, } } +/* Tries to resolve a symbol in two different tables. */ union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2, struct upb_string *base, struct upb_string *sym, enum upb_symbol_type expected_type) { @@ -121,8 +125,9 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c, return resolve(&c->symtab, base, symbol); } -/* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" - * join("", "Baz") -> "Baz" +/* Joins strings together, for example: + * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" + * join("", "Baz") -> "Baz" * Caller owns the returned string and must free it. */ static struct upb_string join(struct upb_string *base, struct upb_string *name) { size_t len = base->byte_len + name->byte_len; @@ -192,7 +197,7 @@ static bool insert_message(struct upb_strtable *t, upb_strtable_insert(t, &e.e); /* Add nested messages and enums. */ - //if(d->set_flags.has.nested_type) + //if(d->set_flags.has.nested_type) (re-enable when protoc sets) if(d->nested_type) for(unsigned int i = 0; i < d->nested_type->len; i++) if(!insert_message(t, d->nested_type->elements[i], &fqname)) @@ -268,7 +273,10 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { google_protobuf_FileDescriptorSet *fds = upb_alloc_and_parse(c->fds_msg, fds_str, true); if(!fds) return false; + if(fds->set_flags.has.file) { + /* Insert new symbols into a temporary table until we have verified that + * the descriptor is valid. */ struct upb_strtable tmp; upb_strtable_init(&tmp, 0, sizeof(struct upb_symtab_entry)); for(uint32_t i = 0; i < fds->file->len; i++) { @@ -284,10 +292,12 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) { upb_strtable_insert(&c->symtab, &e->e); upb_strtable_free(&tmp); } + + /* We own fds now, need to keep a ref so we can free it later. */ if(c->fds_size == c->fds_len) { c->fds_size *= 2; c->fds = realloc(c->fds, c->fds_size); } - c->fds[c->fds_len++] = fds; /* Need to keep a ref since we own it. */ + c->fds[c->fds_len++] = fds; return true; } diff --git a/upb_msg.c b/upb_msg.c index 48804e76eb..c9fa260b82 100644 --- a/upb_msg.c +++ b/upb_msg.c @@ -17,6 +17,7 @@ static int div_round_up(int numerator, int denominator) { return numerator > 0 ? (numerator - 1) / denominator + 1 : 0; } +/* Callback for sorting fields. */ static int compare_fields(const void *e1, const void *e2) { const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1; const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2; @@ -243,6 +244,7 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m) /* Serialization/Deserialization. ********************************************/ +/* We use this as our "user_data" for each frame of the parsing stack. */ struct parse_frame_data { struct upb_msg *m; void *data; @@ -267,6 +269,8 @@ static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag, return f->type; } +/* Returns a pointer to where the next value for field "f" should be stored, + * taking into account whether f is an array that may need to be reallocatd. */ static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f) { union upb_value_ptr p = upb_msg_getptr(data, f); @@ -315,6 +319,7 @@ static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc) struct upb_msg_parse_state *s = (void*)_s; struct upb_msg_field *f = user_field_desc; struct parse_frame_data *frame = (void*)&s->s.top->user_data; + // TODO: find a non-hacky way to get a pointer to the old frame. struct parse_frame_data *oldframe = (void*)((char*)s->s.top - s->s.udata_size); union upb_value_ptr p = get_value_ptr(oldframe->data, f); upb_msg_reuse_submsg(p.message, f->ref.msg); @@ -363,74 +368,3 @@ void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref) } } -static void dump_value(union upb_value_ptr p, upb_field_type_t type, FILE *stream) -{ - switch(type) { - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: - fprintf(stream, "%f", *p._double); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: - fprintf(stream, "%f", *p._float); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64: - fprintf(stream, "%" PRIu64, *p.uint64); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: - fprintf(stream, "%" PRIu32, *p.uint32); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL: - if(*p._bool) fputs("true", stream); - else fputs("false", stream); - break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: - break; /* can't actually be stored */ - - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: - fprintf(stream, "%p", (void*)*p.message); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: - if(*p.string) - fprintf(stream, "\"" UPB_STRFMT "\"", UPB_STRARG(**p.string)); - else - fputs("(null)", stream); - break; - - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32: - fprintf(stream, "%" PRId32, *p.int32); break; - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64: - case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64: - fprintf(stream, "%" PRId64, *p.int64); break; - } -} - -void upb_msg_print(void *data, struct upb_msg *m, FILE *stream) -{ - fprintf(stream, "Message <" UPB_STRFMT ">\n", UPB_STRARG(*m->descriptor->name)); - for(uint32_t i = 0; i < m->num_fields; i++) { - struct upb_msg_field *f = &m->fields[i]; - google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i]; - fprintf(stream, " " UPB_STRFMT, UPB_STRARG(*fd->name)); - - if(upb_msg_is_set(data, f)) fputs(" (set): ", stream); - else fputs(" (NOT set): ", stream); - - union upb_value_ptr p = upb_msg_getptr(data, f); - if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) { - if(*p.array) { - fputc('[', stream); - for(uint32_t j = 0; j < (*p.array)->len; j++) { - dump_value(upb_array_getelementptr(*p.array, j, f->type), f->type, stream); - if(j != (*p.array)->len - 1) fputs(", ", stream); - } - fputs("]\n", stream); - } else { - fputs("\n", stream); - } - } else { - dump_value(p, f->type, stream); - fputc('\n', stream); - } - } -} diff --git a/upb_string.c b/upb_string.c new file mode 100644 index 0000000000..733bafe361 --- /dev/null +++ b/upb_string.c @@ -0,0 +1,24 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + */ + +#include "upb_string.h" + +bool upb_strreadfile(const char *filename, struct upb_string *data) { + FILE *f = fopen(filename, "rb"); + if(!f) return false; + if(fseek(f, 0, SEEK_END) != 0) return false; + long size = ftell(f); + if(size < 0) return false; + if(fseek(f, 0, SEEK_SET) != 0) return false; + data->ptr = (char*)malloc(size); + data->byte_len = size; + if(fread(data->ptr, size, 1, f) != 1) { + free(data->ptr); + return false; + } + fclose(f); + return true; +} diff --git a/upb_string.h b/upb_string.h index 2520caa714..44bc653bf9 100644 --- a/upb_string.h +++ b/upb_string.h @@ -1,6 +1,8 @@ /* * upb - a minimalist implementation of protocol buffers. * + * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. + * Defines a delimited (as opposed to null-terminated) string type and some * library functions for manipulating them. * @@ -16,8 +18,6 @@ * string data. With NULL-termination I would be forced to write a NULL * into the middle of the protobuf's data, which is less than ideal and in * some cases not practical or possible. - * - * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. */ #ifndef UPB_STRING_H_ @@ -29,6 +29,7 @@ extern "C" { #include #include +#include "upb.h" struct upb_string { /* We expect the data to be 8-bit clean (uint8_t), but char* is such an @@ -59,24 +60,17 @@ INLINE void upb_strfree(struct upb_string s) { free(s.ptr); } -INLINE bool upb_strreadfile(const char *filename, struct upb_string *data) { - FILE *f = fopen(filename, "rb"); - if(!f) return false; - if(fseek(f, 0, SEEK_END) != 0) return false; - long size = ftell(f); - if(size < 0) return false; - if(fseek(f, 0, SEEK_SET) != 0) return false; - data->ptr = (char*)malloc(size); - data->byte_len = size; - if(fread(data->ptr, size, 1, f) != 1) { - free(data->ptr); - return false; - } - fclose(f); - return true; -} +/* Reads an entire file into a newly-allocated string. */ +bool upb_strreadfile(const char *filename, struct upb_string *data); +/* Allows defining upb_strings as literals, ie: + * struct upb_string str = UPB_STRLIT("Hello, World!\n"); + */ #define UPB_STRLIT(strlit) {.ptr=strlit, .byte_len=sizeof(strlit)-1} + +/* Allows using upb_strings in printf, ie: + * struct upb_string str = UPB_STRLIT("Hello, World!\n"); + * printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ #define UPB_STRARG(str) (str).byte_len, (str).ptr #define UPB_STRFMT "%.*s" diff --git a/upb_table.c b/upb_table.c index d53ab1c33e..bad0b23a46 100644 --- a/upb_table.c +++ b/upb_table.c @@ -77,8 +77,12 @@ static uint32_t empty_intbucket(struct upb_inttable *table) return 0; } +/* The insert routines have a lot more code duplication between int/string + * variants than I would like, but there's just a bit too much that varies to + * parameterize them. */ static void intinsert(struct upb_inttable *t, struct upb_inttable_entry *e) { + assert(upb_inttable_lookup(t, e->key, t->t.entry_size) == NULL); uint32_t bucket = upb_inttable_bucket(t, e->key); struct upb_inttable_entry *table_e = intent(t, bucket); if(table_e->key != EMPTYENT) { /* Collision. */ @@ -110,6 +114,7 @@ static void intinsert(struct upb_inttable *t, struct upb_inttable_entry *e) } memcpy(table_e, e, t->t.entry_size); table_e->next = UPB_END_OF_CHAIN; + assert(upb_inttable_lookup(t, e->key, t->t.entry_size) == table_e); } void upb_inttable_insert(struct upb_inttable *t, struct upb_inttable_entry *e) @@ -141,6 +146,7 @@ static uint32_t empty_strbucket(struct upb_strtable *table) static void strinsert(struct upb_strtable *t, struct upb_strtable_entry *e) { + assert(upb_strtable_lookup(t, &e->key) == NULL); uint32_t bucket = strtable_bucket(t, &e->key); struct upb_strtable_entry *table_e = strent(t, bucket); if(table_e->key.byte_len != 0) { /* Collision. */ diff --git a/upb_table.h b/upb_table.h index a5da0f2c6a..8eec03c6a6 100644 --- a/upb_table.h +++ b/upb_table.h @@ -23,6 +23,7 @@ extern "C" { #endif +/* Note: the key cannot be zero! Zero is used by the implementation. */ typedef uint32_t upb_inttable_key_t; #define UPB_END_OF_CHAIN (uint32_t)0