diff --git a/descriptor/descriptor.h b/descriptor/descriptor.h index ba86c81109..313cc89b2a 100644 --- a/descriptor/descriptor.h +++ b/descriptor/descriptor.h @@ -99,7 +99,7 @@ struct google_protobuf_UninterpretedOption_NamePart { bool is_extension:1; /* = 2, required. */ } has; } set_flags; - struct upb_string* name_part; + upb_string* name_part; bool is_extension; }; UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart) @@ -119,7 +119,7 @@ struct google_protobuf_DescriptorProto { bool options:1; /* = 7, optional. */ } has; } set_flags; - struct upb_string* name; + upb_string* name; UPB_MSG_ARRAY(google_protobuf_FieldDescriptorProto)* field; UPB_MSG_ARRAY(google_protobuf_DescriptorProto)* nested_type; UPB_MSG_ARRAY(google_protobuf_EnumDescriptorProto)* enum_type; @@ -140,7 +140,7 @@ struct google_protobuf_EnumDescriptorProto { bool options:1; /* = 3, optional. */ } has; } set_flags; - struct upb_string* name; + upb_string* name; UPB_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto)* value; google_protobuf_EnumOptions* options; }; @@ -161,11 +161,11 @@ struct google_protobuf_UninterpretedOption { } has; } set_flags; UPB_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart)* name; - struct upb_string* identifier_value; + upb_string* identifier_value; uint64_t positive_int_value; int64_t negative_int_value; double double_value; - struct upb_string* string_value; + upb_string* string_value; }; UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption) @@ -185,8 +185,8 @@ struct google_protobuf_FileDescriptorProto { bool options:1; /* = 8, optional. */ } has; } set_flags; - struct upb_string* name; - struct upb_string* package; + upb_string* name; + upb_string* package; struct upb_string_array* dependency; UPB_MSG_ARRAY(google_protobuf_DescriptorProto)* message_type; UPB_MSG_ARRAY(google_protobuf_EnumDescriptorProto)* enum_type; @@ -208,9 +208,9 @@ struct google_protobuf_MethodDescriptorProto { bool options:1; /* = 4, optional. 
*/ } has; } set_flags; - struct upb_string* name; - struct upb_string* input_type; - struct upb_string* output_type; + upb_string* name; + upb_string* input_type; + upb_string* output_type; google_protobuf_MethodOptions* options; }; UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto) @@ -239,7 +239,7 @@ struct google_protobuf_EnumValueDescriptorProto { bool options:1; /* = 3, optional. */ } has; } set_flags; - struct upb_string* name; + upb_string* name; int32_t number; google_protobuf_EnumValueOptions* options; }; @@ -256,7 +256,7 @@ struct google_protobuf_ServiceDescriptorProto { bool options:1; /* = 3, optional. */ } has; } set_flags; - struct upb_string* name; + upb_string* name; UPB_MSG_ARRAY(google_protobuf_MethodDescriptorProto)* method; google_protobuf_ServiceOptions* options; }; @@ -306,7 +306,7 @@ struct google_protobuf_FieldOptions { int32_t ctype; bool packed; bool deprecated; - struct upb_string* experimental_map_key; + upb_string* experimental_map_key; UPB_MSG_ARRAY(google_protobuf_UninterpretedOption)* uninterpreted_option; }; UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions) @@ -324,8 +324,8 @@ struct google_protobuf_FileOptions { bool uninterpreted_option:1; /* = 999, repeated. */ } has; } set_flags; - struct upb_string* java_package; - struct upb_string* java_outer_classname; + upb_string* java_package; + upb_string* java_outer_classname; int32_t optimize_for; bool java_multiple_files; UPB_MSG_ARRAY(google_protobuf_UninterpretedOption)* uninterpreted_option; @@ -376,13 +376,13 @@ struct google_protobuf_FieldDescriptorProto { bool options:1; /* = 8, optional. 
*/ } has; } set_flags; - struct upb_string* name; - struct upb_string* extendee; + upb_string* name; + upb_string* extendee; int32_t number; int32_t label; int32_t type; - struct upb_string* type_name; - struct upb_string* default_value; + upb_string* type_name; + upb_string* default_value; google_protobuf_FieldOptions* options; }; UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto) diff --git a/src/upb_atomic.h b/src/upb_atomic.h index e425502895..de2238c4c0 100644 --- a/src/upb_atomic.h +++ b/src/upb_atomic.h @@ -35,28 +35,34 @@ extern "C" { /* Non-thread-safe implementations. ******************************************/ typedef struct { - int val; + int v; } upb_atomic_refcount_t; INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->val = val; + a->v = val; } INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return a->val++ == 0; + return a->v++ == 0; } INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return --a->val == 0; + return --a->v == 0; } INLINE int upb_atomic_read(upb_atomic_refcount_t *a) { - return a->val; + return a->v; } INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) { - a->val += val; - return a->val == 0; + a->v += val; + return a->v == 0; +} + +INLINE bool upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) { + int ret = a->v; + a->v += val; + return ret; } typedef struct { @@ -81,32 +87,32 @@ INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; } /* GCC includes atomic primitives. */ typedef struct { - volatile int val; + volatile int v; } upb_atomic_refcount_t; INLINE void upb_atomic_refcount_init(upb_atomic_refcount_t *a, int val) { - a->val = val; + a->v = val; __sync_synchronize(); /* Ensure the initialized value is visible. 
*/ } INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->val, 1) == 0; + return __sync_fetch_and_add(&a->v, 1) == 0; } INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) { - return __sync_add_and_fetch(&a->val, n) == 0; + return __sync_add_and_fetch(&a->v, n) == 0; } INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) { - return __sync_sub_and_fetch(&a->val, 1) == 0; + return __sync_sub_and_fetch(&a->v, 1) == 0; } INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) { - return __sync_fetch_and_add(&a->val, 0); + return __sync_fetch_and_add(&a->v, 0); } INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) { - a->val = val; + a->v = val; } #elif defined(WIN32) diff --git a/src/upb_data.c b/src/upb_data.c index 3c8b244467..73a880f101 100644 --- a/src/upb_data.c +++ b/src/upb_data.c @@ -4,8 +4,13 @@ * Copyright (c) 2009 Joshua Haberman. See LICENSE for details. */ +#include #include "upb_data.h" +INLINE void data_init(upb_data *d, int flags) { + d->v = flags; +} + static uint32_t round_up_to_pow2(uint32_t v) { /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ @@ -20,37 +25,59 @@ static uint32_t round_up_to_pow2(uint32_t v) } static void check_not_frozen(upb_data *d) { + // On one hand I am reluctant to put abort() calls in a low-level library + // that are enabled in a production build. On the other hand, this is a bug + // in the client code that we cannot recover from, and it seems better to get + // the error here than later. 
if(upb_data_hasflag(d, UPB_DATA_FROZEN)) abort(); } +static upb_strlen_t string_get_bytesize(upb_string *s) { + if(upb_data_hasflag(&s->common.base, UPB_DATA_REFCOUNTED)) { + return s->refcounted.byte_size; + } else { + return (s->norefcount.byte_size_and_flags & 0xFFFFFFF8) >> 3; + } +} + +static void string_set_bytesize(upb_string *s, upb_strlen_t newsize) { + if(upb_data_hasflag(&s->common.base, UPB_DATA_REFCOUNTED)) { + s->refcounted.byte_size = newsize; + } else { + s->norefcount.byte_size_and_flags &= 0x7; + s->norefcount.byte_size_and_flags |= (newsize << 3); + } +} + upb_string *upb_string_new() { - upb_string *s = malloc(sizeof(*s)); - s->byte_size = 0; - s->byte_len = 0; - s->ptr = NULL; - s->is_heap_allocated = true; - s->is_frozen = false; + upb_string *s = malloc(sizeof(upb_refcounted_string)); + data_init(&s->common.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED); + s->refcounted.byte_size = 0; + s->common.byte_len = 0; + s->common.ptr = NULL; return s; } -static void _upb_string_free(upb_string *s) +void _upb_string_free(upb_string *s) { - if(s->byte_size != 0) free(s->ptr); + if(string_get_bytesize(s) != 0) free(s->common.ptr); free(s); } char *upb_string_getrwbuf(upb_string *s, upb_strlen_t byte_len) { - check_not_frozen(s); - if(s->byte_size < byte_len) { + check_not_frozen(&s->common.base); + if(string_get_bytesize(s) < byte_len) { // Need to resize. 
- s->byte_size = round_up_to_pow2(byte_len); - s->ptr = realloc(s->ptr, s->byte_size); + size_t new_byte_size = round_up_to_pow2(byte_len); + s->common.ptr = realloc(s->common.ptr, new_byte_size); + string_set_bytesize(s, new_byte_size); } - s->byte_len = byte_len; - return s->ptr; + s->common.byte_len = byte_len; + return s->common.ptr; } +#if 0 void upb_msg_destroy(struct upb_msg *msg) { for(upb_field_count_t i = 0; i < msg->def->num_fields; i++) { struct upb_fielddef *f = &msg->def->fields[i]; @@ -74,4 +101,4 @@ void upb_array_destroy(struct upb_array *arr) if(arr->size != 0) free(arr->elements._void); free(arr); } - +#endif diff --git a/src/upb_data.h b/src/upb_data.h index 01661ffbf5..7b0a97b7d6 100644 --- a/src/upb_data.h +++ b/src/upb_data.h @@ -5,24 +5,31 @@ * * This file defines the in-memory format for messages, arrays, and strings * (which are the three dynamically-allocated structures that make up all - * protobufs). */ + * protobufs). + * + * The members of all structs should be considered private. Access should + * only happen through the provided functions. */ #ifndef UPB_DATA_H #define UPB_DATA_H +#include #include #include "upb.h" -#include "upb_def.h" +#include "upb_atomic.h" + +struct upb_msgdef; +struct upb_fielddef; /* upb_data *******************************************************************/ // The "base class" of strings, arrays, and messages. Contains a few flags and // possibly a reference count. None of the functions for upb_data are public, // but some of the constants are. -typedef upb_atomic_t upb_data; +typedef upb_atomic_refcount_t upb_data; // The flags in upb_data. -enum upb_data_flag { +typedef enum { // Set if the object itself was allocated with malloc() and should be freed // with free(). This flag would be false if the object was allocated on the // stack or is data from the static segment of an object file. Note that this @@ -40,48 +47,56 @@ enum upb_data_flag { // Set if the object has an embedded refcount. 
UPB_DATA_REFCOUNTED = (1<<2) -}; +} upb_data_flag; #define REFCOUNT_MASK 0xFFFFFFF8 #define REFCOUNT_SHIFT 3 #define REFCOUNT_ONE (1<v & flag; } -INLINE uint32_t _upb_data_read_refcount(upb_data *d) { +// INTERNAL-ONLY +INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) { + d->v |= flag; +} + +INLINE uint32_t upb_data_getrefcount(upb_data *d) { + int data; if(upb_data_hasflag(d, UPB_DATA_FROZEN)) data = upb_atomic_read(d); else - data = data->val; + data = d->v; return (data & REFCOUNT_MASK) >> REFCOUNT_SHIFT; } // Returns true if the given data has only one owner. -INLINE bool _upb_data_only(upb_data *data) { +INLINE bool upb_data_only(upb_data *data) { return !upb_data_hasflag(data, UPB_DATA_REFCOUNTED) || - upb_data_read_refcount(data) == 1; + upb_data_getrefcount(data) == 1; } // Specifies the type of ref that is requested based on the kind of access the // caller needs to the object. -enum upb_ref_flags { +typedef enum { // Use when the client plans to perform read-only access to the object, and // only in one thread at a time. This imposes the least requirements on the // object; it can be either frozen or not. As a result, requesting a // reference of this type never performs a copy unless the object has no // refcount. + // + // A ref of this type can always be explicitly converted to frozen or + // unfrozen later. UPB_REF_THREADUNSAFE_READONLY = 0, // Use when the client plans to perform read-only access, but from multiple // threads concurrently. This will force the object to eagerly perform any // parsing that may have been lazily deferred, and will force a copy if the - // object is not current frozen and there are any other referents (who expect - // the object to stay writable). + // object is not currently frozen. // // Asking for a reference of this type is equivalent to: // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); @@ -92,23 +107,23 @@ enum upb_ref_flags { // Use when the client plans to perform read/write access. 
As a result, the // reference will not be thread-safe for concurrent reading *or* writing; the // object must be externally synchronized if it is being accessed from more - // than one thread. This will force a copy if the object is not currently - // frozen and there are any other referents (who expect the object to stay - // safe for unsynchronized reads). + // than one thread. This will force a copy if the object is currently frozen. // // Asking for a reference of this type is equivalent to: // x = getref(y, UPB_REF_THREADUNSAFE_READONLY); // x = thaw(x); // ...except it is more efficient. UPB_REF_MUTABLE = 2 -} +} upb_reftype; +// INTERNAL-ONLY FUNCTION: // Attempts to increment the reference on d with the given type of ref. If // this is not possible, returns false. -INLINE bool upb_data_incref(upb_data *d, upb_reftype reftype) { +INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) { if((reftype == UPB_REF_FROZEN && !upb_data_hasflag(d, UPB_DATA_FROZEN)) || (reftype == UPB_REF_MUTABLE && upb_data_hasflag(d, UPB_DATA_FROZEN)) || - !upb_data_hasflag(d, UPB_DATA_REFCOUNTED)) { + (upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) && + !upb_data_hasflag(d, UPB_DATA_REFCOUNTED))) { return false; } // Increment the ref. Only need to use atomic ops if the ref is frozen. @@ -117,25 +132,40 @@ INLINE bool upb_data_incref(upb_data *d, upb_reftype reftype) { return true; } -INLINE bool upb_data_decref(upb_data *d) { - if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { - int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); - return old_val & REFCOUNT_MASK == REFCOUNT_ONE; +// INTERNAL-ONLY FUNCTION: +// Releases a reference on d, returning true if the object should be deleted. +INLINE bool _upb_data_unref(upb_data *d) { + if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) { + // A heap-allocated object without a refcount should never be decref'd. + // Its owner owns it exclusively and should free it directly. 
+ assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED)); + if(upb_data_hasflag(d, UPB_DATA_FROZEN)) { + int32_t old_val = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE); + return (old_val & REFCOUNT_MASK) == REFCOUNT_ONE; + } else { + d->v -= REFCOUNT_ONE; + return (d->v & REFCOUNT_MASK) == 0; + } } else { - *d -= REFCOUNT_ONE; - return *d & REFCOUNT_MASK == 0; + // Non heap-allocated data never should be deleted. + return false; } } -typedef uint8_t upb_flags_t; struct upb_mmhead {}; /* upb_string *****************************************************************/ typedef uint32_t upb_strlen_t; -// The members of this struct are private. Access should only be through the -// associated functions. +// We have several different representations for string, depending on whether +// it has a refcount (and likely in the future, depending on whether it is a +// slice of another string). We could just have one representation with +// members that are sometimes unused, but this is wasteful in memory. The +// flags that are always part of the first word tell us which representation +// to use. +// +// upb_string_common is the members that are common to all representations. typedef struct { upb_data base; upb_strlen_t byte_len; @@ -144,12 +174,14 @@ typedef struct { char *ptr; } upb_string_common; +// Used for a string without a refcount. typedef struct { uint32_t byte_size_and_flags; upb_strlen_t byte_len; char *ptr; } upb_norefcount_string; +// Used for a string with a refcount. typedef struct { upb_data base; upb_strlen_t byte_len; @@ -159,28 +191,35 @@ typedef struct { typedef union { upb_string_common common; - // Should only be accessed if flags indicate the string has *no* refcount. upb_norefcount_string norefcount; - // Should only be accessed if flags indicate the string *has* a refcount. upb_refcounted_string refcounted; } upb_string; -// Returns a newly constructed string, which starts out empty. Caller owns one -// ref on it. 
-upb_string *upb_string_new(void); +// Returns a newly constructed, refcounted string which starts out empty. +// Caller owns one ref on it. The returned string will not be frozen. +upb_string *upb_string_new(); + +// Creates a new string which is a duplicate of the given string. If +// refcounted is true, the new string is refcounted, otherwise the caller +// has exclusive ownership of it. +INLINE upb_string *upb_strdup(upb_string *s, bool refcounted); + +// INTERNAL-ONLY: +// Frees the given string, along with any memory the string owned. +void _upb_string_free(upb_string *s); // Returns a string to which caller owns a ref, and contains the same contents // as src. The returned value may be a copy of src, if the requested flags // were incompatible with src's. -INLINE upb_string *upb_string_getref(upb_string *s, upb_flags_t ref_flags) { - if(upb_data_incref(&s->common.base, ref_flags)) return s; - return upb_strdup(s); +INLINE upb_string *upb_string_getref(upb_string *s, int ref_flags) { + if(_upb_data_incref(&s->common.base, ref_flags)) return s; + return upb_strdup(s, true); } // The caller releases a ref on src, which it must previously have owned a ref // on. INLINE void upb_string_unref(upb_string *s) { - if(upb_data_unref(&s->common.base)) _upb_string_free(s); + if(_upb_data_unref(&s->common.base)) _upb_string_free(s); } // Returns a buffer to which the caller may write. The string is resized to @@ -188,6 +227,10 @@ INLINE void upb_string_unref(upb_string *s) { // not be frozen otherwise the program will assert-fail or abort(). char *upb_string_getrwbuf(upb_string *s, upb_strlen_t byte_len); +INLINE void upb_string_clear(upb_string *s) { + upb_string_getrwbuf(s, 0); +} + // Returns a buffer that the caller may use to read the current contents of // the string. The number of bytes available is upb_strlen(s). 
INLINE const char *upb_string_getrobuf(upb_string *s) { @@ -227,6 +270,32 @@ INLINE void upb_strcpy(upb_string *dest, upb_string *src) { memcpy(upb_string_getrwbuf(dest, src_len), upb_string_getrobuf(src), src_len); } +INLINE upb_string *upb_strdup(upb_string *s, bool refcounted) { + upb_string *copy = upb_string_new(refcounted); + upb_strcpy(copy, s); + return copy; +} + +// Appends 'append' to 's' in-place, resizing s if necessary. +INLINE void upb_strcat(upb_string *s, upb_string *append) { + upb_strlen_t s_len = upb_strlen(s); + upb_strlen_t append_len = upb_strlen(append); + upb_strlen_t newlen = s_len + append_len; + memcpy(upb_string_getrwbuf(s, newlen) + s_len, + upb_string_getrobuf(append), append_len); +} + +// Returns a string that is a substring of the given string. Currently this +// returns a copy, but in the future this may return an object that references +// the original string data instead of copying it. Both now and in the future, +// the caller owns a ref on whatever is returned. +INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) { + upb_string *slice = upb_string_new(true); + len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset); + memcpy(upb_string_getrwbuf(slice, len), upb_string_getrobuf(s) + offset, len); + return slice; +} + // Reads an entire file into a newly-allocated string (caller owns one ref). upb_string *upb_strreadfile(const char *filename); @@ -234,12 +303,12 @@ upb_string *upb_strreadfile(const char *filename); // Initialize with the given macro, which must resolve to a const char*. You // must not dynamically allocate this type. 
typedef upb_string upb_static_string; -#define UPB_STRLIT(str) {sizeof(str), false, true, false, 0, str} +#define UPB_STRLIT(str) {{{0 | UPB_DATA_FROZEN}, sizeof(str), str}} // Allows using upb_strings in printf, ie: // upb_string str = UPB_STRLIT("Hello, World!\n"); // printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */ -#define UPB_STRARG(str) (str)->byte_len, (str)->ptr +#define UPB_STRARG(str) (str)->common.byte_len, (str)->common.ptr #define UPB_STRFMT "%.*s" /* upb_array ******************************************************************/ @@ -276,7 +345,7 @@ upb_array *upb_array_new(void); // Returns an array to which caller owns a ref, and contains the same contents // as src. The returned value may be a copy of src, if the requested flags // were incompatible with src's. -INLINE upb_array *upb_array_getref(upb_array *src, upb_flags_t flags); +INLINE upb_array *upb_array_getref(upb_array *src, int ref_flags); // The caller releases a ref on the given array, which it must previously have // owned a ref on. diff --git a/src/upb_def.c b/src/upb_def.c index abf2b564eb..5f52f3136f 100644 --- a/src/upb_def.c +++ b/src/upb_def.c @@ -5,6 +5,7 @@ */ #include +#include #include "descriptor.h" #include "upb_def.h" #include "upb_data.h" @@ -149,7 +150,7 @@ void _upb_def_cyclic_ref(struct upb_def *def) { } static void upb_def_init(struct upb_def *def, enum upb_def_type type, - struct upb_string *fqname) { + upb_string *fqname) { def->type = type; def->is_cyclic = 0; // We detect this later, after resolving refs. 
def->search_depth = 0; @@ -165,10 +166,10 @@ static void upb_def_uninit(struct upb_def *def) { struct upb_unresolveddef { struct upb_def base; - struct upb_string *name; + upb_string *name; }; -static struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *str) { +static struct upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { struct upb_unresolveddef *def = malloc(sizeof(*def)); upb_string *name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY); upb_def_init(&def->base, UPB_DEF_UNRESOLVED, name); @@ -267,7 +268,7 @@ static void fielddef_sort(struct upb_fielddef **defs, size_t num) static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields, int num_fields, - struct upb_string *fqname, + upb_string *fqname, struct upb_status *status) { if(num_fields > UPB_MAX_FIELDS) { @@ -349,11 +350,11 @@ struct ntoi_ent { struct iton_ent { struct upb_inttable_entry e; - struct upb_string *string; + upb_string *string; }; static struct upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed, - struct upb_string *fqname) + upb_string *fqname) { struct upb_enumdef *e = malloc(sizeof(*e)); upb_def_init(&e->base, UPB_DEF_ENUM, fqname); @@ -421,34 +422,36 @@ static int my_memrchr(char *data, char c, size_t len) /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. */ static struct symtab_ent *resolve(struct upb_strtable *t, - struct upb_string *base, - struct upb_string *symbol) + upb_string *base, + upb_string *symbol) { - if(base->byte_len + symbol->byte_len + 1 >= UPB_SYMBOL_MAXLEN || - symbol->byte_len == 0) return NULL; - - if(symbol->ptr[0] == UPB_SYMBOL_SEPARATOR) { - /* Symbols starting with '.' are absolute, so we do a single lookup. 
*/ - struct upb_string sym_str = {.ptr = symbol->ptr+1, - .byte_len = symbol->byte_len-1}; - return upb_strtable_lookup(t, &sym_str); + if(upb_strlen(base) + upb_strlen(symbol) + 1 >= UPB_SYMBOL_MAXLEN || + upb_strlen(symbol) == 0) return NULL; + + if(upb_string_getrobuf(symbol)[0] == UPB_SYMBOL_SEPARATOR) { + // Symbols starting with '.' are absolute, so we do a single lookup. + // Slice to omit the leading '.' + upb_string *sym_str = upb_strslice(symbol, 1, INT_MAX); + struct symtab_ent *e = upb_strtable_lookup(t, sym_str); + upb_string_unref(sym_str); + return e; } else { - /* Remove components from base until we find an entry or run out. */ - char sym[UPB_SYMBOL_MAXLEN+1]; - struct upb_string sym_str = {.ptr = sym}; - int baselen = base->byte_len; + // Remove components from base until we find an entry or run out. + upb_string *sym_str = upb_string_new(true); + int baselen = upb_strlen(base); while(1) { - /* sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol */ - memcpy(sym, base->ptr, baselen); - sym[baselen] = UPB_SYMBOL_SEPARATOR; - memcpy(sym + baselen + 1, symbol->ptr, symbol->byte_len); - sym_str.byte_len = baselen + symbol->byte_len + 1; - - struct symtab_ent *e = upb_strtable_lookup(t, &sym_str); + // sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol + upb_strlen_t len = baselen + upb_strlen(symbol) + 1; + char *buf = upb_string_getrwbuf(sym_str, len); + memcpy(buf, upb_string_getrobuf(base), baselen); + buf[baselen] = UPB_SYMBOL_SEPARATOR; + memcpy(buf + baselen + 1, upb_string_getrobuf(symbol), upb_strlen(symbol)); + + struct symtab_ent *e = upb_strtable_lookup(t, sym_str); if (e) return e; else if(baselen == 0) return NULL; /* No more scopes to try. 
*/ - baselen = my_memrchr(base->ptr, UPB_SYMBOL_SEPARATOR, baselen); + baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen); } } } @@ -457,26 +460,18 @@ static struct symtab_ent *resolve(struct upb_strtable *t, * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" * join("", "Baz") -> "Baz" * Caller owns a ref on the returned string. */ -static struct upb_string *join(struct upb_string *base, struct upb_string *name) { - size_t len = base->byte_len + name->byte_len; - if(base->byte_len > 0) len++; /* For the separator. */ - struct upb_string *joined = upb_string_new(); - char *joined_ptr = upb_string_getrwbuf(joined, len); - if(base->byte_len > 0) { - /* nested_base = base + '.' + d->name */ - memcpy(joined_ptr, base->ptr, base->byte_len); - joined_ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR; - memcpy(&joined_ptr[base->byte_len+1], name->ptr, name->byte_len); - } else { - memcpy(joined_ptr, name->ptr, name->byte_len); +static upb_string *join(upb_string *base, upb_string *name) { + upb_string *joined = upb_strdup(base, true); + upb_strlen_t len = upb_strlen(joined); + if(len > 0) { + upb_string_getrwbuf(joined, len + 1)[len] = UPB_SYMBOL_SEPARATOR; } + upb_strcat(joined, name); return joined; } -static struct upb_string *try_define(struct upb_strtable *t, - struct upb_string *base, - struct upb_string *name, - struct upb_status *status) +static upb_string *try_define(struct upb_strtable *t, upb_string *base, + upb_string *name, struct upb_status *status) { if(!name) { upb_seterr(status, UPB_STATUS_ERROR, @@ -484,7 +479,7 @@ static struct upb_string *try_define(struct upb_strtable *t, UPB_STRARG(base)); return NULL; } - struct upb_string *fqname = join(base, name); + upb_string *fqname = join(base, name); if(upb_strtable_lookup(t, fqname)) { upb_seterr(status, UPB_STATUS_ERROR, "attempted to redefine symbol '" UPB_STRFMT "'", @@ -497,11 +492,11 @@ static struct upb_string *try_define(struct upb_strtable *t, static void insert_enum(struct upb_strtable *t, 
google_protobuf_EnumDescriptorProto *ed, - struct upb_string *base, + upb_string *base, struct upb_status *status) { - struct upb_string *name = ed->set_flags.has.name ? ed->name : NULL; - struct upb_string *fqname = try_define(t, base, name, status); + upb_string *name = ed->set_flags.has.name ? ed->name : NULL; + upb_string *fqname = try_define(t, base, name, status); if(!fqname) return; struct symtab_ent e; @@ -512,11 +507,11 @@ static void insert_enum(struct upb_strtable *t, static void insert_message(struct upb_strtable *t, google_protobuf_DescriptorProto *d, - struct upb_string *base, bool sort, + upb_string *base, bool sort, struct upb_status *status) { - struct upb_string *name = d->set_flags.has.name ? d->name : NULL; - struct upb_string *fqname = try_define(t, base, name, status); + upb_string *name = d->set_flags.has.name ? d->name : NULL; + upb_string *fqname = try_define(t, base, name, status); if(!fqname) return; int num_fields = d->set_flags.has.field ? d->field->len : 0; @@ -601,7 +596,7 @@ static void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, google_protobuf_FileDescriptorProto *fd, bool sort, struct upb_status *status) { - struct upb_string *pkg; + upb_string *pkg; // Temporary hack until the static data is integrated into our // memory-management scheme. bool should_unref; @@ -635,11 +630,11 @@ static void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs, for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) { struct upb_msgdef *m = upb_dyncast_msgdef(e->def); if(!m) continue; - struct upb_string *base = e->e.key; + upb_string *base = e->e.key; for(upb_field_count_t i = 0; i < m->num_fields; i++) { struct upb_fielddef *f = &m->fields[i]; if(!upb_hasdef(f)) continue; // No resolving necessary. 
- struct upb_string *name = upb_downcast_unresolveddef(f->def)->name; + upb_string *name = upb_downcast_unresolveddef(f->def)->name; struct symtab_ent *found = resolve(existingdefs, base, name); if(!found) found = resolve(addto, base, name); upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; @@ -689,9 +684,9 @@ struct upb_symtab *upb_symtab_new() if(!upb_ok(&status)) { fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg); assert(false); - return NULL; /* Indicates that upb is buggy or corrupt. */ + return NULL; // Indicates that upb is buggy or corrupt. } - struct upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); + upb_string name = UPB_STRLIT("google.protobuf.FileDescriptorSet"); struct symtab_ent *e = upb_strtable_lookup(&s->psymtab, &name); assert(e); s->fds_msgdef = upb_downcast_msgdef(e->def); @@ -737,8 +732,7 @@ struct upb_def **upb_symtab_getdefs(struct upb_symtab *s, int *count, return defs; } -struct upb_def *upb_symtab_lookup(struct upb_symtab *s, - struct upb_string *sym) +struct upb_def *upb_symtab_lookup(struct upb_symtab *s, upb_string *sym) { upb_rwlock_rdlock(&s->lock); struct symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); @@ -752,9 +746,8 @@ struct upb_def *upb_symtab_lookup(struct upb_symtab *s, } -struct upb_def *upb_symtab_resolve(struct upb_symtab *s, - struct upb_string *base, - struct upb_string *symbol) { +struct upb_def *upb_symtab_resolve(struct upb_symtab *s, upb_string *base, + upb_string *symbol) { upb_rwlock_rdlock(&s->lock); struct symtab_ent *e = resolve(&s->symtab, base, symbol); struct upb_def *ret = NULL; @@ -816,7 +809,7 @@ void upb_symtab_addfds(struct upb_symtab *s, return; } -void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc, +void upb_symtab_add_desc(struct upb_symtab *s, upb_string *desc, struct upb_status *status) { upb_msg *fds = upb_msg_new(s->fds_msgdef); diff --git a/src/upb_def.h b/src/upb_def.h index 87c2be824b..e8c75487c5 100644 --- 
a/src/upb_def.h +++ b/src/upb_def.h @@ -26,6 +26,7 @@ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ +#include "upb_data.h" #include "upb_atomic.h" #include "upb_table.h" @@ -53,7 +54,7 @@ enum upb_def_type { typedef int8_t upb_def_type_t; struct upb_def { - struct upb_string *fqname; // Fully qualified. + upb_string *fqname; // Fully qualified. upb_atomic_refcount_t refcount; upb_def_type_t type; @@ -121,7 +122,7 @@ struct upb_fielddef { upb_field_type_t type; upb_label_t label; upb_field_number_t number; - struct upb_string *name; + upb_string *name; // These are set only when this fielddef is part of a msgdef. uint32_t byte_offset; // Where in a upb_msg to find the data. @@ -205,9 +206,9 @@ INLINE struct upb_fielddef *upb_msg_itof(struct upb_msgdef *m, uint32_t num) { } INLINE struct upb_fielddef *upb_msg_ntof(struct upb_msgdef *m, - struct upb_string *name) { + upb_string *name) { struct upb_ntof_ent *e; - e = (struct upb_ntof_ent*) upb_strtable_lookup(&m->ntof, name); + e = (struct upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); return e ? e->f : NULL; } @@ -222,9 +223,9 @@ struct upb_enumdef { typedef int32_t upb_enumval_t; // Lookups from name to integer and vice-versa. -bool upb_enumdef_ntoi(struct upb_enumdef *e, struct upb_string *name, +bool upb_enumdef_ntoi(struct upb_enumdef *e, upb_string *name, upb_enumval_t *num); -struct upb_string *upb_enumdef_iton(struct upb_enumdef *e, upb_enumval_t num); +upb_string *upb_enumdef_iton(struct upb_enumdef *e, upb_enumval_t num); // Iteration over name/value pairs. The order is undefined. // struct upb_enumd_iter i; @@ -234,7 +235,7 @@ struct upb_string *upb_enumdef_iton(struct upb_enumdef *e, upb_enumval_t num); struct upb_enum_iter { struct upb_enumdef *e; void *state; // Internal iteration state. 
- struct upb_string *name; + upb_string *name; upb_enumval_t val; }; void upb_enum_begin(struct upb_enum_iter *iter, struct upb_enumdef *e); @@ -278,14 +279,12 @@ INLINE void upb_symtab_unref(struct upb_symtab *s) { // // If a def is found, the caller owns one ref on the returned def. Otherwise // returns NULL. -struct upb_def *upb_symtab_resolve(struct upb_symtab *s, - struct upb_string *base, - struct upb_string *symbol); +struct upb_def *upb_symtab_resolve(struct upb_symtab *s, upb_string *base, + upb_string *symbol); // Find an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def. Otherwise returns NULL. -struct upb_def *upb_symtab_lookup(struct upb_symtab *s, - struct upb_string *sym); +struct upb_def *upb_symtab_lookup(struct upb_symtab *s, upb_string *sym); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref @@ -299,7 +298,7 @@ struct upb_def **upb_symtab_getdefs(struct upb_symtab *s, int *count, // defined in desc). desc may not attempt to define any names that are already // defined in this symtab. Caller retains ownership of desc. status indicates // whether the operation was successful or not, and the error message (if any). -void upb_symtab_add_desc(struct upb_symtab *s, struct upb_string *desc, +void upb_symtab_add_desc(struct upb_symtab *s, upb_string *desc, struct upb_status *status); #ifdef __cplusplus diff --git a/src/upb_table.h b/src/upb_table.h index 8250354a60..9a49b8b3c5 100644 --- a/src/upb_table.h +++ b/src/upb_table.h @@ -39,7 +39,7 @@ struct upb_inttable_entry { // performance by letting us compare hashes before comparing lengths or the // strings themselves. struct upb_strtable_entry { - struct upb_string *key; // We own a frozen ref. + upb_string *key; // We own a frozen ref. uint32_t next; // Internal chaining. 
}; @@ -117,7 +117,7 @@ INLINE void *upb_inttable_lookup(struct upb_inttable *t, uint32_t key) { return upb_inttable_fast_lookup(t, key, t->t.entry_size); } -void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key); +void *upb_strtable_lookup(struct upb_strtable *t, upb_string *key); /* Provides iteration over the table. The order in which the entries are * returned is undefined. Insertions invalidate iterators. The _next