From cca4818eb7769d6e776bdc30516a5f871f1d6393 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 31 Mar 2012 12:17:32 -0700 Subject: [PATCH] Sync from internal Google development. --- Makefile | 14 +-- benchmarks/parsestream.upb.c | 4 +- bindings/cpp/upb/bytestream.hpp | 4 + bindings/cpp/upb/def.hpp | 47 +++++---- bindings/linux/Makefile | 20 ++++ bindings/linux/assert.h | 20 ++++ bindings/linux/errno.h | 8 ++ bindings/linux/stdint.h | 8 ++ bindings/linux/stdio.h | 10 ++ bindings/linux/stdlib.h | 22 ++++ bindings/linux/string.h | 26 +++++ tests/test_cpp.cc | 4 +- tests/test_def.c | 20 ++-- upb/bytestream.c | 165 ------------------------------ upb/bytestream.h | 50 --------- upb/def.c | 103 +++++++++---------- upb/def.h | 64 ++++++------ upb/handlers.c | 2 +- upb/handlers.h | 1 - upb/refcount.c | 122 ++++++++++++---------- upb/refcount.h | 19 ++-- upb/stdc/README | 15 +++ upb/stdc/error.c | 44 ++++++++ upb/stdc/error.h | 27 +++++ upb/stdc/io.c | 175 ++++++++++++++++++++++++++++++++ upb/stdc/io.h | 73 +++++++++++++ upb/table.c | 1 + upb/upb.c | 34 ------- upb/upb.h | 66 ++++++++---- 29 files changed, 713 insertions(+), 455 deletions(-) create mode 100644 bindings/linux/Makefile create mode 100644 bindings/linux/assert.h create mode 100644 bindings/linux/errno.h create mode 100644 bindings/linux/stdint.h create mode 100644 bindings/linux/stdio.h create mode 100644 bindings/linux/stdlib.h create mode 100644 bindings/linux/string.h create mode 100644 upb/stdc/README create mode 100644 upb/stdc/error.c create mode 100644 upb/stdc/error.h create mode 100644 upb/stdc/io.c create mode 100644 upb/stdc/io.h diff --git a/Makefile b/Makefile index a12e7ef5e2..de36900bb7 100644 --- a/Makefile +++ b/Makefile @@ -81,14 +81,16 @@ deps: Makefile $(ALLSRC) # The core library. 
CORE= \ - upb/upb.c \ - upb/handlers.c \ - upb/descriptor/reader.c \ - upb/table.c \ - upb/refcount.c \ + upb/bytestream.c \ upb/def.c \ + upb/descriptor/reader.c \ + upb/handlers.c \ upb/msg.c \ - upb/bytestream.c \ + upb/refcount.c \ + upb/stdc/error.c \ + upb/stdc/io.c \ + upb/table.c \ + upb/upb.c \ bindings/cpp/upb/proto2_bridge.cc \ # TODO: the proto2 bridge should be built as a separate library. diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c index e9164d0284..781b97ab12 100644 --- a/benchmarks/parsestream.upb.c +++ b/benchmarks/parsestream.upb.c @@ -31,7 +31,7 @@ static bool initialize() { // Initialize upb state, decode descriptor. upb_status status = UPB_STATUS_INIT; - upb_symtab *s = upb_symtab_new(); + upb_symtab *s = upb_symtab_new(&s); upb_load_descriptor_file_into_symtab(s, MESSAGE_DESCRIPTOR_FILE, &status); if(!upb_ok(&status)) { fprintf(stderr, "Error reading descriptor: %s\n", @@ -44,7 +44,7 @@ static bool initialize() fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME); return false; } - upb_symtab_unref(s); + upb_symtab_unref(s, &s); // Read the message data itself. input_str = upb_readfile(MESSAGE_FILE, &input_len); diff --git a/bindings/cpp/upb/bytestream.hpp b/bindings/cpp/upb/bytestream.hpp index 8b48690f42..37d81576a8 100644 --- a/bindings/cpp/upb/bytestream.hpp +++ b/bindings/cpp/upb/bytestream.hpp @@ -209,6 +209,10 @@ class ByteRegion : public upb_byteregion { uint64_t ofs = start_ofs(); size_t len; const char *ptr = GetPtr(ofs, &len); + // Empirically, calling reserve() here is counterproductive and slows down + // benchmarks. If the parsing is happening in a tight loop that is reusing + // the string object, there is probably enough data reserved already and + // the reserve() call is extra overhead. 
str->assign(ptr, len); ofs += len; while (ofs < end_ofs()) { diff --git a/bindings/cpp/upb/def.hpp b/bindings/cpp/upb/def.hpp index 69986482c6..6547255bb7 100644 --- a/bindings/cpp/upb/def.hpp +++ b/bindings/cpp/upb/def.hpp @@ -60,12 +60,14 @@ class FieldDef : public upb_fielddef { return static_cast(f); } - static FieldDef* New(void *owner) { return Cast(upb_fielddef_new(owner)); } - FieldDef* Dup(void *owner) const { + static FieldDef* New(const void *owner) { + return Cast(upb_fielddef_new(owner)); + } + FieldDef* Dup(const void *owner) const { return Cast(upb_fielddef_dup(this, owner)); } - void Ref(void *owner) { upb_fielddef_ref(this, owner); } - void Unref(void *owner) { upb_fielddef_unref(this, owner); } + void Ref(const void *owner) { upb_fielddef_ref(this, owner); } + void Unref(const void *owner) { upb_fielddef_unref(this, owner); } bool IsMutable() const { return upb_fielddef_ismutable(this); } bool IsFinalized() const { return upb_fielddef_isfinalized(this); } @@ -194,8 +196,8 @@ class Def : public upb_def { return static_cast(def); } - void Ref(void *owner) const { upb_def_ref(this, owner); } - void Unref(void *owner) const { upb_def_unref(this, owner); } + void Ref(const void *owner) const { upb_def_ref(this, owner); } + void Unref(const void *owner) const { upb_def_unref(this, owner); } void set_full_name(const char *name) { upb_def_setfullname(this, name); } void set_full_name(const std::string& name) { @@ -247,8 +249,8 @@ class MessageDef : public upb_msgdef { return Cast(upb_msgdef_dup(this, owner)); } - void Ref(void *owner) const { upb_msgdef_ref(this, owner); } - void Unref(void *owner) const { upb_msgdef_unref(this, owner); } + void Ref(const void *owner) const { upb_msgdef_ref(this, owner); } + void Unref(const void *owner) const { upb_msgdef_unref(this, owner); } // Read accessors -- may be called at any time. 
@@ -281,11 +283,13 @@ class MessageDef : public upb_msgdef { // be set, and the message may not already contain any field with this name // or number, and this FieldDef may not be part of another message, otherwise // false is returned and the MessageDef is unchanged. - bool AddField(FieldDef* f, void *owner) { return AddFields(&f, 1, owner); } - bool AddFields(FieldDef*const * f, int n, void *owner) { + bool AddField(FieldDef* f, const void *owner) { + return AddFields(&f, 1, owner); + } + bool AddFields(FieldDef*const * f, int n, const void *owner) { return upb_msgdef_addfields(this, (upb_fielddef*const*)f, n, owner); } - bool AddFields(const std::vector& fields, void *owner) { + bool AddFields(const std::vector& fields, const void *owner) { return AddFields(&fields[0], fields.size(), owner); } @@ -344,11 +348,13 @@ class EnumDef : public upb_enumdef { return static_cast(e); } - static EnumDef* New(void *owner) { return Cast(upb_enumdef_new(owner)); } + static EnumDef* New(const void *owner) { return Cast(upb_enumdef_new(owner)); } - void Ref(void *owner) { upb_enumdef_ref(this, owner); } - void Unref(void *owner) { upb_enumdef_unref(this, owner); } - EnumDef* Dup(void *owner) const { return Cast(upb_enumdef_dup(this, owner)); } + void Ref(const void *owner) { upb_enumdef_ref(this, owner); } + void Unref(const void *owner) { upb_enumdef_unref(this, owner); } + EnumDef* Dup(const void *owner) const { + return Cast(upb_enumdef_dup(this, owner)); + } Def* AsDef() { return Def::Cast(UPB_UPCAST(this)); } const Def* AsDef() const { return Def::Cast(UPB_UPCAST(this)); } @@ -397,10 +403,15 @@ class SymbolTable : public upb_symtab { return static_cast(s); } - static SymbolTable* New() { return Cast(upb_symtab_new()); } + static SymbolTable* New(const void *owner) { + return Cast(upb_symtab_new(owner)); + } - void Ref() const { upb_symtab_unref(this); } - void Unref() const { upb_symtab_unref(this); } + void Ref(const void *owner) const { upb_symtab_ref(this, owner); } + 
void Unref(const void *owner) const { upb_symtab_unref(this, owner); } + void DonateRef(const void *from, const void *to) const { + upb_symtab_donateref(this, from, to); + } // Adds the given defs to the symtab, resolving all symbols. Only one def // per name may be in the list, but defs can replace existing defs in the diff --git a/bindings/linux/Makefile b/bindings/linux/Makefile new file mode 100644 index 0000000000..1736b61f36 --- /dev/null +++ b/bindings/linux/Makefile @@ -0,0 +1,20 @@ +obj-m = upb.o + +upb-objs = \ + ../../upb/upb.o \ + ../../upb/bytestream.o \ + ../../upb/def.o \ + ../../upb/handlers.o \ + ../../upb/table.o \ + ../../upb/refcount.o \ + ../../upb/msg.o \ + +KVERSION = $(shell uname -r) + +ccflags-y := -I$(PWD) -I$(PWD)/../.. -Wno-declaration-after-statement -std=gnu99 + +all: + make -C /lib/modules/$(KVERSION)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(KVERSION)/build M=$(PWD) clean diff --git a/bindings/linux/assert.h b/bindings/linux/assert.h new file mode 100644 index 0000000000..26d8ab6423 --- /dev/null +++ b/bindings/linux/assert.h @@ -0,0 +1,20 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include + +#ifndef UPB_LINUX_ASSERT_H +#define UPB_LINUX_ASSERT_H + +#ifdef NDEBUG +#define assert(x) +#else +#define assert(x) \ + do { if (!(x)) panic("Assertion failed: %s at %s:%d", #x, __FILE__, __LINE__); } while (0) +#endif + +#endif diff --git a/bindings/linux/errno.h b/bindings/linux/errno.h new file mode 100644 index 0000000000..f45d939a57 --- /dev/null +++ b/bindings/linux/errno.h @@ -0,0 +1,8 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. 
+ * Author: Josh Haberman + */ + +#include diff --git a/bindings/linux/stdint.h b/bindings/linux/stdint.h new file mode 100644 index 0000000000..2524b23a51 --- /dev/null +++ b/bindings/linux/stdint.h @@ -0,0 +1,8 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#include diff --git a/bindings/linux/stdio.h b/bindings/linux/stdio.h new file mode 100644 index 0000000000..72c1b0df52 --- /dev/null +++ b/bindings/linux/stdio.h @@ -0,0 +1,10 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Linux-kernel implementations of some stdio.h functions. + */ + +#include // For sprintf and friends. diff --git a/bindings/linux/stdlib.h b/bindings/linux/stdlib.h new file mode 100644 index 0000000000..8381b13bbd --- /dev/null +++ b/bindings/linux/stdlib.h @@ -0,0 +1,22 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Linux-kernel implementations of some stdlib.h functions. + */ + +#include + +#ifndef UPB_LINUX_STDLIB_H +#define UPB_LINUX_STDLIB_H + +static inline void *malloc(size_t size) { return kmalloc(size, GFP_ATOMIC); } +static inline void free(void *p) { kfree(p); } + +static inline void *realloc(void *p, size_t size) { + return krealloc(p, size, GFP_ATOMIC); +} + +#endif diff --git a/bindings/linux/string.h b/bindings/linux/string.h new file mode 100644 index 0000000000..69de3fa0e9 --- /dev/null +++ b/bindings/linux/string.h @@ -0,0 +1,26 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + */ + +#ifndef UPB_LINUX_STRING_H_ +#define UPB_LINUX_STRING_H_ + +#include +#include +#include "upb/upb.h" // For INLINE. 
+ +INLINE char *strdup(const char *s) { + size_t len = strlen(s); + char *ret = malloc(len + 1); + if (ret == NULL) return NULL; + // Be particularly defensive and guard against buffer overflow if there + // is a concurrent mutator. + strncpy(ret, s, len); + ret[len] = '\0'; + return ret; +} + +#endif /* UPB_LINUX_STRING_H_ */ diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 4d70e85a76..fb0916df0b 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -18,7 +18,7 @@ #include "upb_test.h" static void TestSymbolTable(const char *descriptor_file) { - upb::SymbolTable *s = upb::SymbolTable::New(); + upb::SymbolTable *s = upb::SymbolTable::New(&s); upb::Status status; if (!upb::LoadDescriptorFileIntoSymtab(s, descriptor_file, &status)) { std::cerr << "Couldn't load descriptor: " << status; @@ -27,7 +27,7 @@ static void TestSymbolTable(const char *descriptor_file) { const upb::MessageDef *md = s->LookupMessage("A", &md); ASSERT(md); - s->Unref(); + s->Unref(&s); md->Unref(&md); } diff --git a/tests/test_def.c b/tests/test_def.c index 698532eaf1..f60d556857 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -16,16 +16,16 @@ const char *descriptor_file; static void test_empty_symtab() { - upb_symtab *s = upb_symtab_new(); + upb_symtab *s = upb_symtab_new(&s); int count; const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY, NULL); ASSERT(count == 0); free(defs); - upb_symtab_unref(s); + upb_symtab_unref(s, &s); } -static upb_symtab *load_test_proto() { - upb_symtab *s = upb_symtab_new(); +static upb_symtab *load_test_proto(void *owner) { + upb_symtab *s = upb_symtab_new(owner); ASSERT(s); upb_status status = UPB_STATUS_INIT; if (!upb_load_descriptor_file_into_symtab(s, descriptor_file, &status)) { @@ -38,14 +38,14 @@ static upb_symtab *load_test_proto() { } static void test_cycles() { - upb_symtab *s = load_test_proto(); + upb_symtab *s = load_test_proto(&s); // Test cycle detection by making a cyclic def's main refcount go to zero // and then be 
incremented to one again. const upb_def *def = upb_symtab_lookup(s, "A", &def); ASSERT(def); ASSERT(upb_def_isfinalized(def)); - upb_symtab_unref(s); + upb_symtab_unref(s, &s); // Message A has only one subfield: "optional B b = 1". const upb_msgdef *m = upb_downcast_msgdef_const(def); @@ -62,14 +62,14 @@ static void test_cycles() { } static void test_fielddef_unref() { - upb_symtab *s = load_test_proto(); + upb_symtab *s = load_test_proto(&s); const upb_msgdef *md = upb_symtab_lookupmsg(s, "A", &md); upb_fielddef *f = upb_msgdef_itof(md, 1); upb_fielddef_ref(f, &f); // Unref symtab and msgdef; now fielddef is the only thing keeping the msgdef // alive. - upb_symtab_unref(s); + upb_symtab_unref(s, &s); upb_msgdef_unref(md, &md); // Check that md is still alive. ASSERT(strcmp(upb_def_fullname(UPB_UPCAST(md)), "A") == 0); @@ -125,7 +125,7 @@ INLINE upb_enumdef *upb_enumdef_newnamed(const char *name, void *owner) { } void test_replacement() { - upb_symtab *s = upb_symtab_new(); + upb_symtab *s = upb_symtab_new(&s); upb_msgdef *m = upb_msgdef_newnamed("MyMessage", &s); upb_msgdef_addfield(m, newfield( @@ -156,7 +156,7 @@ void test_replacement() { ASSERT(m3 == m2); upb_msgdef_unref(m3, &m3); - upb_symtab_unref(s); + upb_symtab_unref(s, &s); } int main(int argc, char *argv[]) { diff --git a/upb/bytestream.c b/upb/bytestream.c index 8feb678037..a242df4a76 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -11,9 +11,6 @@ #include #include -// We can make this configurable if necessary. 
-#define BUF_SIZE 32768 - char *upb_byteregion_strdup(const struct _upb_byteregion *r) { char *ret = malloc(upb_byteregion_len(r) + 1); upb_byteregion_copyall(r, ret); @@ -75,168 +72,6 @@ upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) { } -/* upb_stdio ******************************************************************/ - -int upb_stdio_cmpbuf(const void *_key, const void *_elem) { - const uint64_t *ofs = _key; - const upb_stdio_buf *buf = _elem; - return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE); -} - -static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) { - // TODO: it is probably faster to linear search short lists, and to - // special-case the last one or two bufs. - return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf); -} - -static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { - upb_stdio_buf **reuse = NULL; // XXX - int num_reused = 0, num_inuse = 0; - - // Could sweep only a subset of bufs if this was a hotspot. - for (int i = 0; i < s->nbuf; i++) { - upb_stdio_buf *buf = s->bufs[i]; - if (buf->refcount > 0) { - s->bufs[num_inuse++] = buf; - } else { - reuse[num_reused++] = buf; - } - } - assert(num_reused + num_inuse == s->nbuf); - memcpy(s->bufs + num_inuse, reuse, num_reused * sizeof(upb_stdio_buf*)); - if (num_reused == 0) { - ++s->nbuf; - s->bufs = realloc(s->bufs, s->nbuf * sizeof(*s->bufs)); - s->bufs[s->nbuf-1] = malloc(sizeof(upb_stdio_buf) + BUF_SIZE); - return s->bufs[s->nbuf-1]; - } - return s->bufs[s->nbuf-num_reused]; -} - -void upb_stdio_discard(void *src, uint64_t ofs) { - (void)src; - (void)ofs; -} - -upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) { - (void)ofs; - upb_stdio *stdio = (upb_stdio*)src; - upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio); -retry: - *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file); - buf->len = *bytes_read; - if (*bytes_read < (size_t)BUF_SIZE) { - // Error or EOF. 
- if (feof(stdio->file)) { - upb_status_seteof(&stdio->src.status); - return UPB_BYTE_EOF; - } - if (ferror(stdio->file)) { -#ifdef EINTR - // If we encounter a client who doesn't want to retry EINTR, we can easily - // add a boolean property of the stdio that controls this behavior. - if (errno == EINTR) { - clearerr(stdio->file); - goto retry; - } -#endif - upb_status_fromerrno(&stdio->src.status); - return upb_errno_is_wouldblock() ? UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR; - } - assert(false); - } - return UPB_BYTE_OK; -} - -void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) { - upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); - ofs -= buf->ofs; - memcpy(dst, buf->data + ofs, BUF_SIZE - ofs); - len -= (BUF_SIZE - ofs); - dst += (BUF_SIZE - ofs); - while (len > 0) { - ++buf; - size_t bytes = UPB_MIN(len, BUF_SIZE); - memcpy(dst, buf->data, bytes); - len -= bytes; - dst += bytes; - } -} - -const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) { - upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); - ofs -= buf->ofs; - *len = BUF_SIZE - ofs; - return &buf->data[ofs]; -} - -#if 0 -upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); - upb_strlen_t len = upb_string_len(str); - upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); - if (written < len) { - upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream."); - return -1; - } - return written; -} - -uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, - const char *fmt, va_list args) { - upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); - int written = vfprintf(stdio->file, fmt, args); - if (written < 0) { - upb_status_seterrf(status, "Error writing to stdio stream."); - return -1; - } - return written; -} -#endif - -void upb_stdio_init(upb_stdio *stdio) { - static upb_bytesrc_vtbl bytesrc_vtbl 
= { - &upb_stdio_fetch, - &upb_stdio_discard, - &upb_stdio_copy, - &upb_stdio_getptr, - }; - upb_bytesrc_init(&stdio->src, &bytesrc_vtbl); - - //static upb_bytesink_vtbl bytesink_vtbl = { - // upb_stdio_putstr, - // upb_stdio_vprintf - //}; - //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); -} - -void upb_stdio_reset(upb_stdio* stdio, FILE *file) { - stdio->file = file; - stdio->should_close = false; -} - -void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, - upb_status *s) { - FILE *f = fopen(filename, mode); - if (!f) { - upb_status_fromerrno(s); - return; - } - setvbuf(stdio->file, NULL, _IONBF, 0); // Disable buffering; we do our own. - upb_stdio_reset(stdio, f); - stdio->should_close = true; -} - -void upb_stdio_uninit(upb_stdio *stdio) { - // Can't report status; caller should flush() to ensure data is written. - if (stdio->should_close) fclose(stdio->file); - stdio->file = NULL; -} - -upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } -upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } - - /* upb_stringsrc **************************************************************/ upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) { diff --git a/upb/bytestream.h b/upb/bytestream.h index 3217ee15c3..bdfcd73c06 100644 --- a/upb/bytestream.h +++ b/upb/bytestream.h @@ -409,56 +409,6 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) { // TODO: add flush() -/* upb_stdio ******************************************************************/ - -// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but -// more portable. -// -// Specifically, stdio functions acquire locks on every operation (unless you -// use the f{read,write,...}_unlocked variants, which are not standard) and -// performs redundant buffering (unless you disable it with setvbuf(), but we -// can only do this on newly-opened filehandles). 
- -typedef struct { - uint64_t ofs; - size_t len; - uint32_t refcount; - char data[]; -} upb_stdio_buf; - -// We use a single object for both bytesrc and bytesink for simplicity. -// The object is still not thread-safe, and may only be used by one reader -// and one writer at a time. -typedef struct { - upb_bytesrc src; - upb_bytesink sink; - FILE *file; - bool should_close; - upb_stdio_buf **bufs; - int nbuf; - uint32_t szbuf; -} upb_stdio; - -void upb_stdio_init(upb_stdio *stdio); -// Caller should call upb_stdio_flush prior to calling this to ensure that -// all data is flushed, otherwise data can be silently dropped if an error -// occurs flushing the remaining buffers. -void upb_stdio_uninit(upb_stdio *stdio); - -// Resets the object to read/write to the given "file." The caller is -// responsible for closing the file, which must outlive this object. -void upb_stdio_reset(upb_stdio *stdio, FILE *file); - -// As an alternative to upb_stdio_reset(), initializes the object by opening a -// file, and will handle closing it. This may result in more efficient I/O -// than the previous since we can call setvbuf() to disable buffering. -void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, - upb_status *s); - -upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); -upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); - - /* upb_stringsrc **************************************************************/ // bytesrc/bytesink for a simple contiguous string. 
diff --git a/upb/def.c b/upb/def.c index 5a5b0f463a..4bcc0c6267 100644 --- a/upb/def.c +++ b/upb/def.c @@ -59,24 +59,12 @@ bool upb_def_setfullname(upb_def *def, const char *fullname) { return true; } -upb_def *upb_def_dup(const upb_def *def, void *o) { - switch (def->type) { - case UPB_DEF_MSG: - return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); - case UPB_DEF_FIELD: - return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); - case UPB_DEF_ENUM: - return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); - default: assert(false); return NULL; - } -} - -void upb_def_ref(const upb_def *_def, void *owner) { +void upb_def_ref(const upb_def *_def, const void *owner) { upb_def *def = (upb_def*)_def; upb_refcount_ref(&def->refcount, owner); } -void upb_def_unref(const upb_def *_def, void *owner) { +void upb_def_unref(const upb_def *_def, const void *owner) { upb_def *def = (upb_def*)_def; if (!def) return; if (!upb_refcount_unref(&def->refcount, owner)) return; @@ -95,7 +83,24 @@ void upb_def_unref(const upb_def *_def, void *owner) { } while(def != base); } -static bool upb_def_init(upb_def *def, upb_deftype_t type, void *owner) { +void upb_def_donateref(const upb_def *_def, const void *from, const void *to) { + upb_def *def = (upb_def*)_def; + upb_refcount_donateref(&def->refcount, from, to); +} + +upb_def *upb_def_dup(const upb_def *def, const void *o) { + switch (def->type) { + case UPB_DEF_MSG: + return UPB_UPCAST(upb_msgdef_dup(upb_downcast_msgdef_const(def), o)); + case UPB_DEF_FIELD: + return UPB_UPCAST(upb_fielddef_dup(upb_downcast_fielddef_const(def), o)); + case UPB_DEF_ENUM: + return UPB_UPCAST(upb_enumdef_dup(upb_downcast_enumdef_const(def), o)); + default: assert(false); return NULL; + } +} + +static bool upb_def_init(upb_def *def, upb_deftype_t type, const void *owner) { def->type = type; def->is_finalized = false; def->fullname = NULL; @@ -107,11 +112,6 @@ static void upb_def_uninit(upb_def *def) { 
free(def->fullname); } -void upb_def_donateref(const upb_def *_def, void *from, void *to) { - upb_def *def = (upb_def*)_def; - upb_refcount_donateref(&def->refcount, from, to); -} - static void upb_def_getsuccessors(upb_refcount *refcount, void *closure) { upb_def *def = (upb_def*)refcount; switch (def->type) { @@ -236,7 +236,7 @@ err: /* upb_enumdef ****************************************************************/ -upb_enumdef *upb_enumdef_new(void *owner) { +upb_enumdef *upb_enumdef_new(const void *owner) { upb_enumdef *e = malloc(sizeof(*e)); if (!e) return NULL; if (!upb_def_init(&e->base, UPB_DEF_ENUM, owner)) goto err2; @@ -264,7 +264,7 @@ static void upb_enumdef_free(upb_enumdef *e) { free(e); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner) { +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { upb_enumdef *new_e = upb_enumdef_new(owner); if (!new_e) return NULL; upb_enum_iter i; @@ -349,7 +349,7 @@ const upb_typeinfo upb_types[UPB_NUM_TYPES] = { static void upb_fielddef_init_default(upb_fielddef *f); -upb_fielddef *upb_fielddef_new(void *owner) { +upb_fielddef *upb_fielddef_new(const void *owner) { upb_fielddef *f = malloc(sizeof(*f)); if (!f) return NULL; if (!upb_def_init(UPB_UPCAST(f), UPB_DEF_FIELD, owner)) { @@ -389,7 +389,7 @@ static void upb_fielddef_free(upb_fielddef *f) { free(f); } -upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner) { +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { upb_fielddef *newf = upb_fielddef_new(owner); if (!newf) return NULL; upb_fielddef_settype(newf, upb_fielddef_type(f)); @@ -626,7 +626,7 @@ bool upb_fielddef_setsubtypename(upb_fielddef *f, const char *name) { /* upb_msgdef *****************************************************************/ -upb_msgdef *upb_msgdef_new(void *owner) { +upb_msgdef *upb_msgdef_new(const void *owner) { upb_msgdef *m = malloc(sizeof(*m)); if (!m) return NULL; if (!upb_def_init(&m->base, UPB_DEF_MSG, owner)) goto 
err2; @@ -652,7 +652,7 @@ static void upb_msgdef_free(upb_msgdef *m) { free(m); } -upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner) { +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { upb_msgdef *newm = upb_msgdef_new(owner); if (!newm) return NULL; upb_msgdef_setsize(newm, upb_msgdef_size(m)); @@ -693,7 +693,7 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end) { } bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef *const *fields, int n, - void *ref_donor) { + const void *ref_donor) { // Check constraints for all fields before performing any action. for (int i = 0; i < n; i++) { upb_fielddef *f = fields[i]; @@ -725,36 +725,37 @@ void upb_msg_next(upb_msg_iter *iter) { upb_inttable_next(iter); } /* upb_symtab *****************************************************************/ -static void upb_symtab_free(upb_symtab *s) { - upb_strtable_iter i; - upb_strtable_begin(&i, &s->symtab); - for (; !upb_strtable_done(&i); upb_strtable_next(&i)) - upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); - upb_strtable_uninit(&s->symtab); - free(s); +upb_symtab *upb_symtab_new(const void *owner) { + upb_symtab *s = malloc(sizeof(*s)); + upb_refcount_init(&s->refcount, owner); + upb_strtable_init(&s->symtab); + return s; } -void upb_symtab_ref(const upb_symtab *_s) { - upb_symtab *s = (upb_symtab*)_s; - s->refcount++; +void upb_symtab_ref(const upb_symtab *s, const void *owner) { + upb_refcount_ref(&s->refcount, owner); } -void upb_symtab_unref(const upb_symtab *_s) { - upb_symtab *s = (upb_symtab*)_s; - if(s && --s->refcount == 0) { - upb_symtab_free(s); +void upb_symtab_unref(const upb_symtab *s, const void *owner) { + if(s && upb_refcount_unref(&s->refcount, owner)) { + upb_symtab *destroying = (upb_symtab*)s; + upb_strtable_iter i; + upb_strtable_begin(&i, &destroying->symtab); + for (; !upb_strtable_done(&i); upb_strtable_next(&i)) + upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&i)), s); + 
upb_strtable_uninit(&destroying->symtab); + upb_refcount_uninit(&destroying->refcount); + free(destroying); } } -upb_symtab *upb_symtab_new() { - upb_symtab *s = malloc(sizeof(*s)); - s->refcount = 1; - upb_strtable_init(&s->symtab); - return s; +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to) { + upb_refcount_donateref(&s->refcount, from, to); } const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, - upb_deftype_t type, void *owner) { + upb_deftype_t type, const void *owner) { int total = upb_strtable_count(&s->symtab); // We may only use part of this, depending on how many symbols are of the // correct type. @@ -775,7 +776,7 @@ const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *count, } const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, - void *owner) { + const void *owner) { const upb_value *v = upb_strtable_lookup(&s->symtab, sym); upb_def *ret = v ? upb_value_getptr(*v) : NULL; if (ret) upb_def_ref(ret, owner); @@ -783,7 +784,7 @@ const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym, } const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym, - void *owner) { + const void *owner) { const upb_value *v = upb_strtable_lookup(&s->symtab, sym); upb_def *def = v ? upb_value_getptr(*v) : NULL; upb_msgdef *ret = NULL; @@ -814,7 +815,7 @@ static upb_def *upb_resolvename(const upb_strtable *t, } const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, void *owner) { + const char *sym, const void *owner) { upb_def *ret = upb_resolvename(&s->symtab, base, sym); if (ret) upb_def_ref(ret, owner); return ret; @@ -829,7 +830,7 @@ const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, // // Returns true if defs that can reach "def" need to be duplicated into deftab. 
static bool upb_resolve_dfs(const upb_def *def, upb_strtable *deftab, - void *new_owner, upb_inttable *seen, + const void *new_owner, upb_inttable *seen, upb_status *s) { // Memoize results of this function for efficiency (since we're traversing a // DAG this is not needed to limit the depth of the search). diff --git a/upb/def.h b/upb/def.h index 452b809438..018f375768 100644 --- a/upb/def.h +++ b/upb/def.h @@ -74,13 +74,13 @@ typedef struct _upb_def { #define UPB_UPCAST(ptr) (&(ptr)->base) -// Call to ref/unref a def. Can be used at any time, but is not thread-safe -// until the def is finalized. While a def is finalized, everything reachable -// from that def is guaranteed to be alive. -void upb_def_ref(const upb_def *def, void *owner); -void upb_def_unref(const upb_def *def, void *owner); -void upb_def_donateref(const upb_def *def, void *from, void *to); -upb_def *upb_def_dup(const upb_def *def, void *owner); +// Call to ref/unref a def. These are thread-safe. If the def is finalized, +// it is guaranteed that any def reachable from a live def is also live. +void upb_def_ref(const upb_def *def, const void *owner); +void upb_def_unref(const upb_def *def, const void *owner); +void upb_def_donateref(const upb_def *def, const void *from, const void *to); + +upb_def *upb_def_dup(const upb_def *def, const void *owner); // A def is mutable until it has been finalized. bool upb_def_ismutable(const upb_def *def); @@ -189,12 +189,12 @@ typedef struct _upb_fielddef { } upb_fielddef; // Returns NULL if memory allocation failed. 
-upb_fielddef *upb_fielddef_new(void *owner); +upb_fielddef *upb_fielddef_new(const void *owner); -INLINE void upb_fielddef_ref(upb_fielddef *f, void *owner) { +INLINE void upb_fielddef_ref(upb_fielddef *f, const void *owner) { upb_def_ref(UPB_UPCAST(f), owner); } -INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) { +INLINE void upb_fielddef_unref(upb_fielddef *f, const void *owner) { upb_def_unref(UPB_UPCAST(f), owner); } @@ -203,7 +203,7 @@ INLINE void upb_fielddef_unref(upb_fielddef *f, void *owner) { // wasn't already. If the subdef is set but has no name (which is possible // since msgdefs are not required to have a name) the new fielddef's subdef // will be unset. -upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, void *owner); +upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner); INLINE bool upb_fielddef_ismutable(const upb_fielddef *f) { return upb_def_ismutable(UPB_UPCAST(f)); @@ -366,21 +366,21 @@ typedef struct _upb_msgdef { } upb_msgdef; // Returns NULL if memory allocation failed. -upb_msgdef *upb_msgdef_new(void *owner); +upb_msgdef *upb_msgdef_new(const void *owner); -INLINE void upb_msgdef_unref(const upb_msgdef *md, void *owner) { - upb_def_unref(UPB_UPCAST(md), owner); -} -INLINE void upb_msgdef_ref(const upb_msgdef *md, void *owner) { +INLINE void upb_msgdef_ref(const upb_msgdef *md, const void *owner) { upb_def_ref(UPB_UPCAST(md), owner); } +INLINE void upb_msgdef_unref(const upb_msgdef *md, const void *owner) { + upb_def_unref(UPB_UPCAST(md), owner); +} // Returns a new msgdef that is a copy of the given msgdef (and a copy of all // the fields) but with any references to submessages broken and replaced with // just the name of the submessage. Returns NULL if memory allocation failed. // This can be put back into another symtab and the names will be re-resolved // in the new context. 
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, void *owner); +upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner); // Read accessors. May be called at any time. INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } @@ -407,9 +407,9 @@ bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); // non-NULL, caller passes a ref on the fielddef from ref_donor to the msgdef, // otherwise caller retains its reference(s) on the defs in f. bool upb_msgdef_addfields( - upb_msgdef *m, upb_fielddef *const *f, int n, void *ref_donor); + upb_msgdef *m, upb_fielddef *const *f, int n, const void *ref_donor); INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, - void *ref_donor) { + const void *ref_donor) { return upb_msgdef_addfields(m, &f, 1, ref_donor); } @@ -460,14 +460,14 @@ typedef struct _upb_enumdef { } upb_enumdef; // Returns NULL if memory allocation failed. -upb_enumdef *upb_enumdef_new(void *owner); -INLINE void upb_enumdef_ref(const upb_enumdef *e, void *owner) { +upb_enumdef *upb_enumdef_new(const void *owner); +INLINE void upb_enumdef_ref(const upb_enumdef *e, const void *owner) { upb_def_ref(&e->base, owner); } -INLINE void upb_enumdef_unref(const upb_enumdef *e, void *owner) { +INLINE void upb_enumdef_unref(const upb_enumdef *e, const void *owner) { upb_def_unref(&e->base, owner); } -upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, void *owner); +upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner); INLINE int32_t upb_enumdef_default(const upb_enumdef *e) { return e->defaultval; @@ -525,13 +525,15 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter *iter) { // always create such tables themselves, but upb_symtab has logic for resolving // symbolic references, which is nontrivial. 
typedef struct { - uint32_t refcount; + upb_refcount refcount; upb_strtable symtab; } upb_symtab; -upb_symtab *upb_symtab_new(void); -void upb_symtab_ref(const upb_symtab *s); -void upb_symtab_unref(const upb_symtab *s); +upb_symtab *upb_symtab_new(const void *owner); +void upb_symtab_ref(const upb_symtab *s, const void *owner); +void upb_symtab_unref(const upb_symtab *s, const void *owner); +void upb_symtab_donateref( + const upb_symtab *s, const void *from, const void *to); // Resolves the given symbol using the rules described in descriptor.proto, // namely: @@ -544,15 +546,15 @@ void upb_symtab_unref(const upb_symtab *s); // If a def is found, the caller owns one ref on the returned def, owned by // owner. Otherwise returns NULL. const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, - const char *sym, void *owner); + const char *sym, const void *owner); // Finds an entry in the symbol table with this exact name. If a def is found, // the caller owns one ref on the returned def, owned by owner. Otherwise // returns NULL. const upb_def *upb_symtab_lookup( - const upb_symtab *s, const char *sym, void *owner); + const upb_symtab *s, const char *sym, const void *owner); const upb_msgdef *upb_symtab_lookupmsg( - const upb_symtab *s, const char *sym, void *owner); + const upb_symtab *s, const char *sym, const void *owner); // Gets an array of pointers to all currently active defs in this symtab. The // caller owns the returned array (which is of length *count) as well as a ref @@ -560,7 +562,7 @@ const upb_msgdef *upb_symtab_lookupmsg( // all types are returned, otherwise only defs of the required type are // returned. const upb_def **upb_symtab_getdefs( - const upb_symtab *s, int *n, upb_deftype_t type, void *owner); + const upb_symtab *s, int *n, upb_deftype_t type, const void *owner); // Adds the given defs to the symtab, resolving all symbols (including enum // default values) and finalizing the defs. 
Only one def per name may be in diff --git a/upb/handlers.c b/upb/handlers.c index ea5a054a33..8350f64033 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -11,7 +11,7 @@ /* upb_mhandlers **************************************************************/ -static upb_mhandlers *upb_mhandlers_new() { +static upb_mhandlers *upb_mhandlers_new(void) { upb_mhandlers *m = malloc(sizeof(*m)); upb_inttable_init(&m->fieldtab); m->startmsg = NULL; diff --git a/upb/handlers.h b/upb/handlers.h index 9083a2ef09..6d8f9f29a0 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -18,7 +18,6 @@ #ifndef UPB_HANDLERS_H #define UPB_HANDLERS_H -#include #include "upb/upb.h" #include "upb/def.h" #include "upb/bytestream.h" diff --git a/upb/refcount.c b/upb/refcount.c index a15547a5d2..d729a2a49b 100644 --- a/upb/refcount.c +++ b/upb/refcount.c @@ -6,7 +6,6 @@ */ #include -#include #include "upb/refcount.h" // TODO(haberman): require client to define these if ref debugging is on. @@ -120,10 +119,55 @@ bool upb_refcount_findscc(upb_refcount **refs, int n, upb_getsuccessors *func) { return true; } +#ifdef UPB_DEBUG_REFS +static void upb_refcount_track(const upb_refcount *r, const void *owner) { + // Caller must not already own a ref. + assert(upb_inttable_lookup(r->refs, (uintptr_t)owner) == NULL); + + // If a ref is leaked we want to blame the leak on whoever leaked the + // ref, not on who originally allocated the refcounted object. We accomplish + // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some + // memory and set up pointers like so: + // + // upb_refcount + // +----------+ +---------+ + // | count |<-+ | + // +----------+ +----------+ + // | table |---X-->| malloc'd | + // +----------+ | memory | + // +----------+ + // + // Since the "malloc'd memory" is allocated inside of "ref" and free'd in + // unref, it will cause a leak if not unref'd.
And since the leaked memory + // points to the object itself, the object will be considered "indirectly + // lost" by tools like Valgrind and not shown unless requested (which is good + // because the object's creator may not be responsible for the leak). But we + // have to hide the pointer marked "X" above from Valgrind, otherwise the + // malloc'd memory will appear to be indirectly leaked and the object itself + // will still be considered the primary leak. We hide this pointer from + // Valgrind (et al.) by doing a bitwise not on it. + const upb_refcount **target = malloc(sizeof(void*)); + uintptr_t obfuscated = ~(uintptr_t)target; + *target = r; + upb_inttable_insert(r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); +} + +static void upb_refcount_untrack(const upb_refcount *r, const void *owner) { + upb_value v; + bool success = upb_inttable_remove(r->refs, (uintptr_t)owner, &v); + assert(success); + if (success) { + // Must un-obfuscate the pointer (see above). + free((void*)(~upb_value_getuint64(v))); + } +} +#endif + /* upb_refcount **************************************************************/ -bool upb_refcount_init(upb_refcount *r, void *owner) { +bool upb_refcount_init(upb_refcount *r, const void *owner) { + (void)owner; r->count = malloc(sizeof(uint32_t)); if (!r->count) return false; // Initializing this here means upb_refcount_findscc() can only run once for @@ -132,7 +176,8 @@ bool upb_refcount_init(upb_refcount *r, void *owner) { r->next = r; #ifdef UPB_DEBUG_REFS // We don't detect malloc() failures for UPB_DEBUG_REFS.
- upb_inttable_init(&r->refs); + r->refs = malloc(sizeof(*r->refs)); + upb_inttable_init(r->refs); *r->count = 0; upb_refcount_ref(r, owner); #else @@ -144,81 +189,48 @@ bool upb_refcount_init(upb_refcount *r, void *owner) { void upb_refcount_uninit(upb_refcount *r) { (void)r; #ifdef UPB_DEBUG_REFS - assert(upb_inttable_count(&r->refs) == 0); - upb_inttable_uninit(&r->refs); -#endif -} - -// Moves an existing ref from ref_donor to new_owner, without changing the -// overall ref count. -void upb_refcount_donateref(upb_refcount *r, void *from, void *to) { - (void)r; (void)from; (void)to; - assert(from != to); -#ifdef UPB_DEBUG_REFS - upb_refcount_ref(r, to); - upb_refcount_unref(r, from); + assert(upb_inttable_count(r->refs) == 0); + upb_inttable_uninit(r->refs); + free(r->refs); #endif } // Thread-safe operations ////////////////////////////////////////////////////// -// Ref and unref are thread-safe. -void upb_refcount_ref(upb_refcount *r, void *owner) { +void upb_refcount_ref(const upb_refcount *r, const void *owner) { (void)owner; upb_atomic_inc(r->count); #ifdef UPB_DEBUG_REFS UPB_LOCK; - // Caller must not already own a ref. - assert(upb_inttable_lookup(&r->refs, (uintptr_t)owner) == NULL); - - // If a ref is leaked we want to blame the leak on the whoever leaked the - // ref, not on who originally allocated the refcounted object. We accomplish - // this as follows. When a ref is taken in DEBUG_REFS mode, we malloc() some - // memory and arrange setup pointers like so: - // - // upb_refcount - // +----------+ +---------+ - // | count |<-+ | - // +----------+ +----------+ - // | table |---X-->| malloc'd | - // +----------+ | memory | - // +----------+ - // - // Since the "malloc'd memory" is allocated inside of "ref" and free'd in - // unref, it will cause a leak if not unref'd. 
And since the leaked memory - // points to the object itself, the object will be considered "indirectly - // lost" by tools like Valgrind and not shown unless requested (which is good - // because the object's creator may not be responsible for the leak). But we - // have to hide the pointer marked "X" above from Valgrind, otherwise the - // malloc'd memory will appear to be indirectly leaked and the object itself - // will still be considered the primary leak. We hide this pointer from - // Valgrind (et all) by doing a bitwise not on it. - upb_refcount **target = malloc(sizeof(void*)); - uintptr_t obfuscated = ~(uintptr_t)target; - *target = r; - upb_inttable_insert(&r->refs, (uintptr_t)owner, upb_value_uint64(obfuscated)); + upb_refcount_track(r, owner); UPB_UNLOCK; #endif } -bool upb_refcount_unref(upb_refcount *r, void *owner) { +bool upb_refcount_unref(const upb_refcount *r, const void *owner) { (void)owner; bool ret = upb_atomic_dec(r->count); #ifdef UPB_DEBUG_REFS UPB_LOCK; - upb_value v; - bool success = upb_inttable_remove(&r->refs, (uintptr_t)owner, &v); - assert(success); - if (success) { - // Must un-obfuscate the pointer (see above). - free((void*)(~upb_value_getuint64(v))); - } + upb_refcount_untrack(r, owner); UPB_UNLOCK; #endif if (ret) free(r->count); return ret; } +void upb_refcount_donateref( + const upb_refcount *r, const void *from, const void *to) { + (void)r; (void)from; (void)to; + assert(from != to); +#ifdef UPB_DEBUG_REFS + UPB_LOCK; + upb_refcount_track(r, to); + upb_refcount_untrack(r, from); + UPB_UNLOCK; +#endif +} + bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2) { return r->count == r2->count; } diff --git a/upb/refcount.h b/upb/refcount.h index cb2bda9aa5..91ad3b82d8 100644 --- a/upb/refcount.h +++ b/upb/refcount.h @@ -28,7 +28,9 @@ typedef struct _upb_refcount { uint16_t index; // For SCC algorithm. uint16_t lowlink; // For SCC algorithm. 
#ifdef UPB_DEBUG_REFS - upb_inttable refs; + // Make this a pointer so that we can modify it inside of const methods + // without ugly casts. + upb_inttable *refs; #endif } upb_refcount; @@ -36,15 +38,11 @@ typedef struct _upb_refcount { // Initializes the refcount with a single ref for the given owner. Returns // NULL if memory could not be allocated. -bool upb_refcount_init(upb_refcount *r, void *owner); +bool upb_refcount_init(upb_refcount *r, const void *owner); // Uninitializes the refcount. May only be called after unref() returns true. void upb_refcount_uninit(upb_refcount *r); -// Moves an existing ref from ref_donor to new_owner, without changing the -// overall ref count. -void upb_refcount_donateref(upb_refcount *r, void *from, void *to); - // Finds strongly-connected components among some set of objects and merges all // refcounts that share a SCC. The given function will be called when the // algorithm needs to visit children of a particular object; the function @@ -59,10 +57,15 @@ void upb_refcount_visit(upb_refcount *obj, upb_refcount *subobj, void *closure); // Increases the ref count, the new ref is owned by "owner" which must not // already own a ref. Circular reference chains are not allowed. -void upb_refcount_ref(upb_refcount *r, void *owner); +void upb_refcount_ref(const upb_refcount *r, const void *owner); // Release a ref owned by owner, returns true if that was the last ref. -bool upb_refcount_unref(upb_refcount *r, void *owner); +bool upb_refcount_unref(const upb_refcount *r, const void *owner); + +// Moves an existing ref from ref_donor to new_owner, without changing the +// overall ref count. +void upb_refcount_donateref( + const upb_refcount *r, const void *from, const void *to); // Returns true if these two objects share a refcount. 
bool upb_refcount_merged(const upb_refcount *r, const upb_refcount *r2); diff --git a/upb/stdc/README b/upb/stdc/README new file mode 100644 index 0000000000..1815af49c9 --- /dev/null +++ b/upb/stdc/README @@ -0,0 +1,15 @@ +This directory contains code that is ANSI C but uses parts of the +standard library that are not available to very limited environments +like Linux Kernel modules. The standard calls environments like this +"freestanding implementations." + +This does *not* imply that the upb core can be compiled directly on a +freestanding implementation. Even the core uses library functions +that are not directly available on freestanding implementations +(notably malloc()/free(), vsnprintf(), and assert()). So compiling on +freestanding implementations may require implementing compatibility +versions of functions like malloc(). + +Also, Linux is not technically a freestanding implementation either, +since it does not accept functions that return float or double on +x86-64 (these use SSE registers which are disabled in kernel mode). diff --git a/upb/stdc/error.c b/upb/stdc/error.c new file mode 100644 index 0000000000..313866c645 --- /dev/null +++ b/upb/stdc/error.c @@ -0,0 +1,44 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2009 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Handling of errno. 
+ */ + +#include "upb/stdc/error.h" + +#include +#include + +void upb_status_fromerrno(upb_status *status, int code) { + if (code != 0 && !upb_errno_is_wouldblock(code)) { + status->error = true; + upb_status_setcode(status, &upb_stdc_errorspace, code); + } +} + +bool upb_errno_is_wouldblock(int code) { + return +#ifdef EAGAIN + code == EAGAIN || +#endif +#ifdef EWOULDBLOCK + code == EWOULDBLOCK || +#endif + false; +} + +bool upb_stdc_codetostr(int code, char *buf, size_t len) { + // strerror() may use static buffers and is not guaranteed to be thread-safe, + // but it appears that it is not subject to buffer overflows in practice, and + // it is used by other portable and high-quality software like Lua. For more + // discussion see: http://thread.gmane.org/gmane.comp.lang.lua.general/89506 + char *err = strerror(code); + if (strlen(err) >= len) return false; + strcpy(buf, err); + return true; +} + +upb_errorspace upb_stdc_errorspace = {"stdc", &upb_stdc_codetostr}; diff --git a/upb/stdc/error.h b/upb/stdc/error.h new file mode 100644 index 0000000000..98020970ad --- /dev/null +++ b/upb/stdc/error.h @@ -0,0 +1,27 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * Handling of errno. + */ + +#include "upb/upb.h" + +#ifndef UPB_STDC_ERROR_H_ +#define UPB_STDC_ERROR_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern upb_errorspace upb_stdc_errorspace; +void upb_status_fromerrno(upb_status *status, int code); +bool upb_errno_is_wouldblock(int code); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_STDC_ERROR_H_ */ diff --git a/upb/stdc/io.c b/upb/stdc/io.c new file mode 100644 index 0000000000..1abed32d8e --- /dev/null +++ b/upb/stdc/io.c @@ -0,0 +1,175 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details.
+ * Author: Josh Haberman + */ + +#include "upb/stdc/io.h" + +#include "upb/stdc/error.h" + +// We can make this configurable if necessary. +#define BUF_SIZE 32768 + +/* upb_stdio ******************************************************************/ + +int upb_stdio_cmpbuf(const void *_key, const void *_elem) { + const uint64_t *ofs = _key; + const upb_stdio_buf *buf = _elem; + return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE); +} + +static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) { + // TODO: it is probably faster to linear search short lists, and to + // special-case the last one or two bufs. + return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf); +} + +static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) { + upb_stdio_buf **reuse = NULL; // XXX + int num_reused = 0, num_inuse = 0; + + // Could sweep only a subset of bufs if this was a hotspot. + for (int i = 0; i < s->nbuf; i++) { + upb_stdio_buf *buf = s->bufs[i]; + if (buf->refcount > 0) { + s->bufs[num_inuse++] = buf; + } else { + reuse[num_reused++] = buf; + } + } + assert(num_reused + num_inuse == s->nbuf); + memcpy(s->bufs + num_inuse, reuse, num_reused * sizeof(upb_stdio_buf*)); + if (num_reused == 0) { + ++s->nbuf; + s->bufs = realloc(s->bufs, s->nbuf * sizeof(*s->bufs)); + s->bufs[s->nbuf-1] = malloc(sizeof(upb_stdio_buf) + BUF_SIZE); + return s->bufs[s->nbuf-1]; + } + return s->bufs[s->nbuf-num_reused]; +} + +void upb_stdio_discard(void *src, uint64_t ofs) { + (void)src; + (void)ofs; +} + +upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) { + (void)ofs; + upb_stdio *stdio = (upb_stdio*)src; + upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio); +retry: + *bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file); + buf->len = *bytes_read; + if (*bytes_read < (size_t)BUF_SIZE) { + // Error or EOF. 
+ if (feof(stdio->file)) { + upb_status_seteof(&stdio->src.status); + return UPB_BYTE_EOF; + } + if (ferror(stdio->file)) { +#ifdef EINTR + // If we encounter a client who doesn't want to retry EINTR, we can easily + // add a boolean property of the stdio that controls this behavior. + if (errno == EINTR) { + clearerr(stdio->file); + goto retry; + } +#endif + upb_status_fromerrno(&stdio->src.status, errno); + return upb_errno_is_wouldblock(errno) ? + UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR; + } + assert(false); + } + return UPB_BYTE_OK; +} + +void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + memcpy(dst, buf->data + ofs, BUF_SIZE - ofs); + len -= (BUF_SIZE - ofs); + dst += (BUF_SIZE - ofs); + while (len > 0) { + ++buf; + size_t bytes = UPB_MIN(len, BUF_SIZE); + memcpy(dst, buf->data, bytes); + len -= bytes; + dst += bytes; + } +} + +const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) { + upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs); + ofs -= buf->ofs; + *len = BUF_SIZE - ofs; + return &buf->data[ofs]; +} + +#if 0 +upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); + upb_strlen_t len = upb_string_len(str); + upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); + if (written < len) { + upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream."); + return -1; + } + return written; +} + +uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, + const char *fmt, va_list args) { + upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); + int written = vfprintf(stdio->file, fmt, args); + if (written < 0) { + upb_status_seterrf(status, "Error writing to stdio stream."); + return -1; + } + return written; +} +#endif + +void upb_stdio_init(upb_stdio *stdio) { + static 
upb_bytesrc_vtbl bytesrc_vtbl = { + &upb_stdio_fetch, + &upb_stdio_discard, + &upb_stdio_copy, + &upb_stdio_getptr, + }; + upb_bytesrc_init(&stdio->src, &bytesrc_vtbl); + + //static upb_bytesink_vtbl bytesink_vtbl = { + // upb_stdio_putstr, + // upb_stdio_vprintf + //}; + //upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); +} + +void upb_stdio_reset(upb_stdio* stdio, FILE *file) { + stdio->file = file; + stdio->should_close = false; +} + +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s) { + FILE *f = fopen(filename, mode); + if (!f) { + upb_status_fromerrno(s, errno); + return; + } + setvbuf(stdio->file, NULL, _IONBF, 0); // Disable buffering; we do our own. + upb_stdio_reset(stdio, f); + stdio->should_close = true; +} + +void upb_stdio_uninit(upb_stdio *stdio) { + // Can't report status; caller should flush() to ensure data is written. + if (stdio->should_close) fclose(stdio->file); + stdio->file = NULL; +} + +upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } +upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } diff --git a/upb/stdc/io.h b/upb/stdc/io.h new file mode 100644 index 0000000000..fd19befa0a --- /dev/null +++ b/upb/stdc/io.h @@ -0,0 +1,73 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2012 Google Inc. See LICENSE for details. + * Author: Josh Haberman + * + * ANSI C file I/O. + */ + +#ifndef UPB_STDC_IO_H_ +#define UPB_STDC_IO_H_ + +#include +#include "upb/bytestream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* upb_stdio ******************************************************************/ + +// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but +// more portable. 
+// +// Specifically, stdio functions acquire locks on every operation (unless you +// use the f{read,write,...}_unlocked variants, which are not standard) and +// perform redundant buffering (unless you disable it with setvbuf(), but we +// can only do this on newly-opened filehandles). + +typedef struct { + uint64_t ofs; + size_t len; + uint32_t refcount; + char data[]; +} upb_stdio_buf; + +// We use a single object for both bytesrc and bytesink for simplicity. +// The object is still not thread-safe, and may only be used by one reader +// and one writer at a time. +typedef struct { + upb_bytesrc src; + upb_bytesink sink; + FILE *file; + bool should_close; + upb_stdio_buf **bufs; + int nbuf; + uint32_t szbuf; +} upb_stdio; + +void upb_stdio_init(upb_stdio *stdio); +// Caller should call upb_stdio_flush prior to calling this to ensure that +// all data is flushed, otherwise data can be silently dropped if an error +// occurs flushing the remaining buffers. +void upb_stdio_uninit(upb_stdio *stdio); + +// Resets the object to read/write to the given "file." The caller is +// responsible for closing the file, which must outlive this object. +void upb_stdio_reset(upb_stdio *stdio, FILE *file); + +// As an alternative to upb_stdio_reset(), initializes the object by opening a +// file, and will handle closing it. This may result in more efficient I/O +// than the previous since we can call setvbuf() to disable buffering. +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s); + +upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_STDC_IO_H_ */ diff --git a/upb/table.c b/upb/table.c index 4e3544eea6..1cf944a337 100644 --- a/upb/table.c +++ b/upb/table.c @@ -86,6 +86,7 @@ static upb_value *upb_table_lookup(const upb_table *t, upb_tabkey key, // The given key must not already exist in the table.
static void upb_table_insert(upb_table *t, upb_tabkey key, upb_value val, upb_hashfunc_t *hash, upb_eqlfunc_t *eql) { + (void)eql; assert(upb_table_lookup(t, key, hash, eql) == NULL); t->count++; upb_tabent *mainpos_e = hash(t, key); diff --git a/upb/upb.c b/upb/upb.c index c172bd3c80..5a00961a7d 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -91,40 +91,6 @@ void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) { status->str = NULL; } -void upb_status_fromerrno(upb_status *status) { - if (errno != 0 && !upb_errno_is_wouldblock()) { - status->error = true; - upb_status_setcode(status, &upb_posix_errorspace, errno); - } -} - -bool upb_errno_is_wouldblock() { - return -#ifdef EAGAIN - errno == EAGAIN || -#endif -#ifdef EWOULDBLOCK - errno == EWOULDBLOCK || -#endif - false; -} - -bool upb_posix_codetostr(int code, char *buf, size_t len) { - if (strerror_r(code, buf, len) == -1) { - if (errno == EINVAL) { - size_t actual_len = - snprintf(buf, len, "Invalid POSIX error number %d\n", code); - return actual_len >= len; - } else if (errno == ERANGE) { - return false; - } - assert(false); - } - return true; -} - -upb_errorspace upb_posix_errorspace = {"POSIX", &upb_posix_codetostr}; - int upb_vrprintf(char **buf, size_t *size, size_t ofs, const char *fmt, va_list args) { // Try once without reallocating. We have to va_copy because we might have diff --git a/upb/upb.h b/upb/upb.h index ef440fb733..245d86fda2 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -31,6 +31,10 @@ extern "C" { #define UPB_NORETURN #endif +#ifndef UINT16_MAX +#define UINT16_MAX 65535 +#endif + #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) @@ -135,11 +139,21 @@ typedef struct { // // Construct a new upb_value from an int32. 
// upb_value upb_value_int32(int32_t val); -#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ - INLINE ctype upb_value_get ## name(upb_value val) { \ - assert(val.type == proto_type); \ - return val.val.membername; \ +#define WRITERS(name, membername, ctype, proto_type) \ + INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ + val->val.uint64 = 0; \ + SET_TYPE(val->type, proto_type); \ + val->val.membername = cval; \ } \ + INLINE upb_value upb_value_ ## name(ctype val) { \ + upb_value ret; \ + upb_value_set ## name(&ret, val); \ + return ret; \ + } + +#define ALL(name, membername, ctype, proto_type) \ + /* Can't reuse WRITERS() here unfortunately because "bool" is a macro \ + * that expands to _Bool, so it ends up defining eg. upb_value_set_Bool */ \ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ val->val.uint64 = 0; \ SET_TYPE(val->type, proto_type); \ @@ -149,25 +163,39 @@ typedef struct { upb_value ret; \ upb_value_set ## name(&ret, val); \ return ret; \ + } \ + INLINE ctype upb_value_get ## name(upb_value val) { \ + assert(val.type == proto_type); \ + return val.val.membername; \ } -UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_CTYPE_INT32); -UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_CTYPE_INT64); -UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); -UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); -UPB_VALUE_ACCESSORS(double, _double, double, UPB_CTYPE_DOUBLE); -UPB_VALUE_ACCESSORS(float, _float, float, UPB_CTYPE_FLOAT); -UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_CTYPE_BOOL); -UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_CTYPE_PTR); -UPB_VALUE_ACCESSORS(byteregion, byteregion, struct _upb_byteregion*, - UPB_CTYPE_BYTEREGION); +ALL(int32, int32, int32_t, UPB_CTYPE_INT32); +ALL(int64, int64, int64_t, UPB_CTYPE_INT64); +ALL(uint32, uint32, uint32_t, UPB_CTYPE_UINT32); +ALL(uint64, uint64, uint64_t, UPB_CTYPE_UINT64); +ALL(bool, _bool, bool, UPB_CTYPE_BOOL); +ALL(ptr, _void, void*, 
UPB_CTYPE_PTR); +ALL(byteregion, byteregion, struct _upb_byteregion*, UPB_CTYPE_BYTEREGION); // upb_fielddef should never be modified from a callback // (ie. when they're getting passed through a upb_value). -UPB_VALUE_ACCESSORS(fielddef, fielddef, const struct _upb_fielddef*, - UPB_CTYPE_FIELDDEF); +ALL(fielddef, fielddef, const struct _upb_fielddef*, UPB_CTYPE_FIELDDEF); + +#ifdef __KERNEL__ +// Linux kernel modules are compiled without SSE and therefore are incapable +// of compiling functions that return floating-point values, so we define as +// macros instead and lose the type check. +WRITERS(double, _double, double, UPB_CTYPE_DOUBLE); +WRITERS(float, _float, float, UPB_CTYPE_FLOAT); +#define upb_value_getdouble(v) (v.val._double) +#define upb_value_getfloat(v) (v.val._float) +#else +ALL(double, _double, double, UPB_CTYPE_DOUBLE); +ALL(float, _float, float, UPB_CTYPE_FLOAT); +#endif /* __KERNEL__ */ -#undef UPB_VALUE_ACCESSORS +#undef WRITERS +#undef ALL extern upb_value UPB_NO_VALUE; @@ -218,10 +246,6 @@ INLINE void upb_status_seteof(upb_status *s) { s->eof = true; } const char *upb_status_getstr(const upb_status *s); void upb_status_copy(upb_status *to, const upb_status *from); -extern upb_errorspace upb_posix_errorspace; -void upb_status_fromerrno(upb_status *status); -bool upb_errno_is_wouldblock(); - // Like vasprintf (which allocates a string large enough for the result), but // uses *buf (which can be NULL) as a starting point and reallocates it only if // the new value will not fit. "size" is updated to reflect the allocated size