diff --git a/Makefile b/Makefile index 06bf064306..c0b9d90d42 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ all: lib USER_CFLAGS=$(strip $(shell test -f perf-cppflags && cat perf-cppflags)) # If the user doesn't specify an -O setting, we default to -O3, except -# for upb_def which gets -Os. +# for def which gets -Os. ifeq (, $(findstring -O, $(USER_CFLAGS))) USER_CFLAGS += -O3 DEF_OPT = -Os @@ -39,9 +39,9 @@ endif CC=gcc CXX=g++ CFLAGS=-std=gnu99 -INCLUDE=-Isrc -Itests -I. +INCLUDE=-Itests -I. CPPFLAGS=$(INCLUDE) -Wall -Wextra $(USER_CFLAGS) -LDLIBS=-lpthread src/libupb.a +LDLIBS=-lpthread upb/libupb.a # Build with "make Q=" to see all commands that are being executed. Q=@ @@ -60,7 +60,7 @@ endif # different -D options, which can include different header files. deps: gen-deps.sh Makefile $(CORE) $(STREAM) $(Q) CPPFLAGS="$(CPPFLAGS)" ./gen-deps.sh $(CORE) $(STREAM) - $(E) Regenerating dependencies for src/... + $(E) Regenerating dependencies for upb/... $(ALLSRC): perf-cppflags @@ -71,25 +71,23 @@ $(ALLSRC): perf-cppflags # The core library. CORE= \ - src/upb.c \ - src/upb_handlers.c \ - src/upb_descriptor.c \ - src/upb_table.c \ - src/upb_def.c \ - src/upb_msg.c \ - src/upb_varint.c \ - - -# Common encoders/decoders -- you're almost certain to want these. -STREAM= \ - src/upb_decoder.c \ - src/upb_stdio.c \ - src/upb_textprinter.c \ - src/upb_strstream.c \ - src/upb_glue.c \ + upb/upb.c \ + upb/handlers.c \ + upb/descriptor.c \ + upb/table.c \ + upb/def.c \ + upb/msg.c \ + upb/bytestream.c \ + +# Library for the protocol buffer format (both text and binary). +PB= \ + upb/pb/decoder.c \ + upb/pb/varint.c \ + upb/pb/glue.c \ + upb/pb/textprinter.c \ # Parts of core that are yet to be converted. -OTHERSRC=src/upb_encoder.c +OTHERSRC=upb/pb/encoder.c BENCHMARKS_SRC= \ benchmarks/main.c \ @@ -114,13 +112,13 @@ ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC) clean_leave_profile: rm -rf $(LIBUPB) $(LIBUPB_PIC) rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.dSYM) - rm -rf src/upb_decoder_x86.h + rm -rf upb/pb/decoder_x86.h rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb* - rm -rf src/jit_debug_elf_file.o - rm -rf src/jit_debug_elf_file.h + rm -rf upb/pb/jit_debug_elf_file.o + rm -rf upb/pb/jit_debug_elf_file.h rm -rf $(TESTS) tests/t.* - rm -rf src/descriptor.pb - rm -rf src/upbc deps + rm -rf upb/descriptor.pb + rm -rf tools/upbc deps rm -rf lang_ext/lua/upb.so cd lang_ext/python && python setup.py clean --all @@ -128,9 +126,9 @@ clean: clean_leave_profile rm -rf $(call rwildcard,,*.gcno) $(call rwildcard,,*.gcda) # Core library (libupb.a). -SRC=$(CORE) $(STREAM) -LIBUPB=src/libupb.a -LIBUPB_PIC=src/libupb_pic.a +SRC=$(CORE) $(PB) +LIBUPB=upb/libupb.a +LIBUPB_PIC=upb/libupb_pic.a lib: $(LIBUPB) @@ -138,7 +136,7 @@ OBJ=$(patsubst %.c,%.o,$(SRC)) PICOBJ=$(patsubst %.c,%.lo,$(SRC)) ifneq (, $(findstring DUPB_USE_JIT_X64, $(USER_CFLAGS))) -src/upb_decoder.o: src/upb_decoder_x86.h +upb/pb/decoder.o: upb/pb/decoder_x86.h ifeq (, $(findstring DNDEBUG, $(USER_CFLAGS))) $(error "JIT only works with -DNDEBUG enabled!") endif @@ -158,29 +156,29 @@ $(LIBUPB_PIC): $(PICOBJ) $(E) 'CC -fPIC' $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -fPIC -# Override the optimization level for upb_def.o, because it is not in the +# Override the optimization level for def.o, because it is not in the # critical path but gets very large when -O3 is used. -src/upb_def.o: src/upb_def.c +upb/def.o: upb/def.c $(E) CC $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) $(DEF_OPT) -c -o $@ $< -src/upb_def.lo: src/upb_def.c +upb/def.lo: upb/def.c $(E) 'CC -fPIC' $< $(Q) $(CC) $(CFLAGS) $(CPPFLAGS) $(DEF_OPT) -c -o $@ $< -fPIC -src/upb_decoder_x86.h: src/upb_decoder_x86.dasc +upb/pb/decoder_x86.h: upb/pb/decoder_x86.dasc $(E) DYNASM $< - $(Q) lua dynasm/dynasm.lua src/upb_decoder_x86.dasc > src/upb_decoder_x86.h + $(Q) lua dynasm/dynasm.lua upb/pb/decoder_x86.dasc > upb/pb/decoder_x86.h ifneq ($(shell uname), Darwin) -src/jit_debug_elf_file.o: src/jit_debug_elf_file.s +upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s $(E) GAS $< - $(Q) gcc -c src/jit_debug_elf_file.s -o src/jit_debug_elf_file.o + $(Q) gcc -c upb/pb/jit_debug_elf_file.s -o upb/pb/jit_debug_elf_file.o -src/jit_debug_elf_file.h: src/jit_debug_elf_file.o +upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file.o $(E) XXD $< - $(Q) xxd -i src/jit_debug_elf_file.o > src/jit_debug_elf_file.h -src/upb_decoder_x86.h: src/jit_debug_elf_file.h + $(Q) xxd -i upb/pb/jit_debug_elf_file.o > upb/pb/jit_debug_elf_file.h +upb/pb/decoder_x86.h: upb/pb/jit_debug_elf_file.h endif # Function to expand a wildcard pattern recursively. @@ -188,16 +186,16 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $ -# Regenerating the auto-generated files in src/. -src/descriptor.pb: src/descriptor.proto +# Regenerating the auto-generated files in upb/. +upb/descriptor.pb: upb/descriptor.proto @# TODO: replace with upbc - protoc src/descriptor.proto -osrc/descriptor.pb + protoc upb/descriptor.proto -oupb/descriptor.pb -descriptorgen: src/descriptor.pb src/upbc +descriptorgen: upb/descriptor.pb tools/upbc @# Regenerate descriptor_const.h - ./src/upbc -o src/descriptor src/descriptor.pb + ./tools/upbc -o upb/descriptor upb/descriptor.pb -src/upbc: src/upbc.c $(LIBUPB) +tools/upbc: tools/upbc.c $(LIBUPB) # Language extensions. python: $(LIBUPB_PIC) @@ -275,7 +273,7 @@ tests/test_table: tests/test_table.cc $(E) CXX $< $(Q) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB) -tests/tests: src/libupb.a +tests/tests: upb/libupb.a # Benchmarks. ################################################################## @@ -398,4 +396,4 @@ LUAEXT=lang_ext/lua/upb.so lua: $(LUAEXT) lang_ext/lua/upb.so: lang_ext/lua/upb.c $(LIBUPB_PIC) @echo CC lang_ext/lua/upb.c - @$(CC) $(CFLAGS) $(CPPFLAGS) $(LUA_CPPFLAGS) -fpic -shared -o $@ $< src/libupb_pic.a $(LUA_LDFLAGS) + @$(CC) $(CFLAGS) $(CPPFLAGS) $(LUA_CPPFLAGS) -fpic -shared -o $@ $< upb/libupb_pic.a $(LUA_LDFLAGS) diff --git a/benchmarks/parsestream.upb_table.c b/benchmarks/parsestream.upb_table.c index a321010ed2..a4022b156b 100644 --- a/benchmarks/parsestream.upb_table.c +++ b/benchmarks/parsestream.upb_table.c @@ -2,10 +2,10 @@ #include "main.c" #include -#include "upb_def.h" -#include "upb_decoder.h" -#include "upb_strstream.h" -#include "upb_glue.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/pb/decoder.h" +#include "upb/pb/glue.h" static char *input_str; static size_t input_len; diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c index 76cbf2135b..88f355d3a1 100644 --- a/benchmarks/parsetostruct.upb_table.c +++ b/benchmarks/parsetostruct.upb_table.c @@ -1,11 +1,11 @@ #include "main.c" -#include "upb_def.h" -#include "upb_decoder.h" -#include "upb_strstream.h" -#include "upb_glue.h" -#include "upb_msg.h" +#include "upb/bytestream.h" +#include "upb/def.h" +#include "upb/msg.h" +#include "upb/pb/decoder.h" +#include "upb/pb/glue.h" static upb_msgdef *def; static size_t len; diff --git a/src/upb_stdio.h b/src/upb_stdio.h deleted file mode 100644 index 858830cadb..0000000000 --- a/src/upb_stdio.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * This file provides upb_bytesrc and upb_bytesink implementations for - * ANSI C stdio, which is less efficient than posixfd, but more portable. - * - * Specifically, stdio functions acquire locks on every operation (unless you - * use the f{read,write,...}_unlocked variants, which are not standard) and - * performs redundant buffering (unless you disable it with setvbuf(), but we - * can only do this on newly-opened filehandles). - */ - -#include -#include "upb_bytestream.h" - -#ifndef UPB_STDIO_H_ -#define UPB_STDIO_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - uint64_t ofs; - uint32_t refcount; - char data[]; -} upb_stdio_buf; - -// We use a single object for both bytesrc and bytesink for simplicity. -// The object is still not thread-safe, and may only be used by one reader -// and one writer at a time. -typedef struct { - upb_bytesrc src; - upb_bytesink sink; - FILE *file; - bool should_close; - upb_stdio_buf **bufs; - uint32_t nbuf, szbuf; -} upb_stdio; - -void upb_stdio_init(upb_stdio *stdio); -// Caller should call upb_stdio_flush prior to calling this to ensure that -// all data is flushed, otherwise data can be silently dropped if an error -// occurs flushing the remaining buffers. -void upb_stdio_uninit(upb_stdio *stdio); - -// Resets the object to read/write to the given "file." The caller is -// responsible for closing the file, which must outlive this object. -void upb_stdio_reset(upb_stdio *stdio, FILE *file); - -// As an alternative to upb_stdio_reset(), initializes the object by opening a -// file, and will handle closing it. This may result in more efficient I/O -// than the previous since we can call setvbuf() to disable buffering. -void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, - upb_status *s); - -// Must be called to cleanup after the object, including closing the file if -// it was opened with upb_stdio_open() (which can fail, hence the status). -// - -upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); -upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/src/upb_strstream.c b/src/upb_strstream.c deleted file mode 100644 index 9e17d75b1b..0000000000 --- a/src/upb_strstream.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2010 Google Inc. See LICENSE for details. - * Author: Josh Haberman - */ - -#include "upb_strstream.h" - -#include - - -/* upb_stringsrc **************************************************************/ - -size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { - upb_stringsrc *src = _src; - size_t bytes = src->len - ofs; - if (bytes == 0) s->code = UPB_EOF; - return bytes; -} - -void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) { - upb_stringsrc *src = _src; - memcpy(dst, src->str + src_ofs, len); -} - -const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) { - upb_stringsrc *src = _src; - *len = src->len - ofs; - return src->str + ofs; -} - -void upb_stringsrc_init(upb_stringsrc *s) { - static upb_bytesrc_vtbl vtbl = { - &upb_stringsrc_fetch, - &upb_stringsrc_read, - &upb_stringsrc_getptr, - NULL, NULL, NULL, NULL - }; - upb_bytesrc_init(&s->bytesrc, &vtbl); - s->str = NULL; -} - -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) { - s->str = str; - s->len = len; -} - -void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; } - -upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { - return &s->bytesrc; -} - - -/* upb_stringsink *************************************************************/ - -void upb_stringsink_uninit(upb_stringsink *s) { - free(s->str); -} - -// Resets the stringsink to a state where it will append to the given string. -// The string must be newly created or recycled. The stringsink will take a -// reference on the string, so the caller need not ensure that it outlives the -// stringsink. A stringsink can be reset multiple times. -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) { - free(s->str); - s->str = str; - s->len = 0; - s->size = size; -} - -upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { - return &s->bytesink; -} - -static int32_t upb_stringsink_vprintf(void *_s, upb_status *status, - const char *fmt, va_list args) { - (void)status; // TODO: report realloc() errors. - upb_stringsink *s = _s; - int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args); - if (ret >= 0) s->len += ret; - return ret; -} - -bool upb_stringsink_write(void *_s, const char *buf, size_t len, - upb_status *status) { - (void)status; // TODO: report realloc() errors. - upb_stringsink *s = _s; - if (s->len + len > s->size) { - while(s->len + len > s->size) s->size *= 2; - s->str = realloc(s->str, s->size); - } - memcpy(s->str + s->len, buf, len); - s->len += len; - return true; -} - -void upb_stringsink_init(upb_stringsink *s) { - static upb_bytesink_vtbl vtbl = { - upb_stringsink_write, - upb_stringsink_vprintf - }; - upb_bytesink_init(&s->bytesink, &vtbl); - s->str = NULL; -} diff --git a/src/upb_strstream.h b/src/upb_strstream.h deleted file mode 100644 index e57406eb89..0000000000 --- a/src/upb_strstream.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * upb - a minimalist implementation of protocol buffers. - * - * Copyright (c) 2009-2010 Google Inc. See LICENSE for details. - * Author: Josh Haberman - * - * This file contains upb_bytesrc and upb_bytesink implementations for - * upb_string. - */ - -#ifndef UPB_STRSTREAM_H -#define UPB_STRSTREAM_H - -#include "upb_bytestream.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* upb_stringsrc **************************************************************/ - -struct _upb_stringsrc { - upb_bytesrc bytesrc; - const char *str; - size_t len; -}; -typedef struct _upb_stringsrc upb_stringsrc; - -// Create/free a stringsrc. -void upb_stringsrc_init(upb_stringsrc *s); -void upb_stringsrc_uninit(upb_stringsrc *s); - -// Resets the stringsrc to a state where it will vend the given string. The -// stringsrc will take a reference on the string, so the caller need not ensure -// that it outlives the stringsrc. A stringsrc can be reset multiple times. -void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); - -// Returns the upb_bytesrc* for this stringsrc. -upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); - - -/* upb_stringsink *************************************************************/ - -struct _upb_stringsink { - upb_bytesink bytesink; - char *str; - size_t len, size; -}; -typedef struct _upb_stringsink upb_stringsink; - -// Create/free a stringsrc. -void upb_stringsink_init(upb_stringsink *s); -void upb_stringsink_uninit(upb_stringsink *s); - -// Resets the sink's string to "str", which the sink takes ownership of. -// "str" may be NULL, which will make the sink allocate a new string. -void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size); - -// Releases ownership of the returned string (which is "len" bytes long) and -// resets the internal string to be empty again (as if reset were called with -// NULL). -const char *upb_stringsink_release(upb_stringsink *s, size_t *len); - -// Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. -upb_bytesink *upb_stringsink_bytesink(); - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 1c3bed01b5..88b87416f8 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -1,9 +1,9 @@ #include -#include "upb_decoder.h" -#include "upb_textprinter.h" -#include "upb_stdio.h" -#include "upb_glue.h" +#include "upb/bytestream.h" +#include "upb/pb/decoder.h" +#include "upb/pb/glue.h" +#include "upb/pb/textprinter.h" int main(int argc, char *argv[]) { if (argc < 3) { @@ -23,14 +23,12 @@ int main(int argc, char *argv[]) { upb_read_descriptor(symtab, desc, desc_len, &status); if (!upb_ok(&status)) { fprintf(stderr, "Error parsing descriptor: "); - upb_printerr(&status); + upb_status_print(&status, stderr); return 1; } free((void*)desc); - upb_string *name = upb_strdupc(argv[2]); - upb_def *md = upb_symtab_lookup(symtab, name); - upb_string_unref(name); + upb_def *md = upb_symtab_lookup(symtab, argv[2]); if (!md) { fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]); return 1; @@ -57,12 +55,12 @@ int main(int argc, char *argv[]) { upb_decoder_initforhandlers(&d, handlers); upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UINT64_MAX, p); - upb_clearerr(&status); + upb_status_clear(&status); upb_decoder_decode(&d, &status); if (!upb_ok(&status)) { fprintf(stderr, "Error parsing input: "); - upb_printerr(&status); + upb_status_print(&status, stderr); } upb_status_uninit(&status); diff --git a/tests/test_def.c b/tests/test_def.c index 248d14a2f4..1f014f62fd 100644 --- a/tests/test_def.c +++ b/tests/test_def.c @@ -1,6 +1,6 @@ #undef NDEBUG /* ensure tests always assert. */ -#include "upb_def.h" +#include "upb/def.h" #include int main() { diff --git a/tests/test_table.cc b/tests/test_table.cc index 0b47874ce6..8d98f2c40b 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -1,15 +1,16 @@ #undef NDEBUG /* ensure tests always assert. */ -#include "upb_table.h" +#include "upb/table.h" #include "test_util.h" #include +#include +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include bool benchmark = false; #define CPU_TIME_PER_TEST 0.5 diff --git a/tests/test_varint.c b/tests/test_varint.c index 97902959ea..7dce6ab44c 100644 --- a/tests/test_varint.c +++ b/tests/test_varint.c @@ -4,7 +4,7 @@ * Copyright (c) 2011 Google Inc. See LICENSE for details. */ -#include "upb_varint.h" +#include "upb/pb/varint.h" #include "upb_test.h" static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) { diff --git a/tests/test_vs_proto2.cc b/tests/test_vs_proto2.cc index d220ff3d78..0444d40bfb 100644 --- a/tests/test_vs_proto2.cc +++ b/tests/test_vs_proto2.cc @@ -11,10 +11,10 @@ #include #include #include +#include "upb/def.h" +#include "upb/msg.h" +#include "upb/pb/glue.h" #include "upb_test.h" -#include "upb_def.h" -#include "upb_glue.h" -#include "upb_msg.h" #include MESSAGE_HFILE diff --git a/tests/tests.c b/tests/tests.c index aa692f63bd..5cbbd785d7 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -2,11 +2,11 @@ #include #include #include -#include "upb_def.h" -#include "upb_glue.h" +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/pb/decoder.h" +#include "upb/pb/glue.h" #include "upb_test.h" -#include "upb_handlers.h" -#include "upb_decoder.h" static upb_symtab *load_test_proto() { upb_symtab *s = upb_symtab_new(); diff --git a/src/upbc.c b/tools/upbc.c similarity index 100% rename from src/upbc.c rename to tools/upbc.c diff --git a/src/upb_atomic.h b/upb/atomic.h similarity index 100% rename from src/upb_atomic.h rename to upb/atomic.h diff --git a/src/upb_stdio.c b/upb/bytestream.c similarity index 60% rename from src/upb_stdio.c rename to upb/bytestream.c index 20a3c15101..846b8ee6eb 100644 --- a/src/upb_stdio.c +++ b/upb/bytestream.c @@ -5,7 +5,7 @@ * Author: Josh Haberman */ -#include "upb_stdio.h" +#include "upb/bytestream.h" #include #include @@ -14,8 +14,14 @@ // We can make this configurable if necessary. #define BUF_SIZE 32768 +char *upb_strref_dup(struct _upb_strref *r) { + char *ret = (char*)malloc(r->len + 1); + upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret); + ret[r->len] = '\0'; + return ret; +} -/* upb_bytesrc methods ********************************************************/ +/* upb_stdio ******************************************************************/ int upb_stdio_cmpbuf(const void *_key, const void *_elem) { const uint64_t *ofs = _key; @@ -93,9 +99,6 @@ void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) { (void)len; } - -/* upb_bytesink methods *******************************************************/ - #if 0 upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink)); @@ -164,3 +167,99 @@ void upb_stdio_uninit(upb_stdio *stdio) { upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; } upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } + + +/* upb_stringsrc **************************************************************/ + +size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { + upb_stringsrc *src = _src; + size_t bytes = src->len - ofs; + if (bytes == 0) s->code = UPB_EOF; + return bytes; +} + +void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) { + upb_stringsrc *src = _src; + memcpy(dst, src->str + src_ofs, len); +} + +const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) { + upb_stringsrc *src = _src; + *len = src->len - ofs; + return src->str + ofs; +} + +void upb_stringsrc_init(upb_stringsrc *s) { + static upb_bytesrc_vtbl vtbl = { + &upb_stringsrc_fetch, + &upb_stringsrc_read, + &upb_stringsrc_getptr, + NULL, NULL, NULL, NULL + }; + upb_bytesrc_init(&s->bytesrc, &vtbl); + s->str = NULL; +} + +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) { + s->str = str; + s->len = len; +} + +void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; } + +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { + return &s->bytesrc; +} + + +/* upb_stringsink *************************************************************/ + +void upb_stringsink_uninit(upb_stringsink *s) { + free(s->str); +} + +// Resets the stringsink to a state where it will append to the given string. +// The string must be newly created or recycled. The stringsink will take a +// reference on the string, so the caller need not ensure that it outlives the +// stringsink. A stringsink can be reset multiple times. +void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) { + free(s->str); + s->str = str; + s->len = 0; + s->size = size; +} + +upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { + return &s->bytesink; +} + +static int32_t upb_stringsink_vprintf(void *_s, upb_status *status, + const char *fmt, va_list args) { + (void)status; // TODO: report realloc() errors. + upb_stringsink *s = _s; + int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args); + if (ret >= 0) s->len += ret; + return ret; +} + +bool upb_stringsink_write(void *_s, const char *buf, size_t len, + upb_status *status) { + (void)status; // TODO: report realloc() errors. + upb_stringsink *s = _s; + if (s->len + len > s->size) { + while(s->len + len > s->size) s->size *= 2; + s->str = realloc(s->str, s->size); + } + memcpy(s->str + s->len, buf, len); + s->len += len; + return true; +} + +void upb_stringsink_init(upb_stringsink *s) { + static upb_bytesink_vtbl vtbl = { + upb_stringsink_write, + upb_stringsink_vprintf + }; + upb_bytesink_init(&s->bytesink, &vtbl); + s->str = NULL; +} diff --git a/src/upb_bytestream.h b/upb/bytestream.h similarity index 61% rename from src/upb_bytestream.h rename to upb/bytestream.h index 836abb0868..2a6f7d20a9 100644 --- a/src/upb_bytestream.h +++ b/upb/bytestream.h @@ -52,9 +52,9 @@ INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) { src->vtbl = vtbl; } -// Fetches at least minlen bytes starting at ofs, returning the actual number -// of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref -// on the fetched region. It is safe to re-fetch existing regions but only if +// Fetches at least one byte starting at ofs, returning the actual number of +// bytes fetched (or 0 on error: see "s" for details). Gives caller a ref on +// the fetched region. It is safe to re-fetch existing regions but only if // they are ref'd. "ofs" may not greater than the end of the region that was // previously fetched. INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) { @@ -135,12 +135,7 @@ typedef struct _upb_strref { // Copies the contents of the strref into a newly-allocated, NULL-terminated // string. -INLINE char *upb_strref_dup(struct _upb_strref *r) { - char *ret = (char*)malloc(r->len + 1); - upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret); - ret[r->len] = '\0'; - return ret; -} +char *upb_strref_dup(struct _upb_strref *r); /* upb_bytesink ***************************************************************/ @@ -187,9 +182,102 @@ INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, // TODO: add flush() -/* upb_cbuf *******************************************************************/ +/* upb_stdio ******************************************************************/ + +// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but +// more portable. +// +// Specifically, stdio functions acquire locks on every operation (unless you +// use the f{read,write,...}_unlocked variants, which are not standard) and +// performs redundant buffering (unless you disable it with setvbuf(), but we +// can only do this on newly-opened filehandles). -// A circular buffer implementation for bytesrcs that do internal buffering. +typedef struct { + uint64_t ofs; + uint32_t refcount; + char data[]; +} upb_stdio_buf; + +// We use a single object for both bytesrc and bytesink for simplicity. +// The object is still not thread-safe, and may only be used by one reader +// and one writer at a time. +typedef struct { + upb_bytesrc src; + upb_bytesink sink; + FILE *file; + bool should_close; + upb_stdio_buf **bufs; + uint32_t nbuf, szbuf; +} upb_stdio; + +void upb_stdio_init(upb_stdio *stdio); +// Caller should call upb_stdio_flush prior to calling this to ensure that +// all data is flushed, otherwise data can be silently dropped if an error +// occurs flushing the remaining buffers. +void upb_stdio_uninit(upb_stdio *stdio); + +// Resets the object to read/write to the given "file." The caller is +// responsible for closing the file, which must outlive this object. +void upb_stdio_reset(upb_stdio *stdio, FILE *file); + +// As an alternative to upb_stdio_reset(), initializes the object by opening a +// file, and will handle closing it. This may result in more efficient I/O +// than the previous since we can call setvbuf() to disable buffering. +void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode, + upb_status *s); + +upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); +upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); + + +/* upb_stringsrc **************************************************************/ + +// bytesrc/bytesink for a simple contiguous string. + +struct _upb_stringsrc { + upb_bytesrc bytesrc; + const char *str; + size_t len; +}; +typedef struct _upb_stringsrc upb_stringsrc; + +// Create/free a stringsrc. +void upb_stringsrc_init(upb_stringsrc *s); +void upb_stringsrc_uninit(upb_stringsrc *s); + +// Resets the stringsrc to a state where it will vend the given string. The +// stringsrc will take a reference on the string, so the caller need not ensure +// that it outlives the stringsrc. A stringsrc can be reset multiple times. +void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len); + +// Returns the upb_bytesrc* for this stringsrc. +upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); + + +/* upb_stringsink *************************************************************/ + +struct _upb_stringsink { + upb_bytesink bytesink; + char *str; + size_t len, size; +}; +typedef struct _upb_stringsink upb_stringsink; + +// Create/free a stringsrc. +void upb_stringsink_init(upb_stringsink *s); +void upb_stringsink_uninit(upb_stringsink *s); + +// Resets the sink's string to "str", which the sink takes ownership of. +// "str" may be NULL, which will make the sink allocate a new string. +void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size); + +// Releases ownership of the returned string (which is "len" bytes long) and +// resets the internal string to be empty again (as if reset were called with +// NULL). +const char *upb_stringsink_release(upb_stringsink *s, size_t *len); + +// Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. +upb_bytesink *upb_stringsink_bytesink(); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_def.c b/upb/def.c similarity index 99% rename from src/upb_def.c rename to upb/def.c index 4cd80b1269..000b7f2557 100644 --- a/src/upb_def.c +++ b/upb/def.c @@ -8,7 +8,7 @@ #include #include #include -#include "upb_def.h" +#include "upb/def.h" #define alignof(t) offsetof(struct { char c; t x; }, x) diff --git a/src/upb_def.h b/upb/def.h similarity index 99% rename from src/upb_def.h rename to upb/def.h index 34f5009334..4a7a01712c 100644 --- a/src/upb_def.h +++ b/upb/def.h @@ -19,8 +19,8 @@ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ -#include "upb_atomic.h" -#include "upb_table.h" +#include "upb/atomic.h" +#include "upb/table.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_descriptor.c b/upb/descriptor.c similarity index 99% rename from src/upb_descriptor.c rename to upb/descriptor.c index f70f1bab9a..48f0165ada 100644 --- a/src/upb_descriptor.c +++ b/upb/descriptor.c @@ -5,11 +5,10 @@ * Author: Josh Haberman */ -#include "upb_descriptor.h" - #include #include -#include "upb_def.h" +#include "upb/def.h" +#include "upb/descriptor.h" // Returns a newly allocated string that joins input strings together, for example: // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" diff --git a/src/upb_descriptor.h b/upb/descriptor.h similarity index 98% rename from src/upb_descriptor.h rename to upb/descriptor.h index ee05e2f9f9..4d658fb4fa 100644 --- a/src/upb_descriptor.h +++ b/upb/descriptor.h @@ -12,7 +12,7 @@ #ifndef UPB_DESCRIPTOR_H #define UPB_DESCRIPTOR_H -#include "upb_handlers.h" +#include "upb/handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/descriptor.proto b/upb/descriptor.proto similarity index 100% rename from src/descriptor.proto rename to upb/descriptor.proto diff --git a/src/descriptor_const.h b/upb/descriptor_const.h similarity index 100% rename from src/descriptor_const.h rename to upb/descriptor_const.h diff --git a/src/upb_handlers.c b/upb/handlers.c similarity index 99% rename from src/upb_handlers.c rename to upb/handlers.c index f513dfd38b..05300c0cc1 100644 --- a/src/upb_handlers.c +++ b/upb/handlers.c @@ -6,7 +6,7 @@ */ #include -#include "upb_handlers.h" +#include "upb/handlers.h" /* upb_mhandlers **************************************************************/ diff --git a/src/upb_handlers.h b/upb/handlers.h similarity index 99% rename from src/upb_handlers.h rename to upb/handlers.h index 1ccc59f02a..e3d91cfc30 100644 --- a/src/upb_handlers.h +++ b/upb/handlers.h @@ -15,9 +15,9 @@ #define UPB_HANDLERS_H #include -#include "upb.h" -#include "upb_def.h" -#include "upb_bytestream.h" +#include "upb/upb.h" +#include "upb/def.h" +#include "upb/bytestream.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_msg.c b/upb/msg.c similarity index 99% rename from src/upb_msg.c rename to upb/msg.c index 83fa6ff49b..a2b2cf72fe 100644 --- a/src/upb_msg.c +++ b/upb/msg.c @@ -7,8 +7,8 @@ * Data structure for storing a message of protobuf data. */ -#include "upb.h" -#include "upb_msg.h" +#include "upb/upb.h" +#include "upb/msg.h" void upb_msg_clear(void *msg, upb_msgdef *md) { memset(msg, 0, md->hasbit_bytes); diff --git a/src/upb_msg.h b/upb/msg.h similarity index 99% rename from src/upb_msg.h rename to upb/msg.h index af328e3ab8..625d805d60 100644 --- a/src/upb_msg.h +++ b/upb/msg.h @@ -26,8 +26,8 @@ #define UPB_MSG_H #include -#include "upb_def.h" -#include "upb_handlers.h" +#include "upb/def.h" +#include "upb/handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_decoder.c b/upb/pb/decoder.c similarity index 98% rename from src/upb_decoder.c rename to upb/pb/decoder.c index fed48af604..218c7803bb 100644 --- a/src/upb_decoder.c +++ b/upb/pb/decoder.c @@ -8,11 +8,10 @@ #include #include #include -#include "bswap.h" -#include "upb_bytestream.h" -#include "upb_decoder.h" -#include "upb_varint.h" -#include "upb_msg.h" +#include "upb/bytestream.h" +#include "upb/msg.h" +#include "upb/pb/decoder.h" +#include "upb/pb/varint.h" // Used for frames that have no specific end offset: groups, repeated primitive // fields inside groups, and the top-level message. @@ -23,7 +22,7 @@ #define Dst_REF (d->dynasm) #define Dst (d) #include "dynasm/dasm_proto.h" -#include "upb_decoder_x86.h" +#include "upb/pb/decoder_x86.h" #endif // It's unfortunate that we have to micro-manage the compiler this way, @@ -201,12 +200,12 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { uint32_t u32; upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t)); - return le32toh(u32); + return u32; // TODO: proper byte swapping } FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { uint64_t u64; upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t)); - return le64toh(u64); + return u64; // TODO: proper byte swapping } INLINE upb_strref *upb_decode_string(upb_decoder *d) { diff --git a/src/upb_decoder.h b/upb/pb/decoder.h similarity index 99% rename from src/upb_decoder.h rename to upb/pb/decoder.h index 7a813bfb77..398135903b 100644 --- a/src/upb_decoder.h +++ b/upb/pb/decoder.h @@ -20,7 +20,7 @@ #include #include #include -#include "upb_handlers.h" +#include "upb/handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_decoder_x86.dasc b/upb/pb/decoder_x86.dasc similarity index 99% rename from src/upb_decoder_x86.dasc rename to upb/pb/decoder_x86.dasc index 800b099a1b..19043c6b5f 100644 --- a/src/upb_decoder_x86.dasc +++ b/upb/pb/decoder_x86.dasc @@ -40,7 +40,7 @@ // for a few magic numbers and doing a dumb string replacement. #ifndef __APPLE__ -#include "jit_debug_elf_file.h" +#include "upb/pb/jit_debug_elf_file.h" typedef enum { @@ -69,9 +69,9 @@ void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile_ void upb_reg_jit_gdb(upb_decoder *d) { // Create debug info. - size_t elf_len = src_jit_debug_elf_file_o_len; + size_t elf_len = upb_pb_jit_debug_elf_file_o_len; d->debug_info = malloc(elf_len); - memcpy(d->debug_info, src_jit_debug_elf_file_o, elf_len); + memcpy(d->debug_info, upb_pb_jit_debug_elf_file_o, elf_len); uint64_t *p = (void*)d->debug_info; for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) { if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; } @@ -270,7 +270,7 @@ void upb_reg_jit_gdb(upb_decoder *d) { |.endmacro #include -#include "upb_varint.h" +#include "upb/pb/varint.h" // PTR should point to the beginning of the tag. static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag, diff --git a/src/upb_encoder.c b/upb/pb/encoder.c similarity index 100% rename from src/upb_encoder.c rename to upb/pb/encoder.c diff --git a/src/upb_encoder.h b/upb/pb/encoder.h similarity index 100% rename from src/upb_encoder.h rename to upb/pb/encoder.h diff --git a/src/upb_glue.c b/upb/pb/glue.c similarity index 95% rename from src/upb_glue.c rename to upb/pb/glue.c index 1f5bd3fedb..3763ae09c0 100644 --- a/src/upb_glue.c +++ b/upb/pb/glue.c @@ -5,12 +5,12 @@ * Author: Josh Haberman */ -#include "upb_decoder.h" -#include "upb_descriptor.h" -#include "upb_glue.h" -#include "upb_msg.h" -#include "upb_strstream.h" -#include "upb_textprinter.h" +#include "upb/bytestream.h" +#include "upb/descriptor.h" +#include "upb/msg.h" +#include "upb/pb/decoder.h" +#include "upb/pb/glue.h" +#include "upb/pb/textprinter.h" void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md, upb_status *status) { diff --git a/src/upb_glue.h b/upb/pb/glue.h similarity index 99% rename from src/upb_glue.h rename to upb/pb/glue.h index 0448c2f168..5359120ede 100644 --- a/src/upb_glue.h +++ b/upb/pb/glue.h @@ -27,7 +27,7 @@ #define UPB_GLUE_H #include -#include "upb.h" +#include "upb/upb.h" #ifdef __cplusplus extern "C" { diff --git a/src/jit_debug_elf_file.s b/upb/pb/jit_debug_elf_file.s similarity index 100% rename from src/jit_debug_elf_file.s rename to upb/pb/jit_debug_elf_file.s diff --git a/src/upb_textprinter.c b/upb/pb/textprinter.c similarity index 98% rename from src/upb_textprinter.c rename to upb/pb/textprinter.c index 14cce9b637..ce029d58ba 100644 --- a/src/upb_textprinter.c +++ b/upb/pb/textprinter.c @@ -5,12 +5,11 @@ * Author: Josh Haberman */ -#include "upb_textprinter.h" - #include #include #include #include +#include "upb/pb/textprinter.h" struct _upb_textprinter { upb_bytesink *bytesink; @@ -29,7 +28,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref, char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); char buf[strref->len], *src = buf; char *end = src + strref->len; - upb_strref_read(strref, src); + upb_bytesrc_read(strref->bytesrc, strref->stream_offset, strref->len, buf); // I think hex is prettier and more useful, but proto2 uses octal; should // investigate whether it can parse hex also. diff --git a/src/upb_textprinter.h b/upb/pb/textprinter.h similarity index 92% rename from src/upb_textprinter.h rename to upb/pb/textprinter.h index 2d70d2e124..94552081bd 100644 --- a/src/upb_textprinter.h +++ b/upb/pb/textprinter.h @@ -8,8 +8,8 @@ #ifndef UPB_TEXT_H_ #define UPB_TEXT_H_ -#include "upb_bytestream.h" -#include "upb_handlers.h" +#include "upb/bytestream.h" +#include "upb/handlers.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_varint.c b/upb/pb/varint.c similarity index 98% rename from src/upb_varint.c rename to upb/pb/varint.c index 25052aaf23..45caec1a9f 100644 --- a/src/upb_varint.c +++ b/upb/pb/varint.c @@ -5,7 +5,7 @@ * Author: Josh Haberman */ -#include "upb_varint.h" +#include "upb/pb/varint.h" // Given an encoded varint v, returns an integer with a single bit set that // indicates the end of the varint. Subtracting one from this value will diff --git a/src/upb_varint.h b/upb/pb/varint.h similarity index 99% rename from src/upb_varint.h rename to upb/pb/varint.h index 87fca2b732..1bbd193b0a 100644 --- a/src/upb_varint.h +++ b/upb/pb/varint.h @@ -11,9 +11,9 @@ #ifndef UPB_VARINT_DECODER_H_ #define UPB_VARINT_DECODER_H_ -#include "upb.h" #include #include +#include "upb/upb.h" #ifdef __cplusplus extern "C" { diff --git a/src/upb_table.c b/upb/table.c similarity index 99% rename from src/upb_table.c rename to upb/table.c index fc9e9deb90..71aca16bbf 100644 --- a/src/upb_table.c +++ b/upb/table.c @@ -8,7 +8,7 @@ * can be useful for debugging. */ -#include "upb_table.h" +#include "upb/table.h" #include #include diff --git a/src/upb_table.h b/upb/table.h similarity index 100% rename from src/upb_table.h rename to upb/table.h diff --git a/src/upb.c b/upb/upb.c similarity index 98% rename from src/upb.c rename to upb/upb.c index 0f3ea18f0a..0ff082f38c 100644 --- a/src/upb.c +++ b/upb/upb.c @@ -10,9 +10,9 @@ #include #include #include -#include "descriptor_const.h" -#include "upb.h" -#include "upb_bytestream.h" +#include "upb/descriptor_const.h" +#include "upb/upb.h" +#include "upb/bytestream.h" #define alignof(t) offsetof(struct { char c; t x; }, x) #define TYPE_INFO(wire_type, ctype, inmemory_type) \ diff --git a/src/upb.h b/upb/upb.h similarity index 99% rename from src/upb.h rename to upb/upb.h index b15340e4ab..153057df68 100644 --- a/src/upb.h +++ b/upb/upb.h @@ -15,7 +15,7 @@ #include // only for size_t. #include #include "descriptor_const.h" -#include "upb_atomic.h" +#include "atomic.h" #ifdef __cplusplus extern "C" {